From ff91180ddf7284f3a292b1a76a83352d35321439 Mon Sep 17 00:00:00 2001 From: Arne Deprez Date: Sat, 3 Oct 2020 10:18:20 +0000 Subject: [PATCH] forkae update --- forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/aead-common.h | 15 +++++++++++++-- forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/forkae.c | 397 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/internal-forkskinny-avr.S |forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/internal-forkskinny.c | 974 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 
forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/internal-forkskinny.h | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/internal-skinnyutil.h | 30 ++++++++++++++++++++++++++++++ forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/aead-common.h | 15 +++++++++++++-- forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/forkae.c | 457 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/internal-forkskinny-avr.S |forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/internal-forkskinny.c |forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/internal-forkskinny.h | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/internal-skinnyutil.h | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/aead-common.h | 15 +++++++++++++-- forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/forkae.c | 397 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/internal-forkskinny-avr.S |forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/internal-forkskinny.c |forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/internal-forkskinny.h | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/internal-skinnyutil.h | 30 ++++++++++++++++++++++++++++++ forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/aead-common.h | 15 +++++++++++++-- forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/forkae.c | 457 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/internal-forkskinny-avr.S |forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/internal-forkskinny.c | 1291 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/internal-forkskinny.h | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/internal-skinnyutil.h | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/aead-common.h | 15 +++++++++++++-- forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/forkae.c | 397 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/internal-forkskinny-avr.S |forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/internal-forkskinny.c |forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/internal-forkskinny.h | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/internal-skinnyutil.h | 30 ++++++++++++++++++++++++++++++ forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/aead-common.h | 15 +++++++++++++-- forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/forkae.c | 457 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/internal-forkskinny-avr.S |forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/internal-forkskinny.c |forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/internal-forkskinny.h | 154 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/internal-skinnyutil.h | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/aead-common.h | 15 +++++++++++++-- forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/forkae.c | 397 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/internal-forkskinny-avr.S |forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/internal-forkskinny.c |forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/internal-forkskinny.h | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/internal-skinnyutil.h | 30 ++++++++++++++++++++++++++++++ forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/aead-common.h | 15 +++++++++++++-- forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/forkae.c | 457 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/internal-forkskinny-avr.S |forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/internal-forkskinny.c |forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/internal-forkskinny.h | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/internal-skinnyutil.h | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/aead-common.h | 15 +++++++++++++-- forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/forkae.c | 397 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/internal-forkskinny-avr.S |forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/internal-forkskinny.c |forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/internal-forkskinny.h | 168 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/internal-skinnyutil.h | 30 ++++++++++++++++++++++++++++++ forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/aead-common.h | 15 +++++++++++++-- forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/forkae.c | 457 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/internal-forkskinny-avr.S |forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/internal-forkskinny.c |forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/internal-forkskinny.h | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/internal-skinnyutil.h | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/aead-common.h | 15 +++++++++++++-- forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/forkae.c | 397 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/internal-forkskinny-avr.S |forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/internal-forkskinny.c |forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/internal-forkskinny.h | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/internal-skinnyutil.h | 30 ++++++++++++++++++++++++++++++ forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/aead-common.h | 15 +++++++++++++-- forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/forkae.c | 457 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/internal-forkskinny-avr.S |forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/internal-forkskinny.c | 1291 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/internal-forkskinny.h | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/internal-skinnyutil.h | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 72 files changed, 118368 insertions(+), 9708 deletions(-) create mode 100644 forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/internal-forkskinny-avr.S create mode 100644 
forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/internal-forkskinny-avr.S create mode 100644 forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/internal-forkskinny-avr.S create mode 100644 forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/internal-forkskinny-avr.S create mode 100644 forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/internal-forkskinny-avr.S create mode 100644 forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/internal-forkskinny-avr.S create mode 100644 forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/internal-forkskinny-avr.S create mode 100644 forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/internal-forkskinny-avr.S create mode 100644 forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/internal-forkskinny-avr.S create mode 100644 forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/internal-forkskinny-avr.S create mode 100644 forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/internal-forkskinny-avr.S create mode 100644 forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/internal-forkskinny-avr.S diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/aead-common.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/aead-common.h index 2be95eb..8429f59 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/aead-common.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/aead-common.h @@ -154,7 +154,7 @@ typedef void (*aead_xof_squeeze_t) /** * \brief No special AEAD features. */ -#define AEAD_FLAG_NONE 0x0000 +#define AEAD_FLAG_NONE 0x0000 /** * \brief The natural byte order of the AEAD cipher is little-endian. 
@@ -166,7 +166,18 @@ typedef void (*aead_xof_squeeze_t) * numbers as nonces. The application needs to know whether the sequence * number should be packed into the leading or trailing bytes of the nonce. */ -#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 +#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 + +/** + * \brief The AEAD mode provides side-channel protection for the key. + */ +#define AEAD_FLAG_SC_PROTECT_KEY 0x0002 + +/** + * \brief The AEAD mode provides side-channel protection for all block + * operations. + */ +#define AEAD_FLAG_SC_PROTECT_ALL 0x0004 /** * \brief Meta-information about an AEAD cipher. diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/forkae.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/forkae.c index 4a9671a..ead8ada 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/forkae.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/forkae.c @@ -22,7 +22,6 @@ #include "forkae.h" #include "internal-forkskinny.h" -#include "internal-util.h" #include aead_cipher_t const forkae_paef_64_192_cipher = { @@ -138,3 +137,399 @@ aead_cipher_t const forkae_saef_128_256_cipher = { #define FORKAE_TWEAKEY_REDUCED_SIZE 32 #define FORKAE_BLOCK_FUNC forkskinny_128_256 #include "internal-forkae-saef.h" + +/* Helper functions to implement the forking encrypt/decrypt block operations + * on top of the basic "perform N rounds" functions in internal-forkskinny.c */ + +void forkskinny_128_256_encrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_tweakey_schedule_t tks; + + /* Iterate the tweakey schedule */ + if (output_left && output_right){ + forkskinny_128_256_init_tks(&tks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); + } + else{ + forkskinny_128_256_init_tks(&tks, key, 
FORKSKINNY_128_256_ROUNDS_BEFORE + FORKSKINNY_128_256_ROUNDS_AFTER); + } + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_256_rounds(&state, &tks, 0, FORKSKINNY_128_256_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_256_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_256_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_256_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + 
le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_256_decrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_state_t fstate; + forkskinny_128_256_tweakey_schedule_t tks; + + /* Iterate the tweakey schedule */ + forkskinny_128_256_init_tks(&tks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); + + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_256_inv_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_256_inv_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_256_rounds + (&fstate, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + 
le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 12, fstate.S[3]); +} + +void forkskinny_128_384_encrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_tweakey_schedule_t tks; + + /* Iterate key schedule */ + if (output_left && output_right){ + forkskinny_128_384_init_tks(&tks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); + } + else{ + forkskinny_128_384_init_tks(&tks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER); + } + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_384_rounds(&state, &tks, 0, FORKSKINNY_128_384_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_384_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 
0x8844a251U; + forkskinny_128_384_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_384_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_384_decrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_state_t fstate; + forkskinny_128_384_tweakey_schedule_t tks; + + /* Iterate key schedule */ + forkskinny_128_384_init_tks(&tks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); + + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_384_inv_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another 
"before" rounds */ + forkskinny_128_384_inv_rounds(&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_384_rounds + (&fstate, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 12, fstate.S[3]); +} + +void forkskinny_64_192_encrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_tweakey_schedule_t tks; + + /* Iterate key schedule */ + if (output_left && output_right){ + forkskinny_64_192_init_tks(&tks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); + } + else{ + forkskinny_64_192_init_tks(&tks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + FORKSKINNY_64_192_ROUNDS_AFTER); + } + + /* Unpack the input */ + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Run all of the rounds before the forking point */ + forkskinny_64_192_rounds(&state, &tks, 0, FORKSKINNY_64_192_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint16_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_64_192_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, + 
FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x1249U; /* Branching constant */ + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + forkskinny_64_192_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_64_192_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + } +} + +void forkskinny_64_192_decrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_state_t fstate; + forkskinny_64_192_tweakey_schedule_t tks; + + /* Iterate key schedule */ + forkskinny_64_192_init_tks(&tks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Unpack the input */ + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Perform the "after" rounds on 
the input to get back + * to the forking point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_64_192_inv_rounds(&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_64_192_rounds + (&fstate, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 67 + .byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + 
.byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte 193 + .byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 + .byte 100 + .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + .byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 194 + .byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 
+ .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + .byte 97 + .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 + .byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + 
.byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + .byte 93 + .byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 223 + .byte 22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: 
+ .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 88 + .byte 90 + .byte 92 + 
.byte 94 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_3, @object + .size table_3, 256 +table_3: + .byte 0 + .byte 128 + .byte 1 + .byte 129 + .byte 2 + .byte 130 + .byte 3 + .byte 131 + .byte 4 + .byte 132 + .byte 5 + .byte 133 + .byte 6 + .byte 134 + .byte 7 + .byte 135 + .byte 8 + .byte 136 + .byte 9 + .byte 137 + .byte 10 + .byte 138 + .byte 11 + .byte 139 + .byte 12 + .byte 140 + .byte 13 + .byte 141 + .byte 14 + .byte 142 + .byte 15 + .byte 143 + .byte 16 + .byte 144 + .byte 17 + .byte 145 + .byte 18 + .byte 146 + .byte 19 + .byte 147 + .byte 20 + .byte 148 + .byte 21 + .byte 149 + .byte 22 + .byte 150 + .byte 23 + .byte 151 + .byte 24 + .byte 152 + .byte 25 + .byte 153 + .byte 26 + .byte 154 + .byte 27 + .byte 155 + .byte 28 + .byte 156 + .byte 29 + .byte 157 + .byte 30 + .byte 158 + .byte 31 + .byte 159 + .byte 160 + .byte 32 + .byte 161 + .byte 33 + .byte 162 + .byte 34 + .byte 163 + .byte 35 + .byte 164 + .byte 36 + .byte 165 + .byte 37 + .byte 166 + .byte 38 + .byte 167 + .byte 39 + .byte 168 
+ .byte 40 + .byte 169 + .byte 41 + .byte 170 + .byte 42 + .byte 171 + .byte 43 + .byte 172 + .byte 44 + .byte 173 + .byte 45 + .byte 174 + .byte 46 + .byte 175 + .byte 47 + .byte 176 + .byte 48 + .byte 177 + .byte 49 + .byte 178 + .byte 50 + .byte 179 + .byte 51 + .byte 180 + .byte 52 + .byte 181 + .byte 53 + .byte 182 + .byte 54 + .byte 183 + .byte 55 + .byte 184 + .byte 56 + .byte 185 + .byte 57 + .byte 186 + .byte 58 + .byte 187 + .byte 59 + .byte 188 + .byte 60 + .byte 189 + .byte 61 + .byte 190 + .byte 62 + .byte 191 + .byte 63 + .byte 64 + .byte 192 + .byte 65 + .byte 193 + .byte 66 + .byte 194 + .byte 67 + .byte 195 + .byte 68 + .byte 196 + .byte 69 + .byte 197 + .byte 70 + .byte 198 + .byte 71 + .byte 199 + .byte 72 + .byte 200 + .byte 73 + .byte 201 + .byte 74 + .byte 202 + .byte 75 + .byte 203 + .byte 76 + .byte 204 + .byte 77 + .byte 205 + .byte 78 + .byte 206 + .byte 79 + .byte 207 + .byte 80 + .byte 208 + .byte 81 + .byte 209 + .byte 82 + .byte 210 + .byte 83 + .byte 211 + .byte 84 + .byte 212 + .byte 85 + .byte 213 + .byte 86 + .byte 214 + .byte 87 + .byte 215 + .byte 88 + .byte 216 + .byte 89 + .byte 217 + .byte 90 + .byte 218 + .byte 91 + .byte 219 + .byte 92 + .byte 220 + .byte 93 + .byte 221 + .byte 94 + .byte 222 + .byte 95 + .byte 223 + .byte 224 + .byte 96 + .byte 225 + .byte 97 + .byte 226 + .byte 98 + .byte 227 + .byte 99 + .byte 228 + .byte 100 + .byte 229 + .byte 101 + .byte 230 + .byte 102 + .byte 231 + .byte 103 + .byte 232 + .byte 104 + .byte 233 + .byte 105 + .byte 234 + .byte 106 + .byte 235 + .byte 107 + .byte 236 + .byte 108 + .byte 237 + .byte 109 + .byte 238 + .byte 110 + .byte 239 + .byte 111 + .byte 240 + .byte 112 + .byte 241 + .byte 113 + .byte 242 + .byte 114 + .byte 243 + .byte 115 + .byte 244 + .byte 116 + .byte 245 + .byte 117 + .byte 246 + .byte 118 + .byte 247 + .byte 119 + .byte 248 + .byte 120 + .byte 249 + .byte 121 + .byte 250 + .byte 122 + .byte 251 + .byte 123 + .byte 252 + .byte 124 + .byte 253 + .byte 125 + .byte 
254 + .byte 126 + .byte 255 + .byte 127 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_4, @object + .size table_4, 174 +table_4: + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 7 + .byte 0 + .byte 15 + .byte 0 + .byte 15 + .byte 1 + .byte 15 + .byte 3 + .byte 14 + .byte 7 + .byte 13 + .byte 7 + .byte 11 + .byte 7 + .byte 7 + .byte 7 + .byte 15 + .byte 6 + .byte 15 + .byte 5 + .byte 14 + .byte 3 + .byte 12 + .byte 7 + .byte 9 + .byte 7 + .byte 3 + .byte 7 + .byte 7 + .byte 6 + .byte 15 + .byte 4 + .byte 14 + .byte 1 + .byte 13 + .byte 3 + .byte 10 + .byte 7 + .byte 5 + .byte 7 + .byte 11 + .byte 6 + .byte 7 + .byte 5 + .byte 14 + .byte 2 + .byte 12 + .byte 5 + .byte 8 + .byte 3 + .byte 0 + .byte 7 + .byte 1 + .byte 6 + .byte 3 + .byte 4 + .byte 6 + .byte 0 + .byte 13 + .byte 0 + .byte 11 + .byte 1 + .byte 7 + .byte 3 + .byte 14 + .byte 6 + .byte 13 + .byte 5 + .byte 10 + .byte 3 + .byte 4 + .byte 7 + .byte 9 + .byte 6 + .byte 3 + .byte 5 + .byte 6 + .byte 2 + .byte 12 + .byte 4 + .byte 8 + .byte 1 + .byte 1 + .byte 3 + .byte 2 + .byte 6 + .byte 5 + .byte 4 + .byte 10 + .byte 0 + .byte 5 + .byte 1 + .byte 11 + .byte 2 + .byte 6 + .byte 5 + .byte 12 + .byte 2 + .byte 8 + .byte 5 + .byte 0 + .byte 3 + .byte 0 + .byte 6 + .byte 1 + .byte 4 + .byte 2 + .byte 0 + .byte 5 + .byte 0 + .byte 11 + .byte 0 + .byte 7 + .byte 1 + .byte 15 + .byte 2 + .byte 14 + .byte 5 + .byte 12 + .byte 3 + .byte 8 + .byte 7 + .byte 1 + .byte 7 + .byte 3 + .byte 6 + .byte 7 + .byte 4 + .byte 14 + .byte 0 + .byte 13 + .byte 1 + .byte 11 + .byte 3 + .byte 6 + .byte 7 + .byte 13 + .byte 6 + .byte 11 + .byte 5 + .byte 6 + .byte 3 + .byte 12 + .byte 6 + .byte 9 + .byte 5 + .byte 2 + .byte 3 + .byte 4 + .byte 6 + .byte 9 + .byte 4 + .byte 2 + .byte 1 + .byte 5 + .byte 2 + .byte 10 + .byte 4 + .byte 4 + .byte 1 + .byte 9 + .byte 2 + .byte 2 + .byte 5 + .byte 4 + .byte 2 + .byte 8 + .byte 4 + .byte 0 + .byte 1 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_5, 
@object + .size table_5, 256 +table_5: + .byte 204 + .byte 198 + .byte 201 + .byte 192 + .byte 193 + .byte 202 + .byte 194 + .byte 203 + .byte 195 + .byte 200 + .byte 197 + .byte 205 + .byte 196 + .byte 206 + .byte 199 + .byte 207 + .byte 108 + .byte 102 + .byte 105 + .byte 96 + .byte 97 + .byte 106 + .byte 98 + .byte 107 + .byte 99 + .byte 104 + .byte 101 + .byte 109 + .byte 100 + .byte 110 + .byte 103 + .byte 111 + .byte 156 + .byte 150 + .byte 153 + .byte 144 + .byte 145 + .byte 154 + .byte 146 + .byte 155 + .byte 147 + .byte 152 + .byte 149 + .byte 157 + .byte 148 + .byte 158 + .byte 151 + .byte 159 + .byte 12 + .byte 6 + .byte 9 + .byte 0 + .byte 1 + .byte 10 + .byte 2 + .byte 11 + .byte 3 + .byte 8 + .byte 5 + .byte 13 + .byte 4 + .byte 14 + .byte 7 + .byte 15 + .byte 28 + .byte 22 + .byte 25 + .byte 16 + .byte 17 + .byte 26 + .byte 18 + .byte 27 + .byte 19 + .byte 24 + .byte 21 + .byte 29 + .byte 20 + .byte 30 + .byte 23 + .byte 31 + .byte 172 + .byte 166 + .byte 169 + .byte 160 + .byte 161 + .byte 170 + .byte 162 + .byte 171 + .byte 163 + .byte 168 + .byte 165 + .byte 173 + .byte 164 + .byte 174 + .byte 167 + .byte 175 + .byte 44 + .byte 38 + .byte 41 + .byte 32 + .byte 33 + .byte 42 + .byte 34 + .byte 43 + .byte 35 + .byte 40 + .byte 37 + .byte 45 + .byte 36 + .byte 46 + .byte 39 + .byte 47 + .byte 188 + .byte 182 + .byte 185 + .byte 176 + .byte 177 + .byte 186 + .byte 178 + .byte 187 + .byte 179 + .byte 184 + .byte 181 + .byte 189 + .byte 180 + .byte 190 + .byte 183 + .byte 191 + .byte 60 + .byte 54 + .byte 57 + .byte 48 + .byte 49 + .byte 58 + .byte 50 + .byte 59 + .byte 51 + .byte 56 + .byte 53 + .byte 61 + .byte 52 + .byte 62 + .byte 55 + .byte 63 + .byte 140 + .byte 134 + .byte 137 + .byte 128 + .byte 129 + .byte 138 + .byte 130 + .byte 139 + .byte 131 + .byte 136 + .byte 133 + .byte 141 + .byte 132 + .byte 142 + .byte 135 + .byte 143 + .byte 92 + .byte 86 + .byte 89 + .byte 80 + .byte 81 + .byte 90 + .byte 82 + .byte 91 + .byte 83 + .byte 88 + .byte 
85 + .byte 93 + .byte 84 + .byte 94 + .byte 87 + .byte 95 + .byte 220 + .byte 214 + .byte 217 + .byte 208 + .byte 209 + .byte 218 + .byte 210 + .byte 219 + .byte 211 + .byte 216 + .byte 213 + .byte 221 + .byte 212 + .byte 222 + .byte 215 + .byte 223 + .byte 76 + .byte 70 + .byte 73 + .byte 64 + .byte 65 + .byte 74 + .byte 66 + .byte 75 + .byte 67 + .byte 72 + .byte 69 + .byte 77 + .byte 68 + .byte 78 + .byte 71 + .byte 79 + .byte 236 + .byte 230 + .byte 233 + .byte 224 + .byte 225 + .byte 234 + .byte 226 + .byte 235 + .byte 227 + .byte 232 + .byte 229 + .byte 237 + .byte 228 + .byte 238 + .byte 231 + .byte 239 + .byte 124 + .byte 118 + .byte 121 + .byte 112 + .byte 113 + .byte 122 + .byte 114 + .byte 123 + .byte 115 + .byte 120 + .byte 117 + .byte 125 + .byte 116 + .byte 126 + .byte 119 + .byte 127 + .byte 252 + .byte 246 + .byte 249 + .byte 240 + .byte 241 + .byte 250 + .byte 242 + .byte 251 + .byte 243 + .byte 248 + .byte 245 + .byte 253 + .byte 244 + .byte 254 + .byte 247 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_6, @object + .size table_6, 256 +table_6: + .byte 51 + .byte 52 + .byte 54 + .byte 56 + .byte 60 + .byte 58 + .byte 49 + .byte 62 + .byte 57 + .byte 50 + .byte 53 + .byte 55 + .byte 48 + .byte 59 + .byte 61 + .byte 63 + .byte 67 + .byte 68 + .byte 70 + .byte 72 + .byte 76 + .byte 74 + .byte 65 + .byte 78 + .byte 73 + .byte 66 + .byte 69 + .byte 71 + .byte 64 + .byte 75 + .byte 77 + .byte 79 + .byte 99 + .byte 100 + .byte 102 + .byte 104 + .byte 108 + .byte 106 + .byte 97 + .byte 110 + .byte 105 + .byte 98 + .byte 101 + .byte 103 + .byte 96 + .byte 107 + .byte 109 + .byte 111 + .byte 131 + .byte 132 + .byte 134 + .byte 136 + .byte 140 + .byte 138 + .byte 129 + .byte 142 + .byte 137 + .byte 130 + .byte 133 + .byte 135 + .byte 128 + .byte 139 + .byte 141 + .byte 143 + .byte 195 + .byte 196 + .byte 198 + .byte 200 + .byte 204 + .byte 202 + .byte 193 + .byte 206 + .byte 201 + .byte 194 + .byte 197 + .byte 199 + .byte 192 
+ .byte 203 + .byte 205 + .byte 207 + .byte 163 + .byte 164 + .byte 166 + .byte 168 + .byte 172 + .byte 170 + .byte 161 + .byte 174 + .byte 169 + .byte 162 + .byte 165 + .byte 167 + .byte 160 + .byte 171 + .byte 173 + .byte 175 + .byte 19 + .byte 20 + .byte 22 + .byte 24 + .byte 28 + .byte 26 + .byte 17 + .byte 30 + .byte 25 + .byte 18 + .byte 21 + .byte 23 + .byte 16 + .byte 27 + .byte 29 + .byte 31 + .byte 227 + .byte 228 + .byte 230 + .byte 232 + .byte 236 + .byte 234 + .byte 225 + .byte 238 + .byte 233 + .byte 226 + .byte 229 + .byte 231 + .byte 224 + .byte 235 + .byte 237 + .byte 239 + .byte 147 + .byte 148 + .byte 150 + .byte 152 + .byte 156 + .byte 154 + .byte 145 + .byte 158 + .byte 153 + .byte 146 + .byte 149 + .byte 151 + .byte 144 + .byte 155 + .byte 157 + .byte 159 + .byte 35 + .byte 36 + .byte 38 + .byte 40 + .byte 44 + .byte 42 + .byte 33 + .byte 46 + .byte 41 + .byte 34 + .byte 37 + .byte 39 + .byte 32 + .byte 43 + .byte 45 + .byte 47 + .byte 83 + .byte 84 + .byte 86 + .byte 88 + .byte 92 + .byte 90 + .byte 81 + .byte 94 + .byte 89 + .byte 82 + .byte 85 + .byte 87 + .byte 80 + .byte 91 + .byte 93 + .byte 95 + .byte 115 + .byte 116 + .byte 118 + .byte 120 + .byte 124 + .byte 122 + .byte 113 + .byte 126 + .byte 121 + .byte 114 + .byte 117 + .byte 119 + .byte 112 + .byte 123 + .byte 125 + .byte 127 + .byte 3 + .byte 4 + .byte 6 + .byte 8 + .byte 12 + .byte 10 + .byte 1 + .byte 14 + .byte 9 + .byte 2 + .byte 5 + .byte 7 + .byte 0 + .byte 11 + .byte 13 + .byte 15 + .byte 179 + .byte 180 + .byte 182 + .byte 184 + .byte 188 + .byte 186 + .byte 177 + .byte 190 + .byte 185 + .byte 178 + .byte 181 + .byte 183 + .byte 176 + .byte 187 + .byte 189 + .byte 191 + .byte 211 + .byte 212 + .byte 214 + .byte 216 + .byte 220 + .byte 218 + .byte 209 + .byte 222 + .byte 217 + .byte 210 + .byte 213 + .byte 215 + .byte 208 + .byte 219 + .byte 221 + .byte 223 + .byte 243 + .byte 244 + .byte 246 + .byte 248 + .byte 252 + .byte 250 + .byte 241 + .byte 254 + .byte 249 + .byte 
242 + .byte 245 + .byte 247 + .byte 240 + .byte 251 + .byte 253 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_7, @object + .size table_7, 256 +table_7: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 56 + .byte 58 + .byte 
60 + .byte 62 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_8, @object + .size table_8, 256 +table_8: + .byte 0 + .byte 8 + .byte 1 + .byte 9 + .byte 2 + .byte 10 + .byte 3 + .byte 11 + .byte 12 + .byte 4 + .byte 13 + .byte 5 + .byte 14 + .byte 6 + .byte 15 + .byte 7 + .byte 128 + .byte 136 + .byte 129 + .byte 137 + .byte 130 + .byte 138 + .byte 131 + .byte 139 + .byte 140 + .byte 132 + .byte 141 + .byte 133 + .byte 142 + .byte 134 + .byte 143 + .byte 135 + .byte 16 + .byte 24 + .byte 17 + .byte 25 + .byte 18 + .byte 26 + .byte 19 + .byte 27 + .byte 28 + .byte 20 + .byte 29 + .byte 21 + .byte 30 + .byte 22 + .byte 31 + .byte 23 + .byte 144 + .byte 152 + .byte 145 + .byte 153 + .byte 146 + .byte 154 + .byte 147 + .byte 155 + .byte 156 + .byte 148 + .byte 157 + .byte 149 + .byte 158 + .byte 150 + .byte 159 + .byte 151 + .byte 32 + 
.byte 40 + .byte 33 + .byte 41 + .byte 34 + .byte 42 + .byte 35 + .byte 43 + .byte 44 + .byte 36 + .byte 45 + .byte 37 + .byte 46 + .byte 38 + .byte 47 + .byte 39 + .byte 160 + .byte 168 + .byte 161 + .byte 169 + .byte 162 + .byte 170 + .byte 163 + .byte 171 + .byte 172 + .byte 164 + .byte 173 + .byte 165 + .byte 174 + .byte 166 + .byte 175 + .byte 167 + .byte 48 + .byte 56 + .byte 49 + .byte 57 + .byte 50 + .byte 58 + .byte 51 + .byte 59 + .byte 60 + .byte 52 + .byte 61 + .byte 53 + .byte 62 + .byte 54 + .byte 63 + .byte 55 + .byte 176 + .byte 184 + .byte 177 + .byte 185 + .byte 178 + .byte 186 + .byte 179 + .byte 187 + .byte 188 + .byte 180 + .byte 189 + .byte 181 + .byte 190 + .byte 182 + .byte 191 + .byte 183 + .byte 192 + .byte 200 + .byte 193 + .byte 201 + .byte 194 + .byte 202 + .byte 195 + .byte 203 + .byte 204 + .byte 196 + .byte 205 + .byte 197 + .byte 206 + .byte 198 + .byte 207 + .byte 199 + .byte 64 + .byte 72 + .byte 65 + .byte 73 + .byte 66 + .byte 74 + .byte 67 + .byte 75 + .byte 76 + .byte 68 + .byte 77 + .byte 69 + .byte 78 + .byte 70 + .byte 79 + .byte 71 + .byte 208 + .byte 216 + .byte 209 + .byte 217 + .byte 210 + .byte 218 + .byte 211 + .byte 219 + .byte 220 + .byte 212 + .byte 221 + .byte 213 + .byte 222 + .byte 214 + .byte 223 + .byte 215 + .byte 80 + .byte 88 + .byte 81 + .byte 89 + .byte 82 + .byte 90 + .byte 83 + .byte 91 + .byte 92 + .byte 84 + .byte 93 + .byte 85 + .byte 94 + .byte 86 + .byte 95 + .byte 87 + .byte 224 + .byte 232 + .byte 225 + .byte 233 + .byte 226 + .byte 234 + .byte 227 + .byte 235 + .byte 236 + .byte 228 + .byte 237 + .byte 229 + .byte 238 + .byte 230 + .byte 239 + .byte 231 + .byte 96 + .byte 104 + .byte 97 + .byte 105 + .byte 98 + .byte 106 + .byte 99 + .byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + .byte 250 + .byte 243 + .byte 251 + .byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + 
.byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global forkskinny_128_256_rounds + .type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in 
r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif 
defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + 
eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + 
lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std 
Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global forkskinny_128_256_inv_rounds + .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std 
Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 
+#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi 
r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) 
+ lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd 
r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw 
r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 
+#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + + .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd 
r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + 
elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk + + .text +.global forkskinny_128_384_rounds + .type forkskinny_128_384_rounds, @function +forkskinny_128_384_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 
+.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif 
defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z 
+#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + 
eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + ldd r20,Y+45 + ldd r21,Y+46 + ldd r16,Y+47 + ldd r17,Y+48 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld 
r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+33 + std Y+41,r0 + ldd r0,Y+34 + std Y+42,r0 + ldd r0,Y+35 + std Y+43,r0 + ldd r0,Y+36 + std Y+44,r0 + ldd r0,Y+37 + std Y+45,r0 + ldd r0,Y+38 + std Y+46,r0 + ldd r0,Y+39 + std Y+47,r0 + ldd r0,Y+40 + std Y+48,r0 + std Y+33,r19 + std Y+34,r17 + std Y+35,r18 + std Y+36,r21 + std Y+37,r26 + std Y+38,r16 + std Y+39,r20 + std Y+40,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std 
Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 /* end of the write-back in forkskinny_128_384_rounds: frame bytes Y+33..Y+48 go to Z+32..Z+47 on this stretch */ + adiw r28,49 /* Y += 49, stepping over the 49-byte frame */ + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 /* SP = Y: SREG (I/O 0x3f) is saved in r0, interrupts are disabled for the SPH (0x3e) write, SREG is restored, then SPL (0x3d) is written */ + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 /* registers r17..r2 and Y are popped back */ + eor r1,r1 /* r1 = 0 on return; avr-gcc treats r1 as its fixed zero register */ + ret + .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds + + .text +.global forkskinny_128_384_inv_rounds + .type forkskinny_128_384_inv_rounds, @function +forkskinny_128_384_inv_rounds: /* Entry point. Visible register use: r25:r24 is copied to Z and addressed as a 64-byte block; r22 and r20 are each doubled and serve as the running table_4 index and the loop end value kept at Y+49. Presumably these are (state pointer, first round, last round) in avr-gcc argument order - confirm against the C prototype. Only the low bytes r22 and r20 are read. Loop 118 tests r22 against Y+49 only at the bottom after decrementing r22 twice, so equal inputs do not give zero passes. */ + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 /* save r2-r17 and Y; they are popped in reverse order before ret */ + movw r30,r24 /* Z = r25:r24 */ + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 /* Y = SP - 49 and SP = Y, using the same SREG-protected SPH/SPL write as above; the frame is addressed as Y+1..Y+49 */ +.L__stack_usage = 67 /* 18 pushed bytes plus the 49-byte frame; the later pushes of r31, r30 and (where RAMPZ is defined) r0 are not included */ + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 /* bytes Z+48..Z+63 now live in r2-r15, r24, r25 (presumably the 128-bit cipher state - confirm against the state struct); label 5348 stores them back to the same offsets */ + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 /* from here bytes Z+0..Z+47 are copied into the frame at Y+1..Y+48, four at a time through r18, r19, r26, r27 */ + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd 
r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif 
defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + ldd r20,Y+37 + ldd r21,Y+38 + ldd r16,Y+39 + ldd r17,Y+40 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov 
r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+41 + std Y+33,r0 + ldd r0,Y+42 + std Y+34,r0 + ldd r0,Y+43 + std Y+35,r0 + ldd r0,Y+44 + std Y+36,r0 + ldd r0,Y+45 + std Y+37,r0 + ldd r0,Y+46 + std Y+38,r0 + ldd r0,Y+47 + std Y+39,r0 + ldd r0,Y+48 + std Y+40,r0 + std Y+41,r26 + std Y+42,r18 + std Y+43,r20 + std Y+44,r17 + std Y+45,r16 + std Y+46,r27 + std Y+47,r21 + std Y+48,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 
+ ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + 
mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b 
+5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor 
r1,r1 + ret + .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds + + .text +.global forkskinny_128_384_forward_tk + .type forkskinny_128_384_forward_tk, @function +forkskinny_128_384_forward_tk: /* Entry point. Visible register use: r25:r24 is copied to Z and bytes Z+0..Z+47 are read here and rewritten at label 5183; r22 is the pass counter of loop 83. Presumably (state pointer, number of rounds) in avr-gcc argument order - confirm against the C prototype. Each pass permutes the 16 bytes taken from Z+0..Z+15 in registers, and permutes the two 16-byte frame blocks while replacing the eight bytes that move into the first half by a table_2 lookup (Y+1..Y+16) or a table_3 lookup (Y+17..Y+32). r22 is decremented and tested only at the bottom of the loop, so r22 == 0 on entry wraps through 256 passes. */ + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 /* save r2-r17 and Y */ + movw r30,r24 /* Z = r25:r24 */ + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 /* Y = SP - 32 and SP = Y; SPH (0x3e) is written with interrupts disabled and SREG (0x3f) is restored from r0 before SPL (0x3d) is written; the frame is Y+1..Y+32 */ +.L__stack_usage = 50 /* 18 pushed bytes plus the 32-byte frame; the later pushes of r31, r30 and (where RAMPZ is defined) r0 are not included */ + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 /* bytes Z+0..Z+15 are held in r4-r15, r24, r25, r16, r17 */ + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 /* from here bytes Z+16..Z+47 are copied into the frame at Y+1..Y+32, four at a time through r18-r21 */ + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 /* keep the pointer from r25:r24 on the stack because Z is reused for table lookups; it is popped at 5183 */ + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) /* Z = table_2; the lookups in the loop replace only r30 with the index byte, which relies on the table starting on a 256-byte boundary */ +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 /* old RAMPZ is saved on the stack and RAMPZ is set to the hh8 byte of the table address for elpm */ +#endif +83: /* loop head, one pass per count in r22 */ + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 /* bytes 8..15 (r12-r15, r24, r25, r16, r17) are parked in r18-r21, r26, r27, r2, r3 */ + movw r12,r4 + movw r14,r6 /* bytes 0..7 start moving up into the byte 8..15 registers */ + 
movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi 
r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r26,Y+29 + ldd r27,Y+30 + ldd r2,Y+31 + ldd r3,Y+32 + ldd r23,Y+17 + std Y+25,r23 + ldd r23,Y+18 + std Y+26,r23 + ldd r23,Y+19 + std Y+27,r23 + ldd r23,Y+20 + std Y+28,r23 + ldd r23,Y+21 + std Y+29,r23 + ldd r23,Y+22 + std Y+30,r23 + ldd r23,Y+23 + std Y+31,r23 + ldd r23,Y+24 + std Y+32,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+17,r19 + std Y+18,r3 + std Y+19,r18 + std Y+20,r27 + std Y+21,r20 + std Y+22,r2 + std Y+23,r26 + std Y+24,r21 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + 
breq 5183f + rjmp 83b /* leave loop 83 once the preceding dec r22 reaches zero, otherwise jump back; the back edge uses rjmp because the loop body is beyond the reach of a conditional branch */ +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 /* restore the RAMPZ value saved before the loop */ +#endif + pop r30 + pop r31 /* Z = the pointer saved before the loop */ + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 /* the register-held bytes go back to Z+0..Z+15 */ + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 /* from here the frame bytes Y+1..Y+32 go back to Z+16..Z+47 */ + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 /* Y += 32, stepping over the frame */ + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 /* SP = Y: SREG (I/O 0x3f) is saved in r0, interrupts are disabled for the SPH (0x3e) write, SREG is restored, then SPL (0x3d) is written */ + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 /* registers r17..r2 and Y are popped back */ + ret + .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk + + .text +.global forkskinny_128_384_reverse_tk + .type forkskinny_128_384_reverse_tk, @function +forkskinny_128_384_reverse_tk: /* Entry point. Visible register use: r25:r24 is copied to Z, bytes Z+0..Z+15 are held in r2-r17 and bytes Z+16..Z+47 in a 32-byte frame; r22 is the pass counter of loop 83. Presumably (state pointer, number of rounds) in avr-gcc argument order - confirm against the C prototype. Each pass permutes the 16 register bytes and the two 16-byte frame blocks, replacing the eight bytes that move into the second half by a table_3 lookup (Y+1..Y+16) or a table_2 lookup (Y+17..Y+32). r22 is decremented and tested only at the bottom of the loop, so r22 == 0 on entry wraps through 256 passes. */ + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 /* save r2-r17 and Y */ + movw r30,r24 /* Z = r25:r24 */ + in r28,0x3d + in r29,0x3e + sbiw r28,32 /* Y = SP - 32; the instructions that follow write Y back to SP */ + in 
r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + 
std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 + ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm 
r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std 
Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + 
std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor 
r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + 
ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif 
defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + 
push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z 
+#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi 
r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if 
defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std 
Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + 
andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 + pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std 
Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm 
r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + 
and r11,r16 + or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/internal-forkskinny.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/internal-forkskinny.c index c43ef98..4a7a01c 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/internal-forkskinny.c @@ -40,37 +40,10 @@ static unsigned char const RC[87] = { 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 }; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 - -/** - * \brief State information for ForkSkinny-128-256. 
- */ -typedef struct -{ - //uint32_t TK1[4]; /**< First part of the tweakey */ - //uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_256_state_t; +#if !defined(__AVR__) -typedef struct +void forkskinny_128_256_init_tks(forkskinny_128_256_tweakey_schedule_t *tks, const unsigned char key[32], uint8_t nb_rounds) { - /** Words of the full key schedule */ - uint32_t row0[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; - uint32_t row1[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; - - -} forkskinny_128_256_key_schedule_t; - -static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, const unsigned char key[32], uint8_t nb_rounds){ uint32_t TK[4]; unsigned round; @@ -81,8 +54,8 @@ static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, c TK[3] = le_load_word32(key + 12); /* Initiate key schedule with permutations of TK1 */ for(round = 0; roundrow0[round] = TK[0]; - ks->row1[round] = TK[1]; + tks->row0[round] = TK[0]; + tks->row1[round] = TK[1]; skinny128_permute_tk(TK); } @@ -94,8 +67,8 @@ static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, c TK[3] = le_load_word32(key + 28); /* Process second Tweakey and add it to the key schedule */ for(round = 0; roundrow0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny128_permute_tk(TK); skinny128_LFSR2(TK[0]); @@ -103,51 +76,48 @@ static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, c } } -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, forkskinny_128_256_key_schedule_t *ks, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + s0 = state->S[0]; + s1 = state->S[1]; + s2 = state->S[2]; + s3 = state->S[3]; + + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -156,95 +126,8 @@ static void forkskinny_128_256_round state->S[3] = s3; } -void forkskinny_128_256_encrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_256_state_t state; - forkskinny_128_256_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - if (output_left && output_right){ - forkskinny_128_256_init_tks(&ks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); - } - else{ - forkskinny_128_256_init_tks(&ks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + FORKSKINNY_128_256_ROUNDS_AFTER); - } - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, 
state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-256 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_256_inv_round - (forkskinny_128_256_state_t *state, forkskinny_128_256_key_schedule_t *ks, unsigned round) +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -255,33 +138,37 @@ static void forkskinny_128_256_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -290,96 +177,8 @@ static void forkskinny_128_256_inv_round state->S[3] = s3; } -void forkskinny_128_256_decrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_init_tks(forkskinny_128_384_tweakey_schedule_t *tks, const unsigned char key[48], uint8_t nb_rounds) { - forkskinny_128_256_state_t state; - forkskinny_128_256_state_t fstate; - forkskinny_128_256_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - forkskinny_128_256_init_tks(&ks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); - - /* Unpack the input */ - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); --round) { - forkskinny_128_256_inv_round(&state, &ks, round - 1); - } - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Save the state and the tweakey at the forking point */ - fstate = 
state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_256_inv_round(&state, &ks, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&fstate, &ks, round); - } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); -} - -/** - * \brief Number of rounds of ForkSkinny-128-384 before forking. - */ -#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 - -/** - * \brief Number of rounds of ForkSkinny-128-384 after forking. - */ -#define FORKSKINNY_128_384_ROUNDS_AFTER 31 - -/** - * \brief State information for ForkSkinny-128-384. 
- */ -typedef struct -{ - //uint32_t TK1[4]; /**< First part of the tweakey */ - //uint32_t TK2[4]; /**< Second part of the tweakey */ - //uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -typedef struct -{ - /** Words of the full key schedule */ - uint32_t row0[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; - uint32_t row1[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; - - -} forkskinny_128_384_key_schedule_t; - -static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, const unsigned char key[48], uint8_t nb_rounds){ uint32_t TK[4]; unsigned round; @@ -390,8 +189,8 @@ static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c TK[3] = le_load_word32(key + 12); /* Initiate key schedule with permutations of TK1 */ for(round = 0; roundrow0[round] = TK[0]; - ks->row1[round] = TK[1]; + tks->row0[round] = TK[0]; + tks->row1[round] = TK[1]; skinny128_permute_tk(TK); } @@ -401,10 +200,11 @@ static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c TK[1] = le_load_word32(key + 20); TK[2] = le_load_word32(key + 24); TK[3] = le_load_word32(key + 28); + /* Process second Tweakey and add it to the key schedule */ for(round = 0; roundrow0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny128_permute_tk(TK); skinny128_LFSR2(TK[0]); @@ -416,10 +216,11 @@ static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c TK[1] = le_load_word32(key + 36); TK[2] = le_load_word32(key + 40); TK[3] = le_load_word32(key + 44); + /* Process third Tweakey and add it to the key schedule */ for(round = 0; roundrow0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny128_permute_tk(TK); skinny128_LFSR3(TK[0]); @@ -427,51 +228,48 @@ static void 
forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c } } -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, forkskinny_128_384_key_schedule_t *ks, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + s0 = state->S[0]; + s1 = state->S[1]; + s2 = state->S[2]; + s3 = state->S[3]; + + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -480,97 +278,8 @@ static void forkskinny_128_384_round state->S[3] = s3; } -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - forkskinny_128_384_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - if (output_left && output_right){ - forkskinny_128_384_init_tks(&ks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); - } - else{ - forkskinny_128_384_init_tks(&ks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER); - } - - /* Unpack the input */ - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - 
le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-384 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_384_inv_round - (forkskinny_128_384_state_t *state, forkskinny_128_384_key_schedule_t *ks, unsigned round) +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -581,33 +290,37 @@ static void forkskinny_128_384_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -616,96 +329,9 @@ static void forkskinny_128_384_inv_round state->S[3] = s3; } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - forkskinny_128_384_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - forkskinny_128_384_init_tks(&ks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); - - /* Unpack the input */ - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, &ks, round - 1); - } - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; 
round > 0; --round) { - forkskinny_128_384_inv_round(&state, &ks, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, &ks, round); - } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); -} - -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. - */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 -/** - * \brief State information for ForkSkinny-64-192. 
- */ -typedef struct +void forkskinny_64_192_init_tks(forkskinny_64_192_tweakey_schedule_t *tks, const unsigned char key[24], uint8_t nb_rounds) { - //uint16_t TK1[4]; /**< First part of the tweakey */ - //uint16_t TK2[4]; /**< Second part of the tweakey */ - //uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ - -} forkskinny_64_192_state_t; - -typedef struct -{ - /** Words of the full key schedule */ - uint16_t row0[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; - uint16_t row1[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; - - -} forkskinny_64_192_key_schedule_t; - -static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, const unsigned char key[24], uint8_t nb_rounds){ uint16_t TK[4]; unsigned round; @@ -716,8 +342,8 @@ static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con TK[3] = be_load_word16(key + 6); /* Initiate key schedule with permutations of TK1 */ for(round = 0; roundrow0[round] = TK[0]; - ks->row1[round] = TK[1]; + tks->row0[round] = TK[0]; + tks->row1[round] = TK[1]; skinny64_permute_tk(TK); } @@ -727,10 +353,11 @@ static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con TK[1] = be_load_word16(key + 10); TK[2] = be_load_word16(key + 12); TK[3] = be_load_word16(key + 14); + /* Process second Tweakey and add it to the key schedule */ for(round = 0; roundrow0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny64_permute_tk(TK); skinny64_LFSR2(TK[0]); @@ -744,8 +371,8 @@ static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con TK[3] = be_load_word16(key + 22); /* Process third Tweakey and add it to the key schedule */ for(round = 0; roundrow0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny64_permute_tk(TK); skinny64_LFSR3(TK[0]); @@ -753,17 +380,8 @@ 
static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con } } -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. - */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, forkskinny_64_192_key_schedule_t *ks, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -774,131 +392,44 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= ks->row1[round] ^ ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= tks->row0[first] ^ ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= tks->row1[first] ^ ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = 
s0; + s0 = temp; + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - forkskinny_64_192_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - if (output_left && output_right){ - forkskinny_64_192_init_tks(&ks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); - } - else{ - forkskinny_64_192_init_tks(&ks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + FORKSKINNY_64_192_ROUNDS_AFTER); - } - - /* Unpack the input */ - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 
0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, forkskinny_64_192_key_schedule_t *ks, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -909,33 +440,35 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= ks->row1[round] ^ ((rc & 0x70) << 8); - s2 ^= 0x2000; - - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= tks->row0[first] ^ ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= tks->row1[first] ^ ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -944,61 +477,4 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char 
*output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - forkskinny_64_192_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - forkskinny_64_192_init_tks(&ks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); - - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, &ks, round - 1); - } - - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, &ks, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, &ks, round); - } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); -} +#endif /* !__AVR__ */ diff --git 
a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/internal-forkskinny.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/internal-forkskinny.h index 0c1a707..1b4f5bf 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,172 @@ extern "C" { #endif /** + * \brief Number of rounds of ForkSkinny-128-256 before forking. + */ +#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 + +/** + * \brief Number of rounds of ForkSkinny-128-256 after forking. + */ +#define FORKSKINNY_128_256_ROUNDS_AFTER 27 + +/** + * \brief State information for ForkSkinny-128-256. + */ +typedef struct +{ + // uint32_t TK1[4]; /**< First part of the tweakey */ + // uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +typedef struct +{ + /** Words of the full key schedule */ + uint32_t row0[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; + uint32_t row1[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; + +} forkskinny_128_256_tweakey_schedule_t; + +/** + * \brief Number of rounds of ForkSkinny-128-384 before forking. + */ +#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 + +/** + * \brief Number of rounds of ForkSkinny-128-384 after forking. + */ +#define FORKSKINNY_128_384_ROUNDS_AFTER 31 + +/** + * \brief State information for ForkSkinny-128-384. 
+ */ +typedef struct +{ + // uint32_t TK1[4]; /**< First part of the tweakey */ + // uint32_t TK2[4]; /**< Second part of the tweakey */ + // uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +typedef struct +{ + /** Words of the full key schedule */ + uint32_t row0[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; + uint32_t row1[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; + +} forkskinny_128_384_tweakey_schedule_t; + +/** + * \brief Number of rounds of ForkSkinny-64-192 before forking. + */ +#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 + +/** + * \brief Number of rounds of ForkSkinny-64-192 after forking. + */ +#define FORKSKINNY_64_192_ROUNDS_AFTER 23 + +/** + * \brief State information for ForkSkinny-64-192. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +typedef struct +{ + /** Words of the full key schedule */ + uint16_t row0[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; + uint16_t row1[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; + + +} forkskinny_64_192_tweakey_schedule_t; + + +void forkskinny_128_256_init_tks(forkskinny_128_256_tweakey_schedule_t *tks, const unsigned char key[32], uint8_t nb_rounds); + + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. 
+ * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last); + + +void forkskinny_128_384_init_tks(forkskinny_128_384_tweakey_schedule_t *tks, const unsigned char key[48], uint8_t nb_rounds); + + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last); + + +void forkskinny_64_192_init_tks(forkskinny_64_192_tweakey_schedule_t *tks, const unsigned char key[24], uint8_t nb_rounds); + + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. 
+ */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last); + + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. * * \param key 256-bit tweakey for ForkSkinny-128-256. diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_dec/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. 
The original version from the specification is * equivalent to: diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/aead-common.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/aead-common.h index 2be95eb..8429f59 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/aead-common.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/aead-common.h @@ -154,7 +154,7 @@ typedef void (*aead_xof_squeeze_t) /** * \brief No special AEAD features. */ -#define AEAD_FLAG_NONE 0x0000 +#define AEAD_FLAG_NONE 0x0000 /** * \brief The natural byte order of the AEAD cipher is little-endian. @@ -166,7 +166,18 @@ typedef void (*aead_xof_squeeze_t) * numbers as nonces. The application needs to know whether the sequence * number should be packed into the leading or trailing bytes of the nonce. */ -#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 +#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 + +/** + * \brief The AEAD mode provides side-channel protection for the key. + */ +#define AEAD_FLAG_SC_PROTECT_KEY 0x0002 + +/** + * \brief The AEAD mode provides side-channel protection for all block + * operations. + */ +#define AEAD_FLAG_SC_PROTECT_ALL 0x0004 /** * \brief Meta-information about an AEAD cipher. 
diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/forkae.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/forkae.c index 4a9671a..5b7ba3d 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/forkae.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/forkae.c @@ -22,7 +22,7 @@ #include "forkae.h" #include "internal-forkskinny.h" -#include "internal-util.h" +#include "internal-skinnyutil.h" #include aead_cipher_t const forkae_paef_64_192_cipher = { @@ -138,3 +138,458 @@ aead_cipher_t const forkae_saef_128_256_cipher = { #define FORKAE_TWEAKEY_REDUCED_SIZE 32 #define FORKAE_BLOCK_FUNC forkskinny_128_256 #include "internal-forkae-saef.h" + +/* Helper functions to implement the forking encrypt/decrypt block operations + * on top of the basic "perform N rounds" functions in internal-forkskinny.c */ + +/** + * \brief Number of rounds of ForkSkinny-128-256 before forking. + */ +#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 + +/** + * \brief Number of rounds of ForkSkinny-128-256 after forking. 
+ */ +#define FORKSKINNY_128_256_ROUNDS_AFTER 27 + +void forkskinny_128_256_encrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + + /* State stored per column */ + load_column_8(state.S, input); + + /* Run all of the rounds before the forking point */ + forkskinny_128_256_rounds(&state, 0, FORKSKINNY_128_256_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + store_column_8(output_right, state.S); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x51051001; /* Branching constant */ + state.S[1] ^= 0xa20a2002; + state.S[2] ^= 0x44144104; + state.S[3] ^= 0x88288208; + + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + store_column_8(output_left, state.S); + } else { + /* We only need the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + 
FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + store_column_8(output_right, state.S); + } +} + +void forkskinny_128_256_decrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_256_forward_tk + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_256_reverse_tk(&state, FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point, convert state to columns */ + memcpy(fstate.TK1, state.TK1, 16); + memcpy(fstate.TK2, state.TK2, 16); + rows_to_columns_32(fstate.S[0],fstate.S[1],fstate.S[2],fstate.S[3],state.S[0],state.S[1], state.S[2], state.S[3]); + + + /* Generate the left output block after another "before" 
rounds */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_256_rounds + (&fstate, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + store_column_8(output_right,fstate.S); +} + +/** + * \brief Number of rounds of ForkSkinny-128-384 before forking. + */ +#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 + +/** + * \brief Number of rounds of ForkSkinny-128-384 after forking. + */ +#define FORKSKINNY_128_384_ROUNDS_AFTER 31 + +void forkskinny_128_384_encrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + + /* State stored per column */ + load_column_8(state.S, input); + + /* Run all of the rounds before the forking point */ + forkskinny_128_384_rounds(&state, 0, FORKSKINNY_128_384_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] 
= state.S[3]; + + /* Generate the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + store_column_8(output_right, state.S); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x51051001; /* Branching constant */ + state.S[1] ^= 0xa20a2002; + state.S[2] ^= 0x44144104; + state.S[3] ^= 0x88288208; + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + store_column_8(output_left, state.S); + } else { + /* We only need the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + store_column_8(output_right, state.S); + } +} + +void forkskinny_128_384_decrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); 
+ + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_384_forward_tk + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_384_inv_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_384_reverse_tk(&state, FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point, convert state to columns */ + memcpy(fstate.TK1, state.TK1, 16); + memcpy(fstate.TK2, state.TK2, 16); + memcpy(fstate.TK3, state.TK3, 16); + rows_to_columns_32(fstate.S[0],fstate.S[1],fstate.S[2],fstate.S[3],state.S[0],state.S[1], state.S[2], state.S[3]); + + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_384_inv_rounds(&state, FORKSKINNY_128_384_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_384_rounds + (&fstate, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + store_column_8(output_right, fstate.S); +} + +/** + * \brief Number of rounds of ForkSkinny-64-192 before forking. + */ +#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 + +/** + * \brief Number of rounds of ForkSkinny-64-192 after forking. 
+ */ +#define FORKSKINNY_64_192_ROUNDS_AFTER 23 + +void forkskinny_64_192_encrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Run all of the rounds before the forking point */ + forkskinny_64_192_rounds(&state, 0, FORKSKINNY_64_192_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint16_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x1249U; /* Branching constant */ + state.S[1] ^= 0x36daU; + state.S[2] ^= 
0x5b7fU; + state.S[3] ^= 0xec81U; + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + } +} + +void forkskinny_64_192_decrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_64_192_forward_tk + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking 
point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_64_192_reverse_tk(&state, FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_64_192_inv_rounds(&state, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_64_192_rounds + (&fstate, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 67 
+ .byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + .byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte 193 + .byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 + .byte 100 + .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + .byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 194 + 
.byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 + .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + .byte 97 + .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 + 
.byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + .byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + .byte 93 + .byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 223 + .byte 22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section .progmem.data,"a",@progbits 
+ .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 80 + 
.byte 82 + .byte 84 + .byte 86 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_3, @object + .size table_3, 256 +table_3: + .byte 0 + .byte 128 + .byte 1 + .byte 129 + .byte 2 + .byte 130 + .byte 3 + .byte 131 + .byte 4 + .byte 132 + .byte 5 + .byte 133 + .byte 6 + .byte 134 + .byte 7 + .byte 135 + .byte 8 + .byte 136 + .byte 9 + .byte 137 + .byte 10 + .byte 138 + .byte 11 + .byte 139 + .byte 12 + .byte 140 + .byte 13 + .byte 141 + .byte 14 + .byte 142 + .byte 15 + .byte 143 + .byte 16 + .byte 144 + .byte 17 + .byte 145 + .byte 18 + .byte 146 + .byte 19 + .byte 147 + .byte 20 + .byte 148 + .byte 21 + .byte 149 + .byte 22 + .byte 150 + .byte 23 + .byte 151 + .byte 24 + .byte 152 + .byte 25 + .byte 153 + .byte 26 + .byte 154 + .byte 27 + .byte 155 + .byte 28 + .byte 156 + .byte 29 + .byte 157 + .byte 30 + .byte 158 + .byte 31 + .byte 159 + .byte 160 + .byte 32 + .byte 161 + .byte 33 + .byte 162 + .byte 34 + .byte 163 + .byte 35 + .byte 164 + .byte 36 + .byte 165 + 
.byte 37 + .byte 166 + .byte 38 + .byte 167 + .byte 39 + .byte 168 + .byte 40 + .byte 169 + .byte 41 + .byte 170 + .byte 42 + .byte 171 + .byte 43 + .byte 172 + .byte 44 + .byte 173 + .byte 45 + .byte 174 + .byte 46 + .byte 175 + .byte 47 + .byte 176 + .byte 48 + .byte 177 + .byte 49 + .byte 178 + .byte 50 + .byte 179 + .byte 51 + .byte 180 + .byte 52 + .byte 181 + .byte 53 + .byte 182 + .byte 54 + .byte 183 + .byte 55 + .byte 184 + .byte 56 + .byte 185 + .byte 57 + .byte 186 + .byte 58 + .byte 187 + .byte 59 + .byte 188 + .byte 60 + .byte 189 + .byte 61 + .byte 190 + .byte 62 + .byte 191 + .byte 63 + .byte 64 + .byte 192 + .byte 65 + .byte 193 + .byte 66 + .byte 194 + .byte 67 + .byte 195 + .byte 68 + .byte 196 + .byte 69 + .byte 197 + .byte 70 + .byte 198 + .byte 71 + .byte 199 + .byte 72 + .byte 200 + .byte 73 + .byte 201 + .byte 74 + .byte 202 + .byte 75 + .byte 203 + .byte 76 + .byte 204 + .byte 77 + .byte 205 + .byte 78 + .byte 206 + .byte 79 + .byte 207 + .byte 80 + .byte 208 + .byte 81 + .byte 209 + .byte 82 + .byte 210 + .byte 83 + .byte 211 + .byte 84 + .byte 212 + .byte 85 + .byte 213 + .byte 86 + .byte 214 + .byte 87 + .byte 215 + .byte 88 + .byte 216 + .byte 89 + .byte 217 + .byte 90 + .byte 218 + .byte 91 + .byte 219 + .byte 92 + .byte 220 + .byte 93 + .byte 221 + .byte 94 + .byte 222 + .byte 95 + .byte 223 + .byte 224 + .byte 96 + .byte 225 + .byte 97 + .byte 226 + .byte 98 + .byte 227 + .byte 99 + .byte 228 + .byte 100 + .byte 229 + .byte 101 + .byte 230 + .byte 102 + .byte 231 + .byte 103 + .byte 232 + .byte 104 + .byte 233 + .byte 105 + .byte 234 + .byte 106 + .byte 235 + .byte 107 + .byte 236 + .byte 108 + .byte 237 + .byte 109 + .byte 238 + .byte 110 + .byte 239 + .byte 111 + .byte 240 + .byte 112 + .byte 241 + .byte 113 + .byte 242 + .byte 114 + .byte 243 + .byte 115 + .byte 244 + .byte 116 + .byte 245 + .byte 117 + .byte 246 + .byte 118 + .byte 247 + .byte 119 + .byte 248 + .byte 120 + .byte 249 + .byte 121 + .byte 250 + .byte 122 + .byte 251 
+ .byte 123 + .byte 252 + .byte 124 + .byte 253 + .byte 125 + .byte 254 + .byte 126 + .byte 255 + .byte 127 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_4, @object + .size table_4, 174 +table_4: + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 7 + .byte 0 + .byte 15 + .byte 0 + .byte 15 + .byte 1 + .byte 15 + .byte 3 + .byte 14 + .byte 7 + .byte 13 + .byte 7 + .byte 11 + .byte 7 + .byte 7 + .byte 7 + .byte 15 + .byte 6 + .byte 15 + .byte 5 + .byte 14 + .byte 3 + .byte 12 + .byte 7 + .byte 9 + .byte 7 + .byte 3 + .byte 7 + .byte 7 + .byte 6 + .byte 15 + .byte 4 + .byte 14 + .byte 1 + .byte 13 + .byte 3 + .byte 10 + .byte 7 + .byte 5 + .byte 7 + .byte 11 + .byte 6 + .byte 7 + .byte 5 + .byte 14 + .byte 2 + .byte 12 + .byte 5 + .byte 8 + .byte 3 + .byte 0 + .byte 7 + .byte 1 + .byte 6 + .byte 3 + .byte 4 + .byte 6 + .byte 0 + .byte 13 + .byte 0 + .byte 11 + .byte 1 + .byte 7 + .byte 3 + .byte 14 + .byte 6 + .byte 13 + .byte 5 + .byte 10 + .byte 3 + .byte 4 + .byte 7 + .byte 9 + .byte 6 + .byte 3 + .byte 5 + .byte 6 + .byte 2 + .byte 12 + .byte 4 + .byte 8 + .byte 1 + .byte 1 + .byte 3 + .byte 2 + .byte 6 + .byte 5 + .byte 4 + .byte 10 + .byte 0 + .byte 5 + .byte 1 + .byte 11 + .byte 2 + .byte 6 + .byte 5 + .byte 12 + .byte 2 + .byte 8 + .byte 5 + .byte 0 + .byte 3 + .byte 0 + .byte 6 + .byte 1 + .byte 4 + .byte 2 + .byte 0 + .byte 5 + .byte 0 + .byte 11 + .byte 0 + .byte 7 + .byte 1 + .byte 15 + .byte 2 + .byte 14 + .byte 5 + .byte 12 + .byte 3 + .byte 8 + .byte 7 + .byte 1 + .byte 7 + .byte 3 + .byte 6 + .byte 7 + .byte 4 + .byte 14 + .byte 0 + .byte 13 + .byte 1 + .byte 11 + .byte 3 + .byte 6 + .byte 7 + .byte 13 + .byte 6 + .byte 11 + .byte 5 + .byte 6 + .byte 3 + .byte 12 + .byte 6 + .byte 9 + .byte 5 + .byte 2 + .byte 3 + .byte 4 + .byte 6 + .byte 9 + .byte 4 + .byte 2 + .byte 1 + .byte 5 + .byte 2 + .byte 10 + .byte 4 + .byte 4 + .byte 1 + .byte 9 + .byte 2 + .byte 2 + .byte 5 + .byte 4 + .byte 2 + .byte 8 + .byte 4 + .byte 0 + .byte 1 + + 
.section .progmem.data,"a",@progbits + .p2align 8 + .type table_5, @object + .size table_5, 256 +table_5: + .byte 204 + .byte 198 + .byte 201 + .byte 192 + .byte 193 + .byte 202 + .byte 194 + .byte 203 + .byte 195 + .byte 200 + .byte 197 + .byte 205 + .byte 196 + .byte 206 + .byte 199 + .byte 207 + .byte 108 + .byte 102 + .byte 105 + .byte 96 + .byte 97 + .byte 106 + .byte 98 + .byte 107 + .byte 99 + .byte 104 + .byte 101 + .byte 109 + .byte 100 + .byte 110 + .byte 103 + .byte 111 + .byte 156 + .byte 150 + .byte 153 + .byte 144 + .byte 145 + .byte 154 + .byte 146 + .byte 155 + .byte 147 + .byte 152 + .byte 149 + .byte 157 + .byte 148 + .byte 158 + .byte 151 + .byte 159 + .byte 12 + .byte 6 + .byte 9 + .byte 0 + .byte 1 + .byte 10 + .byte 2 + .byte 11 + .byte 3 + .byte 8 + .byte 5 + .byte 13 + .byte 4 + .byte 14 + .byte 7 + .byte 15 + .byte 28 + .byte 22 + .byte 25 + .byte 16 + .byte 17 + .byte 26 + .byte 18 + .byte 27 + .byte 19 + .byte 24 + .byte 21 + .byte 29 + .byte 20 + .byte 30 + .byte 23 + .byte 31 + .byte 172 + .byte 166 + .byte 169 + .byte 160 + .byte 161 + .byte 170 + .byte 162 + .byte 171 + .byte 163 + .byte 168 + .byte 165 + .byte 173 + .byte 164 + .byte 174 + .byte 167 + .byte 175 + .byte 44 + .byte 38 + .byte 41 + .byte 32 + .byte 33 + .byte 42 + .byte 34 + .byte 43 + .byte 35 + .byte 40 + .byte 37 + .byte 45 + .byte 36 + .byte 46 + .byte 39 + .byte 47 + .byte 188 + .byte 182 + .byte 185 + .byte 176 + .byte 177 + .byte 186 + .byte 178 + .byte 187 + .byte 179 + .byte 184 + .byte 181 + .byte 189 + .byte 180 + .byte 190 + .byte 183 + .byte 191 + .byte 60 + .byte 54 + .byte 57 + .byte 48 + .byte 49 + .byte 58 + .byte 50 + .byte 59 + .byte 51 + .byte 56 + .byte 53 + .byte 61 + .byte 52 + .byte 62 + .byte 55 + .byte 63 + .byte 140 + .byte 134 + .byte 137 + .byte 128 + .byte 129 + .byte 138 + .byte 130 + .byte 139 + .byte 131 + .byte 136 + .byte 133 + .byte 141 + .byte 132 + .byte 142 + .byte 135 + .byte 143 + .byte 92 + .byte 86 + .byte 89 + .byte 80 + .byte 
81 + .byte 90 + .byte 82 + .byte 91 + .byte 83 + .byte 88 + .byte 85 + .byte 93 + .byte 84 + .byte 94 + .byte 87 + .byte 95 + .byte 220 + .byte 214 + .byte 217 + .byte 208 + .byte 209 + .byte 218 + .byte 210 + .byte 219 + .byte 211 + .byte 216 + .byte 213 + .byte 221 + .byte 212 + .byte 222 + .byte 215 + .byte 223 + .byte 76 + .byte 70 + .byte 73 + .byte 64 + .byte 65 + .byte 74 + .byte 66 + .byte 75 + .byte 67 + .byte 72 + .byte 69 + .byte 77 + .byte 68 + .byte 78 + .byte 71 + .byte 79 + .byte 236 + .byte 230 + .byte 233 + .byte 224 + .byte 225 + .byte 234 + .byte 226 + .byte 235 + .byte 227 + .byte 232 + .byte 229 + .byte 237 + .byte 228 + .byte 238 + .byte 231 + .byte 239 + .byte 124 + .byte 118 + .byte 121 + .byte 112 + .byte 113 + .byte 122 + .byte 114 + .byte 123 + .byte 115 + .byte 120 + .byte 117 + .byte 125 + .byte 116 + .byte 126 + .byte 119 + .byte 127 + .byte 252 + .byte 246 + .byte 249 + .byte 240 + .byte 241 + .byte 250 + .byte 242 + .byte 251 + .byte 243 + .byte 248 + .byte 245 + .byte 253 + .byte 244 + .byte 254 + .byte 247 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_6, @object + .size table_6, 256 +table_6: + .byte 51 + .byte 52 + .byte 54 + .byte 56 + .byte 60 + .byte 58 + .byte 49 + .byte 62 + .byte 57 + .byte 50 + .byte 53 + .byte 55 + .byte 48 + .byte 59 + .byte 61 + .byte 63 + .byte 67 + .byte 68 + .byte 70 + .byte 72 + .byte 76 + .byte 74 + .byte 65 + .byte 78 + .byte 73 + .byte 66 + .byte 69 + .byte 71 + .byte 64 + .byte 75 + .byte 77 + .byte 79 + .byte 99 + .byte 100 + .byte 102 + .byte 104 + .byte 108 + .byte 106 + .byte 97 + .byte 110 + .byte 105 + .byte 98 + .byte 101 + .byte 103 + .byte 96 + .byte 107 + .byte 109 + .byte 111 + .byte 131 + .byte 132 + .byte 134 + .byte 136 + .byte 140 + .byte 138 + .byte 129 + .byte 142 + .byte 137 + .byte 130 + .byte 133 + .byte 135 + .byte 128 + .byte 139 + .byte 141 + .byte 143 + .byte 195 + .byte 196 + .byte 198 + .byte 200 + .byte 204 + .byte 202 + .byte 193 + 
.byte 206 + .byte 201 + .byte 194 + .byte 197 + .byte 199 + .byte 192 + .byte 203 + .byte 205 + .byte 207 + .byte 163 + .byte 164 + .byte 166 + .byte 168 + .byte 172 + .byte 170 + .byte 161 + .byte 174 + .byte 169 + .byte 162 + .byte 165 + .byte 167 + .byte 160 + .byte 171 + .byte 173 + .byte 175 + .byte 19 + .byte 20 + .byte 22 + .byte 24 + .byte 28 + .byte 26 + .byte 17 + .byte 30 + .byte 25 + .byte 18 + .byte 21 + .byte 23 + .byte 16 + .byte 27 + .byte 29 + .byte 31 + .byte 227 + .byte 228 + .byte 230 + .byte 232 + .byte 236 + .byte 234 + .byte 225 + .byte 238 + .byte 233 + .byte 226 + .byte 229 + .byte 231 + .byte 224 + .byte 235 + .byte 237 + .byte 239 + .byte 147 + .byte 148 + .byte 150 + .byte 152 + .byte 156 + .byte 154 + .byte 145 + .byte 158 + .byte 153 + .byte 146 + .byte 149 + .byte 151 + .byte 144 + .byte 155 + .byte 157 + .byte 159 + .byte 35 + .byte 36 + .byte 38 + .byte 40 + .byte 44 + .byte 42 + .byte 33 + .byte 46 + .byte 41 + .byte 34 + .byte 37 + .byte 39 + .byte 32 + .byte 43 + .byte 45 + .byte 47 + .byte 83 + .byte 84 + .byte 86 + .byte 88 + .byte 92 + .byte 90 + .byte 81 + .byte 94 + .byte 89 + .byte 82 + .byte 85 + .byte 87 + .byte 80 + .byte 91 + .byte 93 + .byte 95 + .byte 115 + .byte 116 + .byte 118 + .byte 120 + .byte 124 + .byte 122 + .byte 113 + .byte 126 + .byte 121 + .byte 114 + .byte 117 + .byte 119 + .byte 112 + .byte 123 + .byte 125 + .byte 127 + .byte 3 + .byte 4 + .byte 6 + .byte 8 + .byte 12 + .byte 10 + .byte 1 + .byte 14 + .byte 9 + .byte 2 + .byte 5 + .byte 7 + .byte 0 + .byte 11 + .byte 13 + .byte 15 + .byte 179 + .byte 180 + .byte 182 + .byte 184 + .byte 188 + .byte 186 + .byte 177 + .byte 190 + .byte 185 + .byte 178 + .byte 181 + .byte 183 + .byte 176 + .byte 187 + .byte 189 + .byte 191 + .byte 211 + .byte 212 + .byte 214 + .byte 216 + .byte 220 + .byte 218 + .byte 209 + .byte 222 + .byte 217 + .byte 210 + .byte 213 + .byte 215 + .byte 208 + .byte 219 + .byte 221 + .byte 223 + .byte 243 + .byte 244 + .byte 246 + .byte 248 
+ .byte 252 + .byte 250 + .byte 241 + .byte 254 + .byte 249 + .byte 242 + .byte 245 + .byte 247 + .byte 240 + .byte 251 + .byte 253 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_7, @object + .size table_7, 256 +table_7: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 
49 + .byte 51 + .byte 53 + .byte 55 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_8, @object + .size table_8, 256 +table_8: + .byte 0 + .byte 8 + .byte 1 + .byte 9 + .byte 2 + .byte 10 + .byte 3 + .byte 11 + .byte 12 + .byte 4 + .byte 13 + .byte 5 + .byte 14 + .byte 6 + .byte 15 + .byte 7 + .byte 128 + .byte 136 + .byte 129 + .byte 137 + .byte 130 + .byte 138 + .byte 131 + .byte 139 + .byte 140 + .byte 132 + .byte 141 + .byte 133 + .byte 142 + .byte 134 + .byte 143 + .byte 135 + .byte 16 + .byte 24 + .byte 17 + .byte 25 + .byte 18 + .byte 26 + .byte 19 + .byte 27 + .byte 28 + .byte 20 + .byte 29 + .byte 21 + .byte 30 + .byte 22 + .byte 31 + .byte 23 + .byte 144 + .byte 152 + .byte 145 + .byte 153 + .byte 146 + .byte 154 + .byte 147 + .byte 155 + .byte 156 + .byte 148 + .byte 157 + .byte 
149 + .byte 158 + .byte 150 + .byte 159 + .byte 151 + .byte 32 + .byte 40 + .byte 33 + .byte 41 + .byte 34 + .byte 42 + .byte 35 + .byte 43 + .byte 44 + .byte 36 + .byte 45 + .byte 37 + .byte 46 + .byte 38 + .byte 47 + .byte 39 + .byte 160 + .byte 168 + .byte 161 + .byte 169 + .byte 162 + .byte 170 + .byte 163 + .byte 171 + .byte 172 + .byte 164 + .byte 173 + .byte 165 + .byte 174 + .byte 166 + .byte 175 + .byte 167 + .byte 48 + .byte 56 + .byte 49 + .byte 57 + .byte 50 + .byte 58 + .byte 51 + .byte 59 + .byte 60 + .byte 52 + .byte 61 + .byte 53 + .byte 62 + .byte 54 + .byte 63 + .byte 55 + .byte 176 + .byte 184 + .byte 177 + .byte 185 + .byte 178 + .byte 186 + .byte 179 + .byte 187 + .byte 188 + .byte 180 + .byte 189 + .byte 181 + .byte 190 + .byte 182 + .byte 191 + .byte 183 + .byte 192 + .byte 200 + .byte 193 + .byte 201 + .byte 194 + .byte 202 + .byte 195 + .byte 203 + .byte 204 + .byte 196 + .byte 205 + .byte 197 + .byte 206 + .byte 198 + .byte 207 + .byte 199 + .byte 64 + .byte 72 + .byte 65 + .byte 73 + .byte 66 + .byte 74 + .byte 67 + .byte 75 + .byte 76 + .byte 68 + .byte 77 + .byte 69 + .byte 78 + .byte 70 + .byte 79 + .byte 71 + .byte 208 + .byte 216 + .byte 209 + .byte 217 + .byte 210 + .byte 218 + .byte 211 + .byte 219 + .byte 220 + .byte 212 + .byte 221 + .byte 213 + .byte 222 + .byte 214 + .byte 223 + .byte 215 + .byte 80 + .byte 88 + .byte 81 + .byte 89 + .byte 82 + .byte 90 + .byte 83 + .byte 91 + .byte 92 + .byte 84 + .byte 93 + .byte 85 + .byte 94 + .byte 86 + .byte 95 + .byte 87 + .byte 224 + .byte 232 + .byte 225 + .byte 233 + .byte 226 + .byte 234 + .byte 227 + .byte 235 + .byte 236 + .byte 228 + .byte 237 + .byte 229 + .byte 238 + .byte 230 + .byte 239 + .byte 231 + .byte 96 + .byte 104 + .byte 97 + .byte 105 + .byte 98 + .byte 106 + .byte 99 + .byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + .byte 250 + .byte 243 + .byte 251 
+ .byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + .byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global forkskinny_128_256_rounds + .type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi 
r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z 
+#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + 
eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if 
defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd 
r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global forkskinny_128_256_inv_rounds + .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd 
r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif 
defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + 
lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 
+#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 
+ std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 /* last stores of the write-back in forkskinny_128_256_inv_rounds: frame bytes Y+25..Y+32 go to Z+24..Z+31 */ + adiw r28,33 /* release the 33-byte frame */ + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 /* install the new stack pointer; the status byte read from 0x3f is put back between the two writes */ + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 /* r1 = 0 on return */ + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + /* forkskinny_128_256_forward_tk: steps a 32-byte buffer in place. In: r24:r25 = buffer address (copied to Z), r22 = number of passes; r22 is decremented until it reaches zero, so an entry value of 0 wraps and gives 256 passes. Buffer bytes 0..15 live in r4-r15,r24,r25,r16,r17; bytes 16..31 live in a 16-byte stack frame at Y+1..Y+16. Each pass treats both 16-byte halves alike: the old first 8 bytes become the last 8, and the first 8 are refilled with a fixed shuffle of the old last 8; for the half held in the frame the shuffled bytes are also mapped through table_2. On exit both halves are stored back and all saved registers are restored. */ + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 /* 18 register bytes saved */ + movw r30,r24 /* Z = buffer address */ + in r28,0x3d + in r29,0x3e + sbiw r28,16 /* Y = stack pointer - 16: room for buffer bytes 16..31 */ + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 /* stack pointer update is done with interrupts masked by cli */ +.L__stack_usage = 34 /* 18 saved bytes + 16 frame bytes */ + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 /* buffer bytes 0..15 now in registers */ + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 /* buffer bytes 16..31 now in the frame */ + push r31 + push r30 /* keep the buffer address while Z is used for table lookups */ + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 /* old RAMPZ saved on the stack, then set to the table's hh8 byte */ +#endif +51: /* top of the per-pass loop */ + movw r18,r12 + movw r20,r14
+ movw r26,r24 + movw r2,r16 /* r18-r21,r26,r27,r2,r3 = old bytes 8..15 */ + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 /* bytes 8..15 = old bytes 0..7 */ + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 /* bytes 0..7 = old bytes 9,15,8,13,10,14,12,11 */ + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 /* fetch the last 8 bytes of the frame half */ + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 /* frame bytes 1..8 move to 9..16 */ + mov r30,r18 /* table_2 lookup: the byte value replaces the low byte of Z; presumably table_2 is 256-byte aligned so the high byte stays valid - confirm at the table definition */ +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 /* plain lpm delivers its result in r0 */ +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 /* mapped bytes stored in the same shuffled order as the register half */ + dec r22 + breq 5109f /* leave the loop once the pass count reaches zero */ +
rjmp 51b /* forkskinny_128_256_forward_tk: another pass */ +5109: /* epilogue of forkskinny_128_256_forward_tk */ +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 /* put back the RAMPZ value saved in the prologue */ +#endif + pop r30 + pop r31 /* Z = buffer address again */ + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 /* register half written to buffer bytes 0..15 */ + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 /* frame half written to buffer bytes 16..31 */ + adiw r28,16 /* release the 16-byte frame */ + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + /* forkskinny_128_256_reverse_tk: steps a 32-byte buffer in place, r22 times. In: r24:r25 = buffer address (copied to Z), r22 = number of passes; r22 is decremented until it reaches zero, so an entry value of 0 wraps and gives 256 passes. Buffer bytes 0..15 live in r2-r17; bytes 16..31 live in a 16-byte stack frame at Y+1..Y+16. Each pass treats both 16-byte halves alike: the old last 8 bytes become the first 8, and the last 8 are refilled with a fixed shuffle of the old first 8; for the half held in the frame the shuffled bytes are also mapped through table_3. On exit both halves are stored back and all saved registers are restored. */ + .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 /* 18 register bytes saved */ + movw r30,r24 /* Z = buffer address */ + in r28,0x3d + in r29,0x3e + sbiw r28,16 /* Y = stack pointer - 16: room for buffer bytes 16..31 */ + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 /* stack pointer update is done with interrupts masked by cli */ +.L__stack_usage = 34 /* 18 saved bytes + 16 frame bytes */ + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 /* buffer bytes 0..15 now in r2-r17 */ + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std
Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 /* buffer bytes 16..31 now in the frame */ + push r31 + push r30 /* keep the buffer address while Z is used for table lookups */ + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 /* old RAMPZ saved on the stack, then set to the table's hh8 byte */ +#endif +51: /* top of the per-pass loop */ + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 /* r18-r21,r26,r27,r24,r25 = old bytes 0..7 */ + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 /* bytes 0..7 = old bytes 8..15 */ + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 /* bytes 8..15 = old bytes 2,0,4,7,6,3,5,1 */ + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 /* fetch the first 8 bytes of the frame half */ + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 /* frame bytes 9..16 move to 1..8 */ + mov r30,r18 /* table_3 lookup: the byte value replaces the low byte of Z; presumably table_3 is 256-byte aligned so the high byte stays valid - confirm at the table definition */ +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 /* plain lpm delivers its result in r0 */ +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld
r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 /* forkskinny_128_256_reverse_tk loop, continued: last two table_3 lookups */ +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 /* mapped bytes stored to frame bytes 9..16 in shuffled order */ + dec r22 + breq 5109f + rjmp 51b /* repeat until the pass count in r22 reaches zero */ +5109: /* epilogue of forkskinny_128_256_reverse_tk */ +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 /* put back the RAMPZ value saved in the prologue */ +#endif + pop r30 + pop r31 /* Z = buffer address again */ + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 /* register half written to buffer bytes 0..15 */ + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 /* frame half written to buffer bytes 16..31 */ + adiw r28,16 /* release the 16-byte frame */ + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk + /* forkskinny_128_384_rounds: In: r24:r25 = address of a 64-byte structure (copied to Z); r22 and r20 = start and end values of the round counter. Both are doubled with lsl; the doubled r22 is used as a byte index into table_4 and is incremented twice per pass, and the loop at label 118 ends when it equals the doubled r20 kept at Y+49. Structure bytes 48..63 are processed in r2-r15,r24,r25; bytes 0..47 are copied to a stack frame at Y+1..Y+48. Per pass: all 16 register bytes are mapped through table_0; the first 8 are XORed with frame bytes 1..8, 17..24 and 33..40 and with two bytes from table_4; the constant 2 is XORed into r10 and r4; the register bytes are rotated and XOR-mixed; finally each 16-byte third of the frame is stepped (plain shuffle for the first, shuffle plus table_2 for the second, shuffle plus table_3 for the third). On exit the registers go back to bytes 48..63 and the frame to bytes 0..47, and r1 is cleared. NOTE(review): there is no guard for start == end on entry; the loop body always runs at least once - confirm callers never pass equal values. */ + .text +.global forkskinny_128_384_rounds + .type forkskinny_128_384_rounds, @function +forkskinny_128_384_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 /* 18 register bytes saved */ + movw r30,r24 /* Z = structure address */ + in r28,0x3d + in r29,0x3e /* Y = current stack pointer; the frame size is subtracted next */ + sbiw
r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + mov r30,r2 +#if 
defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + 
mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + 
lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + ldd r20,Y+45 + ldd r21,Y+46 + ldd r16,Y+47 + ldd r17,Y+48 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z 
+#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+33 + std Y+41,r0 + ldd r0,Y+34 + std Y+42,r0 + ldd r0,Y+35 + std Y+43,r0 + ldd r0,Y+36 + std Y+44,r0 + ldd r0,Y+37 + std Y+45,r0 + ldd r0,Y+38 + std Y+46,r0 + ldd r0,Y+39 + std Y+47,r0 + ldd r0,Y+40 + std Y+48,r0 + std Y+33,r19 + std Y+34,r17 + std Y+35,r18 + std Y+36,r21 + std Y+37,r26 + std Y+38,r16 + std Y+39,r20 + std Y+40,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd 
r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 /* forkskinny_128_384_rounds epilogue, continued: frame bytes are copied back to the structure four at a time */ + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 /* frame bytes 1..48 are now back in structure bytes 0..47 */ + adiw r28,49 /* release the 49-byte frame */ + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 /* install the new stack pointer; the status byte read from 0x3f is put back between the two writes */ + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 /* r1 = 0 on return */ + ret + .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds + /* forkskinny_128_384_inv_rounds: In: r24:r25 = address of a 64-byte structure (copied to Z); r22 and r20 = start and end values of the round counter. Both are doubled with lsl; the doubled r22 is decremented twice per pass, each time indexing one byte of table_4, and the loop at label 118 ends when it equals the doubled r20 kept at Y+49. Structure bytes 48..63 are processed in r2-r15,r24,r25; bytes 0..47 are copied to a stack frame at Y+1..Y+48. Per pass: each 16-byte third of the frame is stepped first (plain shuffle for the first, shuffle plus table_3 for the second, shuffle plus table_2 for the third); the register bytes are then XOR-mixed and rotated; the first 8 are XORed with frame bytes 1..8, 17..24 and 33..40 and with the two table_4 bytes; the constant 2 is XORed into r10 and r4; finally all 16 register bytes are mapped through table_1. On exit the registers go back to bytes 48..63 and the frame to bytes 0..47, and r1 is cleared. NOTE(review): there is no guard for start == end on entry; the loop body always runs at least once - confirm callers never pass equal values. */ + .text +.global forkskinny_128_384_inv_rounds + .type forkskinny_128_384_inv_rounds, @function +forkskinny_128_384_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 /* 18 register bytes saved */ + movw r30,r24 /* Z = structure address */ + in r28,0x3d + in r29,0x3e + sbiw r28,49 /* Y = stack pointer - 49: 48 bytes of structure copy plus 1 byte for the loop end value */ + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 /* stack pointer update is done with interrupts masked by cli */ +.L__stack_usage = 67 /* 18 saved bytes + 49 frame bytes */ + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 /* structure bytes 48..63 now in registers */ + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 /* structure bytes 0..47 are copied into the frame four at a time */ + std
Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if 
defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + ldd r20,Y+37 + ldd r21,Y+38 + ldd r16,Y+39 + ldd r17,Y+40 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm 
r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+41 + std Y+33,r0 + ldd r0,Y+42 + std Y+34,r0 + ldd r0,Y+43 + std Y+35,r0 + ldd r0,Y+44 + std Y+36,r0 + ldd r0,Y+45 + std Y+37,r0 + ldd r0,Y+46 + std Y+38,r0 + ldd r0,Y+47 + std Y+39,r0 + ldd r0,Y+48 + std Y+40,r0 + std Y+41,r26 + std Y+42,r18 + std Y+43,r20 + std Y+44,r17 + std Y+45,r16 + std Y+46,r27 + std Y+47,r21 + std Y+48,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor 
r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + 
ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop 
r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds + + .text +.global forkskinny_128_384_forward_tk + .type forkskinny_128_384_forward_tk, @function +forkskinny_128_384_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out 
_SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 
+ std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r26,Y+29 + ldd r27,Y+30 + ldd r2,Y+31 + ldd r3,Y+32 + ldd r23,Y+17 + std Y+25,r23 + ldd r23,Y+18 + std Y+26,r23 + ldd r23,Y+19 + std Y+27,r23 + ldd r23,Y+20 + std Y+28,r23 + ldd r23,Y+21 + std Y+29,r23 + ldd r23,Y+22 + std Y+30,r23 + ldd r23,Y+23 + std Y+31,r23 + ldd r23,Y+24 + std Y+32,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+17,r19 + std Y+18,r3 + std Y+19,r18 + std Y+20,r27 + std Y+21,r20 + std Y+22,r2 + std Y+23,r26 + std Y+24,r21 + ldi r30,lo8(table_2) + ldi 
r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk + + .text +.global forkskinny_128_384_reverse_tk + .type forkskinny_128_384_reverse_tk, @function +forkskinny_128_384_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 
+ push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 
+ ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 + 
ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd 
r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + 
std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 
+#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out 
_SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 
+#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push 
r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if 
defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or 
r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm 
r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std 
Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + 
pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 + pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop 
r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + 
mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or 
r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + and r11,r16 + or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/internal-forkskinny.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/internal-forkskinny.c index af29f77..b96a04c 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/internal-forkskinny.c @@ -22,12 +22,23 @@ #include "internal-forkskinny.h" #include "internal-skinnyutil.h" -#include /** * \brief 7-bit round constants for all ForkSkinny block ciphers. 
*/ -static unsigned char const RC[87] = {0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7e, 0x7d, 0x7b, 0x77, 0x6f, 0x5f, 0x3e, 0x7c, 0x79, 0x73, 0x67, 0x4f, 0x1e, 0x3d, 0x7a, 0x75, 0x6b, 0x57, 0x2e, 0x5c, 0x38, 0x70, 0x61, 0x43, 0x06, 0x0d, 0x1b, 0x37, 0x6e, 0x5d, 0x3a, 0x74, 0x69, 0x53, 0x26, 0x4c, 0x18, 0x31, 0x62, 0x45, 0x0a, 0x15, 0x2b, 0x56, 0x2c, 0x58, 0x30, 0x60, 0x41, 0x02, 0x05, 0x0b, 0x17, 0x2f, 0x5e, 0x3c, 0x78, 0x71, 0x63, 0x47, 0x0e, 0x1d, 0x3b, 0x76, 0x6d, 0x5b,0x36, 0x6c, 0x59, 0x32, 0x64, 0x49, 0x12, 0x25, 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10}; +static unsigned char const RC[87] = { + 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7e, 0x7d, + 0x7b, 0x77, 0x6f, 0x5f, 0x3e, 0x7c, 0x79, 0x73, + 0x67, 0x4f, 0x1e, 0x3d, 0x7a, 0x75, 0x6b, 0x57, + 0x2e, 0x5c, 0x38, 0x70, 0x61, 0x43, 0x06, 0x0d, + 0x1b, 0x37, 0x6e, 0x5d, 0x3a, 0x74, 0x69, 0x53, + 0x26, 0x4c, 0x18, 0x31, 0x62, 0x45, 0x0a, 0x15, + 0x2b, 0x56, 0x2c, 0x58, 0x30, 0x60, 0x41, 0x02, + 0x05, 0x0b, 0x17, 0x2f, 0x5e, 0x3c, 0x78, 0x71, + 0x63, 0x47, 0x0e, 0x1d, 0x3b, 0x76, 0x6d, 0x5b, + 0x36, 0x6c, 0x59, 0x32, 0x64, 0x49, 0x12, 0x25, + 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 +}; static const uint32_t T[256] = {0x65656565, 0x4c4c4c4c, 0x6a6a6a6a, 0x42424242, 0x4b4b4b4b, 0x63636363, 0x43434343, 0x6b6b6b6b, 0x55555555, 0x75757575, 0x5a5a5a5a, 0x7a7a7a7a, 0x53535353, 0x73737373, 0x5b5b5b5b, 0x7b7b7b7b, 0x35353535, 0x8c8c8c8c, 0x3a3a3a3a, 0x81818181, 0x89898989, 0x33333333, 0x80808080, 0x3b3b3b3b, 0x95959595, 0x25252525, 0x98989898, 0x2a2a2a2a, 0x90909090, 0x23232323, 0x99999999, 0x2b2b2b2b, 0xe5e5e5e5, 0xcccccccc, 0xe8e8e8e8, 0xc1c1c1c1, 0xc9c9c9c9, 0xe0e0e0e0, 0xc0c0c0c0, 0xe9e9e9e9, 0xd5d5d5d5, 0xf5f5f5f5, 0xd8d8d8d8, 0xf8f8f8f8, 0xd0d0d0d0, 0xf0f0f0f0, 0xd9d9d9d9, 0xf9f9f9f9, 0xa5a5a5a5, 0x1c1c1c1c, 0xa8a8a8a8, 0x12121212, 0x1b1b1b1b, 0xa0a0a0a0, 0x13131313, 0xa9a9a9a9, 0x05050505, 0xb5b5b5b5, 0x0a0a0a0a, 0xb8b8b8b8, 0x03030303, 0xb0b0b0b0, 0x0b0b0b0b, 0xb9b9b9b9, 0x32323232, 0x88888888, 0x3c3c3c3c, 0x85858585, 
0x8d8d8d8d, 0x34343434, 0x84848484, 0x3d3d3d3d, 0x91919191, 0x22222222, 0x9c9c9c9c, 0x2c2c2c2c, 0x94949494, 0x24242424, 0x9d9d9d9d, 0x2d2d2d2d, 0x62626262, 0x4a4a4a4a, 0x6c6c6c6c, 0x45454545, 0x4d4d4d4d, 0x64646464, 0x44444444, 0x6d6d6d6d, 0x52525252, 0x72727272, 0x5c5c5c5c, 0x7c7c7c7c, 0x54545454, 0x74747474, 0x5d5d5d5d, 0x7d7d7d7d, 0xa1a1a1a1, 0x1a1a1a1a, 0xacacacac, 0x15151515, 0x1d1d1d1d, 0xa4a4a4a4, 0x14141414, 0xadadadad, 0x02020202, 0xb1b1b1b1, 0x0c0c0c0c, 0xbcbcbcbc, 0x04040404, 0xb4b4b4b4, 0x0d0d0d0d, 0xbdbdbdbd, 0xe1e1e1e1, 0xc8c8c8c8, 0xecececec, 0xc5c5c5c5, 0xcdcdcdcd, 0xe4e4e4e4, 0xc4c4c4c4, 0xedededed, 0xd1d1d1d1, 0xf1f1f1f1, 0xdcdcdcdc, 0xfcfcfcfc, 0xd4d4d4d4, 0xf4f4f4f4, 0xdddddddd, 0xfdfdfdfd, 0x36363636, 0x8e8e8e8e, 0x38383838, 0x82828282, 0x8b8b8b8b, 0x30303030, 0x83838383, 0x39393939, 0x96969696, 0x26262626, 0x9a9a9a9a, 0x28282828, 0x93939393, 0x20202020, 0x9b9b9b9b, 0x29292929, 0x66666666, 0x4e4e4e4e, 0x68686868, 0x41414141, 0x49494949, 0x60606060, 0x40404040, 0x69696969, 0x56565656, 0x76767676, 0x58585858, 0x78787878, 0x50505050, 0x70707070, 0x59595959, 0x79797979, 0xa6a6a6a6, 0x1e1e1e1e, 0xaaaaaaaa, 0x11111111, 0x19191919, 0xa3a3a3a3, 0x10101010, 0xabababab, 0x06060606, 0xb6b6b6b6, 0x08080808, 0xbabababa, 0x00000000, 0xb3b3b3b3, 0x09090909, 0xbbbbbbbb, 0xe6e6e6e6, 0xcececece, 0xeaeaeaea, 0xc2c2c2c2, 0xcbcbcbcb, 0xe3e3e3e3, 0xc3c3c3c3, 0xebebebeb, 0xd6d6d6d6, 0xf6f6f6f6, 0xdadadada, 0xfafafafa, 0xd3d3d3d3, 0xf3f3f3f3, 0xdbdbdbdb, 0xfbfbfbfb, 0x31313131, 0x8a8a8a8a, 0x3e3e3e3e, 0x86868686, 0x8f8f8f8f, 0x37373737, 0x87878787, 0x3f3f3f3f, 0x92929292, 0x21212121, 0x9e9e9e9e, 0x2e2e2e2e, 0x97979797, 0x27272727, 0x9f9f9f9f, 0x2f2f2f2f, 0x61616161, 0x48484848, 0x6e6e6e6e, 0x46464646, 0x4f4f4f4f, 0x67676767, 0x47474747, 0x6f6f6f6f, 0x51515151, 0x71717171, 0x5e5e5e5e, 0x7e7e7e7e, 0x57575757, 0x77777777, 0x5f5f5f5f, 0x7f7f7f7f, 0xa2a2a2a2, 0x18181818, 0xaeaeaeae, 0x16161616, 0x1f1f1f1f, 0xa7a7a7a7, 0x17171717, 0xafafafaf, 0x01010101, 0xb2b2b2b2, 
0x0e0e0e0e, 0xbebebebe, 0x07070707, 0xb7b7b7b7, 0x0f0f0f0f, 0xbfbfbfbf, 0xe2e2e2e2, 0xcacacaca, 0xeeeeeeee, 0xc6c6c6c6, 0xcfcfcfcf, 0xe7e7e7e7, 0xc7c7c7c7, 0xefefefef, 0xd2d2d2d2, 0xf2f2f2f2, 0xdededede, 0xfefefefe, 0xd7d7d7d7, 0xf7f7f7f7, 0xdfdfdfdf, 0xffffffff}; static const uint32_t T_inv[256] = {0xacacacac, 0xe8e8e8e8, 0x68686868, 0x3c3c3c3c, 0x6c6c6c6c, 0x38383838, 0xa8a8a8a8, 0xecececec, 0xaaaaaaaa, 0xaeaeaeae, 0x3a3a3a3a, 0x3e3e3e3e, 0x6a6a6a6a, 0x6e6e6e6e, 0xeaeaeaea, 0xeeeeeeee, 0xa6a6a6a6, 0xa3a3a3a3, 0x33333333, 0x36363636, 0x66666666, 0x63636363, 0xe3e3e3e3, 0xe6e6e6e6, 0xe1e1e1e1, 0xa4a4a4a4, 0x61616161, 0x34343434, 0x31313131, 0x64646464, 0xa1a1a1a1, 0xe4e4e4e4, 0x8d8d8d8d, 0xc9c9c9c9, 0x49494949, 0x1d1d1d1d, 0x4d4d4d4d, 0x19191919, 0x89898989, 0xcdcdcdcd, 0x8b8b8b8b, 0x8f8f8f8f, 0x1b1b1b1b, 0x1f1f1f1f, 0x4b4b4b4b, 0x4f4f4f4f, 0xcbcbcbcb, 0xcfcfcfcf, 0x85858585, 0xc0c0c0c0, 0x40404040, 0x15151515, 0x45454545, 0x10101010, 0x80808080, 0xc5c5c5c5, 0x82828282, 0x87878787, 0x12121212, 0x17171717, 0x42424242, 0x47474747, 0xc2c2c2c2, 0xc7c7c7c7, 0x96969696, 0x93939393, 0x03030303, 0x06060606, 0x56565656, 0x53535353, 0xd3d3d3d3, 0xd6d6d6d6, 0xd1d1d1d1, 0x94949494, 0x51515151, 0x04040404, 0x01010101, 0x54545454, 0x91919191, 0xd4d4d4d4, 0x9c9c9c9c, 0xd8d8d8d8, 0x58585858, 0x0c0c0c0c, 0x5c5c5c5c, 0x08080808, 0x98989898, 0xdcdcdcdc, 0x9a9a9a9a, 0x9e9e9e9e, 0x0a0a0a0a, 0x0e0e0e0e, 0x5a5a5a5a, 0x5e5e5e5e, 0xdadadada, 0xdededede, 0x95959595, 0xd0d0d0d0, 0x50505050, 0x05050505, 0x55555555, 0x00000000, 0x90909090, 0xd5d5d5d5, 0x92929292, 0x97979797, 0x02020202, 0x07070707, 0x52525252, 0x57575757, 0xd2d2d2d2, 0xd7d7d7d7, 0x9d9d9d9d, 0xd9d9d9d9, 0x59595959, 0x0d0d0d0d, 0x5d5d5d5d, 0x09090909, 0x99999999, 0xdddddddd, 0x9b9b9b9b, 0x9f9f9f9f, 0x0b0b0b0b, 0x0f0f0f0f, 0x5b5b5b5b, 0x5f5f5f5f, 0xdbdbdbdb, 0xdfdfdfdf, 0x16161616, 0x13131313, 0x83838383, 0x86868686, 0x46464646, 0x43434343, 0xc3c3c3c3, 0xc6c6c6c6, 0x41414141, 0x14141414, 0xc1c1c1c1, 0x84848484, 0x11111111, 
0x44444444, 0x81818181, 0xc4c4c4c4, 0x1c1c1c1c, 0x48484848, 0xc8c8c8c8, 0x8c8c8c8c, 0x4c4c4c4c, 0x18181818, 0x88888888, 0xcccccccc, 0x1a1a1a1a, 0x1e1e1e1e, 0x8a8a8a8a, 0x8e8e8e8e, 0x4a4a4a4a, 0x4e4e4e4e, 0xcacacaca, 0xcececece, 0x35353535, 0x60606060, 0xe0e0e0e0, 0xa5a5a5a5, 0x65656565, 0x30303030, 0xa0a0a0a0, 0xe5e5e5e5, 0x32323232, 0x37373737, 0xa2a2a2a2, 0xa7a7a7a7, 0x62626262, 0x67676767, 0xe2e2e2e2, 0xe7e7e7e7, 0x3d3d3d3d, 0x69696969, 0xe9e9e9e9, 0xadadadad, 0x6d6d6d6d, 0x39393939, 0xa9a9a9a9, 0xedededed, 0x3b3b3b3b, 0x3f3f3f3f, 0xabababab, 0xafafafaf, 0x6b6b6b6b, 0x6f6f6f6f, 0xebebebeb, 0xefefefef, 0x26262626, 0x23232323, 0xb3b3b3b3, 0xb6b6b6b6, 0x76767676, 0x73737373, 0xf3f3f3f3, 0xf6f6f6f6, 0x71717171, 0x24242424, 0xf1f1f1f1, 0xb4b4b4b4, 0x21212121, 0x74747474, 0xb1b1b1b1, 0xf4f4f4f4, 0x2c2c2c2c, 0x78787878, 0xf8f8f8f8, 0xbcbcbcbc, 0x7c7c7c7c, 0x28282828, 0xb8b8b8b8, 0xfcfcfcfc, 0x2a2a2a2a, 0x2e2e2e2e, 0xbabababa, 0xbebebebe, 0x7a7a7a7a, 0x7e7e7e7e, 0xfafafafa, 0xfefefefe, 0x25252525, 0x70707070, 0xf0f0f0f0, 0xb5b5b5b5, 0x75757575, 0x20202020, 0xb0b0b0b0, 0xf5f5f5f5, 0x22222222, 0x27272727, 0xb2b2b2b2, 0xb7b7b7b7, 0x72727272, 0x77777777, 0xf2f2f2f2, 0xf7f7f7f7, 0x2d2d2d2d, 0x79797979, 0xf9f9f9f9, 0xbdbdbdbd, 0x7d7d7d7d, 0x29292929, 0xb9b9b9b9, 0xfdfdfdfd, 0x2b2b2b2b, 0x2f2f2f2f, 0xbbbbbbbb, 0xbfbfbfbf, 0x7b7b7b7b, 0x7f7f7f7f, 0xfbfbfbfb, 0xffffffff}; @@ -36,77 +47,12 @@ static const uint32_t AC_column0[87] = {0x1000101, 0x3000303, 0x7000707, 0xf000f static const uint32_t AC_column1[87] = {0x0, 0x0, 0x0, 0x0, 0x10000, 0x30000, 0x70000, 0x70000, 0x70000, 0x70000, 0x60000, 0x50000, 0x30000, 0x70000, 0x70000, 0x70000, 0x60000, 0x40000, 0x10000, 0x30000, 0x70000, 0x70000, 0x60000, 0x50000, 0x20000, 0x50000, 0x30000, 0x70000, 0x60000, 0x40000, 0x0, 0x0, 0x10000, 0x30000, 0x60000, 0x50000, 0x30000, 0x70000, 0x60000, 0x50000, 0x20000, 0x40000, 0x10000, 0x30000, 0x60000, 0x40000, 0x0, 0x10000, 0x20000, 0x50000, 0x20000, 0x50000, 0x30000, 0x60000, 0x40000, 0x0, 0x0, 
0x0, 0x10000, 0x20000, 0x50000, 0x30000, 0x70000, 0x70000, 0x60000, 0x40000, 0x0, 0x10000, 0x30000, 0x70000, 0x60000, 0x50000, 0x30000, 0x60000, 0x50000, 0x30000, 0x60000, 0x40000, 0x10000, 0x20000, 0x40000, 0x10000, 0x20000, 0x50000, 0x20000, 0x40000, 0x10000}; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 +#if !defined(__AVR__) -/** - * \brief State information for ForkSkinny-128-256. - */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_256_state_t; - -#define load_column_8(dest, src) \ - do { \ - dest[0] = (src[12]) << 24 | (src[8]) << 16 | (src[4]) << 8 | (src[0]); \ - dest[1] = (src[13]) << 24 | (src[9]) << 16 | (src[5]) << 8 | (src[1]); \ - dest[2] = (src[14]) << 24 | (src[10]) << 16 | (src[6]) << 8 | (src[2]); \ - dest[3] = (src[15]) << 24 | (src[11]) << 16 | (src[7]) << 8 | (src[3]); \ - } while(0) - -#define store_column_8(dest, src) \ - do { \ - dest[0] = (uint8_t) (src[0]); dest[1] = (uint8_t) (src[1]); dest[2] = (uint8_t) (src[2]); dest[3] = (uint8_t) (src[3]); \ - dest[4] = (uint8_t) (src[0]>>8); dest[5] = (uint8_t) (src[1]>>8); dest[6] = (uint8_t) (src[2]>>8); dest[7] = (uint8_t) (src[3]>>8); \ - dest[8] = (uint8_t) (src[0]>>16);dest[9] = (uint8_t) (src[1]>>16);dest[10]= (uint8_t) (src[2]>>16);dest[11]= (uint8_t)(src[3]>>16); \ - dest[12]= (uint8_t) (src[0]>>24);dest[13]= (uint8_t) (src[1]>>24);dest[14]= (uint8_t) (src[2]>>24);dest[15]= (uint8_t)(src[3]>>24); \ - } while(0) - -#define rows_to_columns_32(columns, row0, row1, row2, row3) \ - do { \ - columns[0] = (row3 & 0xFF) << 24|(row2 & 0xFF) << 16|(row1 & 0xFF) << 8 | (row0 & 0xFF);\ - columns[1] = (row3 & 0xFF00) << 16|(row2 & 0xFF00) << 8 |(row1 & 0xFF00) | 
(row0>>8 & 0xFF);\ - columns[2] = (row3 & 0xFF0000) << 8 |(row2 & 0xFF0000) |(row1 & 0xFF0000) >> 8 | (row0>>16 & 0xFF);\ - columns[3] = (row3 & 0xFF000000) |(row2 & 0xFF000000) >> 8 |(row1 & 0xFF000000) >> 16| (row0>>24 & 0xFF);\ - } while(0) - -#define columns_to_rows_32(rows, column0, column1, column2, column3) rows_to_columns_32(rows, column0, column1, column2, column3) - -#define TK_to_column_256(columns, state) \ - do { \ - uint32_t TK0 = state->TK1[0] ^ state->TK2[0];\ - uint32_t TK1 = state->TK1[1] ^ state->TK2[1]; \ - uint32_t tk00 = TK0 & 0xFF; \ - uint32_t tk01 = TK0 & 0xFF00;\ - uint32_t tk02 = TK0 & 0xFF0000;\ - uint32_t tk03 = TK0 & 0xFF000000;\ - columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ - columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ - columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ - columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ - } while(0) - -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { - uint32_t s0, s1, s2, s3; + uint32_t s0, s1, s2, s3, t0, t1, t2, t3; uint32_t tk_columns[4]; /* Load the state into local variables */ @@ -115,102 +61,45 @@ static void forkskinny_128_256_round s2 = state->S[2]; s3 = state->S[3]; - TK_to_column_256(tk_columns, state); + /* Perform all requested rounds */ + for (; first < last; ++first) { - state->S[0] = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[round]; - state->S[1] = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[round]; - state->S[2] = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; - state->S[3] = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) ^ tk_columns[3]; + TK_to_column_256(tk_columns, state); - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); -} + t0 = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[first]; + t1 = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[first]; + t2 = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; + t3 = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) 
^ tk_columns[3]; + /* Permute TK1 and TK2 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); -void forkskinny_128_256_encrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_256_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - - /* State stored per column */ - load_column_8(state.S, input); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) { - forkskinny_128_256_round(&state, round); + s0 = t0; s1 = t1; s2 = t2; s3 = t3; } - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - store_column_8(output_right, state.S); - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x51051001; /* Branching constant */ - state.S[1] ^= 0xa20a2002; - state.S[2] ^= 0x44144104; - state.S[3] ^= 0x88288208; - - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - 
FORKSKINNY_128_256_ROUNDS_AFTER); - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_256_round(&state, round); - } - store_column_8(output_left, state.S); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - store_column_8(output_right, state.S); - } + /* Save the local variables back to the state */ + state->S[0] = s0; + state->S[1] = s1; + state->S[2] = s2; + state->S[3] = s3; } -static void forkskinny_128_256_inv_round_first(forkskinny_128_256_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3, temp; - uint8_t rc; +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) +{ + uint32_t s0, s1, s2, s3, t0, t1, t2, t3, tk0, tk1; + uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; + t0 = state->S[0]; + t1 = state->S[1]; + t2 = state->S[2]; + t3 = state->S[3]; + + // FIRST ROUND /* Permute TK1 and TK2 for the next round */ skinny128_inv_LFSR2(state->TK2[0]); @@ -219,215 +108,128 @@ static void forkskinny_128_256_inv_round_first(forkskinny_128_256_state_t *state skinny128_inv_permute_tk(state->TK2); /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - temp = state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s1 ^= leftRotate8(temp); - s2 ^= 0x020000; - - /* Save the local variables back to the state but first convert them to columns*/ - rows_to_columns_32(state->S, s0, s1, s2, s3); -} - -static void forkskinny_128_256_inv_round(forkskinny_128_256_state_t 
*state, unsigned round){ - uint32_t s0, s1, s2, s3, tk0, tk1; - uint8_t rc; - - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - - state->S[0] = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); - state->S[1] = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); - state->S[2] = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); - state->S[3] = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + s0 = t0; + t0 = t1; + t1 = t2; + t2 = t3; + t3 = s0 ^ t2; + t2 ^= t0; + t1 ^= t2; /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - tk0 = state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - tk1 = state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - tk1 = leftRotate8(tk1); - // temp = 0x020000; - state->S[0] ^= (((tk0) &0xff) | ((tk1<<8)&0xff00)); - state->S[1] ^= (((tk0>>8) &0xff) | ((tk1)&0xff00)); - state->S[2] ^= (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; - state->S[3] ^= (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); -} - -static void forkskinny_128_256_inv_round_final(forkskinny_128_256_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3; + rc = RC[--first]; + t0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + t1 ^= leftRotate8((state->TK1[1] ^ state->TK2[1] ^ (rc >> 4))); + t2 ^= 0x020000; + + /* Save the local variables in temp but first convert them to columns*/ + rows_to_columns_32(s0, 
s1, s2, s3, t0, t1, t2, t3); + + + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + + t0 = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); + t1 = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); + t2 = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); + t3 = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + + /* XOR the shifted round constant and the shifted subkey for this round */ + rc = RC[--first]; + tk0 = state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + tk1 = leftRotate8((state->TK1[1] ^ state->TK2[1] ^ (rc >> 4))); + + s0 = t0 ^ (((tk0) &0xff) | ((tk1<<8)&0xff00)); + s1 = t1 ^ (((tk0>>8) &0xff) | ((tk1)&0xff00)); + s2 = t2 ^ (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; + s3 = t3 ^ (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); + } - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; + // FINAL ROUND - /* Apply the inverse of the S-box to all cells in the state */ + /* Apply the inverse of the S-box to all cells in the state */ skinny128_inv_sbox(s0); skinny128_inv_sbox(s1); skinny128_inv_sbox(s2); skinny128_inv_sbox(s3); /* Save the local variables back to the state but first convert them back to rows*/ - columns_to_rows_32(state->S, s0, s1, s2, s3); + columns_to_rows_32(t0, t1, t2, t3, s0, s1, s2, s3); /* Shift the cells in the rows left, which moves the cell * values down closer to the LSB. 
That is, we do a right * rotate on the word to rotate the cells in the word left */ - state->S[1] = rightRotate8(state->S[1]); - state->S[2] = rightRotate16(state->S[2]); - state->S[3] = rightRotate24(state->S[3]); + state->S[0] = t0; + state->S[1] = rightRotate8(t1); + state->S[2] = rightRotate16(t2); + state->S[3] = rightRotate24(t3); } -void forkskinny_128_256_decrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds) { - forkskinny_128_256_state_t state; - forkskinny_128_256_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - forkskinny_128_256_inv_round_first(&state, (FORKSKINNY_128_256_ROUNDS_BEFORE+FORKSKINNY_128_256_ROUNDS_AFTER*2)-1); - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2)-1; - round > (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); --round) { - forkskinny_128_256_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + --rounds; } - forkskinny_128_256_inv_round_final(&state, (FORKSKINNY_128_256_ROUNDS_BEFORE+FORKSKINNY_128_256_ROUNDS_AFTER)); - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - } - - /* Save the state and the tweakey at the forking point, convert state to columns */ - memcpy(fstate.TK1, state.TK1, 16); - memcpy(fstate.TK2, state.TK2, 16); - rows_to_columns_32(fstate.S,state.S[0],state.S[1], state.S[2], state.S[3]); +} - /* Generate the left output block after another "before" rounds */ - forkskinny_128_256_inv_round_first(&state, FORKSKINNY_128_256_ROUNDS_BEFORE-1); - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE-1; round > 0; --round) { - 
forkskinny_128_256_inv_round(&state, round - 1); +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + } + rounds -= 16; } - forkskinny_128_256_inv_round_final(&state, 0); - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + --rounds; } - store_column_8(output_right,fstate.S); - } -/** - * \brief Number of rounds of ForkSkinny-128-384 before forking. - */ -#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 - -/** - * \brief Number of rounds of ForkSkinny-128-384 after forking. - */ -#define FORKSKINNY_128_384_ROUNDS_AFTER 31 - -/** - * \brief State information for ForkSkinny-128-384. 
- */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -#define TK_to_column_384(columns, state) \ - do { \ - uint32_t TK0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0];\ - uint32_t TK1 = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1];\ - uint32_t tk00 = TK0 & 0xFF; \ - uint32_t tk01 = TK0 & 0xFF00;\ - uint32_t tk02 = TK0 & 0xFF0000;\ - uint32_t tk03 = TK0 & 0xFF000000;\ - columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ - columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ - columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ - columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ - } while(0) - -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { - uint32_t s0, s1, s2, s3; + uint32_t s0, s1, s2, s3, t0, t1, t2, t3; uint32_t tk_columns[4]; /* Load the state into local variables */ @@ -436,148 +238,48 @@ static void forkskinny_128_384_round s2 = state->S[2]; s3 = state->S[3]; - TK_to_column_384(tk_columns, state); - - state->S[0] = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[round]; - state->S[1] = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[round]; - state->S[2] = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; - state->S[3] = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) ^ tk_columns[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_permute_tk(state->TK3); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); - skinny128_LFSR3(state->TK3[0]); - skinny128_LFSR3(state->TK3[1]); -} + /* Perform all requested rounds */ + for (; first < last; ++first) { -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - 
state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - - /* State stored per column */ - load_column_8(state.S, input); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, round); - } + TK_to_column_384(tk_columns, state); - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - store_column_8(output_right, state.S); + t0 = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[first]; + t1 = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[first]; + t2 = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; + t3 = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) ^ tk_columns[3]; - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x51051001; /* Branching constant */ - state.S[1] ^= 0xa20a2002; - state.S[2] ^= 0x44144104; - state.S[3] ^= 0x88288208; - 
for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, round); - } - store_column_8(output_left, state.S); - - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - store_column_8(output_right, state.S); + /* Permute TK1, TK2, and TK3 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + + s0 = t0; s1 = t1; s2 = t2; s3 = t3; } -} + /* Save the local variables back to the state */ + state->S[0] = s0; + state->S[1] = s1; + state->S[2] = s2; + state->S[3] = s3; +} -static void forkskinny_128_384_inv_round_first(forkskinny_128_384_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3, temp; - uint8_t rc; +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) +{ + uint32_t s0, s1, s2, s3, t0, t1, t2, t3, tk0, tk1; + uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_LFSR3(state->TK3[0]); - skinny128_inv_LFSR3(state->TK3[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - skinny128_inv_permute_tk(state->TK3); + t0 = state->S[0]; + t1 = state->S[1]; + t2 = state->S[2]; + t3 = state->S[3]; - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; 
- - /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; - temp = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s1 ^= leftRotate8(temp); - s2 ^= 0x020000; - - /* Save the local variables back to the state but first convert them to columns*/ - rows_to_columns_32(state->S, s0, s1, s2, s3); -} - -static void forkskinny_128_384_inv_round(forkskinny_128_384_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3, tk0, tk1; - uint8_t rc; - - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; + // FIRST ROUND /* Permute TK1 and TK2 for the next round */ skinny128_inv_LFSR2(state->TK2[0]); @@ -588,177 +290,144 @@ static void forkskinny_128_384_inv_round(forkskinny_128_384_state_t *state, unsi skinny128_inv_permute_tk(state->TK2); skinny128_inv_permute_tk(state->TK3); - - state->S[0] = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); - state->S[1] = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); - state->S[2] = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); - state->S[3] = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + /* Inverse mix of the columns */ + s0 = t0; + t0 = t1; + t1 = t2; + t2 = t3; + t3 = s0 ^ t2; + t2 ^= t0; + t1 ^= t2; /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - tk0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; - tk1 = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - tk1 = leftRotate8(tk1); - - state->S[0] 
^= (((tk0) &0xff) | ((tk1<<8)&0xff00)); - state->S[1] ^= (((tk0>>8) &0xff) | ((tk1)&0xff00)); - state->S[2] ^= (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; - state->S[3] ^= (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); -} + rc = RC[--first]; + t0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; + t1 ^= leftRotate8((state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4))); + t2 ^= 0x020000; + + /* Save the local variables in temp but first convert them to columns*/ + rows_to_columns_32(s0, s1, s2, s3, t0, t1, t2, t3); + + + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + + t0 = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); + t1 = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); + t2 = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); + t3 = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + + /* XOR the shifted round constant and the shifted subkey for this round */ + rc = RC[--first]; + tk0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; + tk1 = leftRotate8((state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4))); + + s0 = t0 ^ (((tk0) &0xff) | ((tk1<<8)&0xff00)); + s1 = t1 ^ (((tk0>>8) &0xff) | ((tk1)&0xff00)); + s2 = t2 ^ (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; + s3 = t3 ^ (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); + 
} -static void forkskinny_128_384_inv_round_final(forkskinny_128_384_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3; + // FINAL ROUND - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Apply the inverse of the S-box to all cells in the state */ + /* Apply the inverse of the S-box to all cells in the state */ skinny128_inv_sbox(s0); skinny128_inv_sbox(s1); skinny128_inv_sbox(s2); skinny128_inv_sbox(s3); /* Save the local variables back to the state but first convert them back to rows*/ - columns_to_rows_32(state->S, s0, s1, s2, s3); + columns_to_rows_32(t0, t1, t2, t3, s0, s1, s2, s3); /* Shift the cells in the rows left, which moves the cell * values down closer to the LSB. That is, we do a right * rotate on the word to rotate the cells in the word left */ - state->S[1] = rightRotate8(state->S[1]); - state->S[2] = rightRotate16(state->S[2]); - state->S[3] = rightRotate24(state->S[3]); + state->S[0] = t0; + state->S[1] = rightRotate8(t1); + state->S[2] = rightRotate16(t2); + state->S[3] = rightRotate24(t3); } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 
44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_permute_tk(state.TK3); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); - skinny128_LFSR3(state.TK3[0]); - skinny128_LFSR3(state.TK3[1]); - } - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - forkskinny_128_384_inv_round_first(&state, (FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER * 2)-1); - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2)-1; - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, round - 1); - } - forkskinny_128_384_inv_round_final(&state, (FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER)); - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_LFSR3(state.TK3[0]); - skinny128_inv_LFSR3(state.TK3[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - skinny128_inv_permute_tk(state.TK3); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + skinny128_LFSR3(state->TK3[2]); + skinny128_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point, convert state to columns */ - memcpy(fstate.TK1, state.TK1, 16); - memcpy(fstate.TK2, state.TK2, 16); - memcpy(fstate.TK3, state.TK3, 16); - rows_to_columns_32(fstate.S,state.S[0],state.S[1], state.S[2], state.S[3]); - - /* Generate the left output block after another "before" rounds */ - forkskinny_128_384_inv_round_first(&state, FORKSKINNY_128_384_ROUNDS_BEFORE - 1); - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE-1; round > 0; --round) { - forkskinny_128_384_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + --rounds; } - forkskinny_128_384_inv_round_final(&state, 0); - - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, round); - } - store_column_8(output_right, fstate.S); } -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. 
- */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 - -/** - * \brief State information for ForkSkinny-64-192. - */ -typedef struct +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - uint16_t TK1[4]; /**< First part of the tweakey */ - uint16_t TK2[4]; /**< Second part of the tweakey */ - uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_LFSR3(state->TK3[2]); + skinny128_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; + } -} forkskinny_64_192_state_t; + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + --rounds; + } +} -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. 
- */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -769,144 +438,55 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + } /* Save the local 
variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_permute_tk(state->TK1); - skinny64_permute_tk(state->TK2); - skinny64_permute_tk(state->TK3); - skinny64_LFSR2(state->TK2[0]); - skinny64_LFSR2(state->TK2[1]); - skinny64_LFSR3(state->TK3[0]); - skinny64_LFSR3(state->TK3[1]); } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - 
be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -917,42 +497,45 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_inv_LFSR2(state->TK2[0]); - skinny64_inv_LFSR2(state->TK2[1]); - skinny64_inv_LFSR3(state->TK3[0]); - skinny64_inv_LFSR3(state->TK3[1]); - skinny64_inv_permute_tk(state->TK1); - skinny64_inv_permute_tk(state->TK2); - skinny64_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round 
constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -961,92 +544,74 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds) { - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - skinny64_permute_tk(state.TK1); - skinny64_permute_tk(state.TK2); - skinny64_permute_tk(state.TK3); - skinny64_LFSR2(state.TK2[0]); - skinny64_LFSR2(state.TK2[1]); - skinny64_LFSR3(state.TK3[0]); - 
skinny64_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR2(state->TK2[2]); + skinny64_LFSR2(state->TK2[3]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + skinny64_LFSR3(state->TK3[2]); + skinny64_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_AFTER; ++round) { - skinny64_inv_LFSR2(state.TK2[0]); - skinny64_inv_LFSR2(state.TK2[1]); - skinny64_inv_LFSR3(state.TK3[0]); - skinny64_inv_LFSR3(state.TK3[1]); - skinny64_inv_permute_tk(state.TK1); - skinny64_inv_permute_tk(state.TK2); - skinny64_inv_permute_tk(state.TK3); +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds) +{ /* Rolls the TK1, TK2 and TK3 words of state back by the given number of rounds; state->S is never referenced here */ + unsigned temp; /* counts the 8 inverse LFSR steps of one 16-round period */ + + /* The tweakey permutation repeats every 16 rounds so we can avoid + * some skinny64_inv_permute_tk() calls in the early stages.
During + * the 16 rounds, the inverse LFSR will be applied 8 times to every word of TK2 and TK3 (TK1 is not touched by this fast path) */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR2(state->TK2[2]); + skinny64_inv_LFSR2(state->TK2[3]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_LFSR3(state->TK3[2]); + skinny64_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, round); + /* Handle the left-over rounds one at a time: undo the LFSRs on words 0 and 1 of TK2 and TK3 first, then undo the permutation on TK1, TK2 and TK3 */ + while (rounds > 0) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + --rounds; } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); } +#endif /* !__AVR__ */ diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/internal-forkskinny.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/internal-forkskinny.h index 0c1a707..e3014d3 100644
--- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,158 @@ extern "C" { #endif /** + * \brief State information for ForkSkinny-128-256: two tweakey parts and the block, each held as four 32-bit words. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +/** + * \brief State information for ForkSkinny-128-384: three tweakey parts and the block, each held as four 32-bit words. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +/** + * \brief State information for ForkSkinny-64-192: three tweakey parts and the block, each held as four 16-bit words. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first Index of the first round to apply (inclusive). + * \param last Index of the last round to apply plus 1 (exclusive). + */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. + * + * \param state State to apply the rounds to. + * \param first Index of the first round to apply plus 1 (exclusive). + * \param last Index of the last round to apply (inclusive).
+ */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first Index of the first round to apply (inclusive). + * \param last Index of the last round to apply plus 1 (exclusive). + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first Index of the first round to apply plus 1 (exclusive). + * \param last Index of the last round to apply (inclusive). + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first Index of the first round to apply (inclusive).
+ * \param last Index of the last round to apply plus 1 (exclusive). + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first Index of the first round to apply plus 1 (exclusive). + * \param last Index of the last round to apply (inclusive). + */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. * * \param key 256-bit tweakey for ForkSkinny-128-256.
diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/internal-skinnyutil.h index 83136cb..f2bc8ca 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/opt32_table/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap: tk2 and tk3 are each read once into locals and then overwritten, so both arguments must be assignable 32-bit words */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap: tk0 and tk1 are each read once into locals and then overwritten, so both arguments must be assignable 32-bit words */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox.
The original version from the specification is * equivalent to: @@ -319,6 +349,61 @@ do { \ x = ((x << 1) & 0xEEEEU) | ((x >> 3) & 0x1111U); \ } while (0) +#define rows_to_columns_32(column0, column1, column2, column3, row0, row1, row2, row3) \ + do { /* 4x4 byte transpose: column k collects byte k of every row, with row0 in the low byte and row3 in the high byte */ \ + column0 = (row3 & 0xFF) << 24|(row2 & 0xFF) << 16|(row1 & 0xFF) << 8 | (row0 & 0xFF);\ + column1 = (row3 & 0xFF00) << 16|(row2 & 0xFF00) << 8 |(row1 & 0xFF00) | (row0>>8 & 0xFF);\ + column2 = (row3 & 0xFF0000) << 8 |(row2 & 0xFF0000) |(row1 & 0xFF0000) >> 8 | (row0>>16 & 0xFF);\ + column3 = (row3 & 0xFF000000) |(row2 & 0xFF000000) >> 8 |(row1 & 0xFF000000) >> 16| (row0>>24 & 0xFF);\ + } while(0) + +#define columns_to_rows_32(row0, row1, row2, row3, column0, column1, column2, column3) rows_to_columns_32(row0, row1, row2, row3, column0, column1, column2, column3) /* same expansion with the roles swapped: the byte transpose is its own inverse */ + +#define load_column_8(dest, src) \ + do { /* Packs 16 bytes into four words: dest[k] takes src[k], src[k+4], src[k+8], src[k+12] from low byte to high byte. Every byte is widened to uint32_t before shifting so a value >= 0x80 shifted by 24 cannot overflow a promoted signed int */ \ + dest[0] = ((uint32_t)(src[12])) << 24 | ((uint32_t)(src[8])) << 16 | ((uint32_t)(src[4])) << 8 | ((uint32_t)(src[0])); \ + dest[1] = ((uint32_t)(src[13])) << 24 | ((uint32_t)(src[9])) << 16 | ((uint32_t)(src[5])) << 8 | ((uint32_t)(src[1])); \ + dest[2] = ((uint32_t)(src[14])) << 24 | ((uint32_t)(src[10])) << 16 | ((uint32_t)(src[6])) << 8 | ((uint32_t)(src[2])); \ + dest[3] = ((uint32_t)(src[15])) << 24 | ((uint32_t)(src[11])) << 16 | ((uint32_t)(src[7])) << 8 | ((uint32_t)(src[3])); \ + } while(0) + +#define store_column_8(dest, src) \ + do { /* Reverse of load_column_8: byte j of src[k] is written to dest[4*j + k] */ \ + dest[0] = (uint8_t) (src[0]); dest[1] = (uint8_t) (src[1]); dest[2] = (uint8_t) (src[2]); dest[3] = (uint8_t) (src[3]); \ + dest[4] = (uint8_t) (src[0]>>8); dest[5] = (uint8_t) (src[1]>>8); dest[6] = (uint8_t) (src[2]>>8); dest[7] = (uint8_t) (src[3]>>8); \ + dest[8] = (uint8_t) (src[0]>>16);dest[9] = (uint8_t) (src[1]>>16);dest[10]= (uint8_t) (src[2]>>16);dest[11]= (uint8_t)(src[3]>>16); \ + dest[12]= (uint8_t) (src[0]>>24);dest[13]= (uint8_t) (src[1]>>24);dest[14]= (uint8_t) (src[2]>>24);dest[15]= (uint8_t)(src[3]>>24); \ + } while(0) + + +#define TK_to_column_256(columns, state) \ + do { /* Builds four column words from words 0 and 1 of TK1 XOR TK2; state must be a pointer expression */ \ + uint32_t TK0 = state->TK1[0] ^ state->TK2[0];\ + uint32_t TK1 = state->TK1[1] ^ state->TK2[1]; \ + uint32_t tk00 = TK0 & 0xFF; \ + uint32_t tk01 = TK0 & 0xFF00;\
+ uint32_t tk02 = TK0 & 0xFF0000;\ + uint32_t tk03 = TK0 & 0xFF000000;\ + columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ + columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ + columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ + columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ + } while(0) + +#define TK_to_column_384(columns, state) \ +do { /* columns[k] holds byte k of the combined word 0 (TK1 ^ TK2 ^ TK3) in byte positions 0, 1 and 3, and byte (k+3) mod 4 of the combined word 1 in byte position 2 */ \ + uint32_t TK0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0];\ + uint32_t TK1 = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1];\ + uint32_t tk00 = TK0 & 0xFF; \ + uint32_t tk01 = TK0 & 0xFF00;\ + uint32_t tk02 = TK0 & 0xFF0000;\ + uint32_t tk03 = TK0 & 0xFF000000;\ + columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ + columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ + columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ + columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ + } while(0) + /** @endcond */ #ifdef __cplusplus diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/aead-common.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/aead-common.h index 2be95eb..8429f59 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/aead-common.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/aead-common.h @@ -154,7 +154,7 @@ typedef void (*aead_xof_squeeze_t) /** * \brief No special AEAD features. */ -#define AEAD_FLAG_NONE 0x0000 +#define AEAD_FLAG_NONE 0x0000 /** * \brief The natural byte order of the AEAD cipher is little-endian. @@ -166,7 +166,18 @@ typedef void (*aead_xof_squeeze_t) * numbers as nonces. The application needs to know whether the sequence * number should be packed into the leading or trailing bytes of the nonce.
*/ -#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 +#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 + +/** + * \brief The AEAD mode provides side-channel protection for the key. + */ +#define AEAD_FLAG_SC_PROTECT_KEY 0x0002 + +/** + * \brief The AEAD mode provides side-channel protection for all block + * operations. + */ +#define AEAD_FLAG_SC_PROTECT_ALL 0x0004 /** * \brief Meta-information about an AEAD cipher. diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/forkae.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/forkae.c index 4a9671a..ead8ada 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/forkae.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/forkae.c @@ -22,7 +22,6 @@ #include "forkae.h" #include "internal-forkskinny.h" -#include "internal-util.h" #include <string.h> aead_cipher_t const forkae_paef_64_192_cipher = { @@ -138,3 +137,399 @@ aead_cipher_t const forkae_saef_128_256_cipher = { #define FORKAE_TWEAKEY_REDUCED_SIZE 32 #define FORKAE_BLOCK_FUNC forkskinny_128_256 #include "internal-forkae-saef.h" + +/* Helper functions to implement the forking encrypt/decrypt block operations + * on top of the basic "perform N rounds" functions in internal-forkskinny.c */ + +void forkskinny_128_256_encrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ /* Encrypts the 16-byte block at input under the 32-byte tweakey. A non-NULL output_left receives the left output block; output_right receives the right one when both are requested or when output_left is NULL, so at least one pointer must be non-NULL. */ + forkskinny_128_256_state_t state; + forkskinny_128_256_tweakey_schedule_t tks; + + /* Precompute the tweakey schedule. The left branch below runs rounds BEFORE + AFTER up to BEFORE + 2*AFTER, so the full-length schedule is needed whenever output_left is requested, including the left-only case; the right branch alone stops at BEFORE + AFTER. */ + if (output_left){ + forkskinny_128_256_init_tks(&tks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); + } + else{ + forkskinny_128_256_init_tks(&tks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + FORKSKINNY_128_256_ROUNDS_AFTER); + } + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8);
+ state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_256_rounds(&state, &tks, 0, FORKSKINNY_128_256_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_256_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_256_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_256_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_256_decrypt + (const unsigned
char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_state_t fstate; + forkskinny_128_256_tweakey_schedule_t tks; + + /* Iterate the tweakey schedule */ + forkskinny_128_256_init_tks(&tks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); + + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_256_inv_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_256_inv_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_256_rounds + (&fstate, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 12, fstate.S[3]); +} + +void forkskinny_128_384_encrypt + (const unsigned char key[48], unsigned char *output_left, +
unsigned char *output_right, const unsigned char *input) +{ /* Encrypts the 16-byte block at input under the 48-byte tweakey. A non-NULL output_left receives the left output block; output_right receives the right one when both are requested or when output_left is NULL, so at least one pointer must be non-NULL. */ + forkskinny_128_384_state_t state; + forkskinny_128_384_tweakey_schedule_t tks; + + /* Precompute the tweakey schedule. The left branch below runs rounds BEFORE + AFTER up to BEFORE + 2*AFTER, so the full-length schedule is needed whenever output_left is requested, including the left-only case; the right branch alone stops at BEFORE + AFTER. */ + if (output_left){ + forkskinny_128_384_init_tks(&tks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); + } + else{ + forkskinny_128_384_init_tks(&tks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER); + } + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_384_rounds(&state, &tks, 0, FORKSKINNY_128_384_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_384_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_384_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); +
le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_384_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_384_decrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_state_t fstate; + forkskinny_128_384_tweakey_schedule_t tks; + + /* Iterate key schedule */ + forkskinny_128_384_init_tks(&tks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); + + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_384_inv_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_384_inv_rounds(&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); +
le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_384_rounds + (&fstate, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 12, fstate.S[3]); +} + +void forkskinny_64_192_encrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ /* Encrypts the 8-byte block at input under the 24-byte tweakey. A non-NULL output_left receives the left output block; output_right receives the right one when both are requested or when output_left is NULL, so at least one pointer must be non-NULL. */ + forkskinny_64_192_state_t state; + forkskinny_64_192_tweakey_schedule_t tks; + + /* Precompute the tweakey schedule. The left branch below runs rounds BEFORE + AFTER up to BEFORE + 2*AFTER, so the full-length schedule is needed whenever output_left is requested, including the left-only case; the right branch alone stops at BEFORE + AFTER. */ + if (output_left){ + forkskinny_64_192_init_tks(&tks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); + } + else{ + forkskinny_64_192_init_tks(&tks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + FORKSKINNY_64_192_ROUNDS_AFTER); + } + + /* Unpack the input */ + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Run all of the rounds before the forking point */ + forkskinny_64_192_rounds(&state, &tks, 0, FORKSKINNY_64_192_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint16_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_64_192_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); +
+ /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x1249U; /* Branching constant */ + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + forkskinny_64_192_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_64_192_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + } +} + +void forkskinny_64_192_decrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_state_t fstate; + forkskinny_64_192_tweakey_schedule_t tks; + + /* Iterate key schedule */ + forkskinny_64_192_init_tks(&tks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Unpack the input */ + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /*
Remove the branching constant */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_64_192_inv_rounds(&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_64_192_rounds + (&fstate, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include <avr/io.h> +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 67 + .byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + .byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte
193 + .byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 + .byte 100 + .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + .byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 194 + .byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 + .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + 
.byte 97 + .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 + .byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + .byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + 
.byte 93 + .byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 223 + .byte 22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + 
.byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 137 + 
.byte 139 + .byte 141 + .byte 143 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_3, @object + .size table_3, 256 +table_3: + .byte 0 + .byte 128 + .byte 1 + .byte 129 + .byte 2 + .byte 130 + .byte 3 + .byte 131 + .byte 4 + .byte 132 + .byte 5 + .byte 133 + .byte 6 + .byte 134 + .byte 7 + .byte 135 + .byte 8 + .byte 136 + .byte 9 + .byte 137 + .byte 10 + .byte 138 + .byte 11 + .byte 139 + .byte 12 + .byte 140 + .byte 13 + .byte 141 + .byte 14 + .byte 142 + .byte 15 + .byte 143 + .byte 16 + .byte 144 + .byte 17 + .byte 145 + .byte 18 + .byte 146 + .byte 19 + .byte 147 + .byte 20 + .byte 148 + .byte 21 + .byte 149 + .byte 22 + .byte 150 + .byte 23 + .byte 151 + .byte 24 + .byte 152 + .byte 25 + .byte 153 + .byte 26 + .byte 154 + .byte 27 + .byte 155 + .byte 28 + .byte 156 + .byte 29 + .byte 157 + .byte 30 + .byte 158 + .byte 31 + .byte 159 + .byte 160 + .byte 32 + .byte 161 + .byte 33 + .byte 162 + .byte 34 + .byte 163 + .byte 35 + .byte 164 + .byte 36 + .byte 165 + .byte 37 + .byte 166 + .byte 38 + .byte 167 + .byte 39 + .byte 168 + .byte 40 + .byte 169 + .byte 41 + .byte 170 + .byte 42 + .byte 171 + .byte 43 + .byte 172 + .byte 44 + .byte 173 + .byte 45 + .byte 174 + .byte 46 + .byte 175 + .byte 47 + .byte 176 + .byte 48 + .byte 177 + .byte 49 + .byte 178 + .byte 50 + .byte 179 + .byte 
51 + .byte 180 + .byte 52 + .byte 181 + .byte 53 + .byte 182 + .byte 54 + .byte 183 + .byte 55 + .byte 184 + .byte 56 + .byte 185 + .byte 57 + .byte 186 + .byte 58 + .byte 187 + .byte 59 + .byte 188 + .byte 60 + .byte 189 + .byte 61 + .byte 190 + .byte 62 + .byte 191 + .byte 63 + .byte 64 + .byte 192 + .byte 65 + .byte 193 + .byte 66 + .byte 194 + .byte 67 + .byte 195 + .byte 68 + .byte 196 + .byte 69 + .byte 197 + .byte 70 + .byte 198 + .byte 71 + .byte 199 + .byte 72 + .byte 200 + .byte 73 + .byte 201 + .byte 74 + .byte 202 + .byte 75 + .byte 203 + .byte 76 + .byte 204 + .byte 77 + .byte 205 + .byte 78 + .byte 206 + .byte 79 + .byte 207 + .byte 80 + .byte 208 + .byte 81 + .byte 209 + .byte 82 + .byte 210 + .byte 83 + .byte 211 + .byte 84 + .byte 212 + .byte 85 + .byte 213 + .byte 86 + .byte 214 + .byte 87 + .byte 215 + .byte 88 + .byte 216 + .byte 89 + .byte 217 + .byte 90 + .byte 218 + .byte 91 + .byte 219 + .byte 92 + .byte 220 + .byte 93 + .byte 221 + .byte 94 + .byte 222 + .byte 95 + .byte 223 + .byte 224 + .byte 96 + .byte 225 + .byte 97 + .byte 226 + .byte 98 + .byte 227 + .byte 99 + .byte 228 + .byte 100 + .byte 229 + .byte 101 + .byte 230 + .byte 102 + .byte 231 + .byte 103 + .byte 232 + .byte 104 + .byte 233 + .byte 105 + .byte 234 + .byte 106 + .byte 235 + .byte 107 + .byte 236 + .byte 108 + .byte 237 + .byte 109 + .byte 238 + .byte 110 + .byte 239 + .byte 111 + .byte 240 + .byte 112 + .byte 241 + .byte 113 + .byte 242 + .byte 114 + .byte 243 + .byte 115 + .byte 244 + .byte 116 + .byte 245 + .byte 117 + .byte 246 + .byte 118 + .byte 247 + .byte 119 + .byte 248 + .byte 120 + .byte 249 + .byte 121 + .byte 250 + .byte 122 + .byte 251 + .byte 123 + .byte 252 + .byte 124 + .byte 253 + .byte 125 + .byte 254 + .byte 126 + .byte 255 + .byte 127 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_4, @object + .size table_4, 174 +table_4: + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 7 + .byte 0 + .byte 15 + .byte 0 + .byte 15 + .byte 1 + .byte 
15 + .byte 3 + .byte 14 + .byte 7 + .byte 13 + .byte 7 + .byte 11 + .byte 7 + .byte 7 + .byte 7 + .byte 15 + .byte 6 + .byte 15 + .byte 5 + .byte 14 + .byte 3 + .byte 12 + .byte 7 + .byte 9 + .byte 7 + .byte 3 + .byte 7 + .byte 7 + .byte 6 + .byte 15 + .byte 4 + .byte 14 + .byte 1 + .byte 13 + .byte 3 + .byte 10 + .byte 7 + .byte 5 + .byte 7 + .byte 11 + .byte 6 + .byte 7 + .byte 5 + .byte 14 + .byte 2 + .byte 12 + .byte 5 + .byte 8 + .byte 3 + .byte 0 + .byte 7 + .byte 1 + .byte 6 + .byte 3 + .byte 4 + .byte 6 + .byte 0 + .byte 13 + .byte 0 + .byte 11 + .byte 1 + .byte 7 + .byte 3 + .byte 14 + .byte 6 + .byte 13 + .byte 5 + .byte 10 + .byte 3 + .byte 4 + .byte 7 + .byte 9 + .byte 6 + .byte 3 + .byte 5 + .byte 6 + .byte 2 + .byte 12 + .byte 4 + .byte 8 + .byte 1 + .byte 1 + .byte 3 + .byte 2 + .byte 6 + .byte 5 + .byte 4 + .byte 10 + .byte 0 + .byte 5 + .byte 1 + .byte 11 + .byte 2 + .byte 6 + .byte 5 + .byte 12 + .byte 2 + .byte 8 + .byte 5 + .byte 0 + .byte 3 + .byte 0 + .byte 6 + .byte 1 + .byte 4 + .byte 2 + .byte 0 + .byte 5 + .byte 0 + .byte 11 + .byte 0 + .byte 7 + .byte 1 + .byte 15 + .byte 2 + .byte 14 + .byte 5 + .byte 12 + .byte 3 + .byte 8 + .byte 7 + .byte 1 + .byte 7 + .byte 3 + .byte 6 + .byte 7 + .byte 4 + .byte 14 + .byte 0 + .byte 13 + .byte 1 + .byte 11 + .byte 3 + .byte 6 + .byte 7 + .byte 13 + .byte 6 + .byte 11 + .byte 5 + .byte 6 + .byte 3 + .byte 12 + .byte 6 + .byte 9 + .byte 5 + .byte 2 + .byte 3 + .byte 4 + .byte 6 + .byte 9 + .byte 4 + .byte 2 + .byte 1 + .byte 5 + .byte 2 + .byte 10 + .byte 4 + .byte 4 + .byte 1 + .byte 9 + .byte 2 + .byte 2 + .byte 5 + .byte 4 + .byte 2 + .byte 8 + .byte 4 + .byte 0 + .byte 1 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_5, @object + .size table_5, 256 +table_5: + .byte 204 + .byte 198 + .byte 201 + .byte 192 + .byte 193 + .byte 202 + .byte 194 + .byte 203 + .byte 195 + .byte 200 + .byte 197 + .byte 205 + .byte 196 + .byte 206 + .byte 199 + .byte 207 + .byte 108 + .byte 102 + 
.byte 105 + .byte 96 + .byte 97 + .byte 106 + .byte 98 + .byte 107 + .byte 99 + .byte 104 + .byte 101 + .byte 109 + .byte 100 + .byte 110 + .byte 103 + .byte 111 + .byte 156 + .byte 150 + .byte 153 + .byte 144 + .byte 145 + .byte 154 + .byte 146 + .byte 155 + .byte 147 + .byte 152 + .byte 149 + .byte 157 + .byte 148 + .byte 158 + .byte 151 + .byte 159 + .byte 12 + .byte 6 + .byte 9 + .byte 0 + .byte 1 + .byte 10 + .byte 2 + .byte 11 + .byte 3 + .byte 8 + .byte 5 + .byte 13 + .byte 4 + .byte 14 + .byte 7 + .byte 15 + .byte 28 + .byte 22 + .byte 25 + .byte 16 + .byte 17 + .byte 26 + .byte 18 + .byte 27 + .byte 19 + .byte 24 + .byte 21 + .byte 29 + .byte 20 + .byte 30 + .byte 23 + .byte 31 + .byte 172 + .byte 166 + .byte 169 + .byte 160 + .byte 161 + .byte 170 + .byte 162 + .byte 171 + .byte 163 + .byte 168 + .byte 165 + .byte 173 + .byte 164 + .byte 174 + .byte 167 + .byte 175 + .byte 44 + .byte 38 + .byte 41 + .byte 32 + .byte 33 + .byte 42 + .byte 34 + .byte 43 + .byte 35 + .byte 40 + .byte 37 + .byte 45 + .byte 36 + .byte 46 + .byte 39 + .byte 47 + .byte 188 + .byte 182 + .byte 185 + .byte 176 + .byte 177 + .byte 186 + .byte 178 + .byte 187 + .byte 179 + .byte 184 + .byte 181 + .byte 189 + .byte 180 + .byte 190 + .byte 183 + .byte 191 + .byte 60 + .byte 54 + .byte 57 + .byte 48 + .byte 49 + .byte 58 + .byte 50 + .byte 59 + .byte 51 + .byte 56 + .byte 53 + .byte 61 + .byte 52 + .byte 62 + .byte 55 + .byte 63 + .byte 140 + .byte 134 + .byte 137 + .byte 128 + .byte 129 + .byte 138 + .byte 130 + .byte 139 + .byte 131 + .byte 136 + .byte 133 + .byte 141 + .byte 132 + .byte 142 + .byte 135 + .byte 143 + .byte 92 + .byte 86 + .byte 89 + .byte 80 + .byte 81 + .byte 90 + .byte 82 + .byte 91 + .byte 83 + .byte 88 + .byte 85 + .byte 93 + .byte 84 + .byte 94 + .byte 87 + .byte 95 + .byte 220 + .byte 214 + .byte 217 + .byte 208 + .byte 209 + .byte 218 + .byte 210 + .byte 219 + .byte 211 + .byte 216 + .byte 213 + .byte 221 + .byte 212 + .byte 222 + .byte 215 + .byte 223 + .byte 
76 + .byte 70 + .byte 73 + .byte 64 + .byte 65 + .byte 74 + .byte 66 + .byte 75 + .byte 67 + .byte 72 + .byte 69 + .byte 77 + .byte 68 + .byte 78 + .byte 71 + .byte 79 + .byte 236 + .byte 230 + .byte 233 + .byte 224 + .byte 225 + .byte 234 + .byte 226 + .byte 235 + .byte 227 + .byte 232 + .byte 229 + .byte 237 + .byte 228 + .byte 238 + .byte 231 + .byte 239 + .byte 124 + .byte 118 + .byte 121 + .byte 112 + .byte 113 + .byte 122 + .byte 114 + .byte 123 + .byte 115 + .byte 120 + .byte 117 + .byte 125 + .byte 116 + .byte 126 + .byte 119 + .byte 127 + .byte 252 + .byte 246 + .byte 249 + .byte 240 + .byte 241 + .byte 250 + .byte 242 + .byte 251 + .byte 243 + .byte 248 + .byte 245 + .byte 253 + .byte 244 + .byte 254 + .byte 247 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_6, @object + .size table_6, 256 +table_6: + .byte 51 + .byte 52 + .byte 54 + .byte 56 + .byte 60 + .byte 58 + .byte 49 + .byte 62 + .byte 57 + .byte 50 + .byte 53 + .byte 55 + .byte 48 + .byte 59 + .byte 61 + .byte 63 + .byte 67 + .byte 68 + .byte 70 + .byte 72 + .byte 76 + .byte 74 + .byte 65 + .byte 78 + .byte 73 + .byte 66 + .byte 69 + .byte 71 + .byte 64 + .byte 75 + .byte 77 + .byte 79 + .byte 99 + .byte 100 + .byte 102 + .byte 104 + .byte 108 + .byte 106 + .byte 97 + .byte 110 + .byte 105 + .byte 98 + .byte 101 + .byte 103 + .byte 96 + .byte 107 + .byte 109 + .byte 111 + .byte 131 + .byte 132 + .byte 134 + .byte 136 + .byte 140 + .byte 138 + .byte 129 + .byte 142 + .byte 137 + .byte 130 + .byte 133 + .byte 135 + .byte 128 + .byte 139 + .byte 141 + .byte 143 + .byte 195 + .byte 196 + .byte 198 + .byte 200 + .byte 204 + .byte 202 + .byte 193 + .byte 206 + .byte 201 + .byte 194 + .byte 197 + .byte 199 + .byte 192 + .byte 203 + .byte 205 + .byte 207 + .byte 163 + .byte 164 + .byte 166 + .byte 168 + .byte 172 + .byte 170 + .byte 161 + .byte 174 + .byte 169 + .byte 162 + .byte 165 + .byte 167 + .byte 160 + .byte 171 + .byte 173 + .byte 175 + .byte 19 + .byte 20 + .byte 
22 + .byte 24 + .byte 28 + .byte 26 + .byte 17 + .byte 30 + .byte 25 + .byte 18 + .byte 21 + .byte 23 + .byte 16 + .byte 27 + .byte 29 + .byte 31 + .byte 227 + .byte 228 + .byte 230 + .byte 232 + .byte 236 + .byte 234 + .byte 225 + .byte 238 + .byte 233 + .byte 226 + .byte 229 + .byte 231 + .byte 224 + .byte 235 + .byte 237 + .byte 239 + .byte 147 + .byte 148 + .byte 150 + .byte 152 + .byte 156 + .byte 154 + .byte 145 + .byte 158 + .byte 153 + .byte 146 + .byte 149 + .byte 151 + .byte 144 + .byte 155 + .byte 157 + .byte 159 + .byte 35 + .byte 36 + .byte 38 + .byte 40 + .byte 44 + .byte 42 + .byte 33 + .byte 46 + .byte 41 + .byte 34 + .byte 37 + .byte 39 + .byte 32 + .byte 43 + .byte 45 + .byte 47 + .byte 83 + .byte 84 + .byte 86 + .byte 88 + .byte 92 + .byte 90 + .byte 81 + .byte 94 + .byte 89 + .byte 82 + .byte 85 + .byte 87 + .byte 80 + .byte 91 + .byte 93 + .byte 95 + .byte 115 + .byte 116 + .byte 118 + .byte 120 + .byte 124 + .byte 122 + .byte 113 + .byte 126 + .byte 121 + .byte 114 + .byte 117 + .byte 119 + .byte 112 + .byte 123 + .byte 125 + .byte 127 + .byte 3 + .byte 4 + .byte 6 + .byte 8 + .byte 12 + .byte 10 + .byte 1 + .byte 14 + .byte 9 + .byte 2 + .byte 5 + .byte 7 + .byte 0 + .byte 11 + .byte 13 + .byte 15 + .byte 179 + .byte 180 + .byte 182 + .byte 184 + .byte 188 + .byte 186 + .byte 177 + .byte 190 + .byte 185 + .byte 178 + .byte 181 + .byte 183 + .byte 176 + .byte 187 + .byte 189 + .byte 191 + .byte 211 + .byte 212 + .byte 214 + .byte 216 + .byte 220 + .byte 218 + .byte 209 + .byte 222 + .byte 217 + .byte 210 + .byte 213 + .byte 215 + .byte 208 + .byte 219 + .byte 221 + .byte 223 + .byte 243 + .byte 244 + .byte 246 + .byte 248 + .byte 252 + .byte 250 + .byte 241 + .byte 254 + .byte 249 + .byte 242 + .byte 245 + .byte 247 + .byte 240 + .byte 251 + .byte 253 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_7, @object + .size table_7, 256 +table_7: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 9 + .byte 11 + .byte 13 + 
.byte 15 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 121 + .byte 123 
+ .byte 125 + .byte 127 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_8, @object + .size table_8, 256 +table_8: + .byte 0 + .byte 8 + .byte 1 + .byte 9 + .byte 2 + .byte 10 + .byte 3 + .byte 11 + .byte 12 + .byte 4 + .byte 13 + .byte 5 + .byte 14 + .byte 6 + .byte 15 + .byte 7 + .byte 128 + .byte 136 + .byte 129 + .byte 137 + .byte 130 + .byte 138 + .byte 131 + .byte 139 + .byte 140 + .byte 132 + .byte 141 + .byte 133 + .byte 142 + .byte 134 + .byte 143 + .byte 135 + .byte 16 + .byte 24 + .byte 17 + .byte 25 + .byte 18 + .byte 26 + .byte 19 + .byte 27 + .byte 28 + .byte 20 + .byte 29 + .byte 21 + .byte 30 + .byte 22 + .byte 31 + .byte 23 + .byte 144 + .byte 152 + .byte 145 + .byte 153 + .byte 146 + .byte 154 + .byte 147 + .byte 155 + .byte 156 + .byte 148 + .byte 157 + .byte 149 + .byte 158 + .byte 150 + .byte 159 + .byte 151 + .byte 32 + .byte 40 + .byte 33 + .byte 41 + .byte 34 + .byte 42 + .byte 35 + .byte 43 + .byte 44 + .byte 36 + .byte 45 + .byte 37 + .byte 46 + .byte 38 + .byte 47 + .byte 39 + .byte 160 + .byte 168 + .byte 161 + .byte 169 + .byte 162 + .byte 170 + .byte 163 + .byte 171 + 
.byte 172 + .byte 164 + .byte 173 + .byte 165 + .byte 174 + .byte 166 + .byte 175 + .byte 167 + .byte 48 + .byte 56 + .byte 49 + .byte 57 + .byte 50 + .byte 58 + .byte 51 + .byte 59 + .byte 60 + .byte 52 + .byte 61 + .byte 53 + .byte 62 + .byte 54 + .byte 63 + .byte 55 + .byte 176 + .byte 184 + .byte 177 + .byte 185 + .byte 178 + .byte 186 + .byte 179 + .byte 187 + .byte 188 + .byte 180 + .byte 189 + .byte 181 + .byte 190 + .byte 182 + .byte 191 + .byte 183 + .byte 192 + .byte 200 + .byte 193 + .byte 201 + .byte 194 + .byte 202 + .byte 195 + .byte 203 + .byte 204 + .byte 196 + .byte 205 + .byte 197 + .byte 206 + .byte 198 + .byte 207 + .byte 199 + .byte 64 + .byte 72 + .byte 65 + .byte 73 + .byte 66 + .byte 74 + .byte 67 + .byte 75 + .byte 76 + .byte 68 + .byte 77 + .byte 69 + .byte 78 + .byte 70 + .byte 79 + .byte 71 + .byte 208 + .byte 216 + .byte 209 + .byte 217 + .byte 210 + .byte 218 + .byte 211 + .byte 219 + .byte 220 + .byte 212 + .byte 221 + .byte 213 + .byte 222 + .byte 214 + .byte 223 + .byte 215 + .byte 80 + .byte 88 + .byte 81 + .byte 89 + .byte 82 + .byte 90 + .byte 83 + .byte 91 + .byte 92 + .byte 84 + .byte 93 + .byte 85 + .byte 94 + .byte 86 + .byte 95 + .byte 87 + .byte 224 + .byte 232 + .byte 225 + .byte 233 + .byte 226 + .byte 234 + .byte 227 + .byte 235 + .byte 236 + .byte 228 + .byte 237 + .byte 229 + .byte 238 + .byte 230 + .byte 239 + .byte 231 + .byte 96 + .byte 104 + .byte 97 + .byte 105 + .byte 98 + .byte 106 + .byte 99 + .byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + .byte 250 + .byte 243 + .byte 251 + .byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + .byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global 
forkskinny_128_256_rounds + .type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov 
r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + 
elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + 
ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + 
std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 
+ pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global forkskinny_128_256_inv_rounds + .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + 
ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + 
std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 
+#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if 
defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 
0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + 
std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 
+ ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + + .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if 
defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + 
lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk + + .text +.global forkskinny_128_384_rounds + .type forkskinny_128_384_rounds, @function +forkskinny_128_384_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd 
r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd 
r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + 
lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + ldd r20,Y+45 + ldd r21,Y+46 + ldd r16,Y+47 + ldd r17,Y+48 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+33 + std Y+41,r0 + ldd r0,Y+34 + std 
Y+42,r0 + ldd r0,Y+35 + std Y+43,r0 + ldd r0,Y+36 + std Y+44,r0 + ldd r0,Y+37 + std Y+45,r0 + ldd r0,Y+38 + std Y+46,r0 + ldd r0,Y+39 + std Y+47,r0 + ldd r0,Y+40 + std Y+48,r0 + std Y+33,r19 + std Y+34,r17 + std Y+35,r18 + std Y+36,r21 + std Y+37,r26 + std Y+38,r16 + std Y+39,r20 + std Y+40,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd 
r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds + + .text +.global forkskinny_128_384_inv_rounds + .type forkskinny_128_384_inv_rounds, @function +forkskinny_128_384_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd 
r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if 
defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + ldd r20,Y+37 + ldd r21,Y+38 + ldd r16,Y+39 + ldd r17,Y+40 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm 
r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+41 + std Y+33,r0 + ldd r0,Y+42 + std Y+34,r0 + ldd r0,Y+43 + std Y+35,r0 + ldd r0,Y+44 + std Y+36,r0 + ldd r0,Y+45 + std Y+37,r0 + ldd r0,Y+46 + std Y+38,r0 + ldd r0,Y+47 + std Y+39,r0 + ldd r0,Y+48 + std Y+40,r0 + std Y+41,r26 + std Y+42,r18 + std Y+43,r20 + std Y+44,r17 + std Y+45,r16 + std Y+46,r27 + std Y+47,r21 + std Y+48,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor 
r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm 
r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std 
Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds + + .text +.global forkskinny_128_384_forward_tk + .type forkskinny_128_384_forward_tk, @function +forkskinny_128_384_forward_tk: /* forkskinny_128_384_forward_tk: Z = r25:r24 (pointer argument). Bytes Z+0..Z+15 are kept in r4-r15,r24,r25,r16,r17; bytes Z+16..Z+47 are copied into a 32-byte stack frame at Y+1..Y+32. Each pass of loop 83 rearranges the register bytes and both 16-byte stack groups: Y+9..Y+16 is pushed through table_2 and stored permuted at Y+1..Y+8, Y+25..Y+32 is pushed through table_3 and stored permuted at Y+17..Y+24, while the previous lower halves move up unchanged. r22 is decremented once per pass; the loop ends when it reaches zero and all 48 bytes are written back through Z. NOTE(review): r22 is only tested after the decrement, so r22 == 0 on entry would wrap and run 256 passes - confirm callers never pass 0. */ + push r28 + push r29 + push 
r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd 
r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r26,Y+29 + ldd r27,Y+30 + ldd r2,Y+31 + ldd r3,Y+32 + ldd r23,Y+17 + std Y+25,r23 + ldd r23,Y+18 + std Y+26,r23 + ldd r23,Y+19 + 
std Y+27,r23 + ldd r23,Y+20 + std Y+28,r23 + ldd r23,Y+21 + std Y+29,r23 + ldd r23,Y+22 + std Y+30,r23 + ldd r23,Y+23 + std Y+31,r23 + ldd r23,Y+24 + std Y+32,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+17,r19 + std Y+18,r3 + std Y+19,r18 + std Y+20,r27 + std Y+21,r20 + std Y+22,r2 + std Y+23,r26 + std Y+24,r21 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std 
Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk + + .text +.global forkskinny_128_384_reverse_tk + .type forkskinny_128_384_reverse_tk, @function +forkskinny_128_384_reverse_tk: /* forkskinny_128_384_reverse_tk: Z = r25:r24 (pointer argument). Bytes Z+0..Z+15 are kept in r2-r17; bytes Z+16..Z+47 are copied into a 32-byte stack frame at Y+1..Y+32. Each pass of loop 83 rearranges the register bytes and both 16-byte stack groups: Y+1..Y+8 is pushed through table_3 and stored permuted at Y+9..Y+16, Y+17..Y+24 is pushed through table_2 and stored permuted at Y+25..Y+32, while the previous upper halves move down unchanged. r22 is decremented once per pass; the loop ends when it reaches zero and all 48 bytes are written back through Z. NOTE(review): r22 is only tested after the decrement, so r22 == 0 on entry would wrap and run 256 passes - confirm callers never pass 0. */ + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 /* 18 pushed registers + 32-byte frame */ + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + 
ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 + ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif 
defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + 
std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: /* forkskinny_64_192_rounds: Z = r25:r24 (pointer argument). Bytes Z+24..Z+31 are kept in r26,r27,r2-r7; bytes Z+0..Z+23 are copied into a 24-byte stack frame at Y+1..Y+24. r22 and r20 are both doubled; loop 61 uses r22 as a byte index into table_4 (two lookups and two increments per pass) and exits when r22 equals r20, after which all 32 bytes are written back through Z. The loop also reads through table_5, table_7 and table_8, and copies r1 into r0 as the starting value for its 16-bit rotate-right-by-4 sequences. NOTE(review): that relies on r1 == 0 on entry (presumably the avr-gcc zero register), and because the exit test is an equality the doubled r20 must be reachable from the doubled r22 in steps of two - presumably r22/r20 are the first/last round arguments with last > first; confirm with callers. */ + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 /* 14 pushed registers + 24-byte frame */ + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd 
r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor 
r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld 
r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 
+ and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd 
r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if 
defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor 
r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z 
+#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + 
ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 
+ pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + 
std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) 
+ elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + and r11,r16 + or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std 
Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/internal-forkskinny.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/internal-forkskinny.c index c43ef98..4a7a01c 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/internal-forkskinny.c @@ -40,37 +40,10 @@ static unsigned char const RC[87] = { 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 }; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 - -/** - * \brief State information for ForkSkinny-128-256. 
- */ -typedef struct -{ - //uint32_t TK1[4]; /**< First part of the tweakey */ - //uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_256_state_t; +#if !defined(__AVR__) -typedef struct +void forkskinny_128_256_init_tks(forkskinny_128_256_tweakey_schedule_t *tks, const unsigned char key[32], uint8_t nb_rounds) { - /** Words of the full key schedule */ - uint32_t row0[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; - uint32_t row1[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; - - -} forkskinny_128_256_key_schedule_t; - -static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, const unsigned char key[32], uint8_t nb_rounds){ uint32_t TK[4]; unsigned round; @@ -81,8 +54,8 @@ static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, c TK[3] = le_load_word32(key + 12); /* Initiate key schedule with permutations of TK1 */ for(round = 0; round < nb_rounds; round++){ - ks->row0[round] = TK[0]; - ks->row1[round] = TK[1]; + tks->row0[round] = TK[0]; + tks->row1[round] = TK[1]; skinny128_permute_tk(TK); } @@ -94,8 +67,8 @@ static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, c TK[3] = le_load_word32(key + 28); /* Process second Tweakey and add it to the key schedule */ for(round = 0; round < nb_rounds; round++){ - ks->row0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny128_permute_tk(TK); skinny128_LFSR2(TK[0]); @@ -103,51 +76,48 @@ static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, c } } -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply.
- */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, forkskinny_128_256_key_schedule_t *ks, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + s0 = state->S[0]; + s1 = state->S[1]; + s2 = state->S[2]; + s3 = state->S[3]; + + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -156,95 +126,8 @@ static void forkskinny_128_256_round state->S[3] = s3; } -void forkskinny_128_256_encrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_256_state_t state; - forkskinny_128_256_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - if (output_left && output_right){ - forkskinny_128_256_init_tks(&ks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); - } - else{ - forkskinny_128_256_init_tks(&ks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + FORKSKINNY_128_256_ROUNDS_AFTER); - } - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, 
state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-256 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_256_inv_round - (forkskinny_128_256_state_t *state, forkskinny_128_256_key_schedule_t *ks, unsigned round) +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -255,33 +138,37 @@ static void forkskinny_128_256_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -290,96 +177,8 @@ static void forkskinny_128_256_inv_round state->S[3] = s3; } -void forkskinny_128_256_decrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_init_tks(forkskinny_128_384_tweakey_schedule_t *tks, const unsigned char key[48], uint8_t nb_rounds) { - forkskinny_128_256_state_t state; - forkskinny_128_256_state_t fstate; - forkskinny_128_256_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - forkskinny_128_256_init_tks(&ks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); - - /* Unpack the input */ - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); --round) { - forkskinny_128_256_inv_round(&state, &ks, round - 1); - } - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Save the state and the tweakey at the forking point */ - fstate = 
state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_256_inv_round(&state, &ks, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&fstate, &ks, round); - } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); -} - -/** - * \brief Number of rounds of ForkSkinny-128-384 before forking. - */ -#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 - -/** - * \brief Number of rounds of ForkSkinny-128-384 after forking. - */ -#define FORKSKINNY_128_384_ROUNDS_AFTER 31 - -/** - * \brief State information for ForkSkinny-128-384. 
- */ -typedef struct -{ - //uint32_t TK1[4]; /**< First part of the tweakey */ - //uint32_t TK2[4]; /**< Second part of the tweakey */ - //uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -typedef struct -{ - /** Words of the full key schedule */ - uint32_t row0[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; - uint32_t row1[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; - - -} forkskinny_128_384_key_schedule_t; - -static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, const unsigned char key[48], uint8_t nb_rounds){ uint32_t TK[4]; unsigned round; @@ -390,8 +189,8 @@ static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c TK[3] = le_load_word32(key + 12); /* Initiate key schedule with permutations of TK1 */ for(round = 0; round < nb_rounds; round++){ - ks->row0[round] = TK[0]; - ks->row1[round] = TK[1]; + tks->row0[round] = TK[0]; + tks->row1[round] = TK[1]; skinny128_permute_tk(TK); } @@ -401,10 +200,11 @@ static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c TK[1] = le_load_word32(key + 20); TK[2] = le_load_word32(key + 24); TK[3] = le_load_word32(key + 28); + /* Process second Tweakey and add it to the key schedule */ for(round = 0; round < nb_rounds; round++){ - ks->row0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny128_permute_tk(TK); skinny128_LFSR2(TK[0]); @@ -416,10 +216,11 @@ static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c TK[1] = le_load_word32(key + 36); TK[2] = le_load_word32(key + 40); TK[3] = le_load_word32(key + 44); + /* Process third Tweakey and add it to the key schedule */ for(round = 0; round < nb_rounds; round++){ - ks->row0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny128_permute_tk(TK); skinny128_LFSR3(TK[0]); @@ -427,51 +228,48 @@ static void
forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c } } -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, forkskinny_128_384_key_schedule_t *ks, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + s0 = state->S[0]; + s1 = state->S[1]; + s2 = state->S[2]; + s3 = state->S[3]; + + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -480,97 +278,8 @@ static void forkskinny_128_384_round state->S[3] = s3; } -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - forkskinny_128_384_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - if (output_left && output_right){ - forkskinny_128_384_init_tks(&ks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); - } - else{ - forkskinny_128_384_init_tks(&ks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER); - } - - /* Unpack the input */ - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - 
le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-384 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_384_inv_round - (forkskinny_128_384_state_t *state, forkskinny_128_384_key_schedule_t *ks, unsigned round) +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -581,33 +290,37 @@ static void forkskinny_128_384_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -616,96 +329,9 @@ static void forkskinny_128_384_inv_round state->S[3] = s3; } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - forkskinny_128_384_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - forkskinny_128_384_init_tks(&ks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); - - /* Unpack the input */ - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, &ks, round - 1); - } - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; 
round > 0; --round) { - forkskinny_128_384_inv_round(&state, &ks, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, &ks, round); - } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); -} - -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. - */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 -/** - * \brief State information for ForkSkinny-64-192. 
- */ -typedef struct +void forkskinny_64_192_init_tks(forkskinny_64_192_tweakey_schedule_t *tks, const unsigned char key[24], uint8_t nb_rounds) { - //uint16_t TK1[4]; /**< First part of the tweakey */ - //uint16_t TK2[4]; /**< Second part of the tweakey */ - //uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ - -} forkskinny_64_192_state_t; - -typedef struct -{ - /** Words of the full key schedule */ - uint16_t row0[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; - uint16_t row1[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; - - -} forkskinny_64_192_key_schedule_t; - -static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, const unsigned char key[24], uint8_t nb_rounds){ uint16_t TK[4]; unsigned round; @@ -716,8 +342,8 @@ static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con TK[3] = be_load_word16(key + 6); /* Initiate key schedule with permutations of TK1 */ for(round = 0; round<nb_rounds; round++){ - ks->row0[round] = TK[0]; - ks->row1[round] = TK[1]; + tks->row0[round] = TK[0]; + tks->row1[round] = TK[1]; skinny64_permute_tk(TK); } @@ -727,10 +353,11 @@ static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con TK[1] = be_load_word16(key + 10); TK[2] = be_load_word16(key + 12); TK[3] = be_load_word16(key + 14); + /* Process second Tweakey and add it to the key schedule */ for(round = 0; round<nb_rounds; round++){ - ks->row0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny64_permute_tk(TK); skinny64_LFSR2(TK[0]); @@ -744,8 +371,8 @@ static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con TK[3] = be_load_word16(key + 22); /* Process third Tweakey and add it to the key schedule */ for(round = 0; round<nb_rounds; round++){ - ks->row0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny64_permute_tk(TK); skinny64_LFSR3(TK[0]); @@ -753,17 +380,8 @@ 
static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con } } -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. - */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, forkskinny_64_192_key_schedule_t *ks, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -774,131 +392,44 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= ks->row1[round] ^ ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= tks->row0[first] ^ ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= tks->row1[first] ^ ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = 
s0; + s0 = temp; + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - forkskinny_64_192_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - if (output_left && output_right){ - forkskinny_64_192_init_tks(&ks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); - } - else{ - forkskinny_64_192_init_tks(&ks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + FORKSKINNY_64_192_ROUNDS_AFTER); - } - - /* Unpack the input */ - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 
0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, forkskinny_64_192_key_schedule_t *ks, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -909,33 +440,35 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= ks->row1[round] ^ ((rc & 0x70) << 8); - s2 ^= 0x2000; - - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= tks->row0[first] ^ ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= tks->row1[first] ^ ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -944,61 +477,4 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char 
*output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - forkskinny_64_192_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - forkskinny_64_192_init_tks(&ks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); - - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, &ks, round - 1); - } - - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, &ks, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, &ks, round); - } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); -} +#endif /* !__AVR__ */ diff --git 
a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/internal-forkskinny.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/internal-forkskinny.h index 0c1a707..1b4f5bf 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,172 @@ extern "C" { #endif /** + * \brief Number of rounds of ForkSkinny-128-256 before forking. + */ +#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 + +/** + * \brief Number of rounds of ForkSkinny-128-256 after forking. + */ +#define FORKSKINNY_128_256_ROUNDS_AFTER 27 + +/** + * \brief State information for ForkSkinny-128-256. + */ +typedef struct +{ + // uint32_t TK1[4]; /**< First part of the tweakey */ + // uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +typedef struct +{ + /** Words of the full key schedule */ + uint32_t row0[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; + uint32_t row1[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; + +} forkskinny_128_256_tweakey_schedule_t; + +/** + * \brief Number of rounds of ForkSkinny-128-384 before forking. + */ +#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 + +/** + * \brief Number of rounds of ForkSkinny-128-384 after forking. + */ +#define FORKSKINNY_128_384_ROUNDS_AFTER 31 + +/** + * \brief State information for ForkSkinny-128-384. 
+ */ +typedef struct +{ + // uint32_t TK1[4]; /**< First part of the tweakey */ + // uint32_t TK2[4]; /**< Second part of the tweakey */ + // uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +typedef struct +{ + /** Words of the full key schedule */ + uint32_t row0[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; + uint32_t row1[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; + +} forkskinny_128_384_tweakey_schedule_t; + +/** + * \brief Number of rounds of ForkSkinny-64-192 before forking. + */ +#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 + +/** + * \brief Number of rounds of ForkSkinny-64-192 after forking. + */ +#define FORKSKINNY_64_192_ROUNDS_AFTER 23 + +/** + * \brief State information for ForkSkinny-64-192. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +typedef struct +{ + /** Words of the full key schedule */ + uint16_t row0[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; + uint16_t row1[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; + + +} forkskinny_64_192_tweakey_schedule_t; + + +void forkskinny_128_256_init_tks(forkskinny_128_256_tweakey_schedule_t *tks, const unsigned char key[32], uint8_t nb_rounds); + + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. 
+ * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last); + + +void forkskinny_128_384_init_tks(forkskinny_128_384_tweakey_schedule_t *tks, const unsigned char key[48], uint8_t nb_rounds); + + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last); + + +void forkskinny_64_192_init_tks(forkskinny_64_192_tweakey_schedule_t *tks, const unsigned char key[24], uint8_t nb_rounds); + + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. 
+ */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last); + + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. * * \param key 256-bit tweakey for ForkSkinny-128-256. diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_dec/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. 
The original version from the specification is * equivalent to: diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/aead-common.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/aead-common.h index 2be95eb..8429f59 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/aead-common.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/aead-common.h @@ -154,7 +154,7 @@ typedef void (*aead_xof_squeeze_t) /** * \brief No special AEAD features. */ -#define AEAD_FLAG_NONE 0x0000 +#define AEAD_FLAG_NONE 0x0000 /** * \brief The natural byte order of the AEAD cipher is little-endian. @@ -166,7 +166,18 @@ typedef void (*aead_xof_squeeze_t) * numbers as nonces. The application needs to know whether the sequence * number should be packed into the leading or trailing bytes of the nonce. */ -#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 +#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 + +/** + * \brief The AEAD mode provides side-channel protection for the key. + */ +#define AEAD_FLAG_SC_PROTECT_KEY 0x0002 + +/** + * \brief The AEAD mode provides side-channel protection for all block + * operations. + */ +#define AEAD_FLAG_SC_PROTECT_ALL 0x0004 /** * \brief Meta-information about an AEAD cipher. 
diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/forkae.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/forkae.c index 4a9671a..5b7ba3d 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/forkae.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/forkae.c @@ -22,7 +22,7 @@ #include "forkae.h" #include "internal-forkskinny.h" -#include "internal-util.h" +#include "internal-skinnyutil.h" #include <string.h> aead_cipher_t const forkae_paef_64_192_cipher = { @@ -138,3 +138,458 @@ aead_cipher_t const forkae_saef_128_256_cipher = { #define FORKAE_TWEAKEY_REDUCED_SIZE 32 #define FORKAE_BLOCK_FUNC forkskinny_128_256 #include "internal-forkae-saef.h" + +/* Helper functions to implement the forking encrypt/decrypt block operations + * on top of the basic "perform N rounds" functions in internal-forkskinny.c */ + +/** + * \brief Number of rounds of ForkSkinny-128-256 before forking. + */ +#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 + +/** + * \brief Number of rounds of ForkSkinny-128-256 after forking. 
+ */ +#define FORKSKINNY_128_256_ROUNDS_AFTER 27 + +void forkskinny_128_256_encrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + + /* State stored per column */ + load_column_8(state.S, input); + + /* Run all of the rounds before the forking point */ + forkskinny_128_256_rounds(&state, 0, FORKSKINNY_128_256_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + store_column_8(output_right, state.S); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x51051001; /* Branching constant */ + state.S[1] ^= 0xa20a2002; + state.S[2] ^= 0x44144104; + state.S[3] ^= 0x88288208; + + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + store_column_8(output_left, state.S); + } else { + /* We only need the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + 
FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + store_column_8(output_right, state.S); + } +} + +void forkskinny_128_256_decrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_256_forward_tk + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_256_reverse_tk(&state, FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point, convert state to columns */ + memcpy(fstate.TK1, state.TK1, 16); + memcpy(fstate.TK2, state.TK2, 16); + rows_to_columns_32(fstate.S[0],fstate.S[1],fstate.S[2],fstate.S[3],state.S[0],state.S[1], state.S[2], state.S[3]); + + + /* Generate the left output block after another "before" 
rounds */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_256_rounds + (&fstate, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + store_column_8(output_right,fstate.S); +} + +/** + * \brief Number of rounds of ForkSkinny-128-384 before forking. + */ +#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 + +/** + * \brief Number of rounds of ForkSkinny-128-384 after forking. + */ +#define FORKSKINNY_128_384_ROUNDS_AFTER 31 + +void forkskinny_128_384_encrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + + /* State stored per column */ + load_column_8(state.S, input); + + /* Run all of the rounds before the forking point */ + forkskinny_128_384_rounds(&state, 0, FORKSKINNY_128_384_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] 
= state.S[3]; + + /* Generate the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + store_column_8(output_right, state.S); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x51051001; /* Branching constant */ + state.S[1] ^= 0xa20a2002; + state.S[2] ^= 0x44144104; + state.S[3] ^= 0x88288208; + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + store_column_8(output_left, state.S); + } else { + /* We only need the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + store_column_8(output_right, state.S); + } +} + +void forkskinny_128_384_decrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); 
+ + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_384_forward_tk + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_384_inv_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_384_reverse_tk(&state, FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point, convert state to columns */ + memcpy(fstate.TK1, state.TK1, 16); + memcpy(fstate.TK2, state.TK2, 16); + memcpy(fstate.TK3, state.TK3, 16); + rows_to_columns_32(fstate.S[0],fstate.S[1],fstate.S[2],fstate.S[3],state.S[0],state.S[1], state.S[2], state.S[3]); + + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_384_inv_rounds(&state, FORKSKINNY_128_384_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_384_rounds + (&fstate, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + store_column_8(output_right, fstate.S); +} + +/** + * \brief Number of rounds of ForkSkinny-64-192 before forking. + */ +#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 + +/** + * \brief Number of rounds of ForkSkinny-64-192 after forking. 
+ */ +#define FORKSKINNY_64_192_ROUNDS_AFTER 23 + +void forkskinny_64_192_encrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Run all of the rounds before the forking point */ + forkskinny_64_192_rounds(&state, 0, FORKSKINNY_64_192_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint16_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x1249U; /* Branching constant */ + state.S[1] ^= 0x36daU; + state.S[2] ^= 
0x5b7fU; + state.S[3] ^= 0xec81U; + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + } +} + +void forkskinny_64_192_decrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_64_192_forward_tk + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking 
point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_64_192_reverse_tk(&state, FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_64_192_inv_rounds(&state, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_64_192_rounds + (&fstate, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 
67 + .byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + .byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte 193 + .byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 + .byte 100 + .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + .byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 
194 + .byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 + .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + .byte 97 + .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 
+ .byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + .byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + .byte 93 + .byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 223 + .byte 22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section 
.progmem.data,"a",@progbits + .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 
76 + .byte 78 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_3, @object + .size table_3, 256 +table_3: + .byte 0 + .byte 128 + .byte 1 + .byte 129 + .byte 2 + .byte 130 + .byte 3 + .byte 131 + .byte 4 + .byte 132 + .byte 5 + .byte 133 + .byte 6 + .byte 134 + .byte 7 + .byte 135 + .byte 8 + .byte 136 + .byte 9 + .byte 137 + .byte 10 + .byte 138 + .byte 11 + .byte 139 + .byte 12 + .byte 140 + .byte 13 + .byte 141 + .byte 14 + .byte 142 + .byte 15 + .byte 143 + .byte 16 + .byte 144 + .byte 17 + .byte 145 + .byte 18 + .byte 146 + .byte 19 + .byte 147 + .byte 20 + .byte 148 + .byte 21 + .byte 149 + .byte 22 + .byte 150 + .byte 23 + .byte 151 + .byte 24 + .byte 152 + .byte 25 + .byte 153 + .byte 26 + .byte 154 + .byte 27 + .byte 155 + .byte 28 + .byte 156 + .byte 29 + .byte 157 + .byte 30 + .byte 158 + .byte 31 + .byte 159 + .byte 160 + .byte 32 + .byte 161 + .byte 33 + .byte 162 + .byte 34 + .byte 163 + .byte 35 + .byte 164 
+ .byte 36 + .byte 165 + .byte 37 + .byte 166 + .byte 38 + .byte 167 + .byte 39 + .byte 168 + .byte 40 + .byte 169 + .byte 41 + .byte 170 + .byte 42 + .byte 171 + .byte 43 + .byte 172 + .byte 44 + .byte 173 + .byte 45 + .byte 174 + .byte 46 + .byte 175 + .byte 47 + .byte 176 + .byte 48 + .byte 177 + .byte 49 + .byte 178 + .byte 50 + .byte 179 + .byte 51 + .byte 180 + .byte 52 + .byte 181 + .byte 53 + .byte 182 + .byte 54 + .byte 183 + .byte 55 + .byte 184 + .byte 56 + .byte 185 + .byte 57 + .byte 186 + .byte 58 + .byte 187 + .byte 59 + .byte 188 + .byte 60 + .byte 189 + .byte 61 + .byte 190 + .byte 62 + .byte 191 + .byte 63 + .byte 64 + .byte 192 + .byte 65 + .byte 193 + .byte 66 + .byte 194 + .byte 67 + .byte 195 + .byte 68 + .byte 196 + .byte 69 + .byte 197 + .byte 70 + .byte 198 + .byte 71 + .byte 199 + .byte 72 + .byte 200 + .byte 73 + .byte 201 + .byte 74 + .byte 202 + .byte 75 + .byte 203 + .byte 76 + .byte 204 + .byte 77 + .byte 205 + .byte 78 + .byte 206 + .byte 79 + .byte 207 + .byte 80 + .byte 208 + .byte 81 + .byte 209 + .byte 82 + .byte 210 + .byte 83 + .byte 211 + .byte 84 + .byte 212 + .byte 85 + .byte 213 + .byte 86 + .byte 214 + .byte 87 + .byte 215 + .byte 88 + .byte 216 + .byte 89 + .byte 217 + .byte 90 + .byte 218 + .byte 91 + .byte 219 + .byte 92 + .byte 220 + .byte 93 + .byte 221 + .byte 94 + .byte 222 + .byte 95 + .byte 223 + .byte 224 + .byte 96 + .byte 225 + .byte 97 + .byte 226 + .byte 98 + .byte 227 + .byte 99 + .byte 228 + .byte 100 + .byte 229 + .byte 101 + .byte 230 + .byte 102 + .byte 231 + .byte 103 + .byte 232 + .byte 104 + .byte 233 + .byte 105 + .byte 234 + .byte 106 + .byte 235 + .byte 107 + .byte 236 + .byte 108 + .byte 237 + .byte 109 + .byte 238 + .byte 110 + .byte 239 + .byte 111 + .byte 240 + .byte 112 + .byte 241 + .byte 113 + .byte 242 + .byte 114 + .byte 243 + .byte 115 + .byte 244 + .byte 116 + .byte 245 + .byte 117 + .byte 246 + .byte 118 + .byte 247 + .byte 119 + .byte 248 + .byte 120 + .byte 249 + .byte 121 + .byte 250 
+ .byte 122 + .byte 251 + .byte 123 + .byte 252 + .byte 124 + .byte 253 + .byte 125 + .byte 254 + .byte 126 + .byte 255 + .byte 127 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_4, @object + .size table_4, 174 +table_4: + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 7 + .byte 0 + .byte 15 + .byte 0 + .byte 15 + .byte 1 + .byte 15 + .byte 3 + .byte 14 + .byte 7 + .byte 13 + .byte 7 + .byte 11 + .byte 7 + .byte 7 + .byte 7 + .byte 15 + .byte 6 + .byte 15 + .byte 5 + .byte 14 + .byte 3 + .byte 12 + .byte 7 + .byte 9 + .byte 7 + .byte 3 + .byte 7 + .byte 7 + .byte 6 + .byte 15 + .byte 4 + .byte 14 + .byte 1 + .byte 13 + .byte 3 + .byte 10 + .byte 7 + .byte 5 + .byte 7 + .byte 11 + .byte 6 + .byte 7 + .byte 5 + .byte 14 + .byte 2 + .byte 12 + .byte 5 + .byte 8 + .byte 3 + .byte 0 + .byte 7 + .byte 1 + .byte 6 + .byte 3 + .byte 4 + .byte 6 + .byte 0 + .byte 13 + .byte 0 + .byte 11 + .byte 1 + .byte 7 + .byte 3 + .byte 14 + .byte 6 + .byte 13 + .byte 5 + .byte 10 + .byte 3 + .byte 4 + .byte 7 + .byte 9 + .byte 6 + .byte 3 + .byte 5 + .byte 6 + .byte 2 + .byte 12 + .byte 4 + .byte 8 + .byte 1 + .byte 1 + .byte 3 + .byte 2 + .byte 6 + .byte 5 + .byte 4 + .byte 10 + .byte 0 + .byte 5 + .byte 1 + .byte 11 + .byte 2 + .byte 6 + .byte 5 + .byte 12 + .byte 2 + .byte 8 + .byte 5 + .byte 0 + .byte 3 + .byte 0 + .byte 6 + .byte 1 + .byte 4 + .byte 2 + .byte 0 + .byte 5 + .byte 0 + .byte 11 + .byte 0 + .byte 7 + .byte 1 + .byte 15 + .byte 2 + .byte 14 + .byte 5 + .byte 12 + .byte 3 + .byte 8 + .byte 7 + .byte 1 + .byte 7 + .byte 3 + .byte 6 + .byte 7 + .byte 4 + .byte 14 + .byte 0 + .byte 13 + .byte 1 + .byte 11 + .byte 3 + .byte 6 + .byte 7 + .byte 13 + .byte 6 + .byte 11 + .byte 5 + .byte 6 + .byte 3 + .byte 12 + .byte 6 + .byte 9 + .byte 5 + .byte 2 + .byte 3 + .byte 4 + .byte 6 + .byte 9 + .byte 4 + .byte 2 + .byte 1 + .byte 5 + .byte 2 + .byte 10 + .byte 4 + .byte 4 + .byte 1 + .byte 9 + .byte 2 + .byte 2 + .byte 5 + .byte 4 + .byte 2 + .byte 8 + .byte 4 
+ .byte 0 + .byte 1 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_5, @object + .size table_5, 256 +table_5: + .byte 204 + .byte 198 + .byte 201 + .byte 192 + .byte 193 + .byte 202 + .byte 194 + .byte 203 + .byte 195 + .byte 200 + .byte 197 + .byte 205 + .byte 196 + .byte 206 + .byte 199 + .byte 207 + .byte 108 + .byte 102 + .byte 105 + .byte 96 + .byte 97 + .byte 106 + .byte 98 + .byte 107 + .byte 99 + .byte 104 + .byte 101 + .byte 109 + .byte 100 + .byte 110 + .byte 103 + .byte 111 + .byte 156 + .byte 150 + .byte 153 + .byte 144 + .byte 145 + .byte 154 + .byte 146 + .byte 155 + .byte 147 + .byte 152 + .byte 149 + .byte 157 + .byte 148 + .byte 158 + .byte 151 + .byte 159 + .byte 12 + .byte 6 + .byte 9 + .byte 0 + .byte 1 + .byte 10 + .byte 2 + .byte 11 + .byte 3 + .byte 8 + .byte 5 + .byte 13 + .byte 4 + .byte 14 + .byte 7 + .byte 15 + .byte 28 + .byte 22 + .byte 25 + .byte 16 + .byte 17 + .byte 26 + .byte 18 + .byte 27 + .byte 19 + .byte 24 + .byte 21 + .byte 29 + .byte 20 + .byte 30 + .byte 23 + .byte 31 + .byte 172 + .byte 166 + .byte 169 + .byte 160 + .byte 161 + .byte 170 + .byte 162 + .byte 171 + .byte 163 + .byte 168 + .byte 165 + .byte 173 + .byte 164 + .byte 174 + .byte 167 + .byte 175 + .byte 44 + .byte 38 + .byte 41 + .byte 32 + .byte 33 + .byte 42 + .byte 34 + .byte 43 + .byte 35 + .byte 40 + .byte 37 + .byte 45 + .byte 36 + .byte 46 + .byte 39 + .byte 47 + .byte 188 + .byte 182 + .byte 185 + .byte 176 + .byte 177 + .byte 186 + .byte 178 + .byte 187 + .byte 179 + .byte 184 + .byte 181 + .byte 189 + .byte 180 + .byte 190 + .byte 183 + .byte 191 + .byte 60 + .byte 54 + .byte 57 + .byte 48 + .byte 49 + .byte 58 + .byte 50 + .byte 59 + .byte 51 + .byte 56 + .byte 53 + .byte 61 + .byte 52 + .byte 62 + .byte 55 + .byte 63 + .byte 140 + .byte 134 + .byte 137 + .byte 128 + .byte 129 + .byte 138 + .byte 130 + .byte 139 + .byte 131 + .byte 136 + .byte 133 + .byte 141 + .byte 132 + .byte 142 + .byte 135 + .byte 143 + .byte 92 + .byte 86 + 
.byte 89 + .byte 80 + .byte 81 + .byte 90 + .byte 82 + .byte 91 + .byte 83 + .byte 88 + .byte 85 + .byte 93 + .byte 84 + .byte 94 + .byte 87 + .byte 95 + .byte 220 + .byte 214 + .byte 217 + .byte 208 + .byte 209 + .byte 218 + .byte 210 + .byte 219 + .byte 211 + .byte 216 + .byte 213 + .byte 221 + .byte 212 + .byte 222 + .byte 215 + .byte 223 + .byte 76 + .byte 70 + .byte 73 + .byte 64 + .byte 65 + .byte 74 + .byte 66 + .byte 75 + .byte 67 + .byte 72 + .byte 69 + .byte 77 + .byte 68 + .byte 78 + .byte 71 + .byte 79 + .byte 236 + .byte 230 + .byte 233 + .byte 224 + .byte 225 + .byte 234 + .byte 226 + .byte 235 + .byte 227 + .byte 232 + .byte 229 + .byte 237 + .byte 228 + .byte 238 + .byte 231 + .byte 239 + .byte 124 + .byte 118 + .byte 121 + .byte 112 + .byte 113 + .byte 122 + .byte 114 + .byte 123 + .byte 115 + .byte 120 + .byte 117 + .byte 125 + .byte 116 + .byte 126 + .byte 119 + .byte 127 + .byte 252 + .byte 246 + .byte 249 + .byte 240 + .byte 241 + .byte 250 + .byte 242 + .byte 251 + .byte 243 + .byte 248 + .byte 245 + .byte 253 + .byte 244 + .byte 254 + .byte 247 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_6, @object + .size table_6, 256 +table_6: + .byte 51 + .byte 52 + .byte 54 + .byte 56 + .byte 60 + .byte 58 + .byte 49 + .byte 62 + .byte 57 + .byte 50 + .byte 53 + .byte 55 + .byte 48 + .byte 59 + .byte 61 + .byte 63 + .byte 67 + .byte 68 + .byte 70 + .byte 72 + .byte 76 + .byte 74 + .byte 65 + .byte 78 + .byte 73 + .byte 66 + .byte 69 + .byte 71 + .byte 64 + .byte 75 + .byte 77 + .byte 79 + .byte 99 + .byte 100 + .byte 102 + .byte 104 + .byte 108 + .byte 106 + .byte 97 + .byte 110 + .byte 105 + .byte 98 + .byte 101 + .byte 103 + .byte 96 + .byte 107 + .byte 109 + .byte 111 + .byte 131 + .byte 132 + .byte 134 + .byte 136 + .byte 140 + .byte 138 + .byte 129 + .byte 142 + .byte 137 + .byte 130 + .byte 133 + .byte 135 + .byte 128 + .byte 139 + .byte 141 + .byte 143 + .byte 195 + .byte 196 + .byte 198 + .byte 200 + .byte 204 + 
.byte 202 + .byte 193 + .byte 206 + .byte 201 + .byte 194 + .byte 197 + .byte 199 + .byte 192 + .byte 203 + .byte 205 + .byte 207 + .byte 163 + .byte 164 + .byte 166 + .byte 168 + .byte 172 + .byte 170 + .byte 161 + .byte 174 + .byte 169 + .byte 162 + .byte 165 + .byte 167 + .byte 160 + .byte 171 + .byte 173 + .byte 175 + .byte 19 + .byte 20 + .byte 22 + .byte 24 + .byte 28 + .byte 26 + .byte 17 + .byte 30 + .byte 25 + .byte 18 + .byte 21 + .byte 23 + .byte 16 + .byte 27 + .byte 29 + .byte 31 + .byte 227 + .byte 228 + .byte 230 + .byte 232 + .byte 236 + .byte 234 + .byte 225 + .byte 238 + .byte 233 + .byte 226 + .byte 229 + .byte 231 + .byte 224 + .byte 235 + .byte 237 + .byte 239 + .byte 147 + .byte 148 + .byte 150 + .byte 152 + .byte 156 + .byte 154 + .byte 145 + .byte 158 + .byte 153 + .byte 146 + .byte 149 + .byte 151 + .byte 144 + .byte 155 + .byte 157 + .byte 159 + .byte 35 + .byte 36 + .byte 38 + .byte 40 + .byte 44 + .byte 42 + .byte 33 + .byte 46 + .byte 41 + .byte 34 + .byte 37 + .byte 39 + .byte 32 + .byte 43 + .byte 45 + .byte 47 + .byte 83 + .byte 84 + .byte 86 + .byte 88 + .byte 92 + .byte 90 + .byte 81 + .byte 94 + .byte 89 + .byte 82 + .byte 85 + .byte 87 + .byte 80 + .byte 91 + .byte 93 + .byte 95 + .byte 115 + .byte 116 + .byte 118 + .byte 120 + .byte 124 + .byte 122 + .byte 113 + .byte 126 + .byte 121 + .byte 114 + .byte 117 + .byte 119 + .byte 112 + .byte 123 + .byte 125 + .byte 127 + .byte 3 + .byte 4 + .byte 6 + .byte 8 + .byte 12 + .byte 10 + .byte 1 + .byte 14 + .byte 9 + .byte 2 + .byte 5 + .byte 7 + .byte 0 + .byte 11 + .byte 13 + .byte 15 + .byte 179 + .byte 180 + .byte 182 + .byte 184 + .byte 188 + .byte 186 + .byte 177 + .byte 190 + .byte 185 + .byte 178 + .byte 181 + .byte 183 + .byte 176 + .byte 187 + .byte 189 + .byte 191 + .byte 211 + .byte 212 + .byte 214 + .byte 216 + .byte 220 + .byte 218 + .byte 209 + .byte 222 + .byte 217 + .byte 210 + .byte 213 + .byte 215 + .byte 208 + .byte 219 + .byte 221 + .byte 223 + .byte 243 + .byte 244 
+ .byte 246 + .byte 248 + .byte 252 + .byte 250 + .byte 241 + .byte 254 + .byte 249 + .byte 242 + .byte 245 + .byte 247 + .byte 240 + .byte 251 + .byte 253 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_7, @object + .size table_7, 256 +table_7: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 57 + .byte 59 + 
.byte 61 + .byte 63 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_8, @object + .size table_8, 256 +table_8: + .byte 0 + .byte 8 + .byte 1 + .byte 9 + .byte 2 + .byte 10 + .byte 3 + .byte 11 + .byte 12 + .byte 4 + .byte 13 + .byte 5 + .byte 14 + .byte 6 + .byte 15 + .byte 7 + .byte 128 + .byte 136 + .byte 129 + .byte 137 + .byte 130 + .byte 138 + .byte 131 + .byte 139 + .byte 140 + .byte 132 + .byte 141 + .byte 133 + .byte 142 + .byte 134 + .byte 143 + .byte 135 + .byte 16 + .byte 24 + .byte 17 + .byte 25 + .byte 18 + .byte 26 + .byte 19 + .byte 27 + .byte 28 + .byte 20 + .byte 29 + .byte 21 + .byte 30 + .byte 22 + .byte 31 + .byte 23 + .byte 144 + .byte 152 + .byte 145 + .byte 153 + .byte 146 + .byte 154 + .byte 147 + .byte 155 + .byte 156 + 
.byte 148 + .byte 157 + .byte 149 + .byte 158 + .byte 150 + .byte 159 + .byte 151 + .byte 32 + .byte 40 + .byte 33 + .byte 41 + .byte 34 + .byte 42 + .byte 35 + .byte 43 + .byte 44 + .byte 36 + .byte 45 + .byte 37 + .byte 46 + .byte 38 + .byte 47 + .byte 39 + .byte 160 + .byte 168 + .byte 161 + .byte 169 + .byte 162 + .byte 170 + .byte 163 + .byte 171 + .byte 172 + .byte 164 + .byte 173 + .byte 165 + .byte 174 + .byte 166 + .byte 175 + .byte 167 + .byte 48 + .byte 56 + .byte 49 + .byte 57 + .byte 50 + .byte 58 + .byte 51 + .byte 59 + .byte 60 + .byte 52 + .byte 61 + .byte 53 + .byte 62 + .byte 54 + .byte 63 + .byte 55 + .byte 176 + .byte 184 + .byte 177 + .byte 185 + .byte 178 + .byte 186 + .byte 179 + .byte 187 + .byte 188 + .byte 180 + .byte 189 + .byte 181 + .byte 190 + .byte 182 + .byte 191 + .byte 183 + .byte 192 + .byte 200 + .byte 193 + .byte 201 + .byte 194 + .byte 202 + .byte 195 + .byte 203 + .byte 204 + .byte 196 + .byte 205 + .byte 197 + .byte 206 + .byte 198 + .byte 207 + .byte 199 + .byte 64 + .byte 72 + .byte 65 + .byte 73 + .byte 66 + .byte 74 + .byte 67 + .byte 75 + .byte 76 + .byte 68 + .byte 77 + .byte 69 + .byte 78 + .byte 70 + .byte 79 + .byte 71 + .byte 208 + .byte 216 + .byte 209 + .byte 217 + .byte 210 + .byte 218 + .byte 211 + .byte 219 + .byte 220 + .byte 212 + .byte 221 + .byte 213 + .byte 222 + .byte 214 + .byte 223 + .byte 215 + .byte 80 + .byte 88 + .byte 81 + .byte 89 + .byte 82 + .byte 90 + .byte 83 + .byte 91 + .byte 92 + .byte 84 + .byte 93 + .byte 85 + .byte 94 + .byte 86 + .byte 95 + .byte 87 + .byte 224 + .byte 232 + .byte 225 + .byte 233 + .byte 226 + .byte 234 + .byte 227 + .byte 235 + .byte 236 + .byte 228 + .byte 237 + .byte 229 + .byte 238 + .byte 230 + .byte 239 + .byte 231 + .byte 96 + .byte 104 + .byte 97 + .byte 105 + .byte 98 + .byte 106 + .byte 99 + .byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + 
.byte 250 + .byte 243 + .byte 251 + .byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + .byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global forkskinny_128_256_rounds + .type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi 
r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if 
defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 
+ eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + 
mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std 
Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global forkskinny_128_256_inv_rounds + .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd 
r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif 
defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + 
lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 
+#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 
+ std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r12 + movw r20,r14 
+ movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + dec r22 + breq 5109f + 
rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + + .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std 
Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld 
r27,Z
+#else
+ lpm
+ mov r27,r0
+#endif
+ mov r30,r24
+#if defined(RAMPZ)
+ elpm r24,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r24,Z
+#elif defined(__AVR_TINY__)
+ ld r24,Z
+#else
+ lpm
+ mov r24,r0
+#endif
+ mov r30,r25
+#if defined(RAMPZ)
+ elpm r25,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r25,Z
+#elif defined(__AVR_TINY__)
+ ld r25,Z
+#else
+ lpm
+ mov r25,r0
+#endif
+ std Y+9,r20
+ std Y+10,r18
+ std Y+11,r26
+ std Y+12,r25
+ std Y+13,r24
+ std Y+14,r21
+ std Y+15,r27
+ std Y+16,r19
+ dec r22
+ breq 5109f
+ rjmp 51b
+5109:
+#if defined(RAMPZ)
+ pop r0
+ out _SFR_IO_ADDR(RAMPZ),r0
+#endif
+ pop r30
+ pop r31
+ st Z,r2
+ std Z+1,r3
+ std Z+2,r4
+ std Z+3,r5
+ std Z+4,r6
+ std Z+5,r7
+ std Z+6,r8
+ std Z+7,r9
+ std Z+8,r10
+ std Z+9,r11
+ std Z+10,r12
+ std Z+11,r13
+ std Z+12,r14
+ std Z+13,r15
+ std Z+14,r16
+ std Z+15,r17
+ ldd r18,Y+1
+ ldd r19,Y+2
+ ldd r20,Y+3
+ ldd r21,Y+4
+ std Z+16,r18
+ std Z+17,r19
+ std Z+18,r20
+ std Z+19,r21
+ ldd r18,Y+5
+ ldd r19,Y+6
+ ldd r20,Y+7
+ ldd r21,Y+8
+ std Z+20,r18
+ std Z+21,r19
+ std Z+22,r20
+ std Z+23,r21
+ ldd r18,Y+9
+ ldd r19,Y+10
+ ldd r20,Y+11
+ ldd r21,Y+12
+ std Z+24,r18
+ std Z+25,r19
+ std Z+26,r20
+ std Z+27,r21
+ ldd r18,Y+13
+ ldd r19,Y+14
+ ldd r20,Y+15
+ ldd r21,Y+16
+ std Z+28,r18
+ std Z+29,r19
+ std Z+30,r20
+ std Z+31,r21
+ adiw r28,16
+ in r0,0x3f
+ cli
+ out 0x3e,r29
+ out 0x3f,r0
+ out 0x3d,r28
+ pop r17
+ pop r16
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop r11
+ pop r10
+ pop r9
+ pop r8
+ pop r7
+ pop r6
+ pop r5
+ pop r4
+ pop r3
+ pop r2
+ pop r29
+ pop r28
+ ret
+ .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk
+
+ .text
+.global forkskinny_128_384_rounds /* Runs the loop at 118: on the 64-byte block addressed by r25:r24, updating it in place. */
+ .type forkskinny_128_384_rounds, @function /* Inputs r22 and r20 are doubled and used as the start and stop offsets into table_4 (two bytes consumed per pass). */
+forkskinny_128_384_rounds: /* NOTE(review): presumably (state pointer, first round, last round) under the avr-gcc ABI - confirm against the C prototype. */
+ push r28 /* save Y and r2-r17; all are popped again before ret */
+ push r29
+ push r2
+ push r3
+ push r4
+ push r5
+ push r6
+ push r7
+ push r8
+ push r9
+ push r10
+ push r11
+ push r12
+ push r13
+ push r14
+ push r15
+ push r16
+ push r17
+ movw r30,r24 /* Z = r25:r24, used below as the block base pointer */
+ in r28,0x3d /* Y = stack pointer (I/O 0x3d low, 0x3e high) */
+ in r29,0x3e
+ sbiw r28,49 /* reserve 49 scratch bytes, addressed as Y+1..Y+49 */
+ in r0,0x3f /* keep the status register (I/O 0x3f), interrupts off while the stack pointer is rewritten */
+ cli
+ out 0x3e,r29
+ out 0x3f,r0
+ out 0x3d,r28
+.L__stack_usage = 67
+ ldd r2,Z+48 /* block bytes 48..63 -> r2-r15, r24, r25 (kept in registers for the whole loop) */
+ ldd r3,Z+49
+ ldd r4,Z+50
+ ldd r5,Z+51
+ ldd r6,Z+52
+ ldd r7,Z+53
+ ldd r8,Z+54
+ ldd r9,Z+55
+ ldd r10,Z+56
+ ldd r11,Z+57
+ ldd r12,Z+58
+ ldd r13,Z+59
+ ldd r14,Z+60
+ ldd r15,Z+61
+ ldd r24,Z+62
+ ldd r25,Z+63
+ ld r18,Z /* block bytes 0..47 -> scratch Y+1..Y+48, four bytes at a time */
+ ldd r19,Z+1
+ ldd r26,Z+2
+ ldd r27,Z+3
+ std Y+1,r18
+ std Y+2,r19
+ std Y+3,r26
+ std Y+4,r27
+ ldd r18,Z+4
+ ldd r19,Z+5
+ ldd r26,Z+6
+ ldd r27,Z+7
+ std Y+5,r18
+ std Y+6,r19
+ std Y+7,r26
+ std Y+8,r27
+ ldd r18,Z+8
+ ldd r19,Z+9
+ ldd r26,Z+10
+ ldd r27,Z+11
+ std Y+9,r18
+ std Y+10,r19
+ std Y+11,r26
+ std Y+12,r27
+ ldd r18,Z+12
+ ldd r19,Z+13
+ ldd r26,Z+14
+ ldd r27,Z+15
+ std Y+13,r18
+ std Y+14,r19
+ std Y+15,r26
+ std Y+16,r27
+ ldd r18,Z+16
+ ldd r19,Z+17
+ ldd r26,Z+18
+ ldd r27,Z+19
+ std Y+17,r18
+ std Y+18,r19
+ std Y+19,r26
+ std Y+20,r27
+ ldd r18,Z+20
+ ldd r19,Z+21
+ ldd r26,Z+22
+ ldd r27,Z+23
+ std Y+21,r18
+ std Y+22,r19
+ std Y+23,r26
+ std Y+24,r27
+ ldd r18,Z+24
+ ldd r19,Z+25
+ ldd r26,Z+26
+ ldd r27,Z+27
+ std Y+25,r18
+ std Y+26,r19
+ std Y+27,r26
+ std Y+28,r27
+ ldd r18,Z+28
+ ldd r19,Z+29
+ ldd r26,Z+30
+ ldd r27,Z+31
+ std Y+29,r18
+ std Y+30,r19
+ std Y+31,r26
+ std Y+32,r27
+ ldd r18,Z+32
+ ldd r19,Z+33
+ ldd r26,Z+34
+ ldd r27,Z+35
+ std Y+33,r18
+ std Y+34,r19
+ std Y+35,r26
+ std Y+36,r27
+ ldd r18,Z+36
+ ldd r19,Z+37
+ ldd r26,Z+38
+ ldd r27,Z+39
+ std Y+37,r18
+ std Y+38,r19
+ std Y+39,r26
+ std Y+40,r27
+ ldd r18,Z+40
+ ldd r19,Z+41
+ ldd r26,Z+42
+ ldd r27,Z+43
+ std Y+41,r18
+ std Y+42,r19
+ std Y+43,r26
+ std Y+44,r27
+ ldd r18,Z+44
+ ldd r19,Z+45
+ ldd r26,Z+46
+ ldd r27,Z+47
+ std Y+45,r18
+ std Y+46,r19
+ std Y+47,r26
+ std Y+48,r27
+ lsl r20 /* 2*r20 is the loop stop value, parked at Y+49 */
+ std Y+49,r20
+ push r31 /* keep the block pointer; Z is reused for table lookups */
+ push r30
+ ldi r30,lo8(table_0)
+ ldi r31,hi8(table_0)
+#if defined(RAMPZ)
+ ldi r18,hh8(table_0)
+ in r0,_SFR_IO_ADDR(RAMPZ)
+ push r0 /* caller's RAMPZ, restored after the loop */
+ out _SFR_IO_ADDR(RAMPZ),r18
+#endif
+ lsl r22 /* 2*r22 = running byte offset into table_4 */
+118: /* one pass of the loop; NOTE(review): the exit test only runs after the first pass, so callers presumably guarantee r22 != r20 on entry - confirm */
+ mov r30,r2 /* replace each of the 16 register bytes by a table_0 lookup: the byte becomes the low half of Z (presumably the table is 256-byte aligned - directive not visible here) */
+#if defined(RAMPZ)
+ elpm r2,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r2,Z
+#elif defined(__AVR_TINY__)
+ ld r2,Z
+#else
+ lpm
+ mov r2,r0
+#endif
+ mov r30,r3
+#if defined(RAMPZ)
+ elpm r3,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r3,Z
+#elif defined(__AVR_TINY__)
+ ld r3,Z
+#else
+ lpm
+ mov r3,r0
+#endif
+ mov r30,r4
+#if defined(RAMPZ)
+ elpm r4,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r4,Z
+#elif defined(__AVR_TINY__)
+ ld r4,Z
+#else
+ lpm
+ mov r4,r0
+#endif
+ mov r30,r5
+#if defined(RAMPZ)
+ elpm r5,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r5,Z
+#elif defined(__AVR_TINY__)
+ ld r5,Z
+#else
+ lpm
+ mov r5,r0
+#endif
+ mov r30,r6
+#if defined(RAMPZ)
+ elpm r6,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r6,Z
+#elif defined(__AVR_TINY__)
+ ld r6,Z
+#else
+ lpm
+ mov r6,r0
+#endif
+ mov r30,r7
+#if defined(RAMPZ)
+ elpm r7,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r7,Z
+#elif defined(__AVR_TINY__)
+ ld r7,Z
+#else
+ lpm
+ mov r7,r0
+#endif
+ mov r30,r8
+#if defined(RAMPZ)
+ elpm r8,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r8,Z
+#elif defined(__AVR_TINY__)
+ ld r8,Z
+#else
+ lpm
+ mov r8,r0
+#endif
+ mov r30,r9
+#if defined(RAMPZ)
+ elpm r9,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r9,Z
+#elif defined(__AVR_TINY__)
+ ld r9,Z
+#else
+ lpm
+ mov r9,r0
+#endif
+ mov r30,r10
+#if defined(RAMPZ)
+ elpm r10,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r10,Z
+#elif defined(__AVR_TINY__)
+ ld r10,Z
+#else
+ lpm
+ mov r10,r0
+#endif
+ mov r30,r11
+#if defined(RAMPZ)
+ elpm r11,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r11,Z
+#elif defined(__AVR_TINY__)
+ ld r11,Z
+#else
+ lpm
+ mov r11,r0
+#endif
+ mov r30,r12
+#if defined(RAMPZ)
+ elpm r12,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r12,Z
+#elif defined(__AVR_TINY__)
+ ld r12,Z
+#else
+ lpm
+ mov r12,r0
+#endif
+ mov r30,r13
+#if defined(RAMPZ)
+ elpm r13,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r13,Z
+#elif defined(__AVR_TINY__)
+ ld r13,Z
+#else
+ lpm
+ mov r13,r0
+#endif
+ mov r30,r14
+#if defined(RAMPZ)
+ elpm r14,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r14,Z
+#elif defined(__AVR_TINY__)
+ ld r14,Z
+#else
+ lpm
+ mov r14,r0
+#endif
+ mov r30,r15
+#if defined(RAMPZ)
+ elpm r15,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r15,Z
+#elif defined(__AVR_TINY__)
+ ld r15,Z
+#else
+ lpm
+ mov r15,r0
+#endif
+ mov r30,r24
+#if defined(RAMPZ)
+ elpm r24,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r24,Z
+#elif defined(__AVR_TINY__)
+ ld r24,Z
+#else
+ lpm
+ mov r24,r0
+#endif
+ mov r30,r25
+#if defined(RAMPZ)
+ elpm r25,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r25,Z
+#elif defined(__AVR_TINY__)
+ ld r25,Z
+#else
+ lpm
+ mov r25,r0
+#endif
+ ldd r18,Y+1 /* XOR scratch bytes Y+1..8, Y+17..24 and Y+33..40 into r2-r9 */
+ ldd r19,Y+2
+ ldd r26,Y+3
+ ldd r27,Y+4
+ eor r2,r18
+ eor r3,r19
+ eor r4,r26
+ eor r5,r27
+ ldd r18,Y+5
+ ldd r19,Y+6
+ ldd r26,Y+7
+ ldd r27,Y+8
+ eor r6,r18
+ eor r7,r19
+ eor r8,r26
+ eor r9,r27
+ ldd r18,Y+17
+ ldd r19,Y+18
+ ldd r26,Y+19
+ ldd r27,Y+20
+ eor r2,r18
+ eor r3,r19
+ eor r4,r26
+ eor r5,r27
+ ldd r18,Y+21
+ ldd r19,Y+22
+ ldd r26,Y+23
+ ldd r27,Y+24
+ eor r6,r18
+ eor r7,r19
+ eor r8,r26
+ eor r9,r27
+ ldd r18,Y+33
+ ldd r19,Y+34
+ ldd r26,Y+35
+ ldd r27,Y+36
+ eor r2,r18
+ eor r3,r19
+ eor r4,r26
+ eor r5,r27
+ ldd r18,Y+37
+ ldd r19,Y+38
+ ldd r26,Y+39
+ ldd r27,Y+40
+ eor r6,r18
+ eor r7,r19
+ eor r8,r26
+ eor r9,r27
+ ldi r30,lo8(table_4) /* switch the lookup base to table_4 */
+ ldi r31,hi8(table_4)
+#if defined(RAMPZ)
+ ldi r18,hh8(table_4)
+ out _SFR_IO_ADDR(RAMPZ),r18
+#endif
+ mov r30,r22 /* first table_4 byte for this pass -> XORed into r2 */
+#if defined(RAMPZ)
+ elpm r18,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r18,Z
+#elif defined(__AVR_TINY__)
+ ld r18,Z
+#else
+ lpm
+ mov r18,r0
+#endif
+ eor r2,r18
+ inc r22
+ mov r30,r22 /* second table_4 byte -> XORed into r6 */
+#if defined(RAMPZ)
+ elpm r18,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r18,Z
+#elif defined(__AVR_TINY__)
+ ld r18,Z
+#else
+ lpm
+ mov r18,r0
+#endif
+ eor r6,r18
+ ldi r18,2 /* constant 2 XORed into r10 and r4 */
+ eor r10,r18
+ eor r4,r18
+ mov r0,r9 /* byte-rotate the groups r6-r9 (by one), r10-r13 (by two, via swaps) and r14,r15,r24,r25 (by one, other direction) */
+ mov r9,r8
+ mov r8,r7
+ mov r7,r6
+ mov r6,r0
+ mov r0,r12
+ mov r12,r10
+ mov r10,r0
+ mov r0,r13
+ mov r13,r11
+ mov r11,r0
+ mov r0,r14
+ mov r14,r15
+ mov r15,r24
+ mov r24,r25
+ mov r25,r0
+ eor r6,r10 /* XOR-combine the four 4-byte groups, then move the groups to their new register slots */
+ eor r7,r11
+ eor r8,r12
+ eor r9,r13
+ eor r10,r2
+ eor r11,r3
+ eor r12,r4
+ eor r13,r5
+ movw r18,r14
+ movw r26,r24
+ eor r18,r10
+ eor r19,r11
+ eor r26,r12
+ eor r27,r13
+ movw r14,r10
+ movw r24,r12
+ movw r10,r6
+ movw r12,r8
+ movw r6,r2
+ movw r8,r4
+ movw r2,r18
+ movw r4,r26
+ ldd r18,Y+9 /* scratch Y+1..16: old bytes 1..8 move to 9..16, old bytes 9..16 are reordered into 1..8 (no table) */
+ ldd r19,Y+10
+ ldd r26,Y+11
+ ldd r27,Y+12
+ ldd r20,Y+13
+ ldd r21,Y+14
+ ldd r16,Y+15
+ ldd r17,Y+16
+ ldd r0,Y+1
+ std Y+9,r0
+ ldd r0,Y+2
+ std Y+10,r0
+ ldd r0,Y+3
+ std Y+11,r0
+ ldd r0,Y+4
+ std Y+12,r0
+ ldd r0,Y+5
+ std Y+13,r0
+ ldd r0,Y+6
+ std Y+14,r0
+ ldd r0,Y+7
+ std Y+15,r0
+ ldd r0,Y+8
+ std Y+16,r0
+ std Y+1,r19
+ std Y+2,r17
+ std Y+3,r18
+ std Y+4,r21
+ std Y+5,r26
+ std Y+6,r16
+ std Y+7,r20
+ std Y+8,r27
+ ldi r30,lo8(table_2) /* scratch Y+17..32: same shuffle, with the reordered bytes first mapped through table_2 */
+ ldi r31,hi8(table_2)
+#if defined(RAMPZ)
+ ldi r18,hh8(table_2)
+ out _SFR_IO_ADDR(RAMPZ),r18
+#endif
+ ldd r18,Y+25
+ ldd r19,Y+26
+ ldd r26,Y+27
+ ldd r27,Y+28
+ ldd r20,Y+29
+ ldd r21,Y+30
+ ldd r16,Y+31
+ ldd r17,Y+32
+ mov r30,r18
+#if defined(RAMPZ)
+ elpm r18,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r18,Z
+#elif defined(__AVR_TINY__)
+ ld r18,Z
+#else
+ lpm
+ mov r18,r0
+#endif
+ mov r30,r19
+#if defined(RAMPZ)
+ elpm r19,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r19,Z
+#elif defined(__AVR_TINY__)
+ ld r19,Z
+#else
+ lpm
+ mov r19,r0
+#endif
+ mov r30,r26
+#if defined(RAMPZ)
+ elpm r26,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r26,Z
+#elif defined(__AVR_TINY__)
+ ld r26,Z
+#else
+ lpm
+ mov r26,r0
+#endif
+ mov r30,r27
+#if defined(RAMPZ)
+ elpm r27,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r27,Z
+#elif defined(__AVR_TINY__)
+ ld r27,Z
+#else
+ lpm
+ mov r27,r0
+#endif
+ mov r30,r20
+#if defined(RAMPZ)
+ elpm r20,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r20,Z
+#elif defined(__AVR_TINY__)
+ ld r20,Z
+#else
+ lpm
+ mov r20,r0
+#endif
+ mov r30,r21
+#if defined(RAMPZ)
+ elpm r21,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r21,Z
+#elif defined(__AVR_TINY__)
+ ld r21,Z
+#else
+ lpm
+ mov r21,r0
+#endif
+ mov r30,r16
+#if defined(RAMPZ)
+ elpm r16,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r16,Z
+#elif defined(__AVR_TINY__)
+ ld r16,Z
+#else
+ lpm
+ mov r16,r0
+#endif
+ mov r30,r17
+#if defined(RAMPZ)
+ elpm r17,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r17,Z
+#elif defined(__AVR_TINY__)
+ ld r17,Z
+#else
+ lpm
+ mov r17,r0
+#endif
+ ldd r0,Y+17
+ std Y+25,r0
+ ldd r0,Y+18
+ std Y+26,r0
+ ldd r0,Y+19
+ std Y+27,r0
+ ldd r0,Y+20
+ std Y+28,r0
+ ldd r0,Y+21
+ std Y+29,r0
+ ldd r0,Y+22
+ std Y+30,r0
+ ldd r0,Y+23
+ std Y+31,r0
+ ldd r0,Y+24
+ std Y+32,r0
+ std Y+17,r19
+ std Y+18,r17
+ std Y+19,r18
+ std Y+20,r21
+ std Y+21,r26
+ std Y+22,r16
+ std Y+23,r20
+ std Y+24,r27
+ ldi r30,lo8(table_3) /* scratch Y+33..48: same shuffle, with the reordered bytes first mapped through table_3 */
+ ldi r31,hi8(table_3)
+#if defined(RAMPZ)
+ ldi r18,hh8(table_3)
+ out _SFR_IO_ADDR(RAMPZ),r18
+#endif
+ ldd r18,Y+41
+ ldd r19,Y+42
+ ldd r26,Y+43
+ ldd r27,Y+44
+ ldd r20,Y+45
+ ldd r21,Y+46
+ ldd r16,Y+47
+ ldd r17,Y+48
+ mov r30,r18
+#if defined(RAMPZ)
+ elpm r18,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r18,Z
+#elif defined(__AVR_TINY__)
+ ld r18,Z
+#else
+ lpm
+ mov r18,r0
+#endif
+ mov r30,r19
+#if defined(RAMPZ)
+ elpm r19,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r19,Z
+#elif defined(__AVR_TINY__)
+ ld r19,Z
+#else
+ lpm
+ mov r19,r0
+#endif
+ mov r30,r26
+#if defined(RAMPZ)
+ elpm r26,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r26,Z
+#elif defined(__AVR_TINY__)
+ ld r26,Z
+#else
+ lpm
+ mov r26,r0
+#endif
+ mov r30,r27
+#if defined(RAMPZ)
+ elpm r27,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r27,Z
+#elif defined(__AVR_TINY__)
+ ld r27,Z
+#else
+ lpm
+ mov r27,r0
+#endif
+ mov r30,r20
+#if defined(RAMPZ)
+ elpm r20,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r20,Z
+#elif defined(__AVR_TINY__)
+ ld r20,Z
+#else
+ lpm
+ mov r20,r0
+#endif
+ mov r30,r21
+#if defined(RAMPZ)
+ elpm r21,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r21,Z
+#elif defined(__AVR_TINY__)
+ ld r21,Z
+#else
+ lpm
+ mov r21,r0
+#endif
+ mov r30,r16
+#if defined(RAMPZ)
+ elpm r16,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r16,Z
+#elif defined(__AVR_TINY__)
+ ld r16,Z
+#else
+ lpm
+ mov r16,r0
+#endif
+ mov r30,r17
+#if defined(RAMPZ)
+ elpm r17,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r17,Z
+#elif defined(__AVR_TINY__)
+ ld r17,Z
+#else
+ lpm
+ mov r17,r0
+#endif
+ ldd r0,Y+33
+ std Y+41,r0
+ ldd r0,Y+34
+ std Y+42,r0
+ ldd r0,Y+35
+ std Y+43,r0
+ ldd r0,Y+36
+ std Y+44,r0
+ ldd r0,Y+37
+ std Y+45,r0
+ ldd r0,Y+38
+ std Y+46,r0
+ ldd r0,Y+39
+ std Y+47,r0
+ ldd r0,Y+40
+ std Y+48,r0
+ std Y+33,r19
+ std Y+34,r17
+ std Y+35,r18
+ std Y+36,r21
+ std Y+37,r26
+ std Y+38,r16
+ std Y+39,r20
+ std Y+40,r27
+ ldi r30,lo8(table_0) /* back to table_0 for the next pass */
+ ldi r31,hi8(table_0)
+#if defined(RAMPZ)
+ ldi r18,hh8(table_0)
+ out _SFR_IO_ADDR(RAMPZ),r18
+#endif
+ inc r22 /* r22 has now advanced by 2 in this pass */
+ ldd r18,Y+49
+ cp r22,r18 /* done once r22 equals the stop value saved at Y+49 */
+ breq 5348f /* breq over an rjmp: the loop body is too long for a direct conditional branch back */
+ rjmp 118b
+5348:
+#if defined(RAMPZ)
+ pop r0 /* restore the caller's RAMPZ */
+ out _SFR_IO_ADDR(RAMPZ),r0
+#endif
+ pop r30 /* Z = block pointer again */
+ pop r31
+ std Z+48,r2 /* registers -> block bytes 48..63 */
+ std Z+49,r3
+ std Z+50,r4
+ std Z+51,r5
+ std Z+52,r6
+ std Z+53,r7
+ std Z+54,r8
+ std Z+55,r9
+ std Z+56,r10
+ std Z+57,r11
+ std Z+58,r12
+ std Z+59,r13
+ std Z+60,r14
+ std Z+61,r15
+ std Z+62,r24
+ std Z+63,r25
+ ldd r18,Y+1 /* scratch Y+1..Y+48 -> block bytes 0..47 */
+ ldd r19,Y+2
+ ldd r26,Y+3
+ ldd r27,Y+4
+ st Z,r18
+ std Z+1,r19
+ std Z+2,r26
+ std Z+3,r27
+ ldd r18,Y+5
+ ldd r19,Y+6
+ ldd r26,Y+7
+ ldd r27,Y+8
+ std Z+4,r18
+ std Z+5,r19
+ std Z+6,r26
+ std Z+7,r27
+ ldd r18,Y+9
+ ldd r19,Y+10
+ ldd r26,Y+11
+ ldd r27,Y+12
+ std Z+8,r18
+ std Z+9,r19
+ std Z+10,r26
+ std Z+11,r27
+ ldd r18,Y+13
+ ldd r19,Y+14
+ ldd r26,Y+15
+ ldd r27,Y+16
+ std Z+12,r18
+ std Z+13,r19
+ std Z+14,r26
+ std Z+15,r27
+ ldd r18,Y+17
+ ldd r19,Y+18
+ ldd r26,Y+19
+ ldd r27,Y+20
+ std Z+16,r18
+ std Z+17,r19
+ std Z+18,r26
+ std Z+19,r27
+ ldd r18,Y+21
+ ldd r19,Y+22
+ ldd r26,Y+23
+ ldd r27,Y+24
+ std Z+20,r18
+ std Z+21,r19
+ std Z+22,r26
+ std Z+23,r27
+ ldd r18,Y+25
+ ldd r19,Y+26
+ ldd r26,Y+27
+ ldd r27,Y+28
+ std Z+24,r18
+ std Z+25,r19
+ std Z+26,r26
+ std Z+27,r27
+ ldd r18,Y+29
+ ldd r19,Y+30
+ ldd r26,Y+31
+ ldd r27,Y+32
+ std Z+28,r18
+ std Z+29,r19
+ std Z+30,r26
+ std Z+31,r27
+ ldd r18,Y+33
+ ldd r19,Y+34
+ ldd r26,Y+35
+ ldd r27,Y+36
+ std Z+32,r18
+ std Z+33,r19
+ std Z+34,r26
+ std Z+35,r27
+ ldd r18,Y+37
+ ldd r19,Y+38
+ ldd r26,Y+39
+ ldd r27,Y+40
+ std Z+36,r18
+ std Z+37,r19
+ std Z+38,r26
+ std Z+39,r27
+ ldd r18,Y+41
+ ldd r19,Y+42
+ ldd r26,Y+43
+ ldd r27,Y+44
+ std Z+40,r18
+ std Z+41,r19
+ std Z+42,r26
+ std Z+43,r27
+ ldd r18,Y+45
+ ldd r19,Y+46
+ ldd r26,Y+47
+ ldd r27,Y+48
+ std Z+44,r18
+ std Z+45,r19
+ std Z+46,r26
+ std Z+47,r27
+ adiw r28,49 /* release the 49 scratch bytes and rewrite the stack pointer with interrupts off */
+ in r0,0x3f
+ cli
+ out 0x3e,r29
+ out 0x3f,r0
+ out 0x3d,r28
+ pop r17
+ pop r16
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop r11
+ pop r10
+ pop r9
+ pop r8
+ pop r7
+ pop r6
+ pop r5
+ pop r4
+ pop r3
+ pop r2
+ pop r29
+ pop r28
+ eor r1,r1 /* r1 = 0 on return (avr-gcc treats r1 as its zero register) */
+ ret
+ .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds
+
+ .text
+.global forkskinny_128_384_inv_rounds /* Same register and scratch layout as forkskinny_128_384_rounds; its loop steps r22 downwards through table_4 and uses table_3, table_2 and table_1. */
+ .type forkskinny_128_384_inv_rounds, @function /* NOTE(review): presumably the inverse of the forward round loop, with r22 = first and r20 = last round - confirm against the C prototype. */
+forkskinny_128_384_inv_rounds:
+ push r28 /* save Y and r2-r17 */
+ push r29
+ push r2
+ push r3
+ push r4
+ push r5
+ push r6
+ push r7
+ push r8
+ push r9
+ push r10
+ push r11
+ push r12
+ push r13
+ push r14
+ push r15
+ push r16
+ push r17
+ movw r30,r24 /* Z = r25:r24, the block base pointer */
+ in r28,0x3d
+ in r29,0x3e
+ sbiw r28,49 /* 49 scratch bytes at Y+1..Y+49 */
+ in r0,0x3f
+ cli
+ out 0x3e,r29
+ out 0x3f,r0
+ out 0x3d,r28
+.L__stack_usage = 67
+ ldd r2,Z+48 /* block bytes 48..63 -> r2-r15, r24, r25 */
+ ldd r3,Z+49
+ ldd r4,Z+50
+ ldd r5,Z+51
+ ldd r6,Z+52
+ ldd r7,Z+53
+ ldd r8,Z+54
+ ldd r9,Z+55
+ ldd r10,Z+56
+ ldd r11,Z+57
+ ldd r12,Z+58
+ ldd r13,Z+59
+ ldd r14,Z+60
+ ldd r15,Z+61
+ ldd r24,Z+62
+ ldd r25,Z+63
+ ld r18,Z /* block bytes 0..47 -> scratch Y+1..Y+48 */
+ ldd r19,Z+1
+ ldd r26,Z+2
+ ldd r27,Z+3
+ std Y+1,r18
+ std Y+2,r19
+ std Y+3,r26
+ std Y+4,r27
+ ldd r18,Z+4
+ ldd r19,Z+5
+ ldd r26,Z+6
+ ldd r27,Z+7
+ std Y+5,r18
+ std Y+6,r19
+ std Y+7,r26
+ std Y+8,r27
+ ldd r18,Z+8
+ ldd r19,Z+9
+ ldd r26,Z+10
+ ldd r27,Z+11
+ std Y+9,r18
+ std Y+10,r19
+ std Y+11,r26
+ std Y+12,r27
+ ldd r18,Z+12
+ ldd r19,Z+13
+ ldd r26,Z+14
+ ldd r27,Z+15
+ std
Y+13,r18
+ std Y+14,r19
+ std Y+15,r26
+ std Y+16,r27
+ ldd r18,Z+16 /* continue copying the block at Z into scratch: bytes 16..47 -> Y+17..Y+48 */
+ ldd r19,Z+17
+ ldd r26,Z+18
+ ldd r27,Z+19
+ std Y+17,r18
+ std Y+18,r19
+ std Y+19,r26
+ std Y+20,r27
+ ldd r18,Z+20
+ ldd r19,Z+21
+ ldd r26,Z+22
+ ldd r27,Z+23
+ std Y+21,r18
+ std Y+22,r19
+ std Y+23,r26
+ std Y+24,r27
+ ldd r18,Z+24
+ ldd r19,Z+25
+ ldd r26,Z+26
+ ldd r27,Z+27
+ std Y+25,r18
+ std Y+26,r19
+ std Y+27,r26
+ std Y+28,r27
+ ldd r18,Z+28
+ ldd r19,Z+29
+ ldd r26,Z+30
+ ldd r27,Z+31
+ std Y+29,r18
+ std Y+30,r19
+ std Y+31,r26
+ std Y+32,r27
+ ldd r18,Z+32
+ ldd r19,Z+33
+ ldd r26,Z+34
+ ldd r27,Z+35
+ std Y+33,r18
+ std Y+34,r19
+ std Y+35,r26
+ std Y+36,r27
+ ldd r18,Z+36
+ ldd r19,Z+37
+ ldd r26,Z+38
+ ldd r27,Z+39
+ std Y+37,r18
+ std Y+38,r19
+ std Y+39,r26
+ std Y+40,r27
+ ldd r18,Z+40
+ ldd r19,Z+41
+ ldd r26,Z+42
+ ldd r27,Z+43
+ std Y+41,r18
+ std Y+42,r19
+ std Y+43,r26
+ std Y+44,r27
+ ldd r18,Z+44
+ ldd r19,Z+45
+ ldd r26,Z+46
+ ldd r27,Z+47
+ std Y+45,r18
+ std Y+46,r19
+ std Y+47,r26
+ std Y+48,r27
+ lsl r20 /* 2*r20 is the loop stop value, parked at Y+49 */
+ std Y+49,r20
+ push r31 /* keep the block pointer; Z is reused for table lookups */
+ push r30
+ ldi r30,lo8(table_3)
+ ldi r31,hi8(table_3)
+#if defined(RAMPZ)
+ ldi r18,hh8(table_3)
+ in r0,_SFR_IO_ADDR(RAMPZ)
+ push r0 /* caller's RAMPZ, restored after the loop */
+ out _SFR_IO_ADDR(RAMPZ),r18
+#endif
+ lsl r22 /* 2*r22 = running byte offset into table_4; it is decremented twice per pass */
+118: /* one pass of the loop; NOTE(review): the exit test only runs after the first pass, so callers presumably guarantee r22 != r20 on entry - confirm */
+ ldd r18,Y+1 /* scratch Y+1..16: old bytes 9..16 move to 1..8, old bytes 1..8 are reordered into 9..16 (no table) */
+ ldd r19,Y+2
+ ldd r26,Y+3
+ ldd r27,Y+4
+ ldd r20,Y+5
+ ldd r21,Y+6
+ ldd r16,Y+7
+ ldd r17,Y+8
+ ldd r0,Y+9
+ std Y+1,r0
+ ldd r0,Y+10
+ std Y+2,r0
+ ldd r0,Y+11
+ std Y+3,r0
+ ldd r0,Y+12
+ std Y+4,r0
+ ldd r0,Y+13
+ std Y+5,r0
+ ldd r0,Y+14
+ std Y+6,r0
+ ldd r0,Y+15
+ std Y+7,r0
+ ldd r0,Y+16
+ std Y+8,r0
+ std Y+9,r26
+ std Y+10,r18
+ std Y+11,r20
+ std Y+12,r17
+ std Y+13,r16
+ std Y+14,r27
+ std Y+15,r21
+ std Y+16,r19
+ ldd r18,Y+17 /* scratch Y+17..32: same shuffle, with old bytes 17..24 first mapped through table_3 (Z points there at loop entry) */
+ ldd r19,Y+18
+ ldd r26,Y+19
+ ldd r27,Y+20
+ ldd r20,Y+21
+ ldd r21,Y+22
+ ldd r16,Y+23
+ ldd r17,Y+24
+ mov r30,r18 /* lookup idiom: the byte becomes the low half of Z (presumably the tables are 256-byte aligned - directive not visible here) */
+#if defined(RAMPZ)
+ elpm r18,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r18,Z
+#elif defined(__AVR_TINY__)
+ ld r18,Z
+#else
+ lpm
+ mov r18,r0
+#endif
+ mov r30,r19
+#if defined(RAMPZ)
+ elpm r19,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r19,Z
+#elif defined(__AVR_TINY__)
+ ld r19,Z
+#else
+ lpm
+ mov r19,r0
+#endif
+ mov r30,r26
+#if defined(RAMPZ)
+ elpm r26,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r26,Z
+#elif defined(__AVR_TINY__)
+ ld r26,Z
+#else
+ lpm
+ mov r26,r0
+#endif
+ mov r30,r27
+#if defined(RAMPZ)
+ elpm r27,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r27,Z
+#elif defined(__AVR_TINY__)
+ ld r27,Z
+#else
+ lpm
+ mov r27,r0
+#endif
+ mov r30,r20
+#if defined(RAMPZ)
+ elpm r20,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r20,Z
+#elif defined(__AVR_TINY__)
+ ld r20,Z
+#else
+ lpm
+ mov r20,r0
+#endif
+ mov r30,r21
+#if defined(RAMPZ)
+ elpm r21,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r21,Z
+#elif defined(__AVR_TINY__)
+ ld r21,Z
+#else
+ lpm
+ mov r21,r0
+#endif
+ mov r30,r16
+#if defined(RAMPZ)
+ elpm r16,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r16,Z
+#elif defined(__AVR_TINY__)
+ ld r16,Z
+#else
+ lpm
+ mov r16,r0
+#endif
+ mov r30,r17
+#if defined(RAMPZ)
+ elpm r17,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r17,Z
+#elif defined(__AVR_TINY__)
+ ld r17,Z
+#else
+ lpm
+ mov r17,r0
+#endif
+ ldd r0,Y+25
+ std Y+17,r0
+ ldd r0,Y+26
+ std Y+18,r0
+ ldd r0,Y+27
+ std Y+19,r0
+ ldd r0,Y+28
+ std Y+20,r0
+ ldd r0,Y+29
+ std Y+21,r0
+ ldd r0,Y+30
+ std Y+22,r0
+ ldd r0,Y+31
+ std Y+23,r0
+ ldd r0,Y+32
+ std Y+24,r0
+ std Y+25,r26
+ std Y+26,r18
+ std Y+27,r20
+ std Y+28,r17
+ std Y+29,r16
+ std Y+30,r27
+ std Y+31,r21
+ std Y+32,r19
+ ldi r30,lo8(table_2) /* scratch Y+33..48: same shuffle, with old bytes 33..40 first mapped through table_2 */
+ ldi r31,hi8(table_2)
+#if defined(RAMPZ)
+ ldi r18,hh8(table_2)
+ out _SFR_IO_ADDR(RAMPZ),r18
+#endif
+ ldd r18,Y+33
+ ldd r19,Y+34
+ ldd r26,Y+35
+ ldd r27,Y+36
+ ldd r20,Y+37
+ ldd r21,Y+38
+ ldd r16,Y+39
+ ldd r17,Y+40
+ mov r30,r18
+#if defined(RAMPZ)
+ elpm r18,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r18,Z
+#elif defined(__AVR_TINY__)
+ ld r18,Z
+#else
+ lpm
+ mov r18,r0
+#endif
+ mov r30,r19
+#if defined(RAMPZ)
+ elpm r19,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r19,Z
+#elif defined(__AVR_TINY__)
+ ld r19,Z
+#else
+ lpm
+ mov r19,r0
+#endif
+ mov r30,r26
+#if defined(RAMPZ)
+ elpm r26,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r26,Z
+#elif defined(__AVR_TINY__)
+ ld r26,Z
+#else
+ lpm
+ mov r26,r0
+#endif
+ mov r30,r27
+#if defined(RAMPZ)
+ elpm r27,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r27,Z
+#elif defined(__AVR_TINY__)
+ ld r27,Z
+#else
+ lpm
+ mov r27,r0
+#endif
+ mov r30,r20
+#if defined(RAMPZ)
+ elpm r20,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r20,Z
+#elif defined(__AVR_TINY__)
+ ld r20,Z
+#else
+ lpm
+ mov r20,r0
+#endif
+ mov r30,r21
+#if defined(RAMPZ)
+ elpm r21,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r21,Z
+#elif defined(__AVR_TINY__)
+ ld r21,Z
+#else
+ lpm
+ mov r21,r0
+#endif
+ mov r30,r16
+#if defined(RAMPZ)
+ elpm r16,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r16,Z
+#elif defined(__AVR_TINY__)
+ ld r16,Z
+#else
+ lpm
+ mov r16,r0
+#endif
+ mov r30,r17
+#if defined(RAMPZ)
+ elpm r17,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r17,Z
+#elif defined(__AVR_TINY__)
+ ld r17,Z
+#else
+ lpm
+ mov r17,r0
+#endif
+ ldd r0,Y+41
+ std Y+33,r0
+ ldd r0,Y+42
+ std Y+34,r0
+ ldd r0,Y+43
+ std Y+35,r0
+ ldd r0,Y+44
+ std Y+36,r0
+ ldd r0,Y+45
+ std Y+37,r0
+ ldd r0,Y+46
+ std Y+38,r0
+ ldd r0,Y+47
+ std Y+39,r0
+ ldd r0,Y+48
+ std Y+40,r0
+ std Y+41,r26
+ std Y+42,r18
+ std Y+43,r20
+ std Y+44,r17
+ std Y+45,r16
+ std Y+46,r27
+ std Y+47,r21
+ std Y+48,r19
+ movw r18,r2 /* move the four 4-byte register groups to their new slots, then XOR-combine them */
+ movw r26,r4
+ movw r2,r6
+ movw r4,r8
+ movw r6,r10
+ movw r8,r12
+ movw r10,r14
+ movw r12,r24
+ movw r14,r18
+ movw r24,r26
+ eor r14,r10
+ eor r15,r11
+ eor r24,r12
+ eor r25,r13
+ eor r10,r2
+ eor r11,r3
+ eor r12,r4
+ eor r13,r5
+ eor r6,r10
+ eor r7,r11
+ eor r8,r12
+ eor r9,r13
+ mov r0,r6 /* byte-rotate the groups r6-r9 (by one), r10-r13 (by two, via swaps) and r14,r15,r24,r25 (by one, other direction) */
+ mov r6,r7
+ mov r7,r8
+ mov r8,r9
+ mov r9,r0
+ mov r0,r10
+ mov r10,r12
+ mov r12,r0
+ mov r0,r11
+ mov r11,r13
+ mov r13,r0
+ mov r0,r25
+ mov r25,r24
+ mov r24,r15
+ mov r15,r14
+ mov r14,r0
+ ldd r18,Y+1 /* XOR scratch bytes Y+1..8, Y+17..24 and Y+33..40 into r2-r9 */
+ ldd r19,Y+2
+ ldd r26,Y+3
+ ldd r27,Y+4
+ eor r2,r18
+ eor r3,r19
+ eor r4,r26
+ eor r5,r27
+ ldd r18,Y+5
+ ldd r19,Y+6
+ ldd r26,Y+7
+ ldd r27,Y+8
+ eor r6,r18
+ eor r7,r19
+ eor r8,r26
+ eor r9,r27
+ ldd r18,Y+17
+ ldd r19,Y+18
+ ldd r26,Y+19
+ ldd r27,Y+20
+ eor r2,r18
+ eor r3,r19
+ eor r4,r26
+ eor r5,r27
+ ldd r18,Y+21
+ ldd r19,Y+22
+ ldd r26,Y+23
+ ldd r27,Y+24
+ eor r6,r18
+ eor r7,r19
+ eor r8,r26
+ eor r9,r27
+ ldd r18,Y+33
+ ldd r19,Y+34
+ ldd r26,Y+35
+ ldd r27,Y+36
+ eor r2,r18
+ eor r3,r19
+ eor r4,r26
+ eor r5,r27
+ ldd r18,Y+37
+ ldd r19,Y+38
+ ldd r26,Y+39
+ ldd r27,Y+40
+ eor r6,r18
+ eor r7,r19
+ eor r8,r26
+ eor r9,r27
+ ldi r30,lo8(table_4) /* switch the lookup base to table_4 */
+ ldi r31,hi8(table_4)
+#if defined(RAMPZ)
+ ldi r18,hh8(table_4)
+ out _SFR_IO_ADDR(RAMPZ),r18
+#endif
+ dec r22 /* step back one byte: this table_4 byte is XORed into r6 */
+ mov r30,r22
+#if defined(RAMPZ)
+ elpm r18,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r18,Z
+#elif defined(__AVR_TINY__)
+ ld r18,Z
+#else
+ lpm
+ mov r18,r0
+#endif
+ eor r6,r18
+ dec r22 /* step back again: this table_4 byte is XORed into r2 */
+ mov r30,r22
+#if defined(RAMPZ)
+ elpm r18,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r18,Z
+#elif defined(__AVR_TINY__)
+ ld r18,Z
+#else
+ lpm
+ mov r18,r0
+#endif
+ eor r2,r18
+ ldi r18,2 /* constant 2 XORed into r10 and r4 */
+ eor r10,r18
+ eor r4,r18
+ ldi r30,lo8(table_1) /* replace each of the 16 register bytes by its table_1 lookup */
+ ldi r31,hi8(table_1)
+#if defined(RAMPZ)
+ ldi r18,hh8(table_1)
+ out _SFR_IO_ADDR(RAMPZ),r18
+#endif
+ mov r30,r2
+#if defined(RAMPZ)
+ elpm r2,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r2,Z
+#elif defined(__AVR_TINY__)
+ ld r2,Z
+#else
+ lpm
+ mov r2,r0
+#endif
+ mov r30,r3
+#if defined(RAMPZ)
+ elpm r3,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r3,Z
+#elif defined(__AVR_TINY__)
+ ld r3,Z
+#else
+ lpm
+ mov r3,r0
+#endif
+ mov r30,r4
+#if defined(RAMPZ)
+ elpm r4,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r4,Z
+#elif defined(__AVR_TINY__)
+ ld r4,Z
+#else
+ lpm
+ mov r4,r0
+#endif
+ mov r30,r5
+#if defined(RAMPZ)
+ elpm r5,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r5,Z
+#elif defined(__AVR_TINY__)
+ ld r5,Z
+#else
+ lpm
+ mov r5,r0
+#endif
+ mov r30,r6
+#if defined(RAMPZ)
+ elpm r6,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r6,Z
+#elif defined(__AVR_TINY__)
+ ld r6,Z
+#else
+ lpm
+ mov r6,r0
+#endif
+ mov r30,r7
+#if defined(RAMPZ)
+ elpm r7,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r7,Z
+#elif defined(__AVR_TINY__)
+ ld r7,Z
+#else
+ lpm
+ mov r7,r0
+#endif
+ mov r30,r8
+#if defined(RAMPZ)
+ elpm r8,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r8,Z
+#elif defined(__AVR_TINY__)
+ ld r8,Z
+#else
+ lpm
+ mov r8,r0
+#endif
+ mov r30,r9
+#if defined(RAMPZ)
+ elpm r9,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r9,Z
+#elif defined(__AVR_TINY__)
+ ld r9,Z
+#else
+ lpm
+ mov r9,r0
+#endif
+ mov r30,r10
+#if defined(RAMPZ)
+ elpm r10,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r10,Z
+#elif defined(__AVR_TINY__)
+ ld r10,Z
+#else
+ lpm
+ mov r10,r0
+#endif
+ mov r30,r11
+#if defined(RAMPZ)
+ elpm r11,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r11,Z
+#elif defined(__AVR_TINY__)
+ ld r11,Z
+#else
+ lpm
+ mov r11,r0
+#endif
+ mov r30,r12
+#if defined(RAMPZ)
+ elpm r12,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r12,Z
+#elif defined(__AVR_TINY__)
+ ld r12,Z
+#else
+ lpm
+ mov r12,r0
+#endif
+ mov r30,r13
+#if defined(RAMPZ)
+ elpm r13,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r13,Z
+#elif defined(__AVR_TINY__)
+ ld r13,Z
+#else
+ lpm
+ mov r13,r0
+#endif
+ mov r30,r14
+#if defined(RAMPZ)
+ elpm r14,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r14,Z
+#elif defined(__AVR_TINY__)
+ ld r14,Z
+#else
+ lpm
+ mov r14,r0
+#endif
+ mov r30,r15
+#if defined(RAMPZ)
+ elpm r15,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r15,Z
+#elif defined(__AVR_TINY__)
+ ld r15,Z
+#else
+ lpm
+ mov r15,r0
+#endif
+ mov r30,r24
+#if defined(RAMPZ)
+ elpm r24,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r24,Z
+#elif defined(__AVR_TINY__)
+ ld r24,Z
+#else
+ lpm
+ mov r24,r0
+#endif
+ mov r30,r25
+#if defined(RAMPZ)
+ elpm r25,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r25,Z
+#elif defined(__AVR_TINY__)
+ ld r25,Z
+#else
+ lpm
+ mov r25,r0
+#endif
+ ldi r30,lo8(table_3) /* back to table_3 for the next pass */
+ ldi r31,hi8(table_3)
+#if defined(RAMPZ)
+ ldi r18,hh8(table_3)
+ out _SFR_IO_ADDR(RAMPZ),r18
+#endif
+ ldd r18,Y+49
+ cp r22,r18 /* done once r22 has counted down to the stop value saved at Y+49 */
+ breq 5348f /* breq over an rjmp: the loop body is too long for a direct conditional branch back */
+ rjmp 118b
+5348:
+#if defined(RAMPZ)
+ pop r0 /* restore the caller's RAMPZ */
+ out _SFR_IO_ADDR(RAMPZ),r0
+#endif
+ pop r30 /* Z = block pointer again */
+ pop r31
+ std Z+48,r2 /* registers -> block bytes 48..63 */
+ std Z+49,r3
+ std Z+50,r4
+ std Z+51,r5
+ std Z+52,r6
+ std Z+53,r7
+ std Z+54,r8
+ std Z+55,r9
+ std Z+56,r10
+ std Z+57,r11
+ std Z+58,r12
+ std Z+59,r13
+ std Z+60,r14
+ std Z+61,r15
+ std Z+62,r24
+ std Z+63,r25
+ ldd r18,Y+1 /* scratch Y+1..Y+48 -> block bytes 0..47 */
+ ldd r19,Y+2
+ ldd r26,Y+3
+ ldd r27,Y+4
+ st Z,r18
+ std Z+1,r19
+ std Z+2,r26
+ std Z+3,r27
+ ldd r18,Y+5
+ ldd r19,Y+6
+ ldd r26,Y+7
+ ldd r27,Y+8
+ std Z+4,r18
+ std Z+5,r19
+ std Z+6,r26
+ std Z+7,r27
+ ldd r18,Y+9
+ ldd r19,Y+10
+ ldd r26,Y+11
+ ldd r27,Y+12
+ std Z+8,r18
+ std Z+9,r19
+ std Z+10,r26
+ std Z+11,r27
+ ldd r18,Y+13
+ ldd r19,Y+14
+ ldd r26,Y+15
+ ldd r27,Y+16
+ std Z+12,r18
+ std Z+13,r19
+ std Z+14,r26
+ std Z+15,r27
+ ldd r18,Y+17
+ ldd r19,Y+18
+ ldd r26,Y+19
+ ldd r27,Y+20
+ std Z+16,r18
+ std Z+17,r19
+ std Z+18,r26
+ std Z+19,r27
+ ldd r18,Y+21
+ ldd r19,Y+22
+ ldd r26,Y+23
+ ldd r27,Y+24
+ std Z+20,r18
+ std Z+21,r19
+ std Z+22,r26
+ std Z+23,r27
+ ldd r18,Y+25
+ ldd r19,Y+26
+ ldd r26,Y+27
+ ldd r27,Y+28
+ std Z+24,r18
+ std Z+25,r19
+ std Z+26,r26
+ std Z+27,r27
+ ldd r18,Y+29
+ ldd r19,Y+30
+ ldd r26,Y+31
+ ldd r27,Y+32
+ std Z+28,r18
+ std Z+29,r19
+ std Z+30,r26
+ std Z+31,r27
+ ldd r18,Y+33
+ ldd r19,Y+34
+ ldd r26,Y+35
+ ldd r27,Y+36
+ std Z+32,r18
+ std Z+33,r19
+ std Z+34,r26
+ std Z+35,r27
+ ldd r18,Y+37
+ ldd r19,Y+38
+ ldd r26,Y+39
+ ldd r27,Y+40
+ std Z+36,r18
+ std Z+37,r19
+ std Z+38,r26
+ std Z+39,r27
+ ldd r18,Y+41
+ ldd r19,Y+42
+ ldd r26,Y+43
+ ldd r27,Y+44
+ std Z+40,r18
+ std Z+41,r19
+ std Z+42,r26
+ std Z+43,r27
+ ldd r18,Y+45
+ ldd r19,Y+46
+ ldd r26,Y+47
+ ldd r27,Y+48
+ std Z+44,r18
+ std Z+45,r19
+ std Z+46,r26
+ std Z+47,r27
+ adiw r28,49 /* release the 49 scratch bytes and rewrite the stack pointer with interrupts off */
+ in r0,0x3f
+ cli
+ out 0x3e,r29
+ out 0x3f,r0
+ out 0x3d,r28
+ pop r17
+ pop r16
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop
r11
+ pop r10
+ pop r9
+ pop r8
+ pop r7
+ pop r6
+ pop r5
+ pop r4
+ pop r3
+ pop r2
+ pop r29
+ pop r28
+ eor r1,r1 /* r1 = 0 on return (avr-gcc treats r1 as its zero register) */
+ ret
+ .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds
+
+ .text
+.global forkskinny_128_384_forward_tk /* Applies the shuffle at 83: r22 times to the first 48 bytes of the block addressed by r25:r24, in place. */
+ .type forkskinny_128_384_forward_tk, @function /* NOTE(review): presumably (state pointer, round count) under the avr-gcc ABI, fast-forwarding the tweakey - confirm against the C prototype. */
+forkskinny_128_384_forward_tk: /* NOTE(review): r22 is decremented before it is tested, so a count of 0 would run 256 passes - confirm callers never pass 0. */
+ push r28 /* save Y and r2-r17; all are popped again before ret */
+ push r29
+ push r2
+ push r3
+ push r4
+ push r5
+ push r6
+ push r7
+ push r8
+ push r9
+ push r10
+ push r11
+ push r12
+ push r13
+ push r14
+ push r15
+ push r16
+ push r17
+ movw r30,r24 /* Z = r25:r24, used below as the block base pointer */
+ in r28,0x3d /* Y = stack pointer (I/O 0x3d low, 0x3e high) */
+ in r29,0x3e
+ sbiw r28,32 /* reserve 32 scratch bytes, addressed as Y+1..Y+32 */
+ in r0,0x3f /* keep the status register (I/O 0x3f), interrupts off while the stack pointer is rewritten */
+ cli
+ out 0x3e,r29
+ out 0x3f,r0
+ out 0x3d,r28
+.L__stack_usage = 50
+ ld r4,Z /* block bytes 0..15 -> r4-r15, r24, r25, r16, r17 (kept in registers) */
+ ldd r5,Z+1
+ ldd r6,Z+2
+ ldd r7,Z+3
+ ldd r8,Z+4
+ ldd r9,Z+5
+ ldd r10,Z+6
+ ldd r11,Z+7
+ ldd r12,Z+8
+ ldd r13,Z+9
+ ldd r14,Z+10
+ ldd r15,Z+11
+ ldd r24,Z+12
+ ldd r25,Z+13
+ ldd r16,Z+14
+ ldd r17,Z+15
+ ldd r18,Z+16 /* block bytes 16..47 -> scratch Y+1..Y+32, four bytes at a time */
+ ldd r19,Z+17
+ ldd r20,Z+18
+ ldd r21,Z+19
+ std Y+1,r18
+ std Y+2,r19
+ std Y+3,r20
+ std Y+4,r21
+ ldd r18,Z+20
+ ldd r19,Z+21
+ ldd r20,Z+22
+ ldd r21,Z+23
+ std Y+5,r18
+ std Y+6,r19
+ std Y+7,r20
+ std Y+8,r21
+ ldd r18,Z+24
+ ldd r19,Z+25
+ ldd r20,Z+26
+ ldd r21,Z+27
+ std Y+9,r18
+ std Y+10,r19
+ std Y+11,r20
+ std Y+12,r21
+ ldd r18,Z+28
+ ldd r19,Z+29
+ ldd r20,Z+30
+ ldd r21,Z+31
+ std Y+13,r18
+ std Y+14,r19
+ std Y+15,r20
+ std Y+16,r21
+ ldd r18,Z+32
+ ldd r19,Z+33
+ ldd r20,Z+34
+ ldd r21,Z+35
+ std Y+17,r18
+ std Y+18,r19
+ std Y+19,r20
+ std Y+20,r21
+ ldd r18,Z+36
+ ldd r19,Z+37
+ ldd r20,Z+38
+ ldd r21,Z+39
+ std Y+21,r18
+ std Y+22,r19
+ std Y+23,r20
+ std Y+24,r21
+ ldd r18,Z+40
+ ldd r19,Z+41
+ ldd r20,Z+42
+ ldd r21,Z+43
+ std Y+25,r18
+ std Y+26,r19
+ std Y+27,r20
+ std Y+28,r21
+ ldd r18,Z+44
+ ldd r19,Z+45
+ ldd r20,Z+46
+ ldd r21,Z+47
+ std Y+29,r18
+ std Y+30,r19
+ std Y+31,r20
+ std Y+32,r21
+ push r31 /* keep the block pointer; Z is reused for table lookups */
+ push r30
+ ldi r30,lo8(table_2)
+ ldi r31,hi8(table_2)
+#if defined(RAMPZ)
+ ldi r23,hh8(table_2)
+ in r0,_SFR_IO_ADDR(RAMPZ)
+ push r0 /* caller's RAMPZ, restored after the loop */
+ out _SFR_IO_ADDR(RAMPZ),r23
+#endif
+83: /* one pass of the loop */
+ movw r18,r12 /* register-held 16 bytes: the first 8 move into the second 8; the old second 8 are reordered into the first 8 (no table) */
+ movw r20,r14
+ movw r26,r24
+ movw r2,r16
+ movw r12,r4
+ movw r14,r6
+ movw r24,r8
+ movw r16,r10
+ mov r4,r19
+ mov r5,r3
+ mov r6,r18
+ mov r7,r27
+ mov r8,r20
+ mov r9,r2
+ mov r10,r26
+ mov r11,r21
+ ldd r18,Y+9 /* scratch Y+1..16: same shuffle, with old bytes 9..16 first mapped through table_2 */
+ ldd r19,Y+10
+ ldd r20,Y+11
+ ldd r21,Y+12
+ ldd r26,Y+13
+ ldd r27,Y+14
+ ldd r2,Y+15
+ ldd r3,Y+16
+ ldd r23,Y+1
+ std Y+9,r23
+ ldd r23,Y+2
+ std Y+10,r23
+ ldd r23,Y+3
+ std Y+11,r23
+ ldd r23,Y+4
+ std Y+12,r23
+ ldd r23,Y+5
+ std Y+13,r23
+ ldd r23,Y+6
+ std Y+14,r23
+ ldd r23,Y+7
+ std Y+15,r23
+ ldd r23,Y+8
+ std Y+16,r23
+ mov r30,r18 /* lookup idiom: the byte becomes the low half of Z (presumably the tables are 256-byte aligned - directive not visible here) */
+#if defined(RAMPZ)
+ elpm r18,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r18,Z
+#elif defined(__AVR_TINY__)
+ ld r18,Z
+#else
+ lpm
+ mov r18,r0
+#endif
+ mov r30,r19
+#if defined(RAMPZ)
+ elpm r19,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r19,Z
+#elif defined(__AVR_TINY__)
+ ld r19,Z
+#else
+ lpm
+ mov r19,r0
+#endif
+ mov r30,r20
+#if defined(RAMPZ)
+ elpm r20,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r20,Z
+#elif defined(__AVR_TINY__)
+ ld r20,Z
+#else
+ lpm
+ mov r20,r0
+#endif
+ mov r30,r21
+#if defined(RAMPZ)
+ elpm r21,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r21,Z
+#elif defined(__AVR_TINY__)
+ ld r21,Z
+#else
+ lpm
+ mov r21,r0
+#endif
+ mov r30,r26
+#if defined(RAMPZ)
+ elpm r26,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r26,Z
+#elif defined(__AVR_TINY__)
+ ld r26,Z
+#else
+ lpm
+ mov r26,r0
+#endif
+ mov r30,r27
+#if defined(RAMPZ)
+ elpm r27,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r27,Z
+#elif defined(__AVR_TINY__)
+ ld r27,Z
+#else
+ lpm
+ mov r27,r0
+#endif
+ mov r30,r2
+#if defined(RAMPZ)
+ elpm r2,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r2,Z
+#elif defined(__AVR_TINY__)
+ ld r2,Z
+#else
+ lpm
+ mov r2,r0
+#endif
+ mov r30,r3
+#if defined(RAMPZ)
+ elpm r3,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r3,Z
+#elif defined(__AVR_TINY__)
+ ld r3,Z
+#else
+ lpm
+ mov r3,r0
+#endif
+ std Y+1,r19
+ std Y+2,r3
+ std Y+3,r18
+ std Y+4,r27
+ std Y+5,r20
+ std Y+6,r2
+ std Y+7,r26
+ std Y+8,r21
+ ldi r30,lo8(table_3) /* scratch Y+17..32: same shuffle, with old bytes 25..32 first mapped through table_3 */
+ ldi r31,hi8(table_3)
+#if defined(RAMPZ)
+ ldi r18,hh8(table_3)
+ out _SFR_IO_ADDR(RAMPZ),r18
+#endif
+ ldd r18,Y+25
+ ldd r19,Y+26
+ ldd r20,Y+27
+ ldd r21,Y+28
+ ldd r26,Y+29
+ ldd r27,Y+30
+ ldd r2,Y+31
+ ldd r3,Y+32
+ ldd r23,Y+17
+ std Y+25,r23
+ ldd r23,Y+18
+ std Y+26,r23
+ ldd r23,Y+19
+ std Y+27,r23
+ ldd r23,Y+20
+ std Y+28,r23
+ ldd r23,Y+21
+ std Y+29,r23
+ ldd r23,Y+22
+ std Y+30,r23
+ ldd r23,Y+23
+ std Y+31,r23
+ ldd r23,Y+24
+ std Y+32,r23
+ mov r30,r18
+#if defined(RAMPZ)
+ elpm r18,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r18,Z
+#elif defined(__AVR_TINY__)
+ ld r18,Z
+#else
+ lpm
+ mov r18,r0
+#endif
+ mov r30,r19
+#if defined(RAMPZ)
+ elpm r19,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r19,Z
+#elif defined(__AVR_TINY__)
+ ld r19,Z
+#else
+ lpm
+ mov r19,r0
+#endif
+ mov r30,r20
+#if defined(RAMPZ)
+ elpm r20,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r20,Z
+#elif defined(__AVR_TINY__)
+ ld r20,Z
+#else
+ lpm
+ mov r20,r0
+#endif
+ mov r30,r21
+#if defined(RAMPZ)
+ elpm r21,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r21,Z
+#elif defined(__AVR_TINY__)
+ ld r21,Z
+#else
+ lpm
+ mov r21,r0
+#endif
+ mov r30,r26
+#if defined(RAMPZ)
+ elpm r26,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r26,Z
+#elif defined(__AVR_TINY__)
+ ld r26,Z
+#else
+ lpm
+ mov r26,r0
+#endif
+ mov r30,r27
+#if defined(RAMPZ)
+ elpm r27,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r27,Z
+#elif defined(__AVR_TINY__)
+ ld r27,Z
+#else
+ lpm
+ mov r27,r0
+#endif
+ mov r30,r2
+#if defined(RAMPZ)
+ elpm r2,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r2,Z
+#elif defined(__AVR_TINY__)
+ ld r2,Z
+#else
+ lpm
+ mov r2,r0
+#endif
+ mov r30,r3
+#if defined(RAMPZ)
+ elpm r3,Z
+#elif defined(__AVR_HAVE_LPMX__)
+ lpm r3,Z
+#elif defined(__AVR_TINY__)
+ ld r3,Z
+#else
+ lpm
+ mov r3,r0
+#endif
+ std Y+17,r19
+ std Y+18,r3
+ std Y+19,r18
+ std Y+20,r27
+ std Y+21,r20
+ std Y+22,r2
+ std Y+23,r26
+ std Y+24,r21
+ ldi r30,lo8(table_2) /* back to table_2 for the next pass */
+ ldi r31,hi8(table_2)
+#if defined(RAMPZ)
+ ldi r18,hh8(table_2)
+ out _SFR_IO_ADDR(RAMPZ),r18
+#endif
+ dec r22 /* one pass done; leave when the counter reaches 0 */
+ breq 5183f /* breq over an rjmp: the loop body is too long for a direct conditional branch back */
+ rjmp 83b
+5183:
+#if defined(RAMPZ)
+ pop r0 /* restore the caller's RAMPZ */
+ out _SFR_IO_ADDR(RAMPZ),r0
+#endif
+ pop r30 /* Z = block pointer again */
+ pop r31
+ st Z,r4 /* registers -> block bytes 0..15 */
+ std Z+1,r5
+ std Z+2,r6
+ std Z+3,r7
+ std Z+4,r8
+ std Z+5,r9
+ std Z+6,r10
+ std Z+7,r11
+ std Z+8,r12
+ std Z+9,r13
+ std Z+10,r14
+ std Z+11,r15
+ std Z+12,r24
+ std Z+13,r25
+ std Z+14,r16
+ std Z+15,r17
+ ldd r18,Y+1 /* scratch Y+1..Y+32 -> block bytes 16..47 */
+ ldd r19,Y+2
+ ldd r20,Y+3
+ ldd r21,Y+4
+ std Z+16,r18
+ std Z+17,r19
+ std Z+18,r20
+ std Z+19,r21
+ ldd r18,Y+5
+ ldd r19,Y+6
+ ldd r20,Y+7
+ ldd r21,Y+8
+ std Z+20,r18
+ std Z+21,r19
+ std Z+22,r20
+ std Z+23,r21
+ ldd r18,Y+9
+ ldd r19,Y+10
+ ldd r20,Y+11
+ ldd r21,Y+12
+ std Z+24,r18
+ std Z+25,r19
+ std Z+26,r20
+ std Z+27,r21
+ ldd r18,Y+13
+ ldd r19,Y+14
+ ldd r20,Y+15
+ ldd r21,Y+16
+ std Z+28,r18
+ std Z+29,r19
+ std Z+30,r20
+ std Z+31,r21
+ ldd r18,Y+17
+ ldd r19,Y+18
+ ldd r20,Y+19
+ ldd r21,Y+20
+ std Z+32,r18
+ std Z+33,r19
+ std Z+34,r20
+ std Z+35,r21
+ ldd r18,Y+21
+ ldd r19,Y+22
+ ldd r20,Y+23
+ ldd r21,Y+24
+ std Z+36,r18
+ std Z+37,r19
+ std Z+38,r20
+ std Z+39,r21
+ ldd r18,Y+25
+ ldd r19,Y+26
+ ldd r20,Y+27
+ ldd r21,Y+28
+ std Z+40,r18
+ std Z+41,r19
+ std Z+42,r20
+ std Z+43,r21
+ ldd r18,Y+29
+ ldd r19,Y+30
+ ldd r20,Y+31
+ ldd r21,Y+32
+ std Z+44,r18
+ std Z+45,r19
+ std Z+46,r20
+ std Z+47,r21
+ adiw r28,32 /* release the 32 scratch bytes and rewrite the stack pointer with interrupts off */
+ in r0,0x3f
+ cli
+ out 0x3e,r29
+ out 0x3f,r0
+ out 0x3d,r28
+ pop r17
+ pop r16
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop r11
+ pop r10
+ pop r9
+ pop r8
+ pop r7
+ pop r6
+ pop r5
+ pop r4
+ pop r3
+ pop r2
+ pop r29
+ pop r28
+ ret
+ .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk
+
+ .text
+.global forkskinny_128_384_reverse_tk
+ .type forkskinny_128_384_reverse_tk, @function
+forkskinny_128_384_reverse_tk:
+ push r28
+ push r29
+ push r2
+ push r3
+ push r4
+ push r5
+ push r6
+ push r7
+ push r8
+ push r9
+ push r10
+ push r11
+ push r12
+ push r13
+ push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 
+ ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 + 
ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd 
r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + 
std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 
+#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out 
_SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 
+#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push 
r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if 
defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or 
r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm 
r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std 
Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + 
pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 + pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop 
r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + 
mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or 
r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + and r11,r16 + or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/internal-forkskinny.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/internal-forkskinny.c index af29f77..b96a04c 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/internal-forkskinny.c @@ -22,12 +22,23 @@ #include "internal-forkskinny.h" #include "internal-skinnyutil.h" -#include /** * \brief 7-bit round constants for all ForkSkinny block ciphers. 
*/ -static unsigned char const RC[87] = {0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7e, 0x7d, 0x7b, 0x77, 0x6f, 0x5f, 0x3e, 0x7c, 0x79, 0x73, 0x67, 0x4f, 0x1e, 0x3d, 0x7a, 0x75, 0x6b, 0x57, 0x2e, 0x5c, 0x38, 0x70, 0x61, 0x43, 0x06, 0x0d, 0x1b, 0x37, 0x6e, 0x5d, 0x3a, 0x74, 0x69, 0x53, 0x26, 0x4c, 0x18, 0x31, 0x62, 0x45, 0x0a, 0x15, 0x2b, 0x56, 0x2c, 0x58, 0x30, 0x60, 0x41, 0x02, 0x05, 0x0b, 0x17, 0x2f, 0x5e, 0x3c, 0x78, 0x71, 0x63, 0x47, 0x0e, 0x1d, 0x3b, 0x76, 0x6d, 0x5b,0x36, 0x6c, 0x59, 0x32, 0x64, 0x49, 0x12, 0x25, 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10}; +static unsigned char const RC[87] = { + 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7e, 0x7d, + 0x7b, 0x77, 0x6f, 0x5f, 0x3e, 0x7c, 0x79, 0x73, + 0x67, 0x4f, 0x1e, 0x3d, 0x7a, 0x75, 0x6b, 0x57, + 0x2e, 0x5c, 0x38, 0x70, 0x61, 0x43, 0x06, 0x0d, + 0x1b, 0x37, 0x6e, 0x5d, 0x3a, 0x74, 0x69, 0x53, + 0x26, 0x4c, 0x18, 0x31, 0x62, 0x45, 0x0a, 0x15, + 0x2b, 0x56, 0x2c, 0x58, 0x30, 0x60, 0x41, 0x02, + 0x05, 0x0b, 0x17, 0x2f, 0x5e, 0x3c, 0x78, 0x71, + 0x63, 0x47, 0x0e, 0x1d, 0x3b, 0x76, 0x6d, 0x5b, + 0x36, 0x6c, 0x59, 0x32, 0x64, 0x49, 0x12, 0x25, + 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 +}; static const uint32_t T[256] = {0x65656565, 0x4c4c4c4c, 0x6a6a6a6a, 0x42424242, 0x4b4b4b4b, 0x63636363, 0x43434343, 0x6b6b6b6b, 0x55555555, 0x75757575, 0x5a5a5a5a, 0x7a7a7a7a, 0x53535353, 0x73737373, 0x5b5b5b5b, 0x7b7b7b7b, 0x35353535, 0x8c8c8c8c, 0x3a3a3a3a, 0x81818181, 0x89898989, 0x33333333, 0x80808080, 0x3b3b3b3b, 0x95959595, 0x25252525, 0x98989898, 0x2a2a2a2a, 0x90909090, 0x23232323, 0x99999999, 0x2b2b2b2b, 0xe5e5e5e5, 0xcccccccc, 0xe8e8e8e8, 0xc1c1c1c1, 0xc9c9c9c9, 0xe0e0e0e0, 0xc0c0c0c0, 0xe9e9e9e9, 0xd5d5d5d5, 0xf5f5f5f5, 0xd8d8d8d8, 0xf8f8f8f8, 0xd0d0d0d0, 0xf0f0f0f0, 0xd9d9d9d9, 0xf9f9f9f9, 0xa5a5a5a5, 0x1c1c1c1c, 0xa8a8a8a8, 0x12121212, 0x1b1b1b1b, 0xa0a0a0a0, 0x13131313, 0xa9a9a9a9, 0x05050505, 0xb5b5b5b5, 0x0a0a0a0a, 0xb8b8b8b8, 0x03030303, 0xb0b0b0b0, 0x0b0b0b0b, 0xb9b9b9b9, 0x32323232, 0x88888888, 0x3c3c3c3c, 0x85858585, 
0x8d8d8d8d, 0x34343434, 0x84848484, 0x3d3d3d3d, 0x91919191, 0x22222222, 0x9c9c9c9c, 0x2c2c2c2c, 0x94949494, 0x24242424, 0x9d9d9d9d, 0x2d2d2d2d, 0x62626262, 0x4a4a4a4a, 0x6c6c6c6c, 0x45454545, 0x4d4d4d4d, 0x64646464, 0x44444444, 0x6d6d6d6d, 0x52525252, 0x72727272, 0x5c5c5c5c, 0x7c7c7c7c, 0x54545454, 0x74747474, 0x5d5d5d5d, 0x7d7d7d7d, 0xa1a1a1a1, 0x1a1a1a1a, 0xacacacac, 0x15151515, 0x1d1d1d1d, 0xa4a4a4a4, 0x14141414, 0xadadadad, 0x02020202, 0xb1b1b1b1, 0x0c0c0c0c, 0xbcbcbcbc, 0x04040404, 0xb4b4b4b4, 0x0d0d0d0d, 0xbdbdbdbd, 0xe1e1e1e1, 0xc8c8c8c8, 0xecececec, 0xc5c5c5c5, 0xcdcdcdcd, 0xe4e4e4e4, 0xc4c4c4c4, 0xedededed, 0xd1d1d1d1, 0xf1f1f1f1, 0xdcdcdcdc, 0xfcfcfcfc, 0xd4d4d4d4, 0xf4f4f4f4, 0xdddddddd, 0xfdfdfdfd, 0x36363636, 0x8e8e8e8e, 0x38383838, 0x82828282, 0x8b8b8b8b, 0x30303030, 0x83838383, 0x39393939, 0x96969696, 0x26262626, 0x9a9a9a9a, 0x28282828, 0x93939393, 0x20202020, 0x9b9b9b9b, 0x29292929, 0x66666666, 0x4e4e4e4e, 0x68686868, 0x41414141, 0x49494949, 0x60606060, 0x40404040, 0x69696969, 0x56565656, 0x76767676, 0x58585858, 0x78787878, 0x50505050, 0x70707070, 0x59595959, 0x79797979, 0xa6a6a6a6, 0x1e1e1e1e, 0xaaaaaaaa, 0x11111111, 0x19191919, 0xa3a3a3a3, 0x10101010, 0xabababab, 0x06060606, 0xb6b6b6b6, 0x08080808, 0xbabababa, 0x00000000, 0xb3b3b3b3, 0x09090909, 0xbbbbbbbb, 0xe6e6e6e6, 0xcececece, 0xeaeaeaea, 0xc2c2c2c2, 0xcbcbcbcb, 0xe3e3e3e3, 0xc3c3c3c3, 0xebebebeb, 0xd6d6d6d6, 0xf6f6f6f6, 0xdadadada, 0xfafafafa, 0xd3d3d3d3, 0xf3f3f3f3, 0xdbdbdbdb, 0xfbfbfbfb, 0x31313131, 0x8a8a8a8a, 0x3e3e3e3e, 0x86868686, 0x8f8f8f8f, 0x37373737, 0x87878787, 0x3f3f3f3f, 0x92929292, 0x21212121, 0x9e9e9e9e, 0x2e2e2e2e, 0x97979797, 0x27272727, 0x9f9f9f9f, 0x2f2f2f2f, 0x61616161, 0x48484848, 0x6e6e6e6e, 0x46464646, 0x4f4f4f4f, 0x67676767, 0x47474747, 0x6f6f6f6f, 0x51515151, 0x71717171, 0x5e5e5e5e, 0x7e7e7e7e, 0x57575757, 0x77777777, 0x5f5f5f5f, 0x7f7f7f7f, 0xa2a2a2a2, 0x18181818, 0xaeaeaeae, 0x16161616, 0x1f1f1f1f, 0xa7a7a7a7, 0x17171717, 0xafafafaf, 0x01010101, 0xb2b2b2b2, 
0x0e0e0e0e, 0xbebebebe, 0x07070707, 0xb7b7b7b7, 0x0f0f0f0f, 0xbfbfbfbf, 0xe2e2e2e2, 0xcacacaca, 0xeeeeeeee, 0xc6c6c6c6, 0xcfcfcfcf, 0xe7e7e7e7, 0xc7c7c7c7, 0xefefefef, 0xd2d2d2d2, 0xf2f2f2f2, 0xdededede, 0xfefefefe, 0xd7d7d7d7, 0xf7f7f7f7, 0xdfdfdfdf, 0xffffffff}; static const uint32_t T_inv[256] = {0xacacacac, 0xe8e8e8e8, 0x68686868, 0x3c3c3c3c, 0x6c6c6c6c, 0x38383838, 0xa8a8a8a8, 0xecececec, 0xaaaaaaaa, 0xaeaeaeae, 0x3a3a3a3a, 0x3e3e3e3e, 0x6a6a6a6a, 0x6e6e6e6e, 0xeaeaeaea, 0xeeeeeeee, 0xa6a6a6a6, 0xa3a3a3a3, 0x33333333, 0x36363636, 0x66666666, 0x63636363, 0xe3e3e3e3, 0xe6e6e6e6, 0xe1e1e1e1, 0xa4a4a4a4, 0x61616161, 0x34343434, 0x31313131, 0x64646464, 0xa1a1a1a1, 0xe4e4e4e4, 0x8d8d8d8d, 0xc9c9c9c9, 0x49494949, 0x1d1d1d1d, 0x4d4d4d4d, 0x19191919, 0x89898989, 0xcdcdcdcd, 0x8b8b8b8b, 0x8f8f8f8f, 0x1b1b1b1b, 0x1f1f1f1f, 0x4b4b4b4b, 0x4f4f4f4f, 0xcbcbcbcb, 0xcfcfcfcf, 0x85858585, 0xc0c0c0c0, 0x40404040, 0x15151515, 0x45454545, 0x10101010, 0x80808080, 0xc5c5c5c5, 0x82828282, 0x87878787, 0x12121212, 0x17171717, 0x42424242, 0x47474747, 0xc2c2c2c2, 0xc7c7c7c7, 0x96969696, 0x93939393, 0x03030303, 0x06060606, 0x56565656, 0x53535353, 0xd3d3d3d3, 0xd6d6d6d6, 0xd1d1d1d1, 0x94949494, 0x51515151, 0x04040404, 0x01010101, 0x54545454, 0x91919191, 0xd4d4d4d4, 0x9c9c9c9c, 0xd8d8d8d8, 0x58585858, 0x0c0c0c0c, 0x5c5c5c5c, 0x08080808, 0x98989898, 0xdcdcdcdc, 0x9a9a9a9a, 0x9e9e9e9e, 0x0a0a0a0a, 0x0e0e0e0e, 0x5a5a5a5a, 0x5e5e5e5e, 0xdadadada, 0xdededede, 0x95959595, 0xd0d0d0d0, 0x50505050, 0x05050505, 0x55555555, 0x00000000, 0x90909090, 0xd5d5d5d5, 0x92929292, 0x97979797, 0x02020202, 0x07070707, 0x52525252, 0x57575757, 0xd2d2d2d2, 0xd7d7d7d7, 0x9d9d9d9d, 0xd9d9d9d9, 0x59595959, 0x0d0d0d0d, 0x5d5d5d5d, 0x09090909, 0x99999999, 0xdddddddd, 0x9b9b9b9b, 0x9f9f9f9f, 0x0b0b0b0b, 0x0f0f0f0f, 0x5b5b5b5b, 0x5f5f5f5f, 0xdbdbdbdb, 0xdfdfdfdf, 0x16161616, 0x13131313, 0x83838383, 0x86868686, 0x46464646, 0x43434343, 0xc3c3c3c3, 0xc6c6c6c6, 0x41414141, 0x14141414, 0xc1c1c1c1, 0x84848484, 0x11111111, 
0x44444444, 0x81818181, 0xc4c4c4c4, 0x1c1c1c1c, 0x48484848, 0xc8c8c8c8, 0x8c8c8c8c, 0x4c4c4c4c, 0x18181818, 0x88888888, 0xcccccccc, 0x1a1a1a1a, 0x1e1e1e1e, 0x8a8a8a8a, 0x8e8e8e8e, 0x4a4a4a4a, 0x4e4e4e4e, 0xcacacaca, 0xcececece, 0x35353535, 0x60606060, 0xe0e0e0e0, 0xa5a5a5a5, 0x65656565, 0x30303030, 0xa0a0a0a0, 0xe5e5e5e5, 0x32323232, 0x37373737, 0xa2a2a2a2, 0xa7a7a7a7, 0x62626262, 0x67676767, 0xe2e2e2e2, 0xe7e7e7e7, 0x3d3d3d3d, 0x69696969, 0xe9e9e9e9, 0xadadadad, 0x6d6d6d6d, 0x39393939, 0xa9a9a9a9, 0xedededed, 0x3b3b3b3b, 0x3f3f3f3f, 0xabababab, 0xafafafaf, 0x6b6b6b6b, 0x6f6f6f6f, 0xebebebeb, 0xefefefef, 0x26262626, 0x23232323, 0xb3b3b3b3, 0xb6b6b6b6, 0x76767676, 0x73737373, 0xf3f3f3f3, 0xf6f6f6f6, 0x71717171, 0x24242424, 0xf1f1f1f1, 0xb4b4b4b4, 0x21212121, 0x74747474, 0xb1b1b1b1, 0xf4f4f4f4, 0x2c2c2c2c, 0x78787878, 0xf8f8f8f8, 0xbcbcbcbc, 0x7c7c7c7c, 0x28282828, 0xb8b8b8b8, 0xfcfcfcfc, 0x2a2a2a2a, 0x2e2e2e2e, 0xbabababa, 0xbebebebe, 0x7a7a7a7a, 0x7e7e7e7e, 0xfafafafa, 0xfefefefe, 0x25252525, 0x70707070, 0xf0f0f0f0, 0xb5b5b5b5, 0x75757575, 0x20202020, 0xb0b0b0b0, 0xf5f5f5f5, 0x22222222, 0x27272727, 0xb2b2b2b2, 0xb7b7b7b7, 0x72727272, 0x77777777, 0xf2f2f2f2, 0xf7f7f7f7, 0x2d2d2d2d, 0x79797979, 0xf9f9f9f9, 0xbdbdbdbd, 0x7d7d7d7d, 0x29292929, 0xb9b9b9b9, 0xfdfdfdfd, 0x2b2b2b2b, 0x2f2f2f2f, 0xbbbbbbbb, 0xbfbfbfbf, 0x7b7b7b7b, 0x7f7f7f7f, 0xfbfbfbfb, 0xffffffff}; @@ -36,77 +47,12 @@ static const uint32_t AC_column0[87] = {0x1000101, 0x3000303, 0x7000707, 0xf000f static const uint32_t AC_column1[87] = {0x0, 0x0, 0x0, 0x0, 0x10000, 0x30000, 0x70000, 0x70000, 0x70000, 0x70000, 0x60000, 0x50000, 0x30000, 0x70000, 0x70000, 0x70000, 0x60000, 0x40000, 0x10000, 0x30000, 0x70000, 0x70000, 0x60000, 0x50000, 0x20000, 0x50000, 0x30000, 0x70000, 0x60000, 0x40000, 0x0, 0x0, 0x10000, 0x30000, 0x60000, 0x50000, 0x30000, 0x70000, 0x60000, 0x50000, 0x20000, 0x40000, 0x10000, 0x30000, 0x60000, 0x40000, 0x0, 0x10000, 0x20000, 0x50000, 0x20000, 0x50000, 0x30000, 0x60000, 0x40000, 0x0, 0x0, 
0x0, 0x10000, 0x20000, 0x50000, 0x30000, 0x70000, 0x70000, 0x60000, 0x40000, 0x0, 0x10000, 0x30000, 0x70000, 0x60000, 0x50000, 0x30000, 0x60000, 0x50000, 0x30000, 0x60000, 0x40000, 0x10000, 0x20000, 0x40000, 0x10000, 0x20000, 0x50000, 0x20000, 0x40000, 0x10000}; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 +#if !defined(__AVR__) -/** - * \brief State information for ForkSkinny-128-256. - */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_256_state_t; - -#define load_column_8(dest, src) \ - do { \ - dest[0] = (src[12]) << 24 | (src[8]) << 16 | (src[4]) << 8 | (src[0]); \ - dest[1] = (src[13]) << 24 | (src[9]) << 16 | (src[5]) << 8 | (src[1]); \ - dest[2] = (src[14]) << 24 | (src[10]) << 16 | (src[6]) << 8 | (src[2]); \ - dest[3] = (src[15]) << 24 | (src[11]) << 16 | (src[7]) << 8 | (src[3]); \ - } while(0) - -#define store_column_8(dest, src) \ - do { \ - dest[0] = (uint8_t) (src[0]); dest[1] = (uint8_t) (src[1]); dest[2] = (uint8_t) (src[2]); dest[3] = (uint8_t) (src[3]); \ - dest[4] = (uint8_t) (src[0]>>8); dest[5] = (uint8_t) (src[1]>>8); dest[6] = (uint8_t) (src[2]>>8); dest[7] = (uint8_t) (src[3]>>8); \ - dest[8] = (uint8_t) (src[0]>>16);dest[9] = (uint8_t) (src[1]>>16);dest[10]= (uint8_t) (src[2]>>16);dest[11]= (uint8_t)(src[3]>>16); \ - dest[12]= (uint8_t) (src[0]>>24);dest[13]= (uint8_t) (src[1]>>24);dest[14]= (uint8_t) (src[2]>>24);dest[15]= (uint8_t)(src[3]>>24); \ - } while(0) - -#define rows_to_columns_32(columns, row0, row1, row2, row3) \ - do { \ - columns[0] = (row3 & 0xFF) << 24|(row2 & 0xFF) << 16|(row1 & 0xFF) << 8 | (row0 & 0xFF);\ - columns[1] = (row3 & 0xFF00) << 16|(row2 & 0xFF00) << 8 |(row1 & 0xFF00) | 
(row0>>8 & 0xFF);\ - columns[2] = (row3 & 0xFF0000) << 8 |(row2 & 0xFF0000) |(row1 & 0xFF0000) >> 8 | (row0>>16 & 0xFF);\ - columns[3] = (row3 & 0xFF000000) |(row2 & 0xFF000000) >> 8 |(row1 & 0xFF000000) >> 16| (row0>>24 & 0xFF);\ - } while(0) - -#define columns_to_rows_32(rows, column0, column1, column2, column3) rows_to_columns_32(rows, column0, column1, column2, column3) - -#define TK_to_column_256(columns, state) \ - do { \ - uint32_t TK0 = state->TK1[0] ^ state->TK2[0];\ - uint32_t TK1 = state->TK1[1] ^ state->TK2[1]; \ - uint32_t tk00 = TK0 & 0xFF; \ - uint32_t tk01 = TK0 & 0xFF00;\ - uint32_t tk02 = TK0 & 0xFF0000;\ - uint32_t tk03 = TK0 & 0xFF000000;\ - columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ - columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ - columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ - columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ - } while(0) - -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { - uint32_t s0, s1, s2, s3; + uint32_t s0, s1, s2, s3, t0, t1, t2, t3; uint32_t tk_columns[4]; /* Load the state into local variables */ @@ -115,102 +61,45 @@ static void forkskinny_128_256_round s2 = state->S[2]; s3 = state->S[3]; - TK_to_column_256(tk_columns, state); + /* Perform all requested rounds */ + for (; first < last; ++first) { - state->S[0] = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[round]; - state->S[1] = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[round]; - state->S[2] = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; - state->S[3] = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) ^ tk_columns[3]; + TK_to_column_256(tk_columns, state); - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); -} + t0 = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[first]; + t1 = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[first]; + t2 = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; + t3 = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) 
^ tk_columns[3]; + /* Permute TK1 and TK2 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); -void forkskinny_128_256_encrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_256_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - - /* State stored per column */ - load_column_8(state.S, input); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) { - forkskinny_128_256_round(&state, round); + s0 = t0; s1 = t1; s2 = t2; s3 = t3; } - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - store_column_8(output_right, state.S); - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x51051001; /* Branching constant */ - state.S[1] ^= 0xa20a2002; - state.S[2] ^= 0x44144104; - state.S[3] ^= 0x88288208; - - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - 
FORKSKINNY_128_256_ROUNDS_AFTER); - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_256_round(&state, round); - } - store_column_8(output_left, state.S); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - store_column_8(output_right, state.S); - } + /* Save the local variables back to the state */ + state->S[0] = s0; + state->S[1] = s1; + state->S[2] = s2; + state->S[3] = s3; } -static void forkskinny_128_256_inv_round_first(forkskinny_128_256_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3, temp; - uint8_t rc; +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) +{ + uint32_t s0, s1, s2, s3, t0, t1, t2, t3, tk0, tk1; + uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; + t0 = state->S[0]; + t1 = state->S[1]; + t2 = state->S[2]; + t3 = state->S[3]; + + // FIRST ROUND /* Permute TK1 and TK2 for the next round */ skinny128_inv_LFSR2(state->TK2[0]); @@ -219,215 +108,128 @@ static void forkskinny_128_256_inv_round_first(forkskinny_128_256_state_t *state skinny128_inv_permute_tk(state->TK2); /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - temp = state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s1 ^= leftRotate8(temp); - s2 ^= 0x020000; - - /* Save the local variables back to the state but first convert them to columns*/ - rows_to_columns_32(state->S, s0, s1, s2, s3); -} - -static void forkskinny_128_256_inv_round(forkskinny_128_256_state_t 
*state, unsigned round){ - uint32_t s0, s1, s2, s3, tk0, tk1; - uint8_t rc; - - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - - state->S[0] = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); - state->S[1] = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); - state->S[2] = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); - state->S[3] = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + s0 = t0; + t0 = t1; + t1 = t2; + t2 = t3; + t3 = s0 ^ t2; + t2 ^= t0; + t1 ^= t2; /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - tk0 = state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - tk1 = state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - tk1 = leftRotate8(tk1); - // temp = 0x020000; - state->S[0] ^= (((tk0) &0xff) | ((tk1<<8)&0xff00)); - state->S[1] ^= (((tk0>>8) &0xff) | ((tk1)&0xff00)); - state->S[2] ^= (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; - state->S[3] ^= (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); -} - -static void forkskinny_128_256_inv_round_final(forkskinny_128_256_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3; + rc = RC[--first]; + t0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + t1 ^= leftRotate8((state->TK1[1] ^ state->TK2[1] ^ (rc >> 4))); + t2 ^= 0x020000; + + /* Save the local variables in temp but first convert them to columns*/ + rows_to_columns_32(s0, 
s1, s2, s3, t0, t1, t2, t3); + + + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + + t0 = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); + t1 = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); + t2 = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); + t3 = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + + /* XOR the shifted round constant and the shifted subkey for this round */ + rc = RC[--first]; + tk0 = state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + tk1 = leftRotate8((state->TK1[1] ^ state->TK2[1] ^ (rc >> 4))); + + s0 = t0 ^ (((tk0) &0xff) | ((tk1<<8)&0xff00)); + s1 = t1 ^ (((tk0>>8) &0xff) | ((tk1)&0xff00)); + s2 = t2 ^ (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; + s3 = t3 ^ (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); + } - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; + // FINAL ROUND - /* Apply the inverse of the S-box to all cells in the state */ + /* Apply the inverse of the S-box to all cells in the state */ skinny128_inv_sbox(s0); skinny128_inv_sbox(s1); skinny128_inv_sbox(s2); skinny128_inv_sbox(s3); /* Save the local variables back to the state but first convert them back to rows*/ - columns_to_rows_32(state->S, s0, s1, s2, s3); + columns_to_rows_32(t0, t1, t2, t3, s0, s1, s2, s3); /* Shift the cells in the rows left, which moves the cell * values down closer to the LSB. 
That is, we do a right * rotate on the word to rotate the cells in the word left */ - state->S[1] = rightRotate8(state->S[1]); - state->S[2] = rightRotate16(state->S[2]); - state->S[3] = rightRotate24(state->S[3]); + state->S[0] = t0; + state->S[1] = rightRotate8(t1); + state->S[2] = rightRotate16(t2); + state->S[3] = rightRotate24(t3); } -void forkskinny_128_256_decrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds) { - forkskinny_128_256_state_t state; - forkskinny_128_256_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - forkskinny_128_256_inv_round_first(&state, (FORKSKINNY_128_256_ROUNDS_BEFORE+FORKSKINNY_128_256_ROUNDS_AFTER*2)-1); - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2)-1; - round > (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); --round) { - forkskinny_128_256_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + --rounds; } - forkskinny_128_256_inv_round_final(&state, (FORKSKINNY_128_256_ROUNDS_BEFORE+FORKSKINNY_128_256_ROUNDS_AFTER)); - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - } - - /* Save the state and the tweakey at the forking point, convert state to columns */ - memcpy(fstate.TK1, state.TK1, 16); - memcpy(fstate.TK2, state.TK2, 16); - rows_to_columns_32(fstate.S,state.S[0],state.S[1], state.S[2], state.S[3]); +} - /* Generate the left output block after another "before" rounds */ - forkskinny_128_256_inv_round_first(&state, FORKSKINNY_128_256_ROUNDS_BEFORE-1); - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE-1; round > 0; --round) { - 
forkskinny_128_256_inv_round(&state, round - 1); +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + } + rounds -= 16; } - forkskinny_128_256_inv_round_final(&state, 0); - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + --rounds; } - store_column_8(output_right,fstate.S); - } -/** - * \brief Number of rounds of ForkSkinny-128-384 before forking. - */ -#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 - -/** - * \brief Number of rounds of ForkSkinny-128-384 after forking. - */ -#define FORKSKINNY_128_384_ROUNDS_AFTER 31 - -/** - * \brief State information for ForkSkinny-128-384. 
- */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -#define TK_to_column_384(columns, state) \ - do { \ - uint32_t TK0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0];\ - uint32_t TK1 = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1];\ - uint32_t tk00 = TK0 & 0xFF; \ - uint32_t tk01 = TK0 & 0xFF00;\ - uint32_t tk02 = TK0 & 0xFF0000;\ - uint32_t tk03 = TK0 & 0xFF000000;\ - columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ - columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ - columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ - columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ - } while(0) - -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { - uint32_t s0, s1, s2, s3; + uint32_t s0, s1, s2, s3, t0, t1, t2, t3; uint32_t tk_columns[4]; /* Load the state into local variables */ @@ -436,148 +238,48 @@ static void forkskinny_128_384_round s2 = state->S[2]; s3 = state->S[3]; - TK_to_column_384(tk_columns, state); - - state->S[0] = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[round]; - state->S[1] = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[round]; - state->S[2] = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; - state->S[3] = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) ^ tk_columns[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_permute_tk(state->TK3); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); - skinny128_LFSR3(state->TK3[0]); - skinny128_LFSR3(state->TK3[1]); -} + /* Perform all requested rounds */ + for (; first < last; ++first) { -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - 
state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - - /* State stored per column */ - load_column_8(state.S, input); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, round); - } + TK_to_column_384(tk_columns, state); - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - store_column_8(output_right, state.S); + t0 = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[first]; + t1 = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[first]; + t2 = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; + t3 = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) ^ tk_columns[3]; - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x51051001; /* Branching constant */ - state.S[1] ^= 0xa20a2002; - state.S[2] ^= 0x44144104; - state.S[3] ^= 0x88288208; - 
for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, round); - } - store_column_8(output_left, state.S); - - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - store_column_8(output_right, state.S); + /* Permute TK1, TK2, and TK3 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + + s0 = t0; s1 = t1; s2 = t2; s3 = t3; } -} + /* Save the local variables back to the state */ + state->S[0] = s0; + state->S[1] = s1; + state->S[2] = s2; + state->S[3] = s3; +} -static void forkskinny_128_384_inv_round_first(forkskinny_128_384_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3, temp; - uint8_t rc; +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) +{ + uint32_t s0, s1, s2, s3, t0, t1, t2, t3, tk0, tk1; + uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_LFSR3(state->TK3[0]); - skinny128_inv_LFSR3(state->TK3[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - skinny128_inv_permute_tk(state->TK3); + t0 = state->S[0]; + t1 = state->S[1]; + t2 = state->S[2]; + t3 = state->S[3]; - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; 
- - /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; - temp = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s1 ^= leftRotate8(temp); - s2 ^= 0x020000; - - /* Save the local variables back to the state but first convert them to columns*/ - rows_to_columns_32(state->S, s0, s1, s2, s3); -} - -static void forkskinny_128_384_inv_round(forkskinny_128_384_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3, tk0, tk1; - uint8_t rc; - - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; + // FIRST ROUND /* Permute TK1 and TK2 for the next round */ skinny128_inv_LFSR2(state->TK2[0]); @@ -588,177 +290,144 @@ static void forkskinny_128_384_inv_round(forkskinny_128_384_state_t *state, unsi skinny128_inv_permute_tk(state->TK2); skinny128_inv_permute_tk(state->TK3); - - state->S[0] = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); - state->S[1] = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); - state->S[2] = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); - state->S[3] = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + /* Inverse mix of the columns */ + s0 = t0; + t0 = t1; + t1 = t2; + t2 = t3; + t3 = s0 ^ t2; + t2 ^= t0; + t1 ^= t2; /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - tk0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; - tk1 = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - tk1 = leftRotate8(tk1); - - state->S[0] 
^= (((tk0) &0xff) | ((tk1<<8)&0xff00)); - state->S[1] ^= (((tk0>>8) &0xff) | ((tk1)&0xff00)); - state->S[2] ^= (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; - state->S[3] ^= (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); -} + rc = RC[--first]; + t0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; + t1 ^= leftRotate8((state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4))); + t2 ^= 0x020000; + + /* Save the local variables in temp but first convert them to columns*/ + rows_to_columns_32(s0, s1, s2, s3, t0, t1, t2, t3); + + + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + + t0 = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); + t1 = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); + t2 = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); + t3 = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + + /* XOR the shifted round constant and the shifted subkey for this round */ + rc = RC[--first]; + tk0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; + tk1 = leftRotate8((state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4))); + + s0 = t0 ^ (((tk0) &0xff) | ((tk1<<8)&0xff00)); + s1 = t1 ^ (((tk0>>8) &0xff) | ((tk1)&0xff00)); + s2 = t2 ^ (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; + s3 = t3 ^ (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); + 
} -static void forkskinny_128_384_inv_round_final(forkskinny_128_384_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3; + // FINAL ROUND - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Apply the inverse of the S-box to all cells in the state */ + /* Apply the inverse of the S-box to all cells in the state */ skinny128_inv_sbox(s0); skinny128_inv_sbox(s1); skinny128_inv_sbox(s2); skinny128_inv_sbox(s3); /* Save the local variables back to the state but first convert them back to rows*/ - columns_to_rows_32(state->S, s0, s1, s2, s3); + columns_to_rows_32(t0, t1, t2, t3, s0, s1, s2, s3); /* Shift the cells in the rows left, which moves the cell * values down closer to the LSB. That is, we do a right * rotate on the word to rotate the cells in the word left */ - state->S[1] = rightRotate8(state->S[1]); - state->S[2] = rightRotate16(state->S[2]); - state->S[3] = rightRotate24(state->S[3]); + state->S[0] = t0; + state->S[1] = rightRotate8(t1); + state->S[2] = rightRotate16(t2); + state->S[3] = rightRotate24(t3); } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 
44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_permute_tk(state.TK3); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); - skinny128_LFSR3(state.TK3[0]); - skinny128_LFSR3(state.TK3[1]); - } - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - forkskinny_128_384_inv_round_first(&state, (FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER * 2)-1); - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2)-1; - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, round - 1); - } - forkskinny_128_384_inv_round_final(&state, (FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER)); - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_LFSR3(state.TK3[0]); - skinny128_inv_LFSR3(state.TK3[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - skinny128_inv_permute_tk(state.TK3); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + skinny128_LFSR3(state->TK3[2]); + skinny128_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point, convert state to columns */ - memcpy(fstate.TK1, state.TK1, 16); - memcpy(fstate.TK2, state.TK2, 16); - memcpy(fstate.TK3, state.TK3, 16); - rows_to_columns_32(fstate.S,state.S[0],state.S[1], state.S[2], state.S[3]); - - /* Generate the left output block after another "before" rounds */ - forkskinny_128_384_inv_round_first(&state, FORKSKINNY_128_384_ROUNDS_BEFORE - 1); - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE-1; round > 0; --round) { - forkskinny_128_384_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + --rounds; } - forkskinny_128_384_inv_round_final(&state, 0); - - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, round); - } - store_column_8(output_right, fstate.S); } -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. 
- */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 - -/** - * \brief State information for ForkSkinny-64-192. - */ -typedef struct +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - uint16_t TK1[4]; /**< First part of the tweakey */ - uint16_t TK2[4]; /**< Second part of the tweakey */ - uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_LFSR3(state->TK3[2]); + skinny128_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; + } -} forkskinny_64_192_state_t; + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + --rounds; + } +} -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. 
- */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -769,144 +438,55 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + } /* Save the local 
variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_permute_tk(state->TK1); - skinny64_permute_tk(state->TK2); - skinny64_permute_tk(state->TK3); - skinny64_LFSR2(state->TK2[0]); - skinny64_LFSR2(state->TK2[1]); - skinny64_LFSR3(state->TK3[0]); - skinny64_LFSR3(state->TK3[1]); } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - 
be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -917,42 +497,45 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_inv_LFSR2(state->TK2[0]); - skinny64_inv_LFSR2(state->TK2[1]); - skinny64_inv_LFSR3(state->TK3[0]); - skinny64_inv_LFSR3(state->TK3[1]); - skinny64_inv_permute_tk(state->TK1); - skinny64_inv_permute_tk(state->TK2); - skinny64_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round 
constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -961,92 +544,74 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds) { - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - skinny64_permute_tk(state.TK1); - skinny64_permute_tk(state.TK2); - skinny64_permute_tk(state.TK3); - skinny64_LFSR2(state.TK2[0]); - skinny64_LFSR2(state.TK2[1]); - skinny64_LFSR3(state.TK3[0]); - 
skinny64_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR2(state->TK2[2]); + skinny64_LFSR2(state->TK2[3]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + skinny64_LFSR3(state->TK3[2]); + skinny64_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_AFTER; ++round) { - skinny64_inv_LFSR2(state.TK2[0]); - skinny64_inv_LFSR2(state.TK2[1]); - skinny64_inv_LFSR3(state.TK3[0]); - skinny64_inv_LFSR3(state.TK3[1]); - skinny64_inv_permute_tk(state.TK1); - skinny64_inv_permute_tk(state.TK2); - skinny64_inv_permute_tk(state.TK3); +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_inv_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR2(state->TK2[2]); + skinny64_inv_LFSR2(state->TK2[3]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_LFSR3(state->TK3[2]); + skinny64_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + --rounds; } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); } +#endif /* !__AVR__ */ diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/internal-forkskinny.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/internal-forkskinny.h index 0c1a707..e3014d3 100644 
--- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,158 @@ extern "C" { #endif /** + * \brief State information for ForkSkinny-128-256. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +/** + * \brief State information for ForkSkinny-128-384. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +/** + * \brief State information for ForkSkinny-64-192. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. 
+ */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first First round to apply. 
+ * \param last Last round to apply plus 1. + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. * * \param key 256-bit tweakey for ForkSkinny-128-256. 
diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/internal-skinnyutil.h index 83136cb..f2bc8ca 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/opt32_table/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. 
The original version from the specification is * equivalent to: @@ -319,6 +349,61 @@ do { \ x = ((x << 1) & 0xEEEEU) | ((x >> 3) & 0x1111U); \ } while (0) +#define rows_to_columns_32(column0, column1, column2, column3, row0, row1, row2, row3) \ + do { \ + column0 = (row3 & 0xFF) << 24|(row2 & 0xFF) << 16|(row1 & 0xFF) << 8 | (row0 & 0xFF);\ + column1 = (row3 & 0xFF00) << 16|(row2 & 0xFF00) << 8 |(row1 & 0xFF00) | (row0>>8 & 0xFF);\ + column2 = (row3 & 0xFF0000) << 8 |(row2 & 0xFF0000) |(row1 & 0xFF0000) >> 8 | (row0>>16 & 0xFF);\ + column3 = (row3 & 0xFF000000) |(row2 & 0xFF000000) >> 8 |(row1 & 0xFF000000) >> 16| (row0>>24 & 0xFF);\ + } while(0) + +#define columns_to_rows_32(row0, row1, row2, row3, column0, column1, column2, column3) rows_to_columns_32(row0, row1, row2, row3, column0, column1, column2, column3) + +#define load_column_8(dest, src) \ + do { \ + dest[0] = (src[12]) << 24 | (src[8]) << 16 | (src[4]) << 8 | (src[0]); \ + dest[1] = (src[13]) << 24 | (src[9]) << 16 | (src[5]) << 8 | (src[1]); \ + dest[2] = (src[14]) << 24 | (src[10]) << 16 | (src[6]) << 8 | (src[2]); \ + dest[3] = (src[15]) << 24 | (src[11]) << 16 | (src[7]) << 8 | (src[3]); \ + } while(0) + +#define store_column_8(dest, src) \ + do { \ + dest[0] = (uint8_t) (src[0]); dest[1] = (uint8_t) (src[1]); dest[2] = (uint8_t) (src[2]); dest[3] = (uint8_t) (src[3]); \ + dest[4] = (uint8_t) (src[0]>>8); dest[5] = (uint8_t) (src[1]>>8); dest[6] = (uint8_t) (src[2]>>8); dest[7] = (uint8_t) (src[3]>>8); \ + dest[8] = (uint8_t) (src[0]>>16);dest[9] = (uint8_t) (src[1]>>16);dest[10]= (uint8_t) (src[2]>>16);dest[11]= (uint8_t)(src[3]>>16); \ + dest[12]= (uint8_t) (src[0]>>24);dest[13]= (uint8_t) (src[1]>>24);dest[14]= (uint8_t) (src[2]>>24);dest[15]= (uint8_t)(src[3]>>24); \ + } while(0) + + +#define TK_to_column_256(columns, state) \ + do { \ + uint32_t TK0 = state->TK1[0] ^ state->TK2[0];\ + uint32_t TK1 = state->TK1[1] ^ state->TK2[1]; \ + uint32_t tk00 = TK0 & 0xFF; \ + uint32_t tk01 = TK0 & 0xFF00;\ 
+ uint32_t tk02 = TK0 & 0xFF0000;\ + uint32_t tk03 = TK0 & 0xFF000000;\ + columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ + columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ + columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ + columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ + } while(0) + +#define TK_to_column_384(columns, state) \ +do { \ + uint32_t TK0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0];\ + uint32_t TK1 = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1];\ + uint32_t tk00 = TK0 & 0xFF; \ + uint32_t tk01 = TK0 & 0xFF00;\ + uint32_t tk02 = TK0 & 0xFF0000;\ + uint32_t tk03 = TK0 & 0xFF000000;\ + columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ + columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ + columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ + columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ +} while(0) + /** @endcond */ #ifdef __cplusplus diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/aead-common.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/aead-common.h index 2be95eb..8429f59 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/aead-common.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/aead-common.h @@ -154,7 +154,7 @@ typedef void (*aead_xof_squeeze_t) /** * \brief No special AEAD features. */ -#define AEAD_FLAG_NONE 0x0000 +#define AEAD_FLAG_NONE 0x0000 /** * \brief The natural byte order of the AEAD cipher is little-endian. @@ -166,7 +166,18 @@ typedef void (*aead_xof_squeeze_t) * numbers as nonces. The application needs to know whether the sequence * number should be packed into the leading or trailing bytes of the nonce. 
*/ -#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 +#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 + +/** + * \brief The AEAD mode provides side-channel protection for the key. + */ +#define AEAD_FLAG_SC_PROTECT_KEY 0x0002 + +/** + * \brief The AEAD mode provides side-channel protection for all block + * operations. + */ +#define AEAD_FLAG_SC_PROTECT_ALL 0x0004 /** * \brief Meta-information about an AEAD cipher. diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/forkae.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/forkae.c index 4a9671a..ead8ada 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/forkae.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/forkae.c @@ -22,7 +22,6 @@ #include "forkae.h" #include "internal-forkskinny.h" -#include "internal-util.h" #include aead_cipher_t const forkae_paef_64_192_cipher = { @@ -138,3 +137,399 @@ aead_cipher_t const forkae_saef_128_256_cipher = { #define FORKAE_TWEAKEY_REDUCED_SIZE 32 #define FORKAE_BLOCK_FUNC forkskinny_128_256 #include "internal-forkae-saef.h" + +/* Helper functions to implement the forking encrypt/decrypt block operations + * on top of the basic "perform N rounds" functions in internal-forkskinny.c */ + +void forkskinny_128_256_encrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_tweakey_schedule_t tks; + + /* Iterate the tweakey schedule: the left branch runs rounds up to ROUNDS_BEFORE plus twice ROUNDS_AFTER, so the full schedule is needed whenever output_left is set (not only when both outputs are); a right-only call needs just the shorter schedule */ + if (output_left){ + forkskinny_128_256_init_tks(&tks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); + } + else{ + forkskinny_128_256_init_tks(&tks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + FORKSKINNY_128_256_ROUNDS_AFTER); + } + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); 
+ state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_256_rounds(&state, &tks, 0, FORKSKINNY_128_256_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_256_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_256_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_256_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_256_decrypt + (const unsigned 
char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_state_t fstate; + forkskinny_128_256_tweakey_schedule_t tks; + + /* Iterate the tweakey schedule */ + forkskinny_128_256_init_tks(&tks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); + + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_256_inv_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Save the state at the forking point (only the state is copied; tks is not) */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_256_inv_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_256_rounds + (&fstate, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 12, fstate.S[3]); +} + +void forkskinny_128_384_encrypt + (const unsigned char key[48], unsigned char *output_left, + 
unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_tweakey_schedule_t tks; + + /* Iterate key schedule: the left branch runs rounds up to ROUNDS_BEFORE plus twice ROUNDS_AFTER, so the full schedule is needed whenever output_left is set (not only when both outputs are); a right-only call needs just the shorter schedule */ + if (output_left){ + forkskinny_128_384_init_tks(&tks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); + } + else{ + forkskinny_128_384_init_tks(&tks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER); + } + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_384_rounds(&state, &tks, 0, FORKSKINNY_128_384_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_384_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_384_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + 
le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_384_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_384_decrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_state_t fstate; + forkskinny_128_384_tweakey_schedule_t tks; + + /* Iterate key schedule */ + forkskinny_128_384_init_tks(&tks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); + + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_384_inv_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Save the state at the forking point (only the state is copied; tks is not) */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_384_inv_rounds(&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + 
le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_384_rounds + (&fstate, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 12, fstate.S[3]); +} + +void forkskinny_64_192_encrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_tweakey_schedule_t tks; + + /* Iterate key schedule: the left branch runs rounds up to ROUNDS_BEFORE plus twice ROUNDS_AFTER, so the full schedule is needed whenever output_left is set (not only when both outputs are); a right-only call needs just the shorter schedule */ + if (output_left){ + forkskinny_64_192_init_tks(&tks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); + } + else{ + forkskinny_64_192_init_tks(&tks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + FORKSKINNY_64_192_ROUNDS_AFTER); + } + + /* Unpack the input */ + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Run all of the rounds before the forking point */ + forkskinny_64_192_rounds(&state, &tks, 0, FORKSKINNY_64_192_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint16_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_64_192_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + 
+ /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x1249U; /* Branching constant */ + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + forkskinny_64_192_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_64_192_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + } +} + +void forkskinny_64_192_decrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_state_t fstate; + forkskinny_64_192_tweakey_schedule_t tks; + + /* Iterate key schedule */ + forkskinny_64_192_init_tks(&tks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Unpack the input */ + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /* 
Remove the branching constant */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Save the state at the forking point (only the state is copied; tks is not) */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_64_192_inv_rounds(&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_64_192_rounds + (&fstate, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 67 + .byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + .byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte 
193 + .byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 + .byte 100 + .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + .byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 194 + .byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 + .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + 
.byte 97 + .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 + .byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + .byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + 
.byte 93 + .byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 223 + .byte 22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + 
.byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 137 + 
.byte 139 + .byte 141 + .byte 143 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_3, @object + .size table_3, 256 +table_3: + .byte 0 + .byte 128 + .byte 1 + .byte 129 + .byte 2 + .byte 130 + .byte 3 + .byte 131 + .byte 4 + .byte 132 + .byte 5 + .byte 133 + .byte 6 + .byte 134 + .byte 7 + .byte 135 + .byte 8 + .byte 136 + .byte 9 + .byte 137 + .byte 10 + .byte 138 + .byte 11 + .byte 139 + .byte 12 + .byte 140 + .byte 13 + .byte 141 + .byte 14 + .byte 142 + .byte 15 + .byte 143 + .byte 16 + .byte 144 + .byte 17 + .byte 145 + .byte 18 + .byte 146 + .byte 19 + .byte 147 + .byte 20 + .byte 148 + .byte 21 + .byte 149 + .byte 22 + .byte 150 + .byte 23 + .byte 151 + .byte 24 + .byte 152 + .byte 25 + .byte 153 + .byte 26 + .byte 154 + .byte 27 + .byte 155 + .byte 28 + .byte 156 + .byte 29 + .byte 157 + .byte 30 + .byte 158 + .byte 31 + .byte 159 + .byte 160 + .byte 32 + .byte 161 + .byte 33 + .byte 162 + .byte 34 + .byte 163 + .byte 35 + .byte 164 + .byte 36 + .byte 165 + .byte 37 + .byte 166 + .byte 38 + .byte 167 + .byte 39 + .byte 168 + .byte 40 + .byte 169 + .byte 41 + .byte 170 + .byte 42 + .byte 171 + .byte 43 + .byte 172 + .byte 44 + .byte 173 + .byte 45 + .byte 174 + .byte 46 + .byte 175 + .byte 47 + .byte 176 + .byte 48 + .byte 177 + .byte 49 + .byte 178 + .byte 50 + .byte 179 + .byte 
51 + .byte 180 + .byte 52 + .byte 181 + .byte 53 + .byte 182 + .byte 54 + .byte 183 + .byte 55 + .byte 184 + .byte 56 + .byte 185 + .byte 57 + .byte 186 + .byte 58 + .byte 187 + .byte 59 + .byte 188 + .byte 60 + .byte 189 + .byte 61 + .byte 190 + .byte 62 + .byte 191 + .byte 63 + .byte 64 + .byte 192 + .byte 65 + .byte 193 + .byte 66 + .byte 194 + .byte 67 + .byte 195 + .byte 68 + .byte 196 + .byte 69 + .byte 197 + .byte 70 + .byte 198 + .byte 71 + .byte 199 + .byte 72 + .byte 200 + .byte 73 + .byte 201 + .byte 74 + .byte 202 + .byte 75 + .byte 203 + .byte 76 + .byte 204 + .byte 77 + .byte 205 + .byte 78 + .byte 206 + .byte 79 + .byte 207 + .byte 80 + .byte 208 + .byte 81 + .byte 209 + .byte 82 + .byte 210 + .byte 83 + .byte 211 + .byte 84 + .byte 212 + .byte 85 + .byte 213 + .byte 86 + .byte 214 + .byte 87 + .byte 215 + .byte 88 + .byte 216 + .byte 89 + .byte 217 + .byte 90 + .byte 218 + .byte 91 + .byte 219 + .byte 92 + .byte 220 + .byte 93 + .byte 221 + .byte 94 + .byte 222 + .byte 95 + .byte 223 + .byte 224 + .byte 96 + .byte 225 + .byte 97 + .byte 226 + .byte 98 + .byte 227 + .byte 99 + .byte 228 + .byte 100 + .byte 229 + .byte 101 + .byte 230 + .byte 102 + .byte 231 + .byte 103 + .byte 232 + .byte 104 + .byte 233 + .byte 105 + .byte 234 + .byte 106 + .byte 235 + .byte 107 + .byte 236 + .byte 108 + .byte 237 + .byte 109 + .byte 238 + .byte 110 + .byte 239 + .byte 111 + .byte 240 + .byte 112 + .byte 241 + .byte 113 + .byte 242 + .byte 114 + .byte 243 + .byte 115 + .byte 244 + .byte 116 + .byte 245 + .byte 117 + .byte 246 + .byte 118 + .byte 247 + .byte 119 + .byte 248 + .byte 120 + .byte 249 + .byte 121 + .byte 250 + .byte 122 + .byte 251 + .byte 123 + .byte 252 + .byte 124 + .byte 253 + .byte 125 + .byte 254 + .byte 126 + .byte 255 + .byte 127 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_4, @object + .size table_4, 174 +table_4: + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 7 + .byte 0 + .byte 15 + .byte 0 + .byte 15 + .byte 1 + .byte 
15 + .byte 3 + .byte 14 + .byte 7 + .byte 13 + .byte 7 + .byte 11 + .byte 7 + .byte 7 + .byte 7 + .byte 15 + .byte 6 + .byte 15 + .byte 5 + .byte 14 + .byte 3 + .byte 12 + .byte 7 + .byte 9 + .byte 7 + .byte 3 + .byte 7 + .byte 7 + .byte 6 + .byte 15 + .byte 4 + .byte 14 + .byte 1 + .byte 13 + .byte 3 + .byte 10 + .byte 7 + .byte 5 + .byte 7 + .byte 11 + .byte 6 + .byte 7 + .byte 5 + .byte 14 + .byte 2 + .byte 12 + .byte 5 + .byte 8 + .byte 3 + .byte 0 + .byte 7 + .byte 1 + .byte 6 + .byte 3 + .byte 4 + .byte 6 + .byte 0 + .byte 13 + .byte 0 + .byte 11 + .byte 1 + .byte 7 + .byte 3 + .byte 14 + .byte 6 + .byte 13 + .byte 5 + .byte 10 + .byte 3 + .byte 4 + .byte 7 + .byte 9 + .byte 6 + .byte 3 + .byte 5 + .byte 6 + .byte 2 + .byte 12 + .byte 4 + .byte 8 + .byte 1 + .byte 1 + .byte 3 + .byte 2 + .byte 6 + .byte 5 + .byte 4 + .byte 10 + .byte 0 + .byte 5 + .byte 1 + .byte 11 + .byte 2 + .byte 6 + .byte 5 + .byte 12 + .byte 2 + .byte 8 + .byte 5 + .byte 0 + .byte 3 + .byte 0 + .byte 6 + .byte 1 + .byte 4 + .byte 2 + .byte 0 + .byte 5 + .byte 0 + .byte 11 + .byte 0 + .byte 7 + .byte 1 + .byte 15 + .byte 2 + .byte 14 + .byte 5 + .byte 12 + .byte 3 + .byte 8 + .byte 7 + .byte 1 + .byte 7 + .byte 3 + .byte 6 + .byte 7 + .byte 4 + .byte 14 + .byte 0 + .byte 13 + .byte 1 + .byte 11 + .byte 3 + .byte 6 + .byte 7 + .byte 13 + .byte 6 + .byte 11 + .byte 5 + .byte 6 + .byte 3 + .byte 12 + .byte 6 + .byte 9 + .byte 5 + .byte 2 + .byte 3 + .byte 4 + .byte 6 + .byte 9 + .byte 4 + .byte 2 + .byte 1 + .byte 5 + .byte 2 + .byte 10 + .byte 4 + .byte 4 + .byte 1 + .byte 9 + .byte 2 + .byte 2 + .byte 5 + .byte 4 + .byte 2 + .byte 8 + .byte 4 + .byte 0 + .byte 1 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_5, @object + .size table_5, 256 +table_5: + .byte 204 + .byte 198 + .byte 201 + .byte 192 + .byte 193 + .byte 202 + .byte 194 + .byte 203 + .byte 195 + .byte 200 + .byte 197 + .byte 205 + .byte 196 + .byte 206 + .byte 199 + .byte 207 + .byte 108 + .byte 102 + 
.byte 105 + .byte 96 + .byte 97 + .byte 106 + .byte 98 + .byte 107 + .byte 99 + .byte 104 + .byte 101 + .byte 109 + .byte 100 + .byte 110 + .byte 103 + .byte 111 + .byte 156 + .byte 150 + .byte 153 + .byte 144 + .byte 145 + .byte 154 + .byte 146 + .byte 155 + .byte 147 + .byte 152 + .byte 149 + .byte 157 + .byte 148 + .byte 158 + .byte 151 + .byte 159 + .byte 12 + .byte 6 + .byte 9 + .byte 0 + .byte 1 + .byte 10 + .byte 2 + .byte 11 + .byte 3 + .byte 8 + .byte 5 + .byte 13 + .byte 4 + .byte 14 + .byte 7 + .byte 15 + .byte 28 + .byte 22 + .byte 25 + .byte 16 + .byte 17 + .byte 26 + .byte 18 + .byte 27 + .byte 19 + .byte 24 + .byte 21 + .byte 29 + .byte 20 + .byte 30 + .byte 23 + .byte 31 + .byte 172 + .byte 166 + .byte 169 + .byte 160 + .byte 161 + .byte 170 + .byte 162 + .byte 171 + .byte 163 + .byte 168 + .byte 165 + .byte 173 + .byte 164 + .byte 174 + .byte 167 + .byte 175 + .byte 44 + .byte 38 + .byte 41 + .byte 32 + .byte 33 + .byte 42 + .byte 34 + .byte 43 + .byte 35 + .byte 40 + .byte 37 + .byte 45 + .byte 36 + .byte 46 + .byte 39 + .byte 47 + .byte 188 + .byte 182 + .byte 185 + .byte 176 + .byte 177 + .byte 186 + .byte 178 + .byte 187 + .byte 179 + .byte 184 + .byte 181 + .byte 189 + .byte 180 + .byte 190 + .byte 183 + .byte 191 + .byte 60 + .byte 54 + .byte 57 + .byte 48 + .byte 49 + .byte 58 + .byte 50 + .byte 59 + .byte 51 + .byte 56 + .byte 53 + .byte 61 + .byte 52 + .byte 62 + .byte 55 + .byte 63 + .byte 140 + .byte 134 + .byte 137 + .byte 128 + .byte 129 + .byte 138 + .byte 130 + .byte 139 + .byte 131 + .byte 136 + .byte 133 + .byte 141 + .byte 132 + .byte 142 + .byte 135 + .byte 143 + .byte 92 + .byte 86 + .byte 89 + .byte 80 + .byte 81 + .byte 90 + .byte 82 + .byte 91 + .byte 83 + .byte 88 + .byte 85 + .byte 93 + .byte 84 + .byte 94 + .byte 87 + .byte 95 + .byte 220 + .byte 214 + .byte 217 + .byte 208 + .byte 209 + .byte 218 + .byte 210 + .byte 219 + .byte 211 + .byte 216 + .byte 213 + .byte 221 + .byte 212 + .byte 222 + .byte 215 + .byte 223 + .byte 
76 + .byte 70 + .byte 73 + .byte 64 + .byte 65 + .byte 74 + .byte 66 + .byte 75 + .byte 67 + .byte 72 + .byte 69 + .byte 77 + .byte 68 + .byte 78 + .byte 71 + .byte 79 + .byte 236 + .byte 230 + .byte 233 + .byte 224 + .byte 225 + .byte 234 + .byte 226 + .byte 235 + .byte 227 + .byte 232 + .byte 229 + .byte 237 + .byte 228 + .byte 238 + .byte 231 + .byte 239 + .byte 124 + .byte 118 + .byte 121 + .byte 112 + .byte 113 + .byte 122 + .byte 114 + .byte 123 + .byte 115 + .byte 120 + .byte 117 + .byte 125 + .byte 116 + .byte 126 + .byte 119 + .byte 127 + .byte 252 + .byte 246 + .byte 249 + .byte 240 + .byte 241 + .byte 250 + .byte 242 + .byte 251 + .byte 243 + .byte 248 + .byte 245 + .byte 253 + .byte 244 + .byte 254 + .byte 247 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_6, @object + .size table_6, 256 +table_6: + .byte 51 + .byte 52 + .byte 54 + .byte 56 + .byte 60 + .byte 58 + .byte 49 + .byte 62 + .byte 57 + .byte 50 + .byte 53 + .byte 55 + .byte 48 + .byte 59 + .byte 61 + .byte 63 + .byte 67 + .byte 68 + .byte 70 + .byte 72 + .byte 76 + .byte 74 + .byte 65 + .byte 78 + .byte 73 + .byte 66 + .byte 69 + .byte 71 + .byte 64 + .byte 75 + .byte 77 + .byte 79 + .byte 99 + .byte 100 + .byte 102 + .byte 104 + .byte 108 + .byte 106 + .byte 97 + .byte 110 + .byte 105 + .byte 98 + .byte 101 + .byte 103 + .byte 96 + .byte 107 + .byte 109 + .byte 111 + .byte 131 + .byte 132 + .byte 134 + .byte 136 + .byte 140 + .byte 138 + .byte 129 + .byte 142 + .byte 137 + .byte 130 + .byte 133 + .byte 135 + .byte 128 + .byte 139 + .byte 141 + .byte 143 + .byte 195 + .byte 196 + .byte 198 + .byte 200 + .byte 204 + .byte 202 + .byte 193 + .byte 206 + .byte 201 + .byte 194 + .byte 197 + .byte 199 + .byte 192 + .byte 203 + .byte 205 + .byte 207 + .byte 163 + .byte 164 + .byte 166 + .byte 168 + .byte 172 + .byte 170 + .byte 161 + .byte 174 + .byte 169 + .byte 162 + .byte 165 + .byte 167 + .byte 160 + .byte 171 + .byte 173 + .byte 175 + .byte 19 + .byte 20 + .byte 
22 + .byte 24 + .byte 28 + .byte 26 + .byte 17 + .byte 30 + .byte 25 + .byte 18 + .byte 21 + .byte 23 + .byte 16 + .byte 27 + .byte 29 + .byte 31 + .byte 227 + .byte 228 + .byte 230 + .byte 232 + .byte 236 + .byte 234 + .byte 225 + .byte 238 + .byte 233 + .byte 226 + .byte 229 + .byte 231 + .byte 224 + .byte 235 + .byte 237 + .byte 239 + .byte 147 + .byte 148 + .byte 150 + .byte 152 + .byte 156 + .byte 154 + .byte 145 + .byte 158 + .byte 153 + .byte 146 + .byte 149 + .byte 151 + .byte 144 + .byte 155 + .byte 157 + .byte 159 + .byte 35 + .byte 36 + .byte 38 + .byte 40 + .byte 44 + .byte 42 + .byte 33 + .byte 46 + .byte 41 + .byte 34 + .byte 37 + .byte 39 + .byte 32 + .byte 43 + .byte 45 + .byte 47 + .byte 83 + .byte 84 + .byte 86 + .byte 88 + .byte 92 + .byte 90 + .byte 81 + .byte 94 + .byte 89 + .byte 82 + .byte 85 + .byte 87 + .byte 80 + .byte 91 + .byte 93 + .byte 95 + .byte 115 + .byte 116 + .byte 118 + .byte 120 + .byte 124 + .byte 122 + .byte 113 + .byte 126 + .byte 121 + .byte 114 + .byte 117 + .byte 119 + .byte 112 + .byte 123 + .byte 125 + .byte 127 + .byte 3 + .byte 4 + .byte 6 + .byte 8 + .byte 12 + .byte 10 + .byte 1 + .byte 14 + .byte 9 + .byte 2 + .byte 5 + .byte 7 + .byte 0 + .byte 11 + .byte 13 + .byte 15 + .byte 179 + .byte 180 + .byte 182 + .byte 184 + .byte 188 + .byte 186 + .byte 177 + .byte 190 + .byte 185 + .byte 178 + .byte 181 + .byte 183 + .byte 176 + .byte 187 + .byte 189 + .byte 191 + .byte 211 + .byte 212 + .byte 214 + .byte 216 + .byte 220 + .byte 218 + .byte 209 + .byte 222 + .byte 217 + .byte 210 + .byte 213 + .byte 215 + .byte 208 + .byte 219 + .byte 221 + .byte 223 + .byte 243 + .byte 244 + .byte 246 + .byte 248 + .byte 252 + .byte 250 + .byte 241 + .byte 254 + .byte 249 + .byte 242 + .byte 245 + .byte 247 + .byte 240 + .byte 251 + .byte 253 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_7, @object + .size table_7, 256 +table_7: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 9 + .byte 11 + .byte 13 + 
.byte 15 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 121 + .byte 123 
+ .byte 125 + .byte 127 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_8, @object + .size table_8, 256 +table_8: + .byte 0 + .byte 8 + .byte 1 + .byte 9 + .byte 2 + .byte 10 + .byte 3 + .byte 11 + .byte 12 + .byte 4 + .byte 13 + .byte 5 + .byte 14 + .byte 6 + .byte 15 + .byte 7 + .byte 128 + .byte 136 + .byte 129 + .byte 137 + .byte 130 + .byte 138 + .byte 131 + .byte 139 + .byte 140 + .byte 132 + .byte 141 + .byte 133 + .byte 142 + .byte 134 + .byte 143 + .byte 135 + .byte 16 + .byte 24 + .byte 17 + .byte 25 + .byte 18 + .byte 26 + .byte 19 + .byte 27 + .byte 28 + .byte 20 + .byte 29 + .byte 21 + .byte 30 + .byte 22 + .byte 31 + .byte 23 + .byte 144 + .byte 152 + .byte 145 + .byte 153 + .byte 146 + .byte 154 + .byte 147 + .byte 155 + .byte 156 + .byte 148 + .byte 157 + .byte 149 + .byte 158 + .byte 150 + .byte 159 + .byte 151 + .byte 32 + .byte 40 + .byte 33 + .byte 41 + .byte 34 + .byte 42 + .byte 35 + .byte 43 + .byte 44 + .byte 36 + .byte 45 + .byte 37 + .byte 46 + .byte 38 + .byte 47 + .byte 39 + .byte 160 + .byte 168 + .byte 161 + .byte 169 + .byte 162 + .byte 170 + .byte 163 + .byte 171 + 
.byte 172 + .byte 164 + .byte 173 + .byte 165 + .byte 174 + .byte 166 + .byte 175 + .byte 167 + .byte 48 + .byte 56 + .byte 49 + .byte 57 + .byte 50 + .byte 58 + .byte 51 + .byte 59 + .byte 60 + .byte 52 + .byte 61 + .byte 53 + .byte 62 + .byte 54 + .byte 63 + .byte 55 + .byte 176 + .byte 184 + .byte 177 + .byte 185 + .byte 178 + .byte 186 + .byte 179 + .byte 187 + .byte 188 + .byte 180 + .byte 189 + .byte 181 + .byte 190 + .byte 182 + .byte 191 + .byte 183 + .byte 192 + .byte 200 + .byte 193 + .byte 201 + .byte 194 + .byte 202 + .byte 195 + .byte 203 + .byte 204 + .byte 196 + .byte 205 + .byte 197 + .byte 206 + .byte 198 + .byte 207 + .byte 199 + .byte 64 + .byte 72 + .byte 65 + .byte 73 + .byte 66 + .byte 74 + .byte 67 + .byte 75 + .byte 76 + .byte 68 + .byte 77 + .byte 69 + .byte 78 + .byte 70 + .byte 79 + .byte 71 + .byte 208 + .byte 216 + .byte 209 + .byte 217 + .byte 210 + .byte 218 + .byte 211 + .byte 219 + .byte 220 + .byte 212 + .byte 221 + .byte 213 + .byte 222 + .byte 214 + .byte 223 + .byte 215 + .byte 80 + .byte 88 + .byte 81 + .byte 89 + .byte 82 + .byte 90 + .byte 83 + .byte 91 + .byte 92 + .byte 84 + .byte 93 + .byte 85 + .byte 94 + .byte 86 + .byte 95 + .byte 87 + .byte 224 + .byte 232 + .byte 225 + .byte 233 + .byte 226 + .byte 234 + .byte 227 + .byte 235 + .byte 236 + .byte 228 + .byte 237 + .byte 229 + .byte 238 + .byte 230 + .byte 239 + .byte 231 + .byte 96 + .byte 104 + .byte 97 + .byte 105 + .byte 98 + .byte 106 + .byte 99 + .byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + .byte 250 + .byte 243 + .byte 251 + .byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + .byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global 
forkskinny_128_256_rounds + .type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov 
r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + 
elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + 
ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + 
std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 
+ pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global forkskinny_128_256_inv_rounds + .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + 
ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + 
std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 
+#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if 
defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 
0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + 
std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 
+ ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + + /* forkskinny_128_256_reverse_tk: r25:r24 = pointer to a 32-byte buffer (copied into Z), r22 = number of passes. Each pass moves bytes 8..15 of each 16-byte half down to 0..7 and writes the old bytes 0..7 into 8..15 in a fixed permuted order; for the second half those 8 bytes are first replaced by their table_3 entries. NOTE(review): presumably the buffer is the TK1/TK2 part of the 128-256 state and this steps the tweakey schedule backwards - confirm against internal-forkskinny.h. */ .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: + /* save r28/r29 and r2-r17 (popped again before ret) */ push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + /* Z = r25:r24, the buffer pointer */ movw r30,r24 + /* Y = stack pointer minus 16: a 16-byte frame addressed as Y+1..Y+16; the flags byte (I/O 0x3f) is restored between the two stack-pointer writes after cli */ in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + /* r2..r17 = buffer bytes 0..15 */ ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + /* Y+1..Y+16 = buffer bytes 16..31 */ ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + /* park the buffer pointer on the stack; Z now addresses table_3 */ push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if
defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + /* pass start, register half: keep old bytes 0..7 in r18-r21,r26,r27,r24,r25, slide bytes 8..15 down into r2..r9, then refill r10..r17 from the kept bytes in permuted order */ movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + /* stack half: fetch Y+1..Y+8, then copy Y+9..Y+16 down to Y+1..Y+8 through r23 */ ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + /* replace each fetched byte b by the table entry at Z with its low byte set to b; only r30 changes, so this assumes table_3 is 256-byte aligned - TODO confirm at the table definition. The preprocessor picks elpm, lpm Rd,Z, ld, or plain lpm via r0 */ mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else +
lpm + mov r25,r0 +#endif + /* reverse_tk: store the eight substituted bytes into Y+9..Y+16 in permuted order */ std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + /* test-at-end loop on r22: a count of 0 would wrap and run 256 passes */ dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + /* recover the buffer pointer, then write r2..r17 to bytes 0..15 and Y+1..Y+16 to bytes 16..31 */ pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk + + /* forkskinny_128_384_rounds: r25:r24 = pointer to a 64-byte buffer (copied into Z), r22 = first index, r20 = end index. Both indices are doubled so that r22 can address table_4 two bytes per iteration; the loop stops when r22 equals the doubled end index kept at Y+49. Buffer bytes 48..63 are worked on in r2-r15,r24,r25 and bytes 0..47 in a stack copy at Y+1..Y+48; everything is written back at the end. NOTE(review): presumably bytes 48..63 are the cipher state, bytes 0..47 are TK1/TK2/TK3 and table_4 holds round constants - confirm against internal-forkskinny.h. */ .text +.global forkskinny_128_384_rounds + .type forkskinny_128_384_rounds, @function +forkskinny_128_384_rounds: + /* save r28/r29 and r2-r17 */ push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + /* 49-byte frame: Y+1..Y+48 data copy, Y+49 loop end marker */ in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + /* working bytes = buffer bytes 48..63 */ ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + /* copy buffer bytes 0..47 to Y+1..Y+48, four at a time */ ld r18,Z + ldd r19,Z+1 + ldd
r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + /* Y+49 = 2 * r20 */ lsl r20 + std Y+49,r20 + /* park the buffer pointer; Z now addresses table_0 */ push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + /* r22 = 2 * first index */ lsl r22 +118: + /* iteration start: replace each of the 16 working bytes by its table_0 entry (low byte of Z = the byte, so table_0 is assumed 256-byte aligned - TODO confirm) */ mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif
defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif
defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + /* XOR Y+1..8, Y+17..24 and Y+33..40 into r2..r9 */ ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + /* XOR table_4 entry r22 into r2 and entry r22+1 into r6 (r22 is incremented in between), then XOR 2 into r10 and r4 */ ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + /* rotate r6..r9 by one byte, swap r10:r11 with r12:r13, rotate r14,r15,r24,r25 by one byte the other way */ mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + /* XOR rows together and reassign the four 4-byte rows to r2..r5, r6..r9, r10..r13, r14,r15,r24,r25 */ eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + /* step the Y+1..16 group: fetch Y+9..16, copy Y+1..8 up to Y+9..16 through r0, store the fetched bytes into Y+1..8 in permuted order */ ldd
r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + /* step the Y+17..32 group the same way, with the fetched bytes first replaced by their table_2 entries */ ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else +
lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + /* step the Y+33..48 group the same way using table_3 */ ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + ldd r20,Y+45 + ldd r21,Y+46 + ldd r16,Y+47 + ldd r17,Y+48 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+33 + std Y+41,r0 + ldd r0,Y+34 + std
Y+42,r0 + ldd r0,Y+35 + std Y+43,r0 + ldd r0,Y+36 + std Y+44,r0 + ldd r0,Y+37 + std Y+45,r0 + ldd r0,Y+38 + std Y+46,r0 + ldd r0,Y+39 + std Y+47,r0 + ldd r0,Y+40 + std Y+48,r0 + std Y+33,r19 + std Y+34,r17 + std Y+35,r18 + std Y+36,r21 + std Y+37,r26 + std Y+38,r16 + std Y+39,r20 + std Y+40,r27 + /* point Z back at table_0 for the next iteration */ ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + /* second increment of r22, then compare with the end marker; the test is at the end, so the body always runs at least once */ inc r22 + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + /* recover the buffer pointer and write the working bytes and the stack copy back */ pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd
r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + /* forkskinny_128_384_rounds epilogue: drop the 49-byte frame, restore registers, clear r1 */ adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds + + /* forkskinny_128_384_inv_rounds: r25:r24 = pointer to a 64-byte buffer (copied into Z), r22 = start index, r20 = stop index, both doubled. r22 is decremented twice per iteration, reading table_4 after each decrement, and the loop stops when r22 equals the doubled stop index kept at Y+49. Buffer bytes 48..63 are worked on in r2-r15,r24,r25 and bytes 0..47 in a stack copy at Y+1..Y+48; everything is written back at the end. Each iteration steps the three 16-byte stack groups, recombines and rotates the rows, XORs in the groups and the table_4 bytes, and finally maps the 16 working bytes through table_1. NOTE(review): presumably this undoes forkskinny_128_384_rounds, with table_1 the inverse of table_0 - confirm against the table definitions. */ .text +.global forkskinny_128_384_inv_rounds + .type forkskinny_128_384_inv_rounds, @function +forkskinny_128_384_inv_rounds: + /* save r28/r29 and r2-r17 */ push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + /* 49-byte frame: Y+1..Y+48 data copy, Y+49 loop end marker */ in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + /* working bytes = buffer bytes 48..63 */ ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + /* copy buffer bytes 0..47 to Y+1..Y+48, four at a time */ ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd
r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + /* Y+49 = 2 * r20 */ lsl r20 + std Y+49,r20 + /* park the buffer pointer; Z now addresses table_3 */ push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + /* r22 = 2 * start index */ lsl r22 +118: + /* iteration start, Y+1..16 group: fetch Y+1..8, copy Y+9..16 down to Y+1..8 through r0, store the fetched bytes into Y+9..16 in permuted order */ ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + /* Y+17..32 group: same step, with the fetched bytes first replaced by their table_3 entries (low byte of Z = the byte, so the tables are assumed 256-byte aligned - TODO confirm) */ ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if
defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + /* Y+33..48 group: same step using table_2 */ ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + ldd r20,Y+37 + ldd r21,Y+38 + ldd r16,Y+39 + ldd r17,Y+40 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm
r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+41 + std Y+33,r0 + ldd r0,Y+42 + std Y+34,r0 + ldd r0,Y+43 + std Y+35,r0 + ldd r0,Y+44 + std Y+36,r0 + ldd r0,Y+45 + std Y+37,r0 + ldd r0,Y+46 + std Y+38,r0 + ldd r0,Y+47 + std Y+39,r0 + ldd r0,Y+48 + std Y+40,r0 + std Y+41,r26 + std Y+42,r18 + std Y+43,r20 + std Y+44,r17 + std Y+45,r16 + std Y+46,r27 + std Y+47,r21 + std Y+48,r19 + /* reassign the four 4-byte rows and XOR them together */ movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + /* rotate r6..r9 by one byte, swap r10:r11 with r12:r13, rotate r14,r15,r24,r25 by one byte the other way */ mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + /* XOR Y+1..8, Y+17..24 and Y+33..40 into r2..r9 */ ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor
r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + /* decrement r22 and XOR its table_4 entry into r6, decrement again and XOR that entry into r2, then XOR 2 into r10 and r4 */ ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + /* replace each of the 16 working bytes by its table_1 entry */ ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm
r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + /* point Z back at table_3, then loop until r22 equals the end marker; the test is at the end, so the body always runs at least once */ ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + /* recover the buffer pointer and write the working bytes and the stack copy back */ pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std
Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + /* drop the 49-byte frame, restore registers, clear r1 */ adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds + + .text +.global forkskinny_128_384_forward_tk + .type forkskinny_128_384_forward_tk, @function +forkskinny_128_384_forward_tk: + push r28 + push r29 + push
r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd 
r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r26,Y+29 + ldd r27,Y+30 + ldd r2,Y+31 + ldd r3,Y+32 + ldd r23,Y+17 + std Y+25,r23 + ldd r23,Y+18 + std Y+26,r23 + ldd r23,Y+19 + 
std Y+27,r23 + ldd r23,Y+20 + std Y+28,r23 + ldd r23,Y+21 + std Y+29,r23 + ldd r23,Y+22 + std Y+30,r23 + ldd r23,Y+23 + std Y+31,r23 + ldd r23,Y+24 + std Y+32,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+17,r19 + std Y+18,r3 + std Y+19,r18 + std Y+20,r27 + std Y+21,r20 + std Y+22,r2 + std Y+23,r26 + std Y+24,r21 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std 
Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk + + .text +.global forkskinny_128_384_reverse_tk + .type forkskinny_128_384_reverse_tk, @function +forkskinny_128_384_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + 
ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 + ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif 
defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + 
std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd 
r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor 
r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld 
r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 
+ and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd 
r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if 
defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor 
r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z 
+#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + 
ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 
+ pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + 
std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) 
+ elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + and r11,r16 + or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std 
Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/internal-forkskinny.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/internal-forkskinny.c index c43ef98..4a7a01c 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/internal-forkskinny.c @@ -40,37 +40,10 @@ static unsigned char const RC[87] = { 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 }; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 - -/** - * \brief State information for ForkSkinny-128-256. 
- */ -typedef struct -{ - //uint32_t TK1[4]; /**< First part of the tweakey */ - //uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_256_state_t; +#if !defined(__AVR__) -typedef struct +void forkskinny_128_256_init_tks(forkskinny_128_256_tweakey_schedule_t *tks, const unsigned char key[32], uint8_t nb_rounds) { - /** Words of the full key schedule */ - uint32_t row0[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; - uint32_t row1[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; - - -} forkskinny_128_256_key_schedule_t; - -static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, const unsigned char key[32], uint8_t nb_rounds){ uint32_t TK[4]; unsigned round; @@ -81,8 +54,8 @@ static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, c TK[3] = le_load_word32(key + 12); /* Initiate key schedule with permutations of TK1 */ for(round = 0; roundrow0[round] = TK[0]; - ks->row1[round] = TK[1]; + tks->row0[round] = TK[0]; + tks->row1[round] = TK[1]; skinny128_permute_tk(TK); } @@ -94,8 +67,8 @@ static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, c TK[3] = le_load_word32(key + 28); /* Process second Tweakey and add it to the key schedule */ for(round = 0; roundrow0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny128_permute_tk(TK); skinny128_LFSR2(TK[0]); @@ -103,51 +76,48 @@ static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, c } } -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, forkskinny_128_256_key_schedule_t *ks, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + s0 = state->S[0]; + s1 = state->S[1]; + s2 = state->S[2]; + s3 = state->S[3]; + + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -156,95 +126,8 @@ static void forkskinny_128_256_round state->S[3] = s3; } -void forkskinny_128_256_encrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_256_state_t state; - forkskinny_128_256_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - if (output_left && output_right){ - forkskinny_128_256_init_tks(&ks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); - } - else{ - forkskinny_128_256_init_tks(&ks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + FORKSKINNY_128_256_ROUNDS_AFTER); - } - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, 
state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-256 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_256_inv_round - (forkskinny_128_256_state_t *state, forkskinny_128_256_key_schedule_t *ks, unsigned round) +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -255,33 +138,37 @@ static void forkskinny_128_256_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -290,96 +177,8 @@ static void forkskinny_128_256_inv_round state->S[3] = s3; } -void forkskinny_128_256_decrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_init_tks(forkskinny_128_384_tweakey_schedule_t *tks, const unsigned char key[48], uint8_t nb_rounds) { - forkskinny_128_256_state_t state; - forkskinny_128_256_state_t fstate; - forkskinny_128_256_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - forkskinny_128_256_init_tks(&ks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); - - /* Unpack the input */ - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); --round) { - forkskinny_128_256_inv_round(&state, &ks, round - 1); - } - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Save the state and the tweakey at the forking point */ - fstate = 
state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_256_inv_round(&state, &ks, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&fstate, &ks, round); - } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); -} - -/** - * \brief Number of rounds of ForkSkinny-128-384 before forking. - */ -#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 - -/** - * \brief Number of rounds of ForkSkinny-128-384 after forking. - */ -#define FORKSKINNY_128_384_ROUNDS_AFTER 31 - -/** - * \brief State information for ForkSkinny-128-384. 
- */ -typedef struct -{ - //uint32_t TK1[4]; /**< First part of the tweakey */ - //uint32_t TK2[4]; /**< Second part of the tweakey */ - //uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -typedef struct -{ - /** Words of the full key schedule */ - uint32_t row0[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; - uint32_t row1[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; - - -} forkskinny_128_384_key_schedule_t; - -static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, const unsigned char key[48], uint8_t nb_rounds){ uint32_t TK[4]; unsigned round; @@ -390,8 +189,8 @@ static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c TK[3] = le_load_word32(key + 12); /* Initiate key schedule with permutations of TK1 */ for(round = 0; roundrow0[round] = TK[0]; - ks->row1[round] = TK[1]; + tks->row0[round] = TK[0]; + tks->row1[round] = TK[1]; skinny128_permute_tk(TK); } @@ -401,10 +200,11 @@ static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c TK[1] = le_load_word32(key + 20); TK[2] = le_load_word32(key + 24); TK[3] = le_load_word32(key + 28); + /* Process second Tweakey and add it to the key schedule */ for(round = 0; roundrow0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny128_permute_tk(TK); skinny128_LFSR2(TK[0]); @@ -416,10 +216,11 @@ static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c TK[1] = le_load_word32(key + 36); TK[2] = le_load_word32(key + 40); TK[3] = le_load_word32(key + 44); + /* Process third Tweakey and add it to the key schedule */ for(round = 0; roundrow0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny128_permute_tk(TK); skinny128_LFSR3(TK[0]); @@ -427,51 +228,48 @@ static void 
forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c } } -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, forkskinny_128_384_key_schedule_t *ks, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + s0 = state->S[0]; + s1 = state->S[1]; + s2 = state->S[2]; + s3 = state->S[3]; + + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -480,97 +278,8 @@ static void forkskinny_128_384_round state->S[3] = s3; } -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - forkskinny_128_384_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - if (output_left && output_right){ - forkskinny_128_384_init_tks(&ks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); - } - else{ - forkskinny_128_384_init_tks(&ks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER); - } - - /* Unpack the input */ - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - 
le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-384 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_384_inv_round - (forkskinny_128_384_state_t *state, forkskinny_128_384_key_schedule_t *ks, unsigned round) +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -581,33 +290,37 @@ static void forkskinny_128_384_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -616,96 +329,9 @@ static void forkskinny_128_384_inv_round state->S[3] = s3; } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - forkskinny_128_384_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - forkskinny_128_384_init_tks(&ks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); - - /* Unpack the input */ - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, &ks, round - 1); - } - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; 
round > 0; --round) { - forkskinny_128_384_inv_round(&state, &ks, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, &ks, round); - } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); -} - -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. - */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 -/** - * \brief State information for ForkSkinny-64-192. 
- */ -typedef struct +void forkskinny_64_192_init_tks(forkskinny_64_192_tweakey_schedule_t *tks, const unsigned char key[24], uint8_t nb_rounds) { - //uint16_t TK1[4]; /**< First part of the tweakey */ - //uint16_t TK2[4]; /**< Second part of the tweakey */ - //uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ - -} forkskinny_64_192_state_t; - -typedef struct -{ - /** Words of the full key schedule */ - uint16_t row0[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; - uint16_t row1[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; - - -} forkskinny_64_192_key_schedule_t; - -static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, const unsigned char key[24], uint8_t nb_rounds){ uint16_t TK[4]; unsigned round; @@ -716,8 +342,8 @@ static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con TK[3] = be_load_word16(key + 6); /* Initiate key schedule with permutations of TK1 */ for(round = 0; roundrow0[round] = TK[0]; - ks->row1[round] = TK[1]; + tks->row0[round] = TK[0]; + tks->row1[round] = TK[1]; skinny64_permute_tk(TK); } @@ -727,10 +353,11 @@ static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con TK[1] = be_load_word16(key + 10); TK[2] = be_load_word16(key + 12); TK[3] = be_load_word16(key + 14); + /* Process second Tweakey and add it to the key schedule */ for(round = 0; roundrow0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny64_permute_tk(TK); skinny64_LFSR2(TK[0]); @@ -744,8 +371,8 @@ static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con TK[3] = be_load_word16(key + 22); /* Process third Tweakey and add it to the key schedule */ for(round = 0; roundrow0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny64_permute_tk(TK); skinny64_LFSR3(TK[0]); @@ -753,17 +380,8 @@ 
static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con } } -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. - */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, forkskinny_64_192_key_schedule_t *ks, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -774,131 +392,44 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= ks->row1[round] ^ ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= tks->row0[first] ^ ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= tks->row1[first] ^ ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = 
s0; + s0 = temp; + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - forkskinny_64_192_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - if (output_left && output_right){ - forkskinny_64_192_init_tks(&ks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); - } - else{ - forkskinny_64_192_init_tks(&ks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + FORKSKINNY_64_192_ROUNDS_AFTER); - } - - /* Unpack the input */ - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 
0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, forkskinny_64_192_key_schedule_t *ks, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -909,33 +440,35 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= ks->row1[round] ^ ((rc & 0x70) << 8); - s2 ^= 0x2000; - - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= tks->row0[first] ^ ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= tks->row1[first] ^ ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -944,61 +477,4 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char 
*output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - forkskinny_64_192_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - forkskinny_64_192_init_tks(&ks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); - - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, &ks, round - 1); - } - - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, &ks, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, &ks, round); - } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); -} +#endif /* !__AVR__ */ diff --git 
a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/internal-forkskinny.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/internal-forkskinny.h index 0c1a707..1b4f5bf 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,172 @@ extern "C" { #endif /** + * \brief Number of rounds of ForkSkinny-128-256 before forking. + */ +#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 + +/** + * \brief Number of rounds of ForkSkinny-128-256 after forking. + */ +#define FORKSKINNY_128_256_ROUNDS_AFTER 27 + +/** + * \brief State information for ForkSkinny-128-256. + */ +typedef struct +{ + // uint32_t TK1[4]; /**< First part of the tweakey */ + // uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +typedef struct +{ + /** Words of the full key schedule */ + uint32_t row0[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; + uint32_t row1[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; + +} forkskinny_128_256_tweakey_schedule_t; + +/** + * \brief Number of rounds of ForkSkinny-128-384 before forking. + */ +#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 + +/** + * \brief Number of rounds of ForkSkinny-128-384 after forking. + */ +#define FORKSKINNY_128_384_ROUNDS_AFTER 31 + +/** + * \brief State information for ForkSkinny-128-384. 
+ */ +typedef struct +{ + // uint32_t TK1[4]; /**< First part of the tweakey */ + // uint32_t TK2[4]; /**< Second part of the tweakey */ + // uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +typedef struct +{ + /** Words of the full key schedule */ + uint32_t row0[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; + uint32_t row1[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; + +} forkskinny_128_384_tweakey_schedule_t; + +/** + * \brief Number of rounds of ForkSkinny-64-192 before forking. + */ +#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 + +/** + * \brief Number of rounds of ForkSkinny-64-192 after forking. + */ +#define FORKSKINNY_64_192_ROUNDS_AFTER 23 + +/** + * \brief State information for ForkSkinny-64-192. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +typedef struct +{ + /** Words of the full key schedule */ + uint16_t row0[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; + uint16_t row1[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; + + +} forkskinny_64_192_tweakey_schedule_t; + + +void forkskinny_128_256_init_tks(forkskinny_128_256_tweakey_schedule_t *tks, const unsigned char key[32], uint8_t nb_rounds); + + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. 
+ * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last); + + +void forkskinny_128_384_init_tks(forkskinny_128_384_tweakey_schedule_t *tks, const unsigned char key[48], uint8_t nb_rounds); + + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last); + + +void forkskinny_64_192_init_tks(forkskinny_64_192_tweakey_schedule_t *tks, const unsigned char key[24], uint8_t nb_rounds); + + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. 
+ */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last); + + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. * * \param key 256-bit tweakey for ForkSkinny-128-256. diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_dec/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. 
The original version from the specification is * equivalent to: diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/aead-common.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/aead-common.h index 2be95eb..8429f59 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/aead-common.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/aead-common.h @@ -154,7 +154,7 @@ typedef void (*aead_xof_squeeze_t) /** * \brief No special AEAD features. */ -#define AEAD_FLAG_NONE 0x0000 +#define AEAD_FLAG_NONE 0x0000 /** * \brief The natural byte order of the AEAD cipher is little-endian. @@ -166,7 +166,18 @@ typedef void (*aead_xof_squeeze_t) * numbers as nonces. The application needs to know whether the sequence * number should be packed into the leading or trailing bytes of the nonce. */ -#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 +#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 + +/** + * \brief The AEAD mode provides side-channel protection for the key. + */ +#define AEAD_FLAG_SC_PROTECT_KEY 0x0002 + +/** + * \brief The AEAD mode provides side-channel protection for all block + * operations. + */ +#define AEAD_FLAG_SC_PROTECT_ALL 0x0004 /** * \brief Meta-information about an AEAD cipher. 
diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/forkae.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/forkae.c index 4a9671a..5b7ba3d 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/forkae.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/forkae.c @@ -22,7 +22,7 @@ #include "forkae.h" #include "internal-forkskinny.h" -#include "internal-util.h" +#include "internal-skinnyutil.h" #include aead_cipher_t const forkae_paef_64_192_cipher = { @@ -138,3 +138,458 @@ aead_cipher_t const forkae_saef_128_256_cipher = { #define FORKAE_TWEAKEY_REDUCED_SIZE 32 #define FORKAE_BLOCK_FUNC forkskinny_128_256 #include "internal-forkae-saef.h" + +/* Helper functions to implement the forking encrypt/decrypt block operations + * on top of the basic "perform N rounds" functions in internal-forkskinny.c */ + +/** + * \brief Number of rounds of ForkSkinny-128-256 before forking. + */ +#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 + +/** + * \brief Number of rounds of ForkSkinny-128-256 after forking. 
+ */ +#define FORKSKINNY_128_256_ROUNDS_AFTER 27 + +void forkskinny_128_256_encrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + + /* State stored per column */ + load_column_8(state.S, input); + + /* Run all of the rounds before the forking point */ + forkskinny_128_256_rounds(&state, 0, FORKSKINNY_128_256_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + store_column_8(output_right, state.S); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x51051001; /* Branching constant */ + state.S[1] ^= 0xa20a2002; + state.S[2] ^= 0x44144104; + state.S[3] ^= 0x88288208; + + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + store_column_8(output_left, state.S); + } else { + /* We only need the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + 
FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + store_column_8(output_right, state.S); + } +} + +void forkskinny_128_256_decrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_256_forward_tk + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_256_reverse_tk(&state, FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point, convert state to columns */ + memcpy(fstate.TK1, state.TK1, 16); + memcpy(fstate.TK2, state.TK2, 16); + rows_to_columns_32(fstate.S[0],fstate.S[1],fstate.S[2],fstate.S[3],state.S[0],state.S[1], state.S[2], state.S[3]); + + + /* Generate the left output block after another "before" 
rounds */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_256_rounds + (&fstate, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + store_column_8(output_right,fstate.S); +} + +/** + * \brief Number of rounds of ForkSkinny-128-384 before forking. + */ +#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 + +/** + * \brief Number of rounds of ForkSkinny-128-384 after forking. + */ +#define FORKSKINNY_128_384_ROUNDS_AFTER 31 + +void forkskinny_128_384_encrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + + /* State stored per column */ + load_column_8(state.S, input); + + /* Run all of the rounds before the forking point */ + forkskinny_128_384_rounds(&state, 0, FORKSKINNY_128_384_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] 
= state.S[3]; + + /* Generate the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + store_column_8(output_right, state.S); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x51051001; /* Branching constant */ + state.S[1] ^= 0xa20a2002; + state.S[2] ^= 0x44144104; + state.S[3] ^= 0x88288208; + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + store_column_8(output_left, state.S); + } else { + /* We only need the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + store_column_8(output_right, state.S); + } +} + +void forkskinny_128_384_decrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); 
+ + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_384_forward_tk + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_384_inv_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_384_reverse_tk(&state, FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point, convert state to columns */ + memcpy(fstate.TK1, state.TK1, 16); + memcpy(fstate.TK2, state.TK2, 16); + memcpy(fstate.TK3, state.TK3, 16); + rows_to_columns_32(fstate.S[0],fstate.S[1],fstate.S[2],fstate.S[3],state.S[0],state.S[1], state.S[2], state.S[3]); + + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_384_inv_rounds(&state, FORKSKINNY_128_384_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_384_rounds + (&fstate, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + store_column_8(output_right, fstate.S); +} + +/** + * \brief Number of rounds of ForkSkinny-64-192 before forking. + */ +#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 + +/** + * \brief Number of rounds of ForkSkinny-64-192 after forking. 
+ */ +#define FORKSKINNY_64_192_ROUNDS_AFTER 23 + +void forkskinny_64_192_encrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Run all of the rounds before the forking point */ + forkskinny_64_192_rounds(&state, 0, FORKSKINNY_64_192_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint16_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x1249U; /* Branching constant */ + state.S[1] ^= 0x36daU; + state.S[2] ^= 
0x5b7fU; + state.S[3] ^= 0xec81U; + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + } +} + +/* forkskinny_64_192_decrypt: inverts one branch of the ForkSkinny-64-192 fork and regenerates the other branch. key: 24 bytes of tweakey, loaded as twelve 16-bit words into TK1, TK2 and TK3 (four words each). input: the 8-byte block to invert; judging by the round range and branching constant undone below, it corresponds to the block the encrypt path stores in output_left. output_left: receives 8 bytes, the state after every round has been undone back to round 0. output_right: receives 8 bytes, the fork-point state advanced through rounds BEFORE..BEFORE+AFTER, the same range the encrypt path uses for its right block. Both outputs are stored unconditionally; there is no NULL check, unlike the encrypt path which has a right-only branch. */ void forkskinny_64_192_decrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_state_t fstate; + + /* Unpack the tweakey (key bytes 0-7 into TK1, 8-15 into TK2, 16-23 into TK3) and the 8-byte input block into S, one 16-bit word per two bytes */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Fast-forward the tweakey to the end of the key schedule (BEFORE + 2 * AFTER rounds): inversion has to start from the last round. NOTE(review): presumably forward_tk advances only TK1..TK3 and leaves S untouched - confirm against its definition. */ + forkskinny_64_192_forward_tk + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + + /* Undo the last "after" rounds (BEFORE + 2 * AFTER down to BEFORE + AFTER) on the input to get back + * to the forking 
point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Remove the branching constant: XOR is its own inverse, so XOR-ing the same four words again cancels them (the last two match the words visible at the end of the encrypt path) */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Roll the tweakey back another "after" rounds so that it lines up with round BEFORE, the starting round of both calls below. NOTE(review): this assumes the inv_rounds call above already rewound the tweakey to BEFORE + AFTER - confirm. */ + forkskinny_64_192_reverse_tk(&state, FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point: a plain struct copy, so the backward walk on state and the forward walk on fstate are independent */ + fstate = state; + + /* Generate the left output block by undoing the "before" rounds (round BEFORE down to round 0) */ + forkskinny_64_192_inv_rounds(&state, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the saved forking point (rounds BEFORE .. BEFORE + AFTER, with no branching constant applied) */ + forkskinny_64_192_rounds + (&fstate, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 
67 + .byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + .byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte 193 + .byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 + .byte 100 + .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + .byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 
194 + .byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 + .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + .byte 97 + .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 
+ .byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + .byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + .byte 93 + .byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 223 + .byte 22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section 
.progmem.data,"a",@progbits + .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 
76 + .byte 78 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_3, @object + .size table_3, 256 +table_3: + .byte 0 + .byte 128 + .byte 1 + .byte 129 + .byte 2 + .byte 130 + .byte 3 + .byte 131 + .byte 4 + .byte 132 + .byte 5 + .byte 133 + .byte 6 + .byte 134 + .byte 7 + .byte 135 + .byte 8 + .byte 136 + .byte 9 + .byte 137 + .byte 10 + .byte 138 + .byte 11 + .byte 139 + .byte 12 + .byte 140 + .byte 13 + .byte 141 + .byte 14 + .byte 142 + .byte 15 + .byte 143 + .byte 16 + .byte 144 + .byte 17 + .byte 145 + .byte 18 + .byte 146 + .byte 19 + .byte 147 + .byte 20 + .byte 148 + .byte 21 + .byte 149 + .byte 22 + .byte 150 + .byte 23 + .byte 151 + .byte 24 + .byte 152 + .byte 25 + .byte 153 + .byte 26 + .byte 154 + .byte 27 + .byte 155 + .byte 28 + .byte 156 + .byte 29 + .byte 157 + .byte 30 + .byte 158 + .byte 31 + .byte 159 + .byte 160 + .byte 32 + .byte 161 + .byte 33 + .byte 162 + .byte 34 + .byte 163 + .byte 35 + .byte 164 
+ .byte 36 + .byte 165 + .byte 37 + .byte 166 + .byte 38 + .byte 167 + .byte 39 + .byte 168 + .byte 40 + .byte 169 + .byte 41 + .byte 170 + .byte 42 + .byte 171 + .byte 43 + .byte 172 + .byte 44 + .byte 173 + .byte 45 + .byte 174 + .byte 46 + .byte 175 + .byte 47 + .byte 176 + .byte 48 + .byte 177 + .byte 49 + .byte 178 + .byte 50 + .byte 179 + .byte 51 + .byte 180 + .byte 52 + .byte 181 + .byte 53 + .byte 182 + .byte 54 + .byte 183 + .byte 55 + .byte 184 + .byte 56 + .byte 185 + .byte 57 + .byte 186 + .byte 58 + .byte 187 + .byte 59 + .byte 188 + .byte 60 + .byte 189 + .byte 61 + .byte 190 + .byte 62 + .byte 191 + .byte 63 + .byte 64 + .byte 192 + .byte 65 + .byte 193 + .byte 66 + .byte 194 + .byte 67 + .byte 195 + .byte 68 + .byte 196 + .byte 69 + .byte 197 + .byte 70 + .byte 198 + .byte 71 + .byte 199 + .byte 72 + .byte 200 + .byte 73 + .byte 201 + .byte 74 + .byte 202 + .byte 75 + .byte 203 + .byte 76 + .byte 204 + .byte 77 + .byte 205 + .byte 78 + .byte 206 + .byte 79 + .byte 207 + .byte 80 + .byte 208 + .byte 81 + .byte 209 + .byte 82 + .byte 210 + .byte 83 + .byte 211 + .byte 84 + .byte 212 + .byte 85 + .byte 213 + .byte 86 + .byte 214 + .byte 87 + .byte 215 + .byte 88 + .byte 216 + .byte 89 + .byte 217 + .byte 90 + .byte 218 + .byte 91 + .byte 219 + .byte 92 + .byte 220 + .byte 93 + .byte 221 + .byte 94 + .byte 222 + .byte 95 + .byte 223 + .byte 224 + .byte 96 + .byte 225 + .byte 97 + .byte 226 + .byte 98 + .byte 227 + .byte 99 + .byte 228 + .byte 100 + .byte 229 + .byte 101 + .byte 230 + .byte 102 + .byte 231 + .byte 103 + .byte 232 + .byte 104 + .byte 233 + .byte 105 + .byte 234 + .byte 106 + .byte 235 + .byte 107 + .byte 236 + .byte 108 + .byte 237 + .byte 109 + .byte 238 + .byte 110 + .byte 239 + .byte 111 + .byte 240 + .byte 112 + .byte 241 + .byte 113 + .byte 242 + .byte 114 + .byte 243 + .byte 115 + .byte 244 + .byte 116 + .byte 245 + .byte 117 + .byte 246 + .byte 118 + .byte 247 + .byte 119 + .byte 248 + .byte 120 + .byte 249 + .byte 121 + .byte 250 
+ .byte 122 + .byte 251 + .byte 123 + .byte 252 + .byte 124 + .byte 253 + .byte 125 + .byte 254 + .byte 126 + .byte 255 + .byte 127 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_4, @object + .size table_4, 174 +table_4: + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 7 + .byte 0 + .byte 15 + .byte 0 + .byte 15 + .byte 1 + .byte 15 + .byte 3 + .byte 14 + .byte 7 + .byte 13 + .byte 7 + .byte 11 + .byte 7 + .byte 7 + .byte 7 + .byte 15 + .byte 6 + .byte 15 + .byte 5 + .byte 14 + .byte 3 + .byte 12 + .byte 7 + .byte 9 + .byte 7 + .byte 3 + .byte 7 + .byte 7 + .byte 6 + .byte 15 + .byte 4 + .byte 14 + .byte 1 + .byte 13 + .byte 3 + .byte 10 + .byte 7 + .byte 5 + .byte 7 + .byte 11 + .byte 6 + .byte 7 + .byte 5 + .byte 14 + .byte 2 + .byte 12 + .byte 5 + .byte 8 + .byte 3 + .byte 0 + .byte 7 + .byte 1 + .byte 6 + .byte 3 + .byte 4 + .byte 6 + .byte 0 + .byte 13 + .byte 0 + .byte 11 + .byte 1 + .byte 7 + .byte 3 + .byte 14 + .byte 6 + .byte 13 + .byte 5 + .byte 10 + .byte 3 + .byte 4 + .byte 7 + .byte 9 + .byte 6 + .byte 3 + .byte 5 + .byte 6 + .byte 2 + .byte 12 + .byte 4 + .byte 8 + .byte 1 + .byte 1 + .byte 3 + .byte 2 + .byte 6 + .byte 5 + .byte 4 + .byte 10 + .byte 0 + .byte 5 + .byte 1 + .byte 11 + .byte 2 + .byte 6 + .byte 5 + .byte 12 + .byte 2 + .byte 8 + .byte 5 + .byte 0 + .byte 3 + .byte 0 + .byte 6 + .byte 1 + .byte 4 + .byte 2 + .byte 0 + .byte 5 + .byte 0 + .byte 11 + .byte 0 + .byte 7 + .byte 1 + .byte 15 + .byte 2 + .byte 14 + .byte 5 + .byte 12 + .byte 3 + .byte 8 + .byte 7 + .byte 1 + .byte 7 + .byte 3 + .byte 6 + .byte 7 + .byte 4 + .byte 14 + .byte 0 + .byte 13 + .byte 1 + .byte 11 + .byte 3 + .byte 6 + .byte 7 + .byte 13 + .byte 6 + .byte 11 + .byte 5 + .byte 6 + .byte 3 + .byte 12 + .byte 6 + .byte 9 + .byte 5 + .byte 2 + .byte 3 + .byte 4 + .byte 6 + .byte 9 + .byte 4 + .byte 2 + .byte 1 + .byte 5 + .byte 2 + .byte 10 + .byte 4 + .byte 4 + .byte 1 + .byte 9 + .byte 2 + .byte 2 + .byte 5 + .byte 4 + .byte 2 + .byte 8 + .byte 4 
+ .byte 0 + .byte 1 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_5, @object + .size table_5, 256 +table_5: + .byte 204 + .byte 198 + .byte 201 + .byte 192 + .byte 193 + .byte 202 + .byte 194 + .byte 203 + .byte 195 + .byte 200 + .byte 197 + .byte 205 + .byte 196 + .byte 206 + .byte 199 + .byte 207 + .byte 108 + .byte 102 + .byte 105 + .byte 96 + .byte 97 + .byte 106 + .byte 98 + .byte 107 + .byte 99 + .byte 104 + .byte 101 + .byte 109 + .byte 100 + .byte 110 + .byte 103 + .byte 111 + .byte 156 + .byte 150 + .byte 153 + .byte 144 + .byte 145 + .byte 154 + .byte 146 + .byte 155 + .byte 147 + .byte 152 + .byte 149 + .byte 157 + .byte 148 + .byte 158 + .byte 151 + .byte 159 + .byte 12 + .byte 6 + .byte 9 + .byte 0 + .byte 1 + .byte 10 + .byte 2 + .byte 11 + .byte 3 + .byte 8 + .byte 5 + .byte 13 + .byte 4 + .byte 14 + .byte 7 + .byte 15 + .byte 28 + .byte 22 + .byte 25 + .byte 16 + .byte 17 + .byte 26 + .byte 18 + .byte 27 + .byte 19 + .byte 24 + .byte 21 + .byte 29 + .byte 20 + .byte 30 + .byte 23 + .byte 31 + .byte 172 + .byte 166 + .byte 169 + .byte 160 + .byte 161 + .byte 170 + .byte 162 + .byte 171 + .byte 163 + .byte 168 + .byte 165 + .byte 173 + .byte 164 + .byte 174 + .byte 167 + .byte 175 + .byte 44 + .byte 38 + .byte 41 + .byte 32 + .byte 33 + .byte 42 + .byte 34 + .byte 43 + .byte 35 + .byte 40 + .byte 37 + .byte 45 + .byte 36 + .byte 46 + .byte 39 + .byte 47 + .byte 188 + .byte 182 + .byte 185 + .byte 176 + .byte 177 + .byte 186 + .byte 178 + .byte 187 + .byte 179 + .byte 184 + .byte 181 + .byte 189 + .byte 180 + .byte 190 + .byte 183 + .byte 191 + .byte 60 + .byte 54 + .byte 57 + .byte 48 + .byte 49 + .byte 58 + .byte 50 + .byte 59 + .byte 51 + .byte 56 + .byte 53 + .byte 61 + .byte 52 + .byte 62 + .byte 55 + .byte 63 + .byte 140 + .byte 134 + .byte 137 + .byte 128 + .byte 129 + .byte 138 + .byte 130 + .byte 139 + .byte 131 + .byte 136 + .byte 133 + .byte 141 + .byte 132 + .byte 142 + .byte 135 + .byte 143 + .byte 92 + .byte 86 + 
.byte 89 + .byte 80 + .byte 81 + .byte 90 + .byte 82 + .byte 91 + .byte 83 + .byte 88 + .byte 85 + .byte 93 + .byte 84 + .byte 94 + .byte 87 + .byte 95 + .byte 220 + .byte 214 + .byte 217 + .byte 208 + .byte 209 + .byte 218 + .byte 210 + .byte 219 + .byte 211 + .byte 216 + .byte 213 + .byte 221 + .byte 212 + .byte 222 + .byte 215 + .byte 223 + .byte 76 + .byte 70 + .byte 73 + .byte 64 + .byte 65 + .byte 74 + .byte 66 + .byte 75 + .byte 67 + .byte 72 + .byte 69 + .byte 77 + .byte 68 + .byte 78 + .byte 71 + .byte 79 + .byte 236 + .byte 230 + .byte 233 + .byte 224 + .byte 225 + .byte 234 + .byte 226 + .byte 235 + .byte 227 + .byte 232 + .byte 229 + .byte 237 + .byte 228 + .byte 238 + .byte 231 + .byte 239 + .byte 124 + .byte 118 + .byte 121 + .byte 112 + .byte 113 + .byte 122 + .byte 114 + .byte 123 + .byte 115 + .byte 120 + .byte 117 + .byte 125 + .byte 116 + .byte 126 + .byte 119 + .byte 127 + .byte 252 + .byte 246 + .byte 249 + .byte 240 + .byte 241 + .byte 250 + .byte 242 + .byte 251 + .byte 243 + .byte 248 + .byte 245 + .byte 253 + .byte 244 + .byte 254 + .byte 247 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_6, @object + .size table_6, 256 +table_6: + .byte 51 + .byte 52 + .byte 54 + .byte 56 + .byte 60 + .byte 58 + .byte 49 + .byte 62 + .byte 57 + .byte 50 + .byte 53 + .byte 55 + .byte 48 + .byte 59 + .byte 61 + .byte 63 + .byte 67 + .byte 68 + .byte 70 + .byte 72 + .byte 76 + .byte 74 + .byte 65 + .byte 78 + .byte 73 + .byte 66 + .byte 69 + .byte 71 + .byte 64 + .byte 75 + .byte 77 + .byte 79 + .byte 99 + .byte 100 + .byte 102 + .byte 104 + .byte 108 + .byte 106 + .byte 97 + .byte 110 + .byte 105 + .byte 98 + .byte 101 + .byte 103 + .byte 96 + .byte 107 + .byte 109 + .byte 111 + .byte 131 + .byte 132 + .byte 134 + .byte 136 + .byte 140 + .byte 138 + .byte 129 + .byte 142 + .byte 137 + .byte 130 + .byte 133 + .byte 135 + .byte 128 + .byte 139 + .byte 141 + .byte 143 + .byte 195 + .byte 196 + .byte 198 + .byte 200 + .byte 204 + 
.byte 202 + .byte 193 + .byte 206 + .byte 201 + .byte 194 + .byte 197 + .byte 199 + .byte 192 + .byte 203 + .byte 205 + .byte 207 + .byte 163 + .byte 164 + .byte 166 + .byte 168 + .byte 172 + .byte 170 + .byte 161 + .byte 174 + .byte 169 + .byte 162 + .byte 165 + .byte 167 + .byte 160 + .byte 171 + .byte 173 + .byte 175 + .byte 19 + .byte 20 + .byte 22 + .byte 24 + .byte 28 + .byte 26 + .byte 17 + .byte 30 + .byte 25 + .byte 18 + .byte 21 + .byte 23 + .byte 16 + .byte 27 + .byte 29 + .byte 31 + .byte 227 + .byte 228 + .byte 230 + .byte 232 + .byte 236 + .byte 234 + .byte 225 + .byte 238 + .byte 233 + .byte 226 + .byte 229 + .byte 231 + .byte 224 + .byte 235 + .byte 237 + .byte 239 + .byte 147 + .byte 148 + .byte 150 + .byte 152 + .byte 156 + .byte 154 + .byte 145 + .byte 158 + .byte 153 + .byte 146 + .byte 149 + .byte 151 + .byte 144 + .byte 155 + .byte 157 + .byte 159 + .byte 35 + .byte 36 + .byte 38 + .byte 40 + .byte 44 + .byte 42 + .byte 33 + .byte 46 + .byte 41 + .byte 34 + .byte 37 + .byte 39 + .byte 32 + .byte 43 + .byte 45 + .byte 47 + .byte 83 + .byte 84 + .byte 86 + .byte 88 + .byte 92 + .byte 90 + .byte 81 + .byte 94 + .byte 89 + .byte 82 + .byte 85 + .byte 87 + .byte 80 + .byte 91 + .byte 93 + .byte 95 + .byte 115 + .byte 116 + .byte 118 + .byte 120 + .byte 124 + .byte 122 + .byte 113 + .byte 126 + .byte 121 + .byte 114 + .byte 117 + .byte 119 + .byte 112 + .byte 123 + .byte 125 + .byte 127 + .byte 3 + .byte 4 + .byte 6 + .byte 8 + .byte 12 + .byte 10 + .byte 1 + .byte 14 + .byte 9 + .byte 2 + .byte 5 + .byte 7 + .byte 0 + .byte 11 + .byte 13 + .byte 15 + .byte 179 + .byte 180 + .byte 182 + .byte 184 + .byte 188 + .byte 186 + .byte 177 + .byte 190 + .byte 185 + .byte 178 + .byte 181 + .byte 183 + .byte 176 + .byte 187 + .byte 189 + .byte 191 + .byte 211 + .byte 212 + .byte 214 + .byte 216 + .byte 220 + .byte 218 + .byte 209 + .byte 222 + .byte 217 + .byte 210 + .byte 213 + .byte 215 + .byte 208 + .byte 219 + .byte 221 + .byte 223 + .byte 243 + .byte 244 
+ .byte 246 + .byte 248 + .byte 252 + .byte 250 + .byte 241 + .byte 254 + .byte 249 + .byte 242 + .byte 245 + .byte 247 + .byte 240 + .byte 251 + .byte 253 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_7, @object + .size table_7, 256 +table_7: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 57 + .byte 59 + 
.byte 61 + .byte 63 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_8, @object + .size table_8, 256 +table_8: + .byte 0 + .byte 8 + .byte 1 + .byte 9 + .byte 2 + .byte 10 + .byte 3 + .byte 11 + .byte 12 + .byte 4 + .byte 13 + .byte 5 + .byte 14 + .byte 6 + .byte 15 + .byte 7 + .byte 128 + .byte 136 + .byte 129 + .byte 137 + .byte 130 + .byte 138 + .byte 131 + .byte 139 + .byte 140 + .byte 132 + .byte 141 + .byte 133 + .byte 142 + .byte 134 + .byte 143 + .byte 135 + .byte 16 + .byte 24 + .byte 17 + .byte 25 + .byte 18 + .byte 26 + .byte 19 + .byte 27 + .byte 28 + .byte 20 + .byte 29 + .byte 21 + .byte 30 + .byte 22 + .byte 31 + .byte 23 + .byte 144 + .byte 152 + .byte 145 + .byte 153 + .byte 146 + .byte 154 + .byte 147 + .byte 155 + .byte 156 + 
.byte 148 + .byte 157 + .byte 149 + .byte 158 + .byte 150 + .byte 159 + .byte 151 + .byte 32 + .byte 40 + .byte 33 + .byte 41 + .byte 34 + .byte 42 + .byte 35 + .byte 43 + .byte 44 + .byte 36 + .byte 45 + .byte 37 + .byte 46 + .byte 38 + .byte 47 + .byte 39 + .byte 160 + .byte 168 + .byte 161 + .byte 169 + .byte 162 + .byte 170 + .byte 163 + .byte 171 + .byte 172 + .byte 164 + .byte 173 + .byte 165 + .byte 174 + .byte 166 + .byte 175 + .byte 167 + .byte 48 + .byte 56 + .byte 49 + .byte 57 + .byte 50 + .byte 58 + .byte 51 + .byte 59 + .byte 60 + .byte 52 + .byte 61 + .byte 53 + .byte 62 + .byte 54 + .byte 63 + .byte 55 + .byte 176 + .byte 184 + .byte 177 + .byte 185 + .byte 178 + .byte 186 + .byte 179 + .byte 187 + .byte 188 + .byte 180 + .byte 189 + .byte 181 + .byte 190 + .byte 182 + .byte 191 + .byte 183 + .byte 192 + .byte 200 + .byte 193 + .byte 201 + .byte 194 + .byte 202 + .byte 195 + .byte 203 + .byte 204 + .byte 196 + .byte 205 + .byte 197 + .byte 206 + .byte 198 + .byte 207 + .byte 199 + .byte 64 + .byte 72 + .byte 65 + .byte 73 + .byte 66 + .byte 74 + .byte 67 + .byte 75 + .byte 76 + .byte 68 + .byte 77 + .byte 69 + .byte 78 + .byte 70 + .byte 79 + .byte 71 + .byte 208 + .byte 216 + .byte 209 + .byte 217 + .byte 210 + .byte 218 + .byte 211 + .byte 219 + .byte 220 + .byte 212 + .byte 221 + .byte 213 + .byte 222 + .byte 214 + .byte 223 + .byte 215 + .byte 80 + .byte 88 + .byte 81 + .byte 89 + .byte 82 + .byte 90 + .byte 83 + .byte 91 + .byte 92 + .byte 84 + .byte 93 + .byte 85 + .byte 94 + .byte 86 + .byte 95 + .byte 87 + .byte 224 + .byte 232 + .byte 225 + .byte 233 + .byte 226 + .byte 234 + .byte 227 + .byte 235 + .byte 236 + .byte 228 + .byte 237 + .byte 229 + .byte 238 + .byte 230 + .byte 239 + .byte 231 + .byte 96 + .byte 104 + .byte 97 + .byte 105 + .byte 98 + .byte 106 + .byte 99 + .byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + 
.byte 250 + .byte 243 + .byte 251 + .byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + .byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global forkskinny_128_256_rounds + .type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi 
r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if 
defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 
+ eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + 
mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std 
Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global forkskinny_128_256_inv_rounds + .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd 
r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif 
defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + 
lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 
+#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 
+ std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r12 + movw r20,r14 
+ movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + dec r22 + breq 5109f + 
rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + + .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std 
Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld 
r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk + + .text +.global forkskinny_128_384_rounds + .type forkskinny_128_384_rounds, @function +forkskinny_128_384_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw 
r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + mov r30,r2 +#if 
defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + 
mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + 
lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + ldd r20,Y+45 + ldd r21,Y+46 + ldd r16,Y+47 + ldd r17,Y+48 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z 
+#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+33 + std Y+41,r0 + ldd r0,Y+34 + std Y+42,r0 + ldd r0,Y+35 + std Y+43,r0 + ldd r0,Y+36 + std Y+44,r0 + ldd r0,Y+37 + std Y+45,r0 + ldd r0,Y+38 + std Y+46,r0 + ldd r0,Y+39 + std Y+47,r0 + ldd r0,Y+40 + std Y+48,r0 + std Y+33,r19 + std Y+34,r17 + std Y+35,r18 + std Y+36,r21 + std Y+37,r26 + std Y+38,r16 + std Y+39,r20 + std Y+40,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd 
r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds + + .text /* forkskinny_128_384_inv_rounds. In: r25:r24 = address of a 64-byte block (copied to Z below; presumably the cipher state passed as first argument under the avr-gcc calling convention - confirm against the C prototype); r22 and r20 = two byte counters (presumably the round to start from and the round to stop at - confirm). Block bytes 48..63 are worked on in r2-r15,r24,r25; block bytes 0..47 are worked on in a stack frame at Y+1..Y+48. Everything is copied back to the block after the loop. NOTE(review): the exit test is at the bottom of the loop, so r22 == r20 on entry does not give zero passes. */ +.global forkskinny_128_384_inv_rounds + .type forkskinny_128_384_inv_rounds, @function +forkskinny_128_384_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 /* r28, r29 and r2-r17 saved; popped in reverse order before ret */ + movw r30,r24 /* Z = block address from r25:r24 */ + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 /* Y = SP - 49 and SP = Y: 49-byte frame. I/O 0x3d, 0x3e, 0x3f are SPL, SPH, SREG; SREG is saved, interrupts are disabled, and SREG is put back between the two SP byte writes */ +.L__stack_usage = 67 /* 67 = 18 pushed registers plus the 49-byte frame; the later pushes of r31, r30 (and r0 when RAMPZ is defined) are not counted */ + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 /* block bytes 48..63 now live in r2-r15,r24,r25 */ + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std
Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 /* block bytes 0..47 copied to Y+1..Y+48 */ + lsl r20 + std Y+49,r20 /* Y+49 = 2*r20: the value of r22 at which the loop stops */ + push r31 + push r30 /* block address parked on the stack; Z is used for table lookups from here on */ + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 /* caller's RAMPZ saved; restored at label 5348 */ + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 /* r22 = 2*r22; it is decremented twice per pass, once per table_4 byte read */ +118: /* loop head */ + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 /* Y+1..Y+16: bytes 9..16 moved down to 1..8; old bytes 1..8 stored into 9..16 in the order 3,1,5,8,7,4,6,2 */ + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 /* Y+17..Y+32 gets the same shuffle, but each of these eight bytes is first replaced by table_3[byte]. Only r30 is changed per lookup, so the table presumably must not cross a 256-byte page - confirm at its definition */ + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if
defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) /* Z now addresses table_2, used for Y+33..Y+48 below */ +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + ldd r20,Y+37 + ldd r21,Y+38 + ldd r16,Y+39 + ldd r17,Y+40 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm
r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+41 + std Y+33,r0 + ldd r0,Y+42 + std Y+34,r0 + ldd r0,Y+43 + std Y+35,r0 + ldd r0,Y+44 + std Y+36,r0 + ldd r0,Y+45 + std Y+37,r0 + ldd r0,Y+46 + std Y+38,r0 + ldd r0,Y+47 + std Y+39,r0 + ldd r0,Y+48 + std Y+40,r0 + std Y+41,r26 + std Y+42,r18 + std Y+43,r20 + std Y+44,r17 + std Y+45,r16 + std Y+46,r27 + std Y+47,r21 + std Y+48,r19 /* Y+33..Y+48 shuffled like the two regions before it, with table_2 applied */ + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 /* the 16 register bytes are rotated by four positions: r2-r5 take old r6-r9, r6-r9 take old r10-r13, r10-r13 take old r14,r15,r24,r25, and those take old r2-r5 */ + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 /* XOR mixing between the four 4-byte groups (together with the rotation above, presumably the inverse column-mixing step - confirm against the cipher spec) */ + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 /* byte rotations inside r6-r9 (by one), r10-r13 (by two) and r14,r15,r24,r25 (by one, other direction); presumably the inverse row-shift step */ + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor
r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 /* r2-r9 have now been XORed with Y+1..Y+8, Y+17..Y+24 and Y+33..Y+40 */ + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 /* table_4 is indexed by r22: the byte read after the first decrement is XORed into r6, the byte read after the second into r2 (presumably the two round-constant bytes for this round) */ +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 /* constant 2 XORed into r10 and r4 */ + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) /* each of the 16 register bytes is replaced by table_1[byte] below (presumably the inverse S-box) */ +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif
defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) /* Z back on table_3, which the top of the loop expects */ +#if defined(RAMPZ) +
ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b /* repeat until r22 equals the stop value kept in Y+49 */ +5348: /* loop exit: restore RAMPZ (if used) and the block address, then write everything back */ +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 /* registers written to block bytes 48..63 */ + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 /* frame bytes Y+1..Y+48 written to block bytes 0..47 */ + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 /* 49-byte frame released */ + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop
r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds + + .text /* forkskinny_128_384_forward_tk. In: r25:r24 = address of a block of at least 48 bytes (copied to Z below; presumably the tweakey part of the cipher state under the avr-gcc calling convention - confirm against the C prototype); r22 = number of loop passes. Block bytes 0..15 are worked on in r4-r15,r24,r25,r16,r17; block bytes 16..47 are worked on in a stack frame at Y+1..Y+32. All 48 bytes are written back after the loop. NOTE(review): r22 is decremented and tested at the bottom of the loop, so r22 == 0 on entry would run 256 passes, not zero. */ +.global forkskinny_128_384_forward_tk + .type forkskinny_128_384_forward_tk, @function +forkskinny_128_384_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 /* r28, r29 and r2-r17 saved; popped in reverse order before ret */ + movw r30,r24 /* Z = block address from r25:r24 */ + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 /* Y = SP - 32 and SP = Y: 32-byte frame. I/O 0x3d, 0x3e, 0x3f are SPL, SPH, SREG; SREG is put back between the two SP byte writes */ +.L__stack_usage = 50 /* 50 = 18 pushed registers plus the 32-byte frame; the later pushes of r31, r30 (and r0 when RAMPZ is defined) are not counted */ + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 /* block bytes 0..15 now live in r4-r15,r24,r25,r16,r17 (in that order) */ + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 /* block bytes 16..47 copied to Y+1..Y+32 */ + push r31 + push r30 /* block address parked on the stack; Z is used for table lookups from here on */ + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 /* caller's RAMPZ saved; restored after the loop */ + out
_SFR_IO_ADDR(RAMPZ),r23 +#endif +83: /* loop head */ + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 /* register bytes permuted: old bytes 0..7 move to positions 8..15, and positions 0..7 receive old bytes 9,15,8,13,10,14,12,11 */ + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 /* Y+1..Y+8 moved up to Y+9..Y+16 (r23 is scratch); the old Y+9..Y+16 bytes, held in registers, are each replaced by table_2[byte] below. Only r30 is changed per lookup, so the table presumably must not cross a 256-byte page - confirm at its definition */ + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20
+ std Y+6,r2 + std Y+7,r26 + std Y+8,r21 /* Y+1..Y+8 now hold the looked-up bytes in the same permuted order as the register half */ + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) /* Z now addresses table_3, used for Y+17..Y+32 below */ +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r26,Y+29 + ldd r27,Y+30 + ldd r2,Y+31 + ldd r3,Y+32 + ldd r23,Y+17 + std Y+25,r23 + ldd r23,Y+18 + std Y+26,r23 + ldd r23,Y+19 + std Y+27,r23 + ldd r23,Y+20 + std Y+28,r23 + ldd r23,Y+21 + std Y+29,r23 + ldd r23,Y+22 + std Y+30,r23 + ldd r23,Y+23 + std Y+31,r23 + ldd r23,Y+24 + std Y+32,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+17,r19 + std Y+18,r3 + std Y+19,r18 + std Y+20,r27 + std Y+21,r20 + std Y+22,r2 + std Y+23,r26 + std Y+24,r21 /* Y+17..Y+32 handled exactly like Y+1..Y+16, with table_3 in place of table_2 */ + ldi r30,lo8(table_2) + ldi
r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b /* forkskinny_128_384_forward_tk loop bottom: one pass per unit of r22, test after the decrement */ +5183: /* loop exit: restore RAMPZ (if used) and the block address, then write everything back */ +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 /* registers written to block bytes 0..15 */ + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 /* frame bytes Y+1..Y+32 written to block bytes 16..47 */ + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 /* 32-byte frame released */ + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk + + .text /* forkskinny_128_384_reverse_tk. In: r25:r24 = address of a block of at least 48 bytes (copied to Z below; presumably the tweakey part of the cipher state under the avr-gcc calling convention - confirm against the C prototype); r22 = number of loop passes. Block bytes 0..15 are worked on in r2-r17; block bytes 16..47 are worked on in a stack frame at Y+1..Y+32. All 48 bytes are written back after the loop. Each pass applies the inverse of the byte permutation used by forkskinny_128_384_forward_tk, and uses table_3 where that routine uses table_2 and vice versa (presumably the two tables undo each other - confirm at their definitions). NOTE(review): r22 is decremented and tested at the bottom of the loop, so r22 == 0 on entry would run 256 passes, not zero. */ +.global forkskinny_128_384_reverse_tk + .type forkskinny_128_384_reverse_tk, @function +forkskinny_128_384_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13
+ push r14 + push r15 + push r16 + push r17 /* r28, r29 and r2-r17 saved; popped in reverse order before ret */ + movw r30,r24 /* Z = block address from r25:r24; r24 and r25 are reused as scratch inside the loop */ + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 /* Y = SP - 32 and SP = Y: 32-byte frame. I/O 0x3d, 0x3e, 0x3f are SPL, SPH, SREG; SREG is put back between the two SP byte writes */ +.L__stack_usage = 50 /* 50 = 18 pushed registers plus the 32-byte frame; the later pushes of r31, r30 (and r0 when RAMPZ is defined) are not counted */ + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 /* block bytes 0..15 now live in r2-r17 */ + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 /* block bytes 16..47 copied to Y+1..Y+32 */ + push r31 + push r30 /* block address parked on the stack; Z is used for table lookups from here on */ + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 /* caller's RAMPZ saved; restored at label 5183 */ + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: /* loop head */ + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 /* register bytes permuted: old bytes 8..15 move to positions 0..7, and positions 8..15 receive old bytes 2,0,4,7,6,3,5,1 */ + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23
+ ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 /* Y+9..Y+16 moved down to Y+1..Y+8 (r23 is scratch); the old Y+1..Y+8 bytes, held in registers, are each replaced by table_3[byte] below. Only r30 is changed per lookup, so the table presumably must not cross a 256-byte page - confirm at its definition */ + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 /* Y+9..Y+16 now hold the looked-up bytes in the same permuted order as the register half */ + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) /* Z now addresses table_2, used for Y+17..Y+32 below */ +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 +
ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 /* Y+17..Y+32 handled exactly like Y+1..Y+16, with table_2 in place of table_3 */ + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) /* Z back on table_3, which the top of the loop expects */ +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b /* one pass per unit of r22, test after the decrement */ +5183: /* loop exit: restore RAMPZ (if used) and the block address, then write everything back */ +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 /* registers written to block bytes 0..15; the frame copy-back to bytes 16..47 follows */ + ldd
r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + 
std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 
+#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out 
_SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 
+#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push 
r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if 
defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or 
r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm 
r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std 
Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + 
pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 + pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop 
r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + 
mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or 
r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + and r11,r16 + or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/internal-forkskinny.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/internal-forkskinny.c index af29f77..b96a04c 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/internal-forkskinny.c @@ -22,12 +22,23 @@ #include "internal-forkskinny.h" #include "internal-skinnyutil.h" -#include /** * \brief 7-bit round constants for all ForkSkinny block ciphers. 
*/ -static unsigned char const RC[87] = {0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7e, 0x7d, 0x7b, 0x77, 0x6f, 0x5f, 0x3e, 0x7c, 0x79, 0x73, 0x67, 0x4f, 0x1e, 0x3d, 0x7a, 0x75, 0x6b, 0x57, 0x2e, 0x5c, 0x38, 0x70, 0x61, 0x43, 0x06, 0x0d, 0x1b, 0x37, 0x6e, 0x5d, 0x3a, 0x74, 0x69, 0x53, 0x26, 0x4c, 0x18, 0x31, 0x62, 0x45, 0x0a, 0x15, 0x2b, 0x56, 0x2c, 0x58, 0x30, 0x60, 0x41, 0x02, 0x05, 0x0b, 0x17, 0x2f, 0x5e, 0x3c, 0x78, 0x71, 0x63, 0x47, 0x0e, 0x1d, 0x3b, 0x76, 0x6d, 0x5b,0x36, 0x6c, 0x59, 0x32, 0x64, 0x49, 0x12, 0x25, 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10}; +static unsigned char const RC[87] = { + 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7e, 0x7d, + 0x7b, 0x77, 0x6f, 0x5f, 0x3e, 0x7c, 0x79, 0x73, + 0x67, 0x4f, 0x1e, 0x3d, 0x7a, 0x75, 0x6b, 0x57, + 0x2e, 0x5c, 0x38, 0x70, 0x61, 0x43, 0x06, 0x0d, + 0x1b, 0x37, 0x6e, 0x5d, 0x3a, 0x74, 0x69, 0x53, + 0x26, 0x4c, 0x18, 0x31, 0x62, 0x45, 0x0a, 0x15, + 0x2b, 0x56, 0x2c, 0x58, 0x30, 0x60, 0x41, 0x02, + 0x05, 0x0b, 0x17, 0x2f, 0x5e, 0x3c, 0x78, 0x71, + 0x63, 0x47, 0x0e, 0x1d, 0x3b, 0x76, 0x6d, 0x5b, + 0x36, 0x6c, 0x59, 0x32, 0x64, 0x49, 0x12, 0x25, + 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 +}; static const uint32_t T[256] = {0x65656565, 0x4c4c4c4c, 0x6a6a6a6a, 0x42424242, 0x4b4b4b4b, 0x63636363, 0x43434343, 0x6b6b6b6b, 0x55555555, 0x75757575, 0x5a5a5a5a, 0x7a7a7a7a, 0x53535353, 0x73737373, 0x5b5b5b5b, 0x7b7b7b7b, 0x35353535, 0x8c8c8c8c, 0x3a3a3a3a, 0x81818181, 0x89898989, 0x33333333, 0x80808080, 0x3b3b3b3b, 0x95959595, 0x25252525, 0x98989898, 0x2a2a2a2a, 0x90909090, 0x23232323, 0x99999999, 0x2b2b2b2b, 0xe5e5e5e5, 0xcccccccc, 0xe8e8e8e8, 0xc1c1c1c1, 0xc9c9c9c9, 0xe0e0e0e0, 0xc0c0c0c0, 0xe9e9e9e9, 0xd5d5d5d5, 0xf5f5f5f5, 0xd8d8d8d8, 0xf8f8f8f8, 0xd0d0d0d0, 0xf0f0f0f0, 0xd9d9d9d9, 0xf9f9f9f9, 0xa5a5a5a5, 0x1c1c1c1c, 0xa8a8a8a8, 0x12121212, 0x1b1b1b1b, 0xa0a0a0a0, 0x13131313, 0xa9a9a9a9, 0x05050505, 0xb5b5b5b5, 0x0a0a0a0a, 0xb8b8b8b8, 0x03030303, 0xb0b0b0b0, 0x0b0b0b0b, 0xb9b9b9b9, 0x32323232, 0x88888888, 0x3c3c3c3c, 0x85858585, 
0x8d8d8d8d, 0x34343434, 0x84848484, 0x3d3d3d3d, 0x91919191, 0x22222222, 0x9c9c9c9c, 0x2c2c2c2c, 0x94949494, 0x24242424, 0x9d9d9d9d, 0x2d2d2d2d, 0x62626262, 0x4a4a4a4a, 0x6c6c6c6c, 0x45454545, 0x4d4d4d4d, 0x64646464, 0x44444444, 0x6d6d6d6d, 0x52525252, 0x72727272, 0x5c5c5c5c, 0x7c7c7c7c, 0x54545454, 0x74747474, 0x5d5d5d5d, 0x7d7d7d7d, 0xa1a1a1a1, 0x1a1a1a1a, 0xacacacac, 0x15151515, 0x1d1d1d1d, 0xa4a4a4a4, 0x14141414, 0xadadadad, 0x02020202, 0xb1b1b1b1, 0x0c0c0c0c, 0xbcbcbcbc, 0x04040404, 0xb4b4b4b4, 0x0d0d0d0d, 0xbdbdbdbd, 0xe1e1e1e1, 0xc8c8c8c8, 0xecececec, 0xc5c5c5c5, 0xcdcdcdcd, 0xe4e4e4e4, 0xc4c4c4c4, 0xedededed, 0xd1d1d1d1, 0xf1f1f1f1, 0xdcdcdcdc, 0xfcfcfcfc, 0xd4d4d4d4, 0xf4f4f4f4, 0xdddddddd, 0xfdfdfdfd, 0x36363636, 0x8e8e8e8e, 0x38383838, 0x82828282, 0x8b8b8b8b, 0x30303030, 0x83838383, 0x39393939, 0x96969696, 0x26262626, 0x9a9a9a9a, 0x28282828, 0x93939393, 0x20202020, 0x9b9b9b9b, 0x29292929, 0x66666666, 0x4e4e4e4e, 0x68686868, 0x41414141, 0x49494949, 0x60606060, 0x40404040, 0x69696969, 0x56565656, 0x76767676, 0x58585858, 0x78787878, 0x50505050, 0x70707070, 0x59595959, 0x79797979, 0xa6a6a6a6, 0x1e1e1e1e, 0xaaaaaaaa, 0x11111111, 0x19191919, 0xa3a3a3a3, 0x10101010, 0xabababab, 0x06060606, 0xb6b6b6b6, 0x08080808, 0xbabababa, 0x00000000, 0xb3b3b3b3, 0x09090909, 0xbbbbbbbb, 0xe6e6e6e6, 0xcececece, 0xeaeaeaea, 0xc2c2c2c2, 0xcbcbcbcb, 0xe3e3e3e3, 0xc3c3c3c3, 0xebebebeb, 0xd6d6d6d6, 0xf6f6f6f6, 0xdadadada, 0xfafafafa, 0xd3d3d3d3, 0xf3f3f3f3, 0xdbdbdbdb, 0xfbfbfbfb, 0x31313131, 0x8a8a8a8a, 0x3e3e3e3e, 0x86868686, 0x8f8f8f8f, 0x37373737, 0x87878787, 0x3f3f3f3f, 0x92929292, 0x21212121, 0x9e9e9e9e, 0x2e2e2e2e, 0x97979797, 0x27272727, 0x9f9f9f9f, 0x2f2f2f2f, 0x61616161, 0x48484848, 0x6e6e6e6e, 0x46464646, 0x4f4f4f4f, 0x67676767, 0x47474747, 0x6f6f6f6f, 0x51515151, 0x71717171, 0x5e5e5e5e, 0x7e7e7e7e, 0x57575757, 0x77777777, 0x5f5f5f5f, 0x7f7f7f7f, 0xa2a2a2a2, 0x18181818, 0xaeaeaeae, 0x16161616, 0x1f1f1f1f, 0xa7a7a7a7, 0x17171717, 0xafafafaf, 0x01010101, 0xb2b2b2b2, 
0x0e0e0e0e, 0xbebebebe, 0x07070707, 0xb7b7b7b7, 0x0f0f0f0f, 0xbfbfbfbf, 0xe2e2e2e2, 0xcacacaca, 0xeeeeeeee, 0xc6c6c6c6, 0xcfcfcfcf, 0xe7e7e7e7, 0xc7c7c7c7, 0xefefefef, 0xd2d2d2d2, 0xf2f2f2f2, 0xdededede, 0xfefefefe, 0xd7d7d7d7, 0xf7f7f7f7, 0xdfdfdfdf, 0xffffffff}; static const uint32_t T_inv[256] = {0xacacacac, 0xe8e8e8e8, 0x68686868, 0x3c3c3c3c, 0x6c6c6c6c, 0x38383838, 0xa8a8a8a8, 0xecececec, 0xaaaaaaaa, 0xaeaeaeae, 0x3a3a3a3a, 0x3e3e3e3e, 0x6a6a6a6a, 0x6e6e6e6e, 0xeaeaeaea, 0xeeeeeeee, 0xa6a6a6a6, 0xa3a3a3a3, 0x33333333, 0x36363636, 0x66666666, 0x63636363, 0xe3e3e3e3, 0xe6e6e6e6, 0xe1e1e1e1, 0xa4a4a4a4, 0x61616161, 0x34343434, 0x31313131, 0x64646464, 0xa1a1a1a1, 0xe4e4e4e4, 0x8d8d8d8d, 0xc9c9c9c9, 0x49494949, 0x1d1d1d1d, 0x4d4d4d4d, 0x19191919, 0x89898989, 0xcdcdcdcd, 0x8b8b8b8b, 0x8f8f8f8f, 0x1b1b1b1b, 0x1f1f1f1f, 0x4b4b4b4b, 0x4f4f4f4f, 0xcbcbcbcb, 0xcfcfcfcf, 0x85858585, 0xc0c0c0c0, 0x40404040, 0x15151515, 0x45454545, 0x10101010, 0x80808080, 0xc5c5c5c5, 0x82828282, 0x87878787, 0x12121212, 0x17171717, 0x42424242, 0x47474747, 0xc2c2c2c2, 0xc7c7c7c7, 0x96969696, 0x93939393, 0x03030303, 0x06060606, 0x56565656, 0x53535353, 0xd3d3d3d3, 0xd6d6d6d6, 0xd1d1d1d1, 0x94949494, 0x51515151, 0x04040404, 0x01010101, 0x54545454, 0x91919191, 0xd4d4d4d4, 0x9c9c9c9c, 0xd8d8d8d8, 0x58585858, 0x0c0c0c0c, 0x5c5c5c5c, 0x08080808, 0x98989898, 0xdcdcdcdc, 0x9a9a9a9a, 0x9e9e9e9e, 0x0a0a0a0a, 0x0e0e0e0e, 0x5a5a5a5a, 0x5e5e5e5e, 0xdadadada, 0xdededede, 0x95959595, 0xd0d0d0d0, 0x50505050, 0x05050505, 0x55555555, 0x00000000, 0x90909090, 0xd5d5d5d5, 0x92929292, 0x97979797, 0x02020202, 0x07070707, 0x52525252, 0x57575757, 0xd2d2d2d2, 0xd7d7d7d7, 0x9d9d9d9d, 0xd9d9d9d9, 0x59595959, 0x0d0d0d0d, 0x5d5d5d5d, 0x09090909, 0x99999999, 0xdddddddd, 0x9b9b9b9b, 0x9f9f9f9f, 0x0b0b0b0b, 0x0f0f0f0f, 0x5b5b5b5b, 0x5f5f5f5f, 0xdbdbdbdb, 0xdfdfdfdf, 0x16161616, 0x13131313, 0x83838383, 0x86868686, 0x46464646, 0x43434343, 0xc3c3c3c3, 0xc6c6c6c6, 0x41414141, 0x14141414, 0xc1c1c1c1, 0x84848484, 0x11111111, 
0x44444444, 0x81818181, 0xc4c4c4c4, 0x1c1c1c1c, 0x48484848, 0xc8c8c8c8, 0x8c8c8c8c, 0x4c4c4c4c, 0x18181818, 0x88888888, 0xcccccccc, 0x1a1a1a1a, 0x1e1e1e1e, 0x8a8a8a8a, 0x8e8e8e8e, 0x4a4a4a4a, 0x4e4e4e4e, 0xcacacaca, 0xcececece, 0x35353535, 0x60606060, 0xe0e0e0e0, 0xa5a5a5a5, 0x65656565, 0x30303030, 0xa0a0a0a0, 0xe5e5e5e5, 0x32323232, 0x37373737, 0xa2a2a2a2, 0xa7a7a7a7, 0x62626262, 0x67676767, 0xe2e2e2e2, 0xe7e7e7e7, 0x3d3d3d3d, 0x69696969, 0xe9e9e9e9, 0xadadadad, 0x6d6d6d6d, 0x39393939, 0xa9a9a9a9, 0xedededed, 0x3b3b3b3b, 0x3f3f3f3f, 0xabababab, 0xafafafaf, 0x6b6b6b6b, 0x6f6f6f6f, 0xebebebeb, 0xefefefef, 0x26262626, 0x23232323, 0xb3b3b3b3, 0xb6b6b6b6, 0x76767676, 0x73737373, 0xf3f3f3f3, 0xf6f6f6f6, 0x71717171, 0x24242424, 0xf1f1f1f1, 0xb4b4b4b4, 0x21212121, 0x74747474, 0xb1b1b1b1, 0xf4f4f4f4, 0x2c2c2c2c, 0x78787878, 0xf8f8f8f8, 0xbcbcbcbc, 0x7c7c7c7c, 0x28282828, 0xb8b8b8b8, 0xfcfcfcfc, 0x2a2a2a2a, 0x2e2e2e2e, 0xbabababa, 0xbebebebe, 0x7a7a7a7a, 0x7e7e7e7e, 0xfafafafa, 0xfefefefe, 0x25252525, 0x70707070, 0xf0f0f0f0, 0xb5b5b5b5, 0x75757575, 0x20202020, 0xb0b0b0b0, 0xf5f5f5f5, 0x22222222, 0x27272727, 0xb2b2b2b2, 0xb7b7b7b7, 0x72727272, 0x77777777, 0xf2f2f2f2, 0xf7f7f7f7, 0x2d2d2d2d, 0x79797979, 0xf9f9f9f9, 0xbdbdbdbd, 0x7d7d7d7d, 0x29292929, 0xb9b9b9b9, 0xfdfdfdfd, 0x2b2b2b2b, 0x2f2f2f2f, 0xbbbbbbbb, 0xbfbfbfbf, 0x7b7b7b7b, 0x7f7f7f7f, 0xfbfbfbfb, 0xffffffff}; @@ -36,77 +47,12 @@ static const uint32_t AC_column0[87] = {0x1000101, 0x3000303, 0x7000707, 0xf000f static const uint32_t AC_column1[87] = {0x0, 0x0, 0x0, 0x0, 0x10000, 0x30000, 0x70000, 0x70000, 0x70000, 0x70000, 0x60000, 0x50000, 0x30000, 0x70000, 0x70000, 0x70000, 0x60000, 0x40000, 0x10000, 0x30000, 0x70000, 0x70000, 0x60000, 0x50000, 0x20000, 0x50000, 0x30000, 0x70000, 0x60000, 0x40000, 0x0, 0x0, 0x10000, 0x30000, 0x60000, 0x50000, 0x30000, 0x70000, 0x60000, 0x50000, 0x20000, 0x40000, 0x10000, 0x30000, 0x60000, 0x40000, 0x0, 0x10000, 0x20000, 0x50000, 0x20000, 0x50000, 0x30000, 0x60000, 0x40000, 0x0, 0x0, 
0x0, 0x10000, 0x20000, 0x50000, 0x30000, 0x70000, 0x70000, 0x60000, 0x40000, 0x0, 0x10000, 0x30000, 0x70000, 0x60000, 0x50000, 0x30000, 0x60000, 0x50000, 0x30000, 0x60000, 0x40000, 0x10000, 0x20000, 0x40000, 0x10000, 0x20000, 0x50000, 0x20000, 0x40000, 0x10000}; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 +#if !defined(__AVR__) -/** - * \brief State information for ForkSkinny-128-256. - */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_256_state_t; - -#define load_column_8(dest, src) \ - do { \ - dest[0] = (src[12]) << 24 | (src[8]) << 16 | (src[4]) << 8 | (src[0]); \ - dest[1] = (src[13]) << 24 | (src[9]) << 16 | (src[5]) << 8 | (src[1]); \ - dest[2] = (src[14]) << 24 | (src[10]) << 16 | (src[6]) << 8 | (src[2]); \ - dest[3] = (src[15]) << 24 | (src[11]) << 16 | (src[7]) << 8 | (src[3]); \ - } while(0) - -#define store_column_8(dest, src) \ - do { \ - dest[0] = (uint8_t) (src[0]); dest[1] = (uint8_t) (src[1]); dest[2] = (uint8_t) (src[2]); dest[3] = (uint8_t) (src[3]); \ - dest[4] = (uint8_t) (src[0]>>8); dest[5] = (uint8_t) (src[1]>>8); dest[6] = (uint8_t) (src[2]>>8); dest[7] = (uint8_t) (src[3]>>8); \ - dest[8] = (uint8_t) (src[0]>>16);dest[9] = (uint8_t) (src[1]>>16);dest[10]= (uint8_t) (src[2]>>16);dest[11]= (uint8_t)(src[3]>>16); \ - dest[12]= (uint8_t) (src[0]>>24);dest[13]= (uint8_t) (src[1]>>24);dest[14]= (uint8_t) (src[2]>>24);dest[15]= (uint8_t)(src[3]>>24); \ - } while(0) - -#define rows_to_columns_32(columns, row0, row1, row2, row3) \ - do { \ - columns[0] = (row3 & 0xFF) << 24|(row2 & 0xFF) << 16|(row1 & 0xFF) << 8 | (row0 & 0xFF);\ - columns[1] = (row3 & 0xFF00) << 16|(row2 & 0xFF00) << 8 |(row1 & 0xFF00) | 
(row0>>8 & 0xFF);\ - columns[2] = (row3 & 0xFF0000) << 8 |(row2 & 0xFF0000) |(row1 & 0xFF0000) >> 8 | (row0>>16 & 0xFF);\ - columns[3] = (row3 & 0xFF000000) |(row2 & 0xFF000000) >> 8 |(row1 & 0xFF000000) >> 16| (row0>>24 & 0xFF);\ - } while(0) - -#define columns_to_rows_32(rows, column0, column1, column2, column3) rows_to_columns_32(rows, column0, column1, column2, column3) - -#define TK_to_column_256(columns, state) \ - do { \ - uint32_t TK0 = state->TK1[0] ^ state->TK2[0];\ - uint32_t TK1 = state->TK1[1] ^ state->TK2[1]; \ - uint32_t tk00 = TK0 & 0xFF; \ - uint32_t tk01 = TK0 & 0xFF00;\ - uint32_t tk02 = TK0 & 0xFF0000;\ - uint32_t tk03 = TK0 & 0xFF000000;\ - columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ - columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ - columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ - columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ - } while(0) - -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { - uint32_t s0, s1, s2, s3; + uint32_t s0, s1, s2, s3, t0, t1, t2, t3; uint32_t tk_columns[4]; /* Load the state into local variables */ @@ -115,102 +61,45 @@ static void forkskinny_128_256_round s2 = state->S[2]; s3 = state->S[3]; - TK_to_column_256(tk_columns, state); + /* Perform all requested rounds */ + for (; first < last; ++first) { - state->S[0] = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[round]; - state->S[1] = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[round]; - state->S[2] = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; - state->S[3] = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) ^ tk_columns[3]; + TK_to_column_256(tk_columns, state); - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); -} + t0 = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[first]; + t1 = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[first]; + t2 = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; + t3 = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) 
^ tk_columns[3]; + /* Permute TK1 and TK2 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); -void forkskinny_128_256_encrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_256_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - - /* State stored per column */ - load_column_8(state.S, input); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) { - forkskinny_128_256_round(&state, round); + s0 = t0; s1 = t1; s2 = t2; s3 = t3; } - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - store_column_8(output_right, state.S); - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x51051001; /* Branching constant */ - state.S[1] ^= 0xa20a2002; - state.S[2] ^= 0x44144104; - state.S[3] ^= 0x88288208; - - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - 
FORKSKINNY_128_256_ROUNDS_AFTER); - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_256_round(&state, round); - } - store_column_8(output_left, state.S); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - store_column_8(output_right, state.S); - } + /* Save the local variables back to the state */ + state->S[0] = s0; + state->S[1] = s1; + state->S[2] = s2; + state->S[3] = s3; } -static void forkskinny_128_256_inv_round_first(forkskinny_128_256_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3, temp; - uint8_t rc; +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) +{ + uint32_t s0, s1, s2, s3, t0, t1, t2, t3, tk0, tk1; + uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; + t0 = state->S[0]; + t1 = state->S[1]; + t2 = state->S[2]; + t3 = state->S[3]; + + // FIRST ROUND /* Permute TK1 and TK2 for the next round */ skinny128_inv_LFSR2(state->TK2[0]); @@ -219,215 +108,128 @@ static void forkskinny_128_256_inv_round_first(forkskinny_128_256_state_t *state skinny128_inv_permute_tk(state->TK2); /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - temp = state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s1 ^= leftRotate8(temp); - s2 ^= 0x020000; - - /* Save the local variables back to the state but first convert them to columns*/ - rows_to_columns_32(state->S, s0, s1, s2, s3); -} - -static void forkskinny_128_256_inv_round(forkskinny_128_256_state_t 
*state, unsigned round){ - uint32_t s0, s1, s2, s3, tk0, tk1; - uint8_t rc; - - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - - state->S[0] = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); - state->S[1] = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); - state->S[2] = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); - state->S[3] = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + s0 = t0; + t0 = t1; + t1 = t2; + t2 = t3; + t3 = s0 ^ t2; + t2 ^= t0; + t1 ^= t2; /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - tk0 = state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - tk1 = state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - tk1 = leftRotate8(tk1); - // temp = 0x020000; - state->S[0] ^= (((tk0) &0xff) | ((tk1<<8)&0xff00)); - state->S[1] ^= (((tk0>>8) &0xff) | ((tk1)&0xff00)); - state->S[2] ^= (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; - state->S[3] ^= (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); -} - -static void forkskinny_128_256_inv_round_final(forkskinny_128_256_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3; + rc = RC[--first]; + t0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + t1 ^= leftRotate8((state->TK1[1] ^ state->TK2[1] ^ (rc >> 4))); + t2 ^= 0x020000; + + /* Save the local variables in temp but first convert them to columns*/ + rows_to_columns_32(s0, 
s1, s2, s3, t0, t1, t2, t3); + + + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + + t0 = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); + t1 = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); + t2 = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); + t3 = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + + /* XOR the shifted round constant and the shifted subkey for this round */ + rc = RC[--first]; + tk0 = state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + tk1 = leftRotate8((state->TK1[1] ^ state->TK2[1] ^ (rc >> 4))); + + s0 = t0 ^ (((tk0) &0xff) | ((tk1<<8)&0xff00)); + s1 = t1 ^ (((tk0>>8) &0xff) | ((tk1)&0xff00)); + s2 = t2 ^ (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; + s3 = t3 ^ (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); + } - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; + // FINAL ROUND - /* Apply the inverse of the S-box to all cells in the state */ + /* Apply the inverse of the S-box to all cells in the state */ skinny128_inv_sbox(s0); skinny128_inv_sbox(s1); skinny128_inv_sbox(s2); skinny128_inv_sbox(s3); /* Save the local variables back to the state but first convert them back to rows*/ - columns_to_rows_32(state->S, s0, s1, s2, s3); + columns_to_rows_32(t0, t1, t2, t3, s0, s1, s2, s3); /* Shift the cells in the rows left, which moves the cell * values down closer to the LSB. 
That is, we do a right * rotate on the word to rotate the cells in the word left */ - state->S[1] = rightRotate8(state->S[1]); - state->S[2] = rightRotate16(state->S[2]); - state->S[3] = rightRotate24(state->S[3]); + state->S[0] = t0; + state->S[1] = rightRotate8(t1); + state->S[2] = rightRotate16(t2); + state->S[3] = rightRotate24(t3); } -void forkskinny_128_256_decrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds) { - forkskinny_128_256_state_t state; - forkskinny_128_256_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - forkskinny_128_256_inv_round_first(&state, (FORKSKINNY_128_256_ROUNDS_BEFORE+FORKSKINNY_128_256_ROUNDS_AFTER*2)-1); - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2)-1; - round > (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); --round) { - forkskinny_128_256_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + --rounds; } - forkskinny_128_256_inv_round_final(&state, (FORKSKINNY_128_256_ROUNDS_BEFORE+FORKSKINNY_128_256_ROUNDS_AFTER)); - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - } - - /* Save the state and the tweakey at the forking point, convert state to columns */ - memcpy(fstate.TK1, state.TK1, 16); - memcpy(fstate.TK2, state.TK2, 16); - rows_to_columns_32(fstate.S,state.S[0],state.S[1], state.S[2], state.S[3]); +} - /* Generate the left output block after another "before" rounds */ - forkskinny_128_256_inv_round_first(&state, FORKSKINNY_128_256_ROUNDS_BEFORE-1); - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE-1; round > 0; --round) { - 
forkskinny_128_256_inv_round(&state, round - 1); +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + } + rounds -= 16; } - forkskinny_128_256_inv_round_final(&state, 0); - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + --rounds; } - store_column_8(output_right,fstate.S); - } -/** - * \brief Number of rounds of ForkSkinny-128-384 before forking. - */ -#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 - -/** - * \brief Number of rounds of ForkSkinny-128-384 after forking. - */ -#define FORKSKINNY_128_384_ROUNDS_AFTER 31 - -/** - * \brief State information for ForkSkinny-128-384. 
- */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -#define TK_to_column_384(columns, state) \ - do { \ - uint32_t TK0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0];\ - uint32_t TK1 = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1];\ - uint32_t tk00 = TK0 & 0xFF; \ - uint32_t tk01 = TK0 & 0xFF00;\ - uint32_t tk02 = TK0 & 0xFF0000;\ - uint32_t tk03 = TK0 & 0xFF000000;\ - columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ - columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ - columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ - columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ - } while(0) - -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { - uint32_t s0, s1, s2, s3; + uint32_t s0, s1, s2, s3, t0, t1, t2, t3; uint32_t tk_columns[4]; /* Load the state into local variables */ @@ -436,148 +238,48 @@ static void forkskinny_128_384_round s2 = state->S[2]; s3 = state->S[3]; - TK_to_column_384(tk_columns, state); - - state->S[0] = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[round]; - state->S[1] = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[round]; - state->S[2] = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; - state->S[3] = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) ^ tk_columns[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_permute_tk(state->TK3); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); - skinny128_LFSR3(state->TK3[0]); - skinny128_LFSR3(state->TK3[1]); -} + /* Perform all requested rounds */ + for (; first < last; ++first) { -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - 
state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - - /* State stored per column */ - load_column_8(state.S, input); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, round); - } + TK_to_column_384(tk_columns, state); - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - store_column_8(output_right, state.S); + t0 = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[first]; + t1 = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[first]; + t2 = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; + t3 = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) ^ tk_columns[3]; - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x51051001; /* Branching constant */ - state.S[1] ^= 0xa20a2002; - state.S[2] ^= 0x44144104; - state.S[3] ^= 0x88288208; - 
for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, round); - } - store_column_8(output_left, state.S); - - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - store_column_8(output_right, state.S); + /* Permute TK1, TK2, and TK3 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + + s0 = t0; s1 = t1; s2 = t2; s3 = t3; } -} + /* Save the local variables back to the state */ + state->S[0] = s0; + state->S[1] = s1; + state->S[2] = s2; + state->S[3] = s3; +} -static void forkskinny_128_384_inv_round_first(forkskinny_128_384_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3, temp; - uint8_t rc; +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) +{ + uint32_t s0, s1, s2, s3, t0, t1, t2, t3, tk0, tk1; + uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_LFSR3(state->TK3[0]); - skinny128_inv_LFSR3(state->TK3[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - skinny128_inv_permute_tk(state->TK3); + t0 = state->S[0]; + t1 = state->S[1]; + t2 = state->S[2]; + t3 = state->S[3]; - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; 
- - /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; - temp = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s1 ^= leftRotate8(temp); - s2 ^= 0x020000; - - /* Save the local variables back to the state but first convert them to columns*/ - rows_to_columns_32(state->S, s0, s1, s2, s3); -} - -static void forkskinny_128_384_inv_round(forkskinny_128_384_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3, tk0, tk1; - uint8_t rc; - - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; + // FIRST ROUND /* Permute TK1 and TK2 for the next round */ skinny128_inv_LFSR2(state->TK2[0]); @@ -588,177 +290,144 @@ static void forkskinny_128_384_inv_round(forkskinny_128_384_state_t *state, unsi skinny128_inv_permute_tk(state->TK2); skinny128_inv_permute_tk(state->TK3); - - state->S[0] = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); - state->S[1] = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); - state->S[2] = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); - state->S[3] = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + /* Inverse mix of the columns */ + s0 = t0; + t0 = t1; + t1 = t2; + t2 = t3; + t3 = s0 ^ t2; + t2 ^= t0; + t1 ^= t2; /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - tk0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; - tk1 = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - tk1 = leftRotate8(tk1); - - state->S[0] 
^= (((tk0) &0xff) | ((tk1<<8)&0xff00)); - state->S[1] ^= (((tk0>>8) &0xff) | ((tk1)&0xff00)); - state->S[2] ^= (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; - state->S[3] ^= (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); -} + rc = RC[--first]; + t0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; + t1 ^= leftRotate8((state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4))); + t2 ^= 0x020000; + + /* Save the local variables in temp but first convert them to columns*/ + rows_to_columns_32(s0, s1, s2, s3, t0, t1, t2, t3); + + + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + + t0 = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); + t1 = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); + t2 = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); + t3 = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + + /* XOR the shifted round constant and the shifted subkey for this round */ + rc = RC[--first]; + tk0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; + tk1 = leftRotate8((state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4))); + + s0 = t0 ^ (((tk0) &0xff) | ((tk1<<8)&0xff00)); + s1 = t1 ^ (((tk0>>8) &0xff) | ((tk1)&0xff00)); + s2 = t2 ^ (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; + s3 = t3 ^ (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); + 
} -static void forkskinny_128_384_inv_round_final(forkskinny_128_384_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3; + // FINAL ROUND - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Apply the inverse of the S-box to all cells in the state */ + /* Apply the inverse of the S-box to all cells in the state */ skinny128_inv_sbox(s0); skinny128_inv_sbox(s1); skinny128_inv_sbox(s2); skinny128_inv_sbox(s3); /* Save the local variables back to the state but first convert them back to rows*/ - columns_to_rows_32(state->S, s0, s1, s2, s3); + columns_to_rows_32(t0, t1, t2, t3, s0, s1, s2, s3); /* Shift the cells in the rows left, which moves the cell * values down closer to the LSB. That is, we do a right * rotate on the word to rotate the cells in the word left */ - state->S[1] = rightRotate8(state->S[1]); - state->S[2] = rightRotate16(state->S[2]); - state->S[3] = rightRotate24(state->S[3]); + state->S[0] = t0; + state->S[1] = rightRotate8(t1); + state->S[2] = rightRotate16(t2); + state->S[3] = rightRotate24(t3); } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 
44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_permute_tk(state.TK3); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); - skinny128_LFSR3(state.TK3[0]); - skinny128_LFSR3(state.TK3[1]); - } - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - forkskinny_128_384_inv_round_first(&state, (FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER * 2)-1); - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2)-1; - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, round - 1); - } - forkskinny_128_384_inv_round_final(&state, (FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER)); - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_LFSR3(state.TK3[0]); - skinny128_inv_LFSR3(state.TK3[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - skinny128_inv_permute_tk(state.TK3); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + skinny128_LFSR3(state->TK3[2]); + skinny128_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point, convert state to columns */ - memcpy(fstate.TK1, state.TK1, 16); - memcpy(fstate.TK2, state.TK2, 16); - memcpy(fstate.TK3, state.TK3, 16); - rows_to_columns_32(fstate.S,state.S[0],state.S[1], state.S[2], state.S[3]); - - /* Generate the left output block after another "before" rounds */ - forkskinny_128_384_inv_round_first(&state, FORKSKINNY_128_384_ROUNDS_BEFORE - 1); - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE-1; round > 0; --round) { - forkskinny_128_384_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + --rounds; } - forkskinny_128_384_inv_round_final(&state, 0); - - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, round); - } - store_column_8(output_right, fstate.S); } -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. 
- */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 - -/** - * \brief State information for ForkSkinny-64-192. - */ -typedef struct +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - uint16_t TK1[4]; /**< First part of the tweakey */ - uint16_t TK2[4]; /**< Second part of the tweakey */ - uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_LFSR3(state->TK3[2]); + skinny128_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; + } -} forkskinny_64_192_state_t; + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + --rounds; + } +} -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. 
- */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -769,144 +438,55 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + } /* Save the local 
variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_permute_tk(state->TK1); - skinny64_permute_tk(state->TK2); - skinny64_permute_tk(state->TK3); - skinny64_LFSR2(state->TK2[0]); - skinny64_LFSR2(state->TK2[1]); - skinny64_LFSR3(state->TK3[0]); - skinny64_LFSR3(state->TK3[1]); } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - 
be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -917,42 +497,45 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_inv_LFSR2(state->TK2[0]); - skinny64_inv_LFSR2(state->TK2[1]); - skinny64_inv_LFSR3(state->TK3[0]); - skinny64_inv_LFSR3(state->TK3[1]); - skinny64_inv_permute_tk(state->TK1); - skinny64_inv_permute_tk(state->TK2); - skinny64_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round 
constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -961,92 +544,74 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds) { - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - skinny64_permute_tk(state.TK1); - skinny64_permute_tk(state.TK2); - skinny64_permute_tk(state.TK3); - skinny64_LFSR2(state.TK2[0]); - skinny64_LFSR2(state.TK2[1]); - skinny64_LFSR3(state.TK3[0]); - 
skinny64_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR2(state->TK2[2]); + skinny64_LFSR2(state->TK2[3]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + skinny64_LFSR3(state->TK3[2]); + skinny64_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_AFTER; ++round) { - skinny64_inv_LFSR2(state.TK2[0]); - skinny64_inv_LFSR2(state.TK2[1]); - skinny64_inv_LFSR3(state.TK3[0]); - skinny64_inv_LFSR3(state.TK3[1]); - skinny64_inv_permute_tk(state.TK1); - skinny64_inv_permute_tk(state.TK2); - skinny64_inv_permute_tk(state.TK3); +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_inv_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR2(state->TK2[2]); + skinny64_inv_LFSR2(state->TK2[3]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_LFSR3(state->TK3[2]); + skinny64_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + --rounds; } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); } +#endif /* !__AVR__ */ diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/internal-forkskinny.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/internal-forkskinny.h index 0c1a707..e3014d3 100644 
--- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,158 @@ extern "C" { #endif /** + * \brief State information for ForkSkinny-128-256. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +/** + * \brief State information for ForkSkinny-128-384. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +/** + * \brief State information for ForkSkinny-64-192. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. 
+ */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first First round to apply. 
+ * \param last Last round to apply plus 1. + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. * * \param key 256-bit tweakey for ForkSkinny-128-256. 
diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/internal-skinnyutil.h index 83136cb..f2bc8ca 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/opt32_table/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. 
The original version from the specification is * equivalent to: @@ -319,6 +349,61 @@ do { \ x = ((x << 1) & 0xEEEEU) | ((x >> 3) & 0x1111U); \ } while (0) +#define rows_to_columns_32(column0, column1, column2, column3, row0, row1, row2, row3) \ + do { \ + column0 = (row3 & 0xFF) << 24|(row2 & 0xFF) << 16|(row1 & 0xFF) << 8 | (row0 & 0xFF);\ + column1 = (row3 & 0xFF00) << 16|(row2 & 0xFF00) << 8 |(row1 & 0xFF00) | (row0>>8 & 0xFF);\ + column2 = (row3 & 0xFF0000) << 8 |(row2 & 0xFF0000) |(row1 & 0xFF0000) >> 8 | (row0>>16 & 0xFF);\ + column3 = (row3 & 0xFF000000) |(row2 & 0xFF000000) >> 8 |(row1 & 0xFF000000) >> 16| (row0>>24 & 0xFF);\ + } while(0) + +#define columns_to_rows_32(row0, row1, row2, row3, column0, column1, column2, column3) rows_to_columns_32(row0, row1, row2, row3, column0, column1, column2, column3) + +#define load_column_8(dest, src) \ + do { \ + dest[0] = (src[12]) << 24 | (src[8]) << 16 | (src[4]) << 8 | (src[0]); \ + dest[1] = (src[13]) << 24 | (src[9]) << 16 | (src[5]) << 8 | (src[1]); \ + dest[2] = (src[14]) << 24 | (src[10]) << 16 | (src[6]) << 8 | (src[2]); \ + dest[3] = (src[15]) << 24 | (src[11]) << 16 | (src[7]) << 8 | (src[3]); \ + } while(0) + +#define store_column_8(dest, src) \ + do { \ + dest[0] = (uint8_t) (src[0]); dest[1] = (uint8_t) (src[1]); dest[2] = (uint8_t) (src[2]); dest[3] = (uint8_t) (src[3]); \ + dest[4] = (uint8_t) (src[0]>>8); dest[5] = (uint8_t) (src[1]>>8); dest[6] = (uint8_t) (src[2]>>8); dest[7] = (uint8_t) (src[3]>>8); \ + dest[8] = (uint8_t) (src[0]>>16);dest[9] = (uint8_t) (src[1]>>16);dest[10]= (uint8_t) (src[2]>>16);dest[11]= (uint8_t)(src[3]>>16); \ + dest[12]= (uint8_t) (src[0]>>24);dest[13]= (uint8_t) (src[1]>>24);dest[14]= (uint8_t) (src[2]>>24);dest[15]= (uint8_t)(src[3]>>24); \ + } while(0) + + +#define TK_to_column_256(columns, state) \ + do { \ + uint32_t TK0 = state->TK1[0] ^ state->TK2[0];\ + uint32_t TK1 = state->TK1[1] ^ state->TK2[1]; \ + uint32_t tk00 = TK0 & 0xFF; \ + uint32_t tk01 = TK0 & 0xFF00;\ 
+ uint32_t tk02 = TK0 & 0xFF0000;\ + uint32_t tk03 = TK0 & 0xFF000000;\ + columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ + columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ + columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ + columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ + } while(0) + +#define TK_to_column_384(columns, state) \ +do { \ + uint32_t TK0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0];\ + uint32_t TK1 = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1];\ + uint32_t tk00 = TK0 & 0xFF; \ + uint32_t tk01 = TK0 & 0xFF00;\ + uint32_t tk02 = TK0 & 0xFF0000;\ + uint32_t tk03 = TK0 & 0xFF000000;\ + columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ + columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ + columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ + columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ +} while(0) + /** @endcond */ #ifdef __cplusplus diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/aead-common.h b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/aead-common.h index 2be95eb..8429f59 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/aead-common.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/aead-common.h @@ -154,7 +154,7 @@ typedef void (*aead_xof_squeeze_t) /** * \brief No special AEAD features. */ -#define AEAD_FLAG_NONE 0x0000 +#define AEAD_FLAG_NONE 0x0000 /** * \brief The natural byte order of the AEAD cipher is little-endian. @@ -166,7 +166,18 @@ typedef void (*aead_xof_squeeze_t) * numbers as nonces. The application needs to know whether the sequence * number should be packed into the leading or trailing bytes of the nonce. 
*/ -#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 +#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 + +/** + * \brief The AEAD mode provides side-channel protection for the key. + */ +#define AEAD_FLAG_SC_PROTECT_KEY 0x0002 + +/** + * \brief The AEAD mode provides side-channel protection for all block + * operations. + */ +#define AEAD_FLAG_SC_PROTECT_ALL 0x0004 /** * \brief Meta-information about an AEAD cipher. diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/forkae.c b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/forkae.c index 4a9671a..ead8ada 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/forkae.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/forkae.c @@ -22,7 +22,6 @@ #include "forkae.h" #include "internal-forkskinny.h" -#include "internal-util.h" #include aead_cipher_t const forkae_paef_64_192_cipher = { @@ -138,3 +137,399 @@ aead_cipher_t const forkae_saef_128_256_cipher = { #define FORKAE_TWEAKEY_REDUCED_SIZE 32 #define FORKAE_BLOCK_FUNC forkskinny_128_256 #include "internal-forkae-saef.h" + +/* Helper functions to implement the forking encrypt/decrypt block operations + * on top of the basic "perform N rounds" functions in internal-forkskinny.c */ + +void forkskinny_128_256_encrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_tweakey_schedule_t tks; + + /* Iterate the tweakey schedule */ + if (output_left && output_right){ + forkskinny_128_256_init_tks(&tks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); + } + else{ + forkskinny_128_256_init_tks(&tks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + FORKSKINNY_128_256_ROUNDS_AFTER); + } + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + 
state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_256_rounds(&state, &tks, 0, FORKSKINNY_128_256_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_256_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_256_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_256_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_256_decrypt + (const unsigned 
char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_state_t fstate; + forkskinny_128_256_tweakey_schedule_t tks; + + /* Iterate the tweakey schedule */ + forkskinny_128_256_init_tks(&tks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); + + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_256_inv_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_256_inv_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_256_rounds + (&fstate, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 12, fstate.S[3]); +} + +void forkskinny_128_384_encrypt + (const unsigned char key[48], unsigned char *output_left, + 
unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_tweakey_schedule_t tks; + + /* Iterate key schedule */ + if (output_left && output_right){ + forkskinny_128_384_init_tks(&tks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); + } + else{ + forkskinny_128_384_init_tks(&tks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER); + } + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_384_rounds(&state, &tks, 0, FORKSKINNY_128_384_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_384_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_384_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + 
le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_384_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_384_decrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_state_t fstate; + forkskinny_128_384_tweakey_schedule_t tks; + + /* Iterate key schedule */ + forkskinny_128_384_init_tks(&tks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); + + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_384_inv_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_384_inv_rounds(&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + 
le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_384_rounds + (&fstate, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 12, fstate.S[3]); +} + +void forkskinny_64_192_encrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_tweakey_schedule_t tks; + + /* Iterate key schedule */ + if (output_left && output_right){ + forkskinny_64_192_init_tks(&tks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); + } + else{ + forkskinny_64_192_init_tks(&tks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + FORKSKINNY_64_192_ROUNDS_AFTER); + } + + /* Unpack the input */ + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Run all of the rounds before the forking point */ + forkskinny_64_192_rounds(&state, &tks, 0, FORKSKINNY_64_192_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint16_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_64_192_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + 
+ /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x1249U; /* Branching constant */ + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + forkskinny_64_192_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_64_192_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + } +} + +void forkskinny_64_192_decrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_state_t fstate; + forkskinny_64_192_tweakey_schedule_t tks; + + /* Iterate key schedule */ + forkskinny_64_192_init_tks(&tks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Unpack the input */ + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /* 
Remove the branching constant */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_64_192_inv_rounds(&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_64_192_rounds + (&fstate, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 67 + .byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + .byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte 193 + 
.byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 + .byte 100 + .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + .byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 194 + .byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 + .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + .byte 97 
+ .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 + .byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + .byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + .byte 93 + 
.byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 223 + .byte 22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 48 + 
.byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 137 + .byte 139 + 
.byte 141 + .byte 143 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_3, @object + .size table_3, 256 +table_3: + .byte 0 + .byte 128 + .byte 1 + .byte 129 + .byte 2 + .byte 130 + .byte 3 + .byte 131 + .byte 4 + .byte 132 + .byte 5 + .byte 133 + .byte 6 + .byte 134 + .byte 7 + .byte 135 + .byte 8 + .byte 136 + .byte 9 + .byte 137 + .byte 10 + .byte 138 + .byte 11 + .byte 139 + .byte 12 + .byte 140 + .byte 13 + .byte 141 + .byte 14 + .byte 142 + .byte 15 + .byte 143 + .byte 16 + .byte 144 + .byte 17 + .byte 145 + .byte 18 + .byte 146 + .byte 19 + .byte 147 + .byte 20 + .byte 148 + .byte 21 + .byte 149 + .byte 22 + .byte 150 + .byte 23 + .byte 151 + .byte 24 + .byte 152 + .byte 25 + .byte 153 + .byte 26 + .byte 154 + .byte 27 + .byte 155 + .byte 28 + .byte 156 + .byte 29 + .byte 157 + .byte 30 + .byte 158 + .byte 31 + .byte 159 + .byte 160 + .byte 32 + .byte 161 + .byte 33 + .byte 162 + .byte 34 + .byte 163 + .byte 35 + .byte 164 + .byte 36 + .byte 165 + .byte 37 + .byte 166 + .byte 38 + .byte 167 + .byte 39 + .byte 168 + .byte 40 + .byte 169 + .byte 41 + .byte 170 + .byte 42 + .byte 171 + .byte 43 + .byte 172 + .byte 44 + .byte 173 + .byte 45 + .byte 174 + .byte 46 + .byte 175 + .byte 47 + .byte 176 + .byte 48 + .byte 177 + .byte 49 + .byte 178 + .byte 50 + .byte 179 + .byte 51 + .byte 
180 + .byte 52 + .byte 181 + .byte 53 + .byte 182 + .byte 54 + .byte 183 + .byte 55 + .byte 184 + .byte 56 + .byte 185 + .byte 57 + .byte 186 + .byte 58 + .byte 187 + .byte 59 + .byte 188 + .byte 60 + .byte 189 + .byte 61 + .byte 190 + .byte 62 + .byte 191 + .byte 63 + .byte 64 + .byte 192 + .byte 65 + .byte 193 + .byte 66 + .byte 194 + .byte 67 + .byte 195 + .byte 68 + .byte 196 + .byte 69 + .byte 197 + .byte 70 + .byte 198 + .byte 71 + .byte 199 + .byte 72 + .byte 200 + .byte 73 + .byte 201 + .byte 74 + .byte 202 + .byte 75 + .byte 203 + .byte 76 + .byte 204 + .byte 77 + .byte 205 + .byte 78 + .byte 206 + .byte 79 + .byte 207 + .byte 80 + .byte 208 + .byte 81 + .byte 209 + .byte 82 + .byte 210 + .byte 83 + .byte 211 + .byte 84 + .byte 212 + .byte 85 + .byte 213 + .byte 86 + .byte 214 + .byte 87 + .byte 215 + .byte 88 + .byte 216 + .byte 89 + .byte 217 + .byte 90 + .byte 218 + .byte 91 + .byte 219 + .byte 92 + .byte 220 + .byte 93 + .byte 221 + .byte 94 + .byte 222 + .byte 95 + .byte 223 + .byte 224 + .byte 96 + .byte 225 + .byte 97 + .byte 226 + .byte 98 + .byte 227 + .byte 99 + .byte 228 + .byte 100 + .byte 229 + .byte 101 + .byte 230 + .byte 102 + .byte 231 + .byte 103 + .byte 232 + .byte 104 + .byte 233 + .byte 105 + .byte 234 + .byte 106 + .byte 235 + .byte 107 + .byte 236 + .byte 108 + .byte 237 + .byte 109 + .byte 238 + .byte 110 + .byte 239 + .byte 111 + .byte 240 + .byte 112 + .byte 241 + .byte 113 + .byte 242 + .byte 114 + .byte 243 + .byte 115 + .byte 244 + .byte 116 + .byte 245 + .byte 117 + .byte 246 + .byte 118 + .byte 247 + .byte 119 + .byte 248 + .byte 120 + .byte 249 + .byte 121 + .byte 250 + .byte 122 + .byte 251 + .byte 123 + .byte 252 + .byte 124 + .byte 253 + .byte 125 + .byte 254 + .byte 126 + .byte 255 + .byte 127 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_4, @object + .size table_4, 174 +table_4: + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 7 + .byte 0 + .byte 15 + .byte 0 + .byte 15 + .byte 1 + .byte 15 + .byte 
3 + .byte 14 + .byte 7 + .byte 13 + .byte 7 + .byte 11 + .byte 7 + .byte 7 + .byte 7 + .byte 15 + .byte 6 + .byte 15 + .byte 5 + .byte 14 + .byte 3 + .byte 12 + .byte 7 + .byte 9 + .byte 7 + .byte 3 + .byte 7 + .byte 7 + .byte 6 + .byte 15 + .byte 4 + .byte 14 + .byte 1 + .byte 13 + .byte 3 + .byte 10 + .byte 7 + .byte 5 + .byte 7 + .byte 11 + .byte 6 + .byte 7 + .byte 5 + .byte 14 + .byte 2 + .byte 12 + .byte 5 + .byte 8 + .byte 3 + .byte 0 + .byte 7 + .byte 1 + .byte 6 + .byte 3 + .byte 4 + .byte 6 + .byte 0 + .byte 13 + .byte 0 + .byte 11 + .byte 1 + .byte 7 + .byte 3 + .byte 14 + .byte 6 + .byte 13 + .byte 5 + .byte 10 + .byte 3 + .byte 4 + .byte 7 + .byte 9 + .byte 6 + .byte 3 + .byte 5 + .byte 6 + .byte 2 + .byte 12 + .byte 4 + .byte 8 + .byte 1 + .byte 1 + .byte 3 + .byte 2 + .byte 6 + .byte 5 + .byte 4 + .byte 10 + .byte 0 + .byte 5 + .byte 1 + .byte 11 + .byte 2 + .byte 6 + .byte 5 + .byte 12 + .byte 2 + .byte 8 + .byte 5 + .byte 0 + .byte 3 + .byte 0 + .byte 6 + .byte 1 + .byte 4 + .byte 2 + .byte 0 + .byte 5 + .byte 0 + .byte 11 + .byte 0 + .byte 7 + .byte 1 + .byte 15 + .byte 2 + .byte 14 + .byte 5 + .byte 12 + .byte 3 + .byte 8 + .byte 7 + .byte 1 + .byte 7 + .byte 3 + .byte 6 + .byte 7 + .byte 4 + .byte 14 + .byte 0 + .byte 13 + .byte 1 + .byte 11 + .byte 3 + .byte 6 + .byte 7 + .byte 13 + .byte 6 + .byte 11 + .byte 5 + .byte 6 + .byte 3 + .byte 12 + .byte 6 + .byte 9 + .byte 5 + .byte 2 + .byte 3 + .byte 4 + .byte 6 + .byte 9 + .byte 4 + .byte 2 + .byte 1 + .byte 5 + .byte 2 + .byte 10 + .byte 4 + .byte 4 + .byte 1 + .byte 9 + .byte 2 + .byte 2 + .byte 5 + .byte 4 + .byte 2 + .byte 8 + .byte 4 + .byte 0 + .byte 1 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_5, @object + .size table_5, 256 +table_5: + .byte 204 + .byte 198 + .byte 201 + .byte 192 + .byte 193 + .byte 202 + .byte 194 + .byte 203 + .byte 195 + .byte 200 + .byte 197 + .byte 205 + .byte 196 + .byte 206 + .byte 199 + .byte 207 + .byte 108 + .byte 102 + .byte 105 + 
.byte 96 + .byte 97 + .byte 106 + .byte 98 + .byte 107 + .byte 99 + .byte 104 + .byte 101 + .byte 109 + .byte 100 + .byte 110 + .byte 103 + .byte 111 + .byte 156 + .byte 150 + .byte 153 + .byte 144 + .byte 145 + .byte 154 + .byte 146 + .byte 155 + .byte 147 + .byte 152 + .byte 149 + .byte 157 + .byte 148 + .byte 158 + .byte 151 + .byte 159 + .byte 12 + .byte 6 + .byte 9 + .byte 0 + .byte 1 + .byte 10 + .byte 2 + .byte 11 + .byte 3 + .byte 8 + .byte 5 + .byte 13 + .byte 4 + .byte 14 + .byte 7 + .byte 15 + .byte 28 + .byte 22 + .byte 25 + .byte 16 + .byte 17 + .byte 26 + .byte 18 + .byte 27 + .byte 19 + .byte 24 + .byte 21 + .byte 29 + .byte 20 + .byte 30 + .byte 23 + .byte 31 + .byte 172 + .byte 166 + .byte 169 + .byte 160 + .byte 161 + .byte 170 + .byte 162 + .byte 171 + .byte 163 + .byte 168 + .byte 165 + .byte 173 + .byte 164 + .byte 174 + .byte 167 + .byte 175 + .byte 44 + .byte 38 + .byte 41 + .byte 32 + .byte 33 + .byte 42 + .byte 34 + .byte 43 + .byte 35 + .byte 40 + .byte 37 + .byte 45 + .byte 36 + .byte 46 + .byte 39 + .byte 47 + .byte 188 + .byte 182 + .byte 185 + .byte 176 + .byte 177 + .byte 186 + .byte 178 + .byte 187 + .byte 179 + .byte 184 + .byte 181 + .byte 189 + .byte 180 + .byte 190 + .byte 183 + .byte 191 + .byte 60 + .byte 54 + .byte 57 + .byte 48 + .byte 49 + .byte 58 + .byte 50 + .byte 59 + .byte 51 + .byte 56 + .byte 53 + .byte 61 + .byte 52 + .byte 62 + .byte 55 + .byte 63 + .byte 140 + .byte 134 + .byte 137 + .byte 128 + .byte 129 + .byte 138 + .byte 130 + .byte 139 + .byte 131 + .byte 136 + .byte 133 + .byte 141 + .byte 132 + .byte 142 + .byte 135 + .byte 143 + .byte 92 + .byte 86 + .byte 89 + .byte 80 + .byte 81 + .byte 90 + .byte 82 + .byte 91 + .byte 83 + .byte 88 + .byte 85 + .byte 93 + .byte 84 + .byte 94 + .byte 87 + .byte 95 + .byte 220 + .byte 214 + .byte 217 + .byte 208 + .byte 209 + .byte 218 + .byte 210 + .byte 219 + .byte 211 + .byte 216 + .byte 213 + .byte 221 + .byte 212 + .byte 222 + .byte 215 + .byte 223 + .byte 76 + .byte 
70 + .byte 73 + .byte 64 + .byte 65 + .byte 74 + .byte 66 + .byte 75 + .byte 67 + .byte 72 + .byte 69 + .byte 77 + .byte 68 + .byte 78 + .byte 71 + .byte 79 + .byte 236 + .byte 230 + .byte 233 + .byte 224 + .byte 225 + .byte 234 + .byte 226 + .byte 235 + .byte 227 + .byte 232 + .byte 229 + .byte 237 + .byte 228 + .byte 238 + .byte 231 + .byte 239 + .byte 124 + .byte 118 + .byte 121 + .byte 112 + .byte 113 + .byte 122 + .byte 114 + .byte 123 + .byte 115 + .byte 120 + .byte 117 + .byte 125 + .byte 116 + .byte 126 + .byte 119 + .byte 127 + .byte 252 + .byte 246 + .byte 249 + .byte 240 + .byte 241 + .byte 250 + .byte 242 + .byte 251 + .byte 243 + .byte 248 + .byte 245 + .byte 253 + .byte 244 + .byte 254 + .byte 247 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_6, @object + .size table_6, 256 +table_6: + .byte 51 + .byte 52 + .byte 54 + .byte 56 + .byte 60 + .byte 58 + .byte 49 + .byte 62 + .byte 57 + .byte 50 + .byte 53 + .byte 55 + .byte 48 + .byte 59 + .byte 61 + .byte 63 + .byte 67 + .byte 68 + .byte 70 + .byte 72 + .byte 76 + .byte 74 + .byte 65 + .byte 78 + .byte 73 + .byte 66 + .byte 69 + .byte 71 + .byte 64 + .byte 75 + .byte 77 + .byte 79 + .byte 99 + .byte 100 + .byte 102 + .byte 104 + .byte 108 + .byte 106 + .byte 97 + .byte 110 + .byte 105 + .byte 98 + .byte 101 + .byte 103 + .byte 96 + .byte 107 + .byte 109 + .byte 111 + .byte 131 + .byte 132 + .byte 134 + .byte 136 + .byte 140 + .byte 138 + .byte 129 + .byte 142 + .byte 137 + .byte 130 + .byte 133 + .byte 135 + .byte 128 + .byte 139 + .byte 141 + .byte 143 + .byte 195 + .byte 196 + .byte 198 + .byte 200 + .byte 204 + .byte 202 + .byte 193 + .byte 206 + .byte 201 + .byte 194 + .byte 197 + .byte 199 + .byte 192 + .byte 203 + .byte 205 + .byte 207 + .byte 163 + .byte 164 + .byte 166 + .byte 168 + .byte 172 + .byte 170 + .byte 161 + .byte 174 + .byte 169 + .byte 162 + .byte 165 + .byte 167 + .byte 160 + .byte 171 + .byte 173 + .byte 175 + .byte 19 + .byte 20 + .byte 22 + .byte 
24 + .byte 28 + .byte 26 + .byte 17 + .byte 30 + .byte 25 + .byte 18 + .byte 21 + .byte 23 + .byte 16 + .byte 27 + .byte 29 + .byte 31 + .byte 227 + .byte 228 + .byte 230 + .byte 232 + .byte 236 + .byte 234 + .byte 225 + .byte 238 + .byte 233 + .byte 226 + .byte 229 + .byte 231 + .byte 224 + .byte 235 + .byte 237 + .byte 239 + .byte 147 + .byte 148 + .byte 150 + .byte 152 + .byte 156 + .byte 154 + .byte 145 + .byte 158 + .byte 153 + .byte 146 + .byte 149 + .byte 151 + .byte 144 + .byte 155 + .byte 157 + .byte 159 + .byte 35 + .byte 36 + .byte 38 + .byte 40 + .byte 44 + .byte 42 + .byte 33 + .byte 46 + .byte 41 + .byte 34 + .byte 37 + .byte 39 + .byte 32 + .byte 43 + .byte 45 + .byte 47 + .byte 83 + .byte 84 + .byte 86 + .byte 88 + .byte 92 + .byte 90 + .byte 81 + .byte 94 + .byte 89 + .byte 82 + .byte 85 + .byte 87 + .byte 80 + .byte 91 + .byte 93 + .byte 95 + .byte 115 + .byte 116 + .byte 118 + .byte 120 + .byte 124 + .byte 122 + .byte 113 + .byte 126 + .byte 121 + .byte 114 + .byte 117 + .byte 119 + .byte 112 + .byte 123 + .byte 125 + .byte 127 + .byte 3 + .byte 4 + .byte 6 + .byte 8 + .byte 12 + .byte 10 + .byte 1 + .byte 14 + .byte 9 + .byte 2 + .byte 5 + .byte 7 + .byte 0 + .byte 11 + .byte 13 + .byte 15 + .byte 179 + .byte 180 + .byte 182 + .byte 184 + .byte 188 + .byte 186 + .byte 177 + .byte 190 + .byte 185 + .byte 178 + .byte 181 + .byte 183 + .byte 176 + .byte 187 + .byte 189 + .byte 191 + .byte 211 + .byte 212 + .byte 214 + .byte 216 + .byte 220 + .byte 218 + .byte 209 + .byte 222 + .byte 217 + .byte 210 + .byte 213 + .byte 215 + .byte 208 + .byte 219 + .byte 221 + .byte 223 + .byte 243 + .byte 244 + .byte 246 + .byte 248 + .byte 252 + .byte 250 + .byte 241 + .byte 254 + .byte 249 + .byte 242 + .byte 245 + .byte 247 + .byte 240 + .byte 251 + .byte 253 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_7, @object + .size table_7, 256 +table_7: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + 
.byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 121 + .byte 123 + .byte 
125 + .byte 127 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_8, @object + .size table_8, 256 +table_8: + .byte 0 + .byte 8 + .byte 1 + .byte 9 + .byte 2 + .byte 10 + .byte 3 + .byte 11 + .byte 12 + .byte 4 + .byte 13 + .byte 5 + .byte 14 + .byte 6 + .byte 15 + .byte 7 + .byte 128 + .byte 136 + .byte 129 + .byte 137 + .byte 130 + .byte 138 + .byte 131 + .byte 139 + .byte 140 + .byte 132 + .byte 141 + .byte 133 + .byte 142 + .byte 134 + .byte 143 + .byte 135 + .byte 16 + .byte 24 + .byte 17 + .byte 25 + .byte 18 + .byte 26 + .byte 19 + .byte 27 + .byte 28 + .byte 20 + .byte 29 + .byte 21 + .byte 30 + .byte 22 + .byte 31 + .byte 23 + .byte 144 + .byte 152 + .byte 145 + .byte 153 + .byte 146 + .byte 154 + .byte 147 + .byte 155 + .byte 156 + .byte 148 + .byte 157 + .byte 149 + .byte 158 + .byte 150 + .byte 159 + .byte 151 + .byte 32 + .byte 40 + .byte 33 + .byte 41 + .byte 34 + .byte 42 + .byte 35 + .byte 43 + .byte 44 + .byte 36 + .byte 45 + .byte 37 + .byte 46 + .byte 38 + .byte 47 + .byte 39 + .byte 160 + .byte 168 + .byte 161 + .byte 169 + .byte 162 + .byte 170 + .byte 163 + .byte 171 + .byte 172 
+ .byte 164 + .byte 173 + .byte 165 + .byte 174 + .byte 166 + .byte 175 + .byte 167 + .byte 48 + .byte 56 + .byte 49 + .byte 57 + .byte 50 + .byte 58 + .byte 51 + .byte 59 + .byte 60 + .byte 52 + .byte 61 + .byte 53 + .byte 62 + .byte 54 + .byte 63 + .byte 55 + .byte 176 + .byte 184 + .byte 177 + .byte 185 + .byte 178 + .byte 186 + .byte 179 + .byte 187 + .byte 188 + .byte 180 + .byte 189 + .byte 181 + .byte 190 + .byte 182 + .byte 191 + .byte 183 + .byte 192 + .byte 200 + .byte 193 + .byte 201 + .byte 194 + .byte 202 + .byte 195 + .byte 203 + .byte 204 + .byte 196 + .byte 205 + .byte 197 + .byte 206 + .byte 198 + .byte 207 + .byte 199 + .byte 64 + .byte 72 + .byte 65 + .byte 73 + .byte 66 + .byte 74 + .byte 67 + .byte 75 + .byte 76 + .byte 68 + .byte 77 + .byte 69 + .byte 78 + .byte 70 + .byte 79 + .byte 71 + .byte 208 + .byte 216 + .byte 209 + .byte 217 + .byte 210 + .byte 218 + .byte 211 + .byte 219 + .byte 220 + .byte 212 + .byte 221 + .byte 213 + .byte 222 + .byte 214 + .byte 223 + .byte 215 + .byte 80 + .byte 88 + .byte 81 + .byte 89 + .byte 82 + .byte 90 + .byte 83 + .byte 91 + .byte 92 + .byte 84 + .byte 93 + .byte 85 + .byte 94 + .byte 86 + .byte 95 + .byte 87 + .byte 224 + .byte 232 + .byte 225 + .byte 233 + .byte 226 + .byte 234 + .byte 227 + .byte 235 + .byte 236 + .byte 228 + .byte 237 + .byte 229 + .byte 238 + .byte 230 + .byte 239 + .byte 231 + .byte 96 + .byte 104 + .byte 97 + .byte 105 + .byte 98 + .byte 106 + .byte 99 + .byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + .byte 250 + .byte 243 + .byte 251 + .byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + .byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global forkskinny_128_256_rounds + 
.type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + 
elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd 
r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd 
r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop 
r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global forkskinny_128_256_inv_rounds + .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + 
ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + 
ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 
+#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm 
r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 
+ pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + 
std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + 
std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + + .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi 
r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 
+#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk + + .text +.global forkskinny_128_384_rounds + .type forkskinny_128_384_rounds, @function +forkskinny_128_384_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd 
r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd 
r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + 
lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + ldd r20,Y+45 + ldd r21,Y+46 + ldd r16,Y+47 + ldd r17,Y+48 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+33 + std Y+41,r0 + ldd r0,Y+34 + std 
Y+42,r0 + ldd r0,Y+35 + std Y+43,r0 + ldd r0,Y+36 + std Y+44,r0 + ldd r0,Y+37 + std Y+45,r0 + ldd r0,Y+38 + std Y+46,r0 + ldd r0,Y+39 + std Y+47,r0 + ldd r0,Y+40 + std Y+48,r0 + std Y+33,r19 + std Y+34,r17 + std Y+35,r18 + std Y+36,r21 + std Y+37,r26 + std Y+38,r16 + std Y+39,r20 + std Y+40,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd 
r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds + + .text +.global forkskinny_128_384_inv_rounds + .type forkskinny_128_384_inv_rounds, @function +forkskinny_128_384_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd 
r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if 
defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + ldd r20,Y+37 + ldd r21,Y+38 + ldd r16,Y+39 + ldd r17,Y+40 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm 
r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+41 + std Y+33,r0 + ldd r0,Y+42 + std Y+34,r0 + ldd r0,Y+43 + std Y+35,r0 + ldd r0,Y+44 + std Y+36,r0 + ldd r0,Y+45 + std Y+37,r0 + ldd r0,Y+46 + std Y+38,r0 + ldd r0,Y+47 + std Y+39,r0 + ldd r0,Y+48 + std Y+40,r0 + std Y+41,r26 + std Y+42,r18 + std Y+43,r20 + std Y+44,r17 + std Y+45,r16 + std Y+46,r27 + std Y+47,r21 + std Y+48,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor 
r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm 
r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std 
Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds + + .text +.global forkskinny_128_384_forward_tk + .type forkskinny_128_384_forward_tk, @function +forkskinny_128_384_forward_tk: + push r28 + push r29 + push 
r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd 
r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r26,Y+29 + ldd r27,Y+30 + ldd r2,Y+31 + ldd r3,Y+32 + ldd r23,Y+17 + std Y+25,r23 + ldd r23,Y+18 + std Y+26,r23 + ldd r23,Y+19 + 
std Y+27,r23 + ldd r23,Y+20 + std Y+28,r23 + ldd r23,Y+21 + std Y+29,r23 + ldd r23,Y+22 + std Y+30,r23 + ldd r23,Y+23 + std Y+31,r23 + ldd r23,Y+24 + std Y+32,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+17,r19 + std Y+18,r3 + std Y+19,r18 + std Y+20,r27 + std Y+21,r20 + std Y+22,r2 + std Y+23,r26 + std Y+24,r21 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std 
Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk + + .text +.global forkskinny_128_384_reverse_tk + .type forkskinny_128_384_reverse_tk, @function +forkskinny_128_384_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + 
ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 + ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif 
defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + 
std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd 
r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor 
r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld 
r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 
+ and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd 
r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if 
defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor 
r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z 
+#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + 
ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 
+ pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + 
std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) 
+ elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + and r11,r16 + or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std 
Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/internal-forkskinny.c b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/internal-forkskinny.c index c43ef98..4a7a01c 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/internal-forkskinny.c @@ -40,37 +40,10 @@ static unsigned char const RC[87] = { 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 }; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 - -/** - * \brief State information for ForkSkinny-128-256. 
- */ -typedef struct -{ - //uint32_t TK1[4]; /**< First part of the tweakey */ - //uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_256_state_t; +#if !defined(__AVR__) -typedef struct +void forkskinny_128_256_init_tks(forkskinny_128_256_tweakey_schedule_t *tks, const unsigned char key[32], uint8_t nb_rounds) { - /** Words of the full key schedule */ - uint32_t row0[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; - uint32_t row1[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; - - -} forkskinny_128_256_key_schedule_t; - -static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, const unsigned char key[32], uint8_t nb_rounds){ uint32_t TK[4]; unsigned round; @@ -81,8 +54,8 @@ static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, c TK[3] = le_load_word32(key + 12); /* Initiate key schedule with permutations of TK1 */ for(round = 0; roundrow0[round] = TK[0]; - ks->row1[round] = TK[1]; + tks->row0[round] = TK[0]; + tks->row1[round] = TK[1]; skinny128_permute_tk(TK); } @@ -94,8 +67,8 @@ static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, c TK[3] = le_load_word32(key + 28); /* Process second Tweakey and add it to the key schedule */ for(round = 0; roundrow0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny128_permute_tk(TK); skinny128_LFSR2(TK[0]); @@ -103,51 +76,48 @@ static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, c } } -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, forkskinny_128_256_key_schedule_t *ks, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + s0 = state->S[0]; + s1 = state->S[1]; + s2 = state->S[2]; + s3 = state->S[3]; + + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -156,95 +126,8 @@ static void forkskinny_128_256_round state->S[3] = s3; } -void forkskinny_128_256_encrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_256_state_t state; - forkskinny_128_256_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - if (output_left && output_right){ - forkskinny_128_256_init_tks(&ks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); - } - else{ - forkskinny_128_256_init_tks(&ks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + FORKSKINNY_128_256_ROUNDS_AFTER); - } - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, 
state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-256 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_256_inv_round - (forkskinny_128_256_state_t *state, forkskinny_128_256_key_schedule_t *ks, unsigned round) +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -255,33 +138,37 @@ static void forkskinny_128_256_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -290,96 +177,8 @@ static void forkskinny_128_256_inv_round state->S[3] = s3; } -void forkskinny_128_256_decrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_init_tks(forkskinny_128_384_tweakey_schedule_t *tks, const unsigned char key[48], uint8_t nb_rounds) { - forkskinny_128_256_state_t state; - forkskinny_128_256_state_t fstate; - forkskinny_128_256_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - forkskinny_128_256_init_tks(&ks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); - - /* Unpack the input */ - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); --round) { - forkskinny_128_256_inv_round(&state, &ks, round - 1); - } - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Save the state and the tweakey at the forking point */ - fstate = 
state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_256_inv_round(&state, &ks, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&fstate, &ks, round); - } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); -} - -/** - * \brief Number of rounds of ForkSkinny-128-384 before forking. - */ -#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 - -/** - * \brief Number of rounds of ForkSkinny-128-384 after forking. - */ -#define FORKSKINNY_128_384_ROUNDS_AFTER 31 - -/** - * \brief State information for ForkSkinny-128-384. 
- */ -typedef struct -{ - //uint32_t TK1[4]; /**< First part of the tweakey */ - //uint32_t TK2[4]; /**< Second part of the tweakey */ - //uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -typedef struct -{ - /** Words of the full key schedule */ - uint32_t row0[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; - uint32_t row1[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; - - -} forkskinny_128_384_key_schedule_t; - -static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, const unsigned char key[48], uint8_t nb_rounds){ uint32_t TK[4]; unsigned round; @@ -390,8 +189,8 @@ static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c TK[3] = le_load_word32(key + 12); /* Initiate key schedule with permutations of TK1 */ for(round = 0; roundrow0[round] = TK[0]; - ks->row1[round] = TK[1]; + tks->row0[round] = TK[0]; + tks->row1[round] = TK[1]; skinny128_permute_tk(TK); } @@ -401,10 +200,11 @@ static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c TK[1] = le_load_word32(key + 20); TK[2] = le_load_word32(key + 24); TK[3] = le_load_word32(key + 28); + /* Process second Tweakey and add it to the key schedule */ for(round = 0; roundrow0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny128_permute_tk(TK); skinny128_LFSR2(TK[0]); @@ -416,10 +216,11 @@ static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c TK[1] = le_load_word32(key + 36); TK[2] = le_load_word32(key + 40); TK[3] = le_load_word32(key + 44); + /* Process third Tweakey and add it to the key schedule */ for(round = 0; roundrow0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny128_permute_tk(TK); skinny128_LFSR3(TK[0]); @@ -427,51 +228,48 @@ static void 
forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c } } -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, forkskinny_128_384_key_schedule_t *ks, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + s0 = state->S[0]; + s1 = state->S[1]; + s2 = state->S[2]; + s3 = state->S[3]; + + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -480,97 +278,8 @@ static void forkskinny_128_384_round state->S[3] = s3; } -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - forkskinny_128_384_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - if (output_left && output_right){ - forkskinny_128_384_init_tks(&ks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); - } - else{ - forkskinny_128_384_init_tks(&ks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER); - } - - /* Unpack the input */ - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - 
le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-384 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_384_inv_round - (forkskinny_128_384_state_t *state, forkskinny_128_384_key_schedule_t *ks, unsigned round) +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -581,33 +290,37 @@ static void forkskinny_128_384_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -616,96 +329,9 @@ static void forkskinny_128_384_inv_round state->S[3] = s3; } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - forkskinny_128_384_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - forkskinny_128_384_init_tks(&ks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); - - /* Unpack the input */ - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, &ks, round - 1); - } - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; 
round > 0; --round) { - forkskinny_128_384_inv_round(&state, &ks, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, &ks, round); - } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); -} - -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. - */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 -/** - * \brief State information for ForkSkinny-64-192. 
- */ -typedef struct +void forkskinny_64_192_init_tks(forkskinny_64_192_tweakey_schedule_t *tks, const unsigned char key[24], uint8_t nb_rounds) { - //uint16_t TK1[4]; /**< First part of the tweakey */ - //uint16_t TK2[4]; /**< Second part of the tweakey */ - //uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ - -} forkskinny_64_192_state_t; - -typedef struct -{ - /** Words of the full key schedule */ - uint16_t row0[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; - uint16_t row1[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; - - -} forkskinny_64_192_key_schedule_t; - -static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, const unsigned char key[24], uint8_t nb_rounds){ uint16_t TK[4]; unsigned round; @@ -716,8 +342,8 @@ static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con TK[3] = be_load_word16(key + 6); /* Initiate key schedule with permutations of TK1 */ for(round = 0; roundrow0[round] = TK[0]; - ks->row1[round] = TK[1]; + tks->row0[round] = TK[0]; + tks->row1[round] = TK[1]; skinny64_permute_tk(TK); } @@ -727,10 +353,11 @@ static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con TK[1] = be_load_word16(key + 10); TK[2] = be_load_word16(key + 12); TK[3] = be_load_word16(key + 14); + /* Process second Tweakey and add it to the key schedule */ for(round = 0; roundrow0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny64_permute_tk(TK); skinny64_LFSR2(TK[0]); @@ -744,8 +371,8 @@ static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con TK[3] = be_load_word16(key + 22); /* Process third Tweakey and add it to the key schedule */ for(round = 0; roundrow0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny64_permute_tk(TK); skinny64_LFSR3(TK[0]); @@ -753,17 +380,8 @@ 
static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con } } -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. - */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, forkskinny_64_192_key_schedule_t *ks, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -774,131 +392,44 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= ks->row1[round] ^ ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= tks->row0[first] ^ ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= tks->row1[first] ^ ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = 
s0; + s0 = temp; + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - forkskinny_64_192_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - if (output_left && output_right){ - forkskinny_64_192_init_tks(&ks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); - } - else{ - forkskinny_64_192_init_tks(&ks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + FORKSKINNY_64_192_ROUNDS_AFTER); - } - - /* Unpack the input */ - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 
0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, forkskinny_64_192_key_schedule_t *ks, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -909,33 +440,35 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= ks->row1[round] ^ ((rc & 0x70) << 8); - s2 ^= 0x2000; - - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= tks->row0[first] ^ ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= tks->row1[first] ^ ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -944,61 +477,4 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char 
*output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - forkskinny_64_192_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - forkskinny_64_192_init_tks(&ks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); - - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, &ks, round - 1); - } - - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, &ks, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, &ks, round); - } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); -} +#endif /* !__AVR__ */ diff --git 
a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/internal-forkskinny.h b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/internal-forkskinny.h index 0c1a707..1b4f5bf 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,172 @@ extern "C" { #endif /** + * \brief Number of rounds of ForkSkinny-128-256 before forking. + */ +#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 + +/** + * \brief Number of rounds of ForkSkinny-128-256 after forking. + */ +#define FORKSKINNY_128_256_ROUNDS_AFTER 27 + +/** + * \brief State information for ForkSkinny-128-256. + */ +typedef struct +{ + // uint32_t TK1[4]; /**< First part of the tweakey */ + // uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +typedef struct +{ + /** Words of the full key schedule */ + uint32_t row0[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; + uint32_t row1[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; + +} forkskinny_128_256_tweakey_schedule_t; + +/** + * \brief Number of rounds of ForkSkinny-128-384 before forking. + */ +#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 + +/** + * \brief Number of rounds of ForkSkinny-128-384 after forking. + */ +#define FORKSKINNY_128_384_ROUNDS_AFTER 31 + +/** + * \brief State information for ForkSkinny-128-384. 
+ */ +typedef struct +{ + // uint32_t TK1[4]; /**< First part of the tweakey */ + // uint32_t TK2[4]; /**< Second part of the tweakey */ + // uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +typedef struct +{ + /** Words of the full key schedule */ + uint32_t row0[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; + uint32_t row1[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; + +} forkskinny_128_384_tweakey_schedule_t; + +/** + * \brief Number of rounds of ForkSkinny-64-192 before forking. + */ +#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 + +/** + * \brief Number of rounds of ForkSkinny-64-192 after forking. + */ +#define FORKSKINNY_64_192_ROUNDS_AFTER 23 + +/** + * \brief State information for ForkSkinny-64-192. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +typedef struct +{ + /** Words of the full key schedule */ + uint16_t row0[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; + uint16_t row1[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; + + +} forkskinny_64_192_tweakey_schedule_t; + + +void forkskinny_128_256_init_tks(forkskinny_128_256_tweakey_schedule_t *tks, const unsigned char key[32], uint8_t nb_rounds); + + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. 
+ * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last); + + +void forkskinny_128_384_init_tks(forkskinny_128_384_tweakey_schedule_t *tks, const unsigned char key[48], uint8_t nb_rounds); + + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last); + + +void forkskinny_64_192_init_tks(forkskinny_64_192_tweakey_schedule_t *tks, const unsigned char key[24], uint8_t nb_rounds); + + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. 
+ */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last); + + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. * * \param key 256-bit tweakey for ForkSkinny-128-256. diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_dec/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. 
The original version from the specification is * equivalent to: diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/aead-common.h b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/aead-common.h index 2be95eb..8429f59 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/aead-common.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/aead-common.h @@ -154,7 +154,7 @@ typedef void (*aead_xof_squeeze_t) /** * \brief No special AEAD features. */ -#define AEAD_FLAG_NONE 0x0000 +#define AEAD_FLAG_NONE 0x0000 /** * \brief The natural byte order of the AEAD cipher is little-endian. @@ -166,7 +166,18 @@ typedef void (*aead_xof_squeeze_t) * numbers as nonces. The application needs to know whether the sequence * number should be packed into the leading or trailing bytes of the nonce. */ -#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 +#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 + +/** + * \brief The AEAD mode provides side-channel protection for the key. + */ +#define AEAD_FLAG_SC_PROTECT_KEY 0x0002 + +/** + * \brief The AEAD mode provides side-channel protection for all block + * operations. + */ +#define AEAD_FLAG_SC_PROTECT_ALL 0x0004 /** * \brief Meta-information about an AEAD cipher. 
diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/forkae.c b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/forkae.c index 4a9671a..5b7ba3d 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/forkae.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/forkae.c @@ -22,7 +22,7 @@ #include "forkae.h" #include "internal-forkskinny.h" -#include "internal-util.h" +#include "internal-skinnyutil.h" #include aead_cipher_t const forkae_paef_64_192_cipher = { @@ -138,3 +138,458 @@ aead_cipher_t const forkae_saef_128_256_cipher = { #define FORKAE_TWEAKEY_REDUCED_SIZE 32 #define FORKAE_BLOCK_FUNC forkskinny_128_256 #include "internal-forkae-saef.h" + +/* Helper functions to implement the forking encrypt/decrypt block operations + * on top of the basic "perform N rounds" functions in internal-forkskinny.c */ + +/** + * \brief Number of rounds of ForkSkinny-128-256 before forking. + */ +#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 + +/** + * \brief Number of rounds of ForkSkinny-128-256 after forking. 
+ */ +#define FORKSKINNY_128_256_ROUNDS_AFTER 27 + +void forkskinny_128_256_encrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + + /* State stored per column */ + load_column_8(state.S, input); + + /* Run all of the rounds before the forking point */ + forkskinny_128_256_rounds(&state, 0, FORKSKINNY_128_256_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + store_column_8(output_right, state.S); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x51051001; /* Branching constant */ + state.S[1] ^= 0xa20a2002; + state.S[2] ^= 0x44144104; + state.S[3] ^= 0x88288208; + + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + store_column_8(output_left, state.S); + } else { + /* We only need the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + 
FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + store_column_8(output_right, state.S); + } +} + +void forkskinny_128_256_decrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_256_forward_tk + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_256_reverse_tk(&state, FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point, convert state to columns */ + memcpy(fstate.TK1, state.TK1, 16); + memcpy(fstate.TK2, state.TK2, 16); + rows_to_columns_32(fstate.S[0],fstate.S[1],fstate.S[2],fstate.S[3],state.S[0],state.S[1], state.S[2], state.S[3]); + + + /* Generate the left output block after another "before" 
rounds */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_256_rounds + (&fstate, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + store_column_8(output_right,fstate.S); +} + +/** + * \brief Number of rounds of ForkSkinny-128-384 before forking. + */ +#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 + +/** + * \brief Number of rounds of ForkSkinny-128-384 after forking. + */ +#define FORKSKINNY_128_384_ROUNDS_AFTER 31 + +void forkskinny_128_384_encrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + + /* State stored per column */ + load_column_8(state.S, input); + + /* Run all of the rounds before the forking point */ + forkskinny_128_384_rounds(&state, 0, FORKSKINNY_128_384_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] 
= state.S[3]; + + /* Generate the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + store_column_8(output_right, state.S); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x51051001; /* Branching constant */ + state.S[1] ^= 0xa20a2002; + state.S[2] ^= 0x44144104; + state.S[3] ^= 0x88288208; + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + store_column_8(output_left, state.S); + } else { + /* We only need the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + store_column_8(output_right, state.S); + } +} + +void forkskinny_128_384_decrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); 
+ + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_384_forward_tk + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_384_inv_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_384_reverse_tk(&state, FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point, convert state to columns */ + memcpy(fstate.TK1, state.TK1, 16); + memcpy(fstate.TK2, state.TK2, 16); + memcpy(fstate.TK3, state.TK3, 16); + rows_to_columns_32(fstate.S[0],fstate.S[1],fstate.S[2],fstate.S[3],state.S[0],state.S[1], state.S[2], state.S[3]); + + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_384_inv_rounds(&state, FORKSKINNY_128_384_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_384_rounds + (&fstate, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + store_column_8(output_right, fstate.S); +} + +/** + * \brief Number of rounds of ForkSkinny-64-192 before forking. + */ +#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 + +/** + * \brief Number of rounds of ForkSkinny-64-192 after forking. 
+ */ +#define FORKSKINNY_64_192_ROUNDS_AFTER 23 + +void forkskinny_64_192_encrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Run all of the rounds before the forking point */ + forkskinny_64_192_rounds(&state, 0, FORKSKINNY_64_192_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint16_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x1249U; /* Branching constant */ + state.S[1] ^= 0x36daU; + state.S[2] ^= 
0x5b7fU; + state.S[3] ^= 0xec81U; + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + } +} + +void forkskinny_64_192_decrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_64_192_forward_tk + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking 
point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_64_192_reverse_tk(&state, FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_64_192_inv_rounds(&state, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_64_192_rounds + (&fstate, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 67 + 
.byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + .byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte 193 + .byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 + .byte 100 + .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + .byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 194 + 
.byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 + .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + .byte 97 + .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 + 
.byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + .byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + .byte 93 + .byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 223 + .byte 22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section .progmem.data,"a",@progbits 
+ .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 80 + 
.byte 82 + .byte 84 + .byte 86 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_3, @object + .size table_3, 256 +table_3: + .byte 0 + .byte 128 + .byte 1 + .byte 129 + .byte 2 + .byte 130 + .byte 3 + .byte 131 + .byte 4 + .byte 132 + .byte 5 + .byte 133 + .byte 6 + .byte 134 + .byte 7 + .byte 135 + .byte 8 + .byte 136 + .byte 9 + .byte 137 + .byte 10 + .byte 138 + .byte 11 + .byte 139 + .byte 12 + .byte 140 + .byte 13 + .byte 141 + .byte 14 + .byte 142 + .byte 15 + .byte 143 + .byte 16 + .byte 144 + .byte 17 + .byte 145 + .byte 18 + .byte 146 + .byte 19 + .byte 147 + .byte 20 + .byte 148 + .byte 21 + .byte 149 + .byte 22 + .byte 150 + .byte 23 + .byte 151 + .byte 24 + .byte 152 + .byte 25 + .byte 153 + .byte 26 + .byte 154 + .byte 27 + .byte 155 + .byte 28 + .byte 156 + .byte 29 + .byte 157 + .byte 30 + .byte 158 + .byte 31 + .byte 159 + .byte 160 + .byte 32 + .byte 161 + .byte 33 + .byte 162 + .byte 34 + .byte 163 + .byte 35 + .byte 164 + .byte 36 + .byte 165 + 
.byte 37 + .byte 166 + .byte 38 + .byte 167 + .byte 39 + .byte 168 + .byte 40 + .byte 169 + .byte 41 + .byte 170 + .byte 42 + .byte 171 + .byte 43 + .byte 172 + .byte 44 + .byte 173 + .byte 45 + .byte 174 + .byte 46 + .byte 175 + .byte 47 + .byte 176 + .byte 48 + .byte 177 + .byte 49 + .byte 178 + .byte 50 + .byte 179 + .byte 51 + .byte 180 + .byte 52 + .byte 181 + .byte 53 + .byte 182 + .byte 54 + .byte 183 + .byte 55 + .byte 184 + .byte 56 + .byte 185 + .byte 57 + .byte 186 + .byte 58 + .byte 187 + .byte 59 + .byte 188 + .byte 60 + .byte 189 + .byte 61 + .byte 190 + .byte 62 + .byte 191 + .byte 63 + .byte 64 + .byte 192 + .byte 65 + .byte 193 + .byte 66 + .byte 194 + .byte 67 + .byte 195 + .byte 68 + .byte 196 + .byte 69 + .byte 197 + .byte 70 + .byte 198 + .byte 71 + .byte 199 + .byte 72 + .byte 200 + .byte 73 + .byte 201 + .byte 74 + .byte 202 + .byte 75 + .byte 203 + .byte 76 + .byte 204 + .byte 77 + .byte 205 + .byte 78 + .byte 206 + .byte 79 + .byte 207 + .byte 80 + .byte 208 + .byte 81 + .byte 209 + .byte 82 + .byte 210 + .byte 83 + .byte 211 + .byte 84 + .byte 212 + .byte 85 + .byte 213 + .byte 86 + .byte 214 + .byte 87 + .byte 215 + .byte 88 + .byte 216 + .byte 89 + .byte 217 + .byte 90 + .byte 218 + .byte 91 + .byte 219 + .byte 92 + .byte 220 + .byte 93 + .byte 221 + .byte 94 + .byte 222 + .byte 95 + .byte 223 + .byte 224 + .byte 96 + .byte 225 + .byte 97 + .byte 226 + .byte 98 + .byte 227 + .byte 99 + .byte 228 + .byte 100 + .byte 229 + .byte 101 + .byte 230 + .byte 102 + .byte 231 + .byte 103 + .byte 232 + .byte 104 + .byte 233 + .byte 105 + .byte 234 + .byte 106 + .byte 235 + .byte 107 + .byte 236 + .byte 108 + .byte 237 + .byte 109 + .byte 238 + .byte 110 + .byte 239 + .byte 111 + .byte 240 + .byte 112 + .byte 241 + .byte 113 + .byte 242 + .byte 114 + .byte 243 + .byte 115 + .byte 244 + .byte 116 + .byte 245 + .byte 117 + .byte 246 + .byte 118 + .byte 247 + .byte 119 + .byte 248 + .byte 120 + .byte 249 + .byte 121 + .byte 250 + .byte 122 + .byte 251 
+ .byte 123 + .byte 252 + .byte 124 + .byte 253 + .byte 125 + .byte 254 + .byte 126 + .byte 255 + .byte 127 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_4, @object + .size table_4, 174 +table_4: + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 7 + .byte 0 + .byte 15 + .byte 0 + .byte 15 + .byte 1 + .byte 15 + .byte 3 + .byte 14 + .byte 7 + .byte 13 + .byte 7 + .byte 11 + .byte 7 + .byte 7 + .byte 7 + .byte 15 + .byte 6 + .byte 15 + .byte 5 + .byte 14 + .byte 3 + .byte 12 + .byte 7 + .byte 9 + .byte 7 + .byte 3 + .byte 7 + .byte 7 + .byte 6 + .byte 15 + .byte 4 + .byte 14 + .byte 1 + .byte 13 + .byte 3 + .byte 10 + .byte 7 + .byte 5 + .byte 7 + .byte 11 + .byte 6 + .byte 7 + .byte 5 + .byte 14 + .byte 2 + .byte 12 + .byte 5 + .byte 8 + .byte 3 + .byte 0 + .byte 7 + .byte 1 + .byte 6 + .byte 3 + .byte 4 + .byte 6 + .byte 0 + .byte 13 + .byte 0 + .byte 11 + .byte 1 + .byte 7 + .byte 3 + .byte 14 + .byte 6 + .byte 13 + .byte 5 + .byte 10 + .byte 3 + .byte 4 + .byte 7 + .byte 9 + .byte 6 + .byte 3 + .byte 5 + .byte 6 + .byte 2 + .byte 12 + .byte 4 + .byte 8 + .byte 1 + .byte 1 + .byte 3 + .byte 2 + .byte 6 + .byte 5 + .byte 4 + .byte 10 + .byte 0 + .byte 5 + .byte 1 + .byte 11 + .byte 2 + .byte 6 + .byte 5 + .byte 12 + .byte 2 + .byte 8 + .byte 5 + .byte 0 + .byte 3 + .byte 0 + .byte 6 + .byte 1 + .byte 4 + .byte 2 + .byte 0 + .byte 5 + .byte 0 + .byte 11 + .byte 0 + .byte 7 + .byte 1 + .byte 15 + .byte 2 + .byte 14 + .byte 5 + .byte 12 + .byte 3 + .byte 8 + .byte 7 + .byte 1 + .byte 7 + .byte 3 + .byte 6 + .byte 7 + .byte 4 + .byte 14 + .byte 0 + .byte 13 + .byte 1 + .byte 11 + .byte 3 + .byte 6 + .byte 7 + .byte 13 + .byte 6 + .byte 11 + .byte 5 + .byte 6 + .byte 3 + .byte 12 + .byte 6 + .byte 9 + .byte 5 + .byte 2 + .byte 3 + .byte 4 + .byte 6 + .byte 9 + .byte 4 + .byte 2 + .byte 1 + .byte 5 + .byte 2 + .byte 10 + .byte 4 + .byte 4 + .byte 1 + .byte 9 + .byte 2 + .byte 2 + .byte 5 + .byte 4 + .byte 2 + .byte 8 + .byte 4 + .byte 0 + .byte 1 + + 
.section .progmem.data,"a",@progbits + .p2align 8 + .type table_5, @object + .size table_5, 256 +table_5: + .byte 204 + .byte 198 + .byte 201 + .byte 192 + .byte 193 + .byte 202 + .byte 194 + .byte 203 + .byte 195 + .byte 200 + .byte 197 + .byte 205 + .byte 196 + .byte 206 + .byte 199 + .byte 207 + .byte 108 + .byte 102 + .byte 105 + .byte 96 + .byte 97 + .byte 106 + .byte 98 + .byte 107 + .byte 99 + .byte 104 + .byte 101 + .byte 109 + .byte 100 + .byte 110 + .byte 103 + .byte 111 + .byte 156 + .byte 150 + .byte 153 + .byte 144 + .byte 145 + .byte 154 + .byte 146 + .byte 155 + .byte 147 + .byte 152 + .byte 149 + .byte 157 + .byte 148 + .byte 158 + .byte 151 + .byte 159 + .byte 12 + .byte 6 + .byte 9 + .byte 0 + .byte 1 + .byte 10 + .byte 2 + .byte 11 + .byte 3 + .byte 8 + .byte 5 + .byte 13 + .byte 4 + .byte 14 + .byte 7 + .byte 15 + .byte 28 + .byte 22 + .byte 25 + .byte 16 + .byte 17 + .byte 26 + .byte 18 + .byte 27 + .byte 19 + .byte 24 + .byte 21 + .byte 29 + .byte 20 + .byte 30 + .byte 23 + .byte 31 + .byte 172 + .byte 166 + .byte 169 + .byte 160 + .byte 161 + .byte 170 + .byte 162 + .byte 171 + .byte 163 + .byte 168 + .byte 165 + .byte 173 + .byte 164 + .byte 174 + .byte 167 + .byte 175 + .byte 44 + .byte 38 + .byte 41 + .byte 32 + .byte 33 + .byte 42 + .byte 34 + .byte 43 + .byte 35 + .byte 40 + .byte 37 + .byte 45 + .byte 36 + .byte 46 + .byte 39 + .byte 47 + .byte 188 + .byte 182 + .byte 185 + .byte 176 + .byte 177 + .byte 186 + .byte 178 + .byte 187 + .byte 179 + .byte 184 + .byte 181 + .byte 189 + .byte 180 + .byte 190 + .byte 183 + .byte 191 + .byte 60 + .byte 54 + .byte 57 + .byte 48 + .byte 49 + .byte 58 + .byte 50 + .byte 59 + .byte 51 + .byte 56 + .byte 53 + .byte 61 + .byte 52 + .byte 62 + .byte 55 + .byte 63 + .byte 140 + .byte 134 + .byte 137 + .byte 128 + .byte 129 + .byte 138 + .byte 130 + .byte 139 + .byte 131 + .byte 136 + .byte 133 + .byte 141 + .byte 132 + .byte 142 + .byte 135 + .byte 143 + .byte 92 + .byte 86 + .byte 89 + .byte 80 + .byte 
81 + .byte 90 + .byte 82 + .byte 91 + .byte 83 + .byte 88 + .byte 85 + .byte 93 + .byte 84 + .byte 94 + .byte 87 + .byte 95 + .byte 220 + .byte 214 + .byte 217 + .byte 208 + .byte 209 + .byte 218 + .byte 210 + .byte 219 + .byte 211 + .byte 216 + .byte 213 + .byte 221 + .byte 212 + .byte 222 + .byte 215 + .byte 223 + .byte 76 + .byte 70 + .byte 73 + .byte 64 + .byte 65 + .byte 74 + .byte 66 + .byte 75 + .byte 67 + .byte 72 + .byte 69 + .byte 77 + .byte 68 + .byte 78 + .byte 71 + .byte 79 + .byte 236 + .byte 230 + .byte 233 + .byte 224 + .byte 225 + .byte 234 + .byte 226 + .byte 235 + .byte 227 + .byte 232 + .byte 229 + .byte 237 + .byte 228 + .byte 238 + .byte 231 + .byte 239 + .byte 124 + .byte 118 + .byte 121 + .byte 112 + .byte 113 + .byte 122 + .byte 114 + .byte 123 + .byte 115 + .byte 120 + .byte 117 + .byte 125 + .byte 116 + .byte 126 + .byte 119 + .byte 127 + .byte 252 + .byte 246 + .byte 249 + .byte 240 + .byte 241 + .byte 250 + .byte 242 + .byte 251 + .byte 243 + .byte 248 + .byte 245 + .byte 253 + .byte 244 + .byte 254 + .byte 247 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_6, @object + .size table_6, 256 +table_6: + .byte 51 + .byte 52 + .byte 54 + .byte 56 + .byte 60 + .byte 58 + .byte 49 + .byte 62 + .byte 57 + .byte 50 + .byte 53 + .byte 55 + .byte 48 + .byte 59 + .byte 61 + .byte 63 + .byte 67 + .byte 68 + .byte 70 + .byte 72 + .byte 76 + .byte 74 + .byte 65 + .byte 78 + .byte 73 + .byte 66 + .byte 69 + .byte 71 + .byte 64 + .byte 75 + .byte 77 + .byte 79 + .byte 99 + .byte 100 + .byte 102 + .byte 104 + .byte 108 + .byte 106 + .byte 97 + .byte 110 + .byte 105 + .byte 98 + .byte 101 + .byte 103 + .byte 96 + .byte 107 + .byte 109 + .byte 111 + .byte 131 + .byte 132 + .byte 134 + .byte 136 + .byte 140 + .byte 138 + .byte 129 + .byte 142 + .byte 137 + .byte 130 + .byte 133 + .byte 135 + .byte 128 + .byte 139 + .byte 141 + .byte 143 + .byte 195 + .byte 196 + .byte 198 + .byte 200 + .byte 204 + .byte 202 + .byte 193 + 
.byte 206 + .byte 201 + .byte 194 + .byte 197 + .byte 199 + .byte 192 + .byte 203 + .byte 205 + .byte 207 + .byte 163 + .byte 164 + .byte 166 + .byte 168 + .byte 172 + .byte 170 + .byte 161 + .byte 174 + .byte 169 + .byte 162 + .byte 165 + .byte 167 + .byte 160 + .byte 171 + .byte 173 + .byte 175 + .byte 19 + .byte 20 + .byte 22 + .byte 24 + .byte 28 + .byte 26 + .byte 17 + .byte 30 + .byte 25 + .byte 18 + .byte 21 + .byte 23 + .byte 16 + .byte 27 + .byte 29 + .byte 31 + .byte 227 + .byte 228 + .byte 230 + .byte 232 + .byte 236 + .byte 234 + .byte 225 + .byte 238 + .byte 233 + .byte 226 + .byte 229 + .byte 231 + .byte 224 + .byte 235 + .byte 237 + .byte 239 + .byte 147 + .byte 148 + .byte 150 + .byte 152 + .byte 156 + .byte 154 + .byte 145 + .byte 158 + .byte 153 + .byte 146 + .byte 149 + .byte 151 + .byte 144 + .byte 155 + .byte 157 + .byte 159 + .byte 35 + .byte 36 + .byte 38 + .byte 40 + .byte 44 + .byte 42 + .byte 33 + .byte 46 + .byte 41 + .byte 34 + .byte 37 + .byte 39 + .byte 32 + .byte 43 + .byte 45 + .byte 47 + .byte 83 + .byte 84 + .byte 86 + .byte 88 + .byte 92 + .byte 90 + .byte 81 + .byte 94 + .byte 89 + .byte 82 + .byte 85 + .byte 87 + .byte 80 + .byte 91 + .byte 93 + .byte 95 + .byte 115 + .byte 116 + .byte 118 + .byte 120 + .byte 124 + .byte 122 + .byte 113 + .byte 126 + .byte 121 + .byte 114 + .byte 117 + .byte 119 + .byte 112 + .byte 123 + .byte 125 + .byte 127 + .byte 3 + .byte 4 + .byte 6 + .byte 8 + .byte 12 + .byte 10 + .byte 1 + .byte 14 + .byte 9 + .byte 2 + .byte 5 + .byte 7 + .byte 0 + .byte 11 + .byte 13 + .byte 15 + .byte 179 + .byte 180 + .byte 182 + .byte 184 + .byte 188 + .byte 186 + .byte 177 + .byte 190 + .byte 185 + .byte 178 + .byte 181 + .byte 183 + .byte 176 + .byte 187 + .byte 189 + .byte 191 + .byte 211 + .byte 212 + .byte 214 + .byte 216 + .byte 220 + .byte 218 + .byte 209 + .byte 222 + .byte 217 + .byte 210 + .byte 213 + .byte 215 + .byte 208 + .byte 219 + .byte 221 + .byte 223 + .byte 243 + .byte 244 + .byte 246 + .byte 248 
+ .byte 252 + .byte 250 + .byte 241 + .byte 254 + .byte 249 + .byte 242 + .byte 245 + .byte 247 + .byte 240 + .byte 251 + .byte 253 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_7, @object + .size table_7, 256 +table_7: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 
49 + .byte 51 + .byte 53 + .byte 55 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_8, @object + .size table_8, 256 +table_8: + .byte 0 + .byte 8 + .byte 1 + .byte 9 + .byte 2 + .byte 10 + .byte 3 + .byte 11 + .byte 12 + .byte 4 + .byte 13 + .byte 5 + .byte 14 + .byte 6 + .byte 15 + .byte 7 + .byte 128 + .byte 136 + .byte 129 + .byte 137 + .byte 130 + .byte 138 + .byte 131 + .byte 139 + .byte 140 + .byte 132 + .byte 141 + .byte 133 + .byte 142 + .byte 134 + .byte 143 + .byte 135 + .byte 16 + .byte 24 + .byte 17 + .byte 25 + .byte 18 + .byte 26 + .byte 19 + .byte 27 + .byte 28 + .byte 20 + .byte 29 + .byte 21 + .byte 30 + .byte 22 + .byte 31 + .byte 23 + .byte 144 + .byte 152 + .byte 145 + .byte 153 + .byte 146 + .byte 154 + .byte 147 + .byte 155 + .byte 156 + .byte 148 + .byte 157 + .byte 
149 + .byte 158 + .byte 150 + .byte 159 + .byte 151 + .byte 32 + .byte 40 + .byte 33 + .byte 41 + .byte 34 + .byte 42 + .byte 35 + .byte 43 + .byte 44 + .byte 36 + .byte 45 + .byte 37 + .byte 46 + .byte 38 + .byte 47 + .byte 39 + .byte 160 + .byte 168 + .byte 161 + .byte 169 + .byte 162 + .byte 170 + .byte 163 + .byte 171 + .byte 172 + .byte 164 + .byte 173 + .byte 165 + .byte 174 + .byte 166 + .byte 175 + .byte 167 + .byte 48 + .byte 56 + .byte 49 + .byte 57 + .byte 50 + .byte 58 + .byte 51 + .byte 59 + .byte 60 + .byte 52 + .byte 61 + .byte 53 + .byte 62 + .byte 54 + .byte 63 + .byte 55 + .byte 176 + .byte 184 + .byte 177 + .byte 185 + .byte 178 + .byte 186 + .byte 179 + .byte 187 + .byte 188 + .byte 180 + .byte 189 + .byte 181 + .byte 190 + .byte 182 + .byte 191 + .byte 183 + .byte 192 + .byte 200 + .byte 193 + .byte 201 + .byte 194 + .byte 202 + .byte 195 + .byte 203 + .byte 204 + .byte 196 + .byte 205 + .byte 197 + .byte 206 + .byte 198 + .byte 207 + .byte 199 + .byte 64 + .byte 72 + .byte 65 + .byte 73 + .byte 66 + .byte 74 + .byte 67 + .byte 75 + .byte 76 + .byte 68 + .byte 77 + .byte 69 + .byte 78 + .byte 70 + .byte 79 + .byte 71 + .byte 208 + .byte 216 + .byte 209 + .byte 217 + .byte 210 + .byte 218 + .byte 211 + .byte 219 + .byte 220 + .byte 212 + .byte 221 + .byte 213 + .byte 222 + .byte 214 + .byte 223 + .byte 215 + .byte 80 + .byte 88 + .byte 81 + .byte 89 + .byte 82 + .byte 90 + .byte 83 + .byte 91 + .byte 92 + .byte 84 + .byte 93 + .byte 85 + .byte 94 + .byte 86 + .byte 95 + .byte 87 + .byte 224 + .byte 232 + .byte 225 + .byte 233 + .byte 226 + .byte 234 + .byte 227 + .byte 235 + .byte 236 + .byte 228 + .byte 237 + .byte 229 + .byte 238 + .byte 230 + .byte 239 + .byte 231 + .byte 96 + .byte 104 + .byte 97 + .byte 105 + .byte 98 + .byte 106 + .byte 99 + .byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + .byte 250 + .byte 243 + .byte 251 
+ .byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + .byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global forkskinny_128_256_rounds + .type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi 
r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z 
+#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + 
eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if 
defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd 
r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global forkskinny_128_256_inv_rounds + .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd 
r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif 
defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + 
lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 
+#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 
+ std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 /* Epilogue of forkskinny_128_256_inv_rounds: release its 33-byte frame (SREG is saved in r0 and interrupts are disabled around the SPH write), restore the saved registers, clear r1 and return. */ + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + /* forkskinny_128_256_forward_tk: updates, in place, the first 32 bytes at the pointer passed in r25:r24 (copied to Z), running loop 51 once per count in r22. Bytes 0..15 are held in r4-r15, r24, r25, r16, r17 and are only moved between registers each iteration. Bytes 16..31 live in a 16-byte stack frame (Y offsets 1..16); each iteration swaps the two 8-byte halves of that frame and replaces eight of the bytes by their table_2 lookups. r22 is decremented and tested only at the bottom of the loop, so an entry value of 0 runs 256 iterations, not zero. r18-r21, r23, r26, r27 and r0 are scratch; r2-r17, r28 and r29 are saved and restored. NOTE(review): r25:r24 and r22 are presumably the first and second C arguments under the avr-gcc calling convention - confirm against the C prototype. */ + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 /* bytes 0..15 of the buffer go to registers */ + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 /* bytes 16..31 are copied into the stack frame, four at a time */ + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 /* Park the buffer pointer on the stack: Z is reused as the table_2 lookup pointer for the whole loop (only its low byte r30 is rewritten per lookup). */ + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) /* Where RAMPZ exists, save the caller's value and load the upper address byte of table_2 for the elpm lookups. */ +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r12 + movw r20,r14 
+ movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + dec r22 + breq 5109f + 
rjmp 51b +5109: /* Loop exit of forkskinny_128_256_forward_tk: restore RAMPZ where present, recover the buffer pointer into Z, then write all 32 bytes back (registers first, then the stack frame). */ +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 /* Release the 16-byte frame; SREG is saved in r0 and interrupts are disabled around the SPH write. */ + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + /* forkskinny_128_256_reverse_tk: updates, in place, the first 32 bytes at the pointer passed in r25:r24 (copied to Z), running loop 51 once per count in r22. Bytes 0..15 are held in r2-r17 and are only moved between registers each iteration. Bytes 16..31 live in a 16-byte stack frame (Y offsets 1..16); each iteration swaps the two 8-byte halves of that frame and replaces eight of the bytes by their table_3 lookups. The register and frame moves are the mirror image of those in forkskinny_128_256_forward_tk, which uses table_2 instead. NOTE(review): presumably this routine undoes forward_tk for the same count - confirm against the table contents, which are outside this view. r22 is decremented and tested only at the bottom of the loop, so an entry value of 0 runs 256 iterations, not zero. r18-r21, r23-r27 and r0 are scratch; r2-r17, r28 and r29 are saved and restored. NOTE(review): r25:r24 and r22 are presumably the first and second C arguments under the avr-gcc calling convention - confirm against the C prototype. */ + .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 /* bytes 0..15 of the buffer go to r2-r17 */ + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 /* bytes 16..31 are copied into the stack frame, four at a time */ + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std 
Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld 
r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif /* Store the eight looked-up bytes, permuted, into the upper half of the frame. */ + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 /* Bottom-tested loop counter: leave once r22 reaches zero. */ + dec r22 + breq 5109f + rjmp 51b +5109: /* Loop exit of forkskinny_128_256_reverse_tk: restore RAMPZ where present, recover the buffer pointer into Z, then write all 32 bytes back (registers first, then the stack frame). */ +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 /* Release the 16-byte frame; SREG is saved in r0 and interrupts are disabled around the SPH write. */ + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk + /* forkskinny_128_384_rounds: updates, in place, the 64 bytes at the pointer passed in r25:r24 (copied to Z). Bytes 48..63 are loaded into r2-r15, r24, r25; bytes 0..47 are copied into a 49-byte stack frame at Y offsets 1..48. r20 is doubled and stored at Y offset 49 as the end index; r22 is doubled and used as the running index. Each pass of loop 118: (1) replaces all 16 register bytes by their table_0 lookups; (2) XORs frame bytes 1..8, 17..24 and 33..40 into r2-r9; (3) XORs the table_4 entries at index r22 and r22 plus 1 into r2 and r6, and the constant 2 into r4 and r10; (4) rotates and mixes the four 4-byte register groups; (5) updates the three 16-byte frame sections - the first by byte moves only, the second with table_2 lookups, the third with table_3 lookups. r22 advances by 2 per pass and is compared for equality with the stored end index at the bottom of the loop, so the body always executes at least once and an end index that is never hit exactly only stops after 8-bit wraparound. On exit RAMPZ (where present) and Z are restored, all 64 bytes are written back, the frame is released and r1 is cleared before returning. r16-r21, r26, r27 and r0 are scratch; r2-r17, r28 and r29 are saved and restored. NOTE(review): table_0 is presumably the S-box and table_4 the round-constant table, and r25:r24 / r22 / r20 presumably the first, second and third C arguments (state pointer, first round, last round) - the tables and the prototype are outside this view, confirm. */ + .text +.global forkskinny_128_384_rounds + .type forkskinny_128_384_rounds, @function +forkskinny_128_384_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw 
r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + mov r30,r2 +#if 
defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + 
mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + 
lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + ldd r20,Y+45 + ldd r21,Y+46 + ldd r16,Y+47 + ldd r17,Y+48 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z 
+#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+33 + std Y+41,r0 + ldd r0,Y+34 + std Y+42,r0 + ldd r0,Y+35 + std Y+43,r0 + ldd r0,Y+36 + std Y+44,r0 + ldd r0,Y+37 + std Y+45,r0 + ldd r0,Y+38 + std Y+46,r0 + ldd r0,Y+39 + std Y+47,r0 + ldd r0,Y+40 + std Y+48,r0 + std Y+33,r19 + std Y+34,r17 + std Y+35,r18 + std Y+36,r21 + std Y+37,r26 + std Y+38,r16 + std Y+39,r20 + std Y+40,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd 
r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds + + .text +.global forkskinny_128_384_inv_rounds + .type forkskinny_128_384_inv_rounds, @function +forkskinny_128_384_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std 
Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if 
defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + ldd r20,Y+37 + ldd r21,Y+38 + ldd r16,Y+39 + ldd r17,Y+40 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm 
r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+41 + std Y+33,r0 + ldd r0,Y+42 + std Y+34,r0 + ldd r0,Y+43 + std Y+35,r0 + ldd r0,Y+44 + std Y+36,r0 + ldd r0,Y+45 + std Y+37,r0 + ldd r0,Y+46 + std Y+38,r0 + ldd r0,Y+47 + std Y+39,r0 + ldd r0,Y+48 + std Y+40,r0 + std Y+41,r26 + std Y+42,r18 + std Y+43,r20 + std Y+44,r17 + std Y+45,r16 + std Y+46,r27 + std Y+47,r21 + std Y+48,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor 
r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + 
ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop 
r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds + + .text +.global forkskinny_128_384_forward_tk + .type forkskinny_128_384_forward_tk, @function +forkskinny_128_384_forward_tk: /* Steps bytes 0..47 of the buffer addressed by r25:r24 through r22 schedule iterations; no other bytes of the buffer are read or written. The 48 bytes are handled as three 16-byte words. Per iteration the old first 8-byte half of every word moves to the second half, and the old second half is reordered into the first half; for the second word those bytes are first substituted through table_2 and for the third word through table_3. Callee-saved r2-r17, r28, r29 (and RAMPZ when defined) are restored before returning. NOTE(review): the loop decrements r22 before testing for zero, so r22 == 0 on entry would run 256 iterations - confirm callers never pass 0. */ + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 /* Z = buffer pointer; r24 and r25 become free for data */ + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 /* stack pointer lowered by 32; the scratch area is addressed as Y[1..32] */ +.L__stack_usage = 50 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 /* first word (bytes 0..15) is kept in r4-r15, r24, r25, r16, r17 */ + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 /* second and third words (bytes 16..47) now sit in Y[1..32] */ + push r31 + push r30 /* buffer pointer parked on the stack; Z is reused for table lookups */ + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out 
_SFR_IO_ADDR(RAMPZ),r23 +#endif +83: /* loop head: one schedule iteration per pass */ + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 /* first word done: halves exchanged, reordered bytes in r4-r11, no table lookup */ + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 /* second word: old Y[1..8] moved to Y[9..16]; old Y[9..16] is held in registers */ + mov r30,r18 /* the data byte replaces the low byte of Z to pick the table entry; presumably each table is 256-byte aligned - TODO confirm at the table definitions */ +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 
+ std Y+6,r2 + std Y+7,r26 + std Y+8,r21 /* table_2 outputs stored into Y[1..8] in the same byte order used for r4-r11 */ + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r26,Y+29 + ldd r27,Y+30 + ldd r2,Y+31 + ldd r3,Y+32 + ldd r23,Y+17 + std Y+25,r23 + ldd r23,Y+18 + std Y+26,r23 + ldd r23,Y+19 + std Y+27,r23 + ldd r23,Y+20 + std Y+28,r23 + ldd r23,Y+21 + std Y+29,r23 + ldd r23,Y+22 + std Y+30,r23 + ldd r23,Y+23 + std Y+31,r23 + ldd r23,Y+24 + std Y+32,r23 /* third word: old Y[17..24] moved to Y[25..32]; old Y[25..32] goes through table_3 below */ + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+17,r19 + std Y+18,r3 + std Y+19,r18 + std Y+20,r27 + std Y+21,r20 + std Y+22,r2 + std Y+23,r26 + std Y+24,r21 /* table_3 outputs stored into Y[17..24]; Z is pointed back at table_2 for the next pass */ + ldi r30,lo8(table_2) + ldi 
r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 /* forkskinny_128_384_forward_tk loop test: repeat until the r22 counter reaches zero */ + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 /* Z = buffer pointer again; the 48 updated bytes are written back below */ + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 /* 32-byte scratch area released */ + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk + + .text +.global forkskinny_128_384_reverse_tk + .type forkskinny_128_384_reverse_tk, @function +forkskinny_128_384_reverse_tk: /* Steps bytes 0..47 of the buffer addressed by r25:r24 through r22 schedule iterations in the opposite direction to forkskinny_128_384_forward_tk; no other bytes of the buffer are read or written. The 48 bytes are handled as three 16-byte words. Per iteration the old second 8-byte half of every word moves to the first half, and the old first half is reordered into the second half with the byte order that undoes the forward routine's reordering; for the second word those bytes are first substituted through table_3 and for the third word through table_2. Callee-saved r2-r17, r28, r29 (and RAMPZ when defined) are restored before returning. NOTE(review): the loop decrements r22 before testing for zero, so r22 == 0 on entry would run 256 iterations - confirm callers never pass 0. */ + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 
+ push r14 + push r15 + push r16 + push r17 + movw r30,r24 /* Z = buffer pointer; r24 and r25 become scratch registers */ + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 /* stack pointer lowered by 32; the scratch area is addressed as Y[1..32] */ +.L__stack_usage = 50 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 /* first word (bytes 0..15) is kept in r2-r17 */ + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 /* second and third words (bytes 16..47) now sit in Y[1..32] */ + push r31 + push r30 /* buffer pointer parked on the stack; Z is reused for table lookups */ + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: /* loop head: one schedule iteration per pass */ + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 /* first word done: halves exchanged, reordered bytes in r10-r17, no table lookup */ + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 
+ ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 /* second word: old Y[9..16] moved to Y[1..8]; old Y[1..8] is held in registers */ + mov r30,r18 /* the data byte replaces the low byte of Z to pick the table entry; presumably each table is 256-byte aligned - TODO confirm at the table definitions */ +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 /* table_3 outputs stored into Y[9..16] in the same byte order used for r10-r17 */ + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 + 
ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 /* third word: old Y[25..32] moved to Y[17..24]; old Y[17..24] goes through table_2 below */ + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 /* table_2 outputs stored into Y[25..32]; Z is pointed back at table_3 for the next pass */ + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 /* loop test: repeat until the r22 counter reaches zero */ + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 /* Z = buffer pointer again; the 48 updated bytes are written back from here on */ + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd 
r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 /* 32-byte scratch area of forkskinny_128_384_reverse_tk released */ + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: /* Runs the round loop on the 32-byte buffer addressed by r25:r24, starting at round index r22 and stopping when the index reaches r20 (r20 itself is not executed). Bytes 24..31 are the 8-byte block worked on in r26, r27, r2-r7; bytes 0..23 are three 8-byte words copied to Y[1..24] (presumably the tweakey words TK1..TK3 - confirm against the C header). Each round substitutes the block through table_5, XORs in the first four bytes of each word plus two table_4 bytes and the constant 0x20, rotates and mixes the four 16-bit rows, then advances the three words (the second via table_7, the third via table_8). All 32 bytes are written back at the end. Callee-saved r2-r13, r28, r29 (and RAMPZ when defined) are restored. Relies on r1 holding zero. NOTE(review): the loop exits only on equality of the doubled indices, so r22 >= r20 on entry would keep iterating until the 8-bit counter wraps - confirm callers always pass first < last. */ + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 /* Z = buffer pointer; r24 and r25 are later reused as nibble masks */ + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 /* stack pointer lowered by 24; the scratch area is addressed as Y[1..24] */ +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 /* block bytes 24..31 loaded into r26, r27, r2-r7 */ + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + 
std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 /* buffer bytes 0..23 now sit in Y[1..24] */ + push r31 + push r30 /* buffer pointer parked on the stack; Z is reused for table lookups */ + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 /* both round indices doubled: every round reads two consecutive bytes of table_4 */ +61: /* loop head: one round per pass; the data byte replaces the low byte of Z to pick a table entry, so presumably each table is 256-byte aligned - TODO confirm */ + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 
+#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 /* first four bytes of each of the three words XORed into r26, r27, r2, r3 */ + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 /* two nibble-swapped table_4 bytes went into r27 and r3; constant 0x20 into r5 and r26 */ + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 /* r3:r2 rotated right by 4 bits as a 16-bit value (r0 started as zero copied from r1) */ + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 /* r5:r4 byte-swapped; r7:r6 byte-swapped then rotated right by 4 bits */ + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 /* rows XOR-mixed and rotated one register pair along */ + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 /* first word: old Y[1..4] moved to Y[5..8] */ + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 /* r25 = 0xF0 and r24 = 0x0F stay loaded as nibble masks for the rest of this pass */ + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 /* nibbles of old Y[5..8] shuffled into Y[1..4] */ + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out 
_SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 /* second word: old Y[9..12] moved to Y[13..16]; old Y[13..16] goes through table_7 below */ + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 /* same nibble shuffle as for the first word, result in Y[9..12] */ + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 /* third word: old Y[17..20] moved to Y[21..24]; old Y[21..24] goes through table_8 below */ + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 
+#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 /* same nibble shuffle again, result in Y[17..20]; Z is pointed back at table_5 for the next pass */ + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 /* second increment of this pass; leave the loop once the doubled index equals the doubled end index */ + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 /* Z = buffer pointer again; block and words are written back below */ + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 /* 24-byte scratch area released */ + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push 
r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if 
defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or 
r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm 
r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std 
Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + 
pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 + pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop 
r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + 
mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or 
r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + and r11,r16 + or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/internal-forkskinny.c b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/internal-forkskinny.c index af29f77..b96a04c 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/internal-forkskinny.c @@ -22,12 +22,23 @@ #include "internal-forkskinny.h" #include "internal-skinnyutil.h" -#include /** * \brief 7-bit round constants for all ForkSkinny block ciphers. 
*/ -static unsigned char const RC[87] = {0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7e, 0x7d, 0x7b, 0x77, 0x6f, 0x5f, 0x3e, 0x7c, 0x79, 0x73, 0x67, 0x4f, 0x1e, 0x3d, 0x7a, 0x75, 0x6b, 0x57, 0x2e, 0x5c, 0x38, 0x70, 0x61, 0x43, 0x06, 0x0d, 0x1b, 0x37, 0x6e, 0x5d, 0x3a, 0x74, 0x69, 0x53, 0x26, 0x4c, 0x18, 0x31, 0x62, 0x45, 0x0a, 0x15, 0x2b, 0x56, 0x2c, 0x58, 0x30, 0x60, 0x41, 0x02, 0x05, 0x0b, 0x17, 0x2f, 0x5e, 0x3c, 0x78, 0x71, 0x63, 0x47, 0x0e, 0x1d, 0x3b, 0x76, 0x6d, 0x5b,0x36, 0x6c, 0x59, 0x32, 0x64, 0x49, 0x12, 0x25, 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10}; +static unsigned char const RC[87] = { + 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7e, 0x7d, + 0x7b, 0x77, 0x6f, 0x5f, 0x3e, 0x7c, 0x79, 0x73, + 0x67, 0x4f, 0x1e, 0x3d, 0x7a, 0x75, 0x6b, 0x57, + 0x2e, 0x5c, 0x38, 0x70, 0x61, 0x43, 0x06, 0x0d, + 0x1b, 0x37, 0x6e, 0x5d, 0x3a, 0x74, 0x69, 0x53, + 0x26, 0x4c, 0x18, 0x31, 0x62, 0x45, 0x0a, 0x15, + 0x2b, 0x56, 0x2c, 0x58, 0x30, 0x60, 0x41, 0x02, + 0x05, 0x0b, 0x17, 0x2f, 0x5e, 0x3c, 0x78, 0x71, + 0x63, 0x47, 0x0e, 0x1d, 0x3b, 0x76, 0x6d, 0x5b, + 0x36, 0x6c, 0x59, 0x32, 0x64, 0x49, 0x12, 0x25, + 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 +}; static const uint32_t T[256] = {0x65656565, 0x4c4c4c4c, 0x6a6a6a6a, 0x42424242, 0x4b4b4b4b, 0x63636363, 0x43434343, 0x6b6b6b6b, 0x55555555, 0x75757575, 0x5a5a5a5a, 0x7a7a7a7a, 0x53535353, 0x73737373, 0x5b5b5b5b, 0x7b7b7b7b, 0x35353535, 0x8c8c8c8c, 0x3a3a3a3a, 0x81818181, 0x89898989, 0x33333333, 0x80808080, 0x3b3b3b3b, 0x95959595, 0x25252525, 0x98989898, 0x2a2a2a2a, 0x90909090, 0x23232323, 0x99999999, 0x2b2b2b2b, 0xe5e5e5e5, 0xcccccccc, 0xe8e8e8e8, 0xc1c1c1c1, 0xc9c9c9c9, 0xe0e0e0e0, 0xc0c0c0c0, 0xe9e9e9e9, 0xd5d5d5d5, 0xf5f5f5f5, 0xd8d8d8d8, 0xf8f8f8f8, 0xd0d0d0d0, 0xf0f0f0f0, 0xd9d9d9d9, 0xf9f9f9f9, 0xa5a5a5a5, 0x1c1c1c1c, 0xa8a8a8a8, 0x12121212, 0x1b1b1b1b, 0xa0a0a0a0, 0x13131313, 0xa9a9a9a9, 0x05050505, 0xb5b5b5b5, 0x0a0a0a0a, 0xb8b8b8b8, 0x03030303, 0xb0b0b0b0, 0x0b0b0b0b, 0xb9b9b9b9, 0x32323232, 0x88888888, 0x3c3c3c3c, 0x85858585, 
0x8d8d8d8d, 0x34343434, 0x84848484, 0x3d3d3d3d, 0x91919191, 0x22222222, 0x9c9c9c9c, 0x2c2c2c2c, 0x94949494, 0x24242424, 0x9d9d9d9d, 0x2d2d2d2d, 0x62626262, 0x4a4a4a4a, 0x6c6c6c6c, 0x45454545, 0x4d4d4d4d, 0x64646464, 0x44444444, 0x6d6d6d6d, 0x52525252, 0x72727272, 0x5c5c5c5c, 0x7c7c7c7c, 0x54545454, 0x74747474, 0x5d5d5d5d, 0x7d7d7d7d, 0xa1a1a1a1, 0x1a1a1a1a, 0xacacacac, 0x15151515, 0x1d1d1d1d, 0xa4a4a4a4, 0x14141414, 0xadadadad, 0x02020202, 0xb1b1b1b1, 0x0c0c0c0c, 0xbcbcbcbc, 0x04040404, 0xb4b4b4b4, 0x0d0d0d0d, 0xbdbdbdbd, 0xe1e1e1e1, 0xc8c8c8c8, 0xecececec, 0xc5c5c5c5, 0xcdcdcdcd, 0xe4e4e4e4, 0xc4c4c4c4, 0xedededed, 0xd1d1d1d1, 0xf1f1f1f1, 0xdcdcdcdc, 0xfcfcfcfc, 0xd4d4d4d4, 0xf4f4f4f4, 0xdddddddd, 0xfdfdfdfd, 0x36363636, 0x8e8e8e8e, 0x38383838, 0x82828282, 0x8b8b8b8b, 0x30303030, 0x83838383, 0x39393939, 0x96969696, 0x26262626, 0x9a9a9a9a, 0x28282828, 0x93939393, 0x20202020, 0x9b9b9b9b, 0x29292929, 0x66666666, 0x4e4e4e4e, 0x68686868, 0x41414141, 0x49494949, 0x60606060, 0x40404040, 0x69696969, 0x56565656, 0x76767676, 0x58585858, 0x78787878, 0x50505050, 0x70707070, 0x59595959, 0x79797979, 0xa6a6a6a6, 0x1e1e1e1e, 0xaaaaaaaa, 0x11111111, 0x19191919, 0xa3a3a3a3, 0x10101010, 0xabababab, 0x06060606, 0xb6b6b6b6, 0x08080808, 0xbabababa, 0x00000000, 0xb3b3b3b3, 0x09090909, 0xbbbbbbbb, 0xe6e6e6e6, 0xcececece, 0xeaeaeaea, 0xc2c2c2c2, 0xcbcbcbcb, 0xe3e3e3e3, 0xc3c3c3c3, 0xebebebeb, 0xd6d6d6d6, 0xf6f6f6f6, 0xdadadada, 0xfafafafa, 0xd3d3d3d3, 0xf3f3f3f3, 0xdbdbdbdb, 0xfbfbfbfb, 0x31313131, 0x8a8a8a8a, 0x3e3e3e3e, 0x86868686, 0x8f8f8f8f, 0x37373737, 0x87878787, 0x3f3f3f3f, 0x92929292, 0x21212121, 0x9e9e9e9e, 0x2e2e2e2e, 0x97979797, 0x27272727, 0x9f9f9f9f, 0x2f2f2f2f, 0x61616161, 0x48484848, 0x6e6e6e6e, 0x46464646, 0x4f4f4f4f, 0x67676767, 0x47474747, 0x6f6f6f6f, 0x51515151, 0x71717171, 0x5e5e5e5e, 0x7e7e7e7e, 0x57575757, 0x77777777, 0x5f5f5f5f, 0x7f7f7f7f, 0xa2a2a2a2, 0x18181818, 0xaeaeaeae, 0x16161616, 0x1f1f1f1f, 0xa7a7a7a7, 0x17171717, 0xafafafaf, 0x01010101, 0xb2b2b2b2, 
0x0e0e0e0e, 0xbebebebe, 0x07070707, 0xb7b7b7b7, 0x0f0f0f0f, 0xbfbfbfbf, 0xe2e2e2e2, 0xcacacaca, 0xeeeeeeee, 0xc6c6c6c6, 0xcfcfcfcf, 0xe7e7e7e7, 0xc7c7c7c7, 0xefefefef, 0xd2d2d2d2, 0xf2f2f2f2, 0xdededede, 0xfefefefe, 0xd7d7d7d7, 0xf7f7f7f7, 0xdfdfdfdf, 0xffffffff}; static const uint32_t T_inv[256] = {0xacacacac, 0xe8e8e8e8, 0x68686868, 0x3c3c3c3c, 0x6c6c6c6c, 0x38383838, 0xa8a8a8a8, 0xecececec, 0xaaaaaaaa, 0xaeaeaeae, 0x3a3a3a3a, 0x3e3e3e3e, 0x6a6a6a6a, 0x6e6e6e6e, 0xeaeaeaea, 0xeeeeeeee, 0xa6a6a6a6, 0xa3a3a3a3, 0x33333333, 0x36363636, 0x66666666, 0x63636363, 0xe3e3e3e3, 0xe6e6e6e6, 0xe1e1e1e1, 0xa4a4a4a4, 0x61616161, 0x34343434, 0x31313131, 0x64646464, 0xa1a1a1a1, 0xe4e4e4e4, 0x8d8d8d8d, 0xc9c9c9c9, 0x49494949, 0x1d1d1d1d, 0x4d4d4d4d, 0x19191919, 0x89898989, 0xcdcdcdcd, 0x8b8b8b8b, 0x8f8f8f8f, 0x1b1b1b1b, 0x1f1f1f1f, 0x4b4b4b4b, 0x4f4f4f4f, 0xcbcbcbcb, 0xcfcfcfcf, 0x85858585, 0xc0c0c0c0, 0x40404040, 0x15151515, 0x45454545, 0x10101010, 0x80808080, 0xc5c5c5c5, 0x82828282, 0x87878787, 0x12121212, 0x17171717, 0x42424242, 0x47474747, 0xc2c2c2c2, 0xc7c7c7c7, 0x96969696, 0x93939393, 0x03030303, 0x06060606, 0x56565656, 0x53535353, 0xd3d3d3d3, 0xd6d6d6d6, 0xd1d1d1d1, 0x94949494, 0x51515151, 0x04040404, 0x01010101, 0x54545454, 0x91919191, 0xd4d4d4d4, 0x9c9c9c9c, 0xd8d8d8d8, 0x58585858, 0x0c0c0c0c, 0x5c5c5c5c, 0x08080808, 0x98989898, 0xdcdcdcdc, 0x9a9a9a9a, 0x9e9e9e9e, 0x0a0a0a0a, 0x0e0e0e0e, 0x5a5a5a5a, 0x5e5e5e5e, 0xdadadada, 0xdededede, 0x95959595, 0xd0d0d0d0, 0x50505050, 0x05050505, 0x55555555, 0x00000000, 0x90909090, 0xd5d5d5d5, 0x92929292, 0x97979797, 0x02020202, 0x07070707, 0x52525252, 0x57575757, 0xd2d2d2d2, 0xd7d7d7d7, 0x9d9d9d9d, 0xd9d9d9d9, 0x59595959, 0x0d0d0d0d, 0x5d5d5d5d, 0x09090909, 0x99999999, 0xdddddddd, 0x9b9b9b9b, 0x9f9f9f9f, 0x0b0b0b0b, 0x0f0f0f0f, 0x5b5b5b5b, 0x5f5f5f5f, 0xdbdbdbdb, 0xdfdfdfdf, 0x16161616, 0x13131313, 0x83838383, 0x86868686, 0x46464646, 0x43434343, 0xc3c3c3c3, 0xc6c6c6c6, 0x41414141, 0x14141414, 0xc1c1c1c1, 0x84848484, 0x11111111, 
0x44444444, 0x81818181, 0xc4c4c4c4, 0x1c1c1c1c, 0x48484848, 0xc8c8c8c8, 0x8c8c8c8c, 0x4c4c4c4c, 0x18181818, 0x88888888, 0xcccccccc, 0x1a1a1a1a, 0x1e1e1e1e, 0x8a8a8a8a, 0x8e8e8e8e, 0x4a4a4a4a, 0x4e4e4e4e, 0xcacacaca, 0xcececece, 0x35353535, 0x60606060, 0xe0e0e0e0, 0xa5a5a5a5, 0x65656565, 0x30303030, 0xa0a0a0a0, 0xe5e5e5e5, 0x32323232, 0x37373737, 0xa2a2a2a2, 0xa7a7a7a7, 0x62626262, 0x67676767, 0xe2e2e2e2, 0xe7e7e7e7, 0x3d3d3d3d, 0x69696969, 0xe9e9e9e9, 0xadadadad, 0x6d6d6d6d, 0x39393939, 0xa9a9a9a9, 0xedededed, 0x3b3b3b3b, 0x3f3f3f3f, 0xabababab, 0xafafafaf, 0x6b6b6b6b, 0x6f6f6f6f, 0xebebebeb, 0xefefefef, 0x26262626, 0x23232323, 0xb3b3b3b3, 0xb6b6b6b6, 0x76767676, 0x73737373, 0xf3f3f3f3, 0xf6f6f6f6, 0x71717171, 0x24242424, 0xf1f1f1f1, 0xb4b4b4b4, 0x21212121, 0x74747474, 0xb1b1b1b1, 0xf4f4f4f4, 0x2c2c2c2c, 0x78787878, 0xf8f8f8f8, 0xbcbcbcbc, 0x7c7c7c7c, 0x28282828, 0xb8b8b8b8, 0xfcfcfcfc, 0x2a2a2a2a, 0x2e2e2e2e, 0xbabababa, 0xbebebebe, 0x7a7a7a7a, 0x7e7e7e7e, 0xfafafafa, 0xfefefefe, 0x25252525, 0x70707070, 0xf0f0f0f0, 0xb5b5b5b5, 0x75757575, 0x20202020, 0xb0b0b0b0, 0xf5f5f5f5, 0x22222222, 0x27272727, 0xb2b2b2b2, 0xb7b7b7b7, 0x72727272, 0x77777777, 0xf2f2f2f2, 0xf7f7f7f7, 0x2d2d2d2d, 0x79797979, 0xf9f9f9f9, 0xbdbdbdbd, 0x7d7d7d7d, 0x29292929, 0xb9b9b9b9, 0xfdfdfdfd, 0x2b2b2b2b, 0x2f2f2f2f, 0xbbbbbbbb, 0xbfbfbfbf, 0x7b7b7b7b, 0x7f7f7f7f, 0xfbfbfbfb, 0xffffffff}; @@ -36,77 +47,12 @@ static const uint32_t AC_column0[87] = {0x1000101, 0x3000303, 0x7000707, 0xf000f static const uint32_t AC_column1[87] = {0x0, 0x0, 0x0, 0x0, 0x10000, 0x30000, 0x70000, 0x70000, 0x70000, 0x70000, 0x60000, 0x50000, 0x30000, 0x70000, 0x70000, 0x70000, 0x60000, 0x40000, 0x10000, 0x30000, 0x70000, 0x70000, 0x60000, 0x50000, 0x20000, 0x50000, 0x30000, 0x70000, 0x60000, 0x40000, 0x0, 0x0, 0x10000, 0x30000, 0x60000, 0x50000, 0x30000, 0x70000, 0x60000, 0x50000, 0x20000, 0x40000, 0x10000, 0x30000, 0x60000, 0x40000, 0x0, 0x10000, 0x20000, 0x50000, 0x20000, 0x50000, 0x30000, 0x60000, 0x40000, 0x0, 0x0, 
0x0, 0x10000, 0x20000, 0x50000, 0x30000, 0x70000, 0x70000, 0x60000, 0x40000, 0x0, 0x10000, 0x30000, 0x70000, 0x60000, 0x50000, 0x30000, 0x60000, 0x50000, 0x30000, 0x60000, 0x40000, 0x10000, 0x20000, 0x40000, 0x10000, 0x20000, 0x50000, 0x20000, 0x40000, 0x10000}; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 +#if !defined(__AVR__) -/** - * \brief State information for ForkSkinny-128-256. - */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_256_state_t; - -#define load_column_8(dest, src) \ - do { \ - dest[0] = (src[12]) << 24 | (src[8]) << 16 | (src[4]) << 8 | (src[0]); \ - dest[1] = (src[13]) << 24 | (src[9]) << 16 | (src[5]) << 8 | (src[1]); \ - dest[2] = (src[14]) << 24 | (src[10]) << 16 | (src[6]) << 8 | (src[2]); \ - dest[3] = (src[15]) << 24 | (src[11]) << 16 | (src[7]) << 8 | (src[3]); \ - } while(0) - -#define store_column_8(dest, src) \ - do { \ - dest[0] = (uint8_t) (src[0]); dest[1] = (uint8_t) (src[1]); dest[2] = (uint8_t) (src[2]); dest[3] = (uint8_t) (src[3]); \ - dest[4] = (uint8_t) (src[0]>>8); dest[5] = (uint8_t) (src[1]>>8); dest[6] = (uint8_t) (src[2]>>8); dest[7] = (uint8_t) (src[3]>>8); \ - dest[8] = (uint8_t) (src[0]>>16);dest[9] = (uint8_t) (src[1]>>16);dest[10]= (uint8_t) (src[2]>>16);dest[11]= (uint8_t)(src[3]>>16); \ - dest[12]= (uint8_t) (src[0]>>24);dest[13]= (uint8_t) (src[1]>>24);dest[14]= (uint8_t) (src[2]>>24);dest[15]= (uint8_t)(src[3]>>24); \ - } while(0) - -#define rows_to_columns_32(columns, row0, row1, row2, row3) \ - do { \ - columns[0] = (row3 & 0xFF) << 24|(row2 & 0xFF) << 16|(row1 & 0xFF) << 8 | (row0 & 0xFF);\ - columns[1] = (row3 & 0xFF00) << 16|(row2 & 0xFF00) << 8 |(row1 & 0xFF00) | 
(row0>>8 & 0xFF);\ - columns[2] = (row3 & 0xFF0000) << 8 |(row2 & 0xFF0000) |(row1 & 0xFF0000) >> 8 | (row0>>16 & 0xFF);\ - columns[3] = (row3 & 0xFF000000) |(row2 & 0xFF000000) >> 8 |(row1 & 0xFF000000) >> 16| (row0>>24 & 0xFF);\ - } while(0) - -#define columns_to_rows_32(rows, column0, column1, column2, column3) rows_to_columns_32(rows, column0, column1, column2, column3) - -#define TK_to_column_256(columns, state) \ - do { \ - uint32_t TK0 = state->TK1[0] ^ state->TK2[0];\ - uint32_t TK1 = state->TK1[1] ^ state->TK2[1]; \ - uint32_t tk00 = TK0 & 0xFF; \ - uint32_t tk01 = TK0 & 0xFF00;\ - uint32_t tk02 = TK0 & 0xFF0000;\ - uint32_t tk03 = TK0 & 0xFF000000;\ - columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ - columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ - columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ - columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ - } while(0) - -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { - uint32_t s0, s1, s2, s3; + uint32_t s0, s1, s2, s3, t0, t1, t2, t3; uint32_t tk_columns[4]; /* Load the state into local variables */ @@ -115,102 +61,45 @@ static void forkskinny_128_256_round s2 = state->S[2]; s3 = state->S[3]; - TK_to_column_256(tk_columns, state); + /* Perform all requested rounds */ + for (; first < last; ++first) { - state->S[0] = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[round]; - state->S[1] = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[round]; - state->S[2] = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; - state->S[3] = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) ^ tk_columns[3]; + TK_to_column_256(tk_columns, state); - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); -} + t0 = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[first]; + t1 = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[first]; + t2 = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; + t3 = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) 
^ tk_columns[3]; + /* Permute TK1 and TK2 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); -void forkskinny_128_256_encrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_256_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - - /* State stored per column */ - load_column_8(state.S, input); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) { - forkskinny_128_256_round(&state, round); + s0 = t0; s1 = t1; s2 = t2; s3 = t3; } - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - store_column_8(output_right, state.S); - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x51051001; /* Branching constant */ - state.S[1] ^= 0xa20a2002; - state.S[2] ^= 0x44144104; - state.S[3] ^= 0x88288208; - - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - 
FORKSKINNY_128_256_ROUNDS_AFTER); - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_256_round(&state, round); - } - store_column_8(output_left, state.S); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - store_column_8(output_right, state.S); - } + /* Save the local variables back to the state */ + state->S[0] = s0; + state->S[1] = s1; + state->S[2] = s2; + state->S[3] = s3; } -static void forkskinny_128_256_inv_round_first(forkskinny_128_256_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3, temp; - uint8_t rc; +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) +{ + uint32_t s0, s1, s2, s3, t0, t1, t2, t3, tk0, tk1; + uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; + t0 = state->S[0]; + t1 = state->S[1]; + t2 = state->S[2]; + t3 = state->S[3]; + + // FIRST ROUND /* Permute TK1 and TK2 for the next round */ skinny128_inv_LFSR2(state->TK2[0]); @@ -219,215 +108,128 @@ static void forkskinny_128_256_inv_round_first(forkskinny_128_256_state_t *state skinny128_inv_permute_tk(state->TK2); /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - temp = state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s1 ^= leftRotate8(temp); - s2 ^= 0x020000; - - /* Save the local variables back to the state but first convert them to columns*/ - rows_to_columns_32(state->S, s0, s1, s2, s3); -} - -static void forkskinny_128_256_inv_round(forkskinny_128_256_state_t 
*state, unsigned round){ - uint32_t s0, s1, s2, s3, tk0, tk1; - uint8_t rc; - - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - - state->S[0] = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); - state->S[1] = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); - state->S[2] = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); - state->S[3] = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + s0 = t0; + t0 = t1; + t1 = t2; + t2 = t3; + t3 = s0 ^ t2; + t2 ^= t0; + t1 ^= t2; /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - tk0 = state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - tk1 = state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - tk1 = leftRotate8(tk1); - // temp = 0x020000; - state->S[0] ^= (((tk0) &0xff) | ((tk1<<8)&0xff00)); - state->S[1] ^= (((tk0>>8) &0xff) | ((tk1)&0xff00)); - state->S[2] ^= (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; - state->S[3] ^= (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); -} - -static void forkskinny_128_256_inv_round_final(forkskinny_128_256_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3; + rc = RC[--first]; + t0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + t1 ^= leftRotate8((state->TK1[1] ^ state->TK2[1] ^ (rc >> 4))); + t2 ^= 0x020000; + + /* Save the local variables in temp but first convert them to columns*/ + rows_to_columns_32(s0, 
s1, s2, s3, t0, t1, t2, t3); + + + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + + t0 = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); + t1 = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); + t2 = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); + t3 = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + + /* XOR the shifted round constant and the shifted subkey for this round */ + rc = RC[--first]; + tk0 = state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + tk1 = leftRotate8((state->TK1[1] ^ state->TK2[1] ^ (rc >> 4))); + + s0 = t0 ^ (((tk0) &0xff) | ((tk1<<8)&0xff00)); + s1 = t1 ^ (((tk0>>8) &0xff) | ((tk1)&0xff00)); + s2 = t2 ^ (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; + s3 = t3 ^ (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); + } - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; + // FINAL ROUND - /* Apply the inverse of the S-box to all cells in the state */ + /* Apply the inverse of the S-box to all cells in the state */ skinny128_inv_sbox(s0); skinny128_inv_sbox(s1); skinny128_inv_sbox(s2); skinny128_inv_sbox(s3); /* Save the local variables back to the state but first convert them back to rows*/ - columns_to_rows_32(state->S, s0, s1, s2, s3); + columns_to_rows_32(t0, t1, t2, t3, s0, s1, s2, s3); /* Shift the cells in the rows left, which moves the cell * values down closer to the LSB. 
That is, we do a right * rotate on the word to rotate the cells in the word left */ - state->S[1] = rightRotate8(state->S[1]); - state->S[2] = rightRotate16(state->S[2]); - state->S[3] = rightRotate24(state->S[3]); + state->S[0] = t0; + state->S[1] = rightRotate8(t1); + state->S[2] = rightRotate16(t2); + state->S[3] = rightRotate24(t3); } -void forkskinny_128_256_decrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds) { - forkskinny_128_256_state_t state; - forkskinny_128_256_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - forkskinny_128_256_inv_round_first(&state, (FORKSKINNY_128_256_ROUNDS_BEFORE+FORKSKINNY_128_256_ROUNDS_AFTER*2)-1); - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2)-1; - round > (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); --round) { - forkskinny_128_256_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + --rounds; } - forkskinny_128_256_inv_round_final(&state, (FORKSKINNY_128_256_ROUNDS_BEFORE+FORKSKINNY_128_256_ROUNDS_AFTER)); - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - } - - /* Save the state and the tweakey at the forking point, convert state to columns */ - memcpy(fstate.TK1, state.TK1, 16); - memcpy(fstate.TK2, state.TK2, 16); - rows_to_columns_32(fstate.S,state.S[0],state.S[1], state.S[2], state.S[3]); +} - /* Generate the left output block after another "before" rounds */ - forkskinny_128_256_inv_round_first(&state, FORKSKINNY_128_256_ROUNDS_BEFORE-1); - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE-1; round > 0; --round) { - 
forkskinny_128_256_inv_round(&state, round - 1); +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + } + rounds -= 16; } - forkskinny_128_256_inv_round_final(&state, 0); - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + --rounds; } - store_column_8(output_right,fstate.S); - } -/** - * \brief Number of rounds of ForkSkinny-128-384 before forking. - */ -#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 - -/** - * \brief Number of rounds of ForkSkinny-128-384 after forking. - */ -#define FORKSKINNY_128_384_ROUNDS_AFTER 31 - -/** - * \brief State information for ForkSkinny-128-384. 
- */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -#define TK_to_column_384(columns, state) \ - do { \ - uint32_t TK0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0];\ - uint32_t TK1 = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1];\ - uint32_t tk00 = TK0 & 0xFF; \ - uint32_t tk01 = TK0 & 0xFF00;\ - uint32_t tk02 = TK0 & 0xFF0000;\ - uint32_t tk03 = TK0 & 0xFF000000;\ - columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ - columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ - columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ - columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ - } while(0) - -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { - uint32_t s0, s1, s2, s3; + uint32_t s0, s1, s2, s3, t0, t1, t2, t3; uint32_t tk_columns[4]; /* Load the state into local variables */ @@ -436,148 +238,48 @@ static void forkskinny_128_384_round s2 = state->S[2]; s3 = state->S[3]; - TK_to_column_384(tk_columns, state); - - state->S[0] = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[round]; - state->S[1] = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[round]; - state->S[2] = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; - state->S[3] = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) ^ tk_columns[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_permute_tk(state->TK3); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); - skinny128_LFSR3(state->TK3[0]); - skinny128_LFSR3(state->TK3[1]); -} + /* Perform all requested rounds */ + for (; first < last; ++first) { -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - 
state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - - /* State stored per column */ - load_column_8(state.S, input); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, round); - } + TK_to_column_384(tk_columns, state); - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - store_column_8(output_right, state.S); + t0 = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[first]; + t1 = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[first]; + t2 = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; + t3 = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) ^ tk_columns[3]; - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x51051001; /* Branching constant */ - state.S[1] ^= 0xa20a2002; - state.S[2] ^= 0x44144104; - state.S[3] ^= 0x88288208; - 
for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, round); - } - store_column_8(output_left, state.S); - - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - store_column_8(output_right, state.S); + /* Permute TK1, TK2, and TK3 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + + s0 = t0; s1 = t1; s2 = t2; s3 = t3; } -} + /* Save the local variables back to the state */ + state->S[0] = s0; + state->S[1] = s1; + state->S[2] = s2; + state->S[3] = s3; +} -static void forkskinny_128_384_inv_round_first(forkskinny_128_384_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3, temp; - uint8_t rc; +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) +{ + uint32_t s0, s1, s2, s3, t0, t1, t2, t3, tk0, tk1; + uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_LFSR3(state->TK3[0]); - skinny128_inv_LFSR3(state->TK3[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - skinny128_inv_permute_tk(state->TK3); + t0 = state->S[0]; + t1 = state->S[1]; + t2 = state->S[2]; + t3 = state->S[3]; - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; 
- - /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; - temp = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s1 ^= leftRotate8(temp); - s2 ^= 0x020000; - - /* Save the local variables back to the state but first convert them to columns*/ - rows_to_columns_32(state->S, s0, s1, s2, s3); -} - -static void forkskinny_128_384_inv_round(forkskinny_128_384_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3, tk0, tk1; - uint8_t rc; - - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; + // FIRST ROUND /* Permute TK1 and TK2 for the next round */ skinny128_inv_LFSR2(state->TK2[0]); @@ -588,177 +290,144 @@ static void forkskinny_128_384_inv_round(forkskinny_128_384_state_t *state, unsi skinny128_inv_permute_tk(state->TK2); skinny128_inv_permute_tk(state->TK3); - - state->S[0] = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); - state->S[1] = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); - state->S[2] = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); - state->S[3] = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + /* Inverse mix of the columns */ + s0 = t0; + t0 = t1; + t1 = t2; + t2 = t3; + t3 = s0 ^ t2; + t2 ^= t0; + t1 ^= t2; /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - tk0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; - tk1 = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - tk1 = leftRotate8(tk1); - - state->S[0] 
^= (((tk0) &0xff) | ((tk1<<8)&0xff00)); - state->S[1] ^= (((tk0>>8) &0xff) | ((tk1)&0xff00)); - state->S[2] ^= (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; - state->S[3] ^= (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); -} + rc = RC[--first]; + t0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; + t1 ^= leftRotate8((state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4))); + t2 ^= 0x020000; + + /* Save the local variables in temp but first convert them to columns*/ + rows_to_columns_32(s0, s1, s2, s3, t0, t1, t2, t3); + + + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + + t0 = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); + t1 = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); + t2 = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); + t3 = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + + /* XOR the shifted round constant and the shifted subkey for this round */ + rc = RC[--first]; + tk0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; + tk1 = leftRotate8((state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4))); + + s0 = t0 ^ (((tk0) &0xff) | ((tk1<<8)&0xff00)); + s1 = t1 ^ (((tk0>>8) &0xff) | ((tk1)&0xff00)); + s2 = t2 ^ (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; + s3 = t3 ^ (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); + 
} -static void forkskinny_128_384_inv_round_final(forkskinny_128_384_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3; + // FINAL ROUND - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Apply the inverse of the S-box to all cells in the state */ + /* Apply the inverse of the S-box to all cells in the state */ skinny128_inv_sbox(s0); skinny128_inv_sbox(s1); skinny128_inv_sbox(s2); skinny128_inv_sbox(s3); /* Save the local variables back to the state but first convert them back to rows*/ - columns_to_rows_32(state->S, s0, s1, s2, s3); + columns_to_rows_32(t0, t1, t2, t3, s0, s1, s2, s3); /* Shift the cells in the rows left, which moves the cell * values down closer to the LSB. That is, we do a right * rotate on the word to rotate the cells in the word left */ - state->S[1] = rightRotate8(state->S[1]); - state->S[2] = rightRotate16(state->S[2]); - state->S[3] = rightRotate24(state->S[3]); + state->S[0] = t0; + state->S[1] = rightRotate8(t1); + state->S[2] = rightRotate16(t2); + state->S[3] = rightRotate24(t3); } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 
44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_permute_tk(state.TK3); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); - skinny128_LFSR3(state.TK3[0]); - skinny128_LFSR3(state.TK3[1]); - } - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - forkskinny_128_384_inv_round_first(&state, (FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER * 2)-1); - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2)-1; - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, round - 1); - } - forkskinny_128_384_inv_round_final(&state, (FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER)); - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_LFSR3(state.TK3[0]); - skinny128_inv_LFSR3(state.TK3[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - skinny128_inv_permute_tk(state.TK3); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + skinny128_LFSR3(state->TK3[2]); + skinny128_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point, convert state to columns */ - memcpy(fstate.TK1, state.TK1, 16); - memcpy(fstate.TK2, state.TK2, 16); - memcpy(fstate.TK3, state.TK3, 16); - rows_to_columns_32(fstate.S,state.S[0],state.S[1], state.S[2], state.S[3]); - - /* Generate the left output block after another "before" rounds */ - forkskinny_128_384_inv_round_first(&state, FORKSKINNY_128_384_ROUNDS_BEFORE - 1); - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE-1; round > 0; --round) { - forkskinny_128_384_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + --rounds; } - forkskinny_128_384_inv_round_final(&state, 0); - - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, round); - } - store_column_8(output_right, fstate.S); } -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. 
- */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 - -/** - * \brief State information for ForkSkinny-64-192. - */ -typedef struct +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - uint16_t TK1[4]; /**< First part of the tweakey */ - uint16_t TK2[4]; /**< Second part of the tweakey */ - uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_LFSR3(state->TK3[2]); + skinny128_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; + } -} forkskinny_64_192_state_t; + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + --rounds; + } +} -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. 
- */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -769,144 +438,55 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + } /* Save the local 
variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_permute_tk(state->TK1); - skinny64_permute_tk(state->TK2); - skinny64_permute_tk(state->TK3); - skinny64_LFSR2(state->TK2[0]); - skinny64_LFSR2(state->TK2[1]); - skinny64_LFSR3(state->TK3[0]); - skinny64_LFSR3(state->TK3[1]); } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - 
be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -917,42 +497,45 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_inv_LFSR2(state->TK2[0]); - skinny64_inv_LFSR2(state->TK2[1]); - skinny64_inv_LFSR3(state->TK3[0]); - skinny64_inv_LFSR3(state->TK3[1]); - skinny64_inv_permute_tk(state->TK1); - skinny64_inv_permute_tk(state->TK2); - skinny64_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round 
constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -961,92 +544,74 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds) { - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - skinny64_permute_tk(state.TK1); - skinny64_permute_tk(state.TK2); - skinny64_permute_tk(state.TK3); - skinny64_LFSR2(state.TK2[0]); - skinny64_LFSR2(state.TK2[1]); - skinny64_LFSR3(state.TK3[0]); - 
skinny64_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR2(state->TK2[2]); + skinny64_LFSR2(state->TK2[3]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + skinny64_LFSR3(state->TK3[2]); + skinny64_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_AFTER; ++round) { - skinny64_inv_LFSR2(state.TK2[0]); - skinny64_inv_LFSR2(state.TK2[1]); - skinny64_inv_LFSR3(state.TK3[0]); - skinny64_inv_LFSR3(state.TK3[1]); - skinny64_inv_permute_tk(state.TK1); - skinny64_inv_permute_tk(state.TK2); - skinny64_inv_permute_tk(state.TK3); +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_inv_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR2(state->TK2[2]); + skinny64_inv_LFSR2(state->TK2[3]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_LFSR3(state->TK3[2]); + skinny64_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + --rounds; } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); } +#endif /* !__AVR__ */ diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/internal-forkskinny.h b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/internal-forkskinny.h index 0c1a707..e3014d3 100644 --- 
a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,158 @@ extern "C" { #endif /** + * \brief State information for ForkSkinny-128-256. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +/** + * \brief State information for ForkSkinny-128-384. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +/** + * \brief State information for ForkSkinny-64-192. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. 
+ */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first First round to apply. 
+ * \param last Last round to apply plus 1. + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. * * \param key 256-bit tweakey for ForkSkinny-128-256. 
diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/internal-skinnyutil.h index 83136cb..f2bc8ca 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/opt32_table/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. 
The original version from the specification is * equivalent to: @@ -319,6 +349,61 @@ do { \ x = ((x << 1) & 0xEEEEU) | ((x >> 3) & 0x1111U); \ } while (0) +#define rows_to_columns_32(column0, column1, column2, column3, row0, row1, row2, row3) \ + do { \ + column0 = (row3 & 0xFF) << 24|(row2 & 0xFF) << 16|(row1 & 0xFF) << 8 | (row0 & 0xFF);\ + column1 = (row3 & 0xFF00) << 16|(row2 & 0xFF00) << 8 |(row1 & 0xFF00) | (row0>>8 & 0xFF);\ + column2 = (row3 & 0xFF0000) << 8 |(row2 & 0xFF0000) |(row1 & 0xFF0000) >> 8 | (row0>>16 & 0xFF);\ + column3 = (row3 & 0xFF000000) |(row2 & 0xFF000000) >> 8 |(row1 & 0xFF000000) >> 16| (row0>>24 & 0xFF);\ + } while(0) + +#define columns_to_rows_32(row0, row1, row2, row3, column0, column1, column2, column3) rows_to_columns_32(row0, row1, row2, row3, column0, column1, column2, column3) + +#define load_column_8(dest, src) \ + do { \ + dest[0] = (src[12]) << 24 | (src[8]) << 16 | (src[4]) << 8 | (src[0]); \ + dest[1] = (src[13]) << 24 | (src[9]) << 16 | (src[5]) << 8 | (src[1]); \ + dest[2] = (src[14]) << 24 | (src[10]) << 16 | (src[6]) << 8 | (src[2]); \ + dest[3] = (src[15]) << 24 | (src[11]) << 16 | (src[7]) << 8 | (src[3]); \ + } while(0) + +#define store_column_8(dest, src) \ + do { \ + dest[0] = (uint8_t) (src[0]); dest[1] = (uint8_t) (src[1]); dest[2] = (uint8_t) (src[2]); dest[3] = (uint8_t) (src[3]); \ + dest[4] = (uint8_t) (src[0]>>8); dest[5] = (uint8_t) (src[1]>>8); dest[6] = (uint8_t) (src[2]>>8); dest[7] = (uint8_t) (src[3]>>8); \ + dest[8] = (uint8_t) (src[0]>>16);dest[9] = (uint8_t) (src[1]>>16);dest[10]= (uint8_t) (src[2]>>16);dest[11]= (uint8_t)(src[3]>>16); \ + dest[12]= (uint8_t) (src[0]>>24);dest[13]= (uint8_t) (src[1]>>24);dest[14]= (uint8_t) (src[2]>>24);dest[15]= (uint8_t)(src[3]>>24); \ + } while(0) + + +#define TK_to_column_256(columns, state) \ + do { \ + uint32_t TK0 = state->TK1[0] ^ state->TK2[0];\ + uint32_t TK1 = state->TK1[1] ^ state->TK2[1]; \ + uint32_t tk00 = TK0 & 0xFF; \ + uint32_t tk01 = TK0 & 0xFF00;\ 
+ uint32_t tk02 = TK0 & 0xFF0000;\ + uint32_t tk03 = TK0 & 0xFF000000;\ + columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ + columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ + columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ + columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ + } while(0) + +#define TK_to_column_384(columns, state) \ +do { \ + uint32_t TK0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0];\ + uint32_t TK1 = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1];\ + uint32_t tk00 = TK0 & 0xFF; \ + uint32_t tk01 = TK0 & 0xFF00;\ + uint32_t tk02 = TK0 & 0xFF0000;\ + uint32_t tk03 = TK0 & 0xFF000000;\ + columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ + columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ + columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ + columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ +} while(0) + /** @endcond */ #ifdef __cplusplus diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/aead-common.h b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/aead-common.h index 2be95eb..8429f59 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/aead-common.h +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/aead-common.h @@ -154,7 +154,7 @@ typedef void (*aead_xof_squeeze_t) /** * \brief No special AEAD features. */ -#define AEAD_FLAG_NONE 0x0000 +#define AEAD_FLAG_NONE 0x0000 /** * \brief The natural byte order of the AEAD cipher is little-endian. @@ -166,7 +166,18 @@ typedef void (*aead_xof_squeeze_t) * numbers as nonces. The application needs to know whether the sequence * number should be packed into the leading or trailing bytes of the nonce. 
*/ -#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 +#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 + +/** + * \brief The AEAD mode provides side-channel protection for the key. + */ +#define AEAD_FLAG_SC_PROTECT_KEY 0x0002 + +/** + * \brief The AEAD mode provides side-channel protection for all block + * operations. + */ +#define AEAD_FLAG_SC_PROTECT_ALL 0x0004 /** * \brief Meta-information about an AEAD cipher. diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/forkae.c b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/forkae.c index 4a9671a..ead8ada 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/forkae.c +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/forkae.c @@ -22,7 +22,6 @@ #include "forkae.h" #include "internal-forkskinny.h" -#include "internal-util.h" #include aead_cipher_t const forkae_paef_64_192_cipher = { @@ -138,3 +137,399 @@ aead_cipher_t const forkae_saef_128_256_cipher = { #define FORKAE_TWEAKEY_REDUCED_SIZE 32 #define FORKAE_BLOCK_FUNC forkskinny_128_256 #include "internal-forkae-saef.h" + +/* Helper functions to implement the forking encrypt/decrypt block operations + * on top of the basic "perform N rounds" functions in internal-forkskinny.c */ + +void forkskinny_128_256_encrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_tweakey_schedule_t tks; + + /* Iterate the tweakey schedule */ + if (output_left && output_right){ + forkskinny_128_256_init_tks(&tks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); + } + else{ + forkskinny_128_256_init_tks(&tks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + FORKSKINNY_128_256_ROUNDS_AFTER); + } + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + 
state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_256_rounds(&state, &tks, 0, FORKSKINNY_128_256_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_256_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_256_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_256_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_256_decrypt + (const unsigned 
char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_state_t fstate; + forkskinny_128_256_tweakey_schedule_t tks; + + /* Iterate the tweakey schedule */ + forkskinny_128_256_init_tks(&tks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); + + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_256_inv_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_256_inv_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_256_rounds + (&fstate, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 12, fstate.S[3]); +} + +void forkskinny_128_384_encrypt + (const unsigned char key[48], unsigned char *output_left, + 
unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_tweakey_schedule_t tks; + + /* Iterate key schedule */ + if (output_left && output_right){ + forkskinny_128_384_init_tks(&tks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); + } + else{ + forkskinny_128_384_init_tks(&tks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER); + } + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_384_rounds(&state, &tks, 0, FORKSKINNY_128_384_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_384_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_384_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + 
le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_384_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_384_decrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_state_t fstate; + forkskinny_128_384_tweakey_schedule_t tks; + + /* Iterate key schedule */ + forkskinny_128_384_init_tks(&tks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); + + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_384_inv_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_384_inv_rounds(&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + 
le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_384_rounds + (&fstate, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 12, fstate.S[3]); +} + +void forkskinny_64_192_encrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_tweakey_schedule_t tks; + + /* Iterate key schedule */ + if (output_left && output_right){ + forkskinny_64_192_init_tks(&tks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); + } + else{ + forkskinny_64_192_init_tks(&tks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + FORKSKINNY_64_192_ROUNDS_AFTER); + } + + /* Unpack the input */ + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Run all of the rounds before the forking point */ + forkskinny_64_192_rounds(&state, &tks, 0, FORKSKINNY_64_192_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint16_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_64_192_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + 
+ /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x1249U; /* Branching constant */ + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + forkskinny_64_192_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_64_192_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + } +} + +void forkskinny_64_192_decrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_state_t fstate; + forkskinny_64_192_tweakey_schedule_t tks; + + /* Iterate key schedule */ + forkskinny_64_192_init_tks(&tks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Unpack the input */ + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /* 
Remove the branching constant */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_64_192_inv_rounds(&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_64_192_rounds + (&fstate, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include <avr/io.h> +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: /* Byte substitution table used by forkskinny_128_256_rounds: every state byte b becomes table_0[b] each round. Values match the SKINNY-128 S-box (table_0[0] = 0x65). Page-aligned so that only the low pointer byte has to change per lookup. */ + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 67 + .byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + .byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte 193 
+ .byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 + .byte 100 + .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + .byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 194 + .byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 + .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + .byte 
97 + .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: /* Not referenced in the reviewed part of this file. Spot check: table_0[0] = 101 and table_1[101] = 0, so presumably the inverse of table_0 - TODO confirm. */ + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 + .byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + .byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + .byte 
93 + .byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 223 + .byte 22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: /* x -> ((x << 1) & 0xFF) | (((x >> 7) ^ (x >> 5)) & 1). forkskinny_128_256_rounds applies it to the tweakey bytes it holds at Y+25..Y+32. */ + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 
48 + .byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 137 + .byte 139 
+ .byte 141 + .byte 143 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_3, @object + .size table_3, 256 +table_3: /* x -> (x >> 1) | (((x << 7) ^ (x << 1)) & 0x80), i.e. the reverse mapping of table_2 (spot-checked: table_2[1] = 2 and table_3[2] = 1). forkskinny_128_256_inv_rounds loads this table's address before its round loop. */ + .byte 0 + .byte 128 + .byte 1 + .byte 129 + .byte 2 + .byte 130 + .byte 3 + .byte 131 + .byte 4 + .byte 132 + .byte 5 + .byte 133 + .byte 6 + .byte 134 + .byte 7 + .byte 135 + .byte 8 + .byte 136 + .byte 9 + .byte 137 + .byte 10 + .byte 138 + .byte 11 + .byte 139 + .byte 12 + .byte 140 + .byte 13 + .byte 141 + .byte 14 + .byte 142 + .byte 15 + .byte 143 + .byte 16 + .byte 144 + .byte 17 + .byte 145 + .byte 18 + .byte 146 + .byte 19 + .byte 147 + .byte 20 + .byte 148 + .byte 21 + .byte 149 + .byte 22 + .byte 150 + .byte 23 + .byte 151 + .byte 24 + .byte 152 + .byte 25 + .byte 153 + .byte 26 + .byte 154 + .byte 27 + .byte 155 + .byte 28 + .byte 156 + .byte 29 + .byte 157 + .byte 30 + .byte 158 + .byte 31 + .byte 159 + .byte 160 + .byte 32 + .byte 161 + .byte 33 + .byte 162 + .byte 34 + .byte 163 + .byte 35 + .byte 164 + .byte 36 + .byte 165 + .byte 37 + .byte 166 + .byte 38 + .byte 167 + .byte 39 + .byte 168 + .byte 40 + .byte 169 + .byte 41 + .byte 170 + .byte 42 + .byte 171 + .byte 43 + .byte 172 + .byte 44 + .byte 173 + .byte 45 + .byte 174 + .byte 46 + .byte 175 + .byte 47 + .byte 176 + .byte 48 + .byte 177 + .byte 49 + .byte 178 + .byte 50 + .byte 179 + .byte 51 + .byte 
180 + .byte 52 + .byte 181 + .byte 53 + .byte 182 + .byte 54 + .byte 183 + .byte 55 + .byte 184 + .byte 56 + .byte 185 + .byte 57 + .byte 186 + .byte 58 + .byte 187 + .byte 59 + .byte 188 + .byte 60 + .byte 189 + .byte 61 + .byte 190 + .byte 62 + .byte 191 + .byte 63 + .byte 64 + .byte 192 + .byte 65 + .byte 193 + .byte 66 + .byte 194 + .byte 67 + .byte 195 + .byte 68 + .byte 196 + .byte 69 + .byte 197 + .byte 70 + .byte 198 + .byte 71 + .byte 199 + .byte 72 + .byte 200 + .byte 73 + .byte 201 + .byte 74 + .byte 202 + .byte 75 + .byte 203 + .byte 76 + .byte 204 + .byte 77 + .byte 205 + .byte 78 + .byte 206 + .byte 79 + .byte 207 + .byte 80 + .byte 208 + .byte 81 + .byte 209 + .byte 82 + .byte 210 + .byte 83 + .byte 211 + .byte 84 + .byte 212 + .byte 85 + .byte 213 + .byte 86 + .byte 214 + .byte 87 + .byte 215 + .byte 88 + .byte 216 + .byte 89 + .byte 217 + .byte 90 + .byte 218 + .byte 91 + .byte 219 + .byte 92 + .byte 220 + .byte 93 + .byte 221 + .byte 94 + .byte 222 + .byte 95 + .byte 223 + .byte 224 + .byte 96 + .byte 225 + .byte 97 + .byte 226 + .byte 98 + .byte 227 + .byte 99 + .byte 228 + .byte 100 + .byte 229 + .byte 101 + .byte 230 + .byte 102 + .byte 231 + .byte 103 + .byte 232 + .byte 104 + .byte 233 + .byte 105 + .byte 234 + .byte 106 + .byte 235 + .byte 107 + .byte 236 + .byte 108 + .byte 237 + .byte 109 + .byte 238 + .byte 110 + .byte 239 + .byte 111 + .byte 240 + .byte 112 + .byte 241 + .byte 113 + .byte 242 + .byte 114 + .byte 243 + .byte 115 + .byte 244 + .byte 116 + .byte 245 + .byte 117 + .byte 246 + .byte 118 + .byte 247 + .byte 119 + .byte 248 + .byte 120 + .byte 249 + .byte 121 + .byte 250 + .byte 122 + .byte 251 + .byte 123 + .byte 252 + .byte 124 + .byte 253 + .byte 125 + .byte 254 + .byte 126 + .byte 255 + .byte 127 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_4, @object + .size table_4, 174 +table_4: /* 174 bytes = 87 byte pairs. forkskinny_128_256_rounds indexes it with its doubled round counter and XORs the two bytes of the selected pair into state bytes 0 and 4 (per-round constants). */ + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 7 + .byte 0 + .byte 15 + .byte 0 + .byte 15 + .byte 1 + .byte 15 + .byte 
3 + .byte 14 + .byte 7 + .byte 13 + .byte 7 + .byte 11 + .byte 7 + .byte 7 + .byte 7 + .byte 15 + .byte 6 + .byte 15 + .byte 5 + .byte 14 + .byte 3 + .byte 12 + .byte 7 + .byte 9 + .byte 7 + .byte 3 + .byte 7 + .byte 7 + .byte 6 + .byte 15 + .byte 4 + .byte 14 + .byte 1 + .byte 13 + .byte 3 + .byte 10 + .byte 7 + .byte 5 + .byte 7 + .byte 11 + .byte 6 + .byte 7 + .byte 5 + .byte 14 + .byte 2 + .byte 12 + .byte 5 + .byte 8 + .byte 3 + .byte 0 + .byte 7 + .byte 1 + .byte 6 + .byte 3 + .byte 4 + .byte 6 + .byte 0 + .byte 13 + .byte 0 + .byte 11 + .byte 1 + .byte 7 + .byte 3 + .byte 14 + .byte 6 + .byte 13 + .byte 5 + .byte 10 + .byte 3 + .byte 4 + .byte 7 + .byte 9 + .byte 6 + .byte 3 + .byte 5 + .byte 6 + .byte 2 + .byte 12 + .byte 4 + .byte 8 + .byte 1 + .byte 1 + .byte 3 + .byte 2 + .byte 6 + .byte 5 + .byte 4 + .byte 10 + .byte 0 + .byte 5 + .byte 1 + .byte 11 + .byte 2 + .byte 6 + .byte 5 + .byte 12 + .byte 2 + .byte 8 + .byte 5 + .byte 0 + .byte 3 + .byte 0 + .byte 6 + .byte 1 + .byte 4 + .byte 2 + .byte 0 + .byte 5 + .byte 0 + .byte 11 + .byte 0 + .byte 7 + .byte 1 + .byte 15 + .byte 2 + .byte 14 + .byte 5 + .byte 12 + .byte 3 + .byte 8 + .byte 7 + .byte 1 + .byte 7 + .byte 3 + .byte 6 + .byte 7 + .byte 4 + .byte 14 + .byte 0 + .byte 13 + .byte 1 + .byte 11 + .byte 3 + .byte 6 + .byte 7 + .byte 13 + .byte 6 + .byte 11 + .byte 5 + .byte 6 + .byte 3 + .byte 12 + .byte 6 + .byte 9 + .byte 5 + .byte 2 + .byte 3 + .byte 4 + .byte 6 + .byte 9 + .byte 4 + .byte 2 + .byte 1 + .byte 5 + .byte 2 + .byte 10 + .byte 4 + .byte 4 + .byte 1 + .byte 9 + .byte 2 + .byte 2 + .byte 5 + .byte 4 + .byte 2 + .byte 8 + .byte 4 + .byte 0 + .byte 1 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_5, @object + .size table_5, 256 +table_5: /* Not referenced in the reviewed part of this file. Each nibble is substituted independently (0..15 -> c 6 9 0 1 a 2 b 3 8 5 d 4 e 7 f); presumably for the 64-bit-block variant - TODO confirm. */ + .byte 204 + .byte 198 + .byte 201 + .byte 192 + .byte 193 + .byte 202 + .byte 194 + .byte 203 + .byte 195 + .byte 200 + .byte 197 + .byte 205 + .byte 196 + .byte 206 + .byte 199 + .byte 207 + .byte 108 + .byte 102 + .byte 105 + 
.byte 96 + .byte 97 + .byte 106 + .byte 98 + .byte 107 + .byte 99 + .byte 104 + .byte 101 + .byte 109 + .byte 100 + .byte 110 + .byte 103 + .byte 111 + .byte 156 + .byte 150 + .byte 153 + .byte 144 + .byte 145 + .byte 154 + .byte 146 + .byte 155 + .byte 147 + .byte 152 + .byte 149 + .byte 157 + .byte 148 + .byte 158 + .byte 151 + .byte 159 + .byte 12 + .byte 6 + .byte 9 + .byte 0 + .byte 1 + .byte 10 + .byte 2 + .byte 11 + .byte 3 + .byte 8 + .byte 5 + .byte 13 + .byte 4 + .byte 14 + .byte 7 + .byte 15 + .byte 28 + .byte 22 + .byte 25 + .byte 16 + .byte 17 + .byte 26 + .byte 18 + .byte 27 + .byte 19 + .byte 24 + .byte 21 + .byte 29 + .byte 20 + .byte 30 + .byte 23 + .byte 31 + .byte 172 + .byte 166 + .byte 169 + .byte 160 + .byte 161 + .byte 170 + .byte 162 + .byte 171 + .byte 163 + .byte 168 + .byte 165 + .byte 173 + .byte 164 + .byte 174 + .byte 167 + .byte 175 + .byte 44 + .byte 38 + .byte 41 + .byte 32 + .byte 33 + .byte 42 + .byte 34 + .byte 43 + .byte 35 + .byte 40 + .byte 37 + .byte 45 + .byte 36 + .byte 46 + .byte 39 + .byte 47 + .byte 188 + .byte 182 + .byte 185 + .byte 176 + .byte 177 + .byte 186 + .byte 178 + .byte 187 + .byte 179 + .byte 184 + .byte 181 + .byte 189 + .byte 180 + .byte 190 + .byte 183 + .byte 191 + .byte 60 + .byte 54 + .byte 57 + .byte 48 + .byte 49 + .byte 58 + .byte 50 + .byte 59 + .byte 51 + .byte 56 + .byte 53 + .byte 61 + .byte 52 + .byte 62 + .byte 55 + .byte 63 + .byte 140 + .byte 134 + .byte 137 + .byte 128 + .byte 129 + .byte 138 + .byte 130 + .byte 139 + .byte 131 + .byte 136 + .byte 133 + .byte 141 + .byte 132 + .byte 142 + .byte 135 + .byte 143 + .byte 92 + .byte 86 + .byte 89 + .byte 80 + .byte 81 + .byte 90 + .byte 82 + .byte 91 + .byte 83 + .byte 88 + .byte 85 + .byte 93 + .byte 84 + .byte 94 + .byte 87 + .byte 95 + .byte 220 + .byte 214 + .byte 217 + .byte 208 + .byte 209 + .byte 218 + .byte 210 + .byte 219 + .byte 211 + .byte 216 + .byte 213 + .byte 221 + .byte 212 + .byte 222 + .byte 215 + .byte 223 + .byte 76 + .byte 
70 + .byte 73 + .byte 64 + .byte 65 + .byte 74 + .byte 66 + .byte 75 + .byte 67 + .byte 72 + .byte 69 + .byte 77 + .byte 68 + .byte 78 + .byte 71 + .byte 79 + .byte 236 + .byte 230 + .byte 233 + .byte 224 + .byte 225 + .byte 234 + .byte 226 + .byte 235 + .byte 227 + .byte 232 + .byte 229 + .byte 237 + .byte 228 + .byte 238 + .byte 231 + .byte 239 + .byte 124 + .byte 118 + .byte 121 + .byte 112 + .byte 113 + .byte 122 + .byte 114 + .byte 123 + .byte 115 + .byte 120 + .byte 117 + .byte 125 + .byte 116 + .byte 126 + .byte 119 + .byte 127 + .byte 252 + .byte 246 + .byte 249 + .byte 240 + .byte 241 + .byte 250 + .byte 242 + .byte 251 + .byte 243 + .byte 248 + .byte 245 + .byte 253 + .byte 244 + .byte 254 + .byte 247 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_6, @object + .size table_6, 256 +table_6: /* Not referenced in the reviewed part of this file. Nibble-wise table (0..15 -> 3 4 6 8 c a 1 e 9 2 5 7 0 b d f), the reverse of table_5's nibble mapping. */ + .byte 51 + .byte 52 + .byte 54 + .byte 56 + .byte 60 + .byte 58 + .byte 49 + .byte 62 + .byte 57 + .byte 50 + .byte 53 + .byte 55 + .byte 48 + .byte 59 + .byte 61 + .byte 63 + .byte 67 + .byte 68 + .byte 70 + .byte 72 + .byte 76 + .byte 74 + .byte 65 + .byte 78 + .byte 73 + .byte 66 + .byte 69 + .byte 71 + .byte 64 + .byte 75 + .byte 77 + .byte 79 + .byte 99 + .byte 100 + .byte 102 + .byte 104 + .byte 108 + .byte 106 + .byte 97 + .byte 110 + .byte 105 + .byte 98 + .byte 101 + .byte 103 + .byte 96 + .byte 107 + .byte 109 + .byte 111 + .byte 131 + .byte 132 + .byte 134 + .byte 136 + .byte 140 + .byte 138 + .byte 129 + .byte 142 + .byte 137 + .byte 130 + .byte 133 + .byte 135 + .byte 128 + .byte 139 + .byte 141 + .byte 143 + .byte 195 + .byte 196 + .byte 198 + .byte 200 + .byte 204 + .byte 202 + .byte 193 + .byte 206 + .byte 201 + .byte 194 + .byte 197 + .byte 199 + .byte 192 + .byte 203 + .byte 205 + .byte 207 + .byte 163 + .byte 164 + .byte 166 + .byte 168 + .byte 172 + .byte 170 + .byte 161 + .byte 174 + .byte 169 + .byte 162 + .byte 165 + .byte 167 + .byte 160 + .byte 171 + .byte 173 + .byte 175 + .byte 19 + .byte 20 + .byte 22 + .byte 
24 + .byte 28 + .byte 26 + .byte 17 + .byte 30 + .byte 25 + .byte 18 + .byte 21 + .byte 23 + .byte 16 + .byte 27 + .byte 29 + .byte 31 + .byte 227 + .byte 228 + .byte 230 + .byte 232 + .byte 236 + .byte 234 + .byte 225 + .byte 238 + .byte 233 + .byte 226 + .byte 229 + .byte 231 + .byte 224 + .byte 235 + .byte 237 + .byte 239 + .byte 147 + .byte 148 + .byte 150 + .byte 152 + .byte 156 + .byte 154 + .byte 145 + .byte 158 + .byte 153 + .byte 146 + .byte 149 + .byte 151 + .byte 144 + .byte 155 + .byte 157 + .byte 159 + .byte 35 + .byte 36 + .byte 38 + .byte 40 + .byte 44 + .byte 42 + .byte 33 + .byte 46 + .byte 41 + .byte 34 + .byte 37 + .byte 39 + .byte 32 + .byte 43 + .byte 45 + .byte 47 + .byte 83 + .byte 84 + .byte 86 + .byte 88 + .byte 92 + .byte 90 + .byte 81 + .byte 94 + .byte 89 + .byte 82 + .byte 85 + .byte 87 + .byte 80 + .byte 91 + .byte 93 + .byte 95 + .byte 115 + .byte 116 + .byte 118 + .byte 120 + .byte 124 + .byte 122 + .byte 113 + .byte 126 + .byte 121 + .byte 114 + .byte 117 + .byte 119 + .byte 112 + .byte 123 + .byte 125 + .byte 127 + .byte 3 + .byte 4 + .byte 6 + .byte 8 + .byte 12 + .byte 10 + .byte 1 + .byte 14 + .byte 9 + .byte 2 + .byte 5 + .byte 7 + .byte 0 + .byte 11 + .byte 13 + .byte 15 + .byte 179 + .byte 180 + .byte 182 + .byte 184 + .byte 188 + .byte 186 + .byte 177 + .byte 190 + .byte 185 + .byte 178 + .byte 181 + .byte 183 + .byte 176 + .byte 187 + .byte 189 + .byte 191 + .byte 211 + .byte 212 + .byte 214 + .byte 216 + .byte 220 + .byte 218 + .byte 209 + .byte 222 + .byte 217 + .byte 210 + .byte 213 + .byte 215 + .byte 208 + .byte 219 + .byte 221 + .byte 223 + .byte 243 + .byte 244 + .byte 246 + .byte 248 + .byte 252 + .byte 250 + .byte 241 + .byte 254 + .byte 249 + .byte 242 + .byte 245 + .byte 247 + .byte 240 + .byte 251 + .byte 253 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_7, @object + .size table_7, 256 +table_7: /* Not referenced in the reviewed part of this file. Nibble-wise table (0..15 -> 0 2 4 6 9 b d f 1 3 5 7 8 a c e). */ + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + 
.byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 121 + .byte 123 + .byte 
125 + .byte 127 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_8, @object + .size table_8, 256 +table_8: /* Not referenced in the reviewed part of this file. Nibble-wise table (0..15 -> 0 8 1 9 2 a 3 b c 4 d 5 e 6 f 7), the reverse of table_7's nibble mapping. */ + .byte 0 + .byte 8 + .byte 1 + .byte 9 + .byte 2 + .byte 10 + .byte 3 + .byte 11 + .byte 12 + .byte 4 + .byte 13 + .byte 5 + .byte 14 + .byte 6 + .byte 15 + .byte 7 + .byte 128 + .byte 136 + .byte 129 + .byte 137 + .byte 130 + .byte 138 + .byte 131 + .byte 139 + .byte 140 + .byte 132 + .byte 141 + .byte 133 + .byte 142 + .byte 134 + .byte 143 + .byte 135 + .byte 16 + .byte 24 + .byte 17 + .byte 25 + .byte 18 + .byte 26 + .byte 19 + .byte 27 + .byte 28 + .byte 20 + .byte 29 + .byte 21 + .byte 30 + .byte 22 + .byte 31 + .byte 23 + .byte 144 + .byte 152 + .byte 145 + .byte 153 + .byte 146 + .byte 154 + .byte 147 + .byte 155 + .byte 156 + .byte 148 + .byte 157 + .byte 149 + .byte 158 + .byte 150 + .byte 159 + .byte 151 + .byte 32 + .byte 40 + .byte 33 + .byte 41 + .byte 34 + .byte 42 + .byte 35 + .byte 43 + .byte 44 + .byte 36 + .byte 45 + .byte 37 + .byte 46 + .byte 38 + .byte 47 + .byte 39 + .byte 160 + .byte 168 + .byte 161 + .byte 169 + .byte 162 + .byte 170 + .byte 163 + .byte 171 + .byte 172 
+ .byte 164 + .byte 173 + .byte 165 + .byte 174 + .byte 166 + .byte 175 + .byte 167 + .byte 48 + .byte 56 + .byte 49 + .byte 57 + .byte 50 + .byte 58 + .byte 51 + .byte 59 + .byte 60 + .byte 52 + .byte 61 + .byte 53 + .byte 62 + .byte 54 + .byte 63 + .byte 55 + .byte 176 + .byte 184 + .byte 177 + .byte 185 + .byte 178 + .byte 186 + .byte 179 + .byte 187 + .byte 188 + .byte 180 + .byte 189 + .byte 181 + .byte 190 + .byte 182 + .byte 191 + .byte 183 + .byte 192 + .byte 200 + .byte 193 + .byte 201 + .byte 194 + .byte 202 + .byte 195 + .byte 203 + .byte 204 + .byte 196 + .byte 205 + .byte 197 + .byte 206 + .byte 198 + .byte 207 + .byte 199 + .byte 64 + .byte 72 + .byte 65 + .byte 73 + .byte 66 + .byte 74 + .byte 67 + .byte 75 + .byte 76 + .byte 68 + .byte 77 + .byte 69 + .byte 78 + .byte 70 + .byte 79 + .byte 71 + .byte 208 + .byte 216 + .byte 209 + .byte 217 + .byte 210 + .byte 218 + .byte 211 + .byte 219 + .byte 220 + .byte 212 + .byte 221 + .byte 213 + .byte 222 + .byte 214 + .byte 223 + .byte 215 + .byte 80 + .byte 88 + .byte 81 + .byte 89 + .byte 82 + .byte 90 + .byte 83 + .byte 91 + .byte 92 + .byte 84 + .byte 93 + .byte 85 + .byte 94 + .byte 86 + .byte 95 + .byte 87 + .byte 224 + .byte 232 + .byte 225 + .byte 233 + .byte 226 + .byte 234 + .byte 227 + .byte 235 + .byte 236 + .byte 228 + .byte 237 + .byte 229 + .byte 238 + .byte 230 + .byte 239 + .byte 231 + .byte 96 + .byte 104 + .byte 97 + .byte 105 + .byte 98 + .byte 106 + .byte 99 + .byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + .byte 250 + .byte 243 + .byte 251 + .byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + .byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global forkskinny_128_256_rounds + 
.type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: /* Runs a range of forward rounds in place on the 48-byte structure addressed by r25:r24. Bytes 0..31 are handled as two 16-byte tweakey words, bytes 32..47 as the 16-byte cipher state. r22 and r20 are doubled and used as start/end values of the round counter; presumably the C prototype is (state *, unsigned first, unsigned last) under the avr-gcc register convention - confirm against the header. Row names used below: row0 = r2-r5, row1 = r6-r9, row2 = r10-r13, row3 = r14,r15,r24,r25. */ + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 /* Z = pointer passed in r25:r24 */ + in r28,0x3d + in r29,0x3e + sbiw r28,33 /* reserve 33 bytes: Y+1..Y+32 tweakey copy, Y+33 loop bound */ + in r0,0x3f /* save SREG, then update SPH/SPL (0x3e/0x3d) with interrupts disabled */ + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 /* 18 pushed registers + 33 frame bytes */ + ldd r2,Z+32 /* bytes 32..47 -> r2-r15,r24,r25 (working state) */ + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z /* bytes 0..31 -> Y+1..Y+32, four bytes at a time */ + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 /* r20 doubled = end value of the counter, kept at Y+33 (r20 is reused as scratch later) */ + std Y+33,r20 + push r31 /* save Z; r31:r30 becomes the table pointer */ + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 /* r22 doubled = byte index into table_4 (two bytes per round) */ +86: /* round loop: first map all 16 state bytes through table_0 (low pointer byte = state byte) */ + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + 
elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 /* XOR tweakey bytes Y+1..Y+8 into row0 and row1 */ + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 /* XOR tweakey bytes Y+17..Y+24 into the same two rows */ + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) /* switch the table pointer to table_4 */ + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 /* table_4[r22] is XORed into r2 */ +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 /* next byte of the pair is XORed into r6 */ + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 /* fixed constant 2 is XORed into r10 and r4 */ + eor r10,r18 + eor r4,r18 + mov r0,r9 /* rotate the four bytes of row1 by one position */ + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 /* swap the two halves of row2 (rotate by two positions) */ + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 /* rotate row3 by one position in the opposite direction to row1 */ + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 /* mix: row1 ^= row2; row2 ^= row0; tmp (r18,r19,r26,r27) = row3 ^ row2 */ + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 /* then row3 <- row2, row2 <- row1, row1 <- row0, row0 <- tmp */ + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 /* first tweakey word: old Y+9..Y+16 are shuffled into Y+1..Y+8 while old Y+1..Y+8 move to Y+9..Y+16 */ + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd 
r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 /* new Y+1..Y+8 = old Y+10, Y+16, Y+9, Y+14, Y+11, Y+15, Y+13, Y+12 */ + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) /* switch the table pointer to table_2 */ + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 /* second tweakey word: old Y+25..Y+32 are mapped through table_2, then shuffled the same way */ + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd 
r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) /* point back at table_0 for the next round */ + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 /* second increment: r22 has now advanced by 2 for this round */ + ldd r18,Y+33 + cp r22,r18 /* exit only when r22 equals the doubled end value; the test follows the body, so at least one round always runs (callers presumably pass first < last) */ + breq 5259f + rjmp 86b +5259: /* epilogue: restore RAMPZ and Z, write results back, release the frame */ +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 /* working state -> bytes 32..47 */ + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 /* updated tweakey copy Y+1..Y+32 -> bytes 0..31 */ + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 /* release the 33-byte frame */ + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop 
r29 + pop r28 + eor r1,r1 /* clear r1 before returning (r0 was used as scratch; r1 is the zero register in the avr-gcc convention) */ + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global forkskinny_128_256_inv_rounds + .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + 
ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + 
ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 
+#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm 
r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 
+ pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + 
std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + 
std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + + .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi 
r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 
+#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk + + .text +.global forkskinny_128_384_rounds + .type forkskinny_128_384_rounds, @function +forkskinny_128_384_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd 
r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd 
r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + 
lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + ldd r20,Y+45 + ldd r21,Y+46 + ldd r16,Y+47 + ldd r17,Y+48 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+33 + std Y+41,r0 + ldd r0,Y+34 + std 
Y+42,r0 + ldd r0,Y+35 + std Y+43,r0 + ldd r0,Y+36 + std Y+44,r0 + ldd r0,Y+37 + std Y+45,r0 + ldd r0,Y+38 + std Y+46,r0 + ldd r0,Y+39 + std Y+47,r0 + ldd r0,Y+40 + std Y+48,r0 + std Y+33,r19 + std Y+34,r17 + std Y+35,r18 + std Y+36,r21 + std Y+37,r26 + std Y+38,r16 + std Y+39,r20 + std Y+40,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd 
r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds + + .text +.global forkskinny_128_384_inv_rounds + .type forkskinny_128_384_inv_rounds, @function +forkskinny_128_384_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd 
r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if 
defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + ldd r20,Y+37 + ldd r21,Y+38 + ldd r16,Y+39 + ldd r17,Y+40 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm 
r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+41 + std Y+33,r0 + ldd r0,Y+42 + std Y+34,r0 + ldd r0,Y+43 + std Y+35,r0 + ldd r0,Y+44 + std Y+36,r0 + ldd r0,Y+45 + std Y+37,r0 + ldd r0,Y+46 + std Y+38,r0 + ldd r0,Y+47 + std Y+39,r0 + ldd r0,Y+48 + std Y+40,r0 + std Y+41,r26 + std Y+42,r18 + std Y+43,r20 + std Y+44,r17 + std Y+45,r16 + std Y+46,r27 + std Y+47,r21 + std Y+48,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor 
r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm 
r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std 
Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds + + .text +.global forkskinny_128_384_forward_tk + .type forkskinny_128_384_forward_tk, @function +forkskinny_128_384_forward_tk: + push r28 + push r29 + push 
r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd 
r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r26,Y+29 + ldd r27,Y+30 + ldd r2,Y+31 + ldd r3,Y+32 + ldd r23,Y+17 + std Y+25,r23 + ldd r23,Y+18 + std Y+26,r23 + ldd r23,Y+19 + 
std Y+27,r23 + ldd r23,Y+20 + std Y+28,r23 + ldd r23,Y+21 + std Y+29,r23 + ldd r23,Y+22 + std Y+30,r23 + ldd r23,Y+23 + std Y+31,r23 + ldd r23,Y+24 + std Y+32,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+17,r19 + std Y+18,r3 + std Y+19,r18 + std Y+20,r27 + std Y+21,r20 + std Y+22,r2 + std Y+23,r26 + std Y+24,r21 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std 
Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk + + .text +.global forkskinny_128_384_reverse_tk + .type forkskinny_128_384_reverse_tk, @function +forkskinny_128_384_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + 
ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 + ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif 
defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + 
std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd 
r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor 
r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld 
r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 
+ and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd 
r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if 
defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor 
r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z 
+#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + 
ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 
+ pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + 
std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) 
+ elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + and r11,r16 + or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std 
Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/internal-forkskinny.c b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/internal-forkskinny.c index c43ef98..4a7a01c 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/internal-forkskinny.c @@ -40,37 +40,10 @@ static unsigned char const RC[87] = { 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 }; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 - -/** - * \brief State information for ForkSkinny-128-256. 
- */ -typedef struct -{ - //uint32_t TK1[4]; /**< First part of the tweakey */ - //uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_256_state_t; +#if !defined(__AVR__) -typedef struct +void forkskinny_128_256_init_tks(forkskinny_128_256_tweakey_schedule_t *tks, const unsigned char key[32], uint8_t nb_rounds) { - /** Words of the full key schedule */ - uint32_t row0[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; - uint32_t row1[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; - - -} forkskinny_128_256_key_schedule_t; - -static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, const unsigned char key[32], uint8_t nb_rounds){ uint32_t TK[4]; unsigned round; @@ -81,8 +54,8 @@ static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, c TK[3] = le_load_word32(key + 12); /* Initiate key schedule with permutations of TK1 */ for(round = 0; round<nb_rounds; round++){ - ks->row0[round] = TK[0]; - ks->row1[round] = TK[1]; + tks->row0[round] = TK[0]; + tks->row1[round] = TK[1]; skinny128_permute_tk(TK); } @@ -94,8 +67,8 @@ static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, c TK[3] = le_load_word32(key + 28); /* Process second Tweakey and add it to the key schedule */ for(round = 0; round<nb_rounds; round++){ - ks->row0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny128_permute_tk(TK); skinny128_LFSR2(TK[0]); @@ -103,51 +76,48 @@ static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, c } } -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply.
- */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, forkskinny_128_256_key_schedule_t *ks, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + s0 = state->S[0]; + s1 = state->S[1]; + s2 = state->S[2]; + s3 = state->S[3]; + + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -156,95 +126,8 @@ static void forkskinny_128_256_round state->S[3] = s3; } -void forkskinny_128_256_encrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_256_state_t state; - forkskinny_128_256_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - if (output_left && output_right){ - forkskinny_128_256_init_tks(&ks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); - } - else{ - forkskinny_128_256_init_tks(&ks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + FORKSKINNY_128_256_ROUNDS_AFTER); - } - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, 
state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-256 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_256_inv_round - (forkskinny_128_256_state_t *state, forkskinny_128_256_key_schedule_t *ks, unsigned round) +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -255,33 +138,37 @@ static void forkskinny_128_256_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -290,96 +177,8 @@ static void forkskinny_128_256_inv_round state->S[3] = s3; } -void forkskinny_128_256_decrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_init_tks(forkskinny_128_384_tweakey_schedule_t *tks, const unsigned char key[48], uint8_t nb_rounds) { - forkskinny_128_256_state_t state; - forkskinny_128_256_state_t fstate; - forkskinny_128_256_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - forkskinny_128_256_init_tks(&ks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); - - /* Unpack the input */ - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); --round) { - forkskinny_128_256_inv_round(&state, &ks, round - 1); - } - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Save the state and the tweakey at the forking point */ - fstate = 
state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_256_inv_round(&state, &ks, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&fstate, &ks, round); - } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); -} - -/** - * \brief Number of rounds of ForkSkinny-128-384 before forking. - */ -#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 - -/** - * \brief Number of rounds of ForkSkinny-128-384 after forking. - */ -#define FORKSKINNY_128_384_ROUNDS_AFTER 31 - -/** - * \brief State information for ForkSkinny-128-384. 
- */ -typedef struct -{ - //uint32_t TK1[4]; /**< First part of the tweakey */ - //uint32_t TK2[4]; /**< Second part of the tweakey */ - //uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -typedef struct -{ - /** Words of the full key schedule */ - uint32_t row0[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; - uint32_t row1[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; - - -} forkskinny_128_384_key_schedule_t; - -static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, const unsigned char key[48], uint8_t nb_rounds){ uint32_t TK[4]; unsigned round; @@ -390,8 +189,8 @@ static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c TK[3] = le_load_word32(key + 12); /* Initiate key schedule with permutations of TK1 */ for(round = 0; roundrow0[round] = TK[0]; - ks->row1[round] = TK[1]; + tks->row0[round] = TK[0]; + tks->row1[round] = TK[1]; skinny128_permute_tk(TK); } @@ -401,10 +200,11 @@ static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c TK[1] = le_load_word32(key + 20); TK[2] = le_load_word32(key + 24); TK[3] = le_load_word32(key + 28); + /* Process second Tweakey and add it to the key schedule */ for(round = 0; roundrow0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny128_permute_tk(TK); skinny128_LFSR2(TK[0]); @@ -416,10 +216,11 @@ static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c TK[1] = le_load_word32(key + 36); TK[2] = le_load_word32(key + 40); TK[3] = le_load_word32(key + 44); + /* Process third Tweakey and add it to the key schedule */ for(round = 0; roundrow0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny128_permute_tk(TK); skinny128_LFSR3(TK[0]); @@ -427,51 +228,48 @@ static void 
forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c } } -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, forkskinny_128_384_key_schedule_t *ks, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + s0 = state->S[0]; + s1 = state->S[1]; + s2 = state->S[2]; + s3 = state->S[3]; + + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -480,97 +278,8 @@ static void forkskinny_128_384_round state->S[3] = s3; } -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - forkskinny_128_384_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - if (output_left && output_right){ - forkskinny_128_384_init_tks(&ks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); - } - else{ - forkskinny_128_384_init_tks(&ks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER); - } - - /* Unpack the input */ - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - 
le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-384 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_384_inv_round - (forkskinny_128_384_state_t *state, forkskinny_128_384_key_schedule_t *ks, unsigned round) +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -581,33 +290,37 @@ static void forkskinny_128_384_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -616,96 +329,9 @@ static void forkskinny_128_384_inv_round state->S[3] = s3; } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - forkskinny_128_384_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - forkskinny_128_384_init_tks(&ks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); - - /* Unpack the input */ - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, &ks, round - 1); - } - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; 
round > 0; --round) { - forkskinny_128_384_inv_round(&state, &ks, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, &ks, round); - } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); -} - -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. - */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 -/** - * \brief State information for ForkSkinny-64-192. 
- */ -typedef struct +void forkskinny_64_192_init_tks(forkskinny_64_192_tweakey_schedule_t *tks, const unsigned char key[24], uint8_t nb_rounds) { - //uint16_t TK1[4]; /**< First part of the tweakey */ - //uint16_t TK2[4]; /**< Second part of the tweakey */ - //uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ - -} forkskinny_64_192_state_t; - -typedef struct -{ - /** Words of the full key schedule */ - uint16_t row0[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; - uint16_t row1[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; - - -} forkskinny_64_192_key_schedule_t; - -static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, const unsigned char key[24], uint8_t nb_rounds){ uint16_t TK[4]; unsigned round; @@ -716,8 +342,8 @@ static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con TK[3] = be_load_word16(key + 6); /* Initiate key schedule with permutations of TK1 */ for(round = 0; roundrow0[round] = TK[0]; - ks->row1[round] = TK[1]; + tks->row0[round] = TK[0]; + tks->row1[round] = TK[1]; skinny64_permute_tk(TK); } @@ -727,10 +353,11 @@ static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con TK[1] = be_load_word16(key + 10); TK[2] = be_load_word16(key + 12); TK[3] = be_load_word16(key + 14); + /* Process second Tweakey and add it to the key schedule */ for(round = 0; roundrow0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny64_permute_tk(TK); skinny64_LFSR2(TK[0]); @@ -744,8 +371,8 @@ static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con TK[3] = be_load_word16(key + 22); /* Process third Tweakey and add it to the key schedule */ for(round = 0; roundrow0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny64_permute_tk(TK); skinny64_LFSR3(TK[0]); @@ -753,17 +380,8 @@ 
static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con } } -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. - */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, forkskinny_64_192_key_schedule_t *ks, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -774,131 +392,44 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= ks->row1[round] ^ ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= tks->row0[first] ^ ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= tks->row1[first] ^ ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = 
s0; + s0 = temp; + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - forkskinny_64_192_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - if (output_left && output_right){ - forkskinny_64_192_init_tks(&ks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); - } - else{ - forkskinny_64_192_init_tks(&ks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + FORKSKINNY_64_192_ROUNDS_AFTER); - } - - /* Unpack the input */ - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 
0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, forkskinny_64_192_key_schedule_t *ks, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -909,33 +440,35 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= ks->row1[round] ^ ((rc & 0x70) << 8); - s2 ^= 0x2000; - - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= tks->row0[first] ^ ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= tks->row1[first] ^ ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -944,61 +477,4 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char 
*output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - forkskinny_64_192_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - forkskinny_64_192_init_tks(&ks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); - - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, &ks, round - 1); - } - - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, &ks, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, &ks, round); - } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); -} +#endif /* !__AVR__ */ diff --git 
a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/internal-forkskinny.h b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/internal-forkskinny.h index 0c1a707..1b4f5bf 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,172 @@ extern "C" { #endif /** + * \brief Number of rounds of ForkSkinny-128-256 before forking. + */ +#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 + +/** + * \brief Number of rounds of ForkSkinny-128-256 after forking. + */ +#define FORKSKINNY_128_256_ROUNDS_AFTER 27 + +/** + * \brief State information for ForkSkinny-128-256. + */ +typedef struct +{ + // uint32_t TK1[4]; /**< First part of the tweakey */ + // uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +typedef struct +{ + /** Words of the full key schedule */ + uint32_t row0[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; + uint32_t row1[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; + +} forkskinny_128_256_tweakey_schedule_t; + +/** + * \brief Number of rounds of ForkSkinny-128-384 before forking. + */ +#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 + +/** + * \brief Number of rounds of ForkSkinny-128-384 after forking. + */ +#define FORKSKINNY_128_384_ROUNDS_AFTER 31 + +/** + * \brief State information for ForkSkinny-128-384. 
+ */ +typedef struct +{ + // uint32_t TK1[4]; /**< First part of the tweakey */ + // uint32_t TK2[4]; /**< Second part of the tweakey */ + // uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +typedef struct +{ + /** Words of the full key schedule */ + uint32_t row0[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; + uint32_t row1[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; + +} forkskinny_128_384_tweakey_schedule_t; + +/** + * \brief Number of rounds of ForkSkinny-64-192 before forking. + */ +#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 + +/** + * \brief Number of rounds of ForkSkinny-64-192 after forking. + */ +#define FORKSKINNY_64_192_ROUNDS_AFTER 23 + +/** + * \brief State information for ForkSkinny-64-192. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +typedef struct +{ + /** Words of the full key schedule */ + uint16_t row0[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; + uint16_t row1[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; + + +} forkskinny_64_192_tweakey_schedule_t; + + +void forkskinny_128_256_init_tks(forkskinny_128_256_tweakey_schedule_t *tks, const unsigned char key[32], uint8_t nb_rounds); + + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. 
+ * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last); + + +void forkskinny_128_384_init_tks(forkskinny_128_384_tweakey_schedule_t *tks, const unsigned char key[48], uint8_t nb_rounds); + + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last); + + +void forkskinny_64_192_init_tks(forkskinny_64_192_tweakey_schedule_t *tks, const unsigned char key[24], uint8_t nb_rounds); + + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. 
+ */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last); + + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. * * \param key 256-bit tweakey for ForkSkinny-128-256. diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_dec/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. 
The original version from the specification is * equivalent to: diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/aead-common.h b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/aead-common.h index 2be95eb..8429f59 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/aead-common.h +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/aead-common.h @@ -154,7 +154,7 @@ typedef void (*aead_xof_squeeze_t) /** * \brief No special AEAD features. */ -#define AEAD_FLAG_NONE 0x0000 +#define AEAD_FLAG_NONE 0x0000 /** * \brief The natural byte order of the AEAD cipher is little-endian. @@ -166,7 +166,18 @@ typedef void (*aead_xof_squeeze_t) * numbers as nonces. The application needs to know whether the sequence * number should be packed into the leading or trailing bytes of the nonce. */ -#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 +#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 + +/** + * \brief The AEAD mode provides side-channel protection for the key. + */ +#define AEAD_FLAG_SC_PROTECT_KEY 0x0002 + +/** + * \brief The AEAD mode provides side-channel protection for all block + * operations. + */ +#define AEAD_FLAG_SC_PROTECT_ALL 0x0004 /** * \brief Meta-information about an AEAD cipher. 
diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/forkae.c b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/forkae.c index 4a9671a..5b7ba3d 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/forkae.c +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/forkae.c @@ -22,7 +22,7 @@ #include "forkae.h" #include "internal-forkskinny.h" -#include "internal-util.h" +#include "internal-skinnyutil.h" #include aead_cipher_t const forkae_paef_64_192_cipher = { @@ -138,3 +138,458 @@ aead_cipher_t const forkae_saef_128_256_cipher = { #define FORKAE_TWEAKEY_REDUCED_SIZE 32 #define FORKAE_BLOCK_FUNC forkskinny_128_256 #include "internal-forkae-saef.h" + +/* Helper functions to implement the forking encrypt/decrypt block operations + * on top of the basic "perform N rounds" functions in internal-forkskinny.c */ + +/** + * \brief Number of rounds of ForkSkinny-128-256 before forking. + */ +#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 + +/** + * \brief Number of rounds of ForkSkinny-128-256 after forking. 
+ */ +#define FORKSKINNY_128_256_ROUNDS_AFTER 27 + +void forkskinny_128_256_encrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + + /* State stored per column */ + load_column_8(state.S, input); + + /* Run all of the rounds before the forking point */ + forkskinny_128_256_rounds(&state, 0, FORKSKINNY_128_256_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + store_column_8(output_right, state.S); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x51051001; /* Branching constant */ + state.S[1] ^= 0xa20a2002; + state.S[2] ^= 0x44144104; + state.S[3] ^= 0x88288208; + + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + store_column_8(output_left, state.S); + } else { + /* We only need the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + 
FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + store_column_8(output_right, state.S); + } +} + +void forkskinny_128_256_decrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_256_forward_tk + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_256_reverse_tk(&state, FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point, convert state to columns */ + memcpy(fstate.TK1, state.TK1, 16); + memcpy(fstate.TK2, state.TK2, 16); + rows_to_columns_32(fstate.S[0],fstate.S[1],fstate.S[2],fstate.S[3],state.S[0],state.S[1], state.S[2], state.S[3]); + + + /* Generate the left output block after another "before" 
rounds */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_256_rounds + (&fstate, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + store_column_8(output_right,fstate.S); +} + +/** + * \brief Number of rounds of ForkSkinny-128-384 before forking. + */ +#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 + +/** + * \brief Number of rounds of ForkSkinny-128-384 after forking. + */ +#define FORKSKINNY_128_384_ROUNDS_AFTER 31 + +void forkskinny_128_384_encrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + + /* State stored per column */ + load_column_8(state.S, input); + + /* Run all of the rounds before the forking point */ + forkskinny_128_384_rounds(&state, 0, FORKSKINNY_128_384_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] 
= state.S[3]; + + /* Generate the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + store_column_8(output_right, state.S); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x51051001; /* Branching constant */ + state.S[1] ^= 0xa20a2002; + state.S[2] ^= 0x44144104; + state.S[3] ^= 0x88288208; + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + store_column_8(output_left, state.S); + } else { + /* We only need the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + store_column_8(output_right, state.S); + } +} + +void forkskinny_128_384_decrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); 
+ + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_384_forward_tk + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_384_inv_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_384_reverse_tk(&state, FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point, convert state to columns */ + memcpy(fstate.TK1, state.TK1, 16); + memcpy(fstate.TK2, state.TK2, 16); + memcpy(fstate.TK3, state.TK3, 16); + rows_to_columns_32(fstate.S[0],fstate.S[1],fstate.S[2],fstate.S[3],state.S[0],state.S[1], state.S[2], state.S[3]); + + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_384_inv_rounds(&state, FORKSKINNY_128_384_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_384_rounds + (&fstate, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + store_column_8(output_right, fstate.S); +} + +/** + * \brief Number of rounds of ForkSkinny-64-192 before forking. + */ +#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 + +/** + * \brief Number of rounds of ForkSkinny-64-192 after forking. 
+ */ +#define FORKSKINNY_64_192_ROUNDS_AFTER 23 + +void forkskinny_64_192_encrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Run all of the rounds before the forking point */ + forkskinny_64_192_rounds(&state, 0, FORKSKINNY_64_192_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint16_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x1249U; /* Branching constant */ + state.S[1] ^= 0x36daU; + state.S[2] ^= 
0x5b7fU; + state.S[3] ^= 0xec81U; + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + } +} + +void forkskinny_64_192_decrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_64_192_forward_tk + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking 
point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_64_192_reverse_tk(&state, FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_64_192_inv_rounds(&state, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_64_192_rounds + (&fstate, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 67 
+ .byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + .byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte 193 + .byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 + .byte 100 + .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + .byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 194 + 
.byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 + .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + .byte 97 + .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 + 
.byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + .byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + .byte 93 + .byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 223 + .byte 22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section .progmem.data,"a",@progbits 
+ .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 80 + 
.byte 82 + .byte 84 + .byte 86 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_3, @object + .size table_3, 256 +table_3: + .byte 0 + .byte 128 + .byte 1 + .byte 129 + .byte 2 + .byte 130 + .byte 3 + .byte 131 + .byte 4 + .byte 132 + .byte 5 + .byte 133 + .byte 6 + .byte 134 + .byte 7 + .byte 135 + .byte 8 + .byte 136 + .byte 9 + .byte 137 + .byte 10 + .byte 138 + .byte 11 + .byte 139 + .byte 12 + .byte 140 + .byte 13 + .byte 141 + .byte 14 + .byte 142 + .byte 15 + .byte 143 + .byte 16 + .byte 144 + .byte 17 + .byte 145 + .byte 18 + .byte 146 + .byte 19 + .byte 147 + .byte 20 + .byte 148 + .byte 21 + .byte 149 + .byte 22 + .byte 150 + .byte 23 + .byte 151 + .byte 24 + .byte 152 + .byte 25 + .byte 153 + .byte 26 + .byte 154 + .byte 27 + .byte 155 + .byte 28 + .byte 156 + .byte 29 + .byte 157 + .byte 30 + .byte 158 + .byte 31 + .byte 159 + .byte 160 + .byte 32 + .byte 161 + .byte 33 + .byte 162 + .byte 34 + .byte 163 + .byte 35 + .byte 164 + .byte 36 + .byte 165 + 
.byte 37 + .byte 166 + .byte 38 + .byte 167 + .byte 39 + .byte 168 + .byte 40 + .byte 169 + .byte 41 + .byte 170 + .byte 42 + .byte 171 + .byte 43 + .byte 172 + .byte 44 + .byte 173 + .byte 45 + .byte 174 + .byte 46 + .byte 175 + .byte 47 + .byte 176 + .byte 48 + .byte 177 + .byte 49 + .byte 178 + .byte 50 + .byte 179 + .byte 51 + .byte 180 + .byte 52 + .byte 181 + .byte 53 + .byte 182 + .byte 54 + .byte 183 + .byte 55 + .byte 184 + .byte 56 + .byte 185 + .byte 57 + .byte 186 + .byte 58 + .byte 187 + .byte 59 + .byte 188 + .byte 60 + .byte 189 + .byte 61 + .byte 190 + .byte 62 + .byte 191 + .byte 63 + .byte 64 + .byte 192 + .byte 65 + .byte 193 + .byte 66 + .byte 194 + .byte 67 + .byte 195 + .byte 68 + .byte 196 + .byte 69 + .byte 197 + .byte 70 + .byte 198 + .byte 71 + .byte 199 + .byte 72 + .byte 200 + .byte 73 + .byte 201 + .byte 74 + .byte 202 + .byte 75 + .byte 203 + .byte 76 + .byte 204 + .byte 77 + .byte 205 + .byte 78 + .byte 206 + .byte 79 + .byte 207 + .byte 80 + .byte 208 + .byte 81 + .byte 209 + .byte 82 + .byte 210 + .byte 83 + .byte 211 + .byte 84 + .byte 212 + .byte 85 + .byte 213 + .byte 86 + .byte 214 + .byte 87 + .byte 215 + .byte 88 + .byte 216 + .byte 89 + .byte 217 + .byte 90 + .byte 218 + .byte 91 + .byte 219 + .byte 92 + .byte 220 + .byte 93 + .byte 221 + .byte 94 + .byte 222 + .byte 95 + .byte 223 + .byte 224 + .byte 96 + .byte 225 + .byte 97 + .byte 226 + .byte 98 + .byte 227 + .byte 99 + .byte 228 + .byte 100 + .byte 229 + .byte 101 + .byte 230 + .byte 102 + .byte 231 + .byte 103 + .byte 232 + .byte 104 + .byte 233 + .byte 105 + .byte 234 + .byte 106 + .byte 235 + .byte 107 + .byte 236 + .byte 108 + .byte 237 + .byte 109 + .byte 238 + .byte 110 + .byte 239 + .byte 111 + .byte 240 + .byte 112 + .byte 241 + .byte 113 + .byte 242 + .byte 114 + .byte 243 + .byte 115 + .byte 244 + .byte 116 + .byte 245 + .byte 117 + .byte 246 + .byte 118 + .byte 247 + .byte 119 + .byte 248 + .byte 120 + .byte 249 + .byte 121 + .byte 250 + .byte 122 + .byte 251 
+ .byte 123 + .byte 252 + .byte 124 + .byte 253 + .byte 125 + .byte 254 + .byte 126 + .byte 255 + .byte 127 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_4, @object + .size table_4, 174 +table_4: + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 7 + .byte 0 + .byte 15 + .byte 0 + .byte 15 + .byte 1 + .byte 15 + .byte 3 + .byte 14 + .byte 7 + .byte 13 + .byte 7 + .byte 11 + .byte 7 + .byte 7 + .byte 7 + .byte 15 + .byte 6 + .byte 15 + .byte 5 + .byte 14 + .byte 3 + .byte 12 + .byte 7 + .byte 9 + .byte 7 + .byte 3 + .byte 7 + .byte 7 + .byte 6 + .byte 15 + .byte 4 + .byte 14 + .byte 1 + .byte 13 + .byte 3 + .byte 10 + .byte 7 + .byte 5 + .byte 7 + .byte 11 + .byte 6 + .byte 7 + .byte 5 + .byte 14 + .byte 2 + .byte 12 + .byte 5 + .byte 8 + .byte 3 + .byte 0 + .byte 7 + .byte 1 + .byte 6 + .byte 3 + .byte 4 + .byte 6 + .byte 0 + .byte 13 + .byte 0 + .byte 11 + .byte 1 + .byte 7 + .byte 3 + .byte 14 + .byte 6 + .byte 13 + .byte 5 + .byte 10 + .byte 3 + .byte 4 + .byte 7 + .byte 9 + .byte 6 + .byte 3 + .byte 5 + .byte 6 + .byte 2 + .byte 12 + .byte 4 + .byte 8 + .byte 1 + .byte 1 + .byte 3 + .byte 2 + .byte 6 + .byte 5 + .byte 4 + .byte 10 + .byte 0 + .byte 5 + .byte 1 + .byte 11 + .byte 2 + .byte 6 + .byte 5 + .byte 12 + .byte 2 + .byte 8 + .byte 5 + .byte 0 + .byte 3 + .byte 0 + .byte 6 + .byte 1 + .byte 4 + .byte 2 + .byte 0 + .byte 5 + .byte 0 + .byte 11 + .byte 0 + .byte 7 + .byte 1 + .byte 15 + .byte 2 + .byte 14 + .byte 5 + .byte 12 + .byte 3 + .byte 8 + .byte 7 + .byte 1 + .byte 7 + .byte 3 + .byte 6 + .byte 7 + .byte 4 + .byte 14 + .byte 0 + .byte 13 + .byte 1 + .byte 11 + .byte 3 + .byte 6 + .byte 7 + .byte 13 + .byte 6 + .byte 11 + .byte 5 + .byte 6 + .byte 3 + .byte 12 + .byte 6 + .byte 9 + .byte 5 + .byte 2 + .byte 3 + .byte 4 + .byte 6 + .byte 9 + .byte 4 + .byte 2 + .byte 1 + .byte 5 + .byte 2 + .byte 10 + .byte 4 + .byte 4 + .byte 1 + .byte 9 + .byte 2 + .byte 2 + .byte 5 + .byte 4 + .byte 2 + .byte 8 + .byte 4 + .byte 0 + .byte 1 + + 
.section .progmem.data,"a",@progbits + .p2align 8 + .type table_5, @object + .size table_5, 256 +table_5: + .byte 204 + .byte 198 + .byte 201 + .byte 192 + .byte 193 + .byte 202 + .byte 194 + .byte 203 + .byte 195 + .byte 200 + .byte 197 + .byte 205 + .byte 196 + .byte 206 + .byte 199 + .byte 207 + .byte 108 + .byte 102 + .byte 105 + .byte 96 + .byte 97 + .byte 106 + .byte 98 + .byte 107 + .byte 99 + .byte 104 + .byte 101 + .byte 109 + .byte 100 + .byte 110 + .byte 103 + .byte 111 + .byte 156 + .byte 150 + .byte 153 + .byte 144 + .byte 145 + .byte 154 + .byte 146 + .byte 155 + .byte 147 + .byte 152 + .byte 149 + .byte 157 + .byte 148 + .byte 158 + .byte 151 + .byte 159 + .byte 12 + .byte 6 + .byte 9 + .byte 0 + .byte 1 + .byte 10 + .byte 2 + .byte 11 + .byte 3 + .byte 8 + .byte 5 + .byte 13 + .byte 4 + .byte 14 + .byte 7 + .byte 15 + .byte 28 + .byte 22 + .byte 25 + .byte 16 + .byte 17 + .byte 26 + .byte 18 + .byte 27 + .byte 19 + .byte 24 + .byte 21 + .byte 29 + .byte 20 + .byte 30 + .byte 23 + .byte 31 + .byte 172 + .byte 166 + .byte 169 + .byte 160 + .byte 161 + .byte 170 + .byte 162 + .byte 171 + .byte 163 + .byte 168 + .byte 165 + .byte 173 + .byte 164 + .byte 174 + .byte 167 + .byte 175 + .byte 44 + .byte 38 + .byte 41 + .byte 32 + .byte 33 + .byte 42 + .byte 34 + .byte 43 + .byte 35 + .byte 40 + .byte 37 + .byte 45 + .byte 36 + .byte 46 + .byte 39 + .byte 47 + .byte 188 + .byte 182 + .byte 185 + .byte 176 + .byte 177 + .byte 186 + .byte 178 + .byte 187 + .byte 179 + .byte 184 + .byte 181 + .byte 189 + .byte 180 + .byte 190 + .byte 183 + .byte 191 + .byte 60 + .byte 54 + .byte 57 + .byte 48 + .byte 49 + .byte 58 + .byte 50 + .byte 59 + .byte 51 + .byte 56 + .byte 53 + .byte 61 + .byte 52 + .byte 62 + .byte 55 + .byte 63 + .byte 140 + .byte 134 + .byte 137 + .byte 128 + .byte 129 + .byte 138 + .byte 130 + .byte 139 + .byte 131 + .byte 136 + .byte 133 + .byte 141 + .byte 132 + .byte 142 + .byte 135 + .byte 143 + .byte 92 + .byte 86 + .byte 89 + .byte 80 + .byte 
81 + .byte 90 + .byte 82 + .byte 91 + .byte 83 + .byte 88 + .byte 85 + .byte 93 + .byte 84 + .byte 94 + .byte 87 + .byte 95 + .byte 220 + .byte 214 + .byte 217 + .byte 208 + .byte 209 + .byte 218 + .byte 210 + .byte 219 + .byte 211 + .byte 216 + .byte 213 + .byte 221 + .byte 212 + .byte 222 + .byte 215 + .byte 223 + .byte 76 + .byte 70 + .byte 73 + .byte 64 + .byte 65 + .byte 74 + .byte 66 + .byte 75 + .byte 67 + .byte 72 + .byte 69 + .byte 77 + .byte 68 + .byte 78 + .byte 71 + .byte 79 + .byte 236 + .byte 230 + .byte 233 + .byte 224 + .byte 225 + .byte 234 + .byte 226 + .byte 235 + .byte 227 + .byte 232 + .byte 229 + .byte 237 + .byte 228 + .byte 238 + .byte 231 + .byte 239 + .byte 124 + .byte 118 + .byte 121 + .byte 112 + .byte 113 + .byte 122 + .byte 114 + .byte 123 + .byte 115 + .byte 120 + .byte 117 + .byte 125 + .byte 116 + .byte 126 + .byte 119 + .byte 127 + .byte 252 + .byte 246 + .byte 249 + .byte 240 + .byte 241 + .byte 250 + .byte 242 + .byte 251 + .byte 243 + .byte 248 + .byte 245 + .byte 253 + .byte 244 + .byte 254 + .byte 247 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_6, @object + .size table_6, 256 +table_6: + .byte 51 + .byte 52 + .byte 54 + .byte 56 + .byte 60 + .byte 58 + .byte 49 + .byte 62 + .byte 57 + .byte 50 + .byte 53 + .byte 55 + .byte 48 + .byte 59 + .byte 61 + .byte 63 + .byte 67 + .byte 68 + .byte 70 + .byte 72 + .byte 76 + .byte 74 + .byte 65 + .byte 78 + .byte 73 + .byte 66 + .byte 69 + .byte 71 + .byte 64 + .byte 75 + .byte 77 + .byte 79 + .byte 99 + .byte 100 + .byte 102 + .byte 104 + .byte 108 + .byte 106 + .byte 97 + .byte 110 + .byte 105 + .byte 98 + .byte 101 + .byte 103 + .byte 96 + .byte 107 + .byte 109 + .byte 111 + .byte 131 + .byte 132 + .byte 134 + .byte 136 + .byte 140 + .byte 138 + .byte 129 + .byte 142 + .byte 137 + .byte 130 + .byte 133 + .byte 135 + .byte 128 + .byte 139 + .byte 141 + .byte 143 + .byte 195 + .byte 196 + .byte 198 + .byte 200 + .byte 204 + .byte 202 + .byte 193 + 
.byte 206 + .byte 201 + .byte 194 + .byte 197 + .byte 199 + .byte 192 + .byte 203 + .byte 205 + .byte 207 + .byte 163 + .byte 164 + .byte 166 + .byte 168 + .byte 172 + .byte 170 + .byte 161 + .byte 174 + .byte 169 + .byte 162 + .byte 165 + .byte 167 + .byte 160 + .byte 171 + .byte 173 + .byte 175 + .byte 19 + .byte 20 + .byte 22 + .byte 24 + .byte 28 + .byte 26 + .byte 17 + .byte 30 + .byte 25 + .byte 18 + .byte 21 + .byte 23 + .byte 16 + .byte 27 + .byte 29 + .byte 31 + .byte 227 + .byte 228 + .byte 230 + .byte 232 + .byte 236 + .byte 234 + .byte 225 + .byte 238 + .byte 233 + .byte 226 + .byte 229 + .byte 231 + .byte 224 + .byte 235 + .byte 237 + .byte 239 + .byte 147 + .byte 148 + .byte 150 + .byte 152 + .byte 156 + .byte 154 + .byte 145 + .byte 158 + .byte 153 + .byte 146 + .byte 149 + .byte 151 + .byte 144 + .byte 155 + .byte 157 + .byte 159 + .byte 35 + .byte 36 + .byte 38 + .byte 40 + .byte 44 + .byte 42 + .byte 33 + .byte 46 + .byte 41 + .byte 34 + .byte 37 + .byte 39 + .byte 32 + .byte 43 + .byte 45 + .byte 47 + .byte 83 + .byte 84 + .byte 86 + .byte 88 + .byte 92 + .byte 90 + .byte 81 + .byte 94 + .byte 89 + .byte 82 + .byte 85 + .byte 87 + .byte 80 + .byte 91 + .byte 93 + .byte 95 + .byte 115 + .byte 116 + .byte 118 + .byte 120 + .byte 124 + .byte 122 + .byte 113 + .byte 126 + .byte 121 + .byte 114 + .byte 117 + .byte 119 + .byte 112 + .byte 123 + .byte 125 + .byte 127 + .byte 3 + .byte 4 + .byte 6 + .byte 8 + .byte 12 + .byte 10 + .byte 1 + .byte 14 + .byte 9 + .byte 2 + .byte 5 + .byte 7 + .byte 0 + .byte 11 + .byte 13 + .byte 15 + .byte 179 + .byte 180 + .byte 182 + .byte 184 + .byte 188 + .byte 186 + .byte 177 + .byte 190 + .byte 185 + .byte 178 + .byte 181 + .byte 183 + .byte 176 + .byte 187 + .byte 189 + .byte 191 + .byte 211 + .byte 212 + .byte 214 + .byte 216 + .byte 220 + .byte 218 + .byte 209 + .byte 222 + .byte 217 + .byte 210 + .byte 213 + .byte 215 + .byte 208 + .byte 219 + .byte 221 + .byte 223 + .byte 243 + .byte 244 + .byte 246 + .byte 248 
+ .byte 252 + .byte 250 + .byte 241 + .byte 254 + .byte 249 + .byte 242 + .byte 245 + .byte 247 + .byte 240 + .byte 251 + .byte 253 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_7, @object + .size table_7, 256 +table_7: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 
49 + .byte 51 + .byte 53 + .byte 55 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_8, @object + .size table_8, 256 +table_8: + .byte 0 + .byte 8 + .byte 1 + .byte 9 + .byte 2 + .byte 10 + .byte 3 + .byte 11 + .byte 12 + .byte 4 + .byte 13 + .byte 5 + .byte 14 + .byte 6 + .byte 15 + .byte 7 + .byte 128 + .byte 136 + .byte 129 + .byte 137 + .byte 130 + .byte 138 + .byte 131 + .byte 139 + .byte 140 + .byte 132 + .byte 141 + .byte 133 + .byte 142 + .byte 134 + .byte 143 + .byte 135 + .byte 16 + .byte 24 + .byte 17 + .byte 25 + .byte 18 + .byte 26 + .byte 19 + .byte 27 + .byte 28 + .byte 20 + .byte 29 + .byte 21 + .byte 30 + .byte 22 + .byte 31 + .byte 23 + .byte 144 + .byte 152 + .byte 145 + .byte 153 + .byte 146 + .byte 154 + .byte 147 + .byte 155 + .byte 156 + .byte 148 + .byte 157 + .byte 
149 + .byte 158 + .byte 150 + .byte 159 + .byte 151 + .byte 32 + .byte 40 + .byte 33 + .byte 41 + .byte 34 + .byte 42 + .byte 35 + .byte 43 + .byte 44 + .byte 36 + .byte 45 + .byte 37 + .byte 46 + .byte 38 + .byte 47 + .byte 39 + .byte 160 + .byte 168 + .byte 161 + .byte 169 + .byte 162 + .byte 170 + .byte 163 + .byte 171 + .byte 172 + .byte 164 + .byte 173 + .byte 165 + .byte 174 + .byte 166 + .byte 175 + .byte 167 + .byte 48 + .byte 56 + .byte 49 + .byte 57 + .byte 50 + .byte 58 + .byte 51 + .byte 59 + .byte 60 + .byte 52 + .byte 61 + .byte 53 + .byte 62 + .byte 54 + .byte 63 + .byte 55 + .byte 176 + .byte 184 + .byte 177 + .byte 185 + .byte 178 + .byte 186 + .byte 179 + .byte 187 + .byte 188 + .byte 180 + .byte 189 + .byte 181 + .byte 190 + .byte 182 + .byte 191 + .byte 183 + .byte 192 + .byte 200 + .byte 193 + .byte 201 + .byte 194 + .byte 202 + .byte 195 + .byte 203 + .byte 204 + .byte 196 + .byte 205 + .byte 197 + .byte 206 + .byte 198 + .byte 207 + .byte 199 + .byte 64 + .byte 72 + .byte 65 + .byte 73 + .byte 66 + .byte 74 + .byte 67 + .byte 75 + .byte 76 + .byte 68 + .byte 77 + .byte 69 + .byte 78 + .byte 70 + .byte 79 + .byte 71 + .byte 208 + .byte 216 + .byte 209 + .byte 217 + .byte 210 + .byte 218 + .byte 211 + .byte 219 + .byte 220 + .byte 212 + .byte 221 + .byte 213 + .byte 222 + .byte 214 + .byte 223 + .byte 215 + .byte 80 + .byte 88 + .byte 81 + .byte 89 + .byte 82 + .byte 90 + .byte 83 + .byte 91 + .byte 92 + .byte 84 + .byte 93 + .byte 85 + .byte 94 + .byte 86 + .byte 95 + .byte 87 + .byte 224 + .byte 232 + .byte 225 + .byte 233 + .byte 226 + .byte 234 + .byte 227 + .byte 235 + .byte 236 + .byte 228 + .byte 237 + .byte 229 + .byte 238 + .byte 230 + .byte 239 + .byte 231 + .byte 96 + .byte 104 + .byte 97 + .byte 105 + .byte 98 + .byte 106 + .byte 99 + .byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + .byte 250 + .byte 243 + .byte 251 
+ .byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + .byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global forkskinny_128_256_rounds + .type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi 
r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z 
+#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + 
eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if 
defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd 
r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global forkskinny_128_256_inv_rounds + .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd 
r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif 
defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + 
lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 
+#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 
+ std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r12 + movw r20,r14 
+ movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + dec r22 + breq 5109f + 
rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + + .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: /* Walks the 32-byte block at r25:r24 through r22 iterations of loop 51. Bytes 0..15 live in r2..r17 and bytes 16..31 in stack slots Y+1..Y+16. Each iteration moves the upper 8 bytes of each half down and shuffles the old lower 8 bytes into the upper positions; for the second half the shuffled bytes are also mapped through table_3. The count is tested after the decrement, so r22 = 0 gives 256 iterations. NOTE(review): argument roles are inferred from register use - confirm against the C prototype. */ + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 /* reserve 16 stack bytes for bytes 16..31 of the block */ + in r0,0x3f + cli /* interrupts are off while the stack pointer high byte is rewritten; the saved status register is written back right after */ + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std 
Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 /* the block pointer is saved: Z becomes the table pointer and r24/r25 are overwritten inside the loop */ + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 /* only the low byte of Z is replaced by the data byte; NOTE(review): this addresses table_3[byte] only if table_3 is 256-byte aligned - confirm at its definition */ +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld 
r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + dec r22 + breq 5109f + rjmp 51b +5109: /* loop exit: restore RAMPZ where present, pop the block pointer back into Z and store all 32 bytes */ +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk + + .text +.global forkskinny_128_384_rounds + .type forkskinny_128_384_rounds, @function +forkskinny_128_384_rounds: /* Processes the 64-byte state at r25:r24 in a loop whose counter starts at 2 * r22 and stops at 2 * r20. Bytes 48..63 are held in r2..r15,r24,r25 and bytes 0..47 in stack slots Y+1..Y+48. NOTE(review): r22/r20 look like first/last round numbers - confirm against the C prototype. */ + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw 
r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 /* Y+49 = 2 * r20, the value of r22 at which the loop stops */ + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 /* r22 is doubled as well; it grows by 2 per iteration and supplies the low byte of Z for the table_4 reads */ +118: /* each iteration starts by mapping the 16 register-held bytes through table_0 */ + mov r30,r2 +#if 
defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 /* XOR stack bytes Y+1..8, Y+17..24 and Y+33..40 into r2..r9 */ + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 /* with r22 as the low byte of Z, the fetched table_4 byte is XORed into r2; after inc r22 the next one goes into r6 */ +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + 
mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 /* Y+9..16 and Y+1..8 trade places; the bytes moving into Y+1..8 are shuffled on the way */ + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 /* same exchange for Y+17..32, with the moved bytes first mapped through table_2 */ + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + 
lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+41 /* same exchange for Y+33..48, with the moved bytes first mapped through table_3 */ + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + ldd r20,Y+45 + ldd r21,Y+46 + ldd r16,Y+47 + ldd r17,Y+48 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z 
+#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+33 + std Y+41,r0 + ldd r0,Y+34 + std Y+42,r0 + ldd r0,Y+35 + std Y+43,r0 + ldd r0,Y+36 + std Y+44,r0 + ldd r0,Y+37 + std Y+45,r0 + ldd r0,Y+38 + std Y+46,r0 + ldd r0,Y+39 + std Y+47,r0 + ldd r0,Y+40 + std Y+48,r0 + std Y+33,r19 + std Y+34,r17 + std Y+35,r18 + std Y+36,r21 + std Y+37,r26 + std Y+38,r16 + std Y+39,r20 + std Y+40,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 /* second step of r22 in this iteration, then compare with the stop value kept at Y+49 */ + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: /* loop exit: restore RAMPZ where present, pop the state pointer back into Z and write back all 64 bytes */ +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd 
r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 /* r1 is cleared before returning; avr-gcc treats r1 as the zero register */ + ret + .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds + + .text +.global forkskinny_128_384_inv_rounds + .type forkskinny_128_384_inv_rounds, @function +forkskinny_128_384_inv_rounds: /* Processes the 64-byte state at r25:r24 in a loop whose counter starts at 2 * r22 and steps down by 2 until it equals 2 * r20. Bytes 48..63 are held in r2..r15,r24,r25 and bytes 0..47 in stack slots Y+1..Y+48. NOTE(review): r22/r20 look like first/last round numbers - confirm against the C prototype. */ + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std 
Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 /* Y+49 = 2 * r20, the value of r22 at which the loop stops */ + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 /* r22 is doubled as well; it drops by 2 per iteration */ +118: /* Y+1..8 and Y+9..16 trade places; the bytes moving into Y+9..16 are shuffled on the way */ + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 /* same exchange for Y+17..32, with the moved bytes first mapped through table_3 (Z points at it on loop entry and is reloaded with it at the end of each iteration) */ + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if 
defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 /* same exchange for Y+33..48, with the moved bytes first mapped through table_2 */ + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + ldd r20,Y+37 + ldd r21,Y+38 + ldd r16,Y+39 + ldd r17,Y+40 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm 
r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+41 + std Y+33,r0 + ldd r0,Y+42 + std Y+34,r0 + ldd r0,Y+43 + std Y+35,r0 + ldd r0,Y+44 + std Y+36,r0 + ldd r0,Y+45 + std Y+37,r0 + ldd r0,Y+46 + std Y+38,r0 + ldd r0,Y+47 + std Y+39,r0 + ldd r0,Y+48 + std Y+40,r0 + std Y+41,r26 + std Y+42,r18 + std Y+43,r20 + std Y+44,r17 + std Y+45,r16 + std Y+46,r27 + std Y+47,r21 + std Y+48,r19 + movw r18,r2 /* the four 4-byte register groups rotate by one position, then are XOR-combined and byte-rotated */ + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 /* XOR stack bytes Y+1..8, Y+17..24 and Y+33..40 into r2..r9 */ + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor 
r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 /* r22 steps down twice per iteration; after each step it becomes the low byte of Z, and the two table_4 bytes fetched are XORed into r6 and then r2 */ + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) /* map the 16 register-held bytes through table_1 */ + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + 
ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+49 + cp r22,r18 /* stop once r22 has come down to the value kept at Y+49 */ + breq 5348f + rjmp 118b +5348: /* loop exit: restore RAMPZ where present, pop the state pointer back into Z and write back all 64 bytes */ +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop 
r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 /* r1 is cleared before returning; avr-gcc treats r1 as the zero register */ + ret + .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds + + .text +.global forkskinny_128_384_forward_tk + .type forkskinny_128_384_forward_tk, @function +forkskinny_128_384_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out 
_SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 
+ std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r26,Y+29 + ldd r27,Y+30 + ldd r2,Y+31 + ldd r3,Y+32 + ldd r23,Y+17 + std Y+25,r23 + ldd r23,Y+18 + std Y+26,r23 + ldd r23,Y+19 + std Y+27,r23 + ldd r23,Y+20 + std Y+28,r23 + ldd r23,Y+21 + std Y+29,r23 + ldd r23,Y+22 + std Y+30,r23 + ldd r23,Y+23 + std Y+31,r23 + ldd r23,Y+24 + std Y+32,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+17,r19 + std Y+18,r3 + std Y+19,r18 + std Y+20,r27 + std Y+21,r20 + std Y+22,r2 + std Y+23,r26 + std Y+24,r21 + ldi r30,lo8(table_2) + ldi 
r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk + + .text +.global forkskinny_128_384_reverse_tk + .type forkskinny_128_384_reverse_tk, @function +forkskinny_128_384_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 
+ push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 
+ ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 + 
ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd 
r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + 
std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 
+#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out 
_SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 
+#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push 
r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if 
defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or 
r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm 
r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std 
Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + 
pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 + pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop 
r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + 
mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or 
r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + and r11,r16 + or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/internal-forkskinny.c b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/internal-forkskinny.c index af29f77..b96a04c 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/internal-forkskinny.c @@ -22,12 +22,23 @@ #include "internal-forkskinny.h" #include "internal-skinnyutil.h" -#include /** * \brief 7-bit round constants for all ForkSkinny block ciphers. 
*/ -static unsigned char const RC[87] = {0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7e, 0x7d, 0x7b, 0x77, 0x6f, 0x5f, 0x3e, 0x7c, 0x79, 0x73, 0x67, 0x4f, 0x1e, 0x3d, 0x7a, 0x75, 0x6b, 0x57, 0x2e, 0x5c, 0x38, 0x70, 0x61, 0x43, 0x06, 0x0d, 0x1b, 0x37, 0x6e, 0x5d, 0x3a, 0x74, 0x69, 0x53, 0x26, 0x4c, 0x18, 0x31, 0x62, 0x45, 0x0a, 0x15, 0x2b, 0x56, 0x2c, 0x58, 0x30, 0x60, 0x41, 0x02, 0x05, 0x0b, 0x17, 0x2f, 0x5e, 0x3c, 0x78, 0x71, 0x63, 0x47, 0x0e, 0x1d, 0x3b, 0x76, 0x6d, 0x5b,0x36, 0x6c, 0x59, 0x32, 0x64, 0x49, 0x12, 0x25, 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10}; +static unsigned char const RC[87] = { + 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7e, 0x7d, + 0x7b, 0x77, 0x6f, 0x5f, 0x3e, 0x7c, 0x79, 0x73, + 0x67, 0x4f, 0x1e, 0x3d, 0x7a, 0x75, 0x6b, 0x57, + 0x2e, 0x5c, 0x38, 0x70, 0x61, 0x43, 0x06, 0x0d, + 0x1b, 0x37, 0x6e, 0x5d, 0x3a, 0x74, 0x69, 0x53, + 0x26, 0x4c, 0x18, 0x31, 0x62, 0x45, 0x0a, 0x15, + 0x2b, 0x56, 0x2c, 0x58, 0x30, 0x60, 0x41, 0x02, + 0x05, 0x0b, 0x17, 0x2f, 0x5e, 0x3c, 0x78, 0x71, + 0x63, 0x47, 0x0e, 0x1d, 0x3b, 0x76, 0x6d, 0x5b, + 0x36, 0x6c, 0x59, 0x32, 0x64, 0x49, 0x12, 0x25, + 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 +}; static const uint32_t T[256] = {0x65656565, 0x4c4c4c4c, 0x6a6a6a6a, 0x42424242, 0x4b4b4b4b, 0x63636363, 0x43434343, 0x6b6b6b6b, 0x55555555, 0x75757575, 0x5a5a5a5a, 0x7a7a7a7a, 0x53535353, 0x73737373, 0x5b5b5b5b, 0x7b7b7b7b, 0x35353535, 0x8c8c8c8c, 0x3a3a3a3a, 0x81818181, 0x89898989, 0x33333333, 0x80808080, 0x3b3b3b3b, 0x95959595, 0x25252525, 0x98989898, 0x2a2a2a2a, 0x90909090, 0x23232323, 0x99999999, 0x2b2b2b2b, 0xe5e5e5e5, 0xcccccccc, 0xe8e8e8e8, 0xc1c1c1c1, 0xc9c9c9c9, 0xe0e0e0e0, 0xc0c0c0c0, 0xe9e9e9e9, 0xd5d5d5d5, 0xf5f5f5f5, 0xd8d8d8d8, 0xf8f8f8f8, 0xd0d0d0d0, 0xf0f0f0f0, 0xd9d9d9d9, 0xf9f9f9f9, 0xa5a5a5a5, 0x1c1c1c1c, 0xa8a8a8a8, 0x12121212, 0x1b1b1b1b, 0xa0a0a0a0, 0x13131313, 0xa9a9a9a9, 0x05050505, 0xb5b5b5b5, 0x0a0a0a0a, 0xb8b8b8b8, 0x03030303, 0xb0b0b0b0, 0x0b0b0b0b, 0xb9b9b9b9, 0x32323232, 0x88888888, 0x3c3c3c3c, 0x85858585, 
0x8d8d8d8d, 0x34343434, 0x84848484, 0x3d3d3d3d, 0x91919191, 0x22222222, 0x9c9c9c9c, 0x2c2c2c2c, 0x94949494, 0x24242424, 0x9d9d9d9d, 0x2d2d2d2d, 0x62626262, 0x4a4a4a4a, 0x6c6c6c6c, 0x45454545, 0x4d4d4d4d, 0x64646464, 0x44444444, 0x6d6d6d6d, 0x52525252, 0x72727272, 0x5c5c5c5c, 0x7c7c7c7c, 0x54545454, 0x74747474, 0x5d5d5d5d, 0x7d7d7d7d, 0xa1a1a1a1, 0x1a1a1a1a, 0xacacacac, 0x15151515, 0x1d1d1d1d, 0xa4a4a4a4, 0x14141414, 0xadadadad, 0x02020202, 0xb1b1b1b1, 0x0c0c0c0c, 0xbcbcbcbc, 0x04040404, 0xb4b4b4b4, 0x0d0d0d0d, 0xbdbdbdbd, 0xe1e1e1e1, 0xc8c8c8c8, 0xecececec, 0xc5c5c5c5, 0xcdcdcdcd, 0xe4e4e4e4, 0xc4c4c4c4, 0xedededed, 0xd1d1d1d1, 0xf1f1f1f1, 0xdcdcdcdc, 0xfcfcfcfc, 0xd4d4d4d4, 0xf4f4f4f4, 0xdddddddd, 0xfdfdfdfd, 0x36363636, 0x8e8e8e8e, 0x38383838, 0x82828282, 0x8b8b8b8b, 0x30303030, 0x83838383, 0x39393939, 0x96969696, 0x26262626, 0x9a9a9a9a, 0x28282828, 0x93939393, 0x20202020, 0x9b9b9b9b, 0x29292929, 0x66666666, 0x4e4e4e4e, 0x68686868, 0x41414141, 0x49494949, 0x60606060, 0x40404040, 0x69696969, 0x56565656, 0x76767676, 0x58585858, 0x78787878, 0x50505050, 0x70707070, 0x59595959, 0x79797979, 0xa6a6a6a6, 0x1e1e1e1e, 0xaaaaaaaa, 0x11111111, 0x19191919, 0xa3a3a3a3, 0x10101010, 0xabababab, 0x06060606, 0xb6b6b6b6, 0x08080808, 0xbabababa, 0x00000000, 0xb3b3b3b3, 0x09090909, 0xbbbbbbbb, 0xe6e6e6e6, 0xcececece, 0xeaeaeaea, 0xc2c2c2c2, 0xcbcbcbcb, 0xe3e3e3e3, 0xc3c3c3c3, 0xebebebeb, 0xd6d6d6d6, 0xf6f6f6f6, 0xdadadada, 0xfafafafa, 0xd3d3d3d3, 0xf3f3f3f3, 0xdbdbdbdb, 0xfbfbfbfb, 0x31313131, 0x8a8a8a8a, 0x3e3e3e3e, 0x86868686, 0x8f8f8f8f, 0x37373737, 0x87878787, 0x3f3f3f3f, 0x92929292, 0x21212121, 0x9e9e9e9e, 0x2e2e2e2e, 0x97979797, 0x27272727, 0x9f9f9f9f, 0x2f2f2f2f, 0x61616161, 0x48484848, 0x6e6e6e6e, 0x46464646, 0x4f4f4f4f, 0x67676767, 0x47474747, 0x6f6f6f6f, 0x51515151, 0x71717171, 0x5e5e5e5e, 0x7e7e7e7e, 0x57575757, 0x77777777, 0x5f5f5f5f, 0x7f7f7f7f, 0xa2a2a2a2, 0x18181818, 0xaeaeaeae, 0x16161616, 0x1f1f1f1f, 0xa7a7a7a7, 0x17171717, 0xafafafaf, 0x01010101, 0xb2b2b2b2, 
0x0e0e0e0e, 0xbebebebe, 0x07070707, 0xb7b7b7b7, 0x0f0f0f0f, 0xbfbfbfbf, 0xe2e2e2e2, 0xcacacaca, 0xeeeeeeee, 0xc6c6c6c6, 0xcfcfcfcf, 0xe7e7e7e7, 0xc7c7c7c7, 0xefefefef, 0xd2d2d2d2, 0xf2f2f2f2, 0xdededede, 0xfefefefe, 0xd7d7d7d7, 0xf7f7f7f7, 0xdfdfdfdf, 0xffffffff}; static const uint32_t T_inv[256] = {0xacacacac, 0xe8e8e8e8, 0x68686868, 0x3c3c3c3c, 0x6c6c6c6c, 0x38383838, 0xa8a8a8a8, 0xecececec, 0xaaaaaaaa, 0xaeaeaeae, 0x3a3a3a3a, 0x3e3e3e3e, 0x6a6a6a6a, 0x6e6e6e6e, 0xeaeaeaea, 0xeeeeeeee, 0xa6a6a6a6, 0xa3a3a3a3, 0x33333333, 0x36363636, 0x66666666, 0x63636363, 0xe3e3e3e3, 0xe6e6e6e6, 0xe1e1e1e1, 0xa4a4a4a4, 0x61616161, 0x34343434, 0x31313131, 0x64646464, 0xa1a1a1a1, 0xe4e4e4e4, 0x8d8d8d8d, 0xc9c9c9c9, 0x49494949, 0x1d1d1d1d, 0x4d4d4d4d, 0x19191919, 0x89898989, 0xcdcdcdcd, 0x8b8b8b8b, 0x8f8f8f8f, 0x1b1b1b1b, 0x1f1f1f1f, 0x4b4b4b4b, 0x4f4f4f4f, 0xcbcbcbcb, 0xcfcfcfcf, 0x85858585, 0xc0c0c0c0, 0x40404040, 0x15151515, 0x45454545, 0x10101010, 0x80808080, 0xc5c5c5c5, 0x82828282, 0x87878787, 0x12121212, 0x17171717, 0x42424242, 0x47474747, 0xc2c2c2c2, 0xc7c7c7c7, 0x96969696, 0x93939393, 0x03030303, 0x06060606, 0x56565656, 0x53535353, 0xd3d3d3d3, 0xd6d6d6d6, 0xd1d1d1d1, 0x94949494, 0x51515151, 0x04040404, 0x01010101, 0x54545454, 0x91919191, 0xd4d4d4d4, 0x9c9c9c9c, 0xd8d8d8d8, 0x58585858, 0x0c0c0c0c, 0x5c5c5c5c, 0x08080808, 0x98989898, 0xdcdcdcdc, 0x9a9a9a9a, 0x9e9e9e9e, 0x0a0a0a0a, 0x0e0e0e0e, 0x5a5a5a5a, 0x5e5e5e5e, 0xdadadada, 0xdededede, 0x95959595, 0xd0d0d0d0, 0x50505050, 0x05050505, 0x55555555, 0x00000000, 0x90909090, 0xd5d5d5d5, 0x92929292, 0x97979797, 0x02020202, 0x07070707, 0x52525252, 0x57575757, 0xd2d2d2d2, 0xd7d7d7d7, 0x9d9d9d9d, 0xd9d9d9d9, 0x59595959, 0x0d0d0d0d, 0x5d5d5d5d, 0x09090909, 0x99999999, 0xdddddddd, 0x9b9b9b9b, 0x9f9f9f9f, 0x0b0b0b0b, 0x0f0f0f0f, 0x5b5b5b5b, 0x5f5f5f5f, 0xdbdbdbdb, 0xdfdfdfdf, 0x16161616, 0x13131313, 0x83838383, 0x86868686, 0x46464646, 0x43434343, 0xc3c3c3c3, 0xc6c6c6c6, 0x41414141, 0x14141414, 0xc1c1c1c1, 0x84848484, 0x11111111, 
0x44444444, 0x81818181, 0xc4c4c4c4, 0x1c1c1c1c, 0x48484848, 0xc8c8c8c8, 0x8c8c8c8c, 0x4c4c4c4c, 0x18181818, 0x88888888, 0xcccccccc, 0x1a1a1a1a, 0x1e1e1e1e, 0x8a8a8a8a, 0x8e8e8e8e, 0x4a4a4a4a, 0x4e4e4e4e, 0xcacacaca, 0xcececece, 0x35353535, 0x60606060, 0xe0e0e0e0, 0xa5a5a5a5, 0x65656565, 0x30303030, 0xa0a0a0a0, 0xe5e5e5e5, 0x32323232, 0x37373737, 0xa2a2a2a2, 0xa7a7a7a7, 0x62626262, 0x67676767, 0xe2e2e2e2, 0xe7e7e7e7, 0x3d3d3d3d, 0x69696969, 0xe9e9e9e9, 0xadadadad, 0x6d6d6d6d, 0x39393939, 0xa9a9a9a9, 0xedededed, 0x3b3b3b3b, 0x3f3f3f3f, 0xabababab, 0xafafafaf, 0x6b6b6b6b, 0x6f6f6f6f, 0xebebebeb, 0xefefefef, 0x26262626, 0x23232323, 0xb3b3b3b3, 0xb6b6b6b6, 0x76767676, 0x73737373, 0xf3f3f3f3, 0xf6f6f6f6, 0x71717171, 0x24242424, 0xf1f1f1f1, 0xb4b4b4b4, 0x21212121, 0x74747474, 0xb1b1b1b1, 0xf4f4f4f4, 0x2c2c2c2c, 0x78787878, 0xf8f8f8f8, 0xbcbcbcbc, 0x7c7c7c7c, 0x28282828, 0xb8b8b8b8, 0xfcfcfcfc, 0x2a2a2a2a, 0x2e2e2e2e, 0xbabababa, 0xbebebebe, 0x7a7a7a7a, 0x7e7e7e7e, 0xfafafafa, 0xfefefefe, 0x25252525, 0x70707070, 0xf0f0f0f0, 0xb5b5b5b5, 0x75757575, 0x20202020, 0xb0b0b0b0, 0xf5f5f5f5, 0x22222222, 0x27272727, 0xb2b2b2b2, 0xb7b7b7b7, 0x72727272, 0x77777777, 0xf2f2f2f2, 0xf7f7f7f7, 0x2d2d2d2d, 0x79797979, 0xf9f9f9f9, 0xbdbdbdbd, 0x7d7d7d7d, 0x29292929, 0xb9b9b9b9, 0xfdfdfdfd, 0x2b2b2b2b, 0x2f2f2f2f, 0xbbbbbbbb, 0xbfbfbfbf, 0x7b7b7b7b, 0x7f7f7f7f, 0xfbfbfbfb, 0xffffffff}; @@ -36,77 +47,12 @@ static const uint32_t AC_column0[87] = {0x1000101, 0x3000303, 0x7000707, 0xf000f static const uint32_t AC_column1[87] = {0x0, 0x0, 0x0, 0x0, 0x10000, 0x30000, 0x70000, 0x70000, 0x70000, 0x70000, 0x60000, 0x50000, 0x30000, 0x70000, 0x70000, 0x70000, 0x60000, 0x40000, 0x10000, 0x30000, 0x70000, 0x70000, 0x60000, 0x50000, 0x20000, 0x50000, 0x30000, 0x70000, 0x60000, 0x40000, 0x0, 0x0, 0x10000, 0x30000, 0x60000, 0x50000, 0x30000, 0x70000, 0x60000, 0x50000, 0x20000, 0x40000, 0x10000, 0x30000, 0x60000, 0x40000, 0x0, 0x10000, 0x20000, 0x50000, 0x20000, 0x50000, 0x30000, 0x60000, 0x40000, 0x0, 0x0, 
0x0, 0x10000, 0x20000, 0x50000, 0x30000, 0x70000, 0x70000, 0x60000, 0x40000, 0x0, 0x10000, 0x30000, 0x70000, 0x60000, 0x50000, 0x30000, 0x60000, 0x50000, 0x30000, 0x60000, 0x40000, 0x10000, 0x20000, 0x40000, 0x10000, 0x20000, 0x50000, 0x20000, 0x40000, 0x10000}; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 +#if !defined(__AVR__) -/** - * \brief State information for ForkSkinny-128-256. - */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_256_state_t; - -#define load_column_8(dest, src) \ - do { \ - dest[0] = (src[12]) << 24 | (src[8]) << 16 | (src[4]) << 8 | (src[0]); \ - dest[1] = (src[13]) << 24 | (src[9]) << 16 | (src[5]) << 8 | (src[1]); \ - dest[2] = (src[14]) << 24 | (src[10]) << 16 | (src[6]) << 8 | (src[2]); \ - dest[3] = (src[15]) << 24 | (src[11]) << 16 | (src[7]) << 8 | (src[3]); \ - } while(0) - -#define store_column_8(dest, src) \ - do { \ - dest[0] = (uint8_t) (src[0]); dest[1] = (uint8_t) (src[1]); dest[2] = (uint8_t) (src[2]); dest[3] = (uint8_t) (src[3]); \ - dest[4] = (uint8_t) (src[0]>>8); dest[5] = (uint8_t) (src[1]>>8); dest[6] = (uint8_t) (src[2]>>8); dest[7] = (uint8_t) (src[3]>>8); \ - dest[8] = (uint8_t) (src[0]>>16);dest[9] = (uint8_t) (src[1]>>16);dest[10]= (uint8_t) (src[2]>>16);dest[11]= (uint8_t)(src[3]>>16); \ - dest[12]= (uint8_t) (src[0]>>24);dest[13]= (uint8_t) (src[1]>>24);dest[14]= (uint8_t) (src[2]>>24);dest[15]= (uint8_t)(src[3]>>24); \ - } while(0) - -#define rows_to_columns_32(columns, row0, row1, row2, row3) \ - do { \ - columns[0] = (row3 & 0xFF) << 24|(row2 & 0xFF) << 16|(row1 & 0xFF) << 8 | (row0 & 0xFF);\ - columns[1] = (row3 & 0xFF00) << 16|(row2 & 0xFF00) << 8 |(row1 & 0xFF00) | 
(row0>>8 & 0xFF);\ - columns[2] = (row3 & 0xFF0000) << 8 |(row2 & 0xFF0000) |(row1 & 0xFF0000) >> 8 | (row0>>16 & 0xFF);\ - columns[3] = (row3 & 0xFF000000) |(row2 & 0xFF000000) >> 8 |(row1 & 0xFF000000) >> 16| (row0>>24 & 0xFF);\ - } while(0) - -#define columns_to_rows_32(rows, column0, column1, column2, column3) rows_to_columns_32(rows, column0, column1, column2, column3) - -#define TK_to_column_256(columns, state) \ - do { \ - uint32_t TK0 = state->TK1[0] ^ state->TK2[0];\ - uint32_t TK1 = state->TK1[1] ^ state->TK2[1]; \ - uint32_t tk00 = TK0 & 0xFF; \ - uint32_t tk01 = TK0 & 0xFF00;\ - uint32_t tk02 = TK0 & 0xFF0000;\ - uint32_t tk03 = TK0 & 0xFF000000;\ - columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ - columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ - columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ - columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ - } while(0) - -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { - uint32_t s0, s1, s2, s3; + uint32_t s0, s1, s2, s3, t0, t1, t2, t3; uint32_t tk_columns[4]; /* Load the state into local variables */ @@ -115,102 +61,45 @@ static void forkskinny_128_256_round s2 = state->S[2]; s3 = state->S[3]; - TK_to_column_256(tk_columns, state); + /* Perform all requested rounds */ + for (; first < last; ++first) { - state->S[0] = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[round]; - state->S[1] = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[round]; - state->S[2] = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; - state->S[3] = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) ^ tk_columns[3]; + TK_to_column_256(tk_columns, state); - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); -} + t0 = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[first]; + t1 = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[first]; + t2 = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; + t3 = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) 
^ tk_columns[3]; + /* Permute TK1 and TK2 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); -void forkskinny_128_256_encrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_256_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - - /* State stored per column */ - load_column_8(state.S, input); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) { - forkskinny_128_256_round(&state, round); + s0 = t0; s1 = t1; s2 = t2; s3 = t3; } - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - store_column_8(output_right, state.S); - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x51051001; /* Branching constant */ - state.S[1] ^= 0xa20a2002; - state.S[2] ^= 0x44144104; - state.S[3] ^= 0x88288208; - - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - 
FORKSKINNY_128_256_ROUNDS_AFTER); - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_256_round(&state, round); - } - store_column_8(output_left, state.S); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - store_column_8(output_right, state.S); - } + /* Save the local variables back to the state */ + state->S[0] = s0; + state->S[1] = s1; + state->S[2] = s2; + state->S[3] = s3; } -static void forkskinny_128_256_inv_round_first(forkskinny_128_256_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3, temp; - uint8_t rc; +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) +{ + uint32_t s0, s1, s2, s3, t0, t1, t2, t3, tk0, tk1; + uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; + t0 = state->S[0]; + t1 = state->S[1]; + t2 = state->S[2]; + t3 = state->S[3]; + + // FIRST ROUND /* Permute TK1 and TK2 for the next round */ skinny128_inv_LFSR2(state->TK2[0]); @@ -219,215 +108,128 @@ static void forkskinny_128_256_inv_round_first(forkskinny_128_256_state_t *state skinny128_inv_permute_tk(state->TK2); /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - temp = state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s1 ^= leftRotate8(temp); - s2 ^= 0x020000; - - /* Save the local variables back to the state but first convert them to columns*/ - rows_to_columns_32(state->S, s0, s1, s2, s3); -} - -static void forkskinny_128_256_inv_round(forkskinny_128_256_state_t 
*state, unsigned round){ - uint32_t s0, s1, s2, s3, tk0, tk1; - uint8_t rc; - - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - - state->S[0] = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); - state->S[1] = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); - state->S[2] = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); - state->S[3] = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + s0 = t0; + t0 = t1; + t1 = t2; + t2 = t3; + t3 = s0 ^ t2; + t2 ^= t0; + t1 ^= t2; /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - tk0 = state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - tk1 = state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - tk1 = leftRotate8(tk1); - // temp = 0x020000; - state->S[0] ^= (((tk0) &0xff) | ((tk1<<8)&0xff00)); - state->S[1] ^= (((tk0>>8) &0xff) | ((tk1)&0xff00)); - state->S[2] ^= (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; - state->S[3] ^= (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); -} - -static void forkskinny_128_256_inv_round_final(forkskinny_128_256_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3; + rc = RC[--first]; + t0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + t1 ^= leftRotate8((state->TK1[1] ^ state->TK2[1] ^ (rc >> 4))); + t2 ^= 0x020000; + + /* Save the local variables in temp but first convert them to columns*/ + rows_to_columns_32(s0, 
s1, s2, s3, t0, t1, t2, t3); + + + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + + t0 = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); + t1 = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); + t2 = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); + t3 = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + + /* XOR the shifted round constant and the shifted subkey for this round */ + rc = RC[--first]; + tk0 = state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + tk1 = leftRotate8((state->TK1[1] ^ state->TK2[1] ^ (rc >> 4))); + + s0 = t0 ^ (((tk0) &0xff) | ((tk1<<8)&0xff00)); + s1 = t1 ^ (((tk0>>8) &0xff) | ((tk1)&0xff00)); + s2 = t2 ^ (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; + s3 = t3 ^ (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); + } - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; + // FINAL ROUND - /* Apply the inverse of the S-box to all cells in the state */ + /* Apply the inverse of the S-box to all cells in the state */ skinny128_inv_sbox(s0); skinny128_inv_sbox(s1); skinny128_inv_sbox(s2); skinny128_inv_sbox(s3); /* Save the local variables back to the state but first convert them back to rows*/ - columns_to_rows_32(state->S, s0, s1, s2, s3); + columns_to_rows_32(t0, t1, t2, t3, s0, s1, s2, s3); /* Shift the cells in the rows left, which moves the cell * values down closer to the LSB. 
That is, we do a right * rotate on the word to rotate the cells in the word left */ - state->S[1] = rightRotate8(state->S[1]); - state->S[2] = rightRotate16(state->S[2]); - state->S[3] = rightRotate24(state->S[3]); + state->S[0] = t0; + state->S[1] = rightRotate8(t1); + state->S[2] = rightRotate16(t2); + state->S[3] = rightRotate24(t3); } -void forkskinny_128_256_decrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds) { - forkskinny_128_256_state_t state; - forkskinny_128_256_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - forkskinny_128_256_inv_round_first(&state, (FORKSKINNY_128_256_ROUNDS_BEFORE+FORKSKINNY_128_256_ROUNDS_AFTER*2)-1); - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2)-1; - round > (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); --round) { - forkskinny_128_256_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + --rounds; } - forkskinny_128_256_inv_round_final(&state, (FORKSKINNY_128_256_ROUNDS_BEFORE+FORKSKINNY_128_256_ROUNDS_AFTER)); - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - } - - /* Save the state and the tweakey at the forking point, convert state to columns */ - memcpy(fstate.TK1, state.TK1, 16); - memcpy(fstate.TK2, state.TK2, 16); - rows_to_columns_32(fstate.S,state.S[0],state.S[1], state.S[2], state.S[3]); +} - /* Generate the left output block after another "before" rounds */ - forkskinny_128_256_inv_round_first(&state, FORKSKINNY_128_256_ROUNDS_BEFORE-1); - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE-1; round > 0; --round) { - 
forkskinny_128_256_inv_round(&state, round - 1); +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + } + rounds -= 16; } - forkskinny_128_256_inv_round_final(&state, 0); - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + --rounds; } - store_column_8(output_right,fstate.S); - } -/** - * \brief Number of rounds of ForkSkinny-128-384 before forking. - */ -#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 - -/** - * \brief Number of rounds of ForkSkinny-128-384 after forking. - */ -#define FORKSKINNY_128_384_ROUNDS_AFTER 31 - -/** - * \brief State information for ForkSkinny-128-384. 
- */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -#define TK_to_column_384(columns, state) \ - do { \ - uint32_t TK0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0];\ - uint32_t TK1 = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1];\ - uint32_t tk00 = TK0 & 0xFF; \ - uint32_t tk01 = TK0 & 0xFF00;\ - uint32_t tk02 = TK0 & 0xFF0000;\ - uint32_t tk03 = TK0 & 0xFF000000;\ - columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ - columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ - columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ - columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ - } while(0) - -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { - uint32_t s0, s1, s2, s3; + uint32_t s0, s1, s2, s3, t0, t1, t2, t3; uint32_t tk_columns[4]; /* Load the state into local variables */ @@ -436,148 +238,48 @@ static void forkskinny_128_384_round s2 = state->S[2]; s3 = state->S[3]; - TK_to_column_384(tk_columns, state); - - state->S[0] = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[round]; - state->S[1] = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[round]; - state->S[2] = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; - state->S[3] = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) ^ tk_columns[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_permute_tk(state->TK3); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); - skinny128_LFSR3(state->TK3[0]); - skinny128_LFSR3(state->TK3[1]); -} + /* Perform all requested rounds */ + for (; first < last; ++first) { -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - 
state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - - /* State stored per column */ - load_column_8(state.S, input); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, round); - } + TK_to_column_384(tk_columns, state); - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - store_column_8(output_right, state.S); + t0 = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[first]; + t1 = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[first]; + t2 = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; + t3 = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) ^ tk_columns[3]; - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x51051001; /* Branching constant */ - state.S[1] ^= 0xa20a2002; - state.S[2] ^= 0x44144104; - state.S[3] ^= 0x88288208; - 
for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, round); - } - store_column_8(output_left, state.S); - - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - store_column_8(output_right, state.S); + /* Permute TK1, TK2, and TK3 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + + s0 = t0; s1 = t1; s2 = t2; s3 = t3; } -} + /* Save the local variables back to the state */ + state->S[0] = s0; + state->S[1] = s1; + state->S[2] = s2; + state->S[3] = s3; +} -static void forkskinny_128_384_inv_round_first(forkskinny_128_384_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3, temp; - uint8_t rc; +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) +{ + uint32_t s0, s1, s2, s3, t0, t1, t2, t3, tk0, tk1; + uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_LFSR3(state->TK3[0]); - skinny128_inv_LFSR3(state->TK3[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - skinny128_inv_permute_tk(state->TK3); + t0 = state->S[0]; + t1 = state->S[1]; + t2 = state->S[2]; + t3 = state->S[3]; - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; 
- - /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; - temp = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s1 ^= leftRotate8(temp); - s2 ^= 0x020000; - - /* Save the local variables back to the state but first convert them to columns*/ - rows_to_columns_32(state->S, s0, s1, s2, s3); -} - -static void forkskinny_128_384_inv_round(forkskinny_128_384_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3, tk0, tk1; - uint8_t rc; - - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; + // FIRST ROUND /* Permute TK1 and TK2 for the next round */ skinny128_inv_LFSR2(state->TK2[0]); @@ -588,177 +290,144 @@ static void forkskinny_128_384_inv_round(forkskinny_128_384_state_t *state, unsi skinny128_inv_permute_tk(state->TK2); skinny128_inv_permute_tk(state->TK3); - - state->S[0] = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); - state->S[1] = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); - state->S[2] = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); - state->S[3] = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + /* Inverse mix of the columns */ + s0 = t0; + t0 = t1; + t1 = t2; + t2 = t3; + t3 = s0 ^ t2; + t2 ^= t0; + t1 ^= t2; /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - tk0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; - tk1 = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - tk1 = leftRotate8(tk1); - - state->S[0] 
^= (((tk0) &0xff) | ((tk1<<8)&0xff00)); - state->S[1] ^= (((tk0>>8) &0xff) | ((tk1)&0xff00)); - state->S[2] ^= (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; - state->S[3] ^= (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); -} + rc = RC[--first]; + t0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; + t1 ^= leftRotate8((state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4))); + t2 ^= 0x020000; + + /* Save the local variables in temp but first convert them to columns*/ + rows_to_columns_32(s0, s1, s2, s3, t0, t1, t2, t3); + + + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + + t0 = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); + t1 = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); + t2 = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); + t3 = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + + /* XOR the shifted round constant and the shifted subkey for this round */ + rc = RC[--first]; + tk0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; + tk1 = leftRotate8((state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4))); + + s0 = t0 ^ (((tk0) &0xff) | ((tk1<<8)&0xff00)); + s1 = t1 ^ (((tk0>>8) &0xff) | ((tk1)&0xff00)); + s2 = t2 ^ (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; + s3 = t3 ^ (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); + 
} -static void forkskinny_128_384_inv_round_final(forkskinny_128_384_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3; + // FINAL ROUND - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Apply the inverse of the S-box to all cells in the state */ + /* Apply the inverse of the S-box to all cells in the state */ skinny128_inv_sbox(s0); skinny128_inv_sbox(s1); skinny128_inv_sbox(s2); skinny128_inv_sbox(s3); /* Save the local variables back to the state but first convert them back to rows*/ - columns_to_rows_32(state->S, s0, s1, s2, s3); + columns_to_rows_32(t0, t1, t2, t3, s0, s1, s2, s3); /* Shift the cells in the rows left, which moves the cell * values down closer to the LSB. That is, we do a right * rotate on the word to rotate the cells in the word left */ - state->S[1] = rightRotate8(state->S[1]); - state->S[2] = rightRotate16(state->S[2]); - state->S[3] = rightRotate24(state->S[3]); + state->S[0] = t0; + state->S[1] = rightRotate8(t1); + state->S[2] = rightRotate16(t2); + state->S[3] = rightRotate24(t3); } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 
44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_permute_tk(state.TK3); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); - skinny128_LFSR3(state.TK3[0]); - skinny128_LFSR3(state.TK3[1]); - } - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - forkskinny_128_384_inv_round_first(&state, (FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER * 2)-1); - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2)-1; - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, round - 1); - } - forkskinny_128_384_inv_round_final(&state, (FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER)); - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_LFSR3(state.TK3[0]); - skinny128_inv_LFSR3(state.TK3[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - skinny128_inv_permute_tk(state.TK3); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + skinny128_LFSR3(state->TK3[2]); + skinny128_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point, convert state to columns */ - memcpy(fstate.TK1, state.TK1, 16); - memcpy(fstate.TK2, state.TK2, 16); - memcpy(fstate.TK3, state.TK3, 16); - rows_to_columns_32(fstate.S,state.S[0],state.S[1], state.S[2], state.S[3]); - - /* Generate the left output block after another "before" rounds */ - forkskinny_128_384_inv_round_first(&state, FORKSKINNY_128_384_ROUNDS_BEFORE - 1); - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE-1; round > 0; --round) { - forkskinny_128_384_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + --rounds; } - forkskinny_128_384_inv_round_final(&state, 0); - - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, round); - } - store_column_8(output_right, fstate.S); } -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. 
- */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 - -/** - * \brief State information for ForkSkinny-64-192. - */ -typedef struct +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - uint16_t TK1[4]; /**< First part of the tweakey */ - uint16_t TK2[4]; /**< Second part of the tweakey */ - uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_LFSR3(state->TK3[2]); + skinny128_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; + } -} forkskinny_64_192_state_t; + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + --rounds; + } +} -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. 
- */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -769,144 +438,55 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + } /* Save the local 
variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_permute_tk(state->TK1); - skinny64_permute_tk(state->TK2); - skinny64_permute_tk(state->TK3); - skinny64_LFSR2(state->TK2[0]); - skinny64_LFSR2(state->TK2[1]); - skinny64_LFSR3(state->TK3[0]); - skinny64_LFSR3(state->TK3[1]); } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - 
be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -917,42 +497,45 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_inv_LFSR2(state->TK2[0]); - skinny64_inv_LFSR2(state->TK2[1]); - skinny64_inv_LFSR3(state->TK3[0]); - skinny64_inv_LFSR3(state->TK3[1]); - skinny64_inv_permute_tk(state->TK1); - skinny64_inv_permute_tk(state->TK2); - skinny64_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round 
constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -961,92 +544,74 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds) { - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - skinny64_permute_tk(state.TK1); - skinny64_permute_tk(state.TK2); - skinny64_permute_tk(state.TK3); - skinny64_LFSR2(state.TK2[0]); - skinny64_LFSR2(state.TK2[1]); - skinny64_LFSR3(state.TK3[0]); - 
skinny64_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR2(state->TK2[2]); + skinny64_LFSR2(state->TK2[3]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + skinny64_LFSR3(state->TK3[2]); + skinny64_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_AFTER; ++round) { - skinny64_inv_LFSR2(state.TK2[0]); - skinny64_inv_LFSR2(state.TK2[1]); - skinny64_inv_LFSR3(state.TK3[0]); - skinny64_inv_LFSR3(state.TK3[1]); - skinny64_inv_permute_tk(state.TK1); - skinny64_inv_permute_tk(state.TK2); - skinny64_inv_permute_tk(state.TK3); +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_inv_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR2(state->TK2[2]); + skinny64_inv_LFSR2(state->TK2[3]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_LFSR3(state->TK3[2]); + skinny64_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + --rounds; } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); } +#endif /* !__AVR__ */ diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/internal-forkskinny.h b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/internal-forkskinny.h index 0c1a707..e3014d3 100644 
--- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,158 @@ extern "C" { #endif /** + * \brief State information for ForkSkinny-128-256. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +/** + * \brief State information for ForkSkinny-128-384. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +/** + * \brief State information for ForkSkinny-64-192. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. 
+ */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first First round to apply. 
+ * \param last Last round to apply plus 1. + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. * * \param key 256-bit tweakey for ForkSkinny-128-256. 
diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/internal-skinnyutil.h index 83136cb..f2bc8ca 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/opt32_table/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. 
The original version from the specification is * equivalent to: @@ -319,6 +349,61 @@ do { \ x = ((x << 1) & 0xEEEEU) | ((x >> 3) & 0x1111U); \ } while (0) +#define rows_to_columns_32(column0, column1, column2, column3, row0, row1, row2, row3) \ + do { \ + column0 = (row3 & 0xFF) << 24|(row2 & 0xFF) << 16|(row1 & 0xFF) << 8 | (row0 & 0xFF);\ + column1 = (row3 & 0xFF00) << 16|(row2 & 0xFF00) << 8 |(row1 & 0xFF00) | (row0>>8 & 0xFF);\ + column2 = (row3 & 0xFF0000) << 8 |(row2 & 0xFF0000) |(row1 & 0xFF0000) >> 8 | (row0>>16 & 0xFF);\ + column3 = (row3 & 0xFF000000) |(row2 & 0xFF000000) >> 8 |(row1 & 0xFF000000) >> 16| (row0>>24 & 0xFF);\ + } while(0) + +#define columns_to_rows_32(row0, row1, row2, row3, column0, column1, column2, column3) rows_to_columns_32(row0, row1, row2, row3, column0, column1, column2, column3) + +#define load_column_8(dest, src) \ + do { \ + dest[0] = (src[12]) << 24 | (src[8]) << 16 | (src[4]) << 8 | (src[0]); \ + dest[1] = (src[13]) << 24 | (src[9]) << 16 | (src[5]) << 8 | (src[1]); \ + dest[2] = (src[14]) << 24 | (src[10]) << 16 | (src[6]) << 8 | (src[2]); \ + dest[3] = (src[15]) << 24 | (src[11]) << 16 | (src[7]) << 8 | (src[3]); \ + } while(0) + +#define store_column_8(dest, src) \ + do { \ + dest[0] = (uint8_t) (src[0]); dest[1] = (uint8_t) (src[1]); dest[2] = (uint8_t) (src[2]); dest[3] = (uint8_t) (src[3]); \ + dest[4] = (uint8_t) (src[0]>>8); dest[5] = (uint8_t) (src[1]>>8); dest[6] = (uint8_t) (src[2]>>8); dest[7] = (uint8_t) (src[3]>>8); \ + dest[8] = (uint8_t) (src[0]>>16);dest[9] = (uint8_t) (src[1]>>16);dest[10]= (uint8_t) (src[2]>>16);dest[11]= (uint8_t)(src[3]>>16); \ + dest[12]= (uint8_t) (src[0]>>24);dest[13]= (uint8_t) (src[1]>>24);dest[14]= (uint8_t) (src[2]>>24);dest[15]= (uint8_t)(src[3]>>24); \ + } while(0) + + +#define TK_to_column_256(columns, state) \ + do { \ + uint32_t TK0 = state->TK1[0] ^ state->TK2[0];\ + uint32_t TK1 = state->TK1[1] ^ state->TK2[1]; \ + uint32_t tk00 = TK0 & 0xFF; \ + uint32_t tk01 = TK0 & 0xFF00;\ 
+ uint32_t tk02 = TK0 & 0xFF0000;\ + uint32_t tk03 = TK0 & 0xFF000000;\ + columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ + columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ + columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ + columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ + } while(0) + +#define TK_to_column_384(columns, state) \ +do { \ + uint32_t TK0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0];\ + uint32_t TK1 = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1];\ + uint32_t tk00 = TK0 & 0xFF; \ + uint32_t tk01 = TK0 & 0xFF00;\ + uint32_t tk02 = TK0 & 0xFF0000;\ + uint32_t tk03 = TK0 & 0xFF000000;\ + columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ + columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ + columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ + columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ +} while(0) + /** @endcond */ #ifdef __cplusplus diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/aead-common.h b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/aead-common.h index 2be95eb..8429f59 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/aead-common.h +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/aead-common.h @@ -154,7 +154,7 @@ typedef void (*aead_xof_squeeze_t) /** * \brief No special AEAD features. */ -#define AEAD_FLAG_NONE 0x0000 +#define AEAD_FLAG_NONE 0x0000 /** * \brief The natural byte order of the AEAD cipher is little-endian. @@ -166,7 +166,18 @@ typedef void (*aead_xof_squeeze_t) * numbers as nonces. The application needs to know whether the sequence * number should be packed into the leading or trailing bytes of the nonce. 
*/ -#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 +#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 + +/** + * \brief The AEAD mode provides side-channel protection for the key. + */ +#define AEAD_FLAG_SC_PROTECT_KEY 0x0002 + +/** + * \brief The AEAD mode provides side-channel protection for all block + * operations. + */ +#define AEAD_FLAG_SC_PROTECT_ALL 0x0004 /** * \brief Meta-information about an AEAD cipher. diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/forkae.c b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/forkae.c index 4a9671a..ead8ada 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/forkae.c +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/forkae.c @@ -22,7 +22,6 @@ #include "forkae.h" #include "internal-forkskinny.h" -#include "internal-util.h" #include aead_cipher_t const forkae_paef_64_192_cipher = { @@ -138,3 +137,399 @@ aead_cipher_t const forkae_saef_128_256_cipher = { #define FORKAE_TWEAKEY_REDUCED_SIZE 32 #define FORKAE_BLOCK_FUNC forkskinny_128_256 #include "internal-forkae-saef.h" + +/* Helper functions to implement the forking encrypt/decrypt block operations + * on top of the basic "perform N rounds" functions in internal-forkskinny.c */ + +void forkskinny_128_256_encrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_tweakey_schedule_t tks; + + /* Iterate the tweakey schedule */ + if (output_left && output_right){ + forkskinny_128_256_init_tks(&tks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); + } + else{ + forkskinny_128_256_init_tks(&tks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + FORKSKINNY_128_256_ROUNDS_AFTER); + } + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); 
+ state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_256_rounds(&state, &tks, 0, FORKSKINNY_128_256_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_256_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_256_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_256_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_256_decrypt + (const unsigned 
char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_state_t fstate; + forkskinny_128_256_tweakey_schedule_t tks; + + /* Iterate the tweakey schedule */ + forkskinny_128_256_init_tks(&tks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); + + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_256_inv_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_256_inv_rounds + (&state, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_256_rounds + (&fstate, &tks, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 12, fstate.S[3]); +} + +void forkskinny_128_384_encrypt + (const unsigned char key[48], unsigned char *output_left, + 
unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_tweakey_schedule_t tks; + + /* Iterate key schedule */ + if (output_left && output_right){ + forkskinny_128_384_init_tks(&tks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); + } + else{ + forkskinny_128_384_init_tks(&tks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER); + } + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_384_rounds(&state, &tks, 0, FORKSKINNY_128_384_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_384_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_384_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + 
le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_384_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_384_decrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_state_t fstate; + forkskinny_128_384_tweakey_schedule_t tks; + + /* Iterate key schedule */ + forkskinny_128_384_init_tks(&tks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); + + + /* Unpack the input */ + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_384_inv_rounds + (&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_384_inv_rounds(&state, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + 
le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_384_rounds + (&fstate, &tks, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 12, fstate.S[3]); +} + +void forkskinny_64_192_encrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_tweakey_schedule_t tks; + + /* Iterate key schedule */ + if (output_left && output_right){ + forkskinny_64_192_init_tks(&tks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); + } + else{ + forkskinny_64_192_init_tks(&tks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + FORKSKINNY_64_192_ROUNDS_AFTER); + } + + /* Unpack the input */ + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Run all of the rounds before the forking point */ + forkskinny_64_192_rounds(&state, &tks, 0, FORKSKINNY_64_192_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint16_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_64_192_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + 
+ /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x1249U; /* Branching constant */ + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + forkskinny_64_192_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_64_192_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + } +} + +void forkskinny_64_192_decrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_state_t fstate; + forkskinny_64_192_tweakey_schedule_t tks; + + /* Iterate key schedule */ + forkskinny_64_192_init_tks(&tks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Unpack the input */ + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /* 
Remove the branching constant */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_64_192_inv_rounds(&state, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_64_192_rounds + (&fstate, &tks, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 67 + .byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + .byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte 
193 + .byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 + .byte 100 + .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + .byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 194 + .byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 + .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + 
.byte 97 + .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 + .byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + .byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + 
.byte 93 + .byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 223 + .byte 22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + 
.byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 137 + 
.byte 139 + .byte 141 + .byte 143 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_3, @object + .size table_3, 256 +table_3: + .byte 0 + .byte 128 + .byte 1 + .byte 129 + .byte 2 + .byte 130 + .byte 3 + .byte 131 + .byte 4 + .byte 132 + .byte 5 + .byte 133 + .byte 6 + .byte 134 + .byte 7 + .byte 135 + .byte 8 + .byte 136 + .byte 9 + .byte 137 + .byte 10 + .byte 138 + .byte 11 + .byte 139 + .byte 12 + .byte 140 + .byte 13 + .byte 141 + .byte 14 + .byte 142 + .byte 15 + .byte 143 + .byte 16 + .byte 144 + .byte 17 + .byte 145 + .byte 18 + .byte 146 + .byte 19 + .byte 147 + .byte 20 + .byte 148 + .byte 21 + .byte 149 + .byte 22 + .byte 150 + .byte 23 + .byte 151 + .byte 24 + .byte 152 + .byte 25 + .byte 153 + .byte 26 + .byte 154 + .byte 27 + .byte 155 + .byte 28 + .byte 156 + .byte 29 + .byte 157 + .byte 30 + .byte 158 + .byte 31 + .byte 159 + .byte 160 + .byte 32 + .byte 161 + .byte 33 + .byte 162 + .byte 34 + .byte 163 + .byte 35 + .byte 164 + .byte 36 + .byte 165 + .byte 37 + .byte 166 + .byte 38 + .byte 167 + .byte 39 + .byte 168 + .byte 40 + .byte 169 + .byte 41 + .byte 170 + .byte 42 + .byte 171 + .byte 43 + .byte 172 + .byte 44 + .byte 173 + .byte 45 + .byte 174 + .byte 46 + .byte 175 + .byte 47 + .byte 176 + .byte 48 + .byte 177 + .byte 49 + .byte 178 + .byte 50 + .byte 179 + .byte 
51 + .byte 180 + .byte 52 + .byte 181 + .byte 53 + .byte 182 + .byte 54 + .byte 183 + .byte 55 + .byte 184 + .byte 56 + .byte 185 + .byte 57 + .byte 186 + .byte 58 + .byte 187 + .byte 59 + .byte 188 + .byte 60 + .byte 189 + .byte 61 + .byte 190 + .byte 62 + .byte 191 + .byte 63 + .byte 64 + .byte 192 + .byte 65 + .byte 193 + .byte 66 + .byte 194 + .byte 67 + .byte 195 + .byte 68 + .byte 196 + .byte 69 + .byte 197 + .byte 70 + .byte 198 + .byte 71 + .byte 199 + .byte 72 + .byte 200 + .byte 73 + .byte 201 + .byte 74 + .byte 202 + .byte 75 + .byte 203 + .byte 76 + .byte 204 + .byte 77 + .byte 205 + .byte 78 + .byte 206 + .byte 79 + .byte 207 + .byte 80 + .byte 208 + .byte 81 + .byte 209 + .byte 82 + .byte 210 + .byte 83 + .byte 211 + .byte 84 + .byte 212 + .byte 85 + .byte 213 + .byte 86 + .byte 214 + .byte 87 + .byte 215 + .byte 88 + .byte 216 + .byte 89 + .byte 217 + .byte 90 + .byte 218 + .byte 91 + .byte 219 + .byte 92 + .byte 220 + .byte 93 + .byte 221 + .byte 94 + .byte 222 + .byte 95 + .byte 223 + .byte 224 + .byte 96 + .byte 225 + .byte 97 + .byte 226 + .byte 98 + .byte 227 + .byte 99 + .byte 228 + .byte 100 + .byte 229 + .byte 101 + .byte 230 + .byte 102 + .byte 231 + .byte 103 + .byte 232 + .byte 104 + .byte 233 + .byte 105 + .byte 234 + .byte 106 + .byte 235 + .byte 107 + .byte 236 + .byte 108 + .byte 237 + .byte 109 + .byte 238 + .byte 110 + .byte 239 + .byte 111 + .byte 240 + .byte 112 + .byte 241 + .byte 113 + .byte 242 + .byte 114 + .byte 243 + .byte 115 + .byte 244 + .byte 116 + .byte 245 + .byte 117 + .byte 246 + .byte 118 + .byte 247 + .byte 119 + .byte 248 + .byte 120 + .byte 249 + .byte 121 + .byte 250 + .byte 122 + .byte 251 + .byte 123 + .byte 252 + .byte 124 + .byte 253 + .byte 125 + .byte 254 + .byte 126 + .byte 255 + .byte 127 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_4, @object + .size table_4, 174 +table_4: + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 7 + .byte 0 + .byte 15 + .byte 0 + .byte 15 + .byte 1 + .byte 
15 + .byte 3 + .byte 14 + .byte 7 + .byte 13 + .byte 7 + .byte 11 + .byte 7 + .byte 7 + .byte 7 + .byte 15 + .byte 6 + .byte 15 + .byte 5 + .byte 14 + .byte 3 + .byte 12 + .byte 7 + .byte 9 + .byte 7 + .byte 3 + .byte 7 + .byte 7 + .byte 6 + .byte 15 + .byte 4 + .byte 14 + .byte 1 + .byte 13 + .byte 3 + .byte 10 + .byte 7 + .byte 5 + .byte 7 + .byte 11 + .byte 6 + .byte 7 + .byte 5 + .byte 14 + .byte 2 + .byte 12 + .byte 5 + .byte 8 + .byte 3 + .byte 0 + .byte 7 + .byte 1 + .byte 6 + .byte 3 + .byte 4 + .byte 6 + .byte 0 + .byte 13 + .byte 0 + .byte 11 + .byte 1 + .byte 7 + .byte 3 + .byte 14 + .byte 6 + .byte 13 + .byte 5 + .byte 10 + .byte 3 + .byte 4 + .byte 7 + .byte 9 + .byte 6 + .byte 3 + .byte 5 + .byte 6 + .byte 2 + .byte 12 + .byte 4 + .byte 8 + .byte 1 + .byte 1 + .byte 3 + .byte 2 + .byte 6 + .byte 5 + .byte 4 + .byte 10 + .byte 0 + .byte 5 + .byte 1 + .byte 11 + .byte 2 + .byte 6 + .byte 5 + .byte 12 + .byte 2 + .byte 8 + .byte 5 + .byte 0 + .byte 3 + .byte 0 + .byte 6 + .byte 1 + .byte 4 + .byte 2 + .byte 0 + .byte 5 + .byte 0 + .byte 11 + .byte 0 + .byte 7 + .byte 1 + .byte 15 + .byte 2 + .byte 14 + .byte 5 + .byte 12 + .byte 3 + .byte 8 + .byte 7 + .byte 1 + .byte 7 + .byte 3 + .byte 6 + .byte 7 + .byte 4 + .byte 14 + .byte 0 + .byte 13 + .byte 1 + .byte 11 + .byte 3 + .byte 6 + .byte 7 + .byte 13 + .byte 6 + .byte 11 + .byte 5 + .byte 6 + .byte 3 + .byte 12 + .byte 6 + .byte 9 + .byte 5 + .byte 2 + .byte 3 + .byte 4 + .byte 6 + .byte 9 + .byte 4 + .byte 2 + .byte 1 + .byte 5 + .byte 2 + .byte 10 + .byte 4 + .byte 4 + .byte 1 + .byte 9 + .byte 2 + .byte 2 + .byte 5 + .byte 4 + .byte 2 + .byte 8 + .byte 4 + .byte 0 + .byte 1 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_5, @object + .size table_5, 256 +table_5: + .byte 204 + .byte 198 + .byte 201 + .byte 192 + .byte 193 + .byte 202 + .byte 194 + .byte 203 + .byte 195 + .byte 200 + .byte 197 + .byte 205 + .byte 196 + .byte 206 + .byte 199 + .byte 207 + .byte 108 + .byte 102 + 
.byte 105 + .byte 96 + .byte 97 + .byte 106 + .byte 98 + .byte 107 + .byte 99 + .byte 104 + .byte 101 + .byte 109 + .byte 100 + .byte 110 + .byte 103 + .byte 111 + .byte 156 + .byte 150 + .byte 153 + .byte 144 + .byte 145 + .byte 154 + .byte 146 + .byte 155 + .byte 147 + .byte 152 + .byte 149 + .byte 157 + .byte 148 + .byte 158 + .byte 151 + .byte 159 + .byte 12 + .byte 6 + .byte 9 + .byte 0 + .byte 1 + .byte 10 + .byte 2 + .byte 11 + .byte 3 + .byte 8 + .byte 5 + .byte 13 + .byte 4 + .byte 14 + .byte 7 + .byte 15 + .byte 28 + .byte 22 + .byte 25 + .byte 16 + .byte 17 + .byte 26 + .byte 18 + .byte 27 + .byte 19 + .byte 24 + .byte 21 + .byte 29 + .byte 20 + .byte 30 + .byte 23 + .byte 31 + .byte 172 + .byte 166 + .byte 169 + .byte 160 + .byte 161 + .byte 170 + .byte 162 + .byte 171 + .byte 163 + .byte 168 + .byte 165 + .byte 173 + .byte 164 + .byte 174 + .byte 167 + .byte 175 + .byte 44 + .byte 38 + .byte 41 + .byte 32 + .byte 33 + .byte 42 + .byte 34 + .byte 43 + .byte 35 + .byte 40 + .byte 37 + .byte 45 + .byte 36 + .byte 46 + .byte 39 + .byte 47 + .byte 188 + .byte 182 + .byte 185 + .byte 176 + .byte 177 + .byte 186 + .byte 178 + .byte 187 + .byte 179 + .byte 184 + .byte 181 + .byte 189 + .byte 180 + .byte 190 + .byte 183 + .byte 191 + .byte 60 + .byte 54 + .byte 57 + .byte 48 + .byte 49 + .byte 58 + .byte 50 + .byte 59 + .byte 51 + .byte 56 + .byte 53 + .byte 61 + .byte 52 + .byte 62 + .byte 55 + .byte 63 + .byte 140 + .byte 134 + .byte 137 + .byte 128 + .byte 129 + .byte 138 + .byte 130 + .byte 139 + .byte 131 + .byte 136 + .byte 133 + .byte 141 + .byte 132 + .byte 142 + .byte 135 + .byte 143 + .byte 92 + .byte 86 + .byte 89 + .byte 80 + .byte 81 + .byte 90 + .byte 82 + .byte 91 + .byte 83 + .byte 88 + .byte 85 + .byte 93 + .byte 84 + .byte 94 + .byte 87 + .byte 95 + .byte 220 + .byte 214 + .byte 217 + .byte 208 + .byte 209 + .byte 218 + .byte 210 + .byte 219 + .byte 211 + .byte 216 + .byte 213 + .byte 221 + .byte 212 + .byte 222 + .byte 215 + .byte 223 + .byte 
76 + .byte 70 + .byte 73 + .byte 64 + .byte 65 + .byte 74 + .byte 66 + .byte 75 + .byte 67 + .byte 72 + .byte 69 + .byte 77 + .byte 68 + .byte 78 + .byte 71 + .byte 79 + .byte 236 + .byte 230 + .byte 233 + .byte 224 + .byte 225 + .byte 234 + .byte 226 + .byte 235 + .byte 227 + .byte 232 + .byte 229 + .byte 237 + .byte 228 + .byte 238 + .byte 231 + .byte 239 + .byte 124 + .byte 118 + .byte 121 + .byte 112 + .byte 113 + .byte 122 + .byte 114 + .byte 123 + .byte 115 + .byte 120 + .byte 117 + .byte 125 + .byte 116 + .byte 126 + .byte 119 + .byte 127 + .byte 252 + .byte 246 + .byte 249 + .byte 240 + .byte 241 + .byte 250 + .byte 242 + .byte 251 + .byte 243 + .byte 248 + .byte 245 + .byte 253 + .byte 244 + .byte 254 + .byte 247 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_6, @object + .size table_6, 256 +table_6: + .byte 51 + .byte 52 + .byte 54 + .byte 56 + .byte 60 + .byte 58 + .byte 49 + .byte 62 + .byte 57 + .byte 50 + .byte 53 + .byte 55 + .byte 48 + .byte 59 + .byte 61 + .byte 63 + .byte 67 + .byte 68 + .byte 70 + .byte 72 + .byte 76 + .byte 74 + .byte 65 + .byte 78 + .byte 73 + .byte 66 + .byte 69 + .byte 71 + .byte 64 + .byte 75 + .byte 77 + .byte 79 + .byte 99 + .byte 100 + .byte 102 + .byte 104 + .byte 108 + .byte 106 + .byte 97 + .byte 110 + .byte 105 + .byte 98 + .byte 101 + .byte 103 + .byte 96 + .byte 107 + .byte 109 + .byte 111 + .byte 131 + .byte 132 + .byte 134 + .byte 136 + .byte 140 + .byte 138 + .byte 129 + .byte 142 + .byte 137 + .byte 130 + .byte 133 + .byte 135 + .byte 128 + .byte 139 + .byte 141 + .byte 143 + .byte 195 + .byte 196 + .byte 198 + .byte 200 + .byte 204 + .byte 202 + .byte 193 + .byte 206 + .byte 201 + .byte 194 + .byte 197 + .byte 199 + .byte 192 + .byte 203 + .byte 205 + .byte 207 + .byte 163 + .byte 164 + .byte 166 + .byte 168 + .byte 172 + .byte 170 + .byte 161 + .byte 174 + .byte 169 + .byte 162 + .byte 165 + .byte 167 + .byte 160 + .byte 171 + .byte 173 + .byte 175 + .byte 19 + .byte 20 + .byte 
22 + .byte 24 + .byte 28 + .byte 26 + .byte 17 + .byte 30 + .byte 25 + .byte 18 + .byte 21 + .byte 23 + .byte 16 + .byte 27 + .byte 29 + .byte 31 + .byte 227 + .byte 228 + .byte 230 + .byte 232 + .byte 236 + .byte 234 + .byte 225 + .byte 238 + .byte 233 + .byte 226 + .byte 229 + .byte 231 + .byte 224 + .byte 235 + .byte 237 + .byte 239 + .byte 147 + .byte 148 + .byte 150 + .byte 152 + .byte 156 + .byte 154 + .byte 145 + .byte 158 + .byte 153 + .byte 146 + .byte 149 + .byte 151 + .byte 144 + .byte 155 + .byte 157 + .byte 159 + .byte 35 + .byte 36 + .byte 38 + .byte 40 + .byte 44 + .byte 42 + .byte 33 + .byte 46 + .byte 41 + .byte 34 + .byte 37 + .byte 39 + .byte 32 + .byte 43 + .byte 45 + .byte 47 + .byte 83 + .byte 84 + .byte 86 + .byte 88 + .byte 92 + .byte 90 + .byte 81 + .byte 94 + .byte 89 + .byte 82 + .byte 85 + .byte 87 + .byte 80 + .byte 91 + .byte 93 + .byte 95 + .byte 115 + .byte 116 + .byte 118 + .byte 120 + .byte 124 + .byte 122 + .byte 113 + .byte 126 + .byte 121 + .byte 114 + .byte 117 + .byte 119 + .byte 112 + .byte 123 + .byte 125 + .byte 127 + .byte 3 + .byte 4 + .byte 6 + .byte 8 + .byte 12 + .byte 10 + .byte 1 + .byte 14 + .byte 9 + .byte 2 + .byte 5 + .byte 7 + .byte 0 + .byte 11 + .byte 13 + .byte 15 + .byte 179 + .byte 180 + .byte 182 + .byte 184 + .byte 188 + .byte 186 + .byte 177 + .byte 190 + .byte 185 + .byte 178 + .byte 181 + .byte 183 + .byte 176 + .byte 187 + .byte 189 + .byte 191 + .byte 211 + .byte 212 + .byte 214 + .byte 216 + .byte 220 + .byte 218 + .byte 209 + .byte 222 + .byte 217 + .byte 210 + .byte 213 + .byte 215 + .byte 208 + .byte 219 + .byte 221 + .byte 223 + .byte 243 + .byte 244 + .byte 246 + .byte 248 + .byte 252 + .byte 250 + .byte 241 + .byte 254 + .byte 249 + .byte 242 + .byte 245 + .byte 247 + .byte 240 + .byte 251 + .byte 253 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_7, @object + .size table_7, 256 +table_7: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 9 + .byte 11 + .byte 13 + 
.byte 15 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 121 + .byte 123 
+ .byte 125 + .byte 127 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_8, @object + .size table_8, 256 +table_8: + .byte 0 + .byte 8 + .byte 1 + .byte 9 + .byte 2 + .byte 10 + .byte 3 + .byte 11 + .byte 12 + .byte 4 + .byte 13 + .byte 5 + .byte 14 + .byte 6 + .byte 15 + .byte 7 + .byte 128 + .byte 136 + .byte 129 + .byte 137 + .byte 130 + .byte 138 + .byte 131 + .byte 139 + .byte 140 + .byte 132 + .byte 141 + .byte 133 + .byte 142 + .byte 134 + .byte 143 + .byte 135 + .byte 16 + .byte 24 + .byte 17 + .byte 25 + .byte 18 + .byte 26 + .byte 19 + .byte 27 + .byte 28 + .byte 20 + .byte 29 + .byte 21 + .byte 30 + .byte 22 + .byte 31 + .byte 23 + .byte 144 + .byte 152 + .byte 145 + .byte 153 + .byte 146 + .byte 154 + .byte 147 + .byte 155 + .byte 156 + .byte 148 + .byte 157 + .byte 149 + .byte 158 + .byte 150 + .byte 159 + .byte 151 + .byte 32 + .byte 40 + .byte 33 + .byte 41 + .byte 34 + .byte 42 + .byte 35 + .byte 43 + .byte 44 + .byte 36 + .byte 45 + .byte 37 + .byte 46 + .byte 38 + .byte 47 + .byte 39 + .byte 160 + .byte 168 + .byte 161 + .byte 169 + .byte 162 + .byte 170 + .byte 163 + .byte 171 + 
.byte 172 + .byte 164 + .byte 173 + .byte 165 + .byte 174 + .byte 166 + .byte 175 + .byte 167 + .byte 48 + .byte 56 + .byte 49 + .byte 57 + .byte 50 + .byte 58 + .byte 51 + .byte 59 + .byte 60 + .byte 52 + .byte 61 + .byte 53 + .byte 62 + .byte 54 + .byte 63 + .byte 55 + .byte 176 + .byte 184 + .byte 177 + .byte 185 + .byte 178 + .byte 186 + .byte 179 + .byte 187 + .byte 188 + .byte 180 + .byte 189 + .byte 181 + .byte 190 + .byte 182 + .byte 191 + .byte 183 + .byte 192 + .byte 200 + .byte 193 + .byte 201 + .byte 194 + .byte 202 + .byte 195 + .byte 203 + .byte 204 + .byte 196 + .byte 205 + .byte 197 + .byte 206 + .byte 198 + .byte 207 + .byte 199 + .byte 64 + .byte 72 + .byte 65 + .byte 73 + .byte 66 + .byte 74 + .byte 67 + .byte 75 + .byte 76 + .byte 68 + .byte 77 + .byte 69 + .byte 78 + .byte 70 + .byte 79 + .byte 71 + .byte 208 + .byte 216 + .byte 209 + .byte 217 + .byte 210 + .byte 218 + .byte 211 + .byte 219 + .byte 220 + .byte 212 + .byte 221 + .byte 213 + .byte 222 + .byte 214 + .byte 223 + .byte 215 + .byte 80 + .byte 88 + .byte 81 + .byte 89 + .byte 82 + .byte 90 + .byte 83 + .byte 91 + .byte 92 + .byte 84 + .byte 93 + .byte 85 + .byte 94 + .byte 86 + .byte 95 + .byte 87 + .byte 224 + .byte 232 + .byte 225 + .byte 233 + .byte 226 + .byte 234 + .byte 227 + .byte 235 + .byte 236 + .byte 228 + .byte 237 + .byte 229 + .byte 238 + .byte 230 + .byte 239 + .byte 231 + .byte 96 + .byte 104 + .byte 97 + .byte 105 + .byte 98 + .byte 106 + .byte 99 + .byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + .byte 250 + .byte 243 + .byte 251 + .byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + .byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global 
forkskinny_128_256_rounds + .type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov 
r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + 
elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + 
ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + 
std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 
+ pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global forkskinny_128_256_inv_rounds + .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + 
ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + 
std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 
+#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if 
defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 
0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + 
std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 
+ ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + + .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if 
defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + 
lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk + + .text +.global forkskinny_128_384_rounds + .type forkskinny_128_384_rounds, @function +forkskinny_128_384_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd 
r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd 
r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + 
lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + ldd r20,Y+45 + ldd r21,Y+46 + ldd r16,Y+47 + ldd r17,Y+48 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+33 + std Y+41,r0 + ldd r0,Y+34 + std 
Y+42,r0 + ldd r0,Y+35 + std Y+43,r0 + ldd r0,Y+36 + std Y+44,r0 + ldd r0,Y+37 + std Y+45,r0 + ldd r0,Y+38 + std Y+46,r0 + ldd r0,Y+39 + std Y+47,r0 + ldd r0,Y+40 + std Y+48,r0 + std Y+33,r19 + std Y+34,r17 + std Y+35,r18 + std Y+36,r21 + std Y+37,r26 + std Y+38,r16 + std Y+39,r20 + std Y+40,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd 
r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds + + .text +.global forkskinny_128_384_inv_rounds + .type forkskinny_128_384_inv_rounds, @function +forkskinny_128_384_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd 
r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if 
defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + ldd r20,Y+37 + ldd r21,Y+38 + ldd r16,Y+39 + ldd r17,Y+40 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm 
r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+41 + std Y+33,r0 + ldd r0,Y+42 + std Y+34,r0 + ldd r0,Y+43 + std Y+35,r0 + ldd r0,Y+44 + std Y+36,r0 + ldd r0,Y+45 + std Y+37,r0 + ldd r0,Y+46 + std Y+38,r0 + ldd r0,Y+47 + std Y+39,r0 + ldd r0,Y+48 + std Y+40,r0 + std Y+41,r26 + std Y+42,r18 + std Y+43,r20 + std Y+44,r17 + std Y+45,r16 + std Y+46,r27 + std Y+47,r21 + std Y+48,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor 
r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm 
r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std 
Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds + /* forkskinny_128_384_forward_tk: advances a 48-byte tweakey block in place. Inputs: r25:r24 = pointer to the block (copied to Z), r22 = number of passes. The count is decremented and tested only after each pass, so the body always runs at least once and a count of 0 wraps to 256 passes. Offsets 0 to 15 are held in registers; offsets 16 to 47 are copied to a 32-byte stack frame and written back at the end. Stack usage is 50 bytes: 18 saved registers and the 32-byte frame. Each pass applies one byte permutation to each of the three 16-byte groups: the old first half becomes the second half, and old bytes 9,15,8,13,10,14,12,11 become the new first half. For the group at offsets 16 to 31 those eight bytes are first mapped through table_2, and for the group at offsets 32 to 47 through table_3. Both tables are defined outside this chunk; presumably they are the TK2 and TK3 LFSR step tables, to be confirmed. The low byte of Z is used as the table index, which assumes the tables are 256-byte aligned; verify at their definition. RAMPZ is saved on entry and restored on exit when the part defines it. */ + .text +.global forkskinny_128_384_forward_tk + .type forkskinny_128_384_forward_tk, @function +forkskinny_128_384_forward_tk: + push r28 + push r29 + push 
r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd 
r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r26,Y+29 + ldd r27,Y+30 + ldd r2,Y+31 + ldd r3,Y+32 + ldd r23,Y+17 + std Y+25,r23 + ldd r23,Y+18 + std Y+26,r23 + ldd r23,Y+19 + 
std Y+27,r23 + ldd r23,Y+20 + std Y+28,r23 + ldd r23,Y+21 + std Y+29,r23 + ldd r23,Y+22 + std Y+30,r23 + ldd r23,Y+23 + std Y+31,r23 + ldd r23,Y+24 + std Y+32,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+17,r19 + std Y+18,r3 + std Y+19,r18 + std Y+20,r27 + std Y+21,r20 + std Y+22,r2 + std Y+23,r26 + std Y+24,r21 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std 
Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 /* offsets 0 to 15 were stored from registers; offsets 16 to 47 are now copied back from the stack frame */ + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 /* release the 32-byte frame */ + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk + /* forkskinny_128_384_reverse_tk: steps a 48-byte tweakey block backwards in place. Inputs: r25:r24 = pointer to the block (copied to Z), r22 = number of passes. The count is decremented and tested only after each pass, so the body always runs at least once and a count of 0 wraps to 256 passes. Offsets 0 to 15 are held in r2 to r17; offsets 16 to 47 are copied to a 32-byte stack frame and written back at the end. Each pass moves the old second half of every 16-byte group into the first half and places old bytes 2,0,4,7,6,3,5,1 into the second half, which is the exact inverse of the byte permutation in forkskinny_128_384_forward_tk. For the group at offsets 16 to 31 those eight bytes are first mapped through table_3, and for the group at offsets 32 to 47 through table_2. Both tables are defined outside this chunk, so whether each mapping undoes the forward one cannot be confirmed from here. RAMPZ is saved on entry and restored on exit when the part defines it. */ + .text +.global forkskinny_128_384_reverse_tk + .type forkskinny_128_384_reverse_tk, @function +forkskinny_128_384_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 /* Z = pointer argument */ + in r28,0x3d + in r29,0x3e + sbiw r28,32 /* reserve a 32-byte frame addressed through Y */ + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 /* NOTE(review): 0x3d, 0x3e and 0x3f are presumably SPL, SPH and SREG, making this the usual guarded stack pointer update; confirm against the device header */ +.L__stack_usage = 50 /* 18 pushed registers and the 32-byte frame */ + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + 
ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 + ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif 
defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + 
std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 /* release the 32-byte frame */ + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + /* forkskinny_64_192_rounds: runs cipher rounds in place on the structure at r25:r24 (copied to Z). Inputs: r22 = starting round index, r20 = stopping index. Both are doubled on entry; r22 advances by two per round and the loop exits when r22 equals r20. The test comes after the body, so at least one round always runs. Offsets 24 to 31 are the 8-byte state, held in r26, r27 and r2 to r7. Offsets 0 to 23 are copied to a 24-byte stack frame as three 8-byte tweakey words and written back at the end. Per round: every state byte is mapped through table_5; the first four bytes of each tweakey word are XORed into the first four state bytes; the bytes of table_4 at index r22 and at the next index are nibble-swapped and XORed into r27 and r3; the constant 32 is XORed into r5 and r26; the three 16-bit rows in r3:r2, r5:r4 and r7:r6 are rotated by 4, 8 and 12 bits; the rows are XOR-mixed and cycled. Then in each tweakey word the old first half moves to the second half and the old second half is nibble-shuffled into the first half, after a table_7 lookup for the second word and a table_8 lookup for the third. All tables are defined outside this chunk; presumably table_5 is the S-box and table_4 the round constants, to be confirmed. RAMPZ is saved on entry and restored on exit when the part defines it. */ + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 /* Z = pointer argument */ + in r28,0x3d + in r29,0x3e + sbiw r28,24 /* reserve a 24-byte frame addressed through Y */ + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 /* 14 pushed registers and the 24-byte frame */ + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd 
r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor 
r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld 
r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 
+ and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd 
r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if 
defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor 
r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z 
+#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + 
ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 
+ pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + 
std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) 
+ elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + and r11,r16 + or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std 
Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/internal-forkskinny.c b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/internal-forkskinny.c index c43ef98..4a7a01c 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/internal-forkskinny.c @@ -40,37 +40,10 @@ static unsigned char const RC[87] = { 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 }; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 - -/** - * \brief State information for ForkSkinny-128-256. 
- */ -typedef struct -{ - //uint32_t TK1[4]; /**< First part of the tweakey */ - //uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_256_state_t; +#if !defined(__AVR__) -typedef struct +void forkskinny_128_256_init_tks(forkskinny_128_256_tweakey_schedule_t *tks, const unsigned char key[32], uint8_t nb_rounds) { - /** Words of the full key schedule */ - uint32_t row0[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; - uint32_t row1[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; - - -} forkskinny_128_256_key_schedule_t; - -static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, const unsigned char key[32], uint8_t nb_rounds){ uint32_t TK[4]; unsigned round; @@ -81,8 +54,8 @@ static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, c TK[3] = le_load_word32(key + 12); /* Initiate key schedule with permutations of TK1 */ for(round = 0; roundrow0[round] = TK[0]; - ks->row1[round] = TK[1]; + tks->row0[round] = TK[0]; + tks->row1[round] = TK[1]; skinny128_permute_tk(TK); } @@ -94,8 +67,8 @@ static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, c TK[3] = le_load_word32(key + 28); /* Process second Tweakey and add it to the key schedule */ for(round = 0; roundrow0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny128_permute_tk(TK); skinny128_LFSR2(TK[0]); @@ -103,51 +76,48 @@ static void forkskinny_128_256_init_tks(forkskinny_128_256_key_schedule_t *ks, c } } -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, forkskinny_128_256_key_schedule_t *ks, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + s0 = state->S[0]; + s1 = state->S[1]; + s2 = state->S[2]; + s3 = state->S[3]; + + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -156,95 +126,8 @@ static void forkskinny_128_256_round state->S[3] = s3; } -void forkskinny_128_256_encrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_256_state_t state; - forkskinny_128_256_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - if (output_left && output_right){ - forkskinny_128_256_init_tks(&ks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); - } - else{ - forkskinny_128_256_init_tks(&ks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + FORKSKINNY_128_256_ROUNDS_AFTER); - } - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, 
state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-256 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_256_inv_round - (forkskinny_128_256_state_t *state, forkskinny_128_256_key_schedule_t *ks, unsigned round) +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -255,33 +138,37 @@ static void forkskinny_128_256_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -290,96 +177,8 @@ static void forkskinny_128_256_inv_round state->S[3] = s3; } -void forkskinny_128_256_decrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_init_tks(forkskinny_128_384_tweakey_schedule_t *tks, const unsigned char key[48], uint8_t nb_rounds) { - forkskinny_128_256_state_t state; - forkskinny_128_256_state_t fstate; - forkskinny_128_256_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - forkskinny_128_256_init_tks(&ks, key, FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER); - - /* Unpack the input */ - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); --round) { - forkskinny_128_256_inv_round(&state, &ks, round - 1); - } - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Save the state and the tweakey at the forking point */ - fstate = 
state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_256_inv_round(&state, &ks, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&fstate, &ks, round); - } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); -} - -/** - * \brief Number of rounds of ForkSkinny-128-384 before forking. - */ -#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 - -/** - * \brief Number of rounds of ForkSkinny-128-384 after forking. - */ -#define FORKSKINNY_128_384_ROUNDS_AFTER 31 - -/** - * \brief State information for ForkSkinny-128-384. 
- */ -typedef struct -{ - //uint32_t TK1[4]; /**< First part of the tweakey */ - //uint32_t TK2[4]; /**< Second part of the tweakey */ - //uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -typedef struct -{ - /** Words of the full key schedule */ - uint32_t row0[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; - uint32_t row1[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; - - -} forkskinny_128_384_key_schedule_t; - -static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, const unsigned char key[48], uint8_t nb_rounds){ uint32_t TK[4]; unsigned round; @@ -390,8 +189,8 @@ static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c TK[3] = le_load_word32(key + 12); /* Initiate key schedule with permutations of TK1 */ for(round = 0; roundrow0[round] = TK[0]; - ks->row1[round] = TK[1]; + tks->row0[round] = TK[0]; + tks->row1[round] = TK[1]; skinny128_permute_tk(TK); } @@ -401,10 +200,11 @@ static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c TK[1] = le_load_word32(key + 20); TK[2] = le_load_word32(key + 24); TK[3] = le_load_word32(key + 28); + /* Process second Tweakey and add it to the key schedule */ for(round = 0; roundrow0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny128_permute_tk(TK); skinny128_LFSR2(TK[0]); @@ -416,10 +216,11 @@ static void forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c TK[1] = le_load_word32(key + 36); TK[2] = le_load_word32(key + 40); TK[3] = le_load_word32(key + 44); + /* Process third Tweakey and add it to the key schedule */ for(round = 0; roundrow0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny128_permute_tk(TK); skinny128_LFSR3(TK[0]); @@ -427,51 +228,48 @@ static void 
forkskinny_128_384_init_tks(forkskinny_128_384_key_schedule_t *ks, c } } -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, forkskinny_128_384_key_schedule_t *ks, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + s0 = state->S[0]; + s1 = state->S[1]; + s2 = state->S[2]; + s3 = state->S[3]; + + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -480,97 +278,8 @@ static void forkskinny_128_384_round state->S[3] = s3; } -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - forkskinny_128_384_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - if (output_left && output_right){ - forkskinny_128_384_init_tks(&ks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); - } - else{ - forkskinny_128_384_init_tks(&ks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER); - } - - /* Unpack the input */ - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - 
le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, &ks, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-384 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_384_inv_round - (forkskinny_128_384_state_t *state, forkskinny_128_384_key_schedule_t *ks, unsigned round) +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -581,33 +290,37 @@ static void forkskinny_128_384_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= ks->row1[round] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= tks->row0[first] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= tks->row1[first] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -616,96 +329,9 @@ static void forkskinny_128_384_inv_round state->S[3] = s3; } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - forkskinny_128_384_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - forkskinny_128_384_init_tks(&ks, key, FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER); - - /* Unpack the input */ - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, &ks, round - 1); - } - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; 
round > 0; --round) { - forkskinny_128_384_inv_round(&state, &ks, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, &ks, round); - } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); -} - -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. - */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 -/** - * \brief State information for ForkSkinny-64-192. 
- */ -typedef struct +void forkskinny_64_192_init_tks(forkskinny_64_192_tweakey_schedule_t *tks, const unsigned char key[24], uint8_t nb_rounds) { - //uint16_t TK1[4]; /**< First part of the tweakey */ - //uint16_t TK2[4]; /**< Second part of the tweakey */ - //uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ - -} forkskinny_64_192_state_t; - -typedef struct -{ - /** Words of the full key schedule */ - uint16_t row0[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; - uint16_t row1[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; - - -} forkskinny_64_192_key_schedule_t; - -static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, const unsigned char key[24], uint8_t nb_rounds){ uint16_t TK[4]; unsigned round; @@ -716,8 +342,8 @@ static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con TK[3] = be_load_word16(key + 6); /* Initiate key schedule with permutations of TK1 */ for(round = 0; round<nb_rounds; round++){ - ks->row0[round] = TK[0]; - ks->row1[round] = TK[1]; + tks->row0[round] = TK[0]; + tks->row1[round] = TK[1]; skinny64_permute_tk(TK); } @@ -727,10 +353,11 @@ static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con TK[1] = be_load_word16(key + 10); TK[2] = be_load_word16(key + 12); TK[3] = be_load_word16(key + 14); + /* Process second Tweakey and add it to the key schedule */ for(round = 0; round<nb_rounds; round++){ - ks->row0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny64_permute_tk(TK); skinny64_LFSR2(TK[0]); @@ -744,8 +371,8 @@ static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con TK[3] = be_load_word16(key + 22); /* Process third Tweakey and add it to the key schedule */ for(round = 0; round<nb_rounds; round++){ - ks->row0[round] ^= TK[0]; - ks->row1[round] ^= TK[1]; + tks->row0[round] ^= TK[0]; + tks->row1[round] ^= TK[1]; skinny64_permute_tk(TK); skinny64_LFSR3(TK[0]); @@ -753,17 +380,8 @@ 
static void forkskinny_64_192_init_tks(forkskinny_64_192_key_schedule_t *ks, con } } -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. - */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, forkskinny_64_192_key_schedule_t *ks, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -774,131 +392,44 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= ks->row1[round] ^ ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= tks->row0[first] ^ ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= tks->row1[first] ^ ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = 
s0; + s0 = temp; + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - forkskinny_64_192_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - if (output_left && output_right){ - forkskinny_64_192_init_tks(&ks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); - } - else{ - forkskinny_64_192_init_tks(&ks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + FORKSKINNY_64_192_ROUNDS_AFTER); - } - - /* Unpack the input */ - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 
0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, &ks, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, forkskinny_64_192_key_schedule_t *ks, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -909,33 +440,35 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= ks->row0[round] ^ ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= ks->row1[round] ^ ((rc & 0x70) << 8); - s2 ^= 0x2000; - - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= tks->row0[first] ^ ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= tks->row1[first] ^ ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -944,61 +477,4 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char 
*output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - forkskinny_64_192_key_schedule_t ks; - unsigned round; - - /* Iterate key schedule */ - forkskinny_64_192_init_tks(&ks, key, FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER); - - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, &ks, round - 1); - } - - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, &ks, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, &ks, round); - } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); -} +#endif /* !__AVR__ */ diff --git 
a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/internal-forkskinny.h b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/internal-forkskinny.h index 0c1a707..1b4f5bf 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,172 @@ extern "C" { #endif /** + * \brief Number of rounds of ForkSkinny-128-256 before forking. + */ +#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 + +/** + * \brief Number of rounds of ForkSkinny-128-256 after forking. + */ +#define FORKSKINNY_128_256_ROUNDS_AFTER 27 + +/** + * \brief State information for ForkSkinny-128-256. + */ +typedef struct +{ + // uint32_t TK1[4]; /**< First part of the tweakey */ + // uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +typedef struct +{ + /** Words of the full key schedule */ + uint32_t row0[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; + uint32_t row1[(FORKSKINNY_128_256_ROUNDS_BEFORE + 2*FORKSKINNY_128_256_ROUNDS_AFTER)]; + +} forkskinny_128_256_tweakey_schedule_t; + +/** + * \brief Number of rounds of ForkSkinny-128-384 before forking. + */ +#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 + +/** + * \brief Number of rounds of ForkSkinny-128-384 after forking. + */ +#define FORKSKINNY_128_384_ROUNDS_AFTER 31 + +/** + * \brief State information for ForkSkinny-128-384. 
+ */ +typedef struct +{ + // uint32_t TK1[4]; /**< First part of the tweakey */ + // uint32_t TK2[4]; /**< Second part of the tweakey */ + // uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +typedef struct +{ + /** Words of the full key schedule */ + uint32_t row0[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; + uint32_t row1[(FORKSKINNY_128_384_ROUNDS_BEFORE + 2*FORKSKINNY_128_384_ROUNDS_AFTER)]; + +} forkskinny_128_384_tweakey_schedule_t; + +/** + * \brief Number of rounds of ForkSkinny-64-192 before forking. + */ +#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 + +/** + * \brief Number of rounds of ForkSkinny-64-192 after forking. + */ +#define FORKSKINNY_64_192_ROUNDS_AFTER 23 + +/** + * \brief State information for ForkSkinny-64-192. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +typedef struct +{ + /** Words of the full key schedule */ + uint16_t row0[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; + uint16_t row1[(FORKSKINNY_64_192_ROUNDS_BEFORE + 2*FORKSKINNY_64_192_ROUNDS_AFTER)]; + + +} forkskinny_64_192_tweakey_schedule_t; + + +void forkskinny_128_256_init_tks(forkskinny_128_256_tweakey_schedule_t *tks, const unsigned char key[32], uint8_t nb_rounds); + + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. 
+ * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, forkskinny_128_256_tweakey_schedule_t *tks, unsigned first, unsigned last); + + +void forkskinny_128_384_init_tks(forkskinny_128_384_tweakey_schedule_t *tks, const unsigned char key[48], uint8_t nb_rounds); + + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, forkskinny_128_384_tweakey_schedule_t *tks, unsigned first, unsigned last); + + +void forkskinny_64_192_init_tks(forkskinny_64_192_tweakey_schedule_t *tks, const unsigned char key[24], uint8_t nb_rounds); + + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. 
+ */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, forkskinny_64_192_tweakey_schedule_t *tks, unsigned first, unsigned last); + + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. * * \param key 256-bit tweakey for ForkSkinny-128-256. diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_dec/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. 
The original version from the specification is * equivalent to: diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/aead-common.h b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/aead-common.h index 2be95eb..8429f59 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/aead-common.h +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/aead-common.h @@ -154,7 +154,7 @@ typedef void (*aead_xof_squeeze_t) /** * \brief No special AEAD features. */ -#define AEAD_FLAG_NONE 0x0000 +#define AEAD_FLAG_NONE 0x0000 /** * \brief The natural byte order of the AEAD cipher is little-endian. @@ -166,7 +166,18 @@ typedef void (*aead_xof_squeeze_t) * numbers as nonces. The application needs to know whether the sequence * number should be packed into the leading or trailing bytes of the nonce. */ -#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 +#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 + +/** + * \brief The AEAD mode provides side-channel protection for the key. + */ +#define AEAD_FLAG_SC_PROTECT_KEY 0x0002 + +/** + * \brief The AEAD mode provides side-channel protection for all block + * operations. + */ +#define AEAD_FLAG_SC_PROTECT_ALL 0x0004 /** * \brief Meta-information about an AEAD cipher. 
diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/forkae.c b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/forkae.c index 4a9671a..5b7ba3d 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/forkae.c +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/forkae.c @@ -22,7 +22,7 @@ #include "forkae.h" #include "internal-forkskinny.h" -#include "internal-util.h" +#include "internal-skinnyutil.h" #include <string.h> aead_cipher_t const forkae_paef_64_192_cipher = { @@ -138,3 +138,458 @@ aead_cipher_t const forkae_saef_128_256_cipher = { #define FORKAE_TWEAKEY_REDUCED_SIZE 32 #define FORKAE_BLOCK_FUNC forkskinny_128_256 #include "internal-forkae-saef.h" + +/* Helper functions to implement the forking encrypt/decrypt block operations + * on top of the basic "perform N rounds" functions in internal-forkskinny.c */ + +/** + * \brief Number of rounds of ForkSkinny-128-256 before forking. 
+ */ +#define FORKSKINNY_128_256_ROUNDS_AFTER 27 + +void forkskinny_128_256_encrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + + /* State stored per column */ + load_column_8(state.S, input); + + /* Run all of the rounds before the forking point */ + forkskinny_128_256_rounds(&state, 0, FORKSKINNY_128_256_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + store_column_8(output_right, state.S); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x51051001; /* Branching constant */ + state.S[1] ^= 0xa20a2002; + state.S[2] ^= 0x44144104; + state.S[3] ^= 0x88288208; + + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + store_column_8(output_left, state.S); + } else { + /* We only need the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + 
FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + store_column_8(output_right, state.S); + } +} + +void forkskinny_128_256_decrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_256_forward_tk + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_256_reverse_tk(&state, FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point, convert state to columns */ + memcpy(fstate.TK1, state.TK1, 16); + memcpy(fstate.TK2, state.TK2, 16); + rows_to_columns_32(fstate.S[0],fstate.S[1],fstate.S[2],fstate.S[3],state.S[0],state.S[1], state.S[2], state.S[3]); + + + /* Generate the left output block after another "before" 
rounds */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_256_rounds + (&fstate, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + store_column_8(output_right,fstate.S); +} + +/** + * \brief Number of rounds of ForkSkinny-128-384 before forking. + */ +#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 + +/** + * \brief Number of rounds of ForkSkinny-128-384 after forking. + */ +#define FORKSKINNY_128_384_ROUNDS_AFTER 31 + +void forkskinny_128_384_encrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + + /* State stored per column */ + load_column_8(state.S, input); + + /* Run all of the rounds before the forking point */ + forkskinny_128_384_rounds(&state, 0, FORKSKINNY_128_384_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] 
= state.S[3]; + + /* Generate the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + store_column_8(output_right, state.S); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x51051001; /* Branching constant */ + state.S[1] ^= 0xa20a2002; + state.S[2] ^= 0x44144104; + state.S[3] ^= 0x88288208; + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + store_column_8(output_left, state.S); + } else { + /* We only need the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + store_column_8(output_right, state.S); + } +} + +void forkskinny_128_384_decrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); 
+ + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_384_forward_tk + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_384_inv_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_384_reverse_tk(&state, FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point, convert state to columns */ + memcpy(fstate.TK1, state.TK1, 16); + memcpy(fstate.TK2, state.TK2, 16); + memcpy(fstate.TK3, state.TK3, 16); + rows_to_columns_32(fstate.S[0],fstate.S[1],fstate.S[2],fstate.S[3],state.S[0],state.S[1], state.S[2], state.S[3]); + + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_384_inv_rounds(&state, FORKSKINNY_128_384_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_384_rounds + (&fstate, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + store_column_8(output_right, fstate.S); +} + +/** + * \brief Number of rounds of ForkSkinny-64-192 before forking. + */ +#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 + +/** + * \brief Number of rounds of ForkSkinny-64-192 after forking. 
+ */ +#define FORKSKINNY_64_192_ROUNDS_AFTER 23 +/** + * \brief Runs ForkSkinny-64-192 forward and produces the left and/or right + * output block. + * + * \param key Tweakey; 24 bytes are read with be_load_word16() into TK1, + * TK2 and TK3. + * \param output_left Destination of the left block (stored with + * be_store_word16() at offsets 0, 2, 4 and 6), or NULL to skip it. + * \param output_right Destination of the right block (stored the same + * way), or NULL to skip it. + * \param input Input block, read as four be_load_word16() words. + * + * When both output pointers are NULL nothing is stored. + */ +void forkskinny_64_192_encrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Run all of the rounds before the forking point */ + forkskinny_64_192_rounds(&state, 0, FORKSKINNY_64_192_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint16_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + + /* Restore the state at the forking point. Only S is restored: the + * tweakey stays where the right branch left it, which is where the + * left branch below continues. */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + if (!output_right) { + /* The left branch uses the part of the tweakey schedule that + * follows the right branch (decrypt reaches the forking point by + * rolling the tweakey back "after" rounds). The right branch was + * not run above, so advance the tweakey past it here. */ + forkskinny_64_192_forward_tk + (&state, FORKSKINNY_64_192_ROUNDS_AFTER); + } + state.S[0] ^= 0x1249U; /* Branching constant */ + state.S[1] ^= 0x36daU; + state.S[2] ^= 
0x5b7fU; + state.S[3] ^= 0xec81U; + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + } else if (output_right) { + /* We only need the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + } +} +/** + * \brief Inverts the left branch of ForkSkinny-64-192. + * + * \param key Tweakey; 24 bytes are read with be_load_word16() into TK1, + * TK2 and TK3. + * \param output_left Receives the state after inverting down to round 0. + * It is written unconditionally, so it must not be NULL. + * \param output_right Receives the state after re-running the "after" + * rounds forward from the forking point. It is written unconditionally, + * so it must not be NULL. + * \param input Block to invert, read as four be_load_word16() words. + */ +void forkskinny_64_192_decrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_64_192_forward_tk + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking 
point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_64_192_reverse_tk(&state, FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_64_192_inv_rounds(&state, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_64_192_rounds + (&fstate, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include <avr/io.h> +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 
67 + .byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + .byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte 193 + .byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 + .byte 100 + .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + .byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 
194 + .byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 + .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + .byte 97 + .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 
+ .byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + .byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + .byte 93 + .byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 223 + .byte 22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section 
.progmem.data,"a",@progbits + .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 
76 + .byte 78 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_3, @object + .size table_3, 256 +table_3: + .byte 0 + .byte 128 + .byte 1 + .byte 129 + .byte 2 + .byte 130 + .byte 3 + .byte 131 + .byte 4 + .byte 132 + .byte 5 + .byte 133 + .byte 6 + .byte 134 + .byte 7 + .byte 135 + .byte 8 + .byte 136 + .byte 9 + .byte 137 + .byte 10 + .byte 138 + .byte 11 + .byte 139 + .byte 12 + .byte 140 + .byte 13 + .byte 141 + .byte 14 + .byte 142 + .byte 15 + .byte 143 + .byte 16 + .byte 144 + .byte 17 + .byte 145 + .byte 18 + .byte 146 + .byte 19 + .byte 147 + .byte 20 + .byte 148 + .byte 21 + .byte 149 + .byte 22 + .byte 150 + .byte 23 + .byte 151 + .byte 24 + .byte 152 + .byte 25 + .byte 153 + .byte 26 + .byte 154 + .byte 27 + .byte 155 + .byte 28 + .byte 156 + .byte 29 + .byte 157 + .byte 30 + .byte 158 + .byte 31 + .byte 159 + .byte 160 + .byte 32 + .byte 161 + .byte 33 + .byte 162 + .byte 34 + .byte 163 + .byte 35 + .byte 164 
+ .byte 36 + .byte 165 + .byte 37 + .byte 166 + .byte 38 + .byte 167 + .byte 39 + .byte 168 + .byte 40 + .byte 169 + .byte 41 + .byte 170 + .byte 42 + .byte 171 + .byte 43 + .byte 172 + .byte 44 + .byte 173 + .byte 45 + .byte 174 + .byte 46 + .byte 175 + .byte 47 + .byte 176 + .byte 48 + .byte 177 + .byte 49 + .byte 178 + .byte 50 + .byte 179 + .byte 51 + .byte 180 + .byte 52 + .byte 181 + .byte 53 + .byte 182 + .byte 54 + .byte 183 + .byte 55 + .byte 184 + .byte 56 + .byte 185 + .byte 57 + .byte 186 + .byte 58 + .byte 187 + .byte 59 + .byte 188 + .byte 60 + .byte 189 + .byte 61 + .byte 190 + .byte 62 + .byte 191 + .byte 63 + .byte 64 + .byte 192 + .byte 65 + .byte 193 + .byte 66 + .byte 194 + .byte 67 + .byte 195 + .byte 68 + .byte 196 + .byte 69 + .byte 197 + .byte 70 + .byte 198 + .byte 71 + .byte 199 + .byte 72 + .byte 200 + .byte 73 + .byte 201 + .byte 74 + .byte 202 + .byte 75 + .byte 203 + .byte 76 + .byte 204 + .byte 77 + .byte 205 + .byte 78 + .byte 206 + .byte 79 + .byte 207 + .byte 80 + .byte 208 + .byte 81 + .byte 209 + .byte 82 + .byte 210 + .byte 83 + .byte 211 + .byte 84 + .byte 212 + .byte 85 + .byte 213 + .byte 86 + .byte 214 + .byte 87 + .byte 215 + .byte 88 + .byte 216 + .byte 89 + .byte 217 + .byte 90 + .byte 218 + .byte 91 + .byte 219 + .byte 92 + .byte 220 + .byte 93 + .byte 221 + .byte 94 + .byte 222 + .byte 95 + .byte 223 + .byte 224 + .byte 96 + .byte 225 + .byte 97 + .byte 226 + .byte 98 + .byte 227 + .byte 99 + .byte 228 + .byte 100 + .byte 229 + .byte 101 + .byte 230 + .byte 102 + .byte 231 + .byte 103 + .byte 232 + .byte 104 + .byte 233 + .byte 105 + .byte 234 + .byte 106 + .byte 235 + .byte 107 + .byte 236 + .byte 108 + .byte 237 + .byte 109 + .byte 238 + .byte 110 + .byte 239 + .byte 111 + .byte 240 + .byte 112 + .byte 241 + .byte 113 + .byte 242 + .byte 114 + .byte 243 + .byte 115 + .byte 244 + .byte 116 + .byte 245 + .byte 117 + .byte 246 + .byte 118 + .byte 247 + .byte 119 + .byte 248 + .byte 120 + .byte 249 + .byte 121 + .byte 250 
+ .byte 122 + .byte 251 + .byte 123 + .byte 252 + .byte 124 + .byte 253 + .byte 125 + .byte 254 + .byte 126 + .byte 255 + .byte 127 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_4, @object + .size table_4, 174 +table_4: + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 7 + .byte 0 + .byte 15 + .byte 0 + .byte 15 + .byte 1 + .byte 15 + .byte 3 + .byte 14 + .byte 7 + .byte 13 + .byte 7 + .byte 11 + .byte 7 + .byte 7 + .byte 7 + .byte 15 + .byte 6 + .byte 15 + .byte 5 + .byte 14 + .byte 3 + .byte 12 + .byte 7 + .byte 9 + .byte 7 + .byte 3 + .byte 7 + .byte 7 + .byte 6 + .byte 15 + .byte 4 + .byte 14 + .byte 1 + .byte 13 + .byte 3 + .byte 10 + .byte 7 + .byte 5 + .byte 7 + .byte 11 + .byte 6 + .byte 7 + .byte 5 + .byte 14 + .byte 2 + .byte 12 + .byte 5 + .byte 8 + .byte 3 + .byte 0 + .byte 7 + .byte 1 + .byte 6 + .byte 3 + .byte 4 + .byte 6 + .byte 0 + .byte 13 + .byte 0 + .byte 11 + .byte 1 + .byte 7 + .byte 3 + .byte 14 + .byte 6 + .byte 13 + .byte 5 + .byte 10 + .byte 3 + .byte 4 + .byte 7 + .byte 9 + .byte 6 + .byte 3 + .byte 5 + .byte 6 + .byte 2 + .byte 12 + .byte 4 + .byte 8 + .byte 1 + .byte 1 + .byte 3 + .byte 2 + .byte 6 + .byte 5 + .byte 4 + .byte 10 + .byte 0 + .byte 5 + .byte 1 + .byte 11 + .byte 2 + .byte 6 + .byte 5 + .byte 12 + .byte 2 + .byte 8 + .byte 5 + .byte 0 + .byte 3 + .byte 0 + .byte 6 + .byte 1 + .byte 4 + .byte 2 + .byte 0 + .byte 5 + .byte 0 + .byte 11 + .byte 0 + .byte 7 + .byte 1 + .byte 15 + .byte 2 + .byte 14 + .byte 5 + .byte 12 + .byte 3 + .byte 8 + .byte 7 + .byte 1 + .byte 7 + .byte 3 + .byte 6 + .byte 7 + .byte 4 + .byte 14 + .byte 0 + .byte 13 + .byte 1 + .byte 11 + .byte 3 + .byte 6 + .byte 7 + .byte 13 + .byte 6 + .byte 11 + .byte 5 + .byte 6 + .byte 3 + .byte 12 + .byte 6 + .byte 9 + .byte 5 + .byte 2 + .byte 3 + .byte 4 + .byte 6 + .byte 9 + .byte 4 + .byte 2 + .byte 1 + .byte 5 + .byte 2 + .byte 10 + .byte 4 + .byte 4 + .byte 1 + .byte 9 + .byte 2 + .byte 2 + .byte 5 + .byte 4 + .byte 2 + .byte 8 + .byte 4 
+ .byte 0 + .byte 1 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_5, @object + .size table_5, 256 +table_5: + .byte 204 + .byte 198 + .byte 201 + .byte 192 + .byte 193 + .byte 202 + .byte 194 + .byte 203 + .byte 195 + .byte 200 + .byte 197 + .byte 205 + .byte 196 + .byte 206 + .byte 199 + .byte 207 + .byte 108 + .byte 102 + .byte 105 + .byte 96 + .byte 97 + .byte 106 + .byte 98 + .byte 107 + .byte 99 + .byte 104 + .byte 101 + .byte 109 + .byte 100 + .byte 110 + .byte 103 + .byte 111 + .byte 156 + .byte 150 + .byte 153 + .byte 144 + .byte 145 + .byte 154 + .byte 146 + .byte 155 + .byte 147 + .byte 152 + .byte 149 + .byte 157 + .byte 148 + .byte 158 + .byte 151 + .byte 159 + .byte 12 + .byte 6 + .byte 9 + .byte 0 + .byte 1 + .byte 10 + .byte 2 + .byte 11 + .byte 3 + .byte 8 + .byte 5 + .byte 13 + .byte 4 + .byte 14 + .byte 7 + .byte 15 + .byte 28 + .byte 22 + .byte 25 + .byte 16 + .byte 17 + .byte 26 + .byte 18 + .byte 27 + .byte 19 + .byte 24 + .byte 21 + .byte 29 + .byte 20 + .byte 30 + .byte 23 + .byte 31 + .byte 172 + .byte 166 + .byte 169 + .byte 160 + .byte 161 + .byte 170 + .byte 162 + .byte 171 + .byte 163 + .byte 168 + .byte 165 + .byte 173 + .byte 164 + .byte 174 + .byte 167 + .byte 175 + .byte 44 + .byte 38 + .byte 41 + .byte 32 + .byte 33 + .byte 42 + .byte 34 + .byte 43 + .byte 35 + .byte 40 + .byte 37 + .byte 45 + .byte 36 + .byte 46 + .byte 39 + .byte 47 + .byte 188 + .byte 182 + .byte 185 + .byte 176 + .byte 177 + .byte 186 + .byte 178 + .byte 187 + .byte 179 + .byte 184 + .byte 181 + .byte 189 + .byte 180 + .byte 190 + .byte 183 + .byte 191 + .byte 60 + .byte 54 + .byte 57 + .byte 48 + .byte 49 + .byte 58 + .byte 50 + .byte 59 + .byte 51 + .byte 56 + .byte 53 + .byte 61 + .byte 52 + .byte 62 + .byte 55 + .byte 63 + .byte 140 + .byte 134 + .byte 137 + .byte 128 + .byte 129 + .byte 138 + .byte 130 + .byte 139 + .byte 131 + .byte 136 + .byte 133 + .byte 141 + .byte 132 + .byte 142 + .byte 135 + .byte 143 + .byte 92 + .byte 86 + 
.byte 89 + .byte 80 + .byte 81 + .byte 90 + .byte 82 + .byte 91 + .byte 83 + .byte 88 + .byte 85 + .byte 93 + .byte 84 + .byte 94 + .byte 87 + .byte 95 + .byte 220 + .byte 214 + .byte 217 + .byte 208 + .byte 209 + .byte 218 + .byte 210 + .byte 219 + .byte 211 + .byte 216 + .byte 213 + .byte 221 + .byte 212 + .byte 222 + .byte 215 + .byte 223 + .byte 76 + .byte 70 + .byte 73 + .byte 64 + .byte 65 + .byte 74 + .byte 66 + .byte 75 + .byte 67 + .byte 72 + .byte 69 + .byte 77 + .byte 68 + .byte 78 + .byte 71 + .byte 79 + .byte 236 + .byte 230 + .byte 233 + .byte 224 + .byte 225 + .byte 234 + .byte 226 + .byte 235 + .byte 227 + .byte 232 + .byte 229 + .byte 237 + .byte 228 + .byte 238 + .byte 231 + .byte 239 + .byte 124 + .byte 118 + .byte 121 + .byte 112 + .byte 113 + .byte 122 + .byte 114 + .byte 123 + .byte 115 + .byte 120 + .byte 117 + .byte 125 + .byte 116 + .byte 126 + .byte 119 + .byte 127 + .byte 252 + .byte 246 + .byte 249 + .byte 240 + .byte 241 + .byte 250 + .byte 242 + .byte 251 + .byte 243 + .byte 248 + .byte 245 + .byte 253 + .byte 244 + .byte 254 + .byte 247 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_6, @object + .size table_6, 256 +table_6: + .byte 51 + .byte 52 + .byte 54 + .byte 56 + .byte 60 + .byte 58 + .byte 49 + .byte 62 + .byte 57 + .byte 50 + .byte 53 + .byte 55 + .byte 48 + .byte 59 + .byte 61 + .byte 63 + .byte 67 + .byte 68 + .byte 70 + .byte 72 + .byte 76 + .byte 74 + .byte 65 + .byte 78 + .byte 73 + .byte 66 + .byte 69 + .byte 71 + .byte 64 + .byte 75 + .byte 77 + .byte 79 + .byte 99 + .byte 100 + .byte 102 + .byte 104 + .byte 108 + .byte 106 + .byte 97 + .byte 110 + .byte 105 + .byte 98 + .byte 101 + .byte 103 + .byte 96 + .byte 107 + .byte 109 + .byte 111 + .byte 131 + .byte 132 + .byte 134 + .byte 136 + .byte 140 + .byte 138 + .byte 129 + .byte 142 + .byte 137 + .byte 130 + .byte 133 + .byte 135 + .byte 128 + .byte 139 + .byte 141 + .byte 143 + .byte 195 + .byte 196 + .byte 198 + .byte 200 + .byte 204 + 
.byte 202 + .byte 193 + .byte 206 + .byte 201 + .byte 194 + .byte 197 + .byte 199 + .byte 192 + .byte 203 + .byte 205 + .byte 207 + .byte 163 + .byte 164 + .byte 166 + .byte 168 + .byte 172 + .byte 170 + .byte 161 + .byte 174 + .byte 169 + .byte 162 + .byte 165 + .byte 167 + .byte 160 + .byte 171 + .byte 173 + .byte 175 + .byte 19 + .byte 20 + .byte 22 + .byte 24 + .byte 28 + .byte 26 + .byte 17 + .byte 30 + .byte 25 + .byte 18 + .byte 21 + .byte 23 + .byte 16 + .byte 27 + .byte 29 + .byte 31 + .byte 227 + .byte 228 + .byte 230 + .byte 232 + .byte 236 + .byte 234 + .byte 225 + .byte 238 + .byte 233 + .byte 226 + .byte 229 + .byte 231 + .byte 224 + .byte 235 + .byte 237 + .byte 239 + .byte 147 + .byte 148 + .byte 150 + .byte 152 + .byte 156 + .byte 154 + .byte 145 + .byte 158 + .byte 153 + .byte 146 + .byte 149 + .byte 151 + .byte 144 + .byte 155 + .byte 157 + .byte 159 + .byte 35 + .byte 36 + .byte 38 + .byte 40 + .byte 44 + .byte 42 + .byte 33 + .byte 46 + .byte 41 + .byte 34 + .byte 37 + .byte 39 + .byte 32 + .byte 43 + .byte 45 + .byte 47 + .byte 83 + .byte 84 + .byte 86 + .byte 88 + .byte 92 + .byte 90 + .byte 81 + .byte 94 + .byte 89 + .byte 82 + .byte 85 + .byte 87 + .byte 80 + .byte 91 + .byte 93 + .byte 95 + .byte 115 + .byte 116 + .byte 118 + .byte 120 + .byte 124 + .byte 122 + .byte 113 + .byte 126 + .byte 121 + .byte 114 + .byte 117 + .byte 119 + .byte 112 + .byte 123 + .byte 125 + .byte 127 + .byte 3 + .byte 4 + .byte 6 + .byte 8 + .byte 12 + .byte 10 + .byte 1 + .byte 14 + .byte 9 + .byte 2 + .byte 5 + .byte 7 + .byte 0 + .byte 11 + .byte 13 + .byte 15 + .byte 179 + .byte 180 + .byte 182 + .byte 184 + .byte 188 + .byte 186 + .byte 177 + .byte 190 + .byte 185 + .byte 178 + .byte 181 + .byte 183 + .byte 176 + .byte 187 + .byte 189 + .byte 191 + .byte 211 + .byte 212 + .byte 214 + .byte 216 + .byte 220 + .byte 218 + .byte 209 + .byte 222 + .byte 217 + .byte 210 + .byte 213 + .byte 215 + .byte 208 + .byte 219 + .byte 221 + .byte 223 + .byte 243 + .byte 244 
+ .byte 246 + .byte 248 + .byte 252 + .byte 250 + .byte 241 + .byte 254 + .byte 249 + .byte 242 + .byte 245 + .byte 247 + .byte 240 + .byte 251 + .byte 253 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_7, @object + .size table_7, 256 +table_7: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 57 + .byte 59 + 
.byte 61 + .byte 63 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_8, @object + .size table_8, 256 +table_8: + .byte 0 + .byte 8 + .byte 1 + .byte 9 + .byte 2 + .byte 10 + .byte 3 + .byte 11 + .byte 12 + .byte 4 + .byte 13 + .byte 5 + .byte 14 + .byte 6 + .byte 15 + .byte 7 + .byte 128 + .byte 136 + .byte 129 + .byte 137 + .byte 130 + .byte 138 + .byte 131 + .byte 139 + .byte 140 + .byte 132 + .byte 141 + .byte 133 + .byte 142 + .byte 134 + .byte 143 + .byte 135 + .byte 16 + .byte 24 + .byte 17 + .byte 25 + .byte 18 + .byte 26 + .byte 19 + .byte 27 + .byte 28 + .byte 20 + .byte 29 + .byte 21 + .byte 30 + .byte 22 + .byte 31 + .byte 23 + .byte 144 + .byte 152 + .byte 145 + .byte 153 + .byte 146 + .byte 154 + .byte 147 + .byte 155 + .byte 156 + 
.byte 148 + .byte 157 + .byte 149 + .byte 158 + .byte 150 + .byte 159 + .byte 151 + .byte 32 + .byte 40 + .byte 33 + .byte 41 + .byte 34 + .byte 42 + .byte 35 + .byte 43 + .byte 44 + .byte 36 + .byte 45 + .byte 37 + .byte 46 + .byte 38 + .byte 47 + .byte 39 + .byte 160 + .byte 168 + .byte 161 + .byte 169 + .byte 162 + .byte 170 + .byte 163 + .byte 171 + .byte 172 + .byte 164 + .byte 173 + .byte 165 + .byte 174 + .byte 166 + .byte 175 + .byte 167 + .byte 48 + .byte 56 + .byte 49 + .byte 57 + .byte 50 + .byte 58 + .byte 51 + .byte 59 + .byte 60 + .byte 52 + .byte 61 + .byte 53 + .byte 62 + .byte 54 + .byte 63 + .byte 55 + .byte 176 + .byte 184 + .byte 177 + .byte 185 + .byte 178 + .byte 186 + .byte 179 + .byte 187 + .byte 188 + .byte 180 + .byte 189 + .byte 181 + .byte 190 + .byte 182 + .byte 191 + .byte 183 + .byte 192 + .byte 200 + .byte 193 + .byte 201 + .byte 194 + .byte 202 + .byte 195 + .byte 203 + .byte 204 + .byte 196 + .byte 205 + .byte 197 + .byte 206 + .byte 198 + .byte 207 + .byte 199 + .byte 64 + .byte 72 + .byte 65 + .byte 73 + .byte 66 + .byte 74 + .byte 67 + .byte 75 + .byte 76 + .byte 68 + .byte 77 + .byte 69 + .byte 78 + .byte 70 + .byte 79 + .byte 71 + .byte 208 + .byte 216 + .byte 209 + .byte 217 + .byte 210 + .byte 218 + .byte 211 + .byte 219 + .byte 220 + .byte 212 + .byte 221 + .byte 213 + .byte 222 + .byte 214 + .byte 223 + .byte 215 + .byte 80 + .byte 88 + .byte 81 + .byte 89 + .byte 82 + .byte 90 + .byte 83 + .byte 91 + .byte 92 + .byte 84 + .byte 93 + .byte 85 + .byte 94 + .byte 86 + .byte 95 + .byte 87 + .byte 224 + .byte 232 + .byte 225 + .byte 233 + .byte 226 + .byte 234 + .byte 227 + .byte 235 + .byte 236 + .byte 228 + .byte 237 + .byte 229 + .byte 238 + .byte 230 + .byte 239 + .byte 231 + .byte 96 + .byte 104 + .byte 97 + .byte 105 + .byte 98 + .byte 106 + .byte 99 + .byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + 
.byte 250 + .byte 243 + .byte 251 + .byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + .byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global forkskinny_128_256_rounds + .type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi 
r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if 
defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 
+ eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + 
mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std 
Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global forkskinny_128_256_inv_rounds + .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd 
r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif 
defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + 
lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 
+#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 
+ std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r12 + movw r20,r14 
+ movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + dec r22 + breq 5109f + 
rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + + .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std 
Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld 
r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk + + .text +.global forkskinny_128_384_rounds + .type forkskinny_128_384_rounds, @function +forkskinny_128_384_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw 
r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + mov r30,r2 +#if 
defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + 
mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + 
lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + ldd r20,Y+45 + ldd r21,Y+46 + ldd r16,Y+47 + ldd r17,Y+48 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z 
+#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+33 + std Y+41,r0 + ldd r0,Y+34 + std Y+42,r0 + ldd r0,Y+35 + std Y+43,r0 + ldd r0,Y+36 + std Y+44,r0 + ldd r0,Y+37 + std Y+45,r0 + ldd r0,Y+38 + std Y+46,r0 + ldd r0,Y+39 + std Y+47,r0 + ldd r0,Y+40 + std Y+48,r0 + std Y+33,r19 + std Y+34,r17 + std Y+35,r18 + std Y+36,r21 + std Y+37,r26 + std Y+38,r16 + std Y+39,r20 + std Y+40,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd 
r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds + + .text +.global forkskinny_128_384_inv_rounds + .type forkskinny_128_384_inv_rounds, @function +forkskinny_128_384_inv_rounds: /* forkskinny_128_384_inv_rounds. Register inputs as used below: r25:r24 = address of a 64-byte block, r22 = start counter, r20 = stop counter. Bytes 0..47 of the block are copied to the stack frame and bytes 48..63 are kept in r2-r15,r24,r25. Both counters are doubled; each pass of loop 118 permutes the frame copy (mapping two of its 16-byte thirds through table_3 and table_2), moves, XORs and rotates the register bytes, XORs in frame bytes, two table_4 bytes (r22 is decremented before each fetch) and the constant 2, then maps all 16 register bytes through table_1. The loop exits when r22 equals the doubled r20, after which all 64 bytes are stored back. The exit test sits at the bottom of the loop, so the body always runs at least once. Each lookup replaces only the low byte of Z, which requires every table to start on a 256-byte boundary. NOTE(review): presumably the C prototype is (state, first, last), table_1 is the inverse S-box and table_4 holds the round constants - confirm against the header and the table definitions. */ + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 /* Z = block address taken from r25:r24 */ + in r28,0x3d + in r29,0x3e + sbiw r28,49 /* frame: Y[1..48] = copy of block bytes 0..47, Y[49] = doubled r20 (stored further down) */ + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 /* SP = Y; the high byte is written with interrupts disabled and SREG is restored before the low byte */ +.L__stack_usage = 67 /* 18 pushed registers plus the 49-byte frame */ + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 /* r2-r15,r24,r25 now hold block bytes 48..63 */ + ld r18,Z /* from here on: copy block bytes 0..47 into Y[1..48], four bytes at a time */ + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std
Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if 
defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + ldd r20,Y+37 + ldd r21,Y+38 + ldd r16,Y+39 + ldd r17,Y+40 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm 
r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+41 + std Y+33,r0 + ldd r0,Y+42 + std Y+34,r0 + ldd r0,Y+43 + std Y+35,r0 + ldd r0,Y+44 + std Y+36,r0 + ldd r0,Y+45 + std Y+37,r0 + ldd r0,Y+46 + std Y+38,r0 + ldd r0,Y+47 + std Y+39,r0 + ldd r0,Y+48 + std Y+40,r0 + std Y+41,r26 + std Y+42,r18 + std Y+43,r20 + std Y+44,r17 + std Y+45,r16 + std Y+46,r27 + std Y+47,r21 + std Y+48,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor 
r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + 
ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop 
r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds + + .text +.global forkskinny_128_384_forward_tk + .type forkskinny_128_384_forward_tk, @function +forkskinny_128_384_forward_tk: /* forkskinny_128_384_forward_tk. Register inputs as used below: r25:r24 = address of a 48-byte block, r22 = iteration count. Bytes 0..15 are held in r4-r15,r24,r25,r16,r17 and bytes 16..47 are copied to the frame Y[1..32]. Each pass of loop 83 applies the same byte permutation to all three 16-byte thirds: the first 8 bytes move to the second half and the old second half is reordered (9,15,8,13,10,14,12,11) into the first half. For bytes 16..31 the reordered bytes are also mapped through table_2, for bytes 32..47 through table_3. r22 is decremented and tested at the bottom of the loop, so the body runs at least once and a count of 0 wraps to 256 passes. All 48 bytes are then stored back. Each lookup replaces only the low byte of Z, which requires both tables to start on a 256-byte boundary. NOTE(review): presumably the thirds are TK1, TK2 and TK3 and the tables are the TK2 and TK3 LFSR steps - confirm against the table definitions. */ + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 /* Z = block address taken from r25:r24 */ + in r28,0x3d + in r29,0x3e + sbiw r28,32 /* frame: Y[1..32] = copy of block bytes 16..47 */ + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 /* SP = Y; the high byte is written with interrupts disabled and SREG is restored before the low byte */ +.L__stack_usage = 50 /* 18 pushed registers plus the 32-byte frame */ + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 /* r4-r15,r24,r25,r16,r17 now hold block bytes 0..15 */ + ldd r18,Z+16 /* from here on: copy block bytes 16..47 into Y[1..32], four bytes at a time */ + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 /* keep the block address on the stack while Z addresses the lookup tables */ + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 /* caller's RAMPZ value is saved so it can be restored after the loop */ + out
_SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 
+ std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r26,Y+29 + ldd r27,Y+30 + ldd r2,Y+31 + ldd r3,Y+32 + ldd r23,Y+17 + std Y+25,r23 + ldd r23,Y+18 + std Y+26,r23 + ldd r23,Y+19 + std Y+27,r23 + ldd r23,Y+20 + std Y+28,r23 + ldd r23,Y+21 + std Y+29,r23 + ldd r23,Y+22 + std Y+30,r23 + ldd r23,Y+23 + std Y+31,r23 + ldd r23,Y+24 + std Y+32,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+17,r19 + std Y+18,r3 + std Y+19,r18 + std Y+20,r27 + std Y+21,r20 + std Y+22,r2 + std Y+23,r26 + std Y+24,r21 + ldi r30,lo8(table_2) + ldi 
r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b /* repeat until r22 reaches zero */ +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 /* Z = block address that was pushed before the loop */ + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk + + .text +.global forkskinny_128_384_reverse_tk + .type forkskinny_128_384_reverse_tk, @function +forkskinny_128_384_reverse_tk: /* forkskinny_128_384_reverse_tk. Register inputs as used below: r25:r24 = address of a 48-byte block, r22 = iteration count. Bytes 0..15 are held in r2-r17 and bytes 16..47 are copied to a 32-byte frame Y[1..32]. Each pass of loop 83 applies the same byte permutation to all three 16-byte thirds: the second half moves to the first half and the old first 8 bytes are reordered (2,0,4,7,6,3,5,1) into the second half, which is the inverse of the byte permutation used by forkskinny_128_384_forward_tk. For bytes 16..31 those 8 bytes are first mapped through table_3, for bytes 32..47 through table_2, so the table roles are swapped relative to the forward routine. r22 is decremented and tested at the bottom of the loop, so the body runs at least once and a count of 0 wraps to 256 passes. All 48 bytes are then stored back. Each lookup replaces only the low byte of Z, which requires both tables to start on a 256-byte boundary. NOTE(review): presumably table_3 inverts table_2 and vice versa, which would make this the exact inverse of the forward routine - confirm against the table definitions. */ + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13
+ push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 
+ ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 + 
ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd 
r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + 
std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 
+#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out 
_SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 
+#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push 
r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if 
defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or 
r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm 
r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std 
Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + 
pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 + pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop 
r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + 
mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or 
r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + and r11,r16 + or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/internal-forkskinny.c b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/internal-forkskinny.c index af29f77..b96a04c 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/internal-forkskinny.c @@ -22,12 +22,23 @@ #include "internal-forkskinny.h" #include "internal-skinnyutil.h" -#include /** * \brief 7-bit round constants for all ForkSkinny block ciphers. 
*/ -static unsigned char const RC[87] = {0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7e, 0x7d, 0x7b, 0x77, 0x6f, 0x5f, 0x3e, 0x7c, 0x79, 0x73, 0x67, 0x4f, 0x1e, 0x3d, 0x7a, 0x75, 0x6b, 0x57, 0x2e, 0x5c, 0x38, 0x70, 0x61, 0x43, 0x06, 0x0d, 0x1b, 0x37, 0x6e, 0x5d, 0x3a, 0x74, 0x69, 0x53, 0x26, 0x4c, 0x18, 0x31, 0x62, 0x45, 0x0a, 0x15, 0x2b, 0x56, 0x2c, 0x58, 0x30, 0x60, 0x41, 0x02, 0x05, 0x0b, 0x17, 0x2f, 0x5e, 0x3c, 0x78, 0x71, 0x63, 0x47, 0x0e, 0x1d, 0x3b, 0x76, 0x6d, 0x5b,0x36, 0x6c, 0x59, 0x32, 0x64, 0x49, 0x12, 0x25, 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10}; +static unsigned char const RC[87] = { + 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7e, 0x7d, + 0x7b, 0x77, 0x6f, 0x5f, 0x3e, 0x7c, 0x79, 0x73, + 0x67, 0x4f, 0x1e, 0x3d, 0x7a, 0x75, 0x6b, 0x57, + 0x2e, 0x5c, 0x38, 0x70, 0x61, 0x43, 0x06, 0x0d, + 0x1b, 0x37, 0x6e, 0x5d, 0x3a, 0x74, 0x69, 0x53, + 0x26, 0x4c, 0x18, 0x31, 0x62, 0x45, 0x0a, 0x15, + 0x2b, 0x56, 0x2c, 0x58, 0x30, 0x60, 0x41, 0x02, + 0x05, 0x0b, 0x17, 0x2f, 0x5e, 0x3c, 0x78, 0x71, + 0x63, 0x47, 0x0e, 0x1d, 0x3b, 0x76, 0x6d, 0x5b, + 0x36, 0x6c, 0x59, 0x32, 0x64, 0x49, 0x12, 0x25, + 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 +}; static const uint32_t T[256] = {0x65656565, 0x4c4c4c4c, 0x6a6a6a6a, 0x42424242, 0x4b4b4b4b, 0x63636363, 0x43434343, 0x6b6b6b6b, 0x55555555, 0x75757575, 0x5a5a5a5a, 0x7a7a7a7a, 0x53535353, 0x73737373, 0x5b5b5b5b, 0x7b7b7b7b, 0x35353535, 0x8c8c8c8c, 0x3a3a3a3a, 0x81818181, 0x89898989, 0x33333333, 0x80808080, 0x3b3b3b3b, 0x95959595, 0x25252525, 0x98989898, 0x2a2a2a2a, 0x90909090, 0x23232323, 0x99999999, 0x2b2b2b2b, 0xe5e5e5e5, 0xcccccccc, 0xe8e8e8e8, 0xc1c1c1c1, 0xc9c9c9c9, 0xe0e0e0e0, 0xc0c0c0c0, 0xe9e9e9e9, 0xd5d5d5d5, 0xf5f5f5f5, 0xd8d8d8d8, 0xf8f8f8f8, 0xd0d0d0d0, 0xf0f0f0f0, 0xd9d9d9d9, 0xf9f9f9f9, 0xa5a5a5a5, 0x1c1c1c1c, 0xa8a8a8a8, 0x12121212, 0x1b1b1b1b, 0xa0a0a0a0, 0x13131313, 0xa9a9a9a9, 0x05050505, 0xb5b5b5b5, 0x0a0a0a0a, 0xb8b8b8b8, 0x03030303, 0xb0b0b0b0, 0x0b0b0b0b, 0xb9b9b9b9, 0x32323232, 0x88888888, 0x3c3c3c3c, 0x85858585, 
0x8d8d8d8d, 0x34343434, 0x84848484, 0x3d3d3d3d, 0x91919191, 0x22222222, 0x9c9c9c9c, 0x2c2c2c2c, 0x94949494, 0x24242424, 0x9d9d9d9d, 0x2d2d2d2d, 0x62626262, 0x4a4a4a4a, 0x6c6c6c6c, 0x45454545, 0x4d4d4d4d, 0x64646464, 0x44444444, 0x6d6d6d6d, 0x52525252, 0x72727272, 0x5c5c5c5c, 0x7c7c7c7c, 0x54545454, 0x74747474, 0x5d5d5d5d, 0x7d7d7d7d, 0xa1a1a1a1, 0x1a1a1a1a, 0xacacacac, 0x15151515, 0x1d1d1d1d, 0xa4a4a4a4, 0x14141414, 0xadadadad, 0x02020202, 0xb1b1b1b1, 0x0c0c0c0c, 0xbcbcbcbc, 0x04040404, 0xb4b4b4b4, 0x0d0d0d0d, 0xbdbdbdbd, 0xe1e1e1e1, 0xc8c8c8c8, 0xecececec, 0xc5c5c5c5, 0xcdcdcdcd, 0xe4e4e4e4, 0xc4c4c4c4, 0xedededed, 0xd1d1d1d1, 0xf1f1f1f1, 0xdcdcdcdc, 0xfcfcfcfc, 0xd4d4d4d4, 0xf4f4f4f4, 0xdddddddd, 0xfdfdfdfd, 0x36363636, 0x8e8e8e8e, 0x38383838, 0x82828282, 0x8b8b8b8b, 0x30303030, 0x83838383, 0x39393939, 0x96969696, 0x26262626, 0x9a9a9a9a, 0x28282828, 0x93939393, 0x20202020, 0x9b9b9b9b, 0x29292929, 0x66666666, 0x4e4e4e4e, 0x68686868, 0x41414141, 0x49494949, 0x60606060, 0x40404040, 0x69696969, 0x56565656, 0x76767676, 0x58585858, 0x78787878, 0x50505050, 0x70707070, 0x59595959, 0x79797979, 0xa6a6a6a6, 0x1e1e1e1e, 0xaaaaaaaa, 0x11111111, 0x19191919, 0xa3a3a3a3, 0x10101010, 0xabababab, 0x06060606, 0xb6b6b6b6, 0x08080808, 0xbabababa, 0x00000000, 0xb3b3b3b3, 0x09090909, 0xbbbbbbbb, 0xe6e6e6e6, 0xcececece, 0xeaeaeaea, 0xc2c2c2c2, 0xcbcbcbcb, 0xe3e3e3e3, 0xc3c3c3c3, 0xebebebeb, 0xd6d6d6d6, 0xf6f6f6f6, 0xdadadada, 0xfafafafa, 0xd3d3d3d3, 0xf3f3f3f3, 0xdbdbdbdb, 0xfbfbfbfb, 0x31313131, 0x8a8a8a8a, 0x3e3e3e3e, 0x86868686, 0x8f8f8f8f, 0x37373737, 0x87878787, 0x3f3f3f3f, 0x92929292, 0x21212121, 0x9e9e9e9e, 0x2e2e2e2e, 0x97979797, 0x27272727, 0x9f9f9f9f, 0x2f2f2f2f, 0x61616161, 0x48484848, 0x6e6e6e6e, 0x46464646, 0x4f4f4f4f, 0x67676767, 0x47474747, 0x6f6f6f6f, 0x51515151, 0x71717171, 0x5e5e5e5e, 0x7e7e7e7e, 0x57575757, 0x77777777, 0x5f5f5f5f, 0x7f7f7f7f, 0xa2a2a2a2, 0x18181818, 0xaeaeaeae, 0x16161616, 0x1f1f1f1f, 0xa7a7a7a7, 0x17171717, 0xafafafaf, 0x01010101, 0xb2b2b2b2, 
0x0e0e0e0e, 0xbebebebe, 0x07070707, 0xb7b7b7b7, 0x0f0f0f0f, 0xbfbfbfbf, 0xe2e2e2e2, 0xcacacaca, 0xeeeeeeee, 0xc6c6c6c6, 0xcfcfcfcf, 0xe7e7e7e7, 0xc7c7c7c7, 0xefefefef, 0xd2d2d2d2, 0xf2f2f2f2, 0xdededede, 0xfefefefe, 0xd7d7d7d7, 0xf7f7f7f7, 0xdfdfdfdf, 0xffffffff}; static const uint32_t T_inv[256] = {0xacacacac, 0xe8e8e8e8, 0x68686868, 0x3c3c3c3c, 0x6c6c6c6c, 0x38383838, 0xa8a8a8a8, 0xecececec, 0xaaaaaaaa, 0xaeaeaeae, 0x3a3a3a3a, 0x3e3e3e3e, 0x6a6a6a6a, 0x6e6e6e6e, 0xeaeaeaea, 0xeeeeeeee, 0xa6a6a6a6, 0xa3a3a3a3, 0x33333333, 0x36363636, 0x66666666, 0x63636363, 0xe3e3e3e3, 0xe6e6e6e6, 0xe1e1e1e1, 0xa4a4a4a4, 0x61616161, 0x34343434, 0x31313131, 0x64646464, 0xa1a1a1a1, 0xe4e4e4e4, 0x8d8d8d8d, 0xc9c9c9c9, 0x49494949, 0x1d1d1d1d, 0x4d4d4d4d, 0x19191919, 0x89898989, 0xcdcdcdcd, 0x8b8b8b8b, 0x8f8f8f8f, 0x1b1b1b1b, 0x1f1f1f1f, 0x4b4b4b4b, 0x4f4f4f4f, 0xcbcbcbcb, 0xcfcfcfcf, 0x85858585, 0xc0c0c0c0, 0x40404040, 0x15151515, 0x45454545, 0x10101010, 0x80808080, 0xc5c5c5c5, 0x82828282, 0x87878787, 0x12121212, 0x17171717, 0x42424242, 0x47474747, 0xc2c2c2c2, 0xc7c7c7c7, 0x96969696, 0x93939393, 0x03030303, 0x06060606, 0x56565656, 0x53535353, 0xd3d3d3d3, 0xd6d6d6d6, 0xd1d1d1d1, 0x94949494, 0x51515151, 0x04040404, 0x01010101, 0x54545454, 0x91919191, 0xd4d4d4d4, 0x9c9c9c9c, 0xd8d8d8d8, 0x58585858, 0x0c0c0c0c, 0x5c5c5c5c, 0x08080808, 0x98989898, 0xdcdcdcdc, 0x9a9a9a9a, 0x9e9e9e9e, 0x0a0a0a0a, 0x0e0e0e0e, 0x5a5a5a5a, 0x5e5e5e5e, 0xdadadada, 0xdededede, 0x95959595, 0xd0d0d0d0, 0x50505050, 0x05050505, 0x55555555, 0x00000000, 0x90909090, 0xd5d5d5d5, 0x92929292, 0x97979797, 0x02020202, 0x07070707, 0x52525252, 0x57575757, 0xd2d2d2d2, 0xd7d7d7d7, 0x9d9d9d9d, 0xd9d9d9d9, 0x59595959, 0x0d0d0d0d, 0x5d5d5d5d, 0x09090909, 0x99999999, 0xdddddddd, 0x9b9b9b9b, 0x9f9f9f9f, 0x0b0b0b0b, 0x0f0f0f0f, 0x5b5b5b5b, 0x5f5f5f5f, 0xdbdbdbdb, 0xdfdfdfdf, 0x16161616, 0x13131313, 0x83838383, 0x86868686, 0x46464646, 0x43434343, 0xc3c3c3c3, 0xc6c6c6c6, 0x41414141, 0x14141414, 0xc1c1c1c1, 0x84848484, 0x11111111, 
0x44444444, 0x81818181, 0xc4c4c4c4, 0x1c1c1c1c, 0x48484848, 0xc8c8c8c8, 0x8c8c8c8c, 0x4c4c4c4c, 0x18181818, 0x88888888, 0xcccccccc, 0x1a1a1a1a, 0x1e1e1e1e, 0x8a8a8a8a, 0x8e8e8e8e, 0x4a4a4a4a, 0x4e4e4e4e, 0xcacacaca, 0xcececece, 0x35353535, 0x60606060, 0xe0e0e0e0, 0xa5a5a5a5, 0x65656565, 0x30303030, 0xa0a0a0a0, 0xe5e5e5e5, 0x32323232, 0x37373737, 0xa2a2a2a2, 0xa7a7a7a7, 0x62626262, 0x67676767, 0xe2e2e2e2, 0xe7e7e7e7, 0x3d3d3d3d, 0x69696969, 0xe9e9e9e9, 0xadadadad, 0x6d6d6d6d, 0x39393939, 0xa9a9a9a9, 0xedededed, 0x3b3b3b3b, 0x3f3f3f3f, 0xabababab, 0xafafafaf, 0x6b6b6b6b, 0x6f6f6f6f, 0xebebebeb, 0xefefefef, 0x26262626, 0x23232323, 0xb3b3b3b3, 0xb6b6b6b6, 0x76767676, 0x73737373, 0xf3f3f3f3, 0xf6f6f6f6, 0x71717171, 0x24242424, 0xf1f1f1f1, 0xb4b4b4b4, 0x21212121, 0x74747474, 0xb1b1b1b1, 0xf4f4f4f4, 0x2c2c2c2c, 0x78787878, 0xf8f8f8f8, 0xbcbcbcbc, 0x7c7c7c7c, 0x28282828, 0xb8b8b8b8, 0xfcfcfcfc, 0x2a2a2a2a, 0x2e2e2e2e, 0xbabababa, 0xbebebebe, 0x7a7a7a7a, 0x7e7e7e7e, 0xfafafafa, 0xfefefefe, 0x25252525, 0x70707070, 0xf0f0f0f0, 0xb5b5b5b5, 0x75757575, 0x20202020, 0xb0b0b0b0, 0xf5f5f5f5, 0x22222222, 0x27272727, 0xb2b2b2b2, 0xb7b7b7b7, 0x72727272, 0x77777777, 0xf2f2f2f2, 0xf7f7f7f7, 0x2d2d2d2d, 0x79797979, 0xf9f9f9f9, 0xbdbdbdbd, 0x7d7d7d7d, 0x29292929, 0xb9b9b9b9, 0xfdfdfdfd, 0x2b2b2b2b, 0x2f2f2f2f, 0xbbbbbbbb, 0xbfbfbfbf, 0x7b7b7b7b, 0x7f7f7f7f, 0xfbfbfbfb, 0xffffffff}; @@ -36,77 +47,12 @@ static const uint32_t AC_column0[87] = {0x1000101, 0x3000303, 0x7000707, 0xf000f static const uint32_t AC_column1[87] = {0x0, 0x0, 0x0, 0x0, 0x10000, 0x30000, 0x70000, 0x70000, 0x70000, 0x70000, 0x60000, 0x50000, 0x30000, 0x70000, 0x70000, 0x70000, 0x60000, 0x40000, 0x10000, 0x30000, 0x70000, 0x70000, 0x60000, 0x50000, 0x20000, 0x50000, 0x30000, 0x70000, 0x60000, 0x40000, 0x0, 0x0, 0x10000, 0x30000, 0x60000, 0x50000, 0x30000, 0x70000, 0x60000, 0x50000, 0x20000, 0x40000, 0x10000, 0x30000, 0x60000, 0x40000, 0x0, 0x10000, 0x20000, 0x50000, 0x20000, 0x50000, 0x30000, 0x60000, 0x40000, 0x0, 0x0, 
0x0, 0x10000, 0x20000, 0x50000, 0x30000, 0x70000, 0x70000, 0x60000, 0x40000, 0x0, 0x10000, 0x30000, 0x70000, 0x60000, 0x50000, 0x30000, 0x60000, 0x50000, 0x30000, 0x60000, 0x40000, 0x10000, 0x20000, 0x40000, 0x10000, 0x20000, 0x50000, 0x20000, 0x40000, 0x10000}; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 +#if !defined(__AVR__) -/** - * \brief State information for ForkSkinny-128-256. - */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_256_state_t; - -#define load_column_8(dest, src) \ - do { \ - dest[0] = (src[12]) << 24 | (src[8]) << 16 | (src[4]) << 8 | (src[0]); \ - dest[1] = (src[13]) << 24 | (src[9]) << 16 | (src[5]) << 8 | (src[1]); \ - dest[2] = (src[14]) << 24 | (src[10]) << 16 | (src[6]) << 8 | (src[2]); \ - dest[3] = (src[15]) << 24 | (src[11]) << 16 | (src[7]) << 8 | (src[3]); \ - } while(0) - -#define store_column_8(dest, src) \ - do { \ - dest[0] = (uint8_t) (src[0]); dest[1] = (uint8_t) (src[1]); dest[2] = (uint8_t) (src[2]); dest[3] = (uint8_t) (src[3]); \ - dest[4] = (uint8_t) (src[0]>>8); dest[5] = (uint8_t) (src[1]>>8); dest[6] = (uint8_t) (src[2]>>8); dest[7] = (uint8_t) (src[3]>>8); \ - dest[8] = (uint8_t) (src[0]>>16);dest[9] = (uint8_t) (src[1]>>16);dest[10]= (uint8_t) (src[2]>>16);dest[11]= (uint8_t)(src[3]>>16); \ - dest[12]= (uint8_t) (src[0]>>24);dest[13]= (uint8_t) (src[1]>>24);dest[14]= (uint8_t) (src[2]>>24);dest[15]= (uint8_t)(src[3]>>24); \ - } while(0) - -#define rows_to_columns_32(columns, row0, row1, row2, row3) \ - do { \ - columns[0] = (row3 & 0xFF) << 24|(row2 & 0xFF) << 16|(row1 & 0xFF) << 8 | (row0 & 0xFF);\ - columns[1] = (row3 & 0xFF00) << 16|(row2 & 0xFF00) << 8 |(row1 & 0xFF00) | 
(row0>>8 & 0xFF);\ - columns[2] = (row3 & 0xFF0000) << 8 |(row2 & 0xFF0000) |(row1 & 0xFF0000) >> 8 | (row0>>16 & 0xFF);\ - columns[3] = (row3 & 0xFF000000) |(row2 & 0xFF000000) >> 8 |(row1 & 0xFF000000) >> 16| (row0>>24 & 0xFF);\ - } while(0) - -#define columns_to_rows_32(rows, column0, column1, column2, column3) rows_to_columns_32(rows, column0, column1, column2, column3) - -#define TK_to_column_256(columns, state) \ - do { \ - uint32_t TK0 = state->TK1[0] ^ state->TK2[0];\ - uint32_t TK1 = state->TK1[1] ^ state->TK2[1]; \ - uint32_t tk00 = TK0 & 0xFF; \ - uint32_t tk01 = TK0 & 0xFF00;\ - uint32_t tk02 = TK0 & 0xFF0000;\ - uint32_t tk03 = TK0 & 0xFF000000;\ - columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ - columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ - columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ - columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ - } while(0) - -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { - uint32_t s0, s1, s2, s3; + uint32_t s0, s1, s2, s3, t0, t1, t2, t3; uint32_t tk_columns[4]; /* Load the state into local variables */ @@ -115,102 +61,45 @@ static void forkskinny_128_256_round s2 = state->S[2]; s3 = state->S[3]; - TK_to_column_256(tk_columns, state); + /* Perform all requested rounds */ + for (; first < last; ++first) { - state->S[0] = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[round]; - state->S[1] = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[round]; - state->S[2] = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; - state->S[3] = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) ^ tk_columns[3]; + TK_to_column_256(tk_columns, state); - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); -} + t0 = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[first]; + t1 = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[first]; + t2 = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; + t3 = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) 
^ tk_columns[3]; + /* Permute TK1 and TK2 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); -void forkskinny_128_256_encrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_256_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - - /* State stored per column */ - load_column_8(state.S, input); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) { - forkskinny_128_256_round(&state, round); + s0 = t0; s1 = t1; s2 = t2; s3 = t3; } - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - store_column_8(output_right, state.S); - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x51051001; /* Branching constant */ - state.S[1] ^= 0xa20a2002; - state.S[2] ^= 0x44144104; - state.S[3] ^= 0x88288208; - - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - 
FORKSKINNY_128_256_ROUNDS_AFTER); - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_256_round(&state, round); - } - store_column_8(output_left, state.S); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - store_column_8(output_right, state.S); - } + /* Save the local variables back to the state */ + state->S[0] = s0; + state->S[1] = s1; + state->S[2] = s2; + state->S[3] = s3; } -static void forkskinny_128_256_inv_round_first(forkskinny_128_256_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3, temp; - uint8_t rc; +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) +{ + uint32_t s0, s1, s2, s3, t0, t1, t2, t3, tk0, tk1; + uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; + t0 = state->S[0]; + t1 = state->S[1]; + t2 = state->S[2]; + t3 = state->S[3]; + + // FIRST ROUND /* Permute TK1 and TK2 for the next round */ skinny128_inv_LFSR2(state->TK2[0]); @@ -219,215 +108,128 @@ static void forkskinny_128_256_inv_round_first(forkskinny_128_256_state_t *state skinny128_inv_permute_tk(state->TK2); /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - temp = state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s1 ^= leftRotate8(temp); - s2 ^= 0x020000; - - /* Save the local variables back to the state but first convert them to columns*/ - rows_to_columns_32(state->S, s0, s1, s2, s3); -} - -static void forkskinny_128_256_inv_round(forkskinny_128_256_state_t 
*state, unsigned round){ - uint32_t s0, s1, s2, s3, tk0, tk1; - uint8_t rc; - - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - - state->S[0] = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); - state->S[1] = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); - state->S[2] = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); - state->S[3] = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + s0 = t0; + t0 = t1; + t1 = t2; + t2 = t3; + t3 = s0 ^ t2; + t2 ^= t0; + t1 ^= t2; /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - tk0 = state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - tk1 = state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - tk1 = leftRotate8(tk1); - // temp = 0x020000; - state->S[0] ^= (((tk0) &0xff) | ((tk1<<8)&0xff00)); - state->S[1] ^= (((tk0>>8) &0xff) | ((tk1)&0xff00)); - state->S[2] ^= (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; - state->S[3] ^= (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); -} - -static void forkskinny_128_256_inv_round_final(forkskinny_128_256_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3; + rc = RC[--first]; + t0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + t1 ^= leftRotate8((state->TK1[1] ^ state->TK2[1] ^ (rc >> 4))); + t2 ^= 0x020000; + + /* Save the local variables in temp but first convert them to columns*/ + rows_to_columns_32(s0, 
s1, s2, s3, t0, t1, t2, t3); + + + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + + t0 = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); + t1 = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); + t2 = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); + t3 = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + + /* XOR the shifted round constant and the shifted subkey for this round */ + rc = RC[--first]; + tk0 = state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + tk1 = leftRotate8((state->TK1[1] ^ state->TK2[1] ^ (rc >> 4))); + + s0 = t0 ^ (((tk0) &0xff) | ((tk1<<8)&0xff00)); + s1 = t1 ^ (((tk0>>8) &0xff) | ((tk1)&0xff00)); + s2 = t2 ^ (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; + s3 = t3 ^ (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); + } - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; + // FINAL ROUND - /* Apply the inverse of the S-box to all cells in the state */ + /* Apply the inverse of the S-box to all cells in the state */ skinny128_inv_sbox(s0); skinny128_inv_sbox(s1); skinny128_inv_sbox(s2); skinny128_inv_sbox(s3); /* Save the local variables back to the state but first convert them back to rows*/ - columns_to_rows_32(state->S, s0, s1, s2, s3); + columns_to_rows_32(t0, t1, t2, t3, s0, s1, s2, s3); /* Shift the cells in the rows left, which moves the cell * values down closer to the LSB. 
That is, we do a right * rotate on the word to rotate the cells in the word left */ - state->S[1] = rightRotate8(state->S[1]); - state->S[2] = rightRotate16(state->S[2]); - state->S[3] = rightRotate24(state->S[3]); + state->S[0] = t0; + state->S[1] = rightRotate8(t1); + state->S[2] = rightRotate16(t2); + state->S[3] = rightRotate24(t3); } -void forkskinny_128_256_decrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds) { - forkskinny_128_256_state_t state; - forkskinny_128_256_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - forkskinny_128_256_inv_round_first(&state, (FORKSKINNY_128_256_ROUNDS_BEFORE+FORKSKINNY_128_256_ROUNDS_AFTER*2)-1); - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2)-1; - round > (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); --round) { - forkskinny_128_256_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + --rounds; } - forkskinny_128_256_inv_round_final(&state, (FORKSKINNY_128_256_ROUNDS_BEFORE+FORKSKINNY_128_256_ROUNDS_AFTER)); - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - } - - /* Save the state and the tweakey at the forking point, convert state to columns */ - memcpy(fstate.TK1, state.TK1, 16); - memcpy(fstate.TK2, state.TK2, 16); - rows_to_columns_32(fstate.S,state.S[0],state.S[1], state.S[2], state.S[3]); +} - /* Generate the left output block after another "before" rounds */ - forkskinny_128_256_inv_round_first(&state, FORKSKINNY_128_256_ROUNDS_BEFORE-1); - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE-1; round > 0; --round) { - 
forkskinny_128_256_inv_round(&state, round - 1); +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + } + rounds -= 16; } - forkskinny_128_256_inv_round_final(&state, 0); - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + --rounds; } - store_column_8(output_right,fstate.S); - } -/** - * \brief Number of rounds of ForkSkinny-128-384 before forking. - */ -#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 - -/** - * \brief Number of rounds of ForkSkinny-128-384 after forking. - */ -#define FORKSKINNY_128_384_ROUNDS_AFTER 31 - -/** - * \brief State information for ForkSkinny-128-384. 
- */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -#define TK_to_column_384(columns, state) \ - do { \ - uint32_t TK0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0];\ - uint32_t TK1 = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1];\ - uint32_t tk00 = TK0 & 0xFF; \ - uint32_t tk01 = TK0 & 0xFF00;\ - uint32_t tk02 = TK0 & 0xFF0000;\ - uint32_t tk03 = TK0 & 0xFF000000;\ - columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ - columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ - columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ - columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ - } while(0) - -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { - uint32_t s0, s1, s2, s3; + uint32_t s0, s1, s2, s3, t0, t1, t2, t3; uint32_t tk_columns[4]; /* Load the state into local variables */ @@ -436,148 +238,48 @@ static void forkskinny_128_384_round s2 = state->S[2]; s3 = state->S[3]; - TK_to_column_384(tk_columns, state); - - state->S[0] = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[round]; - state->S[1] = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[round]; - state->S[2] = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; - state->S[3] = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) ^ tk_columns[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_permute_tk(state->TK3); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); - skinny128_LFSR3(state->TK3[0]); - skinny128_LFSR3(state->TK3[1]); -} + /* Perform all requested rounds */ + for (; first < last; ++first) { -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - 
state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - - /* State stored per column */ - load_column_8(state.S, input); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, round); - } + TK_to_column_384(tk_columns, state); - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - store_column_8(output_right, state.S); + t0 = (T[s0 & 0xff]&0xff00ffff) ^ (T[(s3>>8) & 0xff]&0x00ff0000) ^ (T[(s2>>16) & 0xff]&0xffff00ff) ^ (T[(s1>>24)]&0xff) ^ tk_columns[0] ^ AC_column0[first]; + t1 = (T[s1 & 0xff]&0xff00ffff) ^ (T[(s0>>8) & 0xff]&0x00ff0000) ^ (T[(s3>>16) & 0xff]&0xffff00ff) ^ (T[(s2>>24)]&0xff) ^ tk_columns[1] ^ AC_column1[first]; + t2 = (T[s2 & 0xff]&0xff00ffff) ^ (T[(s1>>8) & 0xff]&0x00ff0000) ^ (T[(s0>>16) & 0xff]&0xffff00ff) ^ (T[(s3>>24)]&0xff) ^ tk_columns[2] ^ 0x00020200; + t3 = (T[s3 & 0xff]&0xff00ffff) ^ (T[(s2>>8) & 0xff]&0x00ff0000) ^ (T[(s1>>16) & 0xff]&0xffff00ff) ^ (T[(s0>>24)]&0xff) ^ tk_columns[3]; - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x51051001; /* Branching constant */ - state.S[1] ^= 0xa20a2002; - state.S[2] ^= 0x44144104; - state.S[3] ^= 0x88288208; - 
for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, round); - } - store_column_8(output_left, state.S); - - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - store_column_8(output_right, state.S); + /* Permute TK1, TK2, and TK3 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + + s0 = t0; s1 = t1; s2 = t2; s3 = t3; } -} + /* Save the local variables back to the state */ + state->S[0] = s0; + state->S[1] = s1; + state->S[2] = s2; + state->S[3] = s3; +} -static void forkskinny_128_384_inv_round_first(forkskinny_128_384_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3, temp; - uint8_t rc; +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) +{ + uint32_t s0, s1, s2, s3, t0, t1, t2, t3, tk0, tk1; + uint8_t rc; /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_LFSR3(state->TK3[0]); - skinny128_inv_LFSR3(state->TK3[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - skinny128_inv_permute_tk(state->TK3); + t0 = state->S[0]; + t1 = state->S[1]; + t2 = state->S[2]; + t3 = state->S[3]; - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; 
- - /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; - temp = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s1 ^= leftRotate8(temp); - s2 ^= 0x020000; - - /* Save the local variables back to the state but first convert them to columns*/ - rows_to_columns_32(state->S, s0, s1, s2, s3); -} - -static void forkskinny_128_384_inv_round(forkskinny_128_384_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3, tk0, tk1; - uint8_t rc; - - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; + // FIRST ROUND /* Permute TK1 and TK2 for the next round */ skinny128_inv_LFSR2(state->TK2[0]); @@ -588,177 +290,144 @@ static void forkskinny_128_384_inv_round(forkskinny_128_384_state_t *state, unsi skinny128_inv_permute_tk(state->TK2); skinny128_inv_permute_tk(state->TK3); - - state->S[0] = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); - state->S[1] = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); - state->S[2] = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); - state->S[3] = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + /* Inverse mix of the columns */ + s0 = t0; + t0 = t1; + t1 = t2; + t2 = t3; + t3 = s0 ^ t2; + t2 ^= t0; + t1 ^= t2; /* XOR the shifted round constant and the shifted subkey for this round */ - rc = RC[round]; - tk0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; - tk1 = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - tk1 = leftRotate8(tk1); - - state->S[0] 
^= (((tk0) &0xff) | ((tk1<<8)&0xff00)); - state->S[1] ^= (((tk0>>8) &0xff) | ((tk1)&0xff00)); - state->S[2] ^= (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; - state->S[3] ^= (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); -} + rc = RC[--first]; + t0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; + t1 ^= leftRotate8((state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4))); + t2 ^= 0x020000; + + /* Save the local variables in temp but first convert them to columns*/ + rows_to_columns_32(s0, s1, s2, s3, t0, t1, t2, t3); + + + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + + t0 = (T_inv[s0 & 0xff]&0xff000000) ^ (T_inv[(s1>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s2>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s3>>24)]&0xffffff00); + t1 = (T_inv[s1 & 0xff]&0xff000000) ^ (T_inv[(s2>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s3>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s0>>24)]&0xffffff00); + t2 = (T_inv[s2 & 0xff]&0xff000000) ^ (T_inv[(s3>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s0>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s1>>24)]&0xffffff00); + t3 = (T_inv[s3 & 0xff]&0xff000000) ^ (T_inv[(s0>>8) & 0xff]&0x00ffffff) ^ (T_inv[(s1>>16) & 0xff]&0x0000ff00) ^ (T_inv[(s2>>24)]&0xffffff00); + + /* XOR the shifted round constant and the shifted subkey for this round */ + rc = RC[--first]; + tk0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ (rc & 0x0F) ^ 0x00020000; + tk1 = leftRotate8((state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4))); + + s0 = t0 ^ (((tk0) &0xff) | ((tk1<<8)&0xff00)); + s1 = t1 ^ (((tk0>>8) &0xff) | ((tk1)&0xff00)); + s2 = t2 ^ (((tk0>>16)&0xff) | ((tk1>>8)&0xff00)) ^ 0x020000; + s3 = t3 ^ (((tk0>>24)&0xff) | ((tk1>>16)&0xff00)); + 
} -static void forkskinny_128_384_inv_round_final(forkskinny_128_384_state_t *state, unsigned round){ - uint32_t s0, s1, s2, s3; + // FINAL ROUND - /* Load the state into local variables */ - s0 = state->S[0]; - s1 = state->S[1]; - s2 = state->S[2]; - s3 = state->S[3]; - - /* Apply the inverse of the S-box to all cells in the state */ + /* Apply the inverse of the S-box to all cells in the state */ skinny128_inv_sbox(s0); skinny128_inv_sbox(s1); skinny128_inv_sbox(s2); skinny128_inv_sbox(s3); /* Save the local variables back to the state but first convert them back to rows*/ - columns_to_rows_32(state->S, s0, s1, s2, s3); + columns_to_rows_32(t0, t1, t2, t3, s0, s1, s2, s3); /* Shift the cells in the rows left, which moves the cell * values down closer to the LSB. That is, we do a right * rotate on the word to rotate the cells in the word left */ - state->S[1] = rightRotate8(state->S[1]); - state->S[2] = rightRotate16(state->S[2]); - state->S[3] = rightRotate24(state->S[3]); + state->S[0] = t0; + state->S[1] = rightRotate8(t1); + state->S[2] = rightRotate16(t2); + state->S[3] = rightRotate24(t3); } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 
44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_permute_tk(state.TK3); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); - skinny128_LFSR3(state.TK3[0]); - skinny128_LFSR3(state.TK3[1]); - } - - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - forkskinny_128_384_inv_round_first(&state, (FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER * 2)-1); - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2)-1; - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, round - 1); - } - forkskinny_128_384_inv_round_final(&state, (FORKSKINNY_128_384_ROUNDS_BEFORE + FORKSKINNY_128_384_ROUNDS_AFTER)); - - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_LFSR3(state.TK3[0]); - skinny128_inv_LFSR3(state.TK3[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - skinny128_inv_permute_tk(state.TK3); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + skinny128_LFSR3(state->TK3[2]); + skinny128_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point, convert state to columns */ - memcpy(fstate.TK1, state.TK1, 16); - memcpy(fstate.TK2, state.TK2, 16); - memcpy(fstate.TK3, state.TK3, 16); - rows_to_columns_32(fstate.S,state.S[0],state.S[1], state.S[2], state.S[3]); - - /* Generate the left output block after another "before" rounds */ - forkskinny_128_384_inv_round_first(&state, FORKSKINNY_128_384_ROUNDS_BEFORE - 1); - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE-1; round > 0; --round) { - forkskinny_128_384_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + --rounds; } - forkskinny_128_384_inv_round_final(&state, 0); - - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, round); - } - store_column_8(output_right, fstate.S); } -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. 
- */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 - -/** - * \brief State information for ForkSkinny-64-192. - */ -typedef struct +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - uint16_t TK1[4]; /**< First part of the tweakey */ - uint16_t TK2[4]; /**< Second part of the tweakey */ - uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_LFSR3(state->TK3[2]); + skinny128_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; + } -} forkskinny_64_192_state_t; + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + --rounds; + } +} -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. 
- */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -769,144 +438,55 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + } /* Save the local 
variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_permute_tk(state->TK1); - skinny64_permute_tk(state->TK2); - skinny64_permute_tk(state->TK3); - skinny64_LFSR2(state->TK2[0]); - skinny64_LFSR2(state->TK2[1]); - skinny64_LFSR3(state->TK3[0]); - skinny64_LFSR3(state->TK3[1]); } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - 
be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -917,42 +497,45 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_inv_LFSR2(state->TK2[0]); - skinny64_inv_LFSR2(state->TK2[1]); - skinny64_inv_LFSR3(state->TK3[0]); - skinny64_inv_LFSR3(state->TK3[1]); - skinny64_inv_permute_tk(state->TK1); - skinny64_inv_permute_tk(state->TK2); - skinny64_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round 
constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -961,92 +544,74 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds) { - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - skinny64_permute_tk(state.TK1); - skinny64_permute_tk(state.TK2); - skinny64_permute_tk(state.TK3); - skinny64_LFSR2(state.TK2[0]); - skinny64_LFSR2(state.TK2[1]); - skinny64_LFSR3(state.TK3[0]); - 
skinny64_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR2(state->TK2[2]); + skinny64_LFSR2(state->TK2[3]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + skinny64_LFSR3(state->TK3[2]); + skinny64_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_AFTER; ++round) { - skinny64_inv_LFSR2(state.TK2[0]); - skinny64_inv_LFSR2(state.TK2[1]); - skinny64_inv_LFSR3(state.TK3[0]); - skinny64_inv_LFSR3(state.TK3[1]); - skinny64_inv_permute_tk(state.TK1); - skinny64_inv_permute_tk(state.TK2); - skinny64_inv_permute_tk(state.TK3); +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_inv_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR2(state->TK2[2]); + skinny64_inv_LFSR2(state->TK2[3]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_LFSR3(state->TK3[2]); + skinny64_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + --rounds; } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); } +#endif /* !__AVR__ */ diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/internal-forkskinny.h b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/internal-forkskinny.h index 0c1a707..e3014d3 100644 
--- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,158 @@ extern "C" { #endif /** + * \brief State information for ForkSkinny-128-256. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +/** + * \brief State information for ForkSkinny-128-384. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +/** + * \brief State information for ForkSkinny-64-192. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. 
+ */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first First round to apply. 
+ * \param last Last round to apply plus 1. + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. * * \param key 256-bit tweakey for ForkSkinny-128-256. 
diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/internal-skinnyutil.h index 83136cb..f2bc8ca 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/opt32_table/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. 
The original version from the specification is * equivalent to: @@ -319,6 +349,61 @@ do { \ x = ((x << 1) & 0xEEEEU) | ((x >> 3) & 0x1111U); \ } while (0) +#define rows_to_columns_32(column0, column1, column2, column3, row0, row1, row2, row3) \ + do { \ + column0 = (row3 & 0xFF) << 24|(row2 & 0xFF) << 16|(row1 & 0xFF) << 8 | (row0 & 0xFF);\ + column1 = (row3 & 0xFF00) << 16|(row2 & 0xFF00) << 8 |(row1 & 0xFF00) | (row0>>8 & 0xFF);\ + column2 = (row3 & 0xFF0000) << 8 |(row2 & 0xFF0000) |(row1 & 0xFF0000) >> 8 | (row0>>16 & 0xFF);\ + column3 = (row3 & 0xFF000000) |(row2 & 0xFF000000) >> 8 |(row1 & 0xFF000000) >> 16| (row0>>24 & 0xFF);\ + } while(0) + +#define columns_to_rows_32(row0, row1, row2, row3, column0, column1, column2, column3) rows_to_columns_32(row0, row1, row2, row3, column0, column1, column2, column3) + +#define load_column_8(dest, src) \ + do { \ + dest[0] = (src[12]) << 24 | (src[8]) << 16 | (src[4]) << 8 | (src[0]); \ + dest[1] = (src[13]) << 24 | (src[9]) << 16 | (src[5]) << 8 | (src[1]); \ + dest[2] = (src[14]) << 24 | (src[10]) << 16 | (src[6]) << 8 | (src[2]); \ + dest[3] = (src[15]) << 24 | (src[11]) << 16 | (src[7]) << 8 | (src[3]); \ + } while(0) + +#define store_column_8(dest, src) \ + do { \ + dest[0] = (uint8_t) (src[0]); dest[1] = (uint8_t) (src[1]); dest[2] = (uint8_t) (src[2]); dest[3] = (uint8_t) (src[3]); \ + dest[4] = (uint8_t) (src[0]>>8); dest[5] = (uint8_t) (src[1]>>8); dest[6] = (uint8_t) (src[2]>>8); dest[7] = (uint8_t) (src[3]>>8); \ + dest[8] = (uint8_t) (src[0]>>16);dest[9] = (uint8_t) (src[1]>>16);dest[10]= (uint8_t) (src[2]>>16);dest[11]= (uint8_t)(src[3]>>16); \ + dest[12]= (uint8_t) (src[0]>>24);dest[13]= (uint8_t) (src[1]>>24);dest[14]= (uint8_t) (src[2]>>24);dest[15]= (uint8_t)(src[3]>>24); \ + } while(0) + + +#define TK_to_column_256(columns, state) \ + do { \ + uint32_t TK0 = state->TK1[0] ^ state->TK2[0];\ + uint32_t TK1 = state->TK1[1] ^ state->TK2[1]; \ + uint32_t tk00 = TK0 & 0xFF; \ + uint32_t tk01 = TK0 & 0xFF00;\ 
+ uint32_t tk02 = TK0 & 0xFF0000;\ + uint32_t tk03 = TK0 & 0xFF000000;\ + columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ + columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ + columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ + columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ + } while(0) + +#define TK_to_column_384(columns, state) \ +do { \ + uint32_t TK0 = state->TK1[0] ^ state->TK2[0] ^ state->TK3[0];\ + uint32_t TK1 = state->TK1[1] ^ state->TK2[1] ^ state->TK3[1];\ + uint32_t tk00 = TK0 & 0xFF; \ + uint32_t tk01 = TK0 & 0xFF00;\ + uint32_t tk02 = TK0 & 0xFF0000;\ + uint32_t tk03 = TK0 & 0xFF000000;\ + columns[0] = tk00 << 24 | (TK1 & 0xFF000000) >> 8 | tk00 << 8 | tk00; \ + columns[1] = tk01 << 16 | (TK1 & 0xFF) << 16 | tk01 | tk01 >> 8; \ + columns[2] = tk02 << 8 | (TK1 & 0xFF00) << 8 | tk02 >> 8 | tk02 >> 16; \ + columns[3] = tk03 | (TK1 & 0xFF0000) | tk03 >> 16 | tk03 >> 24; \ +} while(0) + /** @endcond */ #ifdef __cplusplus -- libgit2 0.26.0