From b941942086cff43b4c55cb00e97a94f58bc0b9f0 Mon Sep 17 00:00:00 2001 From: Sebastien Riou Date: Sun, 31 May 2020 07:57:42 +0000 Subject: [PATCH] drygascon add_arm_cortex-m --- drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/aead-common.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/aead-common.h | 256 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/api.h | 5 +++++ drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon.c |drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon.h | 437 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v6m.S |drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v7m.S |drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v7m_fpu.S | 692 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v7m_fpu_x.S |drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm_selector.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/encrypt.c | 25 +++++++++++++++++++++++++ drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/implementors | 2 ++ drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/internal-drysponge.c |drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/internal-drysponge.h | 379 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/internal-util.h | 702 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/nistlwc | 0 drygascon/Implementations/crypto_aead/drygascon128/designers | 1 + drygascon/Implementations/crypto_aead/drygascon128/ref/implementors | 1 + drygascon/Implementations/crypto_aead/drygascon128/ref/nistlwc | 0 19 files changed, 5941 insertions(+) create mode 100644 drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/aead-common.c create mode 100644 drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/aead-common.h create mode 100644 drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/api.h create mode 100644 drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon.c create mode 100644 drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon.h create mode 100644 drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v6m.S create mode 100644 drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v7m.S create mode 100644 drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v7m_fpu.S create mode 100644 drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v7m_fpu_x.S create mode 100644 
drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm_selector.h create mode 100644 drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/encrypt.c create mode 100644 drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/implementors create mode 100644 drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/internal-drysponge.c create mode 100644 drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/internal-drysponge.h create mode 100644 drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/internal-util.h create mode 100644 drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/nistlwc create mode 100644 drygascon/Implementations/crypto_aead/drygascon128/designers create mode 100644 drygascon/Implementations/crypto_aead/drygascon128/ref/implementors create mode 100644 drygascon/Implementations/crypto_aead/drygascon128/ref/nistlwc diff --git a/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/aead-common.c b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/aead-common.c new file mode 100644 index 0000000..84fc53a --- /dev/null +++ b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/aead-common.c @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2020 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
/**
 * \brief Converts an accumulated tag difference into a selection mask.
 *
 * \param diff Bitwise OR of the XOR of every pair of tag bytes.  Each term
 * is a single byte, so the value lies in the range 0..255.
 *
 * \return -1 (all bits set) if \a diff is zero, or 0 otherwise.
 *
 * The subtraction and the shift are carried out on an unsigned value so the
 * result is fully defined by the C standard.  Shifting a negative signed
 * value to the right, as "(accum - 1) >> 8" does, is implementation-defined.
 */
static int aead_tag_mask(unsigned diff)
{
    /* diff - 1 wraps to UINT_MAX only when diff == 0, which sets bit 8 */
    return -(int)(((diff - 1U) >> 8) & 1U);
}

/**
 * \brief Compares two authentication tags without branching on their content.
 *
 * \param plaintext Points to the plaintext data; may be null when
 * \a plaintext_len is zero.
 * \param plaintext_len Length of the plaintext in bytes.
 * \param tag1 First tag to compare.
 * \param tag2 Second tag to compare.
 * \param size Length of the tags in bytes.
 *
 * \return Returns -1 if the tag check failed or 0 if the check succeeded.
 *
 * If the tags differ, every byte of \a plaintext is overwritten with zero.
 * If they match, the plaintext is left unchanged.
 */
int aead_check_tag
    (unsigned char *plaintext, unsigned long long plaintext_len,
     const unsigned char *tag1, const unsigned char *tag2,
     unsigned size)
{
    unsigned diff = 0;
    unsigned long long pos;
    unsigned idx;
    int mask;

    /* Fold all byte differences together; no early exit on a mismatch */
    for (idx = 0; idx < size; ++idx) {
        diff |= (unsigned)(tag1[idx] ^ tag2[idx]);
    }
    mask = aead_tag_mask(diff);

    /* Keep the plaintext when mask is -1, wipe it when mask is 0 */
    for (pos = 0; pos < plaintext_len; ++pos) {
        plaintext[pos] &= (unsigned char)mask;
    }

    /* mask == -1 (match) becomes 0, mask == 0 (mismatch) becomes -1 */
    return ~mask;
}

/**
 * \brief Compares two authentication tags and merges in an earlier result.
 *
 * \param plaintext Points to the plaintext data; may be null when
 * \a plaintext_len is zero.
 * \param plaintext_len Length of the plaintext in bytes.
 * \param tag1 First tag to compare.
 * \param tag2 Second tag to compare.
 * \param size Length of the tags in bytes.
 * \param precheck Set to -1 if the previous check succeeded or 0 if it failed.
 *
 * \return Returns -1 if either check failed or 0 if both succeeded, for the
 * documented \a precheck values of -1 and 0.
 *
 * The tag mask is ANDed with \a precheck before it is applied, so the
 * plaintext is zeroed when this comparison fails or when \a precheck is 0.
 */
int aead_check_tag_precheck
    (unsigned char *plaintext, unsigned long long plaintext_len,
     const unsigned char *tag1, const unsigned char *tag2,
     unsigned size, int precheck)
{
    unsigned diff = 0;
    unsigned long long pos;
    unsigned idx;
    int mask;

    /* Fold all byte differences together; no early exit on a mismatch */
    for (idx = 0; idx < size; ++idx) {
        diff |= (unsigned)(tag1[idx] ^ tag2[idx]);
    }
    mask = aead_tag_mask(diff) & precheck;

    /* Keep the plaintext only when both checks passed */
    for (pos = 0; pos < plaintext_len; ++pos) {
        plaintext[pos] &= (unsigned char)mask;
    }

    return ~mask;
}
-0,0 +1,256 @@ +/* + * Copyright (C) 2020 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef LWCRYPTO_AEAD_COMMON_H +#define LWCRYPTO_AEAD_COMMON_H + +#include + +/** + * \file aead-common.h + * \brief Definitions that are common across AEAD schemes. + * + * AEAD stands for "Authenticated Encryption with Associated Data". + * It is a standard API pattern for securely encrypting and + * authenticating packets of data. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * \brief Encrypts and authenticates a packet with an AEAD scheme. + * + * \param c Buffer to receive the output. + * \param clen On exit, set to the length of the output which includes + * the ciphertext and the authentication tag. + * \param m Buffer that contains the plaintext message to encrypt. + * \param mlen Length of the plaintext message in bytes. 
/**
 * \brief Encrypts and authenticates a packet with an AEAD scheme.
 *
 * \param c Buffer to receive the output.
 * \param clen On exit, set to the length of the output which includes
 * the ciphertext and the authentication tag.
 * \param m Buffer that contains the plaintext message to encrypt.
 * \param mlen Length of the plaintext message in bytes.
 * \param ad Buffer that contains associated data to authenticate
 * along with the packet but which does not need to be encrypted.
 * \param adlen Length of the associated data in bytes.
 * \param nsec Secret nonce - normally not used by AEAD schemes.
 * \param npub Points to the public nonce for the packet.
 * \param k Points to the key to use to encrypt the packet.
 *
 * \return 0 on success, or a negative value if there was an error in
 * the parameters.
 */
typedef int (*aead_cipher_encrypt_t)
    (unsigned char *c, unsigned long long *clen,
     const unsigned char *m, unsigned long long mlen,
     const unsigned char *ad, unsigned long long adlen,
     const unsigned char *nsec,
     const unsigned char *npub,
     const unsigned char *k);

/**
 * \brief Decrypts and authenticates a packet with an AEAD scheme.
 *
 * \param m Buffer to receive the plaintext message on output.
 * \param mlen Receives the length of the plaintext message on output.
 * \param nsec Secret nonce - normally not used by AEAD schemes.
 * \param c Buffer that contains the ciphertext and authentication
 * tag to decrypt.
 * \param clen Length of the input data in bytes, which includes the
 * ciphertext and the authentication tag.
 * \param ad Buffer that contains associated data to authenticate
 * along with the packet but which does not need to be encrypted.
 * \param adlen Length of the associated data in bytes.
 * \param npub Points to the public nonce for the packet.
 * \param k Points to the key to use to decrypt the packet.
 *
 * \return 0 on success, -1 if the authentication tag was incorrect,
 * or some other negative number if there was an error in the parameters.
 */
typedef int (*aead_cipher_decrypt_t)
    (unsigned char *m, unsigned long long *mlen,
     unsigned char *nsec,
     const unsigned char *c, unsigned long long clen,
     const unsigned char *ad, unsigned long long adlen,
     const unsigned char *npub,
     const unsigned char *k);

/**
 * \brief Hashes a block of input data.
 *
 * \param out Buffer to receive the hash output.
 * \param in Points to the input data to be hashed.
 * \param inlen Length of the input data in bytes.
 *
 * \return Returns zero on success or -1 if there was an error in the
 * parameters.
 */
typedef int (*aead_hash_t)
    (unsigned char *out, const unsigned char *in, unsigned long long inlen);

/**
 * \brief Initializes the state for a hashing operation.
 *
 * \param state Hash state to be initialized.
 */
typedef void (*aead_hash_init_t)(void *state);

/**
 * \brief Updates a hash state with more input data.
 *
 * \param state Hash state to be updated.
 * \param in Points to the input data to be incorporated into the state.
 * \param inlen Length of the input data to be incorporated into the state.
 */
typedef void (*aead_hash_update_t)
    (void *state, const unsigned char *in, unsigned long long inlen);

/**
 * \brief Returns the final hash value from a hashing operation.
 *
 * \param state Hash state to be finalized.
 * \param out Points to the output buffer to receive the hash value.
 */
typedef void (*aead_hash_finalize_t)(void *state, unsigned char *out);

/**
 * \brief Absorbs more input data into an XOF state.
 *
 * \param state XOF state to be updated.
 * \param in Points to the input data to be absorbed into the state.
 * \param inlen Length of the input data to be absorbed into the state.
 *
 * \sa aead_hash_init_t, aead_xof_squeeze_t
 */
typedef void (*aead_xof_absorb_t)
    (void *state, const unsigned char *in, unsigned long long inlen);

/**
 * \brief Squeezes output data from an XOF state.
 *
 * \param state XOF state to squeeze the output data from.
 * \param out Points to the output buffer to receive the squeezed data.
 * \param outlen Number of bytes of data to squeeze out of the state.
 */
typedef void (*aead_xof_squeeze_t)
    (void *state, unsigned char *out, unsigned long long outlen);

/**
 * \brief No special AEAD features.
 */
#define AEAD_FLAG_NONE              0x0000

/**
 * \brief The natural byte order of the AEAD cipher is little-endian.
 *
 * If this flag is not present, then the natural byte order of the
 * AEAD cipher should be assumed to be big-endian.
 *
 * The natural byte order may be useful when formatting packet sequence
 * numbers as nonces.  The application needs to know whether the sequence
 * number should be packed into the leading or trailing bytes of the nonce.
 */
#define AEAD_FLAG_LITTLE_ENDIAN     0x0001

/**
 * \brief Meta-information about an AEAD cipher.
 */
typedef struct
{
    const char *name;               /**< Name of the cipher */
    unsigned key_len;               /**< Length of the key in bytes */
    unsigned nonce_len;             /**< Length of the nonce in bytes */
    unsigned tag_len;               /**< Length of the tag in bytes */
    unsigned flags;                 /**< Flags for extra features */
    aead_cipher_encrypt_t encrypt;  /**< AEAD encryption function */
    aead_cipher_decrypt_t decrypt;  /**< AEAD decryption function */
    unsigned char *expected;        /**< AEAD encryption benchmark expected result */
} aead_cipher_t;

/**
 * \brief Meta-information about a hash algorithm that is related to an AEAD.
 *
 * Regular hash algorithms should provide the "hash", "init", "update",
 * and "finalize" functions.  Extensible Output Functions (XOF's) should
 * provide the "hash", "init", "absorb", and "squeeze" functions.
 */
typedef struct
{
    const char *name;           /**< Name of the hash algorithm */
    size_t state_size;          /**< Size of the incremental state structure */
    unsigned hash_len;          /**< Length of the hash in bytes */
    unsigned flags;             /**< Flags for extra features */
    aead_hash_t hash;           /**< All in one hashing function */
    aead_hash_init_t init;      /**< Incremental hash/XOF init function */
    aead_hash_update_t update;  /**< Incremental hash update function */
    aead_hash_finalize_t finalize; /**< Incremental hash finalize function */
    aead_xof_absorb_t absorb;   /**< Incremental XOF absorb function */
    aead_xof_squeeze_t squeeze; /**< Incremental XOF squeeze function */

} aead_hash_algorithm_t;

/**
 * \brief Check an authentication tag in constant time.
 *
 * \param plaintext Points to the plaintext data.
 * \param plaintext_len Length of the plaintext in bytes.
 * \param tag1 First tag to compare.
 * \param tag2 Second tag to compare.
 * \param tag_len Length of the tags in bytes.
 *
 * \return Returns -1 if the tag check failed or 0 if the check succeeded.
 *
 * If the tag check fails, then the \a plaintext will also be zeroed to
 * prevent it from being used accidentally by the application when the
 * ciphertext was invalid.
 */
int aead_check_tag
    (unsigned char *plaintext, unsigned long long plaintext_len,
     const unsigned char *tag1, const unsigned char *tag2,
     unsigned tag_len);
+ * + * If the tag check fails, then the \a plaintext will also be zeroed to + * prevent it from being used accidentally by the application when the + * ciphertext was invalid. + * + * This version can be used to incorporate other information about the + * correctness of the plaintext into the final result. + */ +int aead_check_tag_precheck + (unsigned char *plaintext, unsigned long long plaintext_len, + const unsigned char *tag1, const unsigned char *tag2, + unsigned tag_len, int precheck); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/api.h b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon.c b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon.c new file mode 100644 index 0000000..b65890f --- /dev/null +++ b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon.c @@ -0,0 +1,528 @@ +/* + * Copyright (C) 2020 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "drygascon.h" +#include "internal-drysponge.h" +#include + +uint8_t drygascon128k32_expected[DRYGASCON128_TAG_SIZE]={0x66,0x5A,0xDE,0x6C,0x0F,0xBD,0x48,0x8C,0x5E,0xA4,0x77,0x5D,0xD6,0x24,0xDA,0xD7}; + +uint8_t drygascon128k56_expected[DRYGASCON128_TAG_SIZE]={0x7B,0x8B,0x9D,0x58,0xA7,0xF7,0x5F,0x1E,0x56,0x99,0x46,0xD6,0x24,0xC4,0xF7,0x68}; + +uint8_t drygascon128k16_expected[DRYGASCON128_TAG_SIZE]={0x14,0xA5,0x21,0x17,0xFF,0x52,0x4F,0x7C,0xCB,0xB3,0xEB,0xE4,0x05,0xEF,0x18,0xA4}; + +const aead_cipher_t const drygascon128k32_cipher = { + "DryGASCON128k32", + DRYGASCON128_FASTKEY_SIZE, + DRYGASCON128_NONCE_SIZE, + DRYGASCON128_TAG_SIZE, + AEAD_FLAG_LITTLE_ENDIAN, + drygascon128k32_aead_encrypt, + drygascon128k32_aead_decrypt, + drygascon128k32_expected +}; + +const aead_cipher_t const drygascon128_cipher = { + "DryGASCON128k32", + DRYGASCON128_FASTKEY_SIZE, + DRYGASCON128_NONCE_SIZE, + DRYGASCON128_TAG_SIZE, + AEAD_FLAG_LITTLE_ENDIAN, + drygascon128k32_aead_encrypt, + drygascon128k32_aead_decrypt, + drygascon128k32_expected +}; + +const aead_cipher_t const drygascon128k56_cipher = { + "DryGASCON128k56", + DRYGASCON128_SAFEKEY_SIZE, + DRYGASCON128_NONCE_SIZE, + DRYGASCON128_TAG_SIZE, + AEAD_FLAG_LITTLE_ENDIAN, + drygascon128k56_aead_encrypt, + drygascon128k56_aead_decrypt, + drygascon128k56_expected +}; + +const aead_cipher_t const drygascon128k16_cipher = { + "DryGASCON128k16", + DRYGASCON128_MINKEY_SIZE, + DRYGASCON128_NONCE_SIZE, + DRYGASCON128_TAG_SIZE, + 
AEAD_FLAG_LITTLE_ENDIAN, + drygascon128k16_aead_encrypt, + drygascon128k16_aead_decrypt, + drygascon128k16_expected +}; + +aead_cipher_t const drygascon256_cipher = { + "DryGASCON256", + DRYGASCON256_KEY_SIZE, + DRYGASCON256_NONCE_SIZE, + DRYGASCON256_TAG_SIZE, + AEAD_FLAG_LITTLE_ENDIAN, + drygascon256_aead_encrypt, + drygascon256_aead_decrypt +}; + +aead_hash_algorithm_t const drygascon128_hash_algorithm = { + "DryGASCON128-HASH", + sizeof(int), + DRYGASCON128_HASH_SIZE, + AEAD_FLAG_LITTLE_ENDIAN, + drygascon128_hash, + (aead_hash_init_t)0, + (aead_hash_update_t)0, + (aead_hash_finalize_t)0, + (aead_xof_absorb_t)0, + (aead_xof_squeeze_t)0 +}; + +aead_hash_algorithm_t const drygascon256_hash_algorithm = { + "DryGASCON256-HASH", + sizeof(int), + DRYGASCON256_HASH_SIZE, + AEAD_FLAG_LITTLE_ENDIAN, + drygascon256_hash, + (aead_hash_init_t)0, + (aead_hash_update_t)0, + (aead_hash_finalize_t)0, + (aead_xof_absorb_t)0, + (aead_xof_squeeze_t)0 +}; + +/** + * \brief Processes associated data for DryGASCON128. + * + * \param state DrySPONGE128 sponge state. + * \param ad Points to the associated data. + * \param adlen Length of the associated data, must not be zero. + * \param finalize Non-zero to finalize packet processing because + * the message is zero-length. 
+ */ +static void drygascon128_process_ad + (drysponge128_state_t *state, const unsigned char *ad, + unsigned long long adlen, int finalize) +{ + /* Process all blocks except the last one */ + while (adlen > DRYSPONGE128_RATE) { + drygascon128_f_wrap(state, ad, DRYSPONGE128_RATE); + //drysponge128_g_core(state); + ad += DRYSPONGE128_RATE; + adlen -= DRYSPONGE128_RATE; + } + + /* Process the last block with domain separation and padding */ + state->domain = DRYDOMAIN128_ASSOC_DATA; + if (finalize) + state->domain |= DRYDOMAIN128_FINAL; + if (adlen < DRYSPONGE128_RATE) + state->domain |= DRYDOMAIN128_PADDED; + drygascon128_f_wrap(state, ad, (unsigned)adlen); + //drysponge128_g(state); +} + +/** + * \brief Processes associated data for DryGASCON256. + * + * \param state DrySPONGE256 sponge state. + * \param ad Points to the associated data. + * \param adlen Length of the associated data, must not be zero. + * \param finalize Non-zero to finalize packet processing because + * the message is zero-length. 
+ */ +static void drygascon256_process_ad + (drysponge256_state_t *state, const unsigned char *ad, + unsigned long long adlen, int finalize) +{ + /* Process all blocks except the last one */ + while (adlen > DRYSPONGE256_RATE) { + drysponge256_f_absorb(state, ad, DRYSPONGE256_RATE); + drysponge256_g_core(state); + ad += DRYSPONGE256_RATE; + adlen -= DRYSPONGE256_RATE; + } + + /* Process the last block with domain separation and padding */ + state->domain = DRYDOMAIN256_ASSOC_DATA; + if (finalize) + state->domain |= DRYDOMAIN256_FINAL; + if (adlen < DRYSPONGE256_RATE) + state->domain |= DRYDOMAIN256_PADDED; + drysponge256_f_absorb(state, ad, (unsigned)adlen); + drysponge256_g(state); +} + +int drygascon128_aead_encrypt_core + (unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + unsigned int keysize, + const unsigned char *npub, + const unsigned char *k) +{ + drysponge128_state_t state; + unsigned temp; + + /* Check we are safe */ + if(!drysponge128_safe_alignement(&state)){ + return -1; + } + + /* Set the length of the returned ciphertext */ + *clen = mlen + DRYGASCON128_TAG_SIZE; + + /* Initialize the sponge state with the key and nonce */ + drysponge128_setup(&state, k, keysize, npub, adlen == 0 && mlen == 0); + + /* Process the associated data */ + if (adlen > 0) + drygascon128_process_ad(&state, ad, adlen, mlen == 0); + + /* Encrypt the plaintext to produce the ciphertext */ + if (mlen > 0) { + /* Processs all blocks except the last one */ + while (mlen > DRYSPONGE128_RATE) { + lw_xor_block_2_src(c, m, state.r.B, DRYSPONGE128_RATE); + drygascon128_f_wrap(&state, m, DRYSPONGE128_RATE); + c += DRYSPONGE128_RATE; + m += DRYSPONGE128_RATE; + mlen -= DRYSPONGE128_RATE; + } + + /* Process the last block with domain separation and padding */ + state.domain = DRYDOMAIN128_MESSAGE | DRYDOMAIN128_FINAL; + if (mlen < DRYSPONGE128_RATE) + state.domain |= DRYDOMAIN128_PADDED; + 
temp = (unsigned)mlen; + lw_xor_block_2_src(c, m, state.r.B, temp); + drygascon128_f_wrap(&state, m, temp); + c += temp; + } + + /* Generate the authentication tag */ + memcpy(c, state.r.B, DRYGASCON128_TAG_SIZE); + return 0; +} + +int drygascon128_aead_decrypt_core + (unsigned char *m, unsigned long long *mlen, + unsigned int keysize, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + drysponge128_state_t state; + unsigned char *mtemp = m; + unsigned temp; + + /* Check we are safe */ + if(!drysponge128_safe_alignement(&state)){ + return -1; + } + + /* Validate the ciphertext length and set the return "mlen" value */ + if (clen < DRYGASCON128_TAG_SIZE) + return -1; + *mlen = clen - DRYGASCON128_TAG_SIZE; + + /* Initialize the sponge state with the key and nonce */ + clen -= DRYGASCON128_TAG_SIZE; + drysponge128_setup(&state, k, keysize, npub, adlen == 0 && clen == 0); + + /* Process the associated data */ + if (adlen > 0) + drygascon128_process_ad(&state, ad, adlen, clen == 0); + + /* Decrypt the ciphertext to produce the plaintext */ + if (clen > 0) { + /* Processs all blocks except the last one */ + while (clen > DRYSPONGE128_RATE) { + lw_xor_block_2_src(m, c, state.r.B, DRYSPONGE128_RATE); + drygascon128_f_wrap(&state, m, DRYSPONGE128_RATE); + //drysponge128_g(&state); + c += DRYSPONGE128_RATE; + m += DRYSPONGE128_RATE; + clen -= DRYSPONGE128_RATE; + } + + /* Process the last block with domain separation and padding */ + state.domain = DRYDOMAIN128_MESSAGE | DRYDOMAIN128_FINAL; + if (clen < DRYSPONGE128_RATE) + state.domain |= DRYDOMAIN128_PADDED; + temp = (unsigned)clen; + lw_xor_block_2_src(m, c, state.r.B, temp); + drygascon128_f_wrap(&state, m, temp); + //drysponge128_g(&state); + c += temp; + } + + /* Check the authentication tag */ + return aead_check_tag(mtemp, *mlen, state.r.B, c, DRYGASCON128_TAG_SIZE); +} + +int drygascon128k16_aead_encrypt 
/**
 * \brief DryGASCON128 encryption with the key size argument fixed at 16.
 *
 * Forwards every argument except \a nsec, which is ignored, to
 * drygascon128_aead_encrypt_core() and returns its result.
 */
int drygascon128k16_aead_encrypt
    (unsigned char *c, unsigned long long *clen,
     const unsigned char *m, unsigned long long mlen,
     const unsigned char *ad, unsigned long long adlen,
     const unsigned char *nsec,
     const unsigned char *npub,
     const unsigned char *k)
{
    (void)nsec;
    return drygascon128_aead_encrypt_core
        (c, clen, m, mlen, ad, adlen, 16, npub, k);
}

/**
 * \brief DryGASCON128 encryption with the key size argument fixed at 32.
 *
 * Forwards every argument except \a nsec, which is ignored, to
 * drygascon128_aead_encrypt_core() and returns its result.
 */
int drygascon128k32_aead_encrypt
    (unsigned char *c, unsigned long long *clen,
     const unsigned char *m, unsigned long long mlen,
     const unsigned char *ad, unsigned long long adlen,
     const unsigned char *nsec,
     const unsigned char *npub,
     const unsigned char *k)
{
    (void)nsec;
    return drygascon128_aead_encrypt_core
        (c, clen, m, mlen, ad, adlen, 32, npub, k);
}

/**
 * \brief DryGASCON128 encryption with the key size argument fixed at 56.
 *
 * Forwards every argument except \a nsec, which is ignored, to
 * drygascon128_aead_encrypt_core() and returns its result.
 */
int drygascon128k56_aead_encrypt
    (unsigned char *c, unsigned long long *clen,
     const unsigned char *m, unsigned long long mlen,
     const unsigned char *ad, unsigned long long adlen,
     const unsigned char *nsec,
     const unsigned char *npub,
     const unsigned char *k)
{
    (void)nsec;
    return drygascon128_aead_encrypt_core
        (c, clen, m, mlen, ad, adlen, 56, npub, k);
}

/**
 * \brief DryGASCON128 decryption with the key size argument fixed at 16.
 *
 * Forwards every argument except \a nsec, which is ignored, to
 * drygascon128_aead_decrypt_core() and returns its result.
 */
int drygascon128k16_aead_decrypt
    (unsigned char *m, unsigned long long *mlen,
     unsigned char *nsec,
     const unsigned char *c, unsigned long long clen,
     const unsigned char *ad, unsigned long long adlen,
     const unsigned char *npub,
     const unsigned char *k)
{
    (void)nsec;
    return drygascon128_aead_decrypt_core
        (m, mlen, 16, c, clen, ad, adlen, npub, k);
}

/**
 * \brief DryGASCON128 decryption with the key size argument fixed at 32.
 *
 * Forwards every argument except \a nsec, which is ignored, to
 * drygascon128_aead_decrypt_core() and returns its result.
 */
int drygascon128k32_aead_decrypt
    (unsigned char *m, unsigned long long *mlen,
     unsigned char *nsec,
     const unsigned char *c, unsigned long long clen,
     const unsigned char *ad, unsigned long long adlen,
     const unsigned char *npub,
     const unsigned char *k)
{
    (void)nsec;
    return drygascon128_aead_decrypt_core
        (m, mlen, 32, c, clen, ad, adlen, npub, k);
}

/**
 * \brief DryGASCON128 decryption with the key size argument fixed at 56.
 *
 * Forwards every argument except \a nsec, which is ignored, to
 * drygascon128_aead_decrypt_core() and returns its result.
 */
int drygascon128k56_aead_decrypt
    (unsigned char *m, unsigned long long *mlen,
     unsigned char *nsec,
     const unsigned char *c, unsigned long long clen,
     const unsigned char *ad, unsigned long long adlen,
     const unsigned char *npub,
     const unsigned char *k)
{
    (void)nsec;
    return drygascon128_aead_decrypt_core
        (m, mlen, 56, c, clen, ad, adlen, npub, k);
}
drygascon128_aead_decrypt_core(m,mlen,56,c,clen,ad,adlen,npub,k); +} + +int drygascon256_aead_encrypt + (unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k) +{ + drysponge256_state_t state; + unsigned temp; + (void)nsec; + + /* Set the length of the returned ciphertext */ + *clen = mlen + DRYGASCON256_TAG_SIZE; + + /* Initialize the sponge state with the key and nonce */ + drysponge256_setup(&state, k, npub, adlen == 0 && mlen == 0); + + /* Process the associated data */ + if (adlen > 0) + drygascon256_process_ad(&state, ad, adlen, mlen == 0); + + /* Encrypt the plaintext to produce the ciphertext */ + if (mlen > 0) { + /* Processs all blocks except the last one */ + while (mlen > DRYSPONGE256_RATE) { + drysponge256_f_absorb(&state, m, DRYSPONGE256_RATE); + lw_xor_block_2_src(c, m, state.r.B, DRYSPONGE256_RATE); + drysponge256_g(&state); + c += DRYSPONGE256_RATE; + m += DRYSPONGE256_RATE; + mlen -= DRYSPONGE256_RATE; + } + + /* Process the last block with domain separation and padding */ + state.domain = DRYDOMAIN256_MESSAGE | DRYDOMAIN256_FINAL; + if (mlen < DRYSPONGE256_RATE) + state.domain |= DRYDOMAIN256_PADDED; + temp = (unsigned)mlen; + drysponge256_f_absorb(&state, m, temp); + lw_xor_block_2_src(c, m, state.r.B, temp); + drysponge256_g(&state); + c += temp; + } + + /* Generate the authentication tag */ + memcpy(c, state.r.B, 16); + drysponge256_g(&state); + memcpy(c + 16, state.r.B, 16); + return 0; +} + +int drygascon256_aead_decrypt + (unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + drysponge256_state_t state; + unsigned char *mtemp = m; + unsigned temp; + int result; + (void)nsec; + + /* Validate 
the ciphertext length and set the return "mlen" value */ + if (clen < DRYGASCON256_TAG_SIZE) + return -1; + *mlen = clen - DRYGASCON256_TAG_SIZE; + + /* Strip the tag from clen, then initialize the sponge state with the key and nonce; the last argument is non-zero only when both the associated data and the ciphertext body are empty */ + clen -= DRYGASCON256_TAG_SIZE; + drysponge256_setup(&state, k, npub, adlen == 0 && clen == 0); + + /* Process the associated data; the last argument is non-zero when no ciphertext body follows */ + if (adlen > 0) + drygascon256_process_ad(&state, ad, adlen, clen == 0); + + /* Decrypt the ciphertext to produce the plaintext */ + if (clen > 0) { + /* Process all blocks except the last one; each block is recovered into m first and then absorbed from m */ + while (clen > DRYSPONGE256_RATE) { + lw_xor_block_2_src(m, c, state.r.B, DRYSPONGE256_RATE); + drysponge256_f_absorb(&state, m, DRYSPONGE256_RATE); + drysponge256_g(&state); + c += DRYSPONGE256_RATE; + m += DRYSPONGE256_RATE; + clen -= DRYSPONGE256_RATE; + } + + /* Process the last block with domain separation; the padded flag is only added when the block is shorter than the rate */ + state.domain = DRYDOMAIN256_MESSAGE | DRYDOMAIN256_FINAL; + if (clen < DRYSPONGE256_RATE) + state.domain |= DRYDOMAIN256_PADDED; + temp = (unsigned)clen; + lw_xor_block_2_src(m, c, state.r.B, temp); + drysponge256_f_absorb(&state, m, temp); + drysponge256_g(&state); + c += temp; + } + + /* Check the authentication tag which is split into two 16 byte pieces; the inverted result of the first check is handed to the second check as its precheck value */ + result = aead_check_tag(0, 0, state.r.B, c, 16); + drysponge256_g(&state); + return aead_check_tag_precheck + (mtemp, *mlen, state.r.B, c + 16, 16, ~result); +} + +/** + * \brief Precomputed initialization vector for DryGASCON128-HASH. + * + * This is the CST_H value from the DryGASCON specification after it + * has been processed by the key setup function for DrySPONGE128. 
+ */ +static unsigned char const drygascon128_hash_init[] = { + /* c: the leading bytes, copied into state.c.B by drygascon128_hash() */ + 0x24, 0x3f, 0x6a, 0x88, 0x85, 0xa3, 0x08, 0xd3, + 0x13, 0x19, 0x8a, 0x2e, 0x03, 0x70, 0x73, 0x44, + 0x24, 0x3f, 0x6a, 0x88, 0x85, 0xa3, 0x08, 0xd3, + 0x13, 0x19, 0x8a, 0x2e, 0x03, 0x70, 0x73, 0x44, + 0x24, 0x3f, 0x6a, 0x88, 0x85, 0xa3, 0x08, 0xd3, + /* x: the trailing bytes, copied into state.x.B by drygascon128_hash() */ + 0xa4, 0x09, 0x38, 0x22, 0x29, 0x9f, 0x31, 0xd0, + 0x08, 0x2e, 0xfa, 0x98, 0xec, 0x4e, 0x6c, 0x89 +}; + +int drygascon128_hash /* Hashes inlen bytes from in and writes 32 bytes to out; always returns 0 */ + (unsigned char *out, const unsigned char *in, unsigned long long inlen) +{ + drysponge128_state_t state; + memcpy(state.c.B, drygascon128_hash_init, sizeof(state.c.B)); /* load the precomputed c part */ + memcpy(state.x.B, drygascon128_hash_init + sizeof(state.c.B), + sizeof(state.x.B)); /* the x part follows the c part in the table */ + state.domain = 0; + state.rounds = DRYSPONGE128_ROUNDS; + drygascon128_process_ad(&state, in, inlen, 1); /* the input goes through the same routine the AEAD mode uses for associated data */ + memcpy(out, state.r.B, 16); /* first 16 bytes of the output */ + drysponge128_g(&state); + memcpy(out + 16, state.r.B, 16); /* second 16 bytes of the output */ + return 0; +} + +/** + * \brief Precomputed initialization vector for DryGASCON256-HASH. + * + * This is the CST_H value from the DryGASCON specification after it + * has been processed by the key setup function for DrySPONGE256. 
+ */ +static unsigned char const drygascon256_hash_init[] = { + /* c: the leading bytes, copied into state.c.B by drygascon256_hash() */ + 0x24, 0x3f, 0x6a, 0x88, 0x85, 0xa3, 0x08, 0xd3, + 0x13, 0x19, 0x8a, 0x2e, 0x03, 0x70, 0x73, 0x44, + 0xa4, 0x09, 0x38, 0x22, 0x29, 0x9f, 0x31, 0xd0, + 0x08, 0x2e, 0xfa, 0x98, 0xec, 0x4e, 0x6c, 0x89, + 0x24, 0x3f, 0x6a, 0x88, 0x85, 0xa3, 0x08, 0xd3, + 0x13, 0x19, 0x8a, 0x2e, 0x03, 0x70, 0x73, 0x44, + 0xa4, 0x09, 0x38, 0x22, 0x29, 0x9f, 0x31, 0xd0, + 0x08, 0x2e, 0xfa, 0x98, 0xec, 0x4e, 0x6c, 0x89, + 0x24, 0x3f, 0x6a, 0x88, 0x85, 0xa3, 0x08, 0xd3, + /* x: the trailing bytes, copied into state.x.B by drygascon256_hash() */ + 0x45, 0x28, 0x21, 0xe6, 0x38, 0xd0, 0x13, 0x77, + 0xbe, 0x54, 0x66, 0xcf, 0x34, 0xe9, 0x0c, 0x6c +}; + +int drygascon256_hash /* Hashes inlen bytes from in and writes 64 bytes to out; always returns 0 */ + (unsigned char *out, const unsigned char *in, unsigned long long inlen) +{ + drysponge256_state_t state; + memcpy(state.c.B, drygascon256_hash_init, sizeof(state.c.B)); /* load the precomputed c part */ + memcpy(state.x.B, drygascon256_hash_init + sizeof(state.c.B), + sizeof(state.x.B)); /* the x part follows the c part in the table */ + state.domain = 0; + state.rounds = DRYSPONGE256_ROUNDS; + drygascon256_process_ad(&state, in, inlen, 1); /* the input goes through the same routine the AEAD mode uses for associated data */ + memcpy(out, state.r.B, 16); /* output bytes 0..15 */ + drysponge256_g(&state); + memcpy(out + 16, state.r.B, 16); /* output bytes 16..31 */ + drysponge256_g(&state); + memcpy(out + 32, state.r.B, 16); /* output bytes 32..47 */ + drysponge256_g(&state); + memcpy(out + 48, state.r.B, 16); /* output bytes 48..63 */ + return 0; +} diff --git a/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon.h b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon.h new file mode 100644 index 0000000..dab98ec --- /dev/null +++ b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon.h @@ -0,0 +1,437 @@ +/* + * Copyright (C) 2020 Southern Storm Software, Pty Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef LWCRYPTO_DRYGASCON_H +#define LWCRYPTO_DRYGASCON_H + +#include "aead-common.h" + +/** + * \file drygascon.h + * \brief DryGASCON authenticated encryption algorithm. + * + * DryGASCON is a family of authenticated encryption algorithms based + * around a generalised version of the ASCON permutation. DryGASCON + * is designed to provide some protection against power analysis. + * + * There are four algorithms in the DryGASCON family: + * + * \li DryGASCON128 is an authenticated encryption algorithm with a 128, + * 256 or 448-bit key, a 128-bit nonce, and a 128-bit authentication tag. + * \li DryGASCON256 is an authenticated encryption algorithm with a + * 256-bit key, a 128-bit nonce, and a 256-bit authentication tag. + * \li DryGASCON128-HASH is a hash algorithm with a 256-bit output. + * \li DryGASCON256-HASH is a hash algorithm with a 512-bit output. 
+ * + * DryGASCON128 and DryGASCON128-HASH are the primary members of the family. + * + * References: https://github.com/sebastien-riou/DryGASCON + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * \brief Minimum size of the key for DryGASCON128, in bytes. + */ +#define DRYGASCON128_MINKEY_SIZE 16 + +/** + * \brief Fast size of the key for DryGASCON128, in bytes. + */ +#define DRYGASCON128_FASTKEY_SIZE 32 + +/** + * \brief Safe (and fast) size of the key for DryGASCON128, in bytes. + * Safe here means the size of the key helps prevent SPA during key loading. + */ +#define DRYGASCON128_SAFEKEY_SIZE 56 + +/** + * \brief Size of the key for DryGASCON128 (defaults to the "fast" size). + */ +#define DRYGASCON128_KEY_SIZE DRYGASCON128_FASTKEY_SIZE + +/** + * \brief Size of the authentication tag for DryGASCON128. + */ +#define DRYGASCON128_TAG_SIZE 16 + +/** + * \brief Size of the nonce for DryGASCON128. + */ +#define DRYGASCON128_NONCE_SIZE 16 + +/** + * \brief Size of the hash output for DryGASCON128-HASH. + */ +#define DRYGASCON128_HASH_SIZE 32 + +/** + * \brief Size of the key for DryGASCON256. + */ +#define DRYGASCON256_KEY_SIZE 32 + +/** + * \brief Size of the authentication tag for DryGASCON256. + */ +#define DRYGASCON256_TAG_SIZE 32 + +/** + * \brief Size of the nonce for DryGASCON256. + */ +#define DRYGASCON256_NONCE_SIZE 16 + +/** + * \brief Size of the hash output for DryGASCON256-HASH. + */ +#define DRYGASCON256_HASH_SIZE 64 + +/** + * \brief Meta-information block for the DryGASCON128 cipher with a 32 byte key. + */ +extern aead_cipher_t const drygascon128k32_cipher; + +/** + * \brief Meta-information block for the DryGASCON128 cipher with a 56 byte key. + */ +extern aead_cipher_t const drygascon128k56_cipher; + +/** + * \brief Meta-information block for the DryGASCON128 cipher with a 16 byte key. + */ +extern aead_cipher_t const drygascon128k16_cipher; + +/** + * \brief Meta-information block for the DryGASCON128 cipher (defaults to the 32 byte key). 
+ */ +extern aead_cipher_t const drygascon128_cipher; + +/** + * \brief Meta-information block for the DryGASCON256 cipher. + */ +extern aead_cipher_t const drygascon256_cipher; + +/** + * \brief Meta-information block for DryGASCON128-HASH. + */ +extern aead_hash_algorithm_t const drygascon128_hash_algorithm; + +/** + * \brief Meta-information block for DryGASCON256-HASH. + */ +extern aead_hash_algorithm_t const drygascon256_hash_algorithm; + +/** + * \brief Encrypts and authenticates a packet with DryGASCON128 with a 32 byte key. + * + * Use this key size if SPA attacks are not a concern in your use case. + * + * \param c Buffer to receive the output. + * \param clen On exit, set to the length of the output which includes + * the ciphertext and the 16 byte authentication tag. + * \param m Buffer that contains the plaintext message to encrypt. + * \param mlen Length of the plaintext message in bytes. + * \param ad Buffer that contains associated data to authenticate + * along with the packet but which does not need to be encrypted. + * \param adlen Length of the associated data in bytes. + * \param nsec Secret nonce - not used by this algorithm. + * \param npub Points to the public nonce for the packet which must + * be 16 bytes in length. + * \param k Points to the 32 bytes of the key to use to encrypt the packet. + * + * Note that the function blocks if the last 16 bytes of the key are "invalid". + * The last 16 bytes are valid only if their four 32-bit words are all different from each other. + * + * \return 0 on success, or a negative value if there was an error in + * the parameters. 
+ * + * \sa drygascon128k32_aead_decrypt() + */ +int drygascon128k32_aead_encrypt + (unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k); + +/** + * \brief Decrypts and authenticates a packet with DryGASCON128 with a 32 byte key. + * + * Use this key size if SPA attacks are not a concern in your use case. + * + * \param m Buffer to receive the plaintext message on output. + * \param mlen Receives the length of the plaintext message on output. + * \param nsec Secret nonce - not used by this algorithm. + * \param c Buffer that contains the ciphertext and authentication + * tag to decrypt. + * \param clen Length of the input data in bytes, which includes the + * ciphertext and the 16 byte authentication tag. + * \param ad Buffer that contains associated data to authenticate + * along with the packet but which does not need to be encrypted. + * \param adlen Length of the associated data in bytes. + * \param npub Points to the public nonce for the packet which must + * be 16 bytes in length. + * \param k Points to the 32 bytes of the key to use to decrypt the packet. + * + * Note that the function blocks if the last 16 bytes of the key are "invalid". + * The last 16 bytes are valid only if their four 32-bit words are all different from each other. + * + * \return 0 on success, -1 if the authentication tag was incorrect, + * or some other negative number if there was an error in the parameters. + * + * \sa drygascon128k32_aead_encrypt() + */ +int drygascon128k32_aead_decrypt + (unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k); + +/** + * \brief Encrypts and authenticates a packet with DryGASCON128 with a 56 byte key. 
+ * + * Use this key size if you want to prevent SPA attacks. + * + * \param c Buffer to receive the output. + * \param clen On exit, set to the length of the output which includes + * the ciphertext and the 16 byte authentication tag. + * \param m Buffer that contains the plaintext message to encrypt. + * \param mlen Length of the plaintext message in bytes. + * \param ad Buffer that contains associated data to authenticate + * along with the packet but which does not need to be encrypted. + * \param adlen Length of the associated data in bytes. + * \param nsec Secret nonce - not used by this algorithm. + * \param npub Points to the public nonce for the packet which must + * be 16 bytes in length. + * \param k Points to the 56 bytes of the key to use to encrypt the packet. + * + * Note that the function blocks if the last 16 bytes of the key are "invalid". + * The last 16 bytes are valid only if their four 32-bit words are all different from each other. + * + * \return 0 on success, or a negative value if there was an error in + * the parameters. + * + * \sa drygascon128k56_aead_decrypt() + */ +int drygascon128k56_aead_encrypt + (unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k); + +/** + * \brief Decrypts and authenticates a packet with DryGASCON128 with a 56 byte key. + * + * Use this key size if you want to prevent SPA attacks. + * + * \param m Buffer to receive the plaintext message on output. + * \param mlen Receives the length of the plaintext message on output. + * \param nsec Secret nonce - not used by this algorithm. + * \param c Buffer that contains the ciphertext and authentication + * tag to decrypt. + * \param clen Length of the input data in bytes, which includes the + * ciphertext and the 16 byte authentication tag. 
+ * \param ad Buffer that contains associated data to authenticate + * along with the packet but which does not need to be encrypted. + * \param adlen Length of the associated data in bytes. + * \param npub Points to the public nonce for the packet which must + * be 16 bytes in length. + * \param k Points to the 56 bytes of the key to use to decrypt the packet. + * + * Note that the function blocks if the last 16 bytes of the key are "invalid". + * The last 16 bytes are valid only if their four 32-bit words are all different from each other. + * + * \return 0 on success, -1 if the authentication tag was incorrect, + * or some other negative number if there was an error in the parameters. + * + * \sa drygascon128k56_aead_encrypt() + */ +int drygascon128k56_aead_decrypt + (unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k); + +/** + * \brief Encrypts and authenticates a packet with DryGASCON128 with a 16 byte key. + * + * Use this key size only if you really cannot use the 32 byte key. + * + * \param c Buffer to receive the output. + * \param clen On exit, set to the length of the output which includes + * the ciphertext and the 16 byte authentication tag. + * \param m Buffer that contains the plaintext message to encrypt. + * \param mlen Length of the plaintext message in bytes. + * \param ad Buffer that contains associated data to authenticate + * along with the packet but which does not need to be encrypted. + * \param adlen Length of the associated data in bytes. + * \param nsec Secret nonce - not used by this algorithm. + * \param npub Points to the public nonce for the packet which must + * be 16 bytes in length. + * \param k Points to the 16 bytes of the key to use to encrypt the packet. + * + * \return 0 on success, or a negative value if there was an error in + * the parameters. 
+ * + * \sa drygascon128k16_aead_decrypt() + */ +int drygascon128k16_aead_encrypt + (unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k); + +/** + * \brief Decrypts and authenticates a packet with DryGASCON128 with a 16 byte key. + * + * Use this key size only if you really cannot use the 32 byte key. + * + * \param m Buffer to receive the plaintext message on output. + * \param mlen Receives the length of the plaintext message on output. + * \param nsec Secret nonce - not used by this algorithm. + * \param c Buffer that contains the ciphertext and authentication + * tag to decrypt. + * \param clen Length of the input data in bytes, which includes the + * ciphertext and the 16 byte authentication tag. + * \param ad Buffer that contains associated data to authenticate + * along with the packet but which does not need to be encrypted. + * \param adlen Length of the associated data in bytes. + * \param npub Points to the public nonce for the packet which must + * be 16 bytes in length. + * \param k Points to the 16 bytes of the key to use to decrypt the packet. + * + * \return 0 on success, -1 if the authentication tag was incorrect, + * or some other negative number if there was an error in the parameters. + * + * \sa drygascon128k16_aead_encrypt() + */ +int drygascon128k16_aead_decrypt + (unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k); + +/** + * \brief Encrypts and authenticates a packet with DryGASCON256. + * + * \param c Buffer to receive the output. + * \param clen On exit, set to the length of the output which includes + * the ciphertext and the 32 byte authentication tag. 
+ * \param m Buffer that contains the plaintext message to encrypt. + * \param mlen Length of the plaintext message in bytes. + * \param ad Buffer that contains associated data to authenticate + * along with the packet but which does not need to be encrypted. + * \param adlen Length of the associated data in bytes. + * \param nsec Secret nonce - not used by this algorithm. + * \param npub Points to the public nonce for the packet which must + * be 16 bytes in length. + * \param k Points to the 32 bytes of the key to use to encrypt the packet. + * + * \return 0 on success, or a negative value if there was an error in + * the parameters. + * + * \sa drygascon256_aead_decrypt() + */ +int drygascon256_aead_encrypt + (unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k); + +/** + * \brief Decrypts and authenticates a packet with DryGASCON256. + * + * \param m Buffer to receive the plaintext message on output. + * \param mlen Receives the length of the plaintext message on output. + * \param nsec Secret nonce - not used by this algorithm. + * \param c Buffer that contains the ciphertext and authentication + * tag to decrypt. + * \param clen Length of the input data in bytes, which includes the + * ciphertext and the 32 byte authentication tag. + * \param ad Buffer that contains associated data to authenticate + * along with the packet but which does not need to be encrypted. + * \param adlen Length of the associated data in bytes. + * \param npub Points to the public nonce for the packet which must + * be 16 bytes in length. + * \param k Points to the 32 bytes of the key to use to decrypt the packet. + * + * \return 0 on success, -1 if the authentication tag was incorrect, + * or some other negative number if there was an error in the parameters. 
+ * + * \sa drygascon256_aead_encrypt() + */ +int drygascon256_aead_decrypt + (unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k); + +/** + * \brief Hashes a block of input data with DryGASCON128-HASH. + * + * \param out Buffer to receive the hash output which must be at least + * DRYGASCON128_HASH_SIZE bytes in length. + * \param in Points to the input data to be hashed. + * \param inlen Length of the input data in bytes. + * + * \return Returns zero on success or -1 if there was an error in the + * parameters. + */ +int drygascon128_hash + (unsigned char *out, const unsigned char *in, unsigned long long inlen); + +/** + * \brief Hashes a block of input data with DryGASCON256-HASH. + * + * \param out Buffer to receive the hash output which must be at least + * DRYGASCON256_HASH_SIZE bytes in length. + * \param in Points to the input data to be hashed. + * \param inlen Length of the input data in bytes. + * + * \return Returns zero on success or -1 if there was an error in the + * parameters. 
+ */ +int drygascon256_hash + (unsigned char *out, const unsigned char *in, unsigned long long inlen); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v6m.S b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v6m.S new file mode 100644 index 0000000..930df1d --- /dev/null +++ b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v6m.S @@ -0,0 +1,733 @@ +/** +DryGascon128 'v6m implementation' +Sebastien Riou, May 27th 2020 + +Implementation optimized for ARM-Cortex-M0 (Size and Speed) +*/ + +#if defined(__DRYGASCON_ARM_SELECTOR_H__) +.cpu cortex-m0 +.syntax unified +.code 16 +.thumb_func + +.align 1 +.global drygascon128_g_v6m +.global drygascon128_f_v6m + + .equ C0, 0 + .equ C1, C0+8 + .equ C2, C0+16 + .equ C3, C0+24 + .equ C4, C0+32 + .equ R0, 48 + .equ R1, R0+8 + .equ X0, 64 + .equ X1, X0+8 + + .equ X0L, X0 + .equ X1L, X1 + .equ C0L, C0 + .equ C1L, C1 + .equ C2L, C2 + .equ C3L, C3 + .equ C4L, C4 + .equ R0L, R0 + .equ R1L, R1 + + .equ X0H, X0+4 + .equ X1H, X1+4 + .equ C0H, C0+4 + .equ C1H, C1+4 + .equ C2H, C2+4 + .equ C3H, C3+4 + .equ C4H, C4+4 + .equ R0H, R0+4 + .equ R1H, R1+4 + + .equ R32_0, R0L + .equ R32_1, R0H + .equ R32_2, R1L + .equ R32_3, R1H + + +.type drygascon128_g_v6m, %function +drygascon128_g_v6m: + //r0: state: c,r,x + //r1: rounds + push {r4, r5, r6, r7, lr} + //stack vars (after the push {r0,r5,r6} below): + // 8 round + // 4 base address for round constant lookups + // 0 state address + + //r=0 + movs r5,#0 + str r5,[r0,#R32_0] + str r5,[r0,#R32_1] + str r5,[r0,#R32_2] + str r5,[r0,#R32_3] + + //round=r6=rounds-1; + subs r6,r1,#1 + //base = round_cst+12-rounds + adr r5, round_cst + adds r5,r5,#12 + subs r5,r5,r1 + + push {r0,r5,r6} + + ldr r4,[r0,#C4L] + ldr r3,[r0,#C3L] + ldr r2,[r0,#C2L] + ldr r1,[r0,#C1L] + ldr r0,[r0,#C0L] + + //loop entry + //assume rounds>0 at entry (r1 holds C1L by now, not rounds) +drygascon128_g_v6m_main_loop: + //r0~r4: lower half of each words of the state + //r5: 
base for round constants + //r6: round, counting from rounds-1 to 0 + + //r6 = ((0xf - r6) << 4) | r6; + ldrb r6,[r5,r6] + // addition of round constant + //r2 ^= r6; + eors r2,r2,r6 + + // substitution layer, lower half + eors r0,r0,r4 + eors r4,r4,r3 + eors r2,r2,r1 + + mvns r5,r0 + mvns r6,r3 + mvns r7,r4 + ands r5,r5,r1 + ands r6,r6,r4 + eors r4,r4,r5 + + ands r7,r7,r0 + mvns r5,r2 + ands r5,r5,r3 + eors r3,r3,r7 + + mvns r7,r1 + ands r7,r7,r2 + eors r2,r2,r6 + + eors r3,r3,r2 + mvns r2,r2 + + eors r0,r0,r7 + eors r1,r1,r5 + eors r1,r1,r0 + eors r0,r0,r4 + + ldr r7,[sp,#0] + str r4,[r7,#C4L] + str r3,[r7,#C3L] + str r2,[r7,#C2L] + str r1,[r7,#C1L] + str r0,[r7,#C0L] + + ldr r4,[r7,#C4H] + ldr r3,[r7,#C3H] + ldr r2,[r7,#C2H] + ldr r1,[r7,#C1H] + ldr r0,[r7,#C0H] + + // substitution layer, upper half + eors r0,r0,r4 + eors r4,r4,r3 + eors r2,r2,r1 + + mvns r5,r0 + mvns r6,r3 + mvns r7,r4 + ands r5,r5,r1 + ands r6,r6,r4 + eors r4,r4,r5 + + ands r7,r7,r0 + mvns r5,r2 + ands r5,r5,r3 + eors r3,r3,r7 + + mvns r7,r1 + ands r7,r7,r2 + eors r2,r2,r6 + + eors r3,r3,r2 + mvns r2,r2 + + eors r0,r0,r7 + eors r1,r1,r5 + eors r1,r1,r0 + eors r0,r0,r4 + + // linear diffusion layer + ldr r7,[sp,#0] + + //c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7); + //c4 high part + movs r6,r4 + movs r5,#(20) + rors r4,r4,r5 + eors r6,r6,r4 + ldr r5,[r7,#C4L] + movs r7,#(4) + rors r5,r5,r7 + eors r6,r6,r5 + ldr r7,[sp,#0] + str r6,[r7,#C4H] + //c4 low part + movs r7,#(32-4) + rors r5,r5,r7 + movs r6,r5 + movs r7,#((32-20+3)%32) + rors r4,r4,r7 + eors r4,r4,r6 + movs r7,#(20) + rors r5,r5,r7 + eors r4,r4,r5 + ldr r7,[sp,#0] + str r4,[r7,#C4L] + + //c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19); + //c0 high part + movs r6,r0 + movs r5,#(14) + rors r0,r0,r5 + eors r6,r6,r0 + ldr r5,[r7,#C0L] + movs r4,#(10) + rors r5,r5,r4 + eors r6,r6,r5 + str r6,[r7,#C0H] + ldr r4,[r7,#R32_1] + eors r4,r4,r6 + str r4,[r7,#R32_1] + //c0 low part + movs 
r4,#(32-10) + rors r5,r5,r4 + movs r6,r5 + movs r4,#((32-14+9)%32) + rors r0,r0,r4 + eors r0,r0,r6 + movs r4,#(14) + rors r5,r5,r4 + eors r0,r0,r5 + ldr r4,[r7,#R32_0] + eors r4,r4,r0 + str r4,[r7,#R32_0] + + //c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61); + //c1 high part + movs r6,r1 + movs r5,#(19) + rors r1,r1,r5 + eors r6,r6,r1 + ldr r5,[r7,#C1L] + movs r4,#(31) + rors r5,r5,r4 + eors r6,r6,r5 + str r6,[r7,#C1H] + ldr r4,[r7,#R32_3] + eors r4,r4,r6 + str r4,[r7,#R32_3] + //c1 low part + movs r4,#(32-31) + rors r5,r5,r4 + movs r6,r5 + movs r4,#((32-19+30)%32) + rors r1,r1,r4 + eors r1,r1,r6 + movs r4,#(19) + rors r5,r5,r4 + eors r1,r1,r5 + ldr r4,[r7,#R32_2] + eors r4,r4,r1 + str r4,[r7,#R32_2] + + //c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1); + //c2 high part + movs r6,r2 + movs r5,#(3) + rors r2,r2,r5 + eors r6,r6,r2 + ldr r5,[r7,#C2L] + movs r4,#(1) + rors r5,r5,r4 + eors r6,r6,r5 + str r6,[r7,#C2H] + ldr r4,[r7,#R32_0] + eors r4,r4,r6 + str r4,[r7,#R32_0] + //c2 low part + movs r4,#(32-1) + rors r5,r5,r4 + movs r6,r5 + movs r4,#((32-3+0)%32) + rors r2,r2,r4 + eors r2,r2,r6 + movs r4,#(3) + rors r5,r5,r4 + eors r2,r2,r5 + ldr r4,[r7,#R32_3] + eors r4,r4,r2 + str r4,[r7,#R32_3] + + //c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17); + //c3 high part + movs r6,r3 + movs r5,#(5) + rors r3,r3,r5 + eors r6,r6,r3 + ldr r5,[r7,#C3L] + movs r4,#(9) + rors r5,r5,r4 + eors r6,r6,r5 + str r6,[r7,#C3H] + ldr r4,[r7,#R32_2] + eors r4,r4,r6 + str r4,[r7,#R32_2] + //c3 low part + movs r4,#(32-9) + rors r5,r5,r4 + movs r6,r5 + movs r4,#((32-5+8)%32) + rors r3,r3,r4 + eors r3,r3,r6 + movs r4,#(5) + rors r5,r5,r4 + eors r3,r3,r5 + ldr r4,[r7,#R32_1] + eors r4,r4,r3 + str r4,[r7,#R32_1] + + ldr r4,[r7,#C4L] + ldr r5,[sp,#4] + + ldr r6,[sp,#8] + subs r6,#1 + bmi drygascon128_g_v6m_exit + + str r6,[sp,#8] + b drygascon128_g_v6m_main_loop +drygascon128_g_v6m_exit: + + str r3,[r7,#C3L] + str 
r2,[r7,#C2L] + str r1,[r7,#C1L] + str r0,[r7,#C0L] + + add sp,sp,#12 + pop {r4, r5, r6, r7, pc} +.size drygascon128_g_v6m, .-drygascon128_g_v6m + +.align 2 +.type drygascon128_f_v6m, %function +drygascon128_f_v6m: + //r0:state c r x + //r1:input -> shall be 32 bit aligned + //r2:ds + //r3:rounds + push {r4, r5, r6, r7, lr} + + //stack frame: + //0 ~ 28-1: buf + //28 :pointer on c + //32 : rounds for g + //36 :mix round / g round + + movs r4,#26 + push {r0,r3,r4} + sub sp,sp,#28 + + //load 10 bit mask in r4 = 0x3FF + movs r4,#0xFF + lsls r4,r4,#2 + adds r4,r4,#3 + + movs r7,#0 + //r=0 + str r7,[r0,#R32_0] + str r7,[r0,#R32_1] + str r7,[r0,#R32_2] + str r7,[r0,#R32_3] + + //r7 = sp + add r7,r7,sp + + ldr r3,[r1] + movs r5,r4 + ands r5,r5,r3 + strh r5,[r7,#0+26] + + lsrs r3,r3,#10 + movs r5,r4 + ands r5,r5,r3 + strh r5,[r7,#0+24] + + lsrs r3,r3,#10 + movs r5,r4 + ands r5,r5,r3 + strh r5,[r7,#0+22] + + lsrs r5,r3,#10 + ldr r3,[r1,#4] + lsls r6,r3,#2 + lsrs r3,r3,#8 + orrs r6,r6,r5 + movs r5,r4 + ands r5,r5,r6 + strh r5,[r7,#0+20] + + movs r5,r4 + ands r5,r5,r3 + strh r5,[r7,#0+18] + + lsrs r3,r3,#10 + movs r5,r4 + ands r5,r5,r3 + strh r5,[r7,#0+16] + + lsrs r5,r3,#10 + ldr r3,[r1,#8] + lsls r6,r3,#4 + lsrs r3,r3,#6 + orrs r6,r6,r5 + movs r5,r4 + ands r5,r5,r6 + strh r5,[r7,#0+14] + + movs r5,r4 + ands r5,r5,r3 + strh r5,[r7,#0+12] + + lsrs r3,r3,#10 + movs r5,r4 + ands r5,r5,r3 + strh r5,[r7,#0+10] + + lsrs r5,r3,#10 + ldr r3,[r1,#12] + lsls r6,r3,#6 + lsrs r3,r3,#4 + orrs r6,r6,r5 + movs r5,r4 + ands r5,r5,r6 + strh r5,[r7,#0+8] + + movs r5,r4 + ands r5,r5,r3 + strh r5,[r7,#0+6] + + lsrs r3,r3,#10 + movs r5,r4 + ands r5,r5,r3 + strh r5,[r7,#0+4] + + lsrs r5,r3,#10 + lsls r6,r2,#8 + lsrs r3,r2,#2 + orrs r6,r6,r5 + movs r5,r4 + ands r5,r5,r6 + strh r5,[r7,#0+2] + + movs r5,r4 + ands r5,r5,r3 + strh r5,[r7,#0+0] + + movs r7,#26 + +drygascon128_f_v6m_mix128_main_loop: + movs r6,#0 + add r6,r6,sp + ldrh r6,[r6,r7] + + ldr r5,[sp,#28] + movs r7,r5 + adds r5,r5,#X0 + movs 
r4,#0xc + + lsls r0,r6,#2 + ands r0,r0,r4 + ldr r1,[r5,r0] + ldr r0,[r7,#0*8] + eors r0,r0,r1 + + lsrs r1,r6,#0 + ands r1,r1,r4 + ldr r2,[r5,r1] + ldr r1,[r7,#1*8] + eors r1,r1,r2 + + lsrs r2,r6,#2 + ands r2,r2,r4 + ldr r3,[r5,r2] + ldr r2,[r7,#2*8] + eors r2,r2,r3 + + lsrs r3,r6,#4 + ands r3,r3,r4 + ldr r4,[r5,r3] + ldr r3,[r7,#3*8] + eors r3,r3,r4 + + lsrs r4,r6,#6+2 + lsls r4,r4,#2 + ldr r6,[r5,r4] + ldr r4,[r7,#4*8] + eors r4,r4,r6 + + ldr r6,[sp,#36] + subs r6,#2 + bpl drygascon128_f_v6m_mix128_coreround + b drygascon128_f_v6m_mix128_exit +drygascon128_f_v6m_mix128_coreround: + str r6,[sp,#36] + + movs r6,#0xf0 + // addition of round constant + //r2 ^= r6; + eors r2,r2,r6 + + // substitution layer, lower half + eors r0,r0,r4 + eors r4,r4,r3 + eors r2,r2,r1 + + mvns r5,r0 + mvns r6,r3 + mvns r7,r4 + ands r5,r5,r1 + ands r6,r6,r4 + eors r4,r4,r5 + + ands r7,r7,r0 + mvns r5,r2 + ands r5,r5,r3 + eors r3,r3,r7 + + mvns r7,r1 + ands r7,r7,r2 + eors r2,r2,r6 + + eors r3,r3,r2 + mvns r2,r2 + + eors r0,r0,r7 + eors r1,r1,r5 + eors r1,r1,r0 + eors r0,r0,r4 + + ldr r7,[sp,#28] + str r4,[r7,#C4L] + str r3,[r7,#C3L] + str r2,[r7,#C2L] + str r1,[r7,#C1L] + str r0,[r7,#C0L] + + ldr r4,[r7,#C4H] + ldr r3,[r7,#C3H] + ldr r2,[r7,#C2H] + ldr r1,[r7,#C1H] + ldr r0,[r7,#C0H] + + // substitution layer, upper half + eors r0,r0,r4 + eors r4,r4,r3 + eors r2,r2,r1 + + mvns r5,r0 + mvns r6,r3 + mvns r7,r4 + ands r5,r5,r1 + ands r6,r6,r4 + eors r4,r4,r5 + + ands r7,r7,r0 + mvns r5,r2 + ands r5,r5,r3 + eors r3,r3,r7 + + mvns r7,r1 + ands r7,r7,r2 + eors r2,r2,r6 + + eors r3,r3,r2 + mvns r2,r2 + + eors r0,r0,r7 + eors r1,r1,r5 + eors r1,r1,r0 + eors r0,r0,r4 + + // linear diffusion layer + ldr r7,[sp,#28] + + //c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7); + //c4 high part + movs r6,r4 + movs r5,#(20) + rors r4,r4,r5 + eors r6,r6,r4 + ldr r5,[r7,#C4L] + movs r7,#(4) + rors r5,r5,r7 + eors r6,r6,r5 + ldr r7,[sp,#28] + str r6,[r7,#C4H] + //c4 low part + movs 
r7,#(32-4) + rors r5,r5,r7 + movs r6,r5 + movs r7,#((32-20+3)%32) + rors r4,r4,r7 + eors r4,r4,r6 + movs r7,#(20) + rors r5,r5,r7 + eors r4,r4,r5 + ldr r7,[sp,#28] + str r4,[r7,#C4L] + + //c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19); + //c0 high part + movs r6,r0 + movs r5,#(14) + rors r0,r0,r5 + eors r6,r6,r0 + ldr r5,[r7,#C0L] + movs r4,#(10) + rors r5,r5,r4 + eors r6,r6,r5 + str r6,[r7,#C0H] + //c0 low part + movs r4,#(32-10) + rors r5,r5,r4 + movs r6,r5 + movs r4,#((32-14+9)%32) + rors r0,r0,r4 + eors r0,r0,r6 + movs r4,#(14) + rors r5,r5,r4 + eors r0,r0,r5 + + //c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61); + //c1 high part + movs r6,r1 + movs r5,#(19) + rors r1,r1,r5 + eors r6,r6,r1 + ldr r5,[r7,#C1L] + movs r4,#(31) + rors r5,r5,r4 + eors r6,r6,r5 + str r6,[r7,#C1H] + //c1 low part + movs r4,#(32-31) + rors r5,r5,r4 + movs r6,r5 + movs r4,#((32-19+30)%32) + rors r1,r1,r4 + eors r1,r1,r6 + movs r4,#(19) + rors r5,r5,r4 + eors r1,r1,r5 + + //c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1); + //c2 high part + movs r6,r2 + movs r5,#(3) + rors r2,r2,r5 + eors r6,r6,r2 + ldr r5,[r7,#C2L] + movs r4,#(1) + rors r5,r5,r4 + eors r6,r6,r5 + str r6,[r7,#C2H] + //c2 low part + movs r4,#(32-1) + rors r5,r5,r4 + movs r6,r5 + movs r4,#((32-3+0)%32) + rors r2,r2,r4 + eors r2,r2,r6 + movs r4,#(3) + rors r5,r5,r4 + eors r2,r2,r5 + + //c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17); + //c3 high part + movs r6,r3 + movs r5,#(5) + rors r3,r3,r5 + eors r6,r6,r3 + ldr r5,[r7,#C3L] + movs r4,#(9) + rors r5,r5,r4 + eors r6,r6,r5 + str r6,[r7,#C3H] + //c3 low part + movs r4,#(32-9) + rors r5,r5,r4 + movs r6,r5 + movs r4,#((32-5+8)%32) + rors r3,r3,r4 + eors r3,r3,r6 + movs r4,#(5) + rors r5,r5,r4 + eors r3,r3,r5 + + str r3,[r7,#C3L] + str r2,[r7,#C2L] + str r1,[r7,#C1L] + str r0,[r7,#C0L] + + ldr r7,[sp,#36] + + b drygascon128_f_v6m_mix128_main_loop 
+drygascon128_f_v6m_mix128_exit: + ldr r7,[sp,#32] + //round=r5=rounds-1; + subs r6,r7,#1 + //base = round_cst+12-rounds + adr r5, round_cst + adds r5,r5,#12 + subs r5,r5,r7 + + add sp,sp,#28 + str r5,[sp,#4] + str r6,[sp,#8] + + //push {r0,r1,r2,r3} + //ldr r0,[sp,#16] + //bl print_state + //pop {r0,r1,r2,r3} + + b drygascon128_g_v6m_main_loop + +.align 2 +round_cst: +.byte 0x4b +.byte 0x5a +.byte 0x69 +.byte 0x78 +.byte 0x87 +.byte 0x96 +.byte 0xa5 +.byte 0xb4 +.byte 0xc3 +.byte 0xd2 +.byte 0xe1 +.byte 0xf0 +.align 2 + +.size drygascon128_f_v6m, .-drygascon128_f_v6m + + +#endif diff --git a/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v7m.S b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v7m.S new file mode 100644 index 0000000..f0c5fa1 --- /dev/null +++ b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v7m.S @@ -0,0 +1,692 @@ +/** +DryGascon128 'v7m implementation' +Sebastien Riou, May 27th 2020 + +Implementation optimized for ARM-Cortex-M7/M4/M3 (Size and Speed) +Safe against timing attack on X look up operations under +the following conditions: (safe if at least one line is true) +- System without cache +- State stored in non cacheable memory (like DTCM) +- Cache lines are 16 bytes or larger AND X is 16 bytes aligned + + +Notes: +- Arm Cortex-M7 Processor Technical Reference Manual Revision r1p2 states + that data cache line size is 32 bytes. +- Microchip app note TB3186 shows that Microchip use 16 bytes cache lines. +- ST does not give a general statement about cache lines for its products based +on M3 and M4. That said STM32F411xC/E datasheet (RM0383 +Reference manual) shows data cache lines of 16 bytes. +- In the unlikely case in which none of the condition can be met, +the 'v7m_fpu_x' can be used to prevent this attack. + +Note that implementation 'v7m_fpu' is faster (but requires FPU). 
+*/ +#if defined(__DRYGASCON_ARM_SELECTOR_H__) +.cpu cortex-m3 +.syntax unified +.code 16 +.thumb_func + +.align 1 +.global drygascon128_g_v7m +.global drygascon128_f_v7m +.global drygascon128_g0_v7m + + .equ C0, 0 + .equ C1, C0+8 + .equ C2, C0+16 + .equ C3, C0+24 + .equ C4, C0+32 + .equ R0, 48 + .equ R1, R0+8 + .equ X0, 64 + .equ X1, X0+8 + + .equ X0L, X0 + .equ X1L, X1 + .equ C0L, C0 + .equ C1L, C1 + .equ C2L, C2 + .equ C3L, C3 + .equ C4L, C4 + .equ R0L, R0 + .equ R1L, R1 + + .equ X0H, X0+4 + .equ X1H, X1+4 + .equ C0H, C0+4 + .equ C1H, C1+4 + .equ C2H, C2+4 + .equ C3H, C3+4 + .equ C4H, C4+4 + .equ R0H, R0+4 + .equ R1H, R1+4 + + .equ R32_0, R0L + .equ R32_1, R0H + .equ R32_2, R1L + .equ R32_3, R1H + + + +.type drygascon128_g_v7m, %function +drygascon128_g_v7m: + //Clears r in the state, then runs `rounds` rounds on c. + //r0: state: c,r,x + //r1: rounds + push {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} + //stack vars (after the second push below): + // 8 round + // 4 base address for round constant lookups + // 0 state address + + //r=0 + movs r10,#0 + str r10,[r0,#R32_0] + str r10,[r0,#R32_1] + str r10,[r0,#R32_2] + str r10,[r0,#R32_3] + + //round=r11=rounds-1; + subs r11,r1,#1 + //base = round_cst+12-rounds + adr r10, round_cst + adds r10,r10,#12 + subs r10,r10,r1 + + push {r0,r10,r11} + + //Load C + adds r14,r0,#C0 + LDMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} + + //loop entry + //assume r11>=0 at entry +drygascon128_g_v7m_main_loop: + //r0~r9: c + //r10: base for round constants + //r11: round, counting from rounds-1 to 0 + + //r11 = round constant, read from the table at base+round + ldrb r11,[r10,r11] + //state: + //r0 to r9: c + //r11: constant to add as round constant + //r14: pointer on C + + // addition of round constant + //C2L ^= round constant; + eors r4,r4,r11 + + // substitution layer, lower half + eors r0,r0,r8 + eors r8,r8,r6 + eors r4,r4,r2 + mvns r10,r0 + mvns r11,r6 + mvns r12,r8 + ands r10,r10,r2 + ands r11,r11,r8 + eors r8,r8,r10 + ands r12,r12,r0 + mvns r10,r4 + ands r10,r10,r6 + eors r6,r6,r12 + mvns r12,r2 + ands r12,r12,r4 + eors r4,r4,r11 + eors r6,r6,r4 
+ mvns r4,r4 + eors r0,r0,r12 + eors r2,r2,r10 + eors r2,r2,r0 + eors r0,r0,r8 + + // substitution layer, upper half + eors r1,r1,r9 + eors r9,r9,r7 + eors r5,r5,r3 + mvns r10,r1 + mvns r11,r7 + mvns r12,r9 + ands r10,r10,r3 + ands r11,r11,r9 + eors r9,r9,r10 + ands r12,r12,r1 + mvns r10,r5 + ands r10,r10,r7 + eors r7,r7,r12 + mvns r12,r3 + ands r12,r12,r5 + eors r5,r5,r11 + eors r7,r7,r5 + mvns r5,r5 + eors r1,r1,r12 + eors r3,r3,r10 + eors r3,r3,r1 + eors r1,r1,r9 + + + // linear diffusion layer + + //c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7); + //c4 high part + rors r11,r9,#(20) + eors r9,r11,r9 + rors r10,r8,#(4) + eors r9,r10,r9 + //c4 low part + rors r11,r11,#((32-20+3)%32) + eors r11,r11,r8 + rors r10,r8,#(20) + eors r8,r10,r11 + + //c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19); + //c0 high part + rors r11,r1,#(14) + eors r1,r11,r1 + rors r10,r0,#(10) + eors r1,r10,r1 + ldr r12,[r14,#R32_1-C0] + eors r12,r12,r1 + str r12,[r14,#R32_1-C0] + //c0 low part + rors r11,r11,#((32-14+9)%32) + eors r11,r11,r0 + rors r10,r0,#(14) + eors r0,r10,r11 + ldr r12,[r14,#R32_0-C0] + eors r12,r12,r0 + str r12,[r14,#R32_0-C0] + + //c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61); + //c1 high part + rors r11,r3,#(19) + eors r3,r11,r3 + rors r10,r2,#(31) + eors r3,r10,r3 + ldr r12,[r14,#R32_3-C0] + eors r12,r12,r3 + str r12,[r14,#R32_3-C0] + //c1 low part + rors r11,r11,#((32-19+30)%32) + eors r11,r11,r2 + rors r10,r2,#(19) + eors r2,r10,r11 + ldr r12,[r14,#R32_2-C0] + eors r12,r12,r2 + str r12,[r14,#R32_2-C0] + + //c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1); + //c2 high part + rors r11,r5,#(3) + eors r5,r11,r5 + rors r10,r4,#(1) + eors r5,r10,r5 + ldr r12,[r14,#R32_0-C0] + eors r12,r12,r5 + str r12,[r14,#R32_0-C0] + //c2 low part + rors r11,r11,#((32-3+0)%32) + eors r11,r11,r4 + rors r10,r4,#(3) + eors r4,r10,r11 + ldr r12,[r14,#R32_3-C0] + eors r12,r12,r4 + 
str r12,[r14,#R32_3-C0] + + //c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17); + //c3 high part + rors r11,r7,#(5) + eors r7,r11,r7 + rors r10,r6,#(9) + eors r7,r10,r7 + ldr r12,[r14,#R32_2-C0] + eors r12,r12,r7 + str r12,[r14,#R32_2-C0] + //c3 low part + rors r11,r11,#((32-5+8)%32) + eors r11,r11,r6 + rors r10,r6,#(5) + eors r6,r10,r11 + ldr r12,[r14,#R32_1-C0] + eors r12,r12,r6 + str r12,[r14,#R32_1-C0] + + //state: + //r0 to r9: c + //r10,r11,r12 destroyed + + ldr r10,[sp,#4] + + ldr r11,[sp,#8] + subs r11,#1 + bmi drygascon128_g_v7m_exit + + str r11,[sp,#8] + b drygascon128_g_v7m_main_loop +drygascon128_g_v7m_exit: + //update C + STMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} + + add sp,sp,#12 + pop {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc} +.size drygascon128_g_v7m, .-drygascon128_g_v7m + +.align 2 +.type drygascon128_f_v7m, %function +drygascon128_f_v7m: + //r0:state + //r1:input + //r2:ds + //r3:rounds + push {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} + + //stack frame: + //0: pointer on input + //4: DS value + //8 :pointer on state + //12 : rounds for g + //16 :mix round / g round + + movs r10,#0 //init of input bit counter + push {r0,r3,r10} //make the same stack frame as drygascon128_g_cm7 + push {r1,r2} + //r=0 + str r10,[r0,#R32_0] + str r10,[r0,#R32_1] + str r10,[r0,#R32_2] + str r10,[r0,#R32_3] + + //Load C + adds r11,r0,#C0 + LDMIA.W r11,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} + + +drygascon128_f_v7m_mix128_main_loop: + //r10 is input bit counter + ldr r11,[sp,#0] //r11 is pointer on input + + //r10 r12 shift + // 0 0 0 + // 10 1 2 + // 20 2 4 + // 30 3 6 + // 40 5 0 + // 50 6 2 + // 60 7 4 + // 70 8 6 + // 80 10 0 + // 90 11 2 + // 100 12 4 + // 110 13 6 + // 120 15 0 + // 130 16 2 --> we do that operation for 2 last bits in a special last loop + + cmp r10,#120 + bne drygascon128_f_v7m_mix128_main_loop.regular + + //we execute this only during the pen-ultimate operation + //we add the 2 lsb from DS to r14 + ldrb r14,[r11,#15] + 
ldr r10,[sp,#4] + lsl r10,r10,#8 + eors r14,r14,r10 + b drygascon128_f_v7m_mix128_main_loop.core + +drygascon128_f_v7m_mix128_main_loop.regular: + //r12 is base byte: byte offset to read from input buffer + lsr r12,r10,#3 //divide by 8 to get base byte + //r10 becomes shift + lsl r14,r12,#3 + sub r10,r10,r14 + + ldr r14,[r11,r12] //M7 supports unalign access with ldr + lsr r14,r14,r10 + +drygascon128_f_v7m_mix128_main_loop.core: + ldr r10,[sp,#8] + adds r10,r10,#X0 + + lsls r11,r14,#2 + ands r11,r11,#0xc + ldr r11,[r10,r11] + eors r0,r0,r11 + + lsrs r11,r14,#0 + ands r11,r11,#0xc + ldr r11,[r10,r11] + eors r2,r2,r11 + + lsrs r11,r14,#2 + ands r11,r11,#0xc + ldr r11,[r10,r11] + eors r4,r4,r11 + + lsrs r11,r14,#4 + ands r11,r11,#0xc + ldr r11,[r10,r11] + eors r6,r6,r11 + + lsrs r11,r14,#6 + ands r11,r11,#0xc + ldr r11,[r10,r11] + eors r8,r8,r11 + + ldr r10,[sp,#16] + adds r10,#10 + cmp r10,#140 + beq drygascon128_f_v7m_mix128_exit +drygascon128_f_v7m_mix128_coreround: + str r10,[sp,#16] + + movs r11,#0xf0 + + //state: + //r0 to r9: c + //r11: constant to add as round constant + + // addition of round constant + //C2L ^= round constant; + eors r4,r4,r11 + + // substitution layer, lower half + eors r0,r0,r8 + eors r8,r8,r6 + eors r4,r4,r2 + mvns r10,r0 + mvns r11,r6 + mvns r12,r8 + ands r10,r10,r2 + ands r11,r11,r8 + eors r8,r8,r10 + ands r12,r12,r0 + mvns r10,r4 + ands r10,r10,r6 + eors r6,r6,r12 + mvns r12,r2 + ands r12,r12,r4 + eors r4,r4,r11 + eors r6,r6,r4 + mvns r4,r4 + eors r0,r0,r12 + eors r2,r2,r10 + eors r2,r2,r0 + eors r0,r0,r8 + + // substitution layer, upper half + eors r1,r1,r9 + eors r9,r9,r7 + eors r5,r5,r3 + mvns r10,r1 + mvns r11,r7 + mvns r12,r9 + ands r10,r10,r3 + ands r11,r11,r9 + eors r9,r9,r10 + ands r12,r12,r1 + mvns r10,r5 + ands r10,r10,r7 + eors r7,r7,r12 + mvns r12,r3 + ands r12,r12,r5 + eors r5,r5,r11 + eors r7,r7,r5 + mvns r5,r5 + eors r1,r1,r12 + eors r3,r3,r10 + eors r3,r3,r1 + eors r1,r1,r9 + + + // linear diffusion layer + + //c4 ^= 
gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7); + //c4 high part + rors r11,r9,#(20) + eors r9,r11,r9 + rors r10,r8,#(4) + eors r9,r10,r9 + //c4 low part + rors r11,r11,#((32-20+3)%32) + eors r11,r11,r8 + rors r10,r8,#(20) + eors r8,r10,r11 + + //c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19); + //c0 high part + rors r11,r1,#(14) + eors r1,r11,r1 + rors r10,r0,#(10) + eors r1,r10,r1 + //c0 low part + rors r11,r11,#((32-14+9)%32) + eors r11,r11,r0 + rors r10,r0,#(14) + eors r0,r10,r11 + + //c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61); + //c1 high part + rors r11,r3,#(19) + eors r3,r11,r3 + rors r10,r2,#(31) + eors r3,r10,r3 + //c1 low part + rors r11,r11,#((32-19+30)%32) + eors r11,r11,r2 + rors r10,r2,#(19) + eors r2,r10,r11 + + //c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1); + //c2 high part + rors r11,r5,#(3) + eors r5,r11,r5 + rors r10,r4,#(1) + eors r5,r10,r5 + //c2 low part + rors r11,r11,#((32-3+0)%32) + eors r11,r11,r4 + rors r10,r4,#(3) + eors r4,r10,r11 + + //c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17); + //c3 high part + rors r11,r7,#(5) + eors r7,r11,r7 + rors r10,r6,#(9) + eors r7,r10,r7 + //c3 low part + rors r11,r11,#((32-5+8)%32) + eors r11,r11,r6 + rors r10,r6,#(5) + eors r6,r10,r11 + + //state: + //r0 to r9: c + //r10,r11,r12 destroyed + + ldr r10,[sp,#16] + cmp r10,#130 + bne drygascon128_f_v7m_mix128_main_loop + //prepare the last loop: load DS 2 msb + ldr r14,[sp,4] + lsr r14,r14,#2 + b drygascon128_f_v7m_mix128_main_loop.core + +drygascon128_f_v7m_mix128_exit: + ldr r14,[sp,#12] + //round=r10=rounds-1; + subs r11,r14,#1 + //base = round_cst+12-rounds + adr r10, round_cst + adds r10,r10,#12 + subs r10,r10,r14 + + str r10,[sp,#12] + str r11,[sp,#16] + + ldr r14,[sp,#8] + add sp,sp,#8 + b drygascon128_g_v7m_main_loop + +.align 2 +round_cst: +.byte 0x4b +.byte 0x5a +.byte 0x69 +.byte 0x78 +.byte 0x87 +.byte 
0x96 +.byte 0xa5 +.byte 0xb4 +.byte 0xc3 +.byte 0xd2 +.byte 0xe1 +.byte 0xf0 +.align 2 + +.size drygascon128_f_v7m, .-drygascon128_f_v7m + +.type drygascon128_g0_v7m, %function +drygascon128_g0_v7m: + //perform a single round without accumulate + //r0: state + push {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} + + //Load C + adds r14,r0,#C0 + LDMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} + + //r0~r9: c + + //r11 = ((0xf - 0) << 4) | 0; + movs r11,#0xf0 + //state: + //r0 to r9: c + //r11: constant to add as round constant + + // addition of round constant + //C2L ^= round constant; + eors r4,r4,r11 + + // substitution layer, lower half + eors r0,r0,r8 + eors r8,r8,r6 + eors r4,r4,r2 + mvns r10,r0 + mvns r11,r6 + mvns r12,r8 + ands r10,r10,r2 + ands r11,r11,r8 + eors r8,r8,r10 + ands r12,r12,r0 + mvns r10,r4 + ands r10,r10,r6 + eors r6,r6,r12 + mvns r12,r2 + ands r12,r12,r4 + eors r4,r4,r11 + eors r6,r6,r4 + mvns r4,r4 + eors r0,r0,r12 + eors r2,r2,r10 + eors r2,r2,r0 + eors r0,r0,r8 + + // substitution layer, upper half + eors r1,r1,r9 + eors r9,r9,r7 + eors r5,r5,r3 + mvns r10,r1 + mvns r11,r7 + mvns r12,r9 + ands r10,r10,r3 + ands r11,r11,r9 + eors r9,r9,r10 + ands r12,r12,r1 + mvns r10,r5 + ands r10,r10,r7 + eors r7,r7,r12 + mvns r12,r3 + ands r12,r12,r5 + eors r5,r5,r11 + eors r7,r7,r5 + mvns r5,r5 + eors r1,r1,r12 + eors r3,r3,r10 + eors r3,r3,r1 + eors r1,r1,r9 + + + // linear diffusion layer + + //c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7); + //c4 high part + rors r11,r9,#(20) + eors r9,r11,r9 + rors r10,r8,#(4) + eors r9,r10,r9 + //c4 low part + rors r11,r11,#((32-20+3)%32) + eors r11,r11,r8 + rors r10,r8,#(20) + eors r8,r10,r11 + + //c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19); + //c0 high part + rors r11,r1,#(14) + eors r1,r11,r1 + rors r10,r0,#(10) + eors r1,r10,r1 + //c0 low part + rors r11,r11,#((32-14+9)%32) + eors r11,r11,r0 + rors r10,r0,#(14) + eors r0,r10,r11 + + //c1 ^= 
gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61); + //c1 high part + rors r11,r3,#(19) + eors r3,r11,r3 + rors r10,r2,#(31) + eors r3,r10,r3 + //c1 low part + rors r11,r11,#((32-19+30)%32) + eors r11,r11,r2 + rors r10,r2,#(19) + eors r2,r10,r11 + + //c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1); + //c2 high part + rors r11,r5,#(3) + eors r5,r11,r5 + rors r10,r4,#(1) + eors r5,r10,r5 + //c2 low part + rors r11,r11,#((32-3+0)%32) + eors r11,r11,r4 + rors r10,r4,#(3) + eors r4,r10,r11 + + //c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17); + //c3 high part + rors r11,r7,#(5) + eors r7,r11,r7 + rors r10,r6,#(9) + eors r7,r10,r7 + //c3 low part + rors r11,r11,#((32-5+8)%32) + eors r11,r11,r6 + rors r10,r6,#(5) + eors r6,r10,r11 + + //state: + //r0 to r9: c + //r10,r11,r12 destroyed + + //update C + STMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} + + pop {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc} +.size drygascon128_g0_v7m, .-drygascon128_g0_v7m +#endif diff --git a/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v7m_fpu.S b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v7m_fpu.S new file mode 100644 index 0000000..d016dc8 --- /dev/null +++ b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v7m_fpu.S @@ -0,0 +1,692 @@ +/** +DryGascon128 'v7m_fpu implementation' +Sebastien Riou, May 27th 2020 + +Implementation optimized for ARM-Cortex-M7/M4/M3 (Size and Speed) +Safe against timing attack on X look up operations under +the following conditions: (safe if at least one line is true) +- System without cache +- State stored in non cacheable memory (like DTCM) +- Cache lines are 16 bytes or larger AND X is 16 bytes aligned + + +Notes: +- Arm Cortex-M7 Processor Technical Reference Manual Revision r1p2 states + that data cache line size is 32 bytes. 
+- Microchip app note TB3186 shows that Microchip uses 16 bytes cache lines. +- ST does not give a general statement about cache lines for its products based +on M3 and M4. That said STM32F411xC/E datasheet (RM0383 +Reference manual) shows data cache lines of 16 bytes. +- In the unlikely case in which none of the conditions can be met, +the 'v7m_fpu_x' can be used to prevent this attack. +*/ +#if defined(__DRYGASCON_ARM_SELECTOR_H__) +.cpu cortex-m3 +.syntax unified +.code 16 +.thumb_func + +.align 1 +.global drygascon128_g_v7m_fpu +.global drygascon128_f_v7m_fpu +.global drygascon128_g0_v7m_fpu + + .equ C0, 0 + .equ C1, C0+8 + .equ C2, C0+16 + .equ C3, C0+24 + .equ C4, C0+32 + .equ R0, 48 + .equ R1, R0+8 + .equ X0, 64 + .equ X1, X0+8 + + .equ X0L, X0 + .equ X1L, X1 + .equ C0L, C0 + .equ C1L, C1 + .equ C2L, C2 + .equ C3L, C3 + .equ C4L, C4 + .equ R0L, R0 + .equ R1L, R1 + + .equ X0H, X0+4 + .equ X1H, X1+4 + .equ C0H, C0+4 + .equ C1H, C1+4 + .equ C2H, C2+4 + .equ C3H, C3+4 + .equ C4H, C4+4 + .equ R0H, R0+4 + .equ R1H, R1+4 + + .equ R32_0, R0L + .equ R32_1, R0H + .equ R32_2, R1L + .equ R32_3, R1H + + + +.type drygascon128_g_v7m_fpu, %function +drygascon128_g_v7m_fpu: + //Runs `rounds` rounds on c; r is accumulated in S10..S13 and stored to the state at exit. + //r0: state: c,r,x + //r1: rounds + push {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} + //stack vars (after the second push below): + // 8 round + // 4 base address for round constant lookups + // 0 state address + + //r=0 + //S10..S13 hold r as raw 32-bit words, so clear them with an integer zero. + //VSUB.F32 Sx,Sx,Sx is not a reliable clear: a NaN/Inf bit pattern left + //by a previous use yields NaN (non-zero) instead of 0. + movs r10,#0 + vmov S10,r10 + vmov S11,r10 + vmov S12,r10 + vmov S13,r10 + + //round=r11=rounds-1; + subs r11,r1,#1 + //base = round_cst+12-rounds + adr r10, round_cst + adds r10,r10,#12 + subs r10,r10,r1 + + push {r0,r10,r11} + + //Load C + adds r14,r0,#C0 + LDMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} + + //loop entry + //assume r11>=0 at entry +drygascon128_g_v7m_fpu_main_loop: + //r0~r9: c + //r10: base for round constants + //r11: round, counting from rounds-1 to 0 + + //r11 = round constant, read from the table at base+round + ldrb r11,[r10,r11] + //state: + //r0 to r9: c + //r11: constant to add as round constant + 
//FPU: + //s10 to s13: r (R32_0 to R32_3) + + // addition of round constant + //C2L ^= round constant; + eors r4,r4,r11 + + // substitution layer, lower half + eors r0,r0,r8 + eors r8,r8,r6 + eors r4,r4,r2 + mvns r10,r0 + mvns r11,r6 + mvns r12,r8 + ands r10,r10,r2 + ands r11,r11,r8 + eors r8,r8,r10 + ands r12,r12,r0 + mvns r10,r4 + ands r10,r10,r6 + eors r6,r6,r12 + mvns r12,r2 + ands r12,r12,r4 + eors r4,r4,r11 + eors r6,r6,r4 + mvns r4,r4 + eors r0,r0,r12 + eors r2,r2,r10 + eors r2,r2,r0 + eors r0,r0,r8 + + // substitution layer, upper half + eors r1,r1,r9 + eors r9,r9,r7 + eors r5,r5,r3 + mvns r10,r1 + mvns r11,r7 + mvns r12,r9 + ands r10,r10,r3 + ands r11,r11,r9 + eors r9,r9,r10 + ands r12,r12,r1 + mvns r10,r5 + ands r10,r10,r7 + eors r7,r7,r12 + mvns r12,r3 + ands r12,r12,r5 + eors r5,r5,r11 + eors r7,r7,r5 + mvns r5,r5 + eors r1,r1,r12 + eors r3,r3,r10 + eors r3,r3,r1 + eors r1,r1,r9 + + + // linear diffusion layer + + //c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7); + //c4 high part + rors r11,r9,#(20) + eors r9,r11,r9 + rors r10,r8,#(4) + eors r9,r10,r9 + //c4 low part + rors r11,r11,#((32-20+3)%32) + eors r11,r11,r8 + rors r10,r8,#(20) + eors r8,r10,r11 + + vmov r14,S11 + //c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19); + //c0 high part + rors r11,r1,#(14) + eors r1,r11,r1 + rors r10,r0,#(10) + eors r1,r10,r1 + //r14 is R32_1 + eors r14,r14,r1 + vmov r12,S10 + //c0 low part + rors r11,r11,#((32-14+9)%32) + eors r11,r11,r0 + rors r10,r0,#(14) + eors r0,r10,r11 + //r12 is R32_0 + eors r12,r12,r0 + //c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1); + //c2 high part + rors r11,r5,#(3) + eors r5,r11,r5 + rors r10,r4,#(1) + eors r5,r10,r5 + //r12 is R32_0 + eors r12,r12,r5 + vmov S10,r12 + vmov r12,S13 + //c2 low part + rors r11,r11,#((32-3+0)%32) + eors r11,r11,r4 + rors r10,r4,#(3) + eors r4,r10,r11 + //r12 is R32_3 + eors r12,r12,r4 + //c1 ^= gascon_rotr64_interleaved(c1, 38) ^ 
gascon_rotr64_interleaved(c1, 61); + //c1 high part + rors r11,r3,#(19) + eors r3,r11,r3 + rors r10,r2,#(31) + eors r3,r10,r3 + //r12 is R32_3 + eors r12,r12,r3 + vmov S13,r12 + vmov r12,S12 + //c1 low part + rors r11,r11,#((32-19+30)%32) + eors r11,r11,r2 + rors r10,r2,#(19) + eors r2,r10,r11 + //r12 is R32_2 + eors r12,r12,r2 + //c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17); + //c3 high part + rors r11,r7,#(5) + eors r7,r11,r7 + rors r10,r6,#(9) + eors r7,r10,r7 + //r12 is R32_2 + eors r12,r12,r7 + vmov S12,r12 + //c3 low part + rors r11,r11,#((32-5+8)%32) + eors r11,r11,r6 + rors r10,r6,#(5) + eors r6,r10,r11 + //r14 is R32_1 + eors r14,r14,r6 + vmov S11,r14 + + //state: + //r0 to r9: c + //r10,r11,r12,r14 destroyed + + ldr r10,[sp,#4] + + ldr r11,[sp,#8] + subs r11,#1 + bmi drygascon128_g_v7m_fpu_exit + + str r11,[sp,#8] + b drygascon128_g_v7m_fpu_main_loop +drygascon128_g_v7m_fpu_exit: + //update C + ldr r14,[sp,#0] + STMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} + + //update R + ldr r11,[sp,#0] + adds r11,r11,#R0 + VSTMIA.F32 r11, {S10,S11,S12,S13} + + add sp,sp,#12 + pop {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc} +.size drygascon128_g_v7m_fpu, .-drygascon128_g_v7m_fpu + +.align 2 +.type drygascon128_f_v7m_fpu, %function +drygascon128_f_v7m_fpu: + //r0:state + //r1:input + //r2:ds + //r3:rounds + push {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} + + //stack frame: + //0: pointer on input + //4: DS value + //8 :pointer on state + //12 : rounds for g + //16 :mix round / g round + + movs r10,#0 //init of input bit counter + push {r0,r3,r10} //make the same stack frame as drygascon128_g_v7m_fpu + push {r1,r2} + //r=0 + //S10..S13 hold r as raw 32-bit words; r10 is still 0 here, so move it in. + //VSUB.F32 Sx,Sx,Sx is not a reliable clear: a NaN/Inf bit pattern left + //by a previous use yields NaN (non-zero) instead of 0. + vmov S10,r10 + vmov S11,r10 + vmov S12,r10 + vmov S13,r10 + + //Load C + adds r11,r0,#C0 + LDMIA.W r11,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} + + +drygascon128_f_v7m_fpu_mix128_main_loop: + //r10 is input bit counter + ldr r11,[sp,#0] //r11 is pointer on input + + //r10 r12 shift + // 0 0 0 + // 
10 1 2 + // 20 2 4 + // 30 3 6 + // 40 5 0 + // 50 6 2 + // 60 7 4 + // 70 8 6 + // 80 10 0 + // 90 11 2 + // 100 12 4 + // 110 13 6 + // 120 15 0 + // 130 16 2 --> we do that operation for 2 last bits in a special last loop + + cmp r10,#120 + bne drygascon128_f_v7m_fpu_mix128_main_loop.regular + + //we execute this only during the pen-ultimate operation + //we add the 2 lsb from DS to r14 + ldrb r14,[r11,#15] + ldr r10,[sp,#4] + lsl r10,r10,#8 + eors r14,r14,r10 + b drygascon128_f_v7m_fpu_mix128_main_loop.core + +drygascon128_f_v7m_fpu_mix128_main_loop.regular: + //r12 is base byte: byte offset to read from input buffer + lsr r12,r10,#3 //divide by 8 to get base byte + //r10 becomes shift + lsl r14,r12,#3 + sub r10,r10,r14 + + ldr r14,[r11,r12] //M7 supports unalign access with ldr + lsr r14,r14,r10 + +drygascon128_f_v7m_fpu_mix128_main_loop.core: + ldr r10,[sp,#8] + adds r10,r10,#X0 + + lsls r11,r14,#2 + ands r11,r11,#0xc + ldr r11,[r10,r11] + eors r0,r0,r11 + + lsrs r11,r14,#0 + ands r11,r11,#0xc + ldr r11,[r10,r11] + eors r2,r2,r11 + + lsrs r11,r14,#2 + ands r11,r11,#0xc + ldr r11,[r10,r11] + eors r4,r4,r11 + + lsrs r11,r14,#4 + ands r11,r11,#0xc + ldr r11,[r10,r11] + eors r6,r6,r11 + + lsrs r11,r14,#6 + ands r11,r11,#0xc + ldr r11,[r10,r11] + eors r8,r8,r11 + + ldr r10,[sp,#16] + adds r10,#10 + cmp r10,#140 + beq drygascon128_f_v7m_fpu_mix128_exit +drygascon128_f_v7m_fpu_mix128_coreround: + str r10,[sp,#16] + + movs r11,#0xf0 + + //state: + //r0 to r9: c + //r11: constant to add as round constant + + // addition of round constant + //C2L ^= round constant; + eors r4,r4,r11 + + // substitution layer, lower half + eors r0,r0,r8 + eors r8,r8,r6 + eors r4,r4,r2 + mvns r10,r0 + mvns r11,r6 + mvns r12,r8 + ands r10,r10,r2 + ands r11,r11,r8 + eors r8,r8,r10 + ands r12,r12,r0 + mvns r10,r4 + ands r10,r10,r6 + eors r6,r6,r12 + mvns r12,r2 + ands r12,r12,r4 + eors r4,r4,r11 + eors r6,r6,r4 + mvns r4,r4 + eors r0,r0,r12 + eors r2,r2,r10 + eors r2,r2,r0 + eors r0,r0,r8 + 
+ // substitution layer, upper half + eors r1,r1,r9 + eors r9,r9,r7 + eors r5,r5,r3 + mvns r10,r1 + mvns r11,r7 + mvns r12,r9 + ands r10,r10,r3 + ands r11,r11,r9 + eors r9,r9,r10 + ands r12,r12,r1 + mvns r10,r5 + ands r10,r10,r7 + eors r7,r7,r12 + mvns r12,r3 + ands r12,r12,r5 + eors r5,r5,r11 + eors r7,r7,r5 + mvns r5,r5 + eors r1,r1,r12 + eors r3,r3,r10 + eors r3,r3,r1 + eors r1,r1,r9 + + + // linear diffusion layer + + //c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7); + //c4 high part + rors r11,r9,#(20) + eors r9,r11,r9 + rors r10,r8,#(4) + eors r9,r10,r9 + //c4 low part + rors r11,r11,#((32-20+3)%32) + eors r11,r11,r8 + rors r10,r8,#(20) + eors r8,r10,r11 + + //c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19); + //c0 high part + rors r11,r1,#(14) + eors r1,r11,r1 + rors r10,r0,#(10) + eors r1,r10,r1 + //c0 low part + rors r11,r11,#((32-14+9)%32) + eors r11,r11,r0 + rors r10,r0,#(14) + eors r0,r10,r11 + + //c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61); + //c1 high part + rors r11,r3,#(19) + eors r3,r11,r3 + rors r10,r2,#(31) + eors r3,r10,r3 + //c1 low part + rors r11,r11,#((32-19+30)%32) + eors r11,r11,r2 + rors r10,r2,#(19) + eors r2,r10,r11 + + //c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1); + //c2 high part + rors r11,r5,#(3) + eors r5,r11,r5 + rors r10,r4,#(1) + eors r5,r10,r5 + //c2 low part + rors r11,r11,#((32-3+0)%32) + eors r11,r11,r4 + rors r10,r4,#(3) + eors r4,r10,r11 + + //c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17); + //c3 high part + rors r11,r7,#(5) + eors r7,r11,r7 + rors r10,r6,#(9) + eors r7,r10,r7 + //c3 low part + rors r11,r11,#((32-5+8)%32) + eors r11,r11,r6 + rors r10,r6,#(5) + eors r6,r10,r11 + + //state: + //r0 to r9: c + //r10,r11,r12 destroyed + + ldr r10,[sp,#16] + cmp r10,#130 + bne drygascon128_f_v7m_fpu_mix128_main_loop + //prepare the last loop: load DS 2 msb + ldr r14,[sp,4] + lsr 
r14,r14,#2 + b drygascon128_f_v7m_fpu_mix128_main_loop.core + +drygascon128_f_v7m_fpu_mix128_exit: + ldr r14,[sp,#12] + //round=r10=rounds-1; + subs r11,r14,#1 + //base = round_cst+12-rounds + adr r10, round_cst + adds r10,r10,#12 + subs r10,r10,r14 + + str r10,[sp,#12] + str r11,[sp,#16] + + add sp,sp,#8 + b drygascon128_g_v7m_fpu_main_loop + +.align 2 +round_cst: +.byte 0x4b +.byte 0x5a +.byte 0x69 +.byte 0x78 +.byte 0x87 +.byte 0x96 +.byte 0xa5 +.byte 0xb4 +.byte 0xc3 +.byte 0xd2 +.byte 0xe1 +.byte 0xf0 +.align 2 + +.size drygascon128_f_v7m_fpu, .-drygascon128_f_v7m_fpu + +.type drygascon128_g0_v7m_fpu, %function +drygascon128_g0_v7m_fpu: + //perform a single round without accumulate + //r0: state + push {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} + + //Load C + adds r14,r0,#C0 + LDMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} + + //r0~r9: c + + //r11 = ((0xf - 0) << 4) | 0; + movs r11,#0xf0 + //state: + //r0 to r9: c + //r11: constant to add as round constant + + // addition of round constant + //C2L ^= round constant; + eors r4,r4,r11 + + // substitution layer, lower half + eors r0,r0,r8 + eors r8,r8,r6 + eors r4,r4,r2 + mvns r10,r0 + mvns r11,r6 + mvns r12,r8 + ands r10,r10,r2 + ands r11,r11,r8 + eors r8,r8,r10 + ands r12,r12,r0 + mvns r10,r4 + ands r10,r10,r6 + eors r6,r6,r12 + mvns r12,r2 + ands r12,r12,r4 + eors r4,r4,r11 + eors r6,r6,r4 + mvns r4,r4 + eors r0,r0,r12 + eors r2,r2,r10 + eors r2,r2,r0 + eors r0,r0,r8 + + // substitution layer, upper half + eors r1,r1,r9 + eors r9,r9,r7 + eors r5,r5,r3 + mvns r10,r1 + mvns r11,r7 + mvns r12,r9 + ands r10,r10,r3 + ands r11,r11,r9 + eors r9,r9,r10 + ands r12,r12,r1 + mvns r10,r5 + ands r10,r10,r7 + eors r7,r7,r12 + mvns r12,r3 + ands r12,r12,r5 + eors r5,r5,r11 + eors r7,r7,r5 + mvns r5,r5 + eors r1,r1,r12 + eors r3,r3,r10 + eors r3,r3,r1 + eors r1,r1,r9 + + + // linear diffusion layer + + //c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7); + //c4 high part + rors r11,r9,#(20) + eors 
r9,r11,r9 + rors r10,r8,#(4) + eors r9,r10,r9 + //c4 low part + rors r11,r11,#((32-20+3)%32) + eors r11,r11,r8 + rors r10,r8,#(20) + eors r8,r10,r11 + + //c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19); + //c0 high part + rors r11,r1,#(14) + eors r1,r11,r1 + rors r10,r0,#(10) + eors r1,r10,r1 + //c0 low part + rors r11,r11,#((32-14+9)%32) + eors r11,r11,r0 + rors r10,r0,#(14) + eors r0,r10,r11 + + //c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61); + //c1 high part + rors r11,r3,#(19) + eors r3,r11,r3 + rors r10,r2,#(31) + eors r3,r10,r3 + //c1 low part + rors r11,r11,#((32-19+30)%32) + eors r11,r11,r2 + rors r10,r2,#(19) + eors r2,r10,r11 + + //c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1); + //c2 high part + rors r11,r5,#(3) + eors r5,r11,r5 + rors r10,r4,#(1) + eors r5,r10,r5 + //c2 low part + rors r11,r11,#((32-3+0)%32) + eors r11,r11,r4 + rors r10,r4,#(3) + eors r4,r10,r11 + + //c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17); + //c3 high part + rors r11,r7,#(5) + eors r7,r11,r7 + rors r10,r6,#(9) + eors r7,r10,r7 + //c3 low part + rors r11,r11,#((32-5+8)%32) + eors r11,r11,r6 + rors r10,r6,#(5) + eors r6,r10,r11 + + //state: + //r0 to r9: c + //r10,r11,r12 destroyed + + //update C + STMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} + + pop {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc} +.size drygascon128_g0_v7m_fpu, .-drygascon128_g0_v7m_fpu +#endif diff --git a/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v7m_fpu_x.S b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v7m_fpu_x.S new file mode 100644 index 0000000..53472ea --- /dev/null +++ b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v7m_fpu_x.S @@ -0,0 +1,690 @@ +/** +DryGascon128 'v7m_fpu_x implementation' +Sebastien Riou, May 27th 2020 + +Implementation optimized for ARM-Cortex-M7/M4/M3 
(Size and Speed) +Includes protection against timing attack on X look up operations + +Note that implementation 'v7m_fpu' is faster and safe on all Cortex-M7 as of May 2020. +*/ +#if defined(__DRYGASCON_ARM_SELECTOR_H__) +.cpu cortex-m7 +.syntax unified +.code 16 +.thumb_func + +.align 1 +.global drygascon128_g_v7m_fpu_x +.global drygascon128_f_v7m_fpu_x +.global drygascon128_g0_v7m_fpu_x + + .equ C0, 0 + .equ C1, C0+8 + .equ C2, C0+16 + .equ C3, C0+24 + .equ C4, C0+32 + .equ R0, 48 + .equ R1, R0+8 + .equ X0, 64 + .equ X1, X0+8 + + .equ X0L, X0 + .equ X1L, X1 + .equ C0L, C0 + .equ C1L, C1 + .equ C2L, C2 + .equ C3L, C3 + .equ C4L, C4 + .equ R0L, R0 + .equ R1L, R1 + + .equ X0H, X0+4 + .equ X1H, X1+4 + .equ C0H, C0+4 + .equ C1H, C1+4 + .equ C2H, C2+4 + .equ C3H, C3+4 + .equ C4H, C4+4 + .equ R0H, R0+4 + .equ R1H, R1+4 + + .equ R32_0, R0L + .equ R32_1, R0H + .equ R32_2, R1L + .equ R32_3, R1H + + + +.type drygascon128_g_v7m_fpu_x, %function +drygascon128_g_v7m_fpu_x: + //Runs `rounds` rounds on c; r is accumulated in S10..S13 and stored to the state at exit. + //r0: state: c,r,x + //r1: rounds + push {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} + //stack vars (after the second push below): + // 8 round + // 4 base address for round constant lookups + // 0 state address + + //r=0 + //S10..S13 hold r as raw 32-bit words, so clear them with an integer zero. + //VSUB.F32 Sx,Sx,Sx is not a reliable clear: a NaN/Inf bit pattern left + //by a previous use yields NaN (non-zero) instead of 0. + movs r10,#0 + vmov S10,r10 + vmov S11,r10 + vmov S12,r10 + vmov S13,r10 + + //round=r11=rounds-1; + subs r11,r1,#1 + //base = round_cst+12-rounds + adr r10, round_cst + adds r10,r10,#12 + subs r10,r10,r1 + + push {r0,r10,r11} + + //Load C + adds r14,r0,#C0 + LDMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} + + //loop entry + //assume r11>=0 at entry +drygascon128_g_v7m_fpu_x_main_loop: + //r0~r9: c + //r10: base for round constants + //r11: round, counting from rounds-1 to 0 + + //r11 = round constant, read from the table at base+round + ldrb r11,[r10,r11] + //state: + //r0 to r9: c + //r11: constant to add as round constant + //FPU: + //s10 to s13: r (R32_0 to R32_3) + + // addition of round constant + //C2L ^= round constant; + eors r4,r4,r11 + + // substitution layer, lower half + eors r0,r0,r8 + eors r8,r8,r6 + eors r4,r4,r2 + mvns r10,r0 
+ mvns r11,r6 + mvns r12,r8 + ands r10,r10,r2 + ands r11,r11,r8 + eors r8,r8,r10 + ands r12,r12,r0 + mvns r10,r4 + ands r10,r10,r6 + eors r6,r6,r12 + mvns r12,r2 + ands r12,r12,r4 + eors r4,r4,r11 + eors r6,r6,r4 + mvns r4,r4 + eors r0,r0,r12 + eors r2,r2,r10 + eors r2,r2,r0 + eors r0,r0,r8 + + // substitution layer, upper half + eors r1,r1,r9 + eors r9,r9,r7 + eors r5,r5,r3 + mvns r10,r1 + mvns r11,r7 + mvns r12,r9 + ands r10,r10,r3 + ands r11,r11,r9 + eors r9,r9,r10 + ands r12,r12,r1 + mvns r10,r5 + ands r10,r10,r7 + eors r7,r7,r12 + mvns r12,r3 + ands r12,r12,r5 + eors r5,r5,r11 + eors r7,r7,r5 + mvns r5,r5 + eors r1,r1,r12 + eors r3,r3,r10 + eors r3,r3,r1 + eors r1,r1,r9 + + + // linear diffusion layer + + //c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7); + //c4 high part + rors r11,r9,#(20) + eors r9,r11,r9 + rors r10,r8,#(4) + eors r9,r10,r9 + //c4 low part + rors r11,r11,#((32-20+3)%32) + eors r11,r11,r8 + rors r10,r8,#(20) + eors r8,r10,r11 + + vmov r14,S11 + //c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19); + //c0 high part + rors r11,r1,#(14) + eors r1,r11,r1 + rors r10,r0,#(10) + eors r1,r10,r1 + //r14 is R32_1 + eors r14,r14,r1 + vmov r12,S10 + //c0 low part + rors r11,r11,#((32-14+9)%32) + eors r11,r11,r0 + rors r10,r0,#(14) + eors r0,r10,r11 + //r12 is R32_0 + eors r12,r12,r0 + //c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1); + //c2 high part + rors r11,r5,#(3) + eors r5,r11,r5 + rors r10,r4,#(1) + eors r5,r10,r5 + //r12 is R32_0 + eors r12,r12,r5 + vmov S10,r12 + vmov r12,S13 + //c2 low part + rors r11,r11,#((32-3+0)%32) + eors r11,r11,r4 + rors r10,r4,#(3) + eors r4,r10,r11 + //r12 is R32_3 + eors r12,r12,r4 + //c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61); + //c1 high part + rors r11,r3,#(19) + eors r3,r11,r3 + rors r10,r2,#(31) + eors r3,r10,r3 + //r12 is R32_3 + eors r12,r12,r3 + vmov S13,r12 + vmov r12,S12 + //c1 low part + rors 
r11,r11,#((32-19+30)%32) + eors r11,r11,r2 + rors r10,r2,#(19) + eors r2,r10,r11 + //r12 is R32_2 + eors r12,r12,r2 + //c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17); + //c3 high part + rors r11,r7,#(5) + eors r7,r11,r7 + rors r10,r6,#(9) + eors r7,r10,r7 + //r12 is R32_2 + eors r12,r12,r7 + vmov S12,r12 + //c3 low part + rors r11,r11,#((32-5+8)%32) + eors r11,r11,r6 + rors r10,r6,#(5) + eors r6,r10,r11 + //r14 is R32_1 + eors r14,r14,r6 + vmov S11,r14 + + //state: + //r0 to r9: c + //r10,r11,r12 destroyed + + ldr r10,[sp,#4] + + ldr r11,[sp,#8] + subs r11,#1 + bmi drygascon128_g_v7m_fpu_x_exit + + str r11,[sp,#8] + b drygascon128_g_v7m_fpu_x_main_loop +drygascon128_g_v7m_fpu_x_exit: + //update C + ldr r14,[sp,#0] + STMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} + + //update R + ldr r11,[sp,#0] + adds r11,r11,#R0 + VSTMIA.F32 r11, {S10,S11,S12,S13} + + add sp,sp,#12 + pop {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc} +.size drygascon128_g_v7m_fpu_x, .-drygascon128_g_v7m_fpu_x + +.align 2 +.type drygascon128_f_v7m_fpu_x, %function +drygascon128_f_v7m_fpu_x: + //r0:state + //r1:input + //r2:ds + //r3:rounds + push {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} + + //stack frame: + //0: pointer on input + //4: DS value + //8 :pointer on state + //12 : rounds for g + //16 :mix round / g round + + movs r10,#0 //init of input bit counter + push {r0,r3,r10} //make the same stack frame as drygascon128_g_cm7 + push {r1,r2} + //r=0 + VSUB.F32 S10, S10, S10 + VSUB.F32 S11, S11, S11 + VSUB.F32 S12, S12, S12 + VSUB.F32 S13, S13, S13 + + //Load C + adds r11,r0,#C0 + LDMIA.W r11,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} + + //Load X + adds r11,#X0 + VLDMIA.F32 r11, {s0,s1,s2,s3} + +drygascon128_f_v7m_fpu_x_mix128_main_loop: + //r10 is input bit counter + ldr r11,[sp,#0] //r11 is pointer on input + + //r10 r12 shift + // 0 0 0 + // 10 1 2 + // 20 2 4 + // 30 3 6 + // 40 5 0 + // 50 6 2 + // 60 7 4 + // 70 8 6 + // 80 10 0 + // 90 11 2 + // 100 12 4 + // 110 13 6 + // 
120 15 0 + // 130 16 2 --> we do that operation for 2 last bits in a special last loop + + cmp r10,#120 + bne drygascon128_f_v7m_fpu_x_mix128_main_loop.regular + + //we execute this only during the pen-ultimate operation + //we add the 2 lsb from DS to r14 + ldrb r14,[r11,#15] + ldr r10,[sp,#4] + lsl r10,r10,#8 + eors r14,r14,r10 + b drygascon128_f_v7m_fpu_x_mix128_main_loop.core + +drygascon128_f_v7m_fpu_x_mix128_main_loop.regular: + //r12 is base byte: byte offset to read from input buffer + lsr r12,r10,#3 //divide by 8 to get base byte + //r10 becomes shift + lsl r14,r12,#3 + sub r10,r10,r14 + + ldr r14,[r11,r12] //M7 supports unalign access with ldr + lsr r14,r14,r10 + +drygascon128_f_v7m_fpu_x_mix128_main_loop.core: + + tst r14,#2 + VSELEQ.F64 D2, D0, D1 + tst r14,#1 + VSELEQ.F32 S6, S4, S5 + VMOV r11,S6 + eors r0,r0,r11 + + tst r14,#8 + VSELEQ.F64 D2, D0, D1 + tst r14,#4 + VSELEQ.F32 S6, S4, S5 + VMOV r11,S6 + eors r2,r2,r11 + + tst r14,#32 + VSELEQ.F64 D2, D0, D1 + tst r14,#16 + VSELEQ.F32 S6, S4, S5 + VMOV r11,S6 + eors r4,r4,r11 + + tst r14,#128 + VSELEQ.F64 D2, D0, D1 + tst r14,#64 + VSELEQ.F32 S6, S4, S5 + VMOV r11,S6 + eors r6,r6,r11 + + tst r14,#512 + VSELEQ.F64 D2, D0, D1 + tst r14,#256 + VSELEQ.F32 S6, S4, S5 + VMOV r11,S6 + eors r8,r8,r11 + + ldr r10,[sp,#16] + adds r10,#10 + cmp r10,#140 + beq drygascon128_f_v7m_fpu_x_mix128_exit +drygascon128_f_v7m_fpu_x_mix128_coreround: + str r10,[sp,#16] + + movs r11,#0xf0 + + //state: + //r0 to r9: c + //r11: constant to add as round constant + + // addition of round constant + //C2L ^= round constant; + eors r4,r4,r11 + + // substitution layer, lower half + eors r0,r0,r8 + eors r8,r8,r6 + eors r4,r4,r2 + mvns r10,r0 + mvns r11,r6 + mvns r12,r8 + ands r10,r10,r2 + ands r11,r11,r8 + eors r8,r8,r10 + ands r12,r12,r0 + mvns r10,r4 + ands r10,r10,r6 + eors r6,r6,r12 + mvns r12,r2 + ands r12,r12,r4 + eors r4,r4,r11 + eors r6,r6,r4 + mvns r4,r4 + eors r0,r0,r12 + eors r2,r2,r10 + eors r2,r2,r0 + eors r0,r0,r8 + + // 
substitution layer, upper half + eors r1,r1,r9 + eors r9,r9,r7 + eors r5,r5,r3 + mvns r10,r1 + mvns r11,r7 + mvns r12,r9 + ands r10,r10,r3 + ands r11,r11,r9 + eors r9,r9,r10 + ands r12,r12,r1 + mvns r10,r5 + ands r10,r10,r7 + eors r7,r7,r12 + mvns r12,r3 + ands r12,r12,r5 + eors r5,r5,r11 + eors r7,r7,r5 + mvns r5,r5 + eors r1,r1,r12 + eors r3,r3,r10 + eors r3,r3,r1 + eors r1,r1,r9 + + + // linear diffusion layer + + //c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7); + //c4 high part + rors r11,r9,#(20) + eors r9,r11,r9 + rors r10,r8,#(4) + eors r9,r10,r9 + //c4 low part + rors r11,r11,#((32-20+3)%32) + eors r11,r11,r8 + rors r10,r8,#(20) + eors r8,r10,r11 + + //c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19); + //c0 high part + rors r11,r1,#(14) + eors r1,r11,r1 + rors r10,r0,#(10) + eors r1,r10,r1 + //c0 low part + rors r11,r11,#((32-14+9)%32) + eors r11,r11,r0 + rors r10,r0,#(14) + eors r0,r10,r11 + + //c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61); + //c1 high part + rors r11,r3,#(19) + eors r3,r11,r3 + rors r10,r2,#(31) + eors r3,r10,r3 + //c1 low part + rors r11,r11,#((32-19+30)%32) + eors r11,r11,r2 + rors r10,r2,#(19) + eors r2,r10,r11 + + //c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1); + //c2 high part + rors r11,r5,#(3) + eors r5,r11,r5 + rors r10,r4,#(1) + eors r5,r10,r5 + //c2 low part + rors r11,r11,#((32-3+0)%32) + eors r11,r11,r4 + rors r10,r4,#(3) + eors r4,r10,r11 + + //c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17); + //c3 high part + rors r11,r7,#(5) + eors r7,r11,r7 + rors r10,r6,#(9) + eors r7,r10,r7 + //c3 low part + rors r11,r11,#((32-5+8)%32) + eors r11,r11,r6 + rors r10,r6,#(5) + eors r6,r10,r11 + + //state: + //r0 to r9: c + //r10,r11,r12 destroyed + + ldr r10,[sp,#16] + cmp r10,#130 + bne drygascon128_f_v7m_fpu_x_mix128_main_loop + //prepare the last loop: load DS 2 msb + ldr r14,[sp,4] + lsr 
r14,r14,#2 + b drygascon128_f_v7m_fpu_x_mix128_main_loop.core + +drygascon128_f_v7m_fpu_x_mix128_exit: + ldr r14,[sp,#12] + //round=r10=rounds-1; + subs r11,r14,#1 + //base = round_cst+12-rounds + adr r10, round_cst + adds r10,r10,#12 + subs r10,r10,r14 + + str r10,[sp,#12] + str r11,[sp,#16] + + add sp,sp,#8 + b drygascon128_g_v7m_fpu_x_main_loop + +.align 2 +round_cst: +.byte 0x4b +.byte 0x5a +.byte 0x69 +.byte 0x78 +.byte 0x87 +.byte 0x96 +.byte 0xa5 +.byte 0xb4 +.byte 0xc3 +.byte 0xd2 +.byte 0xe1 +.byte 0xf0 +.align 2 + +.size drygascon128_f_v7m_fpu_x, .-drygascon128_f_v7m_fpu_x + +.type drygascon128_g0_v7m_fpu_x, %function +drygascon128_g0_v7m_fpu_x: + //perform a single round without accumulate + //r0: state + push {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} + + //Load C + adds r14,r0,#C0 + LDMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} + + //r0~r9: c + + //r11 = ((0xf - 0) << 4) | 0; + movs r11,#0xf0 + //state: + //r0 to r9: c + //r11: constant to add as round constant + + // addition of round constant + //C2L ^= round constant; + eors r4,r4,r11 + + // substitution layer, lower half + eors r0,r0,r8 + eors r8,r8,r6 + eors r4,r4,r2 + mvns r10,r0 + mvns r11,r6 + mvns r12,r8 + ands r10,r10,r2 + ands r11,r11,r8 + eors r8,r8,r10 + ands r12,r12,r0 + mvns r10,r4 + ands r10,r10,r6 + eors r6,r6,r12 + mvns r12,r2 + ands r12,r12,r4 + eors r4,r4,r11 + eors r6,r6,r4 + mvns r4,r4 + eors r0,r0,r12 + eors r2,r2,r10 + eors r2,r2,r0 + eors r0,r0,r8 + + // substitution layer, upper half + eors r1,r1,r9 + eors r9,r9,r7 + eors r5,r5,r3 + mvns r10,r1 + mvns r11,r7 + mvns r12,r9 + ands r10,r10,r3 + ands r11,r11,r9 + eors r9,r9,r10 + ands r12,r12,r1 + mvns r10,r5 + ands r10,r10,r7 + eors r7,r7,r12 + mvns r12,r3 + ands r12,r12,r5 + eors r5,r5,r11 + eors r7,r7,r5 + mvns r5,r5 + eors r1,r1,r12 + eors r3,r3,r10 + eors r3,r3,r1 + eors r1,r1,r9 + + + // linear diffusion layer + + //c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7); + //c4 high part + rors 
#ifndef __DRYGASCON_ARM_SELECTOR_H__
#define __DRYGASCON_ARM_SELECTOR_H__
/*
 * Optional file: picks the best assembly implementation for each chip.
 *
 * Step 1 maps a chip macro to a core/feature selector.
 * Step 2 maps the selectors to the DRYGASCON_*_OPT entry points.
 * When no known chip macro is defined nothing is selected.
 */

/* Step 1a: chips using the v7m implementation with FPU */
#if defined(STM32H743xx) || defined(STM32F746xx)
    #define __DRYGASCON_ARM_SELECTOR_V7M__
    #define __DRYGASCON_ARM_SELECTOR_FPU__
#endif

/* Step 1b: chips using the plain v7m implementation */
#if defined(STM32F103xx) || defined(__SAM3X8E__)
    #define __DRYGASCON_ARM_SELECTOR_V7M__
#endif

/* Step 1c: chips using the v6m implementation */
#if defined(STM32L011xx)
    #define __DRYGASCON_ARM_SELECTOR_V6M__
#endif

//TODO: add more chips here

/* Step 2a: v7m entry points, FPU flavour when available */
#if defined(__DRYGASCON_ARM_SELECTOR_V7M__) && defined(__DRYGASCON_ARM_SELECTOR_FPU__)
    #define DRYGASCON_G_OPT   drygascon128_g_v7m_fpu
    #define DRYGASCON_F_OPT   drygascon128_f_v7m_fpu
    #define DRYGASCON_G0_OPT  drygascon128_g0_v7m_fpu
#elif defined(__DRYGASCON_ARM_SELECTOR_V7M__)
    #define DRYGASCON_G_OPT   drygascon128_g_v7m
    #define DRYGASCON_F_OPT   drygascon128_f_v7m
    #define DRYGASCON_G0_OPT  drygascon128_g0_v7m
#endif

/* Step 2b: v6m entry points; no G0 is selected and the input block is
 * requested to be copied to an aligned buffer */
#if defined(__DRYGASCON_ARM_SELECTOR_V6M__)
    #define DRYGASCON_G_OPT   drygascon128_g_v6m
    #define DRYGASCON_F_OPT   drygascon128_f_v6m
    //#define DRYGASCON_G0_OPT  drygascon128_g0_v6m
    #define DRYGASCON_ALIGN_INPUT_32
#endif

#endif
char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + return drygascon128_aead_decrypt + (m, mlen, nsec, c, clen, ad, adlen, npub, k); +} diff --git a/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/implementors b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/implementors new file mode 100644 index 0000000..454ca31 --- /dev/null +++ b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/implementors @@ -0,0 +1,2 @@ +Rhys Weatherley +Sebastien Riou diff --git a/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/internal-drysponge.c b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/internal-drysponge.c new file mode 100644 index 0000000..05284fb --- /dev/null +++ b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/internal-drysponge.c @@ -0,0 +1,681 @@ +/* + * Copyright (C) 2020 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "internal-drysponge.h" +#include + +#if !defined(__AVR__) + +/* Right rotations in bit-interleaved format */ +#define intRightRotateEven(x,bits) \ + (__extension__ ({ \ + uint32_t _x0 = (uint32_t)(x); \ + uint32_t _x1 = (uint32_t)((x) >> 32); \ + _x0 = rightRotate(_x0, (bits)); \ + _x1 = rightRotate(_x1, (bits)); \ + _x0 | (((uint64_t)_x1) << 32); \ + })) +#define intRightRotateOdd(x,bits) \ + (__extension__ ({ \ + uint32_t _x0 = (uint32_t)(x); \ + uint32_t _x1 = (uint32_t)((x) >> 32); \ + _x0 = rightRotate(_x0, ((bits) + 1) % 32); \ + _x1 = rightRotate(_x1, (bits)); \ + _x1 | (((uint64_t)_x0) << 32); \ + })) +#define intRightRotate1_64(x) \ + (__extension__ ({ \ + uint32_t _x0 = (uint32_t)(x); \ + uint32_t _x1 = (uint32_t)((x) >> 32); \ + _x0 = rightRotate1(_x0); \ + _x1 | (((uint64_t)_x0) << 32); \ + })) +#define intRightRotate2_64(x) (intRightRotateEven((x), 1)) +#define intRightRotate3_64(x) (intRightRotateOdd((x), 1)) +#define intRightRotate4_64(x) (intRightRotateEven((x), 2)) +#define intRightRotate5_64(x) (intRightRotateOdd((x), 2)) +#define intRightRotate6_64(x) (intRightRotateEven((x), 3)) +#define intRightRotate7_64(x) (intRightRotateOdd((x), 3)) +#define intRightRotate8_64(x) (intRightRotateEven((x), 4)) +#define intRightRotate9_64(x) (intRightRotateOdd((x), 4)) +#define intRightRotate10_64(x) (intRightRotateEven((x), 5)) +#define intRightRotate11_64(x) (intRightRotateOdd((x), 5)) +#define intRightRotate12_64(x) (intRightRotateEven((x), 6)) +#define intRightRotate13_64(x) (intRightRotateOdd((x), 6)) +#define intRightRotate14_64(x) (intRightRotateEven((x), 7)) +#define intRightRotate15_64(x) (intRightRotateOdd((x), 7)) +#define intRightRotate16_64(x) 
(intRightRotateEven((x), 8)) +#define intRightRotate17_64(x) (intRightRotateOdd((x), 8)) +#define intRightRotate18_64(x) (intRightRotateEven((x), 9)) +#define intRightRotate19_64(x) (intRightRotateOdd((x), 9)) +#define intRightRotate20_64(x) (intRightRotateEven((x), 10)) +#define intRightRotate21_64(x) (intRightRotateOdd((x), 10)) +#define intRightRotate22_64(x) (intRightRotateEven((x), 11)) +#define intRightRotate23_64(x) (intRightRotateOdd((x), 11)) +#define intRightRotate24_64(x) (intRightRotateEven((x), 12)) +#define intRightRotate25_64(x) (intRightRotateOdd((x), 12)) +#define intRightRotate26_64(x) (intRightRotateEven((x), 13)) +#define intRightRotate27_64(x) (intRightRotateOdd((x), 13)) +#define intRightRotate28_64(x) (intRightRotateEven((x), 14)) +#define intRightRotate29_64(x) (intRightRotateOdd((x), 14)) +#define intRightRotate30_64(x) (intRightRotateEven((x), 15)) +#define intRightRotate31_64(x) (intRightRotateOdd((x), 15)) +#define intRightRotate32_64(x) (intRightRotateEven((x), 16)) +#define intRightRotate33_64(x) (intRightRotateOdd((x), 16)) +#define intRightRotate34_64(x) (intRightRotateEven((x), 17)) +#define intRightRotate35_64(x) (intRightRotateOdd((x), 17)) +#define intRightRotate36_64(x) (intRightRotateEven((x), 18)) +#define intRightRotate37_64(x) (intRightRotateOdd((x), 18)) +#define intRightRotate38_64(x) (intRightRotateEven((x), 19)) +#define intRightRotate39_64(x) (intRightRotateOdd((x), 19)) +#define intRightRotate40_64(x) (intRightRotateEven((x), 20)) +#define intRightRotate41_64(x) (intRightRotateOdd((x), 20)) +#define intRightRotate42_64(x) (intRightRotateEven((x), 21)) +#define intRightRotate43_64(x) (intRightRotateOdd((x), 21)) +#define intRightRotate44_64(x) (intRightRotateEven((x), 22)) +#define intRightRotate45_64(x) (intRightRotateOdd((x), 22)) +#define intRightRotate46_64(x) (intRightRotateEven((x), 23)) +#define intRightRotate47_64(x) (intRightRotateOdd((x), 23)) +#define intRightRotate48_64(x) (intRightRotateEven((x), 24)) 
+#define intRightRotate49_64(x) (intRightRotateOdd((x), 24)) +#define intRightRotate50_64(x) (intRightRotateEven((x), 25)) +#define intRightRotate51_64(x) (intRightRotateOdd((x), 25)) +#define intRightRotate52_64(x) (intRightRotateEven((x), 26)) +#define intRightRotate53_64(x) (intRightRotateOdd((x), 26)) +#define intRightRotate54_64(x) (intRightRotateEven((x), 27)) +#define intRightRotate55_64(x) (intRightRotateOdd((x), 27)) +#define intRightRotate56_64(x) (intRightRotateEven((x), 28)) +#define intRightRotate57_64(x) (intRightRotateOdd((x), 28)) +#define intRightRotate58_64(x) (intRightRotateEven((x), 29)) +#define intRightRotate59_64(x) (intRightRotateOdd((x), 29)) +#define intRightRotate60_64(x) (intRightRotateEven((x), 30)) +#define intRightRotate61_64(x) (intRightRotateOdd((x), 30)) +#define intRightRotate62_64(x) (intRightRotateEven((x), 31)) +#define intRightRotate63_64(x) (intRightRotateOdd((x), 31)) + +#ifdef DRYGASCON_G0_OPT +void DRYGASCON_G0_OPT(drysponge128_state_t *state); +static void gascon128_g0(drysponge128_state_t *state){ + DRYGASCON_G0_OPT(state); +} +#else +void gascon128_core_round(gascon128_state_t *state, uint8_t round) +{ + uint64_t t0, t1, t2, t3, t4; + + /* Load the state into local varaibles */ +#if defined(LW_UTIL_LITTLE_ENDIAN) + uint64_t x0 = state->S[0]; + uint64_t x1 = state->S[1]; + uint64_t x2 = state->S[2]; + uint64_t x3 = state->S[3]; + uint64_t x4 = state->S[4]; +#else + uint64_t x0 = le_load_word64(state->B); + uint64_t x1 = le_load_word64(state->B + 8); + uint64_t x2 = le_load_word64(state->B + 16); + uint64_t x3 = le_load_word64(state->B + 24); + uint64_t x4 = le_load_word64(state->B + 32); +#endif + + /* Add the round constant to the middle of the state */ + x2 ^= ((0x0F - round) << 4) | round; + + /* Substitution layer */ + x0 ^= x4; x2 ^= x1; x4 ^= x3; t0 = (~x0) & x1; t1 = (~x1) & x2; + t2 = (~x2) & x3; t3 = (~x3) & x4; t4 = (~x4) & x0; x0 ^= t1; + x1 ^= t2; x2 ^= t3; x3 ^= t4; x4 ^= t0; x1 ^= x0; x3 ^= x2; + x0 ^= x4; 
x2 = ~x2; + + /* Linear diffusion layer */ + x0 ^= intRightRotate19_64(x0) ^ intRightRotate28_64(x0); + x1 ^= intRightRotate61_64(x1) ^ intRightRotate38_64(x1); + x2 ^= intRightRotate1_64(x2) ^ intRightRotate6_64(x2); + x3 ^= intRightRotate10_64(x3) ^ intRightRotate17_64(x3); + x4 ^= intRightRotate7_64(x4) ^ intRightRotate40_64(x4); + + /* Write the local variables back to the state */ +#if defined(LW_UTIL_LITTLE_ENDIAN) + state->S[0] = x0; + state->S[1] = x1; + state->S[2] = x2; + state->S[3] = x3; + state->S[4] = x4; +#else + le_store_word64(state->B, x0); + le_store_word64(state->B + 8, x1); + le_store_word64(state->B + 16, x2); + le_store_word64(state->B + 24, x3); + le_store_word64(state->B + 32, x4); +#endif +} + +static void gascon128_g0(drysponge128_state_t *state){ + gascon128_core_round(&(state->c), 0); +} +#endif + +void gascon256_core_round(gascon256_state_t *state, uint8_t round) +{ + uint64_t t0, t1, t2, t3, t4, t5, t6, t7, t8; + + /* Load the state into local varaibles */ +#if defined(LW_UTIL_LITTLE_ENDIAN) + uint64_t x0 = state->S[0]; + uint64_t x1 = state->S[1]; + uint64_t x2 = state->S[2]; + uint64_t x3 = state->S[3]; + uint64_t x4 = state->S[4]; + uint64_t x5 = state->S[5]; + uint64_t x6 = state->S[6]; + uint64_t x7 = state->S[7]; + uint64_t x8 = state->S[8]; +#else + uint64_t x0 = le_load_word64(state->B); + uint64_t x1 = le_load_word64(state->B + 8); + uint64_t x2 = le_load_word64(state->B + 16); + uint64_t x3 = le_load_word64(state->B + 24); + uint64_t x4 = le_load_word64(state->B + 32); + uint64_t x5 = le_load_word64(state->B + 40); + uint64_t x6 = le_load_word64(state->B + 48); + uint64_t x7 = le_load_word64(state->B + 56); + uint64_t x8 = le_load_word64(state->B + 64); +#endif + + /* Add the round constant to the middle of the state */ + x4 ^= ((0x0F - round) << 4) | round; + + /* Substitution layer */ + x0 ^= x8; x2 ^= x1; x4 ^= x3; x6 ^= x5; x8 ^= x7; t0 = (~x0) & x1; + t1 = (~x1) & x2; t2 = (~x2) & x3; t3 = (~x3) & x4; t4 = (~x4) & x5; + 
t5 = (~x5) & x6; t6 = (~x6) & x7; t7 = (~x7) & x8; t8 = (~x8) & x0; + x0 ^= t1; x1 ^= t2; x2 ^= t3; x3 ^= t4; x4 ^= t5; x5 ^= t6; x6 ^= t7; + x7 ^= t8; x8 ^= t0; x1 ^= x0; x3 ^= x2; x5 ^= x4; x7 ^= x6; x0 ^= x8; + x4 = ~x4; + + /* Linear diffusion layer */ + x0 ^= intRightRotate19_64(x0) ^ intRightRotate28_64(x0); + x1 ^= intRightRotate61_64(x1) ^ intRightRotate38_64(x1); + x2 ^= intRightRotate1_64(x2) ^ intRightRotate6_64(x2); + x3 ^= intRightRotate10_64(x3) ^ intRightRotate17_64(x3); + x4 ^= intRightRotate7_64(x4) ^ intRightRotate40_64(x4); + x5 ^= intRightRotate31_64(x5) ^ intRightRotate26_64(x5); + x6 ^= intRightRotate53_64(x6) ^ intRightRotate58_64(x6); + x7 ^= intRightRotate9_64(x7) ^ intRightRotate46_64(x7); + x8 ^= intRightRotate43_64(x8) ^ intRightRotate50_64(x8); + + /* Write the local variables back to the state */ +#if defined(LW_UTIL_LITTLE_ENDIAN) + state->S[0] = x0; + state->S[1] = x1; + state->S[2] = x2; + state->S[3] = x3; + state->S[4] = x4; + state->S[5] = x5; + state->S[6] = x6; + state->S[7] = x7; + state->S[8] = x8; +#else + le_store_word64(state->B, x0); + le_store_word64(state->B + 8, x1); + le_store_word64(state->B + 16, x2); + le_store_word64(state->B + 24, x3); + le_store_word64(state->B + 32, x4); + le_store_word64(state->B + 40, x5); + le_store_word64(state->B + 48, x6); + le_store_word64(state->B + 56, x7); + le_store_word64(state->B + 64, x8); +#endif +} + +#ifdef DRYGASCON_G_OPT +void DRYGASCON_G_OPT(uint64_t* state, uint32_t rounds); +//use state only to access c,r,x +static void drysponge128_g_impl(drysponge128_state_t *state,unsigned int rounds) +{ + DRYGASCON_G_OPT((uint64_t*)state,rounds); +} +#else + +//use state only to access c,r,x +static void drysponge128_g_impl(drysponge128_state_t *state,unsigned int rounds) +{ + unsigned round; + + /* Perform the first round. For each round we XOR the 16 bytes of + * the output data with the first 16 bytes of the state. 
/**
 * \brief Runs the G function of DrySPONGE128 on a state.
 *
 * Thin wrapper: forwards to drysponge128_g_impl() with the number of
 * rounds stored in the state itself.
 *
 * \param state The DrySPONGE128 state; state->rounds gives the round count.
 */
void drysponge128_g(drysponge128_state_t *state)
{
    drysponge128_g_impl(state,state->rounds);
}
/**
 * \fn uint32_t drysponge_select_x(const uint32_t x[4], uint8_t index)
 * \brief Picks one of the four words of x without an index-dependent fetch.
 *
 * \param x Points to the four elements of x.
 * \param index Which element to return, between 0 and 3.
 *
 * \return The selected element of x.
 */
#if defined(__HAS_CACHE__)
STATIC_INLINE uint32_t drysponge_select_x(const uint32_t x[4], uint8_t index)
{
    /* The index is data dependent, so a direct x[index] fetch could leak
     * through cache timing. Every word of x is read instead and all but
     * the wanted one are masked away.
     *
     * (0x04 - (index ^ lane)) >> 2 is 1 when lane == index and 0 for the
     * three other lanes, so the negated value is an all-ones or all-zeroes
     * mask.
     *
     * Known limitation: the mask is all-ones only for the selected lane,
     * which may be visible through power analysis and could in theory
     * reveal the plaintext input. The DryGASCON specification acknowledges
     * plaintext recovery in section 7.4 and suggests randomizing the
     * indexes into c and x and the order in which words are processed.
     * That mitigation is not implemented here yet; patches welcome.
     */
    uint32_t selected = 0;
    int lane;
    for (lane = 0; lane < 4; ++lane) {
        uint32_t keep = -((uint32_t)((0x04 - (index ^ lane)) >> 2));
        selected ^= x[lane] & keep;
    }
    return selected;
}
#else
/* Without __HAS_CACHE__ the target is treated as cache-less (AVR, for
 * instance, has nothing like an L1 or L2 cache), so cache timing is not
 * a concern and the word is fetched directly */
#define drysponge_select_x(x, index) ((x)[(index)])
#endif
+ */ +static void drysponge128_mix_phase_round + (drysponge128_state_t *state, uint32_t data) +{ + /* Mix in elements from x according to the 2-bit indexes in the data */ + state->c.W[0] ^= drysponge_select_x(state->x.W, data & 0x03); + state->c.W[2] ^= drysponge_select_x(state->x.W, (data >> 2) & 0x03); + state->c.W[4] ^= drysponge_select_x(state->x.W, (data >> 4) & 0x03); + state->c.W[6] ^= drysponge_select_x(state->x.W, (data >> 6) & 0x03); + state->c.W[8] ^= drysponge_select_x(state->x.W, (data >> 8) & 0x03); +} +#endif + +/** + * \brief Mixes a 32-bit value into the DrySPONGE256 state. + * + * \param state DrySPONGE256 state. + * \param data The data to be mixed in the bottom 18 bits. + */ +static void drysponge256_mix_phase_round + (drysponge256_state_t *state, uint32_t data) +{ + /* Mix in elements from x according to the 2-bit indexes in the data */ + state->c.W[0] ^= drysponge_select_x(state->x.W, data & 0x03); + state->c.W[2] ^= drysponge_select_x(state->x.W, (data >> 2) & 0x03); + state->c.W[4] ^= drysponge_select_x(state->x.W, (data >> 4) & 0x03); + state->c.W[6] ^= drysponge_select_x(state->x.W, (data >> 6) & 0x03); + state->c.W[8] ^= drysponge_select_x(state->x.W, (data >> 8) & 0x03); + state->c.W[10] ^= drysponge_select_x(state->x.W, (data >> 10) & 0x03); + state->c.W[12] ^= drysponge_select_x(state->x.W, (data >> 12) & 0x03); + state->c.W[14] ^= drysponge_select_x(state->x.W, (data >> 14) & 0x03); + state->c.W[16] ^= drysponge_select_x(state->x.W, (data >> 16) & 0x03); +} + +#ifndef DRYGASCON_F_OPT +/** + * \brief Mixes an input block into a DrySPONGE128 state. + * + * \param state The DrySPONGE128 state. + * \param data Full rate block containing the input data. 
+ */ +static void drysponge128_mix_phase + (drysponge128_state_t *state, const unsigned char data[DRYSPONGE128_RATE],unsigned int ds) +{ + /* Mix 10-bit groups into the output, with the domain + * separator added to the last two groups */ + drysponge128_mix_phase_round + (state, data[0] | (((uint32_t)(data[1])) << 8)); + gascon128_core_round(&(state->c), 0); + drysponge128_mix_phase_round + (state, (data[1] >> 2) | (((uint32_t)(data[2])) << 6)); + gascon128_core_round(&(state->c), 0); + drysponge128_mix_phase_round + (state, (data[2] >> 4) | (((uint32_t)(data[3])) << 4)); + gascon128_core_round(&(state->c), 0); + drysponge128_mix_phase_round + (state, (data[3] >> 6) | (((uint32_t)(data[4])) << 2)); + gascon128_core_round(&(state->c), 0); + drysponge128_mix_phase_round + (state, data[5] | (((uint32_t)(data[6])) << 8)); + gascon128_core_round(&(state->c), 0); + drysponge128_mix_phase_round + (state, (data[6] >> 2) | (((uint32_t)(data[7])) << 6)); + gascon128_core_round(&(state->c), 0); + drysponge128_mix_phase_round + (state, (data[7] >> 4) | (((uint32_t)(data[8])) << 4)); + gascon128_core_round(&(state->c), 0); + drysponge128_mix_phase_round + (state, (data[8] >> 6) | (((uint32_t)(data[9])) << 2)); + gascon128_core_round(&(state->c), 0); + drysponge128_mix_phase_round + (state, data[10] | (((uint32_t)(data[11])) << 8)); + gascon128_core_round(&(state->c), 0); + drysponge128_mix_phase_round + (state, (data[11] >> 2) | (((uint32_t)(data[12])) << 6)); + gascon128_core_round(&(state->c), 0); + drysponge128_mix_phase_round + (state, (data[12] >> 4) | (((uint32_t)(data[13])) << 4)); + gascon128_core_round(&(state->c), 0); + drysponge128_mix_phase_round + (state, ((data[13] >> 6) | (((uint32_t)(data[14])) << 2))); + gascon128_core_round(&(state->c), 0); + drysponge128_mix_phase_round(state, data[15] ^ ds); + gascon128_core_round(&(state->c), 0); + drysponge128_mix_phase_round(state, ds >> 10); +} +#endif + +/** + * \brief Mixes an input block into a DrySPONGE256 state. 
+ * + * \param state The DrySPONGE256 state. + * \param data Full rate block containing the input data. + */ +static void drysponge256_mix_phase + (drysponge256_state_t *state, const unsigned char data[DRYSPONGE256_RATE]) +{ + /* Mix 18-bit groups into the output, with the domain in the last group */ + drysponge256_mix_phase_round + (state, data[0] | (((uint32_t)(data[1])) << 8) | + (((uint32_t)(data[2])) << 16)); + gascon256_core_round(&(state->c), 0); + drysponge256_mix_phase_round + (state, (data[2] >> 2) | (((uint32_t)(data[3])) << 6) | + (((uint32_t)(data[4])) << 14)); + gascon256_core_round(&(state->c), 0); + drysponge256_mix_phase_round + (state, (data[4] >> 4) | (((uint32_t)(data[5])) << 4) | + (((uint32_t)(data[6])) << 12)); + gascon256_core_round(&(state->c), 0); + drysponge256_mix_phase_round + (state, (data[6] >> 6) | (((uint32_t)(data[7])) << 2) | + (((uint32_t)(data[8])) << 10)); + gascon256_core_round(&(state->c), 0); + drysponge256_mix_phase_round + (state, data[9] | (((uint32_t)(data[10])) << 8) | + (((uint32_t)(data[11])) << 16)); + gascon256_core_round(&(state->c), 0); + drysponge256_mix_phase_round + (state, (data[11] >> 2) | (((uint32_t)(data[12])) << 6) | + (((uint32_t)(data[13])) << 14)); + gascon256_core_round(&(state->c), 0); + drysponge256_mix_phase_round + (state, (data[13] >> 4) | (((uint32_t)(data[14])) << 4) | + (((uint32_t)(data[15])) << 12)); + gascon256_core_round(&(state->c), 0); + drysponge256_mix_phase_round + (state, (data[15] >> 6) ^ state->domain); + + /* Revert to the default domain separator for the next block */ + state->domain = 0; +} + +#ifdef DRYGASCON_F_OPT +void DRYGASCON_F_OPT(drysponge128_state_t *state, const unsigned char *input,unsigned int ds, unsigned int rounds); +static void drygascon128_f_impl(drysponge128_state_t *state, const unsigned char *input,unsigned int ds, unsigned int rounds){ + DRYGASCON_F_OPT(state, input, ds, rounds); +} +#else +void drygascon128_f_impl(drysponge128_state_t *state, const unsigned 
char *input,unsigned int ds, unsigned int rounds){ + drysponge128_mix_phase(state, input ,ds); + drysponge128_g_impl(state,rounds); +} +#endif +void drygascon128_f_wrap(drysponge128_state_t *state, const unsigned char *input, unsigned len){ + drysponge128_rate_t padded;//enforce alignement (if needed by f_impl) + const unsigned char*in; + if (len < DRYSPONGE128_RATE) { + memcpy(padded.B, input, len); + padded.B[len] = 0x01; + memset(padded.B + len + 1, 0, DRYSPONGE128_RATE - len - 1); + in=padded.B; + } else { + #ifdef DRYGASCON_ALIGN_INPUT_32 + memcpy(padded.B,input,DRYSPONGE128_RATE); + in=padded.B; + #else + in=input; + #endif + } + drygascon128_f_impl(state, in,state->domain,state->rounds); + /* Revert to the default domain separator for the next block */ + state->domain = 0; +} + +void drysponge256_f_absorb + (drysponge256_state_t *state, const unsigned char *input, unsigned len) +{ + if (len >= DRYSPONGE256_RATE) { + drysponge256_mix_phase(state, input); + } else { + unsigned char padded[DRYSPONGE256_RATE]; + memcpy(padded, input, len); + padded[len] = 0x01; + memset(padded + len + 1, 0, DRYSPONGE256_RATE - len - 1); + drysponge256_mix_phase(state, padded); + } +} + +/** + * \brief Determine if some of the words of an "x" value are identical. + * + * \param x Points to the "x" buffer to check. + * + * \return Non-zero if some of the words are the same, zero if they are + * distinct from each other. + * + * We try to perform the check in constant time to avoid giving away + * any information about the value of the key. 
+ */
+static int drysponge_x_words_are_same(const uint32_t x[4])
+{
+    unsigned i, j;
+    int result = 0;
+    /* Compare every unordered pair (i, j) with no early exit */
+    for (i = 0; i < 3; ++i) {
+        for (j = i + 1; j < 4; ++j) {
+            /* check is zero only when the two words are equal */
+            uint32_t check = x[i] ^ x[j];
+            /* 2^32 - check has bit 32 set only for check == 0, so the
+             * shift yields 1 for an equal pair and 0 otherwise */
+            result |= (int)((0x100000000ULL - check) >> 32);
+        }
+    }
+    return result;
+}
+
+
+/* Returns non-zero when the address of state->x is a multiple of 16 */
+int drysponge128_safe_alignement(const drysponge128_state_t*state){
+    return 0==(0xF & (uintptr_t )&(state->x));
+}
+
+/* Loads the key into c and x according to keysize, then absorbs the
+ * 16-byte nonce with DRYSPONGE128_INIT_ROUNDS rounds.
+ * NOTE(review): a keysize matching none of the three constants below leaves
+ * state->x unwritten by this function - confirm callers validate keysize. */
+void drysponge128_setup
+    (drysponge128_state_t *state, const unsigned char *key, unsigned int keysize,
+     const unsigned char *nonce, int final_block)
+{
+    if(DRYGASCON128_SAFEKEY_SIZE==keysize){
+        /* Fill C and X directly with the key */
+        memcpy(state->c.B, key, sizeof(state->c));
+        memcpy(state->x.B, key+ sizeof(state->c), sizeof(state->x));
+        while (drysponge_x_words_are_same(state->x.W)); // deliberately spins forever if the key is not valid (repeated x words)
+
+    } else {
+        /* Fill the GASCON-128 state with repeated copies of the key */
+        memcpy(state->c.B, key, 16);
+        memcpy(state->c.B + 16, key, 16);
+        memcpy(state->c.B + 32, key, 8);
+
+        if(DRYGASCON128_FASTKEY_SIZE==keysize){
+
+            /* Fill X with the 16 last bytes of the key */
+            memcpy(state->x.B, key+16, sizeof(state->x));
+            while (drysponge_x_words_are_same(state->x.W)); // deliberately spins forever if the key is not valid (repeated x words)
+
+        } else if(DRYGASCON128_MINKEY_SIZE==keysize){
+
+            /* Generate the "x" value for the state.  All four words of "x"
+             * must be unique because they will be used in drysponge_select_x()
+             * as stand-ins for the bit pairs 00, 01, 10, and 11.
+             *
+             * Run the core block operation over and over until "x" is unique.
+             * Technically the runtime here is key-dependent and not constant.
+             * If the input key is randomized, this should only take 1 round
+             * on average so it is "almost constant time".
+             */
+            do {
+                /* Earlier alternatives kept for reference (presumably
+                 * equivalent to gascon128_g0 - confirm):
+                 *   gascon128_core_round(&(state->c), 0);
+                 *   drysponge128_g_impl(state,1); */
+                gascon128_g0(state);
+            } while (drysponge_x_words_are_same(state->c.W));
+            memcpy(state->x.W, state->c.W, sizeof(state->x));
+
+            /* Replace the generated "x" value in the state with the key prefix */
+            memcpy(state->c.W, key, sizeof(state->x));
+        }
+    }
+
+    /* Absorb the nonce into the state with an increased number of rounds */
+    state->rounds = DRYSPONGE128_INIT_ROUNDS;
+    state->domain = DRYDOMAIN128_NONCE;
+    if (final_block)
+        state->domain |= DRYDOMAIN128_FINAL;
+    drygascon128_f_wrap(state, nonce, 16);
+
+    /* Set up the normal number of rounds for future operations */
+    state->rounds = DRYSPONGE128_ROUNDS;
+}
+
+/* Loads a 32-byte key into the GASCON-256 state, derives "x" from it and
+ * absorbs the 16-byte nonce with DRYSPONGE256_INIT_ROUNDS rounds. */
+void drysponge256_setup
+    (drysponge256_state_t *state, const unsigned char *key,
+     const unsigned char *nonce, int final_block)
+{
+    /* Fill the GASCON-256 state with repeated copies of the key */
+    memcpy(state->c.B, key, 32);
+    memcpy(state->c.B + 32, key, 32);
+    memcpy(state->c.B + 64, key, 8);
+
+    /* Generate the "x" value for the state: keep running core rounds until
+     * the first four words of c are pairwise distinct */
+    do {
+        gascon256_core_round(&(state->c), 0);
+    } while (drysponge_x_words_are_same(state->c.W));
+    memcpy(state->x.W, state->c.W, sizeof(state->x));
+
+    /* Replace the generated "x" value in the state with the key prefix */
+    memcpy(state->c.W, key, sizeof(state->x));
+
+    /* Absorb the nonce into the state with an increased number of rounds */
+    state->rounds = DRYSPONGE256_INIT_ROUNDS;
+    state->domain = DRYDOMAIN256_NONCE;
+    if (final_block)
+        state->domain |= DRYDOMAIN256_FINAL;
+    drysponge256_f_absorb(state, nonce, 16);
+    drysponge256_g(state);
+
+    /* Set up the normal number of rounds for future operations */
+    state->rounds = DRYSPONGE256_ROUNDS;
+}
diff --git a/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/internal-drysponge.h b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/internal-drysponge.h
new file mode 100644
index 0000000..0f907bd
--- /dev/null
+++
b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/internal-drysponge.h
@@ -0,0 +1,379 @@
+/*
+ * Copyright (C) 2020 Southern Storm Software, Pty Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef LW_INTERNAL_DRYSPONGE_H
+#define LW_INTERNAL_DRYSPONGE_H
+
+#include "drygascon.h"
+#include "drygascon128_arm_selector.h"
+
+#include "internal-util.h"
+
+/**
+ * \file internal-drysponge.h
+ * \brief Internal implementation of DrySPONGE for the DryGASCON cipher.
+ *
+ * References: https://github.com/sebastien-riou/DryGASCON
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * \brief Size of the GASCON-128 permutation state in bytes.
+ */
+#define GASCON128_STATE_SIZE 40
+
+/**
+ * \brief Size of the GASCON-256 permutation state in bytes.
+ */
+#define GASCON256_STATE_SIZE 72
+
+/**
+ * \brief Rate of absorption and squeezing for DrySPONGE128, in bytes.
+ */
+#define DRYSPONGE128_RATE 16
+
+/**
+ * \brief Rate of absorption and squeezing for DrySPONGE256, in bytes.
+ */
+#define DRYSPONGE256_RATE 16
+
+/**
+ * \brief Size of the "x" value for DrySPONGE128, in bytes.
+ */
+#define DRYSPONGE128_XSIZE 16
+
+/**
+ * \brief Size of the "x" value for DrySPONGE256, in bytes.
+ */
+#define DRYSPONGE256_XSIZE 16
+
+/**
+ * \brief Normal number of rounds for DrySPONGE128 when absorbing
+ * and squeezing data.
+ */
+#define DRYSPONGE128_ROUNDS 7
+
+/**
+ * \brief Number of rounds for DrySPONGE128 during initialization.
+ */
+#define DRYSPONGE128_INIT_ROUNDS 11
+
+/**
+ * \brief Normal number of rounds for DrySPONGE256 when absorbing
+ * and squeezing data.
+ */
+#define DRYSPONGE256_ROUNDS 8
+
+/**
+ * \brief Number of rounds for DrySPONGE256 during initialization.
+ */
+#define DRYSPONGE256_INIT_ROUNDS 12
+
+#ifdef DRYGASCON_F_OPT
+
+    /**
+     * \brief DrySPONGE128 domain bit for a padded block (DRYGASCON_F_OPT encoding).
+     */
+    #define DRYDOMAIN128_PADDED (1 << 0)
+
+    /**
+     * \brief DrySPONGE128 domain bit for a final block (DRYGASCON_F_OPT encoding).
+     */
+    #define DRYDOMAIN128_FINAL (1 << 1)
+
+    /**
+     * \brief DrySPONGE128 domain value for processing the nonce (DRYGASCON_F_OPT encoding).
+     */
+    #define DRYDOMAIN128_NONCE (1 << 2)
+
+    /**
+     * \brief DrySPONGE128 domain value for processing the associated data (DRYGASCON_F_OPT encoding).
+     */
+    #define DRYDOMAIN128_ASSOC_DATA (2 << 2)
+
+    /**
+     * \brief DrySPONGE128 domain value for processing the message (DRYGASCON_F_OPT encoding).
+     */
+    #define DRYDOMAIN128_MESSAGE (3 << 2)
+
+#else
+
+    /**
+     * \brief DrySPONGE128 domain bit for a padded block (encoding used without DRYGASCON_F_OPT).
+     */
+    #define DRYDOMAIN128_PADDED (1 << 8)
+
+    /**
+     * \brief DrySPONGE128 domain bit for a final block (encoding used without DRYGASCON_F_OPT).
+     */
+    #define DRYDOMAIN128_FINAL (1 << 9)
+
+    /**
+     * \brief DrySPONGE128 domain value for processing the nonce (encoding used without DRYGASCON_F_OPT).
+     */
+    #define DRYDOMAIN128_NONCE (1 << 10)
+
+    /**
+     * \brief DrySPONGE128 domain value for processing the associated data (encoding used without DRYGASCON_F_OPT).
+     */
+    #define DRYDOMAIN128_ASSOC_DATA (2 << 10)
+
+
+    /**
+     * \brief DrySPONGE128 domain value for processing the message (encoding used without DRYGASCON_F_OPT).
+     */
+    #define DRYDOMAIN128_MESSAGE (3 << 10)
+
+#endif /* DRYGASCON_F_OPT */
+
+
+/**
+ * \brief DrySPONGE256 domain bit for a padded block.
+ */
+#define DRYDOMAIN256_PADDED (1 << 2)
+
+/**
+ * \brief DrySPONGE256 domain bit for a final block.
+ */
+#define DRYDOMAIN256_FINAL (1 << 3)
+
+/**
+ * \brief DrySPONGE256 domain value for processing the nonce.
+ */
+#define DRYDOMAIN256_NONCE (1 << 4)
+
+/**
+ * \brief DrySPONGE256 domain value for processing the associated data.
+ */
+#define DRYDOMAIN256_ASSOC_DATA (2 << 4)
+
+/**
+ * \brief DrySPONGE256 domain value for processing the message.
+ */
+#define DRYDOMAIN256_MESSAGE (3 << 4)
+
+/**
+ * \brief Internal state of the GASCON-128 permutation.
+ */
+typedef union
+{
+    uint64_t S[GASCON128_STATE_SIZE / 8];   /**< 64-bit words of the state */
+    uint32_t W[GASCON128_STATE_SIZE / 4];   /**< 32-bit words of the state */
+    uint8_t B[GASCON128_STATE_SIZE];        /**< Bytes of the state */
+
+} gascon128_state_t;
+
+/**
+ * \brief Internal state of the GASCON-256 permutation.
+ */
+typedef union
+{
+    uint64_t S[GASCON256_STATE_SIZE / 8];   /**< 64-bit words of the state */
+    uint32_t W[GASCON256_STATE_SIZE / 4];   /**< 32-bit words of the state */
+    uint8_t B[GASCON256_STATE_SIZE];        /**< Bytes of the state */
+
+} gascon256_state_t;
+
+/**
+ * \brief Structure of a rate block for DrySPONGE128.
+ */
+typedef union
+{
+    uint64_t S[DRYSPONGE128_RATE / 8];      /**< 64-bit words of the rate */
+    uint32_t W[DRYSPONGE128_RATE / 4];      /**< 32-bit words of the rate */
+    uint8_t B[DRYSPONGE128_RATE];           /**< Bytes of the rate */
+
+} drysponge128_rate_t;
+
+/**
+ * \brief Structure of a rate block for DrySPONGE256.
+ */
+typedef union
+{
+    uint64_t S[DRYSPONGE256_RATE / 8];      /**< 64-bit words of the rate */
+    uint32_t W[DRYSPONGE256_RATE / 4];      /**< 32-bit words of the rate */
+    uint8_t B[DRYSPONGE256_RATE];           /**< Bytes of the rate */
+
+} drysponge256_rate_t;
+
+/**
+ * \brief Structure of the "x" value for DrySPONGE128.
+ */
+typedef union
+{
+    uint64_t S[DRYSPONGE128_XSIZE / 8];     /**< 64-bit words of the "x" value */
+    uint32_t W[DRYSPONGE128_XSIZE / 4];     /**< 32-bit words of the "x" value */
+    uint8_t B[DRYSPONGE128_XSIZE];          /**< Bytes of the "x" value */
+
+} __attribute__((aligned(16))) drysponge128_x_t;
+
+/**
+ * \brief Structure of the "x" value for DrySPONGE256.
+ */
+typedef union
+{
+    uint64_t S[DRYSPONGE256_XSIZE / 8];     /**< 64-bit words of the "x" value */
+    uint32_t W[DRYSPONGE256_XSIZE / 4];     /**< 32-bit words of the "x" value */
+    uint8_t B[DRYSPONGE256_XSIZE];          /**< Bytes of the "x" value */
+
+} drysponge256_x_t;
+
+/**
+ * \brief Structure of the rolling DrySPONGE128 state (16-byte aligned).
+ */
+typedef struct
+{
+    gascon128_state_t c;        /**< GASCON-128 state for the capacity */
+    uint32_t domain;            /**< Domain value to mix on next F call */
+    uint32_t rounds;            /**< Number of rounds for next G call */
+    drysponge128_rate_t r;      /**< Buffer for a rate block of data */
+    drysponge128_x_t x;         /**< "x" value for the sponge */
+} __attribute__((aligned(16))) drysponge128_state_t;
+
+/**
+ * \brief Structure of the rolling DrySPONGE256 state.
+ */
+typedef struct
+{
+    gascon256_state_t c;        /**< GASCON-256 state for the capacity */
+    drysponge256_rate_t r;      /**< Buffer for a rate block of data */
+    drysponge256_x_t x;         /**< "x" value for the sponge */
+    uint32_t domain;            /**< Domain value to mix on next F call */
+    uint32_t rounds;            /**< Number of rounds for next G call */
+
+} drysponge256_state_t;
+
+/**
+ * \brief Permutes the GASCON-128 state using one iteration of CoreRound.
+ *
+ * \param state The GASCON-128 state to be permuted.
+ * \param round The round number.
+ *
+ * The input and output \a state will be in little-endian byte order.
+ */
+void gascon128_core_round(gascon128_state_t *state, uint8_t round);
+
+/**
+ * \brief Permutes the GASCON-256 state using one iteration of CoreRound.
+ *
+ * \param state The GASCON-256 state to be permuted.
+ * \param round The round number.
+ *
+ * The input and output \a state will be in little-endian byte order.
+ */
+void gascon256_core_round(gascon256_state_t *state, uint8_t round);
+
+/**
+ * \brief Performs the DrySPONGE128 G function which runs the core
+ * rounds and squeezes data out of the GASCON-128 state.
+ *
+ * \param state The DrySPONGE128 state.
+ *
+ * The data that is squeezed out will be in state->r on exit.
+ */
+void drysponge128_g(drysponge128_state_t *state);
+
+/**
+ * \brief Performs the DrySPONGE256 G function which runs the core
+ * rounds and squeezes data out of the GASCON-256 state.
+ *
+ * \param state The DrySPONGE256 state.
+ *
+ * The data that is squeezed out will be in state->r on exit.
+ */
+void drysponge256_g(drysponge256_state_t *state);
+
+/**
+ * \brief Performs the DrySPONGE128 G function which runs the core
+ * rounds but does not squeeze out any output.
+ *
+ * \param state The DrySPONGE128 state.
+ */
+void drysponge128_g_core(drysponge128_state_t *state);
+
+/**
+ * \brief Performs the DrySPONGE256 G function which runs the core
+ * rounds but does not squeeze out any output.
+ *
+ * \param state The DrySPONGE256 state.
+ */
+void drysponge256_g_core(drysponge256_state_t *state);
+
+/**
+ * \brief Performs the absorption phase of the DrySPONGE256 F function.
+ *
+ * \param state The DrySPONGE256 state.
+ * \param input The block of input data to incorporate into the state.
+ * \param len The length of the input block, which must be less than
+ * or equal to DRYSPONGE256_RATE.  Smaller input blocks will be padded.
+ *
+ * This function must be followed by a call to drysponge256_g() or
+ * drysponge256_g_core() to perform the full F operation.
+ */
+void drysponge256_f_absorb
+    (drysponge256_state_t *state, const unsigned char *input, unsigned len);
+
+void drygascon128_f_wrap(drysponge128_state_t *state, const unsigned char *input, unsigned len); /* pads blocks shorter than DRYSPONGE128_RATE, runs F, then resets state->domain to 0 */
+
+/**
+ * \brief Determine if state alignment is safe vs timing attacks.
+ *
+ * \param state Points to the state to check.
+ *
+ * \return Non-zero if the "x" member of the state is 16-byte aligned.
+ *
+ * We expect this to be completely optimized out by compiler if the alignment is enforced at build time
+ */
+int drysponge128_safe_alignement(const drysponge128_state_t*state);
+
+/**
+ * \brief Set up a DrySPONGE128 state to begin encryption or decryption.
+ * \param state The DrySPONGE128 state.
+ * \param key Points to the key, which is \a keysize bytes long.
+ * \param keysize Size of the key in bytes; selects how the key is loaded.
+ * \param nonce Points to the 16 bytes of the nonce.
+ * \param final_block Non-zero if after key setup there will be no more blocks.
+ */
+void drysponge128_setup
+    (drysponge128_state_t *state, const unsigned char *key, unsigned int keysize,
+     const unsigned char *nonce, int final_block);
+
+/**
+ * \brief Set up a DrySPONGE256 state to begin encryption or decryption.
+ *
+ * \param state The DrySPONGE256 state.
+ * \param key Points to the 32 bytes of the key.
+ * \param nonce Points to the 16 bytes of the nonce.
+ * \param final_block Non-zero if after key setup there will be no more blocks.
+ */
+void drysponge256_setup
+    (drysponge256_state_t *state, const unsigned char *key,
+     const unsigned char *nonce, int final_block);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* LW_INTERNAL_DRYSPONGE_H */
diff --git a/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/internal-util.h b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/internal-util.h
new file mode 100644
index 0000000..e30166d
--- /dev/null
+++ b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/internal-util.h
@@ -0,0 +1,702 @@
+/*
+ * Copyright (C) 2020 Southern Storm Software, Pty Ltd.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef LW_INTERNAL_UTIL_H +#define LW_INTERNAL_UTIL_H + +#include + +/* Figure out how to inline functions using this C compiler */ +#if defined(__STDC__) && __STDC_VERSION__ >= 199901L +#define STATIC_INLINE static inline +#elif defined(__GNUC__) || defined(__clang__) +#define STATIC_INLINE static __inline__ +#else +#define STATIC_INLINE static +#endif + +/* Try to figure out whether the CPU is little-endian or big-endian. + * May need to modify this to include new compiler-specific defines. 
+ * Alternatively, define __LITTLE_ENDIAN__ or __BIG_ENDIAN__ in your
+ * compiler flags when you compile this library.  NOTE(review): any CPU-name match below wins even if __BYTE_ORDER__ says big-endian - confirm */
+#if defined(__x86_64) || defined(__x86_64__) || \
+    defined(__i386) || defined(__i386__) || \
+    defined(__AVR__) || defined(__arm) || defined(__arm__) || \
+    defined(_M_AMD64) || defined(_M_X64) || defined(_M_IX86) || \
+    defined(_M_IA64) || defined(_M_ARM) || defined(_M_ARM_FP) || \
+    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == 1234) || \
+    defined(__LITTLE_ENDIAN__)
+#define LW_UTIL_LITTLE_ENDIAN 1
+#elif (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == 4321) || \
+    defined(__BIG_ENDIAN__)
+/* Big endian: LW_UTIL_LITTLE_ENDIAN is left undefined */
+#else
+#error "Cannot determine the endianess of this platform"
+#endif
+
+/* Helper macros to load and store values while converting endian-ness.  Every macro evaluates its ptr argument once per byte. */
+
+/* Load a big-endian 32-bit word from a byte buffer */
+#define be_load_word32(ptr) \
+    ((((uint32_t)((ptr)[0])) << 24) | \
+     (((uint32_t)((ptr)[1])) << 16) | \
+     (((uint32_t)((ptr)[2])) << 8) | \
+      ((uint32_t)((ptr)[3])))
+
+/* Store a big-endian 32-bit word into a byte buffer */
+#define be_store_word32(ptr, x) \
+    do { \
+        uint32_t _x = (x); \
+        (ptr)[0] = (uint8_t)(_x >> 24); \
+        (ptr)[1] = (uint8_t)(_x >> 16); \
+        (ptr)[2] = (uint8_t)(_x >> 8); \
+        (ptr)[3] = (uint8_t)_x; \
+    } while (0)
+
+/* Load a little-endian 32-bit word from a byte buffer */
+#define le_load_word32(ptr) \
+    ((((uint32_t)((ptr)[3])) << 24) | \
+     (((uint32_t)((ptr)[2])) << 16) | \
+     (((uint32_t)((ptr)[1])) << 8) | \
+      ((uint32_t)((ptr)[0])))
+
+/* Store a little-endian 32-bit word into a byte buffer */
+#define le_store_word32(ptr, x) \
+    do { \
+        uint32_t _x = (x); \
+        (ptr)[0] = (uint8_t)_x; \
+        (ptr)[1] = (uint8_t)(_x >> 8); \
+        (ptr)[2] = (uint8_t)(_x >> 16); \
+        (ptr)[3] = (uint8_t)(_x >> 24); \
+    } while (0)
+
+/* Load a big-endian 64-bit word from a byte buffer */
+#define be_load_word64(ptr) \
+    ((((uint64_t)((ptr)[0])) << 56) | \
+     (((uint64_t)((ptr)[1])) << 48) | \
+     (((uint64_t)((ptr)[2])) << 40) | \
+     (((uint64_t)((ptr)[3])) << 32) | \
+     (((uint64_t)((ptr)[4])) << 24) | \
+     (((uint64_t)((ptr)[5])) << 16) | \
+     (((uint64_t)((ptr)[6])) << 8) | \
+      ((uint64_t)((ptr)[7])))
+
+/* Store a big-endian 64-bit word into a byte buffer */
+#define be_store_word64(ptr, x) \
+    do { \
+        uint64_t _x = (x); \
+        (ptr)[0] = (uint8_t)(_x >> 56); \
+        (ptr)[1] = (uint8_t)(_x >> 48); \
+        (ptr)[2] = (uint8_t)(_x >> 40); \
+        (ptr)[3] = (uint8_t)(_x >> 32); \
+        (ptr)[4] = (uint8_t)(_x >> 24); \
+        (ptr)[5] = (uint8_t)(_x >> 16); \
+        (ptr)[6] = (uint8_t)(_x >> 8); \
+        (ptr)[7] = (uint8_t)_x; \
+    } while (0)
+
+/* Load a little-endian 64-bit word from a byte buffer */
+#define le_load_word64(ptr) \
+    ((((uint64_t)((ptr)[7])) << 56) | \
+     (((uint64_t)((ptr)[6])) << 48) | \
+     (((uint64_t)((ptr)[5])) << 40) | \
+     (((uint64_t)((ptr)[4])) << 32) | \
+     (((uint64_t)((ptr)[3])) << 24) | \
+     (((uint64_t)((ptr)[2])) << 16) | \
+     (((uint64_t)((ptr)[1])) << 8) | \
+      ((uint64_t)((ptr)[0])))
+
+/* Store a little-endian 64-bit word into a byte buffer */
+#define le_store_word64(ptr, x) \
+    do { \
+        uint64_t _x = (x); \
+        (ptr)[0] = (uint8_t)_x; \
+        (ptr)[1] = (uint8_t)(_x >> 8); \
+        (ptr)[2] = (uint8_t)(_x >> 16); \
+        (ptr)[3] = (uint8_t)(_x >> 24); \
+        (ptr)[4] = (uint8_t)(_x >> 32); \
+        (ptr)[5] = (uint8_t)(_x >> 40); \
+        (ptr)[6] = (uint8_t)(_x >> 48); \
+        (ptr)[7] = (uint8_t)(_x >> 56); \
+    } while (0)
+
+/* Load a big-endian 16-bit word from a byte buffer */
+#define be_load_word16(ptr) \
+    ((((uint16_t)((ptr)[0])) << 8) | \
+      ((uint16_t)((ptr)[1])))
+
+/* Store a big-endian 16-bit word into a byte buffer */
+#define be_store_word16(ptr, x) \
+    do { \
+        uint16_t _x = (x); \
+        (ptr)[0] = (uint8_t)(_x >> 8); \
+        (ptr)[1] = (uint8_t)_x; \
+    } while (0)
+
+/* Load a little-endian 16-bit word from a byte buffer */
+#define le_load_word16(ptr) \
+    ((((uint16_t)((ptr)[1])) << 8) | \
+      ((uint16_t)((ptr)[0])))
+
+/* Store a little-endian 16-bit word into a byte buffer */
+#define le_store_word16(ptr, x) \
+    do { \
+        uint16_t _x = (x); \
+        (ptr)[0] = (uint8_t)_x; \
+        (ptr)[1] = (uint8_t)(_x >> 8); \
+    } while (0)
+
+/* XOR a source byte buffer against a destination: dest[i] ^= src[i] */
+#define lw_xor_block(dest, src, len) \
+    do { \
+        unsigned char *_dest = (dest); \
+        const unsigned char *_src = (src); \
+        unsigned _len = (len); \
+        while (_len > 0) { \
+            *_dest++ ^= *_src++; \
+            --_len; \
+        } \
+    } while (0)
+
+/* XOR two source byte buffers and put the result in a destination buffer */
+#define lw_xor_block_2_src(dest, src1, src2, len) \
+    do { \
+        unsigned char *_dest = (dest); \
+        const unsigned char *_src1 = (src1); \
+        const unsigned char *_src2 = (src2); \
+        unsigned _len = (len); \
+        while (_len > 0) { \
+            *_dest++ = *_src1++ ^ *_src2++; \
+            --_len; \
+        } \
+    } while (0)
+
+/* XOR a source byte buffer against a destination and write the updated
+ * destination byte to a second destination at the same time */
+#define lw_xor_block_2_dest(dest2, dest, src, len) \
+    do { \
+        unsigned char *_dest2 = (dest2); \
+        unsigned char *_dest = (dest); \
+        const unsigned char *_src = (src); \
+        unsigned _len = (len); \
+        while (_len > 0) { \
+            *_dest2++ = (*_dest++ ^= *_src++); \
+            --_len; \
+        } \
+    } while (0)
+
+/* XOR two byte buffers and write to a destination while at the same
+ * time copying the contents of src2 to dest2 */
+#define lw_xor_block_copy_src(dest2, dest, src1, src2, len) \
+    do { \
+        unsigned char *_dest2 = (dest2); \
+        unsigned char *_dest = (dest); \
+        const unsigned char *_src1 = (src1); \
+        const unsigned char *_src2 = (src2); \
+        unsigned _len = (len); \
+        while (_len > 0) { \
+            unsigned char _temp = *_src2++; \
+            *_dest2++ = _temp; \
+            *_dest++ = *_src1++ ^ _temp; \
+            --_len; \
+        } \
+    } while (0)
+
+/* XOR a source byte buffer against a destination and write to another
+ * destination at the same time.
This version swaps the source value
+ * into the "dest" buffer: dest2[i] = dest[i] ^ src[i], then dest[i] = src[i] */
+#define lw_xor_block_swap(dest2, dest, src, len) \
+    do { \
+        unsigned char *_dest2 = (dest2); \
+        unsigned char *_dest = (dest); \
+        const unsigned char *_src = (src); \
+        unsigned _len = (len); \
+        while (_len > 0) { \
+            unsigned char _temp = *_src++; \
+            *_dest2++ = *_dest ^ _temp; \
+            *_dest++ = _temp; \
+            --_len; \
+        } \
+    } while (0)
+
+/* Rotation functions need to be optimised for best performance on AVR.
+ * The most efficient rotations are where the number of bits is 1 or a
+ * multiple of 8, so we compose the efficient rotations to produce all
+ * other rotation counts of interest. */
+
+#if defined(__AVR__)
+#define LW_CRYPTO_ROTATE32_COMPOSED 1
+#else
+#define LW_CRYPTO_ROTATE32_COMPOSED 0
+#endif
+
+/* Rotation macros for 32-bit arguments */
+
+/* Generic left rotate; bits must be 1..31 because the complementary shift is 32 - bits */
+#define leftRotate(a, bits) \
+    (__extension__ ({ \
+        uint32_t _temp = (a); \
+        (_temp << (bits)) | (_temp >> (32 - (bits))); \
+    }))
+
+/* Generic right rotate; bits must be 1..31 because the complementary shift is 32 - bits */
+#define rightRotate(a, bits) \
+    (__extension__ ({ \
+        uint32_t _temp = (a); \
+        (_temp >> (bits)) | (_temp << (32 - (bits))); \
+    }))
+
+#if !LW_CRYPTO_ROTATE32_COMPOSED
+
+/* Left rotate by a specific number of bits.
These macros may be replaced + * with more efficient ones on platforms that lack a barrel shifter */ +#define leftRotate1(a) (leftRotate((a), 1)) +#define leftRotate2(a) (leftRotate((a), 2)) +#define leftRotate3(a) (leftRotate((a), 3)) +#define leftRotate4(a) (leftRotate((a), 4)) +#define leftRotate5(a) (leftRotate((a), 5)) +#define leftRotate6(a) (leftRotate((a), 6)) +#define leftRotate7(a) (leftRotate((a), 7)) +#define leftRotate8(a) (leftRotate((a), 8)) +#define leftRotate9(a) (leftRotate((a), 9)) +#define leftRotate10(a) (leftRotate((a), 10)) +#define leftRotate11(a) (leftRotate((a), 11)) +#define leftRotate12(a) (leftRotate((a), 12)) +#define leftRotate13(a) (leftRotate((a), 13)) +#define leftRotate14(a) (leftRotate((a), 14)) +#define leftRotate15(a) (leftRotate((a), 15)) +#define leftRotate16(a) (leftRotate((a), 16)) +#define leftRotate17(a) (leftRotate((a), 17)) +#define leftRotate18(a) (leftRotate((a), 18)) +#define leftRotate19(a) (leftRotate((a), 19)) +#define leftRotate20(a) (leftRotate((a), 20)) +#define leftRotate21(a) (leftRotate((a), 21)) +#define leftRotate22(a) (leftRotate((a), 22)) +#define leftRotate23(a) (leftRotate((a), 23)) +#define leftRotate24(a) (leftRotate((a), 24)) +#define leftRotate25(a) (leftRotate((a), 25)) +#define leftRotate26(a) (leftRotate((a), 26)) +#define leftRotate27(a) (leftRotate((a), 27)) +#define leftRotate28(a) (leftRotate((a), 28)) +#define leftRotate29(a) (leftRotate((a), 29)) +#define leftRotate30(a) (leftRotate((a), 30)) +#define leftRotate31(a) (leftRotate((a), 31)) + +/* Right rotate by a specific number of bits. 
These macros may be replaced + * with more efficient ones on platforms that lack a barrel shifter */ +#define rightRotate1(a) (rightRotate((a), 1)) +#define rightRotate2(a) (rightRotate((a), 2)) +#define rightRotate3(a) (rightRotate((a), 3)) +#define rightRotate4(a) (rightRotate((a), 4)) +#define rightRotate5(a) (rightRotate((a), 5)) +#define rightRotate6(a) (rightRotate((a), 6)) +#define rightRotate7(a) (rightRotate((a), 7)) +#define rightRotate8(a) (rightRotate((a), 8)) +#define rightRotate9(a) (rightRotate((a), 9)) +#define rightRotate10(a) (rightRotate((a), 10)) +#define rightRotate11(a) (rightRotate((a), 11)) +#define rightRotate12(a) (rightRotate((a), 12)) +#define rightRotate13(a) (rightRotate((a), 13)) +#define rightRotate14(a) (rightRotate((a), 14)) +#define rightRotate15(a) (rightRotate((a), 15)) +#define rightRotate16(a) (rightRotate((a), 16)) +#define rightRotate17(a) (rightRotate((a), 17)) +#define rightRotate18(a) (rightRotate((a), 18)) +#define rightRotate19(a) (rightRotate((a), 19)) +#define rightRotate20(a) (rightRotate((a), 20)) +#define rightRotate21(a) (rightRotate((a), 21)) +#define rightRotate22(a) (rightRotate((a), 22)) +#define rightRotate23(a) (rightRotate((a), 23)) +#define rightRotate24(a) (rightRotate((a), 24)) +#define rightRotate25(a) (rightRotate((a), 25)) +#define rightRotate26(a) (rightRotate((a), 26)) +#define rightRotate27(a) (rightRotate((a), 27)) +#define rightRotate28(a) (rightRotate((a), 28)) +#define rightRotate29(a) (rightRotate((a), 29)) +#define rightRotate30(a) (rightRotate((a), 30)) +#define rightRotate31(a) (rightRotate((a), 31)) + +#else /* LW_CRYPTO_ROTATE32_COMPOSED */ + +/* Composed rotation macros where 1 and 8 are fast, but others are slow */ + +/* Left rotate by 1 */ +#define leftRotate1(a) (leftRotate((a), 1)) + +/* Left rotate by 2 */ +#define leftRotate2(a) (leftRotate(leftRotate((a), 1), 1)) + +/* Left rotate by 3 */ +#define leftRotate3(a) (leftRotate(leftRotate(leftRotate((a), 1), 1), 1)) + +/* Left rotate 
by 4 */ +#define leftRotate4(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 1), 1), 1), 1)) + +/* Left rotate by 5: Rotate left by 8, then right by 3 */ +#define leftRotate5(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 8), 1), 1), 1)) + +/* Left rotate by 6: Rotate left by 8, then right by 2 */ +#define leftRotate6(a) (rightRotate(rightRotate(leftRotate((a), 8), 1), 1)) + +/* Left rotate by 7: Rotate left by 8, then right by 1 */ +#define leftRotate7(a) (rightRotate(leftRotate((a), 8), 1)) + +/* Left rotate by 8 */ +#define leftRotate8(a) (leftRotate((a), 8)) + +/* Left rotate by 9: Rotate left by 8, then left by 1 */ +#define leftRotate9(a) (leftRotate(leftRotate((a), 8), 1)) + +/* Left rotate by 10: Rotate left by 8, then left by 2 */ +#define leftRotate10(a) (leftRotate(leftRotate(leftRotate((a), 8), 1), 1)) + +/* Left rotate by 11: Rotate left by 8, then left by 3 */ +#define leftRotate11(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 8), 1), 1), 1)) + +/* Left rotate by 12: Rotate left by 16, then right by 4 */ +#define leftRotate12(a) (rightRotate(rightRotate(rightRotate(rightRotate(leftRotate((a), 16), 1), 1), 1), 1)) + +/* Left rotate by 13: Rotate left by 16, then right by 3 */ +#define leftRotate13(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 16), 1), 1), 1)) + +/* Left rotate by 14: Rotate left by 16, then right by 2 */ +#define leftRotate14(a) (rightRotate(rightRotate(leftRotate((a), 16), 1), 1)) + +/* Left rotate by 15: Rotate left by 16, then right by 1 */ +#define leftRotate15(a) (rightRotate(leftRotate((a), 16), 1)) + +/* Left rotate by 16 */ +#define leftRotate16(a) (leftRotate((a), 16)) + +/* Left rotate by 17: Rotate left by 16, then left by 1 */ +#define leftRotate17(a) (leftRotate(leftRotate((a), 16), 1)) + +/* Left rotate by 18: Rotate left by 16, then left by 2 */ +#define leftRotate18(a) (leftRotate(leftRotate(leftRotate((a), 16), 1), 1)) + +/* Left rotate by 19: Rotate left by 16, then left by 3 */ +#define 
leftRotate19(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 16), 1), 1), 1)) + +/* Left rotate by 20: Rotate left by 16, then left by 4 */ +#define leftRotate20(a) (leftRotate(leftRotate(leftRotate(leftRotate(leftRotate((a), 16), 1), 1), 1), 1)) + +/* Left rotate by 21: Rotate left by 24, then right by 3 */ +#define leftRotate21(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 24), 1), 1), 1)) + +/* Left rotate by 22: Rotate left by 24, then right by 2 */ +#define leftRotate22(a) (rightRotate(rightRotate(leftRotate((a), 24), 1), 1)) + +/* Left rotate by 23: Rotate left by 24, then right by 1 */ +#define leftRotate23(a) (rightRotate(leftRotate((a), 24), 1)) + +/* Left rotate by 24 */ +#define leftRotate24(a) (leftRotate((a), 24)) + +/* Left rotate by 25: Rotate left by 24, then left by 1 */ +#define leftRotate25(a) (leftRotate(leftRotate((a), 24), 1)) + +/* Left rotate by 26: Rotate left by 24, then left by 2 */ +#define leftRotate26(a) (leftRotate(leftRotate(leftRotate((a), 24), 1), 1)) + +/* Left rotate by 27: Rotate left by 24, then left by 3 */ +#define leftRotate27(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 24), 1), 1), 1)) + +/* Left rotate by 28: Rotate right by 4 */ +#define leftRotate28(a) (rightRotate(rightRotate(rightRotate(rightRotate((a), 1), 1), 1), 1)) + +/* Left rotate by 29: Rotate right by 3 */ +#define leftRotate29(a) (rightRotate(rightRotate(rightRotate((a), 1), 1), 1)) + +/* Left rotate by 30: Rotate right by 2 */ +#define leftRotate30(a) (rightRotate(rightRotate((a), 1), 1)) + +/* Left rotate by 31: Rotate right by 1 */ +#define leftRotate31(a) (rightRotate((a), 1)) + +/* Define the 32-bit right rotations in terms of left rotations */ +#define rightRotate1(a) (leftRotate31((a))) +#define rightRotate2(a) (leftRotate30((a))) +#define rightRotate3(a) (leftRotate29((a))) +#define rightRotate4(a) (leftRotate28((a))) +#define rightRotate5(a) (leftRotate27((a))) +#define rightRotate6(a) (leftRotate26((a))) +#define 
rightRotate7(a) (leftRotate25((a))) +#define rightRotate8(a) (leftRotate24((a))) +#define rightRotate9(a) (leftRotate23((a))) +#define rightRotate10(a) (leftRotate22((a))) +#define rightRotate11(a) (leftRotate21((a))) +#define rightRotate12(a) (leftRotate20((a))) +#define rightRotate13(a) (leftRotate19((a))) +#define rightRotate14(a) (leftRotate18((a))) +#define rightRotate15(a) (leftRotate17((a))) +#define rightRotate16(a) (leftRotate16((a))) +#define rightRotate17(a) (leftRotate15((a))) +#define rightRotate18(a) (leftRotate14((a))) +#define rightRotate19(a) (leftRotate13((a))) +#define rightRotate20(a) (leftRotate12((a))) +#define rightRotate21(a) (leftRotate11((a))) +#define rightRotate22(a) (leftRotate10((a))) +#define rightRotate23(a) (leftRotate9((a))) +#define rightRotate24(a) (leftRotate8((a))) +#define rightRotate25(a) (leftRotate7((a))) +#define rightRotate26(a) (leftRotate6((a))) +#define rightRotate27(a) (leftRotate5((a))) +#define rightRotate28(a) (leftRotate4((a))) +#define rightRotate29(a) (leftRotate3((a))) +#define rightRotate30(a) (leftRotate2((a))) +#define rightRotate31(a) (leftRotate1((a))) + +#endif /* LW_CRYPTO_ROTATE32_COMPOSED */ + +/* Rotation macros for 64-bit arguments */ + +/* Generic left rotate of a 64-bit value by "bits". + * "bits" must be in the range 1..63: with 0 or 64 one of the two shifts + * is by the full width of uint64_t, which is undefined behaviour in C. + * "a" is copied into _temp inside a GNU statement expression so it is + * evaluated once; "bits" is expanded twice. */ +#define leftRotate_64(a, bits) \ + (__extension__ ({ \ + uint64_t _temp = (a); \ + (_temp << (bits)) | (_temp >> (64 - (bits))); \ + })) + +/* Generic right rotate of a 64-bit value by "bits". + * Same constraints as leftRotate_64: "bits" must be in the range 1..63, + * "a" is evaluated once and "bits" is expanded twice. */ +#define rightRotate_64(a, bits) \ + (__extension__ ({ \ + uint64_t _temp = (a); \ + (_temp >> (bits)) | (_temp << (64 - (bits))); \ + })) + +/* Left rotate by a specific number of bits. 
These macros may be replaced + * with more efficient ones on platforms that lack a barrel shifter */ /* Each leftRotateN_64(a) forwards to leftRotate_64((a), N) for N = 1..63 */ +#define leftRotate1_64(a) (leftRotate_64((a), 1)) +#define leftRotate2_64(a) (leftRotate_64((a), 2)) +#define leftRotate3_64(a) (leftRotate_64((a), 3)) +#define leftRotate4_64(a) (leftRotate_64((a), 4)) +#define leftRotate5_64(a) (leftRotate_64((a), 5)) +#define leftRotate6_64(a) (leftRotate_64((a), 6)) +#define leftRotate7_64(a) (leftRotate_64((a), 7)) +#define leftRotate8_64(a) (leftRotate_64((a), 8)) +#define leftRotate9_64(a) (leftRotate_64((a), 9)) +#define leftRotate10_64(a) (leftRotate_64((a), 10)) +#define leftRotate11_64(a) (leftRotate_64((a), 11)) +#define leftRotate12_64(a) (leftRotate_64((a), 12)) +#define leftRotate13_64(a) (leftRotate_64((a), 13)) +#define leftRotate14_64(a) (leftRotate_64((a), 14)) +#define leftRotate15_64(a) (leftRotate_64((a), 15)) +#define leftRotate16_64(a) (leftRotate_64((a), 16)) +#define leftRotate17_64(a) (leftRotate_64((a), 17)) +#define leftRotate18_64(a) (leftRotate_64((a), 18)) +#define leftRotate19_64(a) (leftRotate_64((a), 19)) +#define leftRotate20_64(a) (leftRotate_64((a), 20)) +#define leftRotate21_64(a) (leftRotate_64((a), 21)) +#define leftRotate22_64(a) (leftRotate_64((a), 22)) +#define leftRotate23_64(a) (leftRotate_64((a), 23)) +#define leftRotate24_64(a) (leftRotate_64((a), 24)) +#define leftRotate25_64(a) (leftRotate_64((a), 25)) +#define leftRotate26_64(a) (leftRotate_64((a), 26)) +#define leftRotate27_64(a) (leftRotate_64((a), 27)) +#define leftRotate28_64(a) (leftRotate_64((a), 28)) +#define leftRotate29_64(a) (leftRotate_64((a), 29)) +#define leftRotate30_64(a) (leftRotate_64((a), 30)) +#define leftRotate31_64(a) (leftRotate_64((a), 31)) +#define leftRotate32_64(a) (leftRotate_64((a), 32)) +#define leftRotate33_64(a) (leftRotate_64((a), 33)) +#define leftRotate34_64(a) (leftRotate_64((a), 34)) +#define leftRotate35_64(a) (leftRotate_64((a), 35)) +#define leftRotate36_64(a) (leftRotate_64((a), 36)) +#define 
leftRotate37_64(a) (leftRotate_64((a), 37)) +#define leftRotate38_64(a) (leftRotate_64((a), 38)) +#define leftRotate39_64(a) (leftRotate_64((a), 39)) +#define leftRotate40_64(a) (leftRotate_64((a), 40)) +#define leftRotate41_64(a) (leftRotate_64((a), 41)) +#define leftRotate42_64(a) (leftRotate_64((a), 42)) +#define leftRotate43_64(a) (leftRotate_64((a), 43)) +#define leftRotate44_64(a) (leftRotate_64((a), 44)) +#define leftRotate45_64(a) (leftRotate_64((a), 45)) +#define leftRotate46_64(a) (leftRotate_64((a), 46)) +#define leftRotate47_64(a) (leftRotate_64((a), 47)) +#define leftRotate48_64(a) (leftRotate_64((a), 48)) +#define leftRotate49_64(a) (leftRotate_64((a), 49)) +#define leftRotate50_64(a) (leftRotate_64((a), 50)) +#define leftRotate51_64(a) (leftRotate_64((a), 51)) +#define leftRotate52_64(a) (leftRotate_64((a), 52)) +#define leftRotate53_64(a) (leftRotate_64((a), 53)) +#define leftRotate54_64(a) (leftRotate_64((a), 54)) +#define leftRotate55_64(a) (leftRotate_64((a), 55)) +#define leftRotate56_64(a) (leftRotate_64((a), 56)) +#define leftRotate57_64(a) (leftRotate_64((a), 57)) +#define leftRotate58_64(a) (leftRotate_64((a), 58)) +#define leftRotate59_64(a) (leftRotate_64((a), 59)) +#define leftRotate60_64(a) (leftRotate_64((a), 60)) +#define leftRotate61_64(a) (leftRotate_64((a), 61)) +#define leftRotate62_64(a) (leftRotate_64((a), 62)) +#define leftRotate63_64(a) (leftRotate_64((a), 63)) + +/* Right rotate by a specific number of bits. 
These macros may be replaced + * with more efficient ones on platforms that lack a barrel shifter */ /* Each rightRotateN_64(a) forwards to rightRotate_64((a), N) */ +#define rightRotate1_64(a) (rightRotate_64((a), 1)) +#define rightRotate2_64(a) (rightRotate_64((a), 2)) +#define rightRotate3_64(a) (rightRotate_64((a), 3)) +#define rightRotate4_64(a) (rightRotate_64((a), 4)) +#define rightRotate5_64(a) (rightRotate_64((a), 5)) +#define rightRotate6_64(a) (rightRotate_64((a), 6)) +#define rightRotate7_64(a) (rightRotate_64((a), 7)) +#define rightRotate8_64(a) (rightRotate_64((a), 8)) +#define rightRotate9_64(a) (rightRotate_64((a), 9)) +#define rightRotate10_64(a) (rightRotate_64((a), 10)) +#define rightRotate11_64(a) (rightRotate_64((a), 11)) +#define rightRotate12_64(a) (rightRotate_64((a), 12)) +#define rightRotate13_64(a) (rightRotate_64((a), 13)) +#define rightRotate14_64(a) (rightRotate_64((a), 14)) +#define rightRotate15_64(a) (rightRotate_64((a), 15)) +#define rightRotate16_64(a) (rightRotate_64((a), 16)) +#define rightRotate17_64(a) (rightRotate_64((a), 17)) +#define rightRotate18_64(a) (rightRotate_64((a), 18)) +#define rightRotate19_64(a) (rightRotate_64((a), 19)) +#define rightRotate20_64(a) (rightRotate_64((a), 20)) +#define rightRotate21_64(a) (rightRotate_64((a), 21)) +#define rightRotate22_64(a) (rightRotate_64((a), 22)) +#define rightRotate23_64(a) (rightRotate_64((a), 23)) +#define rightRotate24_64(a) (rightRotate_64((a), 24)) +#define rightRotate25_64(a) (rightRotate_64((a), 25)) +#define rightRotate26_64(a) (rightRotate_64((a), 26)) +#define rightRotate27_64(a) (rightRotate_64((a), 27)) +#define rightRotate28_64(a) (rightRotate_64((a), 28)) +#define rightRotate29_64(a) (rightRotate_64((a), 29)) +#define rightRotate30_64(a) (rightRotate_64((a), 30)) +#define rightRotate31_64(a) (rightRotate_64((a), 31)) +#define rightRotate32_64(a) (rightRotate_64((a), 32)) +#define rightRotate33_64(a) (rightRotate_64((a), 33)) +#define rightRotate34_64(a) (rightRotate_64((a), 34)) +#define rightRotate35_64(a) 
(rightRotate_64((a), 35)) +#define rightRotate36_64(a) (rightRotate_64((a), 36)) +#define rightRotate37_64(a) (rightRotate_64((a), 37)) +#define rightRotate38_64(a) (rightRotate_64((a), 38)) +#define rightRotate39_64(a) (rightRotate_64((a), 39)) +#define rightRotate40_64(a) (rightRotate_64((a), 40)) +#define rightRotate41_64(a) (rightRotate_64((a), 41)) +#define rightRotate42_64(a) (rightRotate_64((a), 42)) +#define rightRotate43_64(a) (rightRotate_64((a), 43)) +#define rightRotate44_64(a) (rightRotate_64((a), 44)) +#define rightRotate45_64(a) (rightRotate_64((a), 45)) +#define rightRotate46_64(a) (rightRotate_64((a), 46)) +#define rightRotate47_64(a) (rightRotate_64((a), 47)) +#define rightRotate48_64(a) (rightRotate_64((a), 48)) +#define rightRotate49_64(a) (rightRotate_64((a), 49)) +#define rightRotate50_64(a) (rightRotate_64((a), 50)) +#define rightRotate51_64(a) (rightRotate_64((a), 51)) +#define rightRotate52_64(a) (rightRotate_64((a), 52)) +#define rightRotate53_64(a) (rightRotate_64((a), 53)) +#define rightRotate54_64(a) (rightRotate_64((a), 54)) +#define rightRotate55_64(a) (rightRotate_64((a), 55)) +#define rightRotate56_64(a) (rightRotate_64((a), 56)) +#define rightRotate57_64(a) (rightRotate_64((a), 57)) +#define rightRotate58_64(a) (rightRotate_64((a), 58)) +#define rightRotate59_64(a) (rightRotate_64((a), 59)) +#define rightRotate60_64(a) (rightRotate_64((a), 60)) +#define rightRotate61_64(a) (rightRotate_64((a), 61)) +#define rightRotate62_64(a) (rightRotate_64((a), 62)) +#define rightRotate63_64(a) (rightRotate_64((a), 63)) + +/* Rotate a 16-bit value left by a number of bits (the fixed-count + * leftRotateN_16 wrappers pass 1..15). _temp is promoted to int inside + * the shift expression, so the result is cast back to uint16_t to drop + * the bits shifted above bit 15; without the cast the macro's value is + * only a correct rotation after the caller truncates it. + * "a" is evaluated once; "bits" is expanded twice. */ +#define leftRotate_16(a, bits) \ + (__extension__ ({ \ + uint16_t _temp = (a); \ + (uint16_t)((_temp << (bits)) | (_temp >> (16 - (bits)))); \ + })) + +/* Rotate a 16-bit value right by a number of bits (the fixed-count + * rightRotateN_16 wrappers pass 1..15). As for leftRotate_16, the result + * is cast back to uint16_t because the promoted left shift leaves bits + * above bit 15. "a" is evaluated once; "bits" is expanded twice. */ +#define rightRotate_16(a, bits) \ + (__extension__ ({ \ + uint16_t _temp = (a); \ + (uint16_t)((_temp >> (bits)) | (_temp << (16 - (bits)))); \ + })) + +/* Left rotate by a specific number of 
bits. These macros may be replaced + * with more efficient ones on platforms that lack a barrel shifter */ +#define leftRotate1_16(a) (leftRotate_16((a), 1)) +#define leftRotate2_16(a) (leftRotate_16((a), 2)) +#define leftRotate3_16(a) (leftRotate_16((a), 3)) +#define leftRotate4_16(a) (leftRotate_16((a), 4)) +#define leftRotate5_16(a) (leftRotate_16((a), 5)) +#define leftRotate6_16(a) (leftRotate_16((a), 6)) +#define leftRotate7_16(a) (leftRotate_16((a), 7)) +#define leftRotate8_16(a) (leftRotate_16((a), 8)) +#define leftRotate9_16(a) (leftRotate_16((a), 9)) +#define leftRotate10_16(a) (leftRotate_16((a), 10)) +#define leftRotate11_16(a) (leftRotate_16((a), 11)) +#define leftRotate12_16(a) (leftRotate_16((a), 12)) +#define leftRotate13_16(a) (leftRotate_16((a), 13)) +#define leftRotate14_16(a) (leftRotate_16((a), 14)) +#define leftRotate15_16(a) (leftRotate_16((a), 15)) + +/* Right rotate by a specific number of bits. These macros may be replaced + * with more efficient ones on platforms that lack a barrel shifter */ +#define rightRotate1_16(a) (rightRotate_16((a), 1)) +#define rightRotate2_16(a) (rightRotate_16((a), 2)) +#define rightRotate3_16(a) (rightRotate_16((a), 3)) +#define rightRotate4_16(a) (rightRotate_16((a), 4)) +#define rightRotate5_16(a) (rightRotate_16((a), 5)) +#define rightRotate6_16(a) (rightRotate_16((a), 6)) +#define rightRotate7_16(a) (rightRotate_16((a), 7)) +#define rightRotate8_16(a) (rightRotate_16((a), 8)) +#define rightRotate9_16(a) (rightRotate_16((a), 9)) +#define rightRotate10_16(a) (rightRotate_16((a), 10)) +#define rightRotate11_16(a) (rightRotate_16((a), 11)) +#define rightRotate12_16(a) (rightRotate_16((a), 12)) +#define rightRotate13_16(a) (rightRotate_16((a), 13)) +#define rightRotate14_16(a) (rightRotate_16((a), 14)) +#define rightRotate15_16(a) (rightRotate_16((a), 15)) + +/* Rotate an 8-bit value left by a number of bits (the fixed-count + * leftRotateN_8 wrappers pass 1..7). _temp is promoted to int inside the + * shift expression, so the result is cast back to uint8_t to drop the + * bits shifted above bit 7; without the cast the macro's value is only a + * correct rotation after the caller truncates it. + * "a" is evaluated once; "bits" is expanded twice. */ +#define leftRotate_8(a, bits) \ + (__extension__ ({ \ + uint8_t _temp = (a); \ + (uint8_t)((_temp << (bits)) | 
(_temp >> (8 - (bits)))); \ + })) + +/* Rotate an 8-bit value right by a number of bits (the fixed-count + * rightRotateN_8 wrappers pass 1..7). As for leftRotate_8, the result is + * cast back to uint8_t because the promoted left shift leaves bits above + * bit 7. "a" is evaluated once; "bits" is expanded twice. */ +#define rightRotate_8(a, bits) \ + (__extension__ ({ \ + uint8_t _temp = (a); \ + (uint8_t)((_temp >> (bits)) | (_temp << (8 - (bits)))); \ + })) + +/* Left rotate by a specific number of bits. These macros may be replaced + * with more efficient ones on platforms that lack a barrel shifter */ +#define leftRotate1_8(a) (leftRotate_8((a), 1)) +#define leftRotate2_8(a) (leftRotate_8((a), 2)) +#define leftRotate3_8(a) (leftRotate_8((a), 3)) +#define leftRotate4_8(a) (leftRotate_8((a), 4)) +#define leftRotate5_8(a) (leftRotate_8((a), 5)) +#define leftRotate6_8(a) (leftRotate_8((a), 6)) +#define leftRotate7_8(a) (leftRotate_8((a), 7)) + +/* Right rotate by a specific number of bits. These macros may be replaced + * with more efficient ones on platforms that lack a barrel shifter */ +#define rightRotate1_8(a) (rightRotate_8((a), 1)) +#define rightRotate2_8(a) (rightRotate_8((a), 2)) +#define rightRotate3_8(a) (rightRotate_8((a), 3)) +#define rightRotate4_8(a) (rightRotate_8((a), 4)) +#define rightRotate5_8(a) (rightRotate_8((a), 5)) +#define rightRotate6_8(a) (rightRotate_8((a), 6)) +#define rightRotate7_8(a) (rightRotate_8((a), 7)) + +#endif diff --git a/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/nistlwc b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/nistlwc new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/nistlwc diff --git a/drygascon/Implementations/crypto_aead/drygascon128/designers b/drygascon/Implementations/crypto_aead/drygascon128/designers new file mode 100644 index 0000000..ff5e323 --- /dev/null +++ b/drygascon/Implementations/crypto_aead/drygascon128/designers @@ -0,0 +1 @@ +Sebastien Riou diff --git a/drygascon/Implementations/crypto_aead/drygascon128/ref/implementors b/drygascon/Implementations/crypto_aead/drygascon128/ref/implementors new file mode 
100644 index 0000000..ff5e323 --- /dev/null +++ b/drygascon/Implementations/crypto_aead/drygascon128/ref/implementors @@ -0,0 +1 @@ +Sebastien Riou diff --git a/drygascon/Implementations/crypto_aead/drygascon128/ref/nistlwc b/drygascon/Implementations/crypto_aead/drygascon128/ref/nistlwc new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/drygascon/Implementations/crypto_aead/drygascon128/ref/nistlwc -- libgit2 0.26.0