Commit b9419420 by Sebastien Riou Committed by Enrico Pozzobon

drygascon add_arm_cortex-m

parent 3f904077
/*
* Copyright (C) 2020 Southern Storm Software, Pty Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "aead-common.h"
/**
 * \brief Check an authentication tag without branching on the tag contents.
 *
 * \param plaintext Points to the plaintext data; it is zeroed in place when
 * the tags differ. It is never dereferenced when \a plaintext_len is 0.
 * \param plaintext_len Length of the plaintext in bytes.
 * \param tag1 First tag to compare.
 * \param tag2 Second tag to compare.
 * \param size Number of bytes to compare in each tag.
 *
 * \return Returns -1 if the tag check failed or 0 if the check succeeded.
 */
int aead_check_tag
    (unsigned char *plaintext, unsigned long long plaintext_len,
     const unsigned char *tag1, const unsigned char *tag2,
     unsigned size)
{
    unsigned diff = 0;
    int accum;

    /* OR together all byte differences: "diff" stays within 0..255 and
     * is zero only when every compared byte is equal */
    while (size > 0) {
        diff |= (unsigned)(*tag1++ ^ *tag2++);
        --size;
    }

    /* Set "accum" to -1 if the tags match, or 0 if they don't match.
     * (diff - 1) wraps to all-ones only when diff is 0, so bit 8 of the
     * unsigned result is the "equal" flag.  Working in unsigned arithmetic
     * avoids right-shifting a negative signed value, whose result is
     * implementation-defined. */
    accum = -(int)(((diff - 1U) >> 8) & 1U);

    /* Destroy the plaintext if the tag match failed */
    while (plaintext_len > 0) {
        *plaintext++ &= accum;
        --plaintext_len;
    }

    /* If "accum" is 0, return -1, otherwise return 0 */
    return ~accum;
}
/**
 * \brief Check an authentication tag without branching on the tag contents,
 * folding in the result of a previous check.
 *
 * \param plaintext Points to the plaintext data; it is zeroed in place when
 * the combined check fails. It is never dereferenced when
 * \a plaintext_len is 0.
 * \param plaintext_len Length of the plaintext in bytes.
 * \param tag1 First tag to compare.
 * \param tag2 Second tag to compare.
 * \param size Number of bytes to compare in each tag.
 * \param precheck Set to -1 if previous check succeeded or 0 if it failed.
 *
 * \return Returns -1 if the tag check failed or 0 if the check succeeded.
 */
int aead_check_tag_precheck
    (unsigned char *plaintext, unsigned long long plaintext_len,
     const unsigned char *tag1, const unsigned char *tag2,
     unsigned size, int precheck)
{
    unsigned diff = 0;
    int accum;

    /* OR together all byte differences: "diff" stays within 0..255 and
     * is zero only when every compared byte is equal */
    while (size > 0) {
        diff |= (unsigned)(*tag1++ ^ *tag2++);
        --size;
    }

    /* Set "accum" to -1 if the tags match, or 0 if they don't match, and
     * then mask it with the outcome of the previous check.  The mask is
     * derived with unsigned arithmetic to avoid right-shifting a negative
     * signed value, whose result is implementation-defined. */
    accum = (-(int)(((diff - 1U) >> 8) & 1U)) & precheck;

    /* Destroy the plaintext if the tag match failed */
    while (plaintext_len > 0) {
        *plaintext++ &= accum;
        --plaintext_len;
    }

    /* If "accum" is 0, return -1, otherwise return 0 */
    return ~accum;
}
/*
* Copyright (C) 2020 Southern Storm Software, Pty Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef LWCRYPTO_AEAD_COMMON_H
#define LWCRYPTO_AEAD_COMMON_H
#include <stddef.h>
/**
* \file aead-common.h
* \brief Definitions that are common across AEAD schemes.
*
* AEAD stands for "Authenticated Encryption with Associated Data".
* It is a standard API pattern for securely encrypting and
* authenticating packets of data.
*/
#ifdef __cplusplus
extern "C" {
#endif
/**
* \brief Encrypts and authenticates a packet with an AEAD scheme.
*
* \param c Buffer to receive the output.
* \param clen On exit, set to the length of the output which includes
* the ciphertext and the authentication tag.
* \param m Buffer that contains the plaintext message to encrypt.
* \param mlen Length of the plaintext message in bytes.
* \param ad Buffer that contains associated data to authenticate
* along with the packet but which does not need to be encrypted.
* \param adlen Length of the associated data in bytes.
* \param nsec Secret nonce - normally not used by AEAD schemes.
* \param npub Points to the public nonce for the packet.
* \param k Points to the key to use to encrypt the packet.
*
* \return 0 on success, or a negative value if there was an error in
* the parameters.
*/
typedef int (*aead_cipher_encrypt_t)
(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k);
/**
* \brief Decrypts and authenticates a packet with an AEAD scheme.
*
* \param m Buffer to receive the plaintext message on output.
* \param mlen Receives the length of the plaintext message on output.
* \param nsec Secret nonce - normally not used by AEAD schemes.
* \param c Buffer that contains the ciphertext and authentication
* tag to decrypt.
* \param clen Length of the input data in bytes, which includes the
* ciphertext and the authentication tag.
* \param ad Buffer that contains associated data to authenticate
* along with the packet but which does not need to be encrypted.
* \param adlen Length of the associated data in bytes.
* \param npub Points to the public nonce for the packet.
* \param k Points to the key to use to decrypt the packet.
*
* \return 0 on success, -1 if the authentication tag was incorrect,
* or some other negative number if there was an error in the parameters.
*/
typedef int (*aead_cipher_decrypt_t)
(unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k);
/**
* \brief Hashes a block of input data.
*
* \param out Buffer to receive the hash output.
* \param in Points to the input data to be hashed.
* \param inlen Length of the input data in bytes.
*
* \return Returns zero on success or -1 if there was an error in the
* parameters.
*/
typedef int (*aead_hash_t)
(unsigned char *out, const unsigned char *in, unsigned long long inlen);
/**
* \brief Initializes the state for a hashing operation.
*
* \param state Hash state to be initialized.
*/
typedef void (*aead_hash_init_t)(void *state);
/**
* \brief Updates a hash state with more input data.
*
* \param state Hash state to be updated.
* \param in Points to the input data to be incorporated into the state.
* \param inlen Length of the input data to be incorporated into the state.
*/
typedef void (*aead_hash_update_t)
(void *state, const unsigned char *in, unsigned long long inlen);
/**
* \brief Returns the final hash value from a hashing operation.
*
* \param state Hash state to be finalized.
* \param out Points to the output buffer to receive the hash value.
*/
typedef void (*aead_hash_finalize_t)(void *state, unsigned char *out);
/**
* \brief Absorbs more input data into an XOF state.
*
* \param state XOF state to be updated.
* \param in Points to the input data to be absorbed into the state.
* \param inlen Length of the input data to be absorbed into the state.
*
* \sa ascon_xof_init(), ascon_xof_squeeze()
*/
typedef void (*aead_xof_absorb_t)
(void *state, const unsigned char *in, unsigned long long inlen);
/**
* \brief Squeezes output data from an XOF state.
*
* \param state XOF state to squeeze the output data from.
* \param out Points to the output buffer to receive the squeezed data.
* \param outlen Number of bytes of data to squeeze out of the state.
*/
typedef void (*aead_xof_squeeze_t)
(void *state, unsigned char *out, unsigned long long outlen);
/**
* \brief No special AEAD features.
*/
#define AEAD_FLAG_NONE 0x0000
/**
* \brief The natural byte order of the AEAD cipher is little-endian.
*
* If this flag is not present, then the natural byte order of the
* AEAD cipher should be assumed to be big-endian.
*
* The natural byte order may be useful when formatting packet sequence
* numbers as nonces. The application needs to know whether the sequence
* number should be packed into the leading or trailing bytes of the nonce.
*/
#define AEAD_FLAG_LITTLE_ENDIAN 0x0001
/**
* \brief Meta-information about an AEAD cipher.
*
* Bundles the cipher's name, its key, nonce and tag lengths, its feature
* flags (a combination of the AEAD_FLAG_* values), and pointers to its
* encryption and decryption entry points.
*
* NOTE(review): "expected" appears to be optional - at least one descriptor
* is initialized without it - so users of this structure should presumably
* tolerate a null pointer there. Confirm against the benchmark code.
*/
typedef struct
{
const char *name; /**< Name of the cipher */
unsigned key_len; /**< Length of the key in bytes */
unsigned nonce_len; /**< Length of the nonce in bytes */
unsigned tag_len; /**< Length of the tag in bytes */
unsigned flags; /**< Flags for extra features */
aead_cipher_encrypt_t encrypt; /**< AEAD encryption function */
aead_cipher_decrypt_t decrypt; /**< AEAD decryption function */
unsigned char *expected; /**< AEAD encryption benchmark expected result */
} aead_cipher_t;
/**
* \brief Meta-information about a hash algorithm that is related to an AEAD.
*
* Regular hash algorithms should provide the "hash", "init", "update",
* and "finalize" functions. Extensible Output Functions (XOF's) should
* provide the "hash", "init", "absorb", and "squeeze" functions.
*
* NOTE(review): entries that an algorithm does not implement appear to be
* left as null pointers, so callers should presumably check a pointer
* before calling through it - confirm.
*/
typedef struct
{
const char *name; /**< Name of the hash algorithm */
size_t state_size; /**< Size of the incremental state structure */
unsigned hash_len; /**< Length of the hash in bytes */
unsigned flags; /**< Flags for extra features */
aead_hash_t hash; /**< All in one hashing function */
aead_hash_init_t init; /**< Incremental hash/XOF init function */
aead_hash_update_t update; /**< Incremental hash update function */
aead_hash_finalize_t finalize; /**< Incremental hash finalize function */
aead_xof_absorb_t absorb; /**< Incremental XOF absorb function */
aead_xof_squeeze_t squeeze; /**< Incremental XOF squeeze function */
} aead_hash_algorithm_t;
/**
* \brief Check an authentication tag in constant time.
*
* \param plaintext Points to the plaintext data.
* \param plaintext_len Length of the plaintext in bytes.
* \param tag1 First tag to compare.
* \param tag2 Second tag to compare.
* \param tag_len Length of the tags in bytes.
*
* \return Returns -1 if the tag check failed or 0 if the check succeeded.
*
* If the tag check fails, then the \a plaintext will also be zeroed to
* prevent it from being used accidentally by the application when the
* ciphertext was invalid.
*/
int aead_check_tag
(unsigned char *plaintext, unsigned long long plaintext_len,
const unsigned char *tag1, const unsigned char *tag2,
unsigned tag_len);
/**
* \brief Check an authentication tag in constant time with a previous check.
*
* \param plaintext Points to the plaintext data.
* \param plaintext_len Length of the plaintext in bytes.
* \param tag1 First tag to compare.
* \param tag2 Second tag to compare.
* \param tag_len Length of the tags in bytes.
* \param precheck Set to -1 if previous check succeeded or 0 if it failed.
*
* \return Returns -1 if the tag check failed or 0 if the check succeeded.
*
* If the tag check fails, then the \a plaintext will also be zeroed to
* prevent it from being used accidentally by the application when the
* ciphertext was invalid.
*
* This version can be used to incorporate other information about the
* correctness of the plaintext into the final result.
*/
int aead_check_tag_precheck
(unsigned char *plaintext, unsigned long long plaintext_len,
const unsigned char *tag1, const unsigned char *tag2,
unsigned tag_len, int precheck);
#ifdef __cplusplus
}
#endif
#endif
/*
 * NOTE(review): these look like the "api.h" size constants used by the
 * NIST LWC / SUPERCOP style API (key, secret nonce, public nonce and tag
 * sizes in bytes, plus a no-overlap flag). Confirm which DryGASCON key-size
 * variant this 16 byte key setting belongs to.
 */
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
/*
* Copyright (C) 2020 Southern Storm Software, Pty Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "drygascon.h"
#include "internal-drysponge.h"
#include <string.h>
/*
 * Reference results for the three DryGASCON128 key sizes (32, 56 and 16
 * bytes). Each array holds DRYGASCON128_TAG_SIZE bytes and is published
 * through the "expected" member of the matching cipher descriptor.
 * NOTE(review): aead_cipher_t describes that member as the "AEAD encryption
 * benchmark expected result"; the benchmark input that yields these values
 * is not visible in this file.
 */
uint8_t drygascon128k32_expected[DRYGASCON128_TAG_SIZE]={0x66,0x5A,0xDE,0x6C,0x0F,0xBD,0x48,0x8C,0x5E,0xA4,0x77,0x5D,0xD6,0x24,0xDA,0xD7};
uint8_t drygascon128k56_expected[DRYGASCON128_TAG_SIZE]={0x7B,0x8B,0x9D,0x58,0xA7,0xF7,0x5F,0x1E,0x56,0x99,0x46,0xD6,0x24,0xC4,0xF7,0x68};
uint8_t drygascon128k16_expected[DRYGASCON128_TAG_SIZE]={0x14,0xA5,0x21,0x17,0xFF,0x52,0x4F,0x7C,0xCB,0xB3,0xEB,0xE4,0x05,0xEF,0x18,0xA4};
/*
 * Meta-information block for the DryGASCON128 cipher with a 32 byte key.
 *
 * Initializers follow the member order of aead_cipher_t: name, key length,
 * nonce length, tag length, flags, encrypt, decrypt, expected result.
 * The object is declared with a single "const" so that the definition
 * matches the "extern aead_cipher_t const" declaration in drygascon.h.
 */
aead_cipher_t const drygascon128k32_cipher = {
    "DryGASCON128k32",
    DRYGASCON128_FASTKEY_SIZE,
    DRYGASCON128_NONCE_SIZE,
    DRYGASCON128_TAG_SIZE,
    AEAD_FLAG_LITTLE_ENDIAN,
    drygascon128k32_aead_encrypt,
    drygascon128k32_aead_decrypt,
    drygascon128k32_expected
};
/*
 * Meta-information block for the default DryGASCON128 cipher.
 *
 * Carries exactly the same values as the 32 byte key descriptor, including
 * the "DryGASCON128k32" name string.  Initializers follow the member order
 * of aead_cipher_t.  The object is declared with a single "const" so that
 * the definition matches the "extern aead_cipher_t const" declaration in
 * drygascon.h.
 */
aead_cipher_t const drygascon128_cipher = {
    "DryGASCON128k32",
    DRYGASCON128_FASTKEY_SIZE,
    DRYGASCON128_NONCE_SIZE,
    DRYGASCON128_TAG_SIZE,
    AEAD_FLAG_LITTLE_ENDIAN,
    drygascon128k32_aead_encrypt,
    drygascon128k32_aead_decrypt,
    drygascon128k32_expected
};
/*
 * Meta-information block for the DryGASCON128 cipher with a 56 byte key.
 *
 * Initializers follow the member order of aead_cipher_t: name, key length,
 * nonce length, tag length, flags, encrypt, decrypt, expected result.
 * The object is declared with a single "const" so that the definition
 * matches the "extern aead_cipher_t const" declaration in drygascon.h.
 */
aead_cipher_t const drygascon128k56_cipher = {
    "DryGASCON128k56",
    DRYGASCON128_SAFEKEY_SIZE,
    DRYGASCON128_NONCE_SIZE,
    DRYGASCON128_TAG_SIZE,
    AEAD_FLAG_LITTLE_ENDIAN,
    drygascon128k56_aead_encrypt,
    drygascon128k56_aead_decrypt,
    drygascon128k56_expected
};
/*
 * Meta-information block for the DryGASCON128 cipher with a 16 byte key.
 *
 * Initializers follow the member order of aead_cipher_t: name, key length,
 * nonce length, tag length, flags, encrypt, decrypt, expected result.
 * The object is declared with a single "const" so that the definition
 * matches the "extern aead_cipher_t const" declaration in drygascon.h.
 */
aead_cipher_t const drygascon128k16_cipher = {
    "DryGASCON128k16",
    DRYGASCON128_MINKEY_SIZE,
    DRYGASCON128_NONCE_SIZE,
    DRYGASCON128_TAG_SIZE,
    AEAD_FLAG_LITTLE_ENDIAN,
    drygascon128k16_aead_encrypt,
    drygascon128k16_aead_decrypt,
    drygascon128k16_expected
};
/*
 * Meta-information block for the DryGASCON256 cipher.
 *
 * Initializers follow the member order of aead_cipher_t: name, key length,
 * nonce length, tag length, flags, encrypt, decrypt, expected result.
 * No reference result is defined for this cipher, so "expected" is set to
 * an explicit null pointer instead of relying on implicit
 * zero-initialization of the trailing member.
 */
aead_cipher_t const drygascon256_cipher = {
    "DryGASCON256",
    DRYGASCON256_KEY_SIZE,
    DRYGASCON256_NONCE_SIZE,
    DRYGASCON256_TAG_SIZE,
    AEAD_FLAG_LITTLE_ENDIAN,
    drygascon256_aead_encrypt,
    drygascon256_aead_decrypt,
    (unsigned char *)0
};
/*
 * Meta-information block for DryGASCON128-HASH.
 *
 * Only the all-in-one "hash" entry point is provided: the incremental
 * init/update/finalize hooks and the XOF absorb/squeeze hooks are all null.
 * NOTE(review): state_size is sizeof(int), which looks like a placeholder
 * given that no incremental state is exposed - confirm.
 */
aead_hash_algorithm_t const drygascon128_hash_algorithm = {
"DryGASCON128-HASH",
sizeof(int),
DRYGASCON128_HASH_SIZE,
AEAD_FLAG_LITTLE_ENDIAN,
drygascon128_hash,
(aead_hash_init_t)0,
(aead_hash_update_t)0,
(aead_hash_finalize_t)0,
(aead_xof_absorb_t)0,
(aead_xof_squeeze_t)0
};
/*
 * Meta-information block for DryGASCON256-HASH.
 *
 * Only the all-in-one "hash" entry point is provided: the incremental
 * init/update/finalize hooks and the XOF absorb/squeeze hooks are all null.
 * NOTE(review): state_size is sizeof(int), which looks like a placeholder
 * given that no incremental state is exposed - confirm.
 */
aead_hash_algorithm_t const drygascon256_hash_algorithm = {
"DryGASCON256-HASH",
sizeof(int),
DRYGASCON256_HASH_SIZE,
AEAD_FLAG_LITTLE_ENDIAN,
drygascon256_hash,
(aead_hash_init_t)0,
(aead_hash_update_t)0,
(aead_hash_finalize_t)0,
(aead_xof_absorb_t)0,
(aead_xof_squeeze_t)0
};
/**
 * \brief Feeds associated data into a DryGASCON128 sponge state.
 *
 * Every block but the last is passed to drygascon128_f_wrap() at the full
 * rate.  The last block (which may be shorter than the rate) is passed
 * after the state's domain has been set to mark it as associated data,
 * optionally as the final block of the packet, and as padded when short.
 *
 * \param state DrySPONGE128 sponge state.
 * \param ad Points to the associated data.
 * \param adlen Length of the associated data in bytes; the AEAD callers
 * only invoke this function when it is non-zero.
 * \param finalize Non-zero when no message data follows, so that the last
 * block is also flagged as final.
 */
static void drygascon128_process_ad
    (drysponge128_state_t *state, const unsigned char *ad,
     unsigned long long adlen, int finalize)
{
    /* Full-rate blocks: stop while one (possibly partial) block remains */
    for (; adlen > DRYSPONGE128_RATE; adlen -= DRYSPONGE128_RATE) {
        drygascon128_f_wrap(state, ad, DRYSPONGE128_RATE);
        ad += DRYSPONGE128_RATE;
    }

    /* Domain separation for the remaining block */
    state->domain = DRYDOMAIN128_ASSOC_DATA;
    if (finalize != 0) {
        state->domain |= DRYDOMAIN128_FINAL;
    }
    if (adlen < DRYSPONGE128_RATE) {
        state->domain |= DRYDOMAIN128_PADDED;
    }

    /* adlen is at most DRYSPONGE128_RATE here, so the cast is lossless */
    drygascon128_f_wrap(state, ad, (unsigned)adlen);
}
/**
 * \brief Feeds associated data into a DryGASCON256 sponge state.
 *
 * Every block but the last is absorbed at the full rate and followed by
 * drysponge256_g_core().  The last block (which may be shorter than the
 * rate) is absorbed after the state's domain has been set to mark it as
 * associated data, optionally as the final block of the packet, and as
 * padded when short; it is followed by drysponge256_g().
 *
 * \param state DrySPONGE256 sponge state.
 * \param ad Points to the associated data.
 * \param adlen Length of the associated data in bytes; the AEAD callers
 * only invoke this function when it is non-zero.
 * \param finalize Non-zero when no message data follows, so that the last
 * block is also flagged as final.
 */
static void drygascon256_process_ad
    (drysponge256_state_t *state, const unsigned char *ad,
     unsigned long long adlen, int finalize)
{
    /* Full-rate blocks: stop while one (possibly partial) block remains */
    for (; adlen > DRYSPONGE256_RATE; adlen -= DRYSPONGE256_RATE) {
        drysponge256_f_absorb(state, ad, DRYSPONGE256_RATE);
        drysponge256_g_core(state);
        ad += DRYSPONGE256_RATE;
    }

    /* Domain separation for the remaining block */
    state->domain = DRYDOMAIN256_ASSOC_DATA;
    if (finalize != 0) {
        state->domain |= DRYDOMAIN256_FINAL;
    }
    if (adlen < DRYSPONGE256_RATE) {
        state->domain |= DRYDOMAIN256_PADDED;
    }

    /* adlen is at most DRYSPONGE256_RATE here, so the cast is lossless */
    drysponge256_f_absorb(state, ad, (unsigned)adlen);
    drysponge256_g(state);
}
/**
 * \brief Encrypts and authenticates a packet with DryGASCON128 for a
 * caller-selected key size.
 *
 * \param c Buffer to receive the ciphertext followed by the tag.
 * \param clen On exit, set to mlen + DRYGASCON128_TAG_SIZE.
 * \param m Buffer that contains the plaintext message to encrypt.
 * \param mlen Length of the plaintext message in bytes.
 * \param ad Buffer that contains the associated data.
 * \param adlen Length of the associated data in bytes.
 * \param keysize Key size in bytes, passed through to drysponge128_setup().
 * \param npub Points to the public nonce for the packet.
 * \param k Points to the key.
 *
 * \return 0 on success, or -1 if drysponge128_safe_alignement() rejects the
 * local sponge state; in that case nothing is written to \a clen or \a c.
 */
int drygascon128_aead_encrypt_core
(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
unsigned int keysize,
const unsigned char *npub,
const unsigned char *k)
{
drysponge128_state_t state;
unsigned temp;
/* Refuse to run if the local state fails the alignment check.
 * NOTE(review): the exact requirement is defined by
 * drysponge128_safe_alignement(), which is not visible here. */
if(!drysponge128_safe_alignement(&state)){
return -1;
}
/* Set the length of the returned ciphertext */
*clen = mlen + DRYGASCON128_TAG_SIZE;
/* Initialize the sponge state with the key and nonce; the last argument
 * is non-zero when there is neither associated data nor a message */
drysponge128_setup(&state, k, keysize, npub, adlen == 0 && mlen == 0);
/* Process the associated data; it is flagged as final when no message
 * follows */
if (adlen > 0)
drygascon128_process_ad(&state, ad, adlen, mlen == 0);
/* Encrypt the plaintext to produce the ciphertext */
if (mlen > 0) {
/* Process all blocks except the last one */
while (mlen > DRYSPONGE128_RATE) {
/* Produce the ciphertext block from the plaintext and state.r.B first,
 * then feed the plaintext block into the state.
 * NOTE(review): presumably c = m XOR state.r.B - confirm against
 * lw_xor_block_2_src(). */
lw_xor_block_2_src(c, m, state.r.B, DRYSPONGE128_RATE);
drygascon128_f_wrap(&state, m, DRYSPONGE128_RATE);
c += DRYSPONGE128_RATE;
m += DRYSPONGE128_RATE;
mlen -= DRYSPONGE128_RATE;
}
/* Process the last block with domain separation and padding */
state.domain = DRYDOMAIN128_MESSAGE | DRYDOMAIN128_FINAL;
if (mlen < DRYSPONGE128_RATE)
state.domain |= DRYDOMAIN128_PADDED;
/* mlen is at most DRYSPONGE128_RATE here, so the cast is lossless */
temp = (unsigned)mlen;
lw_xor_block_2_src(c, m, state.r.B, temp);
drygascon128_f_wrap(&state, m, temp);
c += temp;
}
/* Generate the authentication tag: it is copied from state.r.B and
 * written immediately after the ciphertext */
memcpy(c, state.r.B, DRYGASCON128_TAG_SIZE);
return 0;
}
/**
 * \brief Decrypts and authenticates a packet with DryGASCON128 for a
 * caller-selected key size.
 *
 * \param m Buffer to receive the plaintext message on output.
 * \param mlen Receives clen - DRYGASCON128_TAG_SIZE.
 * \param keysize Key size in bytes, passed through to drysponge128_setup().
 * \param c Buffer that contains the ciphertext followed by the tag.
 * \param clen Length of the input in bytes, including the tag.
 * \param ad Buffer that contains the associated data.
 * \param adlen Length of the associated data in bytes.
 * \param npub Points to the public nonce for the packet.
 * \param k Points to the key.
 *
 * \return 0 on success, or -1 if the alignment check fails, if \a clen is
 * shorter than the tag, or if the authentication tag does not match.
 * On a tag mismatch the plaintext written to \a m is zeroed by
 * aead_check_tag().
 */
int drygascon128_aead_decrypt_core
(unsigned char *m, unsigned long long *mlen,
unsigned int keysize,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k)
{
drysponge128_state_t state;
/* Remember the start of the plaintext buffer for the final tag check */
unsigned char *mtemp = m;
unsigned temp;
/* Refuse to run if the local state fails the alignment check.
 * NOTE(review): the exact requirement is defined by
 * drysponge128_safe_alignement(), which is not visible here. */
if(!drysponge128_safe_alignement(&state)){
return -1;
}
/* Validate the ciphertext length and set the return "mlen" value */
if (clen < DRYGASCON128_TAG_SIZE)
return -1;
*mlen = clen - DRYGASCON128_TAG_SIZE;
/* Initialize the sponge state with the key and nonce; from here on
 * "clen" counts only the ciphertext bytes, without the tag */
clen -= DRYGASCON128_TAG_SIZE;
drysponge128_setup(&state, k, keysize, npub, adlen == 0 && clen == 0);
/* Process the associated data; it is flagged as final when no
 * ciphertext follows */
if (adlen > 0)
drygascon128_process_ad(&state, ad, adlen, clen == 0);
/* Decrypt the ciphertext to produce the plaintext */
if (clen > 0) {
/* Process all blocks except the last one */
while (clen > DRYSPONGE128_RATE) {
/* Recover the plaintext block first, then feed that recovered
 * plaintext (not the ciphertext) into the state */
lw_xor_block_2_src(m, c, state.r.B, DRYSPONGE128_RATE);
drygascon128_f_wrap(&state, m, DRYSPONGE128_RATE);
/* NOTE(review): no separate drysponge128_g() call is made here -
 * presumably drygascon128_f_wrap() covers that step; confirm. */
c += DRYSPONGE128_RATE;
m += DRYSPONGE128_RATE;
clen -= DRYSPONGE128_RATE;
}
/* Process the last block with domain separation and padding */
state.domain = DRYDOMAIN128_MESSAGE | DRYDOMAIN128_FINAL;
if (clen < DRYSPONGE128_RATE)
state.domain |= DRYDOMAIN128_PADDED;
/* clen is at most DRYSPONGE128_RATE here, so the cast is lossless */
temp = (unsigned)clen;
lw_xor_block_2_src(m, c, state.r.B, temp);
drygascon128_f_wrap(&state, m, temp);
c += temp;
}
/* Check the authentication tag: "c" now points at the received tag,
 * which is compared against state.r.B */
return aead_check_tag(mtemp, *mlen, state.r.B, c, DRYGASCON128_TAG_SIZE);
}
/*
 * Encrypts and authenticates a packet with DryGASCON128 using a 16 byte key.
 *
 * Thin wrapper that forwards every argument except "nsec" to
 * drygascon128_aead_encrypt_core() with a key size of 16, and returns
 * that function's result.
 */
int drygascon128k16_aead_encrypt
    (unsigned char *c, unsigned long long *clen,
     const unsigned char *m, unsigned long long mlen,
     const unsigned char *ad, unsigned long long adlen,
     const unsigned char *nsec,
     const unsigned char *npub,
     const unsigned char *k)
{
    /* The secret nonce is not used; mark it explicitly, as the
     * DryGASCON256 entry points do */
    (void)nsec;
    return drygascon128_aead_encrypt_core(c, clen, m, mlen, ad, adlen, 16, npub, k);
}
/*
 * Encrypts and authenticates a packet with DryGASCON128 using a 32 byte key.
 *
 * Thin wrapper that forwards every argument except "nsec" to
 * drygascon128_aead_encrypt_core() with a key size of 32, and returns
 * that function's result.
 */
int drygascon128k32_aead_encrypt
    (unsigned char *c, unsigned long long *clen,
     const unsigned char *m, unsigned long long mlen,
     const unsigned char *ad, unsigned long long adlen,
     const unsigned char *nsec,
     const unsigned char *npub,
     const unsigned char *k)
{
    /* The secret nonce is not used; mark it explicitly, as the
     * DryGASCON256 entry points do */
    (void)nsec;
    return drygascon128_aead_encrypt_core(c, clen, m, mlen, ad, adlen, 32, npub, k);
}
/*
 * Encrypts and authenticates a packet with DryGASCON128 using a 56 byte key.
 *
 * Thin wrapper that forwards every argument except "nsec" to
 * drygascon128_aead_encrypt_core() with a key size of 56, and returns
 * that function's result.
 */
int drygascon128k56_aead_encrypt
    (unsigned char *c, unsigned long long *clen,
     const unsigned char *m, unsigned long long mlen,
     const unsigned char *ad, unsigned long long adlen,
     const unsigned char *nsec,
     const unsigned char *npub,
     const unsigned char *k)
{
    /* The secret nonce is not used; mark it explicitly, as the
     * DryGASCON256 entry points do */
    (void)nsec;
    return drygascon128_aead_encrypt_core(c, clen, m, mlen, ad, adlen, 56, npub, k);
}
/*
 * Decrypts and authenticates a packet with DryGASCON128 using a 16 byte key.
 *
 * Thin wrapper that forwards every argument except "nsec" to
 * drygascon128_aead_decrypt_core() with a key size of 16, and returns
 * that function's result.
 */
int drygascon128k16_aead_decrypt
    (unsigned char *m, unsigned long long *mlen,
     unsigned char *nsec,
     const unsigned char *c, unsigned long long clen,
     const unsigned char *ad, unsigned long long adlen,
     const unsigned char *npub,
     const unsigned char *k)
{
    /* The secret nonce is not used; mark it explicitly, as the
     * DryGASCON256 entry points do */
    (void)nsec;
    return drygascon128_aead_decrypt_core(m, mlen, 16, c, clen, ad, adlen, npub, k);
}
/*
 * Decrypts and authenticates a packet with DryGASCON128 using a 32 byte key.
 *
 * Thin wrapper that forwards every argument except "nsec" to
 * drygascon128_aead_decrypt_core() with a key size of 32, and returns
 * that function's result.
 */
int drygascon128k32_aead_decrypt
    (unsigned char *m, unsigned long long *mlen,
     unsigned char *nsec,
     const unsigned char *c, unsigned long long clen,
     const unsigned char *ad, unsigned long long adlen,
     const unsigned char *npub,
     const unsigned char *k)
{
    /* The secret nonce is not used; mark it explicitly, as the
     * DryGASCON256 entry points do */
    (void)nsec;
    return drygascon128_aead_decrypt_core(m, mlen, 32, c, clen, ad, adlen, npub, k);
}
/*
 * Decrypts and authenticates a packet with DryGASCON128 using a 56 byte key.
 *
 * Thin wrapper that forwards every argument except "nsec" to
 * drygascon128_aead_decrypt_core() with a key size of 56, and returns
 * that function's result.
 */
int drygascon128k56_aead_decrypt
    (unsigned char *m, unsigned long long *mlen,
     unsigned char *nsec,
     const unsigned char *c, unsigned long long clen,
     const unsigned char *ad, unsigned long long adlen,
     const unsigned char *npub,
     const unsigned char *k)
{
    /* The secret nonce is not used; mark it explicitly, as the
     * DryGASCON256 entry points do */
    (void)nsec;
    return drygascon128_aead_decrypt_core(m, mlen, 56, c, clen, ad, adlen, npub, k);
}
/**
 * \brief Encrypts and authenticates a packet with DryGASCON256.
 *
 * \param c Buffer to receive the ciphertext followed by the tag.
 * \param clen On exit, set to mlen + DRYGASCON256_TAG_SIZE.
 * \param m Buffer that contains the plaintext message to encrypt.
 * \param mlen Length of the plaintext message in bytes.
 * \param ad Buffer that contains the associated data.
 * \param adlen Length of the associated data in bytes.
 * \param nsec Secret nonce - ignored.
 * \param npub Points to the public nonce for the packet.
 * \param k Points to the key.
 *
 * \return Always 0.
 */
int drygascon256_aead_encrypt
(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k)
{
drysponge256_state_t state;
unsigned temp;
(void)nsec;
/* Set the length of the returned ciphertext */
*clen = mlen + DRYGASCON256_TAG_SIZE;
/* Initialize the sponge state with the key and nonce; the last argument
 * is non-zero when there is neither associated data nor a message */
drysponge256_setup(&state, k, npub, adlen == 0 && mlen == 0);
/* Process the associated data; it is flagged as final when no message
 * follows */
if (adlen > 0)
drygascon256_process_ad(&state, ad, adlen, mlen == 0);
/* Encrypt the plaintext to produce the ciphertext */
if (mlen > 0) {
/* Process all blocks except the last one */
while (mlen > DRYSPONGE256_RATE) {
/* The plaintext is absorbed before the ciphertext block is written.
 * NOTE(review): presumably c = m XOR state.r.B - confirm against
 * lw_xor_block_2_src(). */
drysponge256_f_absorb(&state, m, DRYSPONGE256_RATE);
lw_xor_block_2_src(c, m, state.r.B, DRYSPONGE256_RATE);
drysponge256_g(&state);
c += DRYSPONGE256_RATE;
m += DRYSPONGE256_RATE;
mlen -= DRYSPONGE256_RATE;
}
/* Process the last block with domain separation and padding */
state.domain = DRYDOMAIN256_MESSAGE | DRYDOMAIN256_FINAL;
if (mlen < DRYSPONGE256_RATE)
state.domain |= DRYDOMAIN256_PADDED;
/* mlen is at most DRYSPONGE256_RATE here, so the cast is lossless */
temp = (unsigned)mlen;
drysponge256_f_absorb(&state, m, temp);
lw_xor_block_2_src(c, m, state.r.B, temp);
drysponge256_g(&state);
c += temp;
}
/* Generate the authentication tag: two 16 byte halves copied from
 * state.r.B, with one drysponge256_g() call between them */
memcpy(c, state.r.B, 16);
drysponge256_g(&state);
memcpy(c + 16, state.r.B, 16);
return 0;
}
/**
 * \brief Decrypts and authenticates a packet with DryGASCON256.
 *
 * \param m Buffer to receive the plaintext message on output.
 * \param mlen Receives clen - DRYGASCON256_TAG_SIZE.
 * \param nsec Secret nonce - ignored.
 * \param c Buffer that contains the ciphertext followed by the tag.
 * \param clen Length of the input in bytes, including the tag.
 * \param ad Buffer that contains the associated data.
 * \param adlen Length of the associated data in bytes.
 * \param npub Points to the public nonce for the packet.
 * \param k Points to the key.
 *
 * \return 0 on success, or -1 if \a clen is shorter than the tag or if the
 * authentication tag does not match. On a tag mismatch the plaintext
 * written to \a m is zeroed by aead_check_tag_precheck().
 */
int drygascon256_aead_decrypt
(unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k)
{
drysponge256_state_t state;
/* Remember the start of the plaintext buffer for the final tag check */
unsigned char *mtemp = m;
unsigned temp;
int result;
(void)nsec;
/* Validate the ciphertext length and set the return "mlen" value */
if (clen < DRYGASCON256_TAG_SIZE)
return -1;
*mlen = clen - DRYGASCON256_TAG_SIZE;
/* Initialize the sponge state with the key and nonce; from here on
 * "clen" counts only the ciphertext bytes, without the tag */
clen -= DRYGASCON256_TAG_SIZE;
drysponge256_setup(&state, k, npub, adlen == 0 && clen == 0);
/* Process the associated data; it is flagged as final when no
 * ciphertext follows */
if (adlen > 0)
drygascon256_process_ad(&state, ad, adlen, clen == 0);
/* Decrypt the ciphertext to produce the plaintext */
if (clen > 0) {
/* Process all blocks except the last one */
while (clen > DRYSPONGE256_RATE) {
/* Recover the plaintext block first, then absorb that recovered
 * plaintext (not the ciphertext) into the state */
lw_xor_block_2_src(m, c, state.r.B, DRYSPONGE256_RATE);
drysponge256_f_absorb(&state, m, DRYSPONGE256_RATE);
drysponge256_g(&state);
c += DRYSPONGE256_RATE;
m += DRYSPONGE256_RATE;
clen -= DRYSPONGE256_RATE;
}
/* Process the last block with domain separation and padding */
state.domain = DRYDOMAIN256_MESSAGE | DRYDOMAIN256_FINAL;
if (clen < DRYSPONGE256_RATE)
state.domain |= DRYDOMAIN256_PADDED;
/* clen is at most DRYSPONGE256_RATE here, so the cast is lossless */
temp = (unsigned)clen;
lw_xor_block_2_src(m, c, state.r.B, temp);
drysponge256_f_absorb(&state, m, temp);
drysponge256_g(&state);
c += temp;
}
/* Check the authentication tag which is split into two pieces. The
 * first half is compared without touching the plaintext (null buffer,
 * zero length); "result" is 0 on a match and -1 on a mismatch. */
result = aead_check_tag(0, 0, state.r.B, c, 16);
drysponge256_g(&state);
/* ~result turns the first outcome into the -1 (ok) / 0 (failed) form
 * expected for "precheck", so the plaintext survives only if both
 * halves match */
return aead_check_tag_precheck
(mtemp, *mlen, state.r.B, c + 16, 16, ~result);
}
/**
 * \brief Precomputed initialization vector for DryGASCON128-HASH.
 *
 * This is the CST_H value from the DryGASCON specification after it
 * has been processed by the key setup function for DrySPONGE128.
 *
 * Layout: 40 bytes for the "c" part followed by 16 bytes for the "x" part
 * (56 bytes in total).
 */
static const unsigned char drygascon128_hash_init[] = {
    0x24, 0x3f, 0x6a, 0x88, 0x85, 0xa3, 0x08, 0xd3,   /* c:  0 ..  7 */
    0x13, 0x19, 0x8a, 0x2e, 0x03, 0x70, 0x73, 0x44,   /* c:  8 .. 15 */
    0x24, 0x3f, 0x6a, 0x88, 0x85, 0xa3, 0x08, 0xd3,   /* c: 16 .. 23 */
    0x13, 0x19, 0x8a, 0x2e, 0x03, 0x70, 0x73, 0x44,   /* c: 24 .. 31 */
    0x24, 0x3f, 0x6a, 0x88, 0x85, 0xa3, 0x08, 0xd3,   /* c: 32 .. 39 */
    0xa4, 0x09, 0x38, 0x22, 0x29, 0x9f, 0x31, 0xd0,   /* x:  0 ..  7 */
    0x08, 0x2e, 0xfa, 0x98, 0xec, 0x4e, 0x6c, 0x89    /* x:  8 .. 15 */
};
/*
 * Hashes a block of input data with DryGASCON128-HASH.
 *
 * out    Receives 32 bytes of hash output, written as two 16 byte pieces.
 * in     Points to the input data to be hashed.
 * inlen  Length of the input data in bytes.
 *
 * Always returns 0.
 */
int drygascon128_hash
    (unsigned char *out, const unsigned char *in, unsigned long long inlen)
{
    drysponge128_state_t state;
    const unsigned char *iv = drygascon128_hash_init;

    /* Load the precomputed "c" part, then the "x" part that follows it */
    memcpy(state.c.B, iv, sizeof(state.c.B));
    iv += sizeof(state.c.B);
    memcpy(state.x.B, iv, sizeof(state.x.B));
    state.rounds = DRYSPONGE128_ROUNDS;
    state.domain = 0;

    /* The input goes through the associated-data path, flagged as final */
    drygascon128_process_ad(&state, in, inlen, 1);

    /* Emit the output in two pieces with one drysponge128_g() between */
    memcpy(&out[0], state.r.B, 16);
    drysponge128_g(&state);
    memcpy(&out[16], state.r.B, 16);
    return 0;
}
/**
 * \brief Precomputed initialization vector for DryGASCON256-HASH.
 *
 * This is the CST_H value from the DryGASCON specification after it
 * has been processed by the key setup function for DrySPONGE256.
 *
 * Layout: 72 bytes for the "c" part followed by 16 bytes for the "x" part
 * (88 bytes in total).
 */
static const unsigned char drygascon256_hash_init[] = {
    0x24, 0x3f, 0x6a, 0x88, 0x85, 0xa3, 0x08, 0xd3,   /* c:  0 ..  7 */
    0x13, 0x19, 0x8a, 0x2e, 0x03, 0x70, 0x73, 0x44,   /* c:  8 .. 15 */
    0xa4, 0x09, 0x38, 0x22, 0x29, 0x9f, 0x31, 0xd0,   /* c: 16 .. 23 */
    0x08, 0x2e, 0xfa, 0x98, 0xec, 0x4e, 0x6c, 0x89,   /* c: 24 .. 31 */
    0x24, 0x3f, 0x6a, 0x88, 0x85, 0xa3, 0x08, 0xd3,   /* c: 32 .. 39 */
    0x13, 0x19, 0x8a, 0x2e, 0x03, 0x70, 0x73, 0x44,   /* c: 40 .. 47 */
    0xa4, 0x09, 0x38, 0x22, 0x29, 0x9f, 0x31, 0xd0,   /* c: 48 .. 55 */
    0x08, 0x2e, 0xfa, 0x98, 0xec, 0x4e, 0x6c, 0x89,   /* c: 56 .. 63 */
    0x24, 0x3f, 0x6a, 0x88, 0x85, 0xa3, 0x08, 0xd3,   /* c: 64 .. 71 */
    0x45, 0x28, 0x21, 0xe6, 0x38, 0xd0, 0x13, 0x77,   /* x:  0 ..  7 */
    0xbe, 0x54, 0x66, 0xcf, 0x34, 0xe9, 0x0c, 0x6c    /* x:  8 .. 15 */
};
/*
 * Hashes a block of input data with DryGASCON256-HASH.
 *
 * out    Receives 64 bytes of hash output, written as four 16 byte pieces.
 * in     Points to the input data to be hashed.
 * inlen  Length of the input data in bytes.
 *
 * Always returns 0.
 */
int drygascon256_hash
    (unsigned char *out, const unsigned char *in, unsigned long long inlen)
{
    drysponge256_state_t state;
    const unsigned char *iv = drygascon256_hash_init;
    unsigned offset = 0;

    /* Load the precomputed "c" part, then the "x" part that follows it */
    memcpy(state.c.B, iv, sizeof(state.c.B));
    iv += sizeof(state.c.B);
    memcpy(state.x.B, iv, sizeof(state.x.B));
    state.rounds = DRYSPONGE256_ROUNDS;
    state.domain = 0;

    /* The input goes through the associated-data path, flagged as final */
    drygascon256_process_ad(&state, in, inlen, 1);

    /* Emit four 16 byte pieces, calling drysponge256_g() between
     * consecutive pieces but not after the last one */
    for (;;) {
        memcpy(out + offset, state.r.B, 16);
        offset += 16;
        if (offset == 64) {
            break;
        }
        drysponge256_g(&state);
    }
    return 0;
}
/*
* Copyright (C) 2020 Southern Storm Software, Pty Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef LWCRYPTO_DRYGASCON_H
#define LWCRYPTO_DRYGASCON_H
#include "aead-common.h"
/**
* \file drygascon.h
* \brief DryGASCON authenticated encryption algorithm.
*
* DryGASCON is a family of authenticated encryption algorithms based
* around a generalised version of the ASCON permutation. DryGASCON
* is designed to provide some protection against power analysis.
*
* There are four algorithms in the DryGASCON family:
*
* \li DryGASCON128 is an authenticated encryption algorithm with a
* 128-bit key, a 128-bit nonce, and a 128-bit authentication tag.
* \li DryGASCON256 is an authenticated encryption algorithm with a
* 256-bit key, a 128-bit nonce, and a 128-256 authentication tag.
* \li DryGASCON128-HASH is a hash algorithm with a 256-bit output.
* \li DryGASCON256-HASH is a hash algorithm with a 512-bit output.
*
* DryGASCON128 and DryGASCON128-HASH are the primary members of the family.
*
* References: https://github.com/sebastien-riou/DryGASCON
*/
#ifdef __cplusplus
extern "C" {
#endif
/**
* \brief Minimum Size of the key for DryGASCON128.
*/
#define DRYGASCON128_MINKEY_SIZE 16
/**
* \brief Fast Size of the key for DryGASCON128.
*/
#define DRYGASCON128_FASTKEY_SIZE 32
/**
* \brief Safe (and fast) Size of the key for DryGASCON128.
* Safe here means the size of the key helps prevent SPA during key loading
*/
#define DRYGASCON128_SAFEKEY_SIZE 56
/**
* \brief Size of the key for DryGASCON128 (default to "fast" size).
*/
#define DRYGASCON128_KEY_SIZE DRYGASCON128_FASTKEY_SIZE
/**
* \brief Size of the authentication tag for DryGASCON128.
*/
#define DRYGASCON128_TAG_SIZE 16
/**
* \brief Size of the nonce for DryGASCON128.
*/
#define DRYGASCON128_NONCE_SIZE 16
/**
* \brief Size of the hash output for DryGASCON128-HASH.
*/
#define DRYGASCON128_HASH_SIZE 32
/**
* \brief Size of the key for DryGASCON256.
*/
#define DRYGASCON256_KEY_SIZE 32
/**
* \brief Size of the authentication tag for DryGASCON256.
*/
#define DRYGASCON256_TAG_SIZE 32
/**
* \brief Size of the nonce for DryGASCON256.
*/
#define DRYGASCON256_NONCE_SIZE 16
/**
* \brief Size of the hash output for DryGASCON256-HASH.
*/
#define DRYGASCON256_HASH_SIZE 64
/**
* \brief Meta-information block for the DryGASCON128 cipher with 32 bytes key.
*/
extern aead_cipher_t const drygascon128k32_cipher;
/**
* \brief Meta-information block for the DryGASCON128 cipher with 56 bytes key.
*/
extern aead_cipher_t const drygascon128k56_cipher;
/**
* \brief Meta-information block for the DryGASCON128 cipher with 16 bytes key.
*/
extern aead_cipher_t const drygascon128k16_cipher;
/**
* \brief Meta-information block for the DryGASCON128 cipher (default to 32 bytes key).
*/
extern aead_cipher_t const drygascon128_cipher;
/**
* \brief Meta-information block for the DryGASCON256 cipher.
*/
extern aead_cipher_t const drygascon256_cipher;
/**
* \brief Meta-information block for DryGASCON128-HASH.
*/
extern aead_hash_algorithm_t const drygascon128_hash_algorithm;
/**
* \brief Meta-information block for DryGASCON256-HASH.
*/
extern aead_hash_algorithm_t const drygascon256_hash_algorithm;
/**
* \brief Encrypts and authenticates a packet with DryGASCON128 with 32 bytes key.
*
* Use this key size if SPA attacks are not a concern in your use case.
*
* \param c Buffer to receive the output.
* \param clen On exit, set to the length of the output which includes
* the ciphertext and the 16 byte authentication tag.
* \param m Buffer that contains the plaintext message to encrypt.
* \param mlen Length of the plaintext message in bytes.
* \param ad Buffer that contains associated data to authenticate
* along with the packet but which does not need to be encrypted.
* \param adlen Length of the associated data in bytes.
* \param nsec Secret nonce - not used by this algorithm.
* \param npub Points to the public nonce for the packet which must
* be 16 bytes in length.
* \param k Points to the 32 bytes of the key to use to encrypt the packet.
*
* Note that the function blocks if the last 16 bytes of the key are "invalid".
* Those bytes are valid only when their 32-bit words are all different from
* each other.
*
* \return 0 on success, or a negative value if there was an error in
* the parameters.
*
* \sa drygascon128k32_aead_decrypt()
*/
int drygascon128k32_aead_encrypt
(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k);
/**
* \brief Decrypts and authenticates a packet using DryGASCON128 with a 32-byte key.
*
* Use this key size if SPA attacks are not a concern in your use case.
*
* \param m Buffer to receive the plaintext message on output.
* \param mlen Receives the length of the plaintext message on output.
* \param nsec Secret nonce - not used by this algorithm.
* \param c Buffer that contains the ciphertext and authentication
* tag to decrypt.
* \param clen Length of the input data in bytes, which includes the
* ciphertext and the 16 byte authentication tag.
* \param ad Buffer that contains associated data to authenticate
* along with the packet but which does not need to be encrypted.
* \param adlen Length of the associated data in bytes.
* \param npub Points to the public nonce for the packet which must
* be 16 bytes in length.
* \param k Points to the 32 bytes of the key to use to decrypt the packet.
*
* \note The function blocks if the last 16 bytes of the key are "invalid".
* Those 16 bytes are valid only when their 32-bit words are all different
* from each other.
*
* \return 0 on success, -1 if the authentication tag was incorrect,
* or some other negative number if there was an error in the parameters.
*
* \sa drygascon128k32_aead_encrypt()
*/
int drygascon128k32_aead_decrypt
(unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k);
/**
* \brief Encrypts and authenticates a packet using DryGASCON128 with a 56-byte key.
*
* Use this key size if you want to prevent SPA attacks.
*
* \param c Buffer to receive the output.
* \param clen On exit, set to the length of the output which includes
* the ciphertext and the 16 byte authentication tag.
* \param m Buffer that contains the plaintext message to encrypt.
* \param mlen Length of the plaintext message in bytes.
* \param ad Buffer that contains associated data to authenticate
* along with the packet but which does not need to be encrypted.
* \param adlen Length of the associated data in bytes.
* \param nsec Secret nonce - not used by this algorithm.
* \param npub Points to the public nonce for the packet which must
* be 16 bytes in length.
* \param k Points to the 56 bytes of the key to use to encrypt the packet.
*
* \note The function blocks if the last 16 bytes of the key are "invalid".
* Those 16 bytes are valid only when their 32-bit words are all different
* from each other.
*
* \return 0 on success, or a negative value if there was an error in
* the parameters.
*
* \sa drygascon128k56_aead_decrypt()
*/
int drygascon128k56_aead_encrypt
(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k);
/**
* \brief Decrypts and authenticates a packet using DryGASCON128 with a 56-byte key.
*
* Use this key size if you want to prevent SPA attacks.
*
* \param m Buffer to receive the plaintext message on output.
* \param mlen Receives the length of the plaintext message on output.
* \param nsec Secret nonce - not used by this algorithm.
* \param c Buffer that contains the ciphertext and authentication
* tag to decrypt.
* \param clen Length of the input data in bytes, which includes the
* ciphertext and the 16 byte authentication tag.
* \param ad Buffer that contains associated data to authenticate
* along with the packet but which does not need to be encrypted.
* \param adlen Length of the associated data in bytes.
* \param npub Points to the public nonce for the packet which must
* be 16 bytes in length.
* \param k Points to the 56 bytes of the key to use to decrypt the packet.
*
* \note The function blocks if the last 16 bytes of the key are "invalid".
* Those 16 bytes are valid only when their 32-bit words are all different
* from each other.
*
* \return 0 on success, -1 if the authentication tag was incorrect,
* or some other negative number if there was an error in the parameters.
*
* \sa drygascon128k56_aead_encrypt()
*/
int drygascon128k56_aead_decrypt
(unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k);
/**
* \brief Encrypts and authenticates a packet using DryGASCON128 with a 16-byte key.
*
* Use this key size only if you really cannot use the 32-byte key.
*
* \param c Buffer to receive the output.
* \param clen On exit, set to the length of the output which includes
* the ciphertext and the 16 byte authentication tag.
* \param m Buffer that contains the plaintext message to encrypt.
* \param mlen Length of the plaintext message in bytes.
* \param ad Buffer that contains associated data to authenticate
* along with the packet but which does not need to be encrypted.
* \param adlen Length of the associated data in bytes.
* \param nsec Secret nonce - not used by this algorithm.
* \param npub Points to the public nonce for the packet which must
* be 16 bytes in length.
* \param k Points to the 16 bytes of the key to use to encrypt the packet.
*
* \return 0 on success, or a negative value if there was an error in
* the parameters.
*
* \sa drygascon128k16_aead_decrypt()
*/
int drygascon128k16_aead_encrypt
(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k);
/**
* \brief Decrypts and authenticates a packet using DryGASCON128 with a 16-byte key.
*
* Use this key size only if you really cannot use the 32-byte key.
*
* \param m Buffer to receive the plaintext message on output.
* \param mlen Receives the length of the plaintext message on output.
* \param nsec Secret nonce - not used by this algorithm.
* \param c Buffer that contains the ciphertext and authentication
* tag to decrypt.
* \param clen Length of the input data in bytes, which includes the
* ciphertext and the 16 byte authentication tag.
* \param ad Buffer that contains associated data to authenticate
* along with the packet but which does not need to be encrypted.
* \param adlen Length of the associated data in bytes.
* \param npub Points to the public nonce for the packet which must
* be 16 bytes in length.
* \param k Points to the 16 bytes of the key to use to decrypt the packet.
*
* \return 0 on success, -1 if the authentication tag was incorrect,
* or some other negative number if there was an error in the parameters.
*
* \sa drygascon128k16_aead_encrypt()
*/
int drygascon128k16_aead_decrypt
(unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k);
/**
* \brief Encrypts and authenticates a packet with DryGASCON256.
*
* \param c Buffer to receive the output.
* \param clen On exit, set to the length of the output which includes
* the ciphertext and the 16 byte authentication tag.
* \param m Buffer that contains the plaintext message to encrypt.
* \param mlen Length of the plaintext message in bytes.
* \param ad Buffer that contains associated data to authenticate
* along with the packet but which does not need to be encrypted.
* \param adlen Length of the associated data in bytes.
* \param nsec Secret nonce - not used by this algorithm.
* \param npub Points to the public nonce for the packet which must
* be 16 bytes in length.
* \param k Points to the 16 bytes of the key to use to encrypt the packet.
*
* NOTE(review): the 16-byte key and tag sizes stated above are identical to
* the DryGASCON128 documentation and may be a copy/paste leftover - confirm
* them against the DryGASCON256 key and tag size definitions.
*
* \return 0 on success, or a negative value if there was an error in
* the parameters.
*
* \sa drygascon256_aead_decrypt()
*/
int drygascon256_aead_encrypt
(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k);
/**
* \brief Decrypts and authenticates a packet with DryGASCON256.
*
* \param m Buffer to receive the plaintext message on output.
* \param mlen Receives the length of the plaintext message on output.
* \param nsec Secret nonce - not used by this algorithm.
* \param c Buffer that contains the ciphertext and authentication
* tag to decrypt.
* \param clen Length of the input data in bytes, which includes the
* ciphertext and the 16 byte authentication tag.
* \param ad Buffer that contains associated data to authenticate
* along with the packet but which does not need to be encrypted.
* \param adlen Length of the associated data in bytes.
* \param npub Points to the public nonce for the packet which must
* be 16 bytes in length.
* \param k Points to the 16 bytes of the key to use to decrypt the packet.
*
* NOTE(review): the 16-byte key and tag sizes stated above are identical to
* the DryGASCON128 documentation and may be a copy/paste leftover - confirm
* them against the DryGASCON256 key and tag size definitions.
*
* \return 0 on success, -1 if the authentication tag was incorrect,
* or some other negative number if there was an error in the parameters.
*
* \sa drygascon256_aead_encrypt()
*/
int drygascon256_aead_decrypt
(unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k);
/**
* \brief Hashes a block of input data with DryGASCON128-HASH.
*
* \param out Buffer to receive the hash output which must be at least
* DRYGASCON128_HASH_SIZE bytes in length.
* \param in Points to the input data to be hashed.
* \param inlen Length of the input data in bytes.
*
* \return Returns zero on success or -1 if there was an error in the
* parameters.
*/
int drygascon128_hash
(unsigned char *out, const unsigned char *in, unsigned long long inlen);
/**
* \brief Hashes a block of input data with DryGASCON256-HASH.
*
* \param out Buffer to receive the hash output which must be at least
* DRYGASCON256_HASH_SIZE bytes in length.
* \param in Points to the input data to be hashed.
* \param inlen Length of the input data in bytes.
*
* \return Returns zero on success or -1 if there was an error in the
* parameters.
*/
int drygascon256_hash
(unsigned char *out, const unsigned char *in, unsigned long long inlen);
#ifdef __cplusplus
}
#endif
#endif
/**
DryGascon128 'v6m implementation'
Sebastien Riou, May 27th 2020
Implementation optimized for ARM-Cortex-M0 (Size and Speed)
*/
#if defined(__DRYGASCON_ARM_SELECTOR_H__)
.cpu cortex-m0
.syntax unified
.code 16
.thumb_func
.align 1
.global drygascon128_g_v6m
.global drygascon128_f_v6m
//Byte offsets into the state structure passed in r0.
//c: five 8-byte words at offsets 0, 8, 16, 24 and 32
.equ C0, 0
.equ C1, C0+8
.equ C2, C0+16
.equ C3, C0+24
.equ C4, C0+32
//r: two 8-byte words at offsets 48 and 56
.equ R0, 48
.equ R1, R0+8
//x: starts at offset 64
.equ X0, 64
.equ X1, X0+8
//"L" names address the 32-bit word at the base offset of each 8-byte word
.equ X0L, X0
.equ X1L, X1
.equ C0L, C0
.equ C1L, C1
.equ C2L, C2
.equ C3L, C3
.equ C4L, C4
.equ R0L, R0
.equ R1L, R1
//"H" names address the 32-bit word at base offset + 4
.equ X0H, X0+4
.equ X1H, X1+4
.equ C0H, C0+4
.equ C1H, C1+4
.equ C2H, C2+4
.equ C3H, C3+4
.equ C4H, C4+4
.equ R0H, R0+4
.equ R1H, R1+4
//r viewed as four consecutive 32-bit words
.equ R32_0, R0L
.equ R32_1, R0H
.equ R32_2, R1L
.equ R32_3, R1H
//drygascon128_g_v6m: clears the four 32-bit words of r, then runs "rounds"
//rounds on c. After each round both halves of the updated c0..c3 are XORed
//into r.
//The label drygascon128_g_v6m_main_loop is also entered by a branch from
//drygascon128_f_v6m, which relies on the stack layout described below.
.type drygascon128_g_v6m, %function
drygascon128_g_v6m:
//r0: state: c,r,x
//r1: rounds
push {r4, r5, r6, r7, lr}
//stack vars:
// 8 round (down counter)
// 4 base address for round constant lookups
// 0 state address
//r=0
movs r5,#0
str r5,[r0,#R32_0]
str r5,[r0,#R32_1]
str r5,[r0,#R32_2]
str r5,[r0,#R32_3]
//round=r6=rounds-1;
subs r6,r1,#1
//base = round_cst+12-rounds
adr r5, round_cst
adds r5,r5,#12
subs r5,r5,r1
push {r0,r5,r6}
//load the lower halves of c0..c4 into r0..r4 (r0 is loaded last because
//it still holds the state address)
ldr r4,[r0,#C4L]
ldr r3,[r0,#C3L]
ldr r2,[r0,#C2L]
ldr r1,[r0,#C1L]
ldr r0,[r0,#C0L]
//loop entry
//the loop body always executes at least once, so rounds is assumed to be >= 1
//round_cst has 12 entries, so rounds is presumably at most 12 - TODO confirm
drygascon128_g_v6m_main_loop:
//r0~r4: lower half of each words of the state
//r5: base for round constants
//r6: round, counting from rounds-1 to 0
//r6 = ((0xf - r6) << 4) | r6;
//(implemented as a table lookup: r6 = base[round])
ldrb r6,[r5,r6]
// addition of round constant
//r2 ^= r6;
eors r2,r2,r6
// substitution layer, lower half
//r5,r6,r7 are used as scratch registers
eors r0,r0,r4
eors r4,r4,r3
eors r2,r2,r1
mvns r5,r0
mvns r6,r3
mvns r7,r4
ands r5,r5,r1
ands r6,r6,r4
eors r4,r4,r5
ands r7,r7,r0
mvns r5,r2
ands r5,r5,r3
eors r3,r3,r7
mvns r7,r1
ands r7,r7,r2
eors r2,r2,r6
eors r3,r3,r2
mvns r2,r2
eors r0,r0,r7
eors r1,r1,r5
eors r1,r1,r0
eors r0,r0,r4
//save the substituted lower halves and load the upper halves in r0..r4
ldr r7,[sp,#0]
str r4,[r7,#C4L]
str r3,[r7,#C3L]
str r2,[r7,#C2L]
str r1,[r7,#C1L]
str r0,[r7,#C0L]
ldr r4,[r7,#C4H]
ldr r3,[r7,#C3H]
ldr r2,[r7,#C2H]
ldr r1,[r7,#C1H]
ldr r0,[r7,#C0H]
// substitution layer, upper half
eors r0,r0,r4
eors r4,r4,r3
eors r2,r2,r1
mvns r5,r0
mvns r6,r3
mvns r7,r4
ands r5,r5,r1
ands r6,r6,r4
eors r4,r4,r5
ands r7,r7,r0
mvns r5,r2
ands r5,r5,r3
eors r3,r3,r7
mvns r7,r1
ands r7,r7,r2
eors r2,r2,r6
eors r3,r3,r2
mvns r2,r2
eors r0,r0,r7
eors r1,r1,r5
eors r1,r1,r0
eors r0,r0,r4
// linear diffusion layer
//from here r0..r4 hold the upper halves; the lower halves are read back
//from the state. Rotation amounts are loaded into a register before each rors.
ldr r7,[sp,#0]
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 high part
//C4H = C4H ^ ror(C4H,20) ^ ror(C4L,4)
movs r6,r4
movs r5,#(20)
rors r4,r4,r5
eors r6,r6,r4
ldr r5,[r7,#C4L]
movs r7,#(4)
rors r5,r5,r7
eors r6,r6,r5
ldr r7,[sp,#0]
str r6,[r7,#C4H]
//c4 low part
//C4L = C4L ^ ror(C4H,3) ^ ror(C4L,20)
//rotating r5 by 32-4 undoes the previous rotation by 4;
//r4 is already rotated by 20, so 32-20+3 more gives a total rotation of 3
movs r7,#(32-4)
rors r5,r5,r7
movs r6,r5
movs r7,#((32-20+3)%32)
rors r4,r4,r7
eors r4,r4,r6
movs r7,#(20)
rors r5,r5,r7
eors r4,r4,r5
ldr r7,[sp,#0]
str r4,[r7,#C4L]
//from here r4 is free and used as scratch; r7 holds the state address
//c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19);
//c0 high part
movs r6,r0
movs r5,#(14)
rors r0,r0,r5
eors r6,r6,r0
ldr r5,[r7,#C0L]
movs r4,#(10)
rors r5,r5,r4
eors r6,r6,r5
str r6,[r7,#C0H]
//accumulate: R32_1 ^= C0H
ldr r4,[r7,#R32_1]
eors r4,r4,r6
str r4,[r7,#R32_1]
//c0 low part
movs r4,#(32-10)
rors r5,r5,r4
movs r6,r5
movs r4,#((32-14+9)%32)
rors r0,r0,r4
eors r0,r0,r6
movs r4,#(14)
rors r5,r5,r4
eors r0,r0,r5
//accumulate: R32_0 ^= C0L
ldr r4,[r7,#R32_0]
eors r4,r4,r0
str r4,[r7,#R32_0]
//c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61);
//c1 high part
movs r6,r1
movs r5,#(19)
rors r1,r1,r5
eors r6,r6,r1
ldr r5,[r7,#C1L]
movs r4,#(31)
rors r5,r5,r4
eors r6,r6,r5
str r6,[r7,#C1H]
//accumulate: R32_3 ^= C1H
ldr r4,[r7,#R32_3]
eors r4,r4,r6
str r4,[r7,#R32_3]
//c1 low part
movs r4,#(32-31)
rors r5,r5,r4
movs r6,r5
movs r4,#((32-19+30)%32)
rors r1,r1,r4
eors r1,r1,r6
movs r4,#(19)
rors r5,r5,r4
eors r1,r1,r5
//accumulate: R32_2 ^= C1L
ldr r4,[r7,#R32_2]
eors r4,r4,r1
str r4,[r7,#R32_2]
//c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1);
//c2 high part
movs r6,r2
movs r5,#(3)
rors r2,r2,r5
eors r6,r6,r2
ldr r5,[r7,#C2L]
movs r4,#(1)
rors r5,r5,r4
eors r6,r6,r5
str r6,[r7,#C2H]
//accumulate: R32_0 ^= C2H
ldr r4,[r7,#R32_0]
eors r4,r4,r6
str r4,[r7,#R32_0]
//c2 low part
movs r4,#(32-1)
rors r5,r5,r4
movs r6,r5
movs r4,#((32-3+0)%32)
rors r2,r2,r4
eors r2,r2,r6
movs r4,#(3)
rors r5,r5,r4
eors r2,r2,r5
//accumulate: R32_3 ^= C2L
ldr r4,[r7,#R32_3]
eors r4,r4,r2
str r4,[r7,#R32_3]
//c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17);
//c3 high part
movs r6,r3
movs r5,#(5)
rors r3,r3,r5
eors r6,r6,r3
ldr r5,[r7,#C3L]
movs r4,#(9)
rors r5,r5,r4
eors r6,r6,r5
str r6,[r7,#C3H]
//accumulate: R32_2 ^= C3H
ldr r4,[r7,#R32_2]
eors r4,r4,r6
str r4,[r7,#R32_2]
//c3 low part
movs r4,#(32-9)
rors r5,r5,r4
movs r6,r5
movs r4,#((32-5+8)%32)
rors r3,r3,r4
eors r3,r3,r6
movs r4,#(5)
rors r5,r5,r4
eors r3,r3,r5
//accumulate: R32_1 ^= C3L
ldr r4,[r7,#R32_1]
eors r4,r4,r3
str r4,[r7,#R32_1]
//reload C4L, the constant base and the round counter for the next round
//r0..r3 already hold the new lower halves of c0..c3
ldr r4,[r7,#C4L]
ldr r5,[sp,#4]
ldr r6,[sp,#8]
subs r6,#1
bmi drygascon128_g_v6m_exit
str r6,[sp,#8]
b drygascon128_g_v6m_main_loop
drygascon128_g_v6m_exit:
//write back the lower halves of c0..c3 (C4L and all upper halves were
//already stored inside the loop)
str r3,[r7,#C3L]
str r2,[r7,#C2L]
str r1,[r7,#C1L]
str r0,[r7,#C0L]
//drop the three stack vars and return
add sp,sp,#12
pop {r4, r5, r6, r7, pc}
.size drygascon128_g_v6m, .-drygascon128_g_v6m
.align 2
//drygascon128_f_v6m: clears r, splits the four input words and ds into 14
//values of 10 bits, and for each value XORs x words (selected by 2-bit fields
//of the value) into the lower halves of c0..c4. One round with constant 0xf0
//is run between consecutive mixes. After the last mix the function branches
//into drygascon128_g_v6m_main_loop, which runs "rounds" rounds and returns to
//the caller through drygascon128_g_v6m_exit.
.type drygascon128_f_v6m, %function
drygascon128_f_v6m:
//r0:state c r x
//r1:input -> shall be 32 bit aligned
//r2:ds
//r3:rounds
push {r4, r5, r6, r7, lr}
//stack frame:
//0 ~ 28-1: buf
//28 :pointer on c
//32 : rounds for g
//36 :mix round / g round
//the mix counter starts at 26, the buf offset of the first value to process
movs r4,#26
push {r0,r3,r4}
sub sp,sp,#28
//load 10 bit mask in r4 = 0x3FF
movs r4,#0xFF
lsls r4,r4,#2
adds r4,r4,#3
movs r7,#0
//r=0
str r7,[r0,#R32_0]
str r7,[r0,#R32_1]
str r7,[r0,#R32_2]
str r7,[r0,#R32_3]
//r7 = sp
add r7,r7,sp
//fill buf with 14 halfwords of 10 bits each, from offset 26 down to 0
//input word 0: three values, 2 bits left over in r5
ldr r3,[r1]
movs r5,r4
ands r5,r5,r3
strh r5,[r7,#0+26]
lsrs r3,r3,#10
movs r5,r4
ands r5,r5,r3
strh r5,[r7,#0+24]
lsrs r3,r3,#10
movs r5,r4
ands r5,r5,r3
strh r5,[r7,#0+22]
lsrs r5,r3,#10
//input word 1: the 2 left over bits are combined with its 8 low bits,
//then two more values, 4 bits left over in r5
ldr r3,[r1,#4]
lsls r6,r3,#2
lsrs r3,r3,#8
orrs r6,r6,r5
movs r5,r4
ands r5,r5,r6
strh r5,[r7,#0+20]
movs r5,r4
ands r5,r5,r3
strh r5,[r7,#0+18]
lsrs r3,r3,#10
movs r5,r4
ands r5,r5,r3
strh r5,[r7,#0+16]
lsrs r5,r3,#10
//input word 2: the 4 left over bits are combined with its 6 low bits,
//then two more values, 6 bits left over in r5
ldr r3,[r1,#8]
lsls r6,r3,#4
lsrs r3,r3,#6
orrs r6,r6,r5
movs r5,r4
ands r5,r5,r6
strh r5,[r7,#0+14]
movs r5,r4
ands r5,r5,r3
strh r5,[r7,#0+12]
lsrs r3,r3,#10
movs r5,r4
ands r5,r5,r3
strh r5,[r7,#0+10]
lsrs r5,r3,#10
//input word 3: the 6 left over bits are combined with its 4 low bits,
//then two more values, 8 bits left over in r5
ldr r3,[r1,#12]
lsls r6,r3,#6
lsrs r3,r3,#4
orrs r6,r6,r5
movs r5,r4
ands r5,r5,r6
strh r5,[r7,#0+8]
movs r5,r4
ands r5,r5,r3
strh r5,[r7,#0+6]
lsrs r3,r3,#10
movs r5,r4
ands r5,r5,r3
strh r5,[r7,#0+4]
lsrs r5,r3,#10
//ds: the 8 left over bits are combined with the 2 low bits of ds,
//the last value is ds>>2 masked to 10 bits
lsls r6,r2,#8
lsrs r3,r2,#2
orrs r6,r6,r5
movs r5,r4
ands r5,r5,r6
strh r5,[r7,#0+2]
movs r5,r4
ands r5,r5,r3
strh r5,[r7,#0+0]
movs r7,#26
drygascon128_f_v6m_mix128_main_loop:
//r7: offset in buf of the 10 bit value to mix
movs r6,#0
add r6,r6,sp
ldrh r6,[r6,r7]
//r6: 10 bit value, r7: state address, r5: address of x, r4: 0xc mask
ldr r5,[sp,#28]
movs r7,r5
adds r5,r5,#X0
movs r4,#0xc
//each 2-bit field of r6 is turned into a byte offset (0,4,8 or 12) into x
//bits 1:0 -> C0L
lsls r0,r6,#2
ands r0,r0,r4
ldr r1,[r5,r0]
ldr r0,[r7,#0*8]
eors r0,r0,r1
//bits 3:2 -> C1L
lsrs r1,r6,#0
ands r1,r1,r4
ldr r2,[r5,r1]
ldr r1,[r7,#1*8]
eors r1,r1,r2
//bits 5:4 -> C2L
lsrs r2,r6,#2
ands r2,r2,r4
ldr r3,[r5,r2]
ldr r2,[r7,#2*8]
eors r2,r2,r3
//bits 7:6 -> C3L
lsrs r3,r6,#4
ands r3,r3,r4
ldr r4,[r5,r3]
ldr r3,[r7,#3*8]
eors r3,r3,r4
//bits 9:8 -> C4L (no mask needed, r6 holds only 10 bits)
lsrs r4,r6,#6+2
lsls r4,r4,#2
ldr r6,[r5,r4]
ldr r4,[r7,#4*8]
eors r4,r4,r6
//r0..r4 now hold the mixed lower halves of c0..c4 (not yet stored)
//step the buf offset down by 2; once it goes negative the last value
//has been mixed
ldr r6,[sp,#36]
subs r6,#2
bpl drygascon128_f_v6m_mix128_coreround
b drygascon128_f_v6m_mix128_exit
drygascon128_f_v6m_mix128_coreround:
str r6,[sp,#36]
//fixed round constant for the rounds run between mixes
movs r6,#0xf0
// addition of round constant
//r2 ^= r6;
eors r2,r2,r6
// substitution layer, lower half
eors r0,r0,r4
eors r4,r4,r3
eors r2,r2,r1
mvns r5,r0
mvns r6,r3
mvns r7,r4
ands r5,r5,r1
ands r6,r6,r4
eors r4,r4,r5
ands r7,r7,r0
mvns r5,r2
ands r5,r5,r3
eors r3,r3,r7
mvns r7,r1
ands r7,r7,r2
eors r2,r2,r6
eors r3,r3,r2
mvns r2,r2
eors r0,r0,r7
eors r1,r1,r5
eors r1,r1,r0
eors r0,r0,r4
//save the substituted lower halves and load the upper halves in r0..r4
ldr r7,[sp,#28]
str r4,[r7,#C4L]
str r3,[r7,#C3L]
str r2,[r7,#C2L]
str r1,[r7,#C1L]
str r0,[r7,#C0L]
ldr r4,[r7,#C4H]
ldr r3,[r7,#C3H]
ldr r2,[r7,#C2H]
ldr r1,[r7,#C1H]
ldr r0,[r7,#C0H]
// substitution layer, upper half
eors r0,r0,r4
eors r4,r4,r3
eors r2,r2,r1
mvns r5,r0
mvns r6,r3
mvns r7,r4
ands r5,r5,r1
ands r6,r6,r4
eors r4,r4,r5
ands r7,r7,r0
mvns r5,r2
ands r5,r5,r3
eors r3,r3,r7
mvns r7,r1
ands r7,r7,r2
eors r2,r2,r6
eors r3,r3,r2
mvns r2,r2
eors r0,r0,r7
eors r1,r1,r5
eors r1,r1,r0
eors r0,r0,r4
// linear diffusion layer
//same sequence as in drygascon128_g_v6m but without the XORs into r
ldr r7,[sp,#28]
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 high part
movs r6,r4
movs r5,#(20)
rors r4,r4,r5
eors r6,r6,r4
ldr r5,[r7,#C4L]
movs r7,#(4)
rors r5,r5,r7
eors r6,r6,r5
ldr r7,[sp,#28]
str r6,[r7,#C4H]
//c4 low part
movs r7,#(32-4)
rors r5,r5,r7
movs r6,r5
movs r7,#((32-20+3)%32)
rors r4,r4,r7
eors r4,r4,r6
movs r7,#(20)
rors r5,r5,r7
eors r4,r4,r5
ldr r7,[sp,#28]
str r4,[r7,#C4L]
//c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19);
//c0 high part
movs r6,r0
movs r5,#(14)
rors r0,r0,r5
eors r6,r6,r0
ldr r5,[r7,#C0L]
movs r4,#(10)
rors r5,r5,r4
eors r6,r6,r5
str r6,[r7,#C0H]
//c0 low part
movs r4,#(32-10)
rors r5,r5,r4
movs r6,r5
movs r4,#((32-14+9)%32)
rors r0,r0,r4
eors r0,r0,r6
movs r4,#(14)
rors r5,r5,r4
eors r0,r0,r5
//c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61);
//c1 high part
movs r6,r1
movs r5,#(19)
rors r1,r1,r5
eors r6,r6,r1
ldr r5,[r7,#C1L]
movs r4,#(31)
rors r5,r5,r4
eors r6,r6,r5
str r6,[r7,#C1H]
//c1 low part
movs r4,#(32-31)
rors r5,r5,r4
movs r6,r5
movs r4,#((32-19+30)%32)
rors r1,r1,r4
eors r1,r1,r6
movs r4,#(19)
rors r5,r5,r4
eors r1,r1,r5
//c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1);
//c2 high part
movs r6,r2
movs r5,#(3)
rors r2,r2,r5
eors r6,r6,r2
ldr r5,[r7,#C2L]
movs r4,#(1)
rors r5,r5,r4
eors r6,r6,r5
str r6,[r7,#C2H]
//c2 low part
movs r4,#(32-1)
rors r5,r5,r4
movs r6,r5
movs r4,#((32-3+0)%32)
rors r2,r2,r4
eors r2,r2,r6
movs r4,#(3)
rors r5,r5,r4
eors r2,r2,r5
//c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17);
//c3 high part
movs r6,r3
movs r5,#(5)
rors r3,r3,r5
eors r6,r6,r3
ldr r5,[r7,#C3L]
movs r4,#(9)
rors r5,r5,r4
eors r6,r6,r5
str r6,[r7,#C3H]
//c3 low part
movs r4,#(32-9)
rors r5,r5,r4
movs r6,r5
movs r4,#((32-5+8)%32)
rors r3,r3,r4
eors r3,r3,r6
movs r4,#(5)
rors r5,r5,r4
eors r3,r3,r5
//store the lower halves of c0..c3; the next mix reads them back from the state
str r3,[r7,#C3L]
str r2,[r7,#C2L]
str r1,[r7,#C1L]
str r0,[r7,#C0L]
//r7 = buf offset of the next value to mix
ldr r7,[sp,#36]
b drygascon128_f_v6m_mix128_main_loop
drygascon128_f_v6m_mix128_exit:
//set up registers and stack as expected at drygascon128_g_v6m_main_loop:
//r0~r4 mixed lower halves, r5 constant base, r6 round counter,
//stack: 0 state address, 4 constant base, 8 round counter
ldr r7,[sp,#32]
//round=r6=rounds-1;
subs r6,r7,#1
//base = round_cst+12-rounds
adr r5, round_cst
adds r5,r5,#12
subs r5,r5,r7
//release buf
add sp,sp,#28
str r5,[sp,#4]
str r6,[sp,#8]
//push {r0,r1,r2,r3}
//ldr r0,[sp,#16]
//bl print_state
//pop {r0,r1,r2,r3}
b drygascon128_g_v6m_main_loop
.align 2
//round constants, indexed as round_cst[12-rounds+round] with round counting
//down, so the first round executed always uses 0xf0
round_cst:
.byte 0x4b
.byte 0x5a
.byte 0x69
.byte 0x78
.byte 0x87
.byte 0x96
.byte 0xa5
.byte 0xb4
.byte 0xc3
.byte 0xd2
.byte 0xe1
.byte 0xf0
.align 2
.size drygascon128_f_v6m, .-drygascon128_f_v6m
#endif
/**
DryGascon128 'v7m implementation'
Sebastien Riou, May 27th 2020
Implementation optimized for ARM-Cortex-M7/M4/M3 (Size and Speed)
Safe against timing attack on X look up operations under
the following conditions: (safe if at least one line is true)
- System without cache
- State stored in non cacheable memory (like DTCM)
- Cache lines are 16 bytes or larger AND X is 16 bytes aligned
Notes:
- Arm Cortex-M7 Processor Technical Reference Manual Revision r1p2 states
that data cache line size is 32 bytes.
- Microchip app note TB3186 shows that Microchip uses 16 bytes cache lines.
- ST does not give a general statement about cache lines for its products based
on M3 and M4. That said, the STM32F411xC/E datasheet (RM0383
Reference manual) shows data cache lines of 16 bytes.
- In the unlikely case in which none of the conditions can be met,
the 'v7m_fpu_x' implementation can be used to prevent this attack.
Note that implementation 'v7m_fpu' is faster (but requires FPU).
*/
#if defined(__DRYGASCON_ARM_SELECTOR_H__)
.cpu cortex-m3
.syntax unified
.code 16
.thumb_func
.align 1
.global drygascon128_g_v7m
.global drygascon128_f_v7m
.global drygascon128_g0_v7m
//Byte offsets into the state structure passed in r0.
//c: five 8-byte words at offsets 0, 8, 16, 24 and 32
.equ C0, 0
.equ C1, C0+8
.equ C2, C0+16
.equ C3, C0+24
.equ C4, C0+32
//r: two 8-byte words at offsets 48 and 56
.equ R0, 48
.equ R1, R0+8
//x: starts at offset 64
.equ X0, 64
.equ X1, X0+8
//"L" names address the 32-bit word at the base offset of each 8-byte word
.equ X0L, X0
.equ X1L, X1
.equ C0L, C0
.equ C1L, C1
.equ C2L, C2
.equ C3L, C3
.equ C4L, C4
.equ R0L, R0
.equ R1L, R1
//"H" names address the 32-bit word at base offset + 4
.equ X0H, X0+4
.equ X1H, X1+4
.equ C0H, C0+4
.equ C1H, C1+4
.equ C2H, C2+4
.equ C3H, C3+4
.equ C4H, C4+4
.equ R0H, R0+4
.equ R1H, R1+4
//r viewed as four consecutive 32-bit words
.equ R32_0, R0L
.equ R32_1, R0H
.equ R32_2, R1L
.equ R32_3, R1H
//drygascon128_g_v7m: clears the four 32-bit words of r, then runs "rounds"
//rounds on c, which is kept entirely in r0..r9. After each round both halves
//of the updated c0..c3 are XORed into r.
//The label drygascon128_g_v7m_main_loop is also entered by a branch from
//drygascon128_f_v7m, which relies on the stack layout described below.
.type drygascon128_g_v7m, %function
drygascon128_g_v7m:
//r0: state: c,r,x
//r1: rounds
push {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
//stack vars:
// 8 round
// 4 rounds (base address for lookups)
// 0 state address
//r=0
movs r10,#0
str r10,[r0,#R32_0]
str r10,[r0,#R32_1]
str r10,[r0,#R32_2]
str r10,[r0,#R32_3]
//round=r11=rounds-1;
subs r11,r1,#1
//base = round_cst+12-rounds
adr r10, round_cst
adds r10,r10,#12
subs r10,r10,r1
push {r0,r10,r11}
//Load C
//r0=C0L r1=C0H r2=C1L r3=C1H r4=C2L r5=C2H r6=C3L r7=C3H r8=C4L r9=C4H
adds r14,r0,#C0
LDMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
//loop entry
//the loop body always executes at least once, so rounds is assumed to be >= 1
//round_cst has 12 entries, so rounds is presumably at most 12 - TODO confirm
drygascon128_g_v7m_main_loop:
//r0~r9: c
//r10: base for round constants
//r11: round, counting from rounds-1 to 0
//r11 = ((0xf - r11) << 4) | r11;
//(implemented as a table lookup: r11 = base[round])
ldrb r11,[r10,r11]
//state:
//r0 to r9: c
//r11: constant to add as round constant
//r14: pointer on C
// addition of round constant
//C2L ^= round constant;
eors r4,r4,r11
// substitution layer, lower half
//r10,r11,r12 are used as scratch registers
eors r0,r0,r8
eors r8,r8,r6
eors r4,r4,r2
mvns r10,r0
mvns r11,r6
mvns r12,r8
ands r10,r10,r2
ands r11,r11,r8
eors r8,r8,r10
ands r12,r12,r0
mvns r10,r4
ands r10,r10,r6
eors r6,r6,r12
mvns r12,r2
ands r12,r12,r4
eors r4,r4,r11
eors r6,r6,r4
mvns r4,r4
eors r0,r0,r12
eors r2,r2,r10
eors r2,r2,r0
eors r0,r0,r8
// substitution layer, upper half
eors r1,r1,r9
eors r9,r9,r7
eors r5,r5,r3
mvns r10,r1
mvns r11,r7
mvns r12,r9
ands r10,r10,r3
ands r11,r11,r9
eors r9,r9,r10
ands r12,r12,r1
mvns r10,r5
ands r10,r10,r7
eors r7,r7,r12
mvns r12,r3
ands r12,r12,r5
eors r5,r5,r11
eors r7,r7,r5
mvns r5,r5
eors r1,r1,r12
eors r3,r3,r10
eors r3,r3,r1
eors r1,r1,r9
// linear diffusion layer
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 high part
//C4H = C4H ^ ror(C4H,20) ^ ror(C4L,4)
rors r11,r9,#(20)
eors r9,r11,r9
rors r10,r8,#(4)
eors r9,r10,r9
//c4 low part
//C4L = C4L ^ ror(C4H,3) ^ ror(C4L,20)
//r11 is already the old C4H rotated by 20, 32-20+3 more gives a rotation of 3
rors r11,r11,#((32-20+3)%32)
eors r11,r11,r8
rors r10,r8,#(20)
eors r8,r10,r11
//c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19);
//c0 high part
rors r11,r1,#(14)
eors r1,r11,r1
rors r10,r0,#(10)
eors r1,r10,r1
//accumulate: R32_1 ^= C0H
ldr r12,[r14,#R32_1-C0]
eors r12,r12,r1
str r12,[r14,#R32_1-C0]
//c0 low part
rors r11,r11,#((32-14+9)%32)
eors r11,r11,r0
rors r10,r0,#(14)
eors r0,r10,r11
//accumulate: R32_0 ^= C0L
ldr r12,[r14,#R32_0-C0]
eors r12,r12,r0
str r12,[r14,#R32_0-C0]
//c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61);
//c1 high part
rors r11,r3,#(19)
eors r3,r11,r3
rors r10,r2,#(31)
eors r3,r10,r3
//accumulate: R32_3 ^= C1H
ldr r12,[r14,#R32_3-C0]
eors r12,r12,r3
str r12,[r14,#R32_3-C0]
//c1 low part
rors r11,r11,#((32-19+30)%32)
eors r11,r11,r2
rors r10,r2,#(19)
eors r2,r10,r11
//accumulate: R32_2 ^= C1L
ldr r12,[r14,#R32_2-C0]
eors r12,r12,r2
str r12,[r14,#R32_2-C0]
//c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1);
//c2 high part
rors r11,r5,#(3)
eors r5,r11,r5
rors r10,r4,#(1)
eors r5,r10,r5
//accumulate: R32_0 ^= C2H
ldr r12,[r14,#R32_0-C0]
eors r12,r12,r5
str r12,[r14,#R32_0-C0]
//c2 low part
rors r11,r11,#((32-3+0)%32)
eors r11,r11,r4
rors r10,r4,#(3)
eors r4,r10,r11
//accumulate: R32_3 ^= C2L
ldr r12,[r14,#R32_3-C0]
eors r12,r12,r4
str r12,[r14,#R32_3-C0]
//c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17);
//c3 high part
rors r11,r7,#(5)
eors r7,r11,r7
rors r10,r6,#(9)
eors r7,r10,r7
//accumulate: R32_2 ^= C3H
ldr r12,[r14,#R32_2-C0]
eors r12,r12,r7
str r12,[r14,#R32_2-C0]
//c3 low part
rors r11,r11,#((32-5+8)%32)
eors r11,r11,r6
rors r10,r6,#(5)
eors r6,r10,r11
//accumulate: R32_1 ^= C3L
ldr r12,[r14,#R32_1-C0]
eors r12,r12,r6
str r12,[r14,#R32_1-C0]
//state:
//r0 to r9: c
//r10,r11,r12 destroyed
//reload the constant base and the round counter for the next round
ldr r10,[sp,#4]
ldr r11,[sp,#8]
subs r11,#1
bmi drygascon128_g_v7m_exit
str r11,[sp,#8]
b drygascon128_g_v7m_main_loop
drygascon128_g_v7m_exit:
//update C
STMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
//drop the three stack vars and return
add sp,sp,#12
pop {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc}
.size drygascon128_g_v7m, .-drygascon128_g_v7m
.align 2
//drygascon128_f_v7m: clears r, then consumes the input and ds 10 bits at a
//time (14 values). For each value, x words selected by its 2-bit fields are
//XORed into the lower halves of c0..c4. One round with constant 0xf0 is run
//between consecutive mixes. After the last mix the function branches into
//drygascon128_g_v7m_main_loop, which runs "rounds" rounds and returns to the
//caller through drygascon128_g_v7m_exit.
.type drygascon128_f_v7m, %function
drygascon128_f_v7m:
//r0:state
//r1:input
//r2:ds
//r3:rounds
push {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
//stack frame:
//0: pointer on input
//4: DS value
//8 :pointer on state
//12 : rounds for g
//16 :mix round / g round
movs r10,#0 //init of input bit counter
push {r0,r3,r10} //make the same stack frame as drygascon128_g_v7m
push {r1,r2}
//r=0
str r10,[r0,#R32_0]
str r10,[r0,#R32_1]
str r10,[r0,#R32_2]
str r10,[r0,#R32_3]
//Load C
//r0=C0L r1=C0H r2=C1L r3=C1H r4=C2L r5=C2H r6=C3L r7=C3H r8=C4L r9=C4H
adds r11,r0,#C0
LDMIA.W r11,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
drygascon128_f_v7m_mix128_main_loop:
//r10 is input bit counter
ldr r11,[sp,#0] //r11 is pointer on input
//r10 r12 shift
// 0 0 0
// 10 1 2
// 20 2 4
// 30 3 6
// 40 5 0
// 50 6 2
// 60 7 4
// 70 8 6
// 80 10 0
// 90 11 2
// 100 12 4
// 110 13 6
// 120 15 0
// 130 16 2 --> we do that operation for 2 last bits in a special last loop
cmp r10,#120
bne drygascon128_f_v7m_mix128_main_loop.regular
//we execute this only during the penultimate operation
//r14 = last input byte in bits 7:0, the 2 lsb of DS land in bits 9:8
//(r10 is used as scratch here, the counter is reloaded from the stack later)
ldrb r14,[r11,#15]
ldr r10,[sp,#4]
lsl r10,r10,#8
eors r14,r14,r10
b drygascon128_f_v7m_mix128_main_loop.core
drygascon128_f_v7m_mix128_main_loop.regular:
//r12 is base byte: byte offset to read from input buffer
lsr r12,r10,#3 //divide by 8 to get base byte
//r10 becomes shift
lsl r14,r12,#3
sub r10,r10,r14
//NOTE(review): for r10 = 110 the base byte is 13, so this 4-byte load also
//fetches the byte at input offset 16. Only the low 10 bits after the shift
//are used below - confirm that reading one byte past a 16-byte input is
//acceptable for all callers.
ldr r14,[r11,r12] //M7 supports unaligned access with ldr
lsr r14,r14,r10
drygascon128_f_v7m_mix128_main_loop.core:
//r14: value to mix, only bits 9:0 are used
//r10: address of x; each 2-bit field becomes a byte offset (0,4,8 or 12)
ldr r10,[sp,#8]
adds r10,r10,#X0
//bits 1:0 -> C0L
lsls r11,r14,#2
ands r11,r11,#0xc
ldr r11,[r10,r11]
eors r0,r0,r11
//bits 3:2 -> C1L
lsrs r11,r14,#0
ands r11,r11,#0xc
ldr r11,[r10,r11]
eors r2,r2,r11
//bits 5:4 -> C2L
lsrs r11,r14,#2
ands r11,r11,#0xc
ldr r11,[r10,r11]
eors r4,r4,r11
//bits 7:6 -> C3L
lsrs r11,r14,#4
ands r11,r11,#0xc
ldr r11,[r10,r11]
eors r6,r6,r11
//bits 9:8 -> C4L
lsrs r11,r14,#6
ands r11,r11,#0xc
ldr r11,[r10,r11]
eors r8,r8,r11
//advance the bit counter by 10; 140 means all 14 values have been mixed
ldr r10,[sp,#16]
adds r10,#10
cmp r10,#140
beq drygascon128_f_v7m_mix128_exit
drygascon128_f_v7m_mix128_coreround:
str r10,[sp,#16]
//fixed round constant for the rounds run between mixes
movs r11,#0xf0
//state:
//r0 to r9: c
//r11: constant to add as round constant
// addition of round constant
//C2L ^= round constant;
eors r4,r4,r11
// substitution layer, lower half
eors r0,r0,r8
eors r8,r8,r6
eors r4,r4,r2
mvns r10,r0
mvns r11,r6
mvns r12,r8
ands r10,r10,r2
ands r11,r11,r8
eors r8,r8,r10
ands r12,r12,r0
mvns r10,r4
ands r10,r10,r6
eors r6,r6,r12
mvns r12,r2
ands r12,r12,r4
eors r4,r4,r11
eors r6,r6,r4
mvns r4,r4
eors r0,r0,r12
eors r2,r2,r10
eors r2,r2,r0
eors r0,r0,r8
// substitution layer, upper half
eors r1,r1,r9
eors r9,r9,r7
eors r5,r5,r3
mvns r10,r1
mvns r11,r7
mvns r12,r9
ands r10,r10,r3
ands r11,r11,r9
eors r9,r9,r10
ands r12,r12,r1
mvns r10,r5
ands r10,r10,r7
eors r7,r7,r12
mvns r12,r3
ands r12,r12,r5
eors r5,r5,r11
eors r7,r7,r5
mvns r5,r5
eors r1,r1,r12
eors r3,r3,r10
eors r3,r3,r1
eors r1,r1,r9
// linear diffusion layer
//same sequence as in drygascon128_g_v7m but without the XORs into r
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 high part
rors r11,r9,#(20)
eors r9,r11,r9
rors r10,r8,#(4)
eors r9,r10,r9
//c4 low part
rors r11,r11,#((32-20+3)%32)
eors r11,r11,r8
rors r10,r8,#(20)
eors r8,r10,r11
//c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19);
//c0 high part
rors r11,r1,#(14)
eors r1,r11,r1
rors r10,r0,#(10)
eors r1,r10,r1
//c0 low part
rors r11,r11,#((32-14+9)%32)
eors r11,r11,r0
rors r10,r0,#(14)
eors r0,r10,r11
//c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61);
//c1 high part
rors r11,r3,#(19)
eors r3,r11,r3
rors r10,r2,#(31)
eors r3,r10,r3
//c1 low part
rors r11,r11,#((32-19+30)%32)
eors r11,r11,r2
rors r10,r2,#(19)
eors r2,r10,r11
//c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1);
//c2 high part
rors r11,r5,#(3)
eors r5,r11,r5
rors r10,r4,#(1)
eors r5,r10,r5
//c2 low part
rors r11,r11,#((32-3+0)%32)
eors r11,r11,r4
rors r10,r4,#(3)
eors r4,r10,r11
//c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17);
//c3 high part
rors r11,r7,#(5)
eors r7,r11,r7
rors r10,r6,#(9)
eors r7,r10,r7
//c3 low part
rors r11,r11,#((32-5+8)%32)
eors r11,r11,r6
rors r10,r6,#(5)
eors r6,r10,r11
//state:
//r0 to r9: c
//r10,r11,r12 destroyed
ldr r10,[sp,#16]
cmp r10,#130
bne drygascon128_f_v7m_mix128_main_loop
//prepare the last loop: load DS 2 msb
//r14 = DS >> 2, the 2 lsb of DS were consumed by the previous mix
ldr r14,[sp,4]
lsr r14,r14,#2
b drygascon128_f_v7m_mix128_main_loop.core
drygascon128_f_v7m_mix128_exit:
//set up registers and stack as expected at drygascon128_g_v7m_main_loop:
//r0~r9 c, r10 constant base, r11 round counter, r14 pointer on C,
//stack (after dropping input pointer and DS): 0 state address,
//4 constant base, 8 round counter
ldr r14,[sp,#12]
//round=r11=rounds-1;
subs r11,r14,#1
//base = round_cst+12-rounds
adr r10, round_cst
adds r10,r10,#12
subs r10,r10,r14
str r10,[sp,#12]
str r11,[sp,#16]
ldr r14,[sp,#8]
add sp,sp,#8
b drygascon128_g_v7m_main_loop
.align 2
//round constants, indexed as round_cst[12-rounds+round] with round counting
//down, so the first round executed always uses 0xf0
round_cst:
.byte 0x4b
.byte 0x5a
.byte 0x69
.byte 0x78
.byte 0x87
.byte 0x96
.byte 0xa5
.byte 0xb4
.byte 0xc3
.byte 0xd2
.byte 0xe1
.byte 0xf0
.align 2
.size drygascon128_f_v7m, .-drygascon128_f_v7m
//drygascon128_g0_v7m: runs a single round with constant 0xf0 on c.
//Unlike drygascon128_g_v7m it neither clears nor updates r.
.type drygascon128_g0_v7m, %function
drygascon128_g0_v7m:
//perform a single round without accumulate
//r0: state
push {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
//Load C
//r0=C0L r1=C0H r2=C1L r3=C1H r4=C2L r5=C2H r6=C3L r7=C3H r8=C4L r9=C4H
adds r14,r0,#C0
LDMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
//r0~r9: c
//r11 = ((0xf - 0) << 4) | 0;
movs r11,#0xf0
//state:
//r0 to r9: c
//r11: constant to add as round constant
// addition of round constant
//C2L ^= round constant;
eors r4,r4,r11
// substitution layer, lower half
//r10,r11,r12 are used as scratch registers
eors r0,r0,r8
eors r8,r8,r6
eors r4,r4,r2
mvns r10,r0
mvns r11,r6
mvns r12,r8
ands r10,r10,r2
ands r11,r11,r8
eors r8,r8,r10
ands r12,r12,r0
mvns r10,r4
ands r10,r10,r6
eors r6,r6,r12
mvns r12,r2
ands r12,r12,r4
eors r4,r4,r11
eors r6,r6,r4
mvns r4,r4
eors r0,r0,r12
eors r2,r2,r10
eors r2,r2,r0
eors r0,r0,r8
// substitution layer, upper half
eors r1,r1,r9
eors r9,r9,r7
eors r5,r5,r3
mvns r10,r1
mvns r11,r7
mvns r12,r9
ands r10,r10,r3
ands r11,r11,r9
eors r9,r9,r10
ands r12,r12,r1
mvns r10,r5
ands r10,r10,r7
eors r7,r7,r12
mvns r12,r3
ands r12,r12,r5
eors r5,r5,r11
eors r7,r7,r5
mvns r5,r5
eors r1,r1,r12
eors r3,r3,r10
eors r3,r3,r1
eors r1,r1,r9
// linear diffusion layer
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 high part
rors r11,r9,#(20)
eors r9,r11,r9
rors r10,r8,#(4)
eors r9,r10,r9
//c4 low part
rors r11,r11,#((32-20+3)%32)
eors r11,r11,r8
rors r10,r8,#(20)
eors r8,r10,r11
//c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19);
//c0 high part
rors r11,r1,#(14)
eors r1,r11,r1
rors r10,r0,#(10)
eors r1,r10,r1
//c0 low part
rors r11,r11,#((32-14+9)%32)
eors r11,r11,r0
rors r10,r0,#(14)
eors r0,r10,r11
//c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61);
//c1 high part
rors r11,r3,#(19)
eors r3,r11,r3
rors r10,r2,#(31)
eors r3,r10,r3
//c1 low part
rors r11,r11,#((32-19+30)%32)
eors r11,r11,r2
rors r10,r2,#(19)
eors r2,r10,r11
//c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1);
//c2 high part
rors r11,r5,#(3)
eors r5,r11,r5
rors r10,r4,#(1)
eors r5,r10,r5
//c2 low part
rors r11,r11,#((32-3+0)%32)
eors r11,r11,r4
rors r10,r4,#(3)
eors r4,r10,r11
//c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17);
//c3 high part
rors r11,r7,#(5)
eors r7,r11,r7
rors r10,r6,#(9)
eors r7,r10,r7
//c3 low part
rors r11,r11,#((32-5+8)%32)
eors r11,r11,r6
rors r10,r6,#(5)
eors r6,r10,r11
//state:
//r0 to r9: c
//r10,r11,r12 destroyed
//update C
STMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
pop {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc}
.size drygascon128_g0_v7m, .-drygascon128_g0_v7m
#endif
/**
DryGascon128 'v7m_fpu implementation'
Sebastien Riou, May 27th 2020
Implementation optimized for ARM-Cortex-M7/M4/M3 (Size and Speed)
Safe against timing attack on X look up operations under
the following conditions: (safe if at least one line is true)
- System without cache
- State stored in non cacheable memory (like DTCM)
- Cache lines are 16 bytes or larger AND X is 16 bytes aligned
Notes:
- Arm Cortex-M7 Processor Technical Reference Manual Revision r1p2 states
that data cache line size is 32 bytes.
- Microchip app note TB3186 shows that Microchip use 16 bytes cache lines.
- ST does not give a general statement about cache lines for its products based
on M3 and M4. That said STM32F411xC/E datasheet (RM0383
Reference manual) shows data cache lines of 16 bytes.
- In the unlikely case in which none of the condition can be met,
the 'v7m_fpu_x' can be used to prevent this attack.
*/
#if defined(__DRYGASCON_ARM_SELECTOR_H__)
//Assembler setup: Thumb code in unified syntax.
//NOTE(review): the functions of this variant use FPU instructions (VSUB.F32,
//vmov, VSTMIA) although .cpu selects cortex-m3; the FPU is presumably enabled
//through the assembler command line - confirm against the build flags.
.cpu cortex-m3
.syntax unified
.code 16
.thumb_func
.align 1
//Entry points exported by this variant
.global drygascon128_g_v7m_fpu
.global drygascon128_f_v7m_fpu
.global drygascon128_g0_v7m_fpu
//Byte offsets of the fields inside the state structure.
//C0..C4: five 64-bit words starting at offset 0
.equ C0, 0
.equ C1, C0+8
.equ C2, C0+16
.equ C3, C0+24
.equ C4, C0+32
//R0, R1: two 64-bit words starting at offset 48
.equ R0, 48
.equ R1, R0+8
//X0, X1: two 64-bit words starting at offset 64
.equ X0, 64
.equ X1, X0+8
//Offsets of the low 32-bit half of each 64-bit word (same as the word offset)
.equ X0L, X0
.equ X1L, X1
.equ C0L, C0
.equ C1L, C1
.equ C2L, C2
.equ C3L, C3
.equ C4L, C4
.equ R0L, R0
.equ R1L, R1
//Offsets of the high 32-bit half of each 64-bit word (word offset + 4)
.equ X0H, X0+4
.equ X1H, X1+4
.equ C0H, C0+4
.equ C1H, C1+4
.equ C2H, C2+4
.equ C3H, C3+4
.equ C4H, C4+4
.equ R0H, R0+4
.equ R1H, R1+4
//R viewed as four consecutive 32-bit words
.equ R32_0, R0L
.equ R32_1, R0H
.equ R32_2, R1L
.equ R32_3, R1H
//drygascon128_g_v7m_fpu: run "rounds" core rounds on C and accumulate R.
//In:  r0 = pointer to state (C at offset C0, R at offset R0)
//     r1 = number of rounds; the round constant is read from the 12-byte
//          round_cst table indexed from its end, so 1 <= rounds <= 12 is expected
//Out: the ten C words and the four R words are written back to the state
//R is cleared on entry and kept in S10-S13 (R32_0..R32_3) during the rounds.
//r4-r12 are saved on entry and restored on exit.
.type drygascon128_g_v7m_fpu, %function
drygascon128_g_v7m_fpu:
//r0: state: c,r,x
//r1: rounds
push {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
//stack vars:
// 8 round
// 4 rounds (base address for lookups)
// 0 state address
//r=0
//Clear R through a core register. Subtracting an FPU register from itself
//does not give 0 when it holds a NaN or infinity bit pattern (the result is
//NaN), and S10-S13 contain arbitrary 32-bit words left by earlier calls.
//r12 is free here: it has just been saved and is rewritten before its next use.
movs r12,#0
vmov S10,r12
vmov S11,r12
vmov S12,r12
vmov S13,r12
//round=r11=rounds-1;
subs r11,r1,#1
//base = round_cst+12-rounds
adr r10, round_cst
adds r10,r10,#12
subs r10,r10,r1
push {r0,r10,r11}
//Load C
adds r14,r0,#C0
LDMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
//loop entry
//assume r11>=0 at entry
//drygascon128_f_v7m_fpu also branches here, with the same register and stack layout
drygascon128_g_v7m_fpu_main_loop:
//r0~r9: c
//r10: base for round constants
//r11: round, counting from rounds-1 to 0
//r11 = ((0xf - r11) << 4) | r11;
ldrb r11,[r10,r11]
//state:
//r0 to r9: c
//r11: constant to add as round constant
//FPU:
//s10 to s13: r
// addition of round constant
//C2L ^= round constant;
eors r4,r4,r11
// substitution layer, lower half
eors r0,r0,r8
eors r8,r8,r6
eors r4,r4,r2
mvns r10,r0
mvns r11,r6
mvns r12,r8
ands r10,r10,r2
ands r11,r11,r8
eors r8,r8,r10
ands r12,r12,r0
mvns r10,r4
ands r10,r10,r6
eors r6,r6,r12
mvns r12,r2
ands r12,r12,r4
eors r4,r4,r11
eors r6,r6,r4
mvns r4,r4
eors r0,r0,r12
eors r2,r2,r10
eors r2,r2,r0
eors r0,r0,r8
// substitution layer, upper half
eors r1,r1,r9
eors r9,r9,r7
eors r5,r5,r3
mvns r10,r1
mvns r11,r7
mvns r12,r9
ands r10,r10,r3
ands r11,r11,r9
eors r9,r9,r10
ands r12,r12,r1
mvns r10,r5
ands r10,r10,r7
eors r7,r7,r12
mvns r12,r3
ands r12,r12,r5
eors r5,r5,r11
eors r7,r7,r5
mvns r5,r5
eors r1,r1,r12
eors r3,r3,r10
eors r3,r3,r1
eors r1,r1,r9
// linear diffusion layer
//The accumulation into R is interleaved with the diffusion of each word:
//R32_0 ^= c0 low ^ c2 high, R32_1 ^= c0 high ^ c3 low,
//R32_2 ^= c1 low ^ c3 high, R32_3 ^= c2 low ^ c1 high
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 high part
rors r11,r9,#(20)
eors r9,r11,r9
rors r10,r8,#(4)
eors r9,r10,r9
//c4 low part
rors r11,r11,#((32-20+3)%32)
eors r11,r11,r8
rors r10,r8,#(20)
eors r8,r10,r11
vmov r14,S11
//c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19);
//c0 high part
rors r11,r1,#(14)
eors r1,r11,r1
rors r10,r0,#(10)
eors r1,r10,r1
//r14 is R32_1
eors r14,r14,r1
vmov r12,S10
//c0 low part
rors r11,r11,#((32-14+9)%32)
eors r11,r11,r0
rors r10,r0,#(14)
eors r0,r10,r11
//r12 is R32_0
eors r12,r12,r0
//c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1);
//c2 high part
rors r11,r5,#(3)
eors r5,r11,r5
rors r10,r4,#(1)
eors r5,r10,r5
//r12 is R32_0
eors r12,r12,r5
vmov S10,r12
vmov r12,S13
//c2 low part
rors r11,r11,#((32-3+0)%32)
eors r11,r11,r4
rors r10,r4,#(3)
eors r4,r10,r11
//r12 is R32_3
eors r12,r12,r4
//c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61);
//c1 high part
rors r11,r3,#(19)
eors r3,r11,r3
rors r10,r2,#(31)
eors r3,r10,r3
//r12 is R32_3
eors r12,r12,r3
vmov S13,r12
vmov r12,S12
//c1 low part
rors r11,r11,#((32-19+30)%32)
eors r11,r11,r2
rors r10,r2,#(19)
eors r2,r10,r11
//r12 is R32_2
eors r12,r12,r2
//c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17);
//c3 high part
rors r11,r7,#(5)
eors r7,r11,r7
rors r10,r6,#(9)
eors r7,r10,r7
//r12 is R32_2
eors r12,r12,r7
vmov S12,r12
//c3 low part
rors r11,r11,#((32-5+8)%32)
eors r11,r11,r6
rors r10,r6,#(5)
eors r6,r10,r11
//r14 is R32_1
eors r14,r14,r6
vmov S11,r14
//state:
//r0 to r9: c
//r10,r11,r12 destroyed
//reload the round-constant base and the round counter from the stack
ldr r10,[sp,#4]
ldr r11,[sp,#8]
subs r11,#1
bmi drygascon128_g_v7m_fpu_exit
str r11,[sp,#8]
b drygascon128_g_v7m_fpu_main_loop
drygascon128_g_v7m_fpu_exit:
//update C
ldr r14,[sp,#0]
STMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
//update R
ldr r11,[sp,#0]
adds r11,r11,#R0
VSTMIA.F32 r11, {S10,S11,S12,S13}
//drop the three stack variables, then restore the saved registers and return
add sp,sp,#12
pop {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc}
.size drygascon128_g_v7m_fpu, .-drygascon128_g_v7m_fpu
.align 2
//drygascon128_f_v7m_fpu: mix one input block and the DS value into C, then
//continue in the round loop of drygascon128_g_v7m_fpu.
//In:  r0 = pointer to state
//     r1 = pointer to input (bytes 0..15 are consumed)
//     r2 = DS value
//     r3 = rounds for the g stage
//The input is consumed 10 bits at a time. Every 2-bit field selects one 32-bit
//word of X, which is XORed into the low half of one C word; a core round with
//the constant 0xf0 runs between two consecutive mixes. The last two mixes take
//their upper bits from DS.
.type drygascon128_f_v7m_fpu, %function
drygascon128_f_v7m_fpu:
//r0:state
//r1:input
//r2:ds
//r3:rounds
push {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
//stack frame:
//0: pointer on input
//4: DS value
//8 :pointer on state
//12 : rounds for g
//16 :mix round / g round
movs r10,#0 //init of input bit counter
push {r0,r3,r10} //make the same stack frame as drygascon128_g_v7m_fpu
push {r1,r2}
//r=0
//Clear R through a core register (r10 is still 0 here). Subtracting an FPU
//register from itself does not give 0 when it holds a NaN or infinity bit
//pattern (the result is NaN), and S10-S13 contain arbitrary 32-bit words
//left by earlier calls.
vmov S10,r10
vmov S11,r10
vmov S12,r10
vmov S13,r10
//Load C
adds r11,r0,#C0
LDMIA.W r11,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
drygascon128_f_v7m_fpu_mix128_main_loop:
//r10 is input bit counter
ldr r11,[sp,#0] //r11 is pointer on input
//r10 r12 shift
// 0 0 0
// 10 1 2
// 20 2 4
// 30 3 6
// 40 5 0
// 50 6 2
// 60 7 4
// 70 8 6
// 80 10 0
// 90 11 2
// 100 12 4
// 110 13 6
// 120 15 0
// 130 16 2 --> we do that operation for 2 last bits in a special last loop
cmp r10,#120
bne drygascon128_f_v7m_fpu_mix128_main_loop.regular
//we execute this only during the penultimate operation
//we add the 2 lsb from DS to r14
//(input byte 15 supplies bits 0-7, DS is placed from bit 8 upwards)
ldrb r14,[r11,#15]
ldr r10,[sp,#4]
lsl r10,r10,#8
eors r14,r14,r10
b drygascon128_f_v7m_fpu_mix128_main_loop.core
drygascon128_f_v7m_fpu_mix128_main_loop.regular:
//r12 is base byte: byte offset to read from input buffer
lsr r12,r10,#3 //divide by 8 to get base byte
//r10 becomes shift
lsl r14,r12,#3
sub r10,r10,r14
//NOTE(review): for r10=110 this 32-bit load starts at byte 13 and therefore
//also reads byte 16; only bits 6-15 of the loaded word are used afterwards.
//Assumes the byte after the 16 consumed input bytes is readable - confirm.
ldr r14,[r11,r12] //M7 supports unalign access with ldr
lsr r14,r14,r10
drygascon128_f_v7m_fpu_mix128_main_loop.core:
//r14: the 10 bits to mix are in bits 0-9
//r10: address of X; each 2-bit field scaled by 4 is the byte offset of one X word
ldr r10,[sp,#8]
adds r10,r10,#X0
lsls r11,r14,#2
ands r11,r11,#0xc
ldr r11,[r10,r11]
eors r0,r0,r11
lsrs r11,r14,#0
ands r11,r11,#0xc
ldr r11,[r10,r11]
eors r2,r2,r11
lsrs r11,r14,#2
ands r11,r11,#0xc
ldr r11,[r10,r11]
eors r4,r4,r11
lsrs r11,r14,#4
ands r11,r11,#0xc
ldr r11,[r10,r11]
eors r6,r6,r11
lsrs r11,r14,#6
ands r11,r11,#0xc
ldr r11,[r10,r11]
eors r8,r8,r11
//advance the bit counter; 140 means that the last mix has just been done
ldr r10,[sp,#16]
adds r10,#10
cmp r10,#140
beq drygascon128_f_v7m_fpu_mix128_exit
drygascon128_f_v7m_fpu_mix128_coreround:
str r10,[sp,#16]
movs r11,#0xf0
//state:
//r0 to r9: c
//r11: constant to add as round constant
// addition of round constant
//C2L ^= round constant;
eors r4,r4,r11
// substitution layer, lower half
eors r0,r0,r8
eors r8,r8,r6
eors r4,r4,r2
mvns r10,r0
mvns r11,r6
mvns r12,r8
ands r10,r10,r2
ands r11,r11,r8
eors r8,r8,r10
ands r12,r12,r0
mvns r10,r4
ands r10,r10,r6
eors r6,r6,r12
mvns r12,r2
ands r12,r12,r4
eors r4,r4,r11
eors r6,r6,r4
mvns r4,r4
eors r0,r0,r12
eors r2,r2,r10
eors r2,r2,r0
eors r0,r0,r8
// substitution layer, upper half
eors r1,r1,r9
eors r9,r9,r7
eors r5,r5,r3
mvns r10,r1
mvns r11,r7
mvns r12,r9
ands r10,r10,r3
ands r11,r11,r9
eors r9,r9,r10
ands r12,r12,r1
mvns r10,r5
ands r10,r10,r7
eors r7,r7,r12
mvns r12,r3
ands r12,r12,r5
eors r5,r5,r11
eors r7,r7,r5
mvns r5,r5
eors r1,r1,r12
eors r3,r3,r10
eors r3,r3,r1
eors r1,r1,r9
// linear diffusion layer
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 high part
rors r11,r9,#(20)
eors r9,r11,r9
rors r10,r8,#(4)
eors r9,r10,r9
//c4 low part
rors r11,r11,#((32-20+3)%32)
eors r11,r11,r8
rors r10,r8,#(20)
eors r8,r10,r11
//c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19);
//c0 high part
rors r11,r1,#(14)
eors r1,r11,r1
rors r10,r0,#(10)
eors r1,r10,r1
//c0 low part
rors r11,r11,#((32-14+9)%32)
eors r11,r11,r0
rors r10,r0,#(14)
eors r0,r10,r11
//c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61);
//c1 high part
rors r11,r3,#(19)
eors r3,r11,r3
rors r10,r2,#(31)
eors r3,r10,r3
//c1 low part
rors r11,r11,#((32-19+30)%32)
eors r11,r11,r2
rors r10,r2,#(19)
eors r2,r10,r11
//c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1);
//c2 high part
rors r11,r5,#(3)
eors r5,r11,r5
rors r10,r4,#(1)
eors r5,r10,r5
//c2 low part
rors r11,r11,#((32-3+0)%32)
eors r11,r11,r4
rors r10,r4,#(3)
eors r4,r10,r11
//c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17);
//c3 high part
rors r11,r7,#(5)
eors r7,r11,r7
rors r10,r6,#(9)
eors r7,r10,r7
//c3 low part
rors r11,r11,#((32-5+8)%32)
eors r11,r11,r6
rors r10,r6,#(5)
eors r6,r10,r11
//state:
//r0 to r9: c
//r10,r11,r12 destroyed
ldr r10,[sp,#16]
cmp r10,#130
bne drygascon128_f_v7m_fpu_mix128_main_loop
//prepare the last loop: load DS 2 msb
//(the two low bits of DS were already mixed together with input byte 15)
ldr r14,[sp,4]
lsr r14,r14,#2
b drygascon128_f_v7m_fpu_mix128_main_loop.core
drygascon128_f_v7m_fpu_mix128_exit:
//Turn the stack frame into the one used by the g round loop:
//slot 12 receives the round-constant base, slot 16 the round counter,
//then the two input/DS slots are dropped.
ldr r14,[sp,#12]
//round=r11=rounds-1;
subs r11,r14,#1
//base = round_cst+12-rounds
adr r10, round_cst
adds r10,r10,#12
subs r10,r10,r14
str r10,[sp,#12]
str r11,[sp,#16]
add sp,sp,#8
b drygascon128_g_v7m_fpu_main_loop
.align 2
//Round constants, one byte per round; the g round loop indexes this table
//from its end, so the last round always uses 0xf0.
round_cst:
.byte 0x4b
.byte 0x5a
.byte 0x69
.byte 0x78
.byte 0x87
.byte 0x96
.byte 0xa5
.byte 0xb4
.byte 0xc3
.byte 0xd2
.byte 0xe1
.byte 0xf0
.align 2
.size drygascon128_f_v7m_fpu, .-drygascon128_f_v7m_fpu
//drygascon128_g0_v7m_fpu: apply a single core round to the C words, without
//accumulating anything into R. No FPU register is used by this function.
//In:  r0 = pointer to state; the ten 32-bit C words are read from offset C0
//Out: the ten C words are written back to the same location
//r4-r12 are saved on entry and restored on exit; r14 is used as the C pointer.
.type drygascon128_g0_v7m_fpu, %function
drygascon128_g0_v7m_fpu:
//perform a single round without accumulate
//r0: state
push {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
//Load C
adds r14,r0,#C0
LDMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
//r0~r9: c
//(low/high 32-bit halves: r0/r1=c0, r2/r3=c1, r4/r5=c2, r6/r7=c3, r8/r9=c4)
//r14 keeps pointing at the C words (no write-back) and is reused by the final store
//r11 = ((0xf - 0) << 4) | 0;
movs r11,#0xf0
//state:
//r0 to r9: c
//r11: constant to add as round constant
// addition of round constant
//C2L ^= round constant;
eors r4,r4,r11
// substitution layer, lower half
//(r10, r11, r12 are scratch registers in both halves of the substitution layer)
eors r0,r0,r8
eors r8,r8,r6
eors r4,r4,r2
mvns r10,r0
mvns r11,r6
mvns r12,r8
ands r10,r10,r2
ands r11,r11,r8
eors r8,r8,r10
ands r12,r12,r0
mvns r10,r4
ands r10,r10,r6
eors r6,r6,r12
mvns r12,r2
ands r12,r12,r4
eors r4,r4,r11
eors r6,r6,r4
mvns r4,r4
eors r0,r0,r12
eors r2,r2,r10
eors r2,r2,r0
eors r0,r0,r8
// substitution layer, upper half
eors r1,r1,r9
eors r9,r9,r7
eors r5,r5,r3
mvns r10,r1
mvns r11,r7
mvns r12,r9
ands r10,r10,r3
ands r11,r11,r9
eors r9,r9,r10
ands r12,r12,r1
mvns r10,r5
ands r10,r10,r7
eors r7,r7,r12
mvns r12,r3
ands r12,r12,r5
eors r5,r5,r11
eors r7,r7,r5
mvns r5,r5
eors r1,r1,r12
eors r3,r3,r10
eors r3,r3,r1
eors r1,r1,r9
// linear diffusion layer
//In each word the high part is updated first; r11 keeps the rotated old high
//half and is rotated again to produce the term needed by the low part.
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 high part
rors r11,r9,#(20)
eors r9,r11,r9
rors r10,r8,#(4)
eors r9,r10,r9
//c4 low part
rors r11,r11,#((32-20+3)%32)
eors r11,r11,r8
rors r10,r8,#(20)
eors r8,r10,r11
//c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19);
//c0 high part
rors r11,r1,#(14)
eors r1,r11,r1
rors r10,r0,#(10)
eors r1,r10,r1
//c0 low part
rors r11,r11,#((32-14+9)%32)
eors r11,r11,r0
rors r10,r0,#(14)
eors r0,r10,r11
//c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61);
//c1 high part
rors r11,r3,#(19)
eors r3,r11,r3
rors r10,r2,#(31)
eors r3,r10,r3
//c1 low part
rors r11,r11,#((32-19+30)%32)
eors r11,r11,r2
rors r10,r2,#(19)
eors r2,r10,r11
//c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1);
//c2 high part
rors r11,r5,#(3)
eors r5,r11,r5
rors r10,r4,#(1)
eors r5,r10,r5
//c2 low part
rors r11,r11,#((32-3+0)%32)
eors r11,r11,r4
rors r10,r4,#(3)
eors r4,r10,r11
//c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17);
//c3 high part
rors r11,r7,#(5)
eors r7,r11,r7
rors r10,r6,#(9)
eors r7,r10,r7
//c3 low part
rors r11,r11,#((32-5+8)%32)
eors r11,r11,r6
rors r10,r6,#(5)
eors r6,r10,r11
//state:
//r0 to r9: c
//r10,r11,r12 destroyed
//update C
STMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
//restore the saved registers and return (pc is loaded with the saved lr)
pop {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc}
.size drygascon128_g0_v7m_fpu, .-drygascon128_g0_v7m_fpu
#endif
/**
DryGascon128 'v7m_fpu_x implementation'
Sebastien Riou, May 27th 2020
Implementation optimized for ARM-Cortex-M7/M4/M3 (Size and Speed)
Include protection against timing attack on X look up operations
Note that implementation 'v7m_fpu' is faster and safe on all Cortex-M7 as of May 2020.
*/
#if defined(__DRYGASCON_ARM_SELECTOR_H__)
//Assembler setup: Thumb code in unified syntax for Cortex-M7.
//The functions of this variant use FPU instructions (VSEL, vmov, VLDMIA, VSTMIA).
.cpu cortex-m7
.syntax unified
.code 16
.thumb_func
.align 1
//Entry points exported by this variant
.global drygascon128_g_v7m_fpu_x
.global drygascon128_f_v7m_fpu_x
.global drygascon128_g0_v7m_fpu_x
//Byte offsets of the fields inside the state structure.
//C0..C4: five 64-bit words starting at offset 0
.equ C0, 0
.equ C1, C0+8
.equ C2, C0+16
.equ C3, C0+24
.equ C4, C0+32
//R0, R1: two 64-bit words starting at offset 48
.equ R0, 48
.equ R1, R0+8
//X0, X1: two 64-bit words starting at offset 64
.equ X0, 64
.equ X1, X0+8
//Offsets of the low 32-bit half of each 64-bit word (same as the word offset)
.equ X0L, X0
.equ X1L, X1
.equ C0L, C0
.equ C1L, C1
.equ C2L, C2
.equ C3L, C3
.equ C4L, C4
.equ R0L, R0
.equ R1L, R1
//Offsets of the high 32-bit half of each 64-bit word (word offset + 4)
.equ X0H, X0+4
.equ X1H, X1+4
.equ C0H, C0+4
.equ C1H, C1+4
.equ C2H, C2+4
.equ C3H, C3+4
.equ C4H, C4+4
.equ R0H, R0+4
.equ R1H, R1+4
//R viewed as four consecutive 32-bit words
.equ R32_0, R0L
.equ R32_1, R0H
.equ R32_2, R1L
.equ R32_3, R1H
//drygascon128_g_v7m_fpu_x: run "rounds" core rounds on C and accumulate R.
//In:  r0 = pointer to state (C at offset C0, R at offset R0)
//     r1 = number of rounds; the round constant is read from the 12-byte
//          round_cst table indexed from its end, so 1 <= rounds <= 12 is expected
//Out: the ten C words and the four R words are written back to the state
//R is cleared on entry and kept in S10-S13 (R32_0..R32_3) during the rounds.
//r4-r12 are saved on entry and restored on exit.
.type drygascon128_g_v7m_fpu_x, %function
drygascon128_g_v7m_fpu_x:
//r0: state: c,r,x
//r1: rounds
push {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
//stack vars:
// 8 round
// 4 rounds (base address for lookups)
// 0 state address
//r=0
//Clear R through a core register. Subtracting an FPU register from itself
//does not give 0 when it holds a NaN or infinity bit pattern (the result is
//NaN), and S10-S13 contain arbitrary 32-bit words left by earlier calls.
//r12 is free here: it has just been saved and is rewritten before its next use.
movs r12,#0
vmov S10,r12
vmov S11,r12
vmov S12,r12
vmov S13,r12
//round=r11=rounds-1;
subs r11,r1,#1
//base = round_cst+12-rounds
adr r10, round_cst
adds r10,r10,#12
subs r10,r10,r1
push {r0,r10,r11}
//Load C
adds r14,r0,#C0
LDMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
//loop entry
//assume r11>=0 at entry
//drygascon128_f_v7m_fpu_x also branches here, with the same register and stack layout
drygascon128_g_v7m_fpu_x_main_loop:
//r0~r9: c
//r10: base for round constants
//r11: round, counting from rounds-1 to 0
//r11 = ((0xf - r11) << 4) | r11;
ldrb r11,[r10,r11]
//state:
//r0 to r9: c
//r11: constant to add as round constant
//FPU:
//s10 to s13: r
// addition of round constant
//C2L ^= round constant;
eors r4,r4,r11
// substitution layer, lower half
eors r0,r0,r8
eors r8,r8,r6
eors r4,r4,r2
mvns r10,r0
mvns r11,r6
mvns r12,r8
ands r10,r10,r2
ands r11,r11,r8
eors r8,r8,r10
ands r12,r12,r0
mvns r10,r4
ands r10,r10,r6
eors r6,r6,r12
mvns r12,r2
ands r12,r12,r4
eors r4,r4,r11
eors r6,r6,r4
mvns r4,r4
eors r0,r0,r12
eors r2,r2,r10
eors r2,r2,r0
eors r0,r0,r8
// substitution layer, upper half
eors r1,r1,r9
eors r9,r9,r7
eors r5,r5,r3
mvns r10,r1
mvns r11,r7
mvns r12,r9
ands r10,r10,r3
ands r11,r11,r9
eors r9,r9,r10
ands r12,r12,r1
mvns r10,r5
ands r10,r10,r7
eors r7,r7,r12
mvns r12,r3
ands r12,r12,r5
eors r5,r5,r11
eors r7,r7,r5
mvns r5,r5
eors r1,r1,r12
eors r3,r3,r10
eors r3,r3,r1
eors r1,r1,r9
// linear diffusion layer
//The accumulation into R is interleaved with the diffusion of each word:
//R32_0 ^= c0 low ^ c2 high, R32_1 ^= c0 high ^ c3 low,
//R32_2 ^= c1 low ^ c3 high, R32_3 ^= c2 low ^ c1 high
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 high part
rors r11,r9,#(20)
eors r9,r11,r9
rors r10,r8,#(4)
eors r9,r10,r9
//c4 low part
rors r11,r11,#((32-20+3)%32)
eors r11,r11,r8
rors r10,r8,#(20)
eors r8,r10,r11
vmov r14,S11
//c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19);
//c0 high part
rors r11,r1,#(14)
eors r1,r11,r1
rors r10,r0,#(10)
eors r1,r10,r1
//r14 is R32_1
eors r14,r14,r1
vmov r12,S10
//c0 low part
rors r11,r11,#((32-14+9)%32)
eors r11,r11,r0
rors r10,r0,#(14)
eors r0,r10,r11
//r12 is R32_0
eors r12,r12,r0
//c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1);
//c2 high part
rors r11,r5,#(3)
eors r5,r11,r5
rors r10,r4,#(1)
eors r5,r10,r5
//r12 is R32_0
eors r12,r12,r5
vmov S10,r12
vmov r12,S13
//c2 low part
rors r11,r11,#((32-3+0)%32)
eors r11,r11,r4
rors r10,r4,#(3)
eors r4,r10,r11
//r12 is R32_3
eors r12,r12,r4
//c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61);
//c1 high part
rors r11,r3,#(19)
eors r3,r11,r3
rors r10,r2,#(31)
eors r3,r10,r3
//r12 is R32_3
eors r12,r12,r3
vmov S13,r12
vmov r12,S12
//c1 low part
rors r11,r11,#((32-19+30)%32)
eors r11,r11,r2
rors r10,r2,#(19)
eors r2,r10,r11
//r12 is R32_2
eors r12,r12,r2
//c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17);
//c3 high part
rors r11,r7,#(5)
eors r7,r11,r7
rors r10,r6,#(9)
eors r7,r10,r7
//r12 is R32_2
eors r12,r12,r7
vmov S12,r12
//c3 low part
rors r11,r11,#((32-5+8)%32)
eors r11,r11,r6
rors r10,r6,#(5)
eors r6,r10,r11
//r14 is R32_1
eors r14,r14,r6
vmov S11,r14
//state:
//r0 to r9: c
//r10,r11,r12 destroyed
//reload the round-constant base and the round counter from the stack
ldr r10,[sp,#4]
ldr r11,[sp,#8]
subs r11,#1
bmi drygascon128_g_v7m_fpu_x_exit
str r11,[sp,#8]
b drygascon128_g_v7m_fpu_x_main_loop
drygascon128_g_v7m_fpu_x_exit:
//update C
ldr r14,[sp,#0]
STMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
//update R
ldr r11,[sp,#0]
adds r11,r11,#R0
VSTMIA.F32 r11, {S10,S11,S12,S13}
//drop the three stack variables, then restore the saved registers and return
add sp,sp,#12
pop {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc}
.size drygascon128_g_v7m_fpu_x, .-drygascon128_g_v7m_fpu_x
.align 2
//drygascon128_f_v7m_fpu_x: mix one input block and the DS value into C, then
//continue in the round loop of drygascon128_g_v7m_fpu_x.
//In:  r0 = pointer to state
//     r1 = pointer to input (bytes 0..15 are consumed)
//     r2 = DS value
//     r3 = rounds for the g stage
//The input is consumed 10 bits at a time. Every 2-bit field selects one of the
//four 32-bit words of X, which is XORed into the low half of one C word; a core
//round with the constant 0xf0 runs between two consecutive mixes.
//X is loaded once into S0-S3 and the selection is done with VSEL, so that no
//load address depends on the input bits.
.type drygascon128_f_v7m_fpu_x, %function
drygascon128_f_v7m_fpu_x:
//r0:state
//r1:input
//r2:ds
//r3:rounds
push {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
//stack frame:
//0: pointer on input
//4: DS value
//8 :pointer on state
//12 : rounds for g
//16 :mix round / g round
movs r10,#0 //init of input bit counter
push {r0,r3,r10} //make the same stack frame as drygascon128_g_v7m_fpu_x
push {r1,r2}
//r=0
//Clear R through a core register (r10 is still 0 here). Subtracting an FPU
//register from itself does not give 0 when it holds a NaN or infinity bit
//pattern (the result is NaN), and S10-S13 contain arbitrary 32-bit words
//left by earlier calls.
vmov S10,r10
vmov S11,r10
vmov S12,r10
vmov S13,r10
//Load C
adds r11,r0,#C0
LDMIA.W r11,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
//Load X
//(r11 still points at the C words because the load above has no write-back)
adds r11,#X0
VLDMIA.F32 r11, {s0,s1,s2,s3}
drygascon128_f_v7m_fpu_x_mix128_main_loop:
//r10 is input bit counter
ldr r11,[sp,#0] //r11 is pointer on input
//r10 r12 shift
// 0 0 0
// 10 1 2
// 20 2 4
// 30 3 6
// 40 5 0
// 50 6 2
// 60 7 4
// 70 8 6
// 80 10 0
// 90 11 2
// 100 12 4
// 110 13 6
// 120 15 0
// 130 16 2 --> we do that operation for 2 last bits in a special last loop
cmp r10,#120
bne drygascon128_f_v7m_fpu_x_mix128_main_loop.regular
//we execute this only during the penultimate operation
//we add the 2 lsb from DS to r14
//(input byte 15 supplies bits 0-7, DS is placed from bit 8 upwards)
ldrb r14,[r11,#15]
ldr r10,[sp,#4]
lsl r10,r10,#8
eors r14,r14,r10
b drygascon128_f_v7m_fpu_x_mix128_main_loop.core
drygascon128_f_v7m_fpu_x_mix128_main_loop.regular:
//r12 is base byte: byte offset to read from input buffer
lsr r12,r10,#3 //divide by 8 to get base byte
//r10 becomes shift
lsl r14,r12,#3
sub r10,r10,r14
//NOTE(review): for r10=110 this 32-bit load starts at byte 13 and therefore
//also reads byte 16; only bits 6-15 of the loaded word are used afterwards.
//Assumes the byte after the 16 consumed input bytes is readable - confirm.
ldr r14,[r11,r12] //M7 supports unalign access with ldr
lsr r14,r14,r10
drygascon128_f_v7m_fpu_x_mix128_main_loop.core:
//r14: the 10 bits to mix are in bits 0-9
//For each 2-bit field: the upper bit picks D0 (S0,S1) or D1 (S2,S3) into
//D2 (S4,S5), then the lower bit picks S4 or S5 into S6.
//NOTE(review): VSELEQ.F64 works on D registers and presumably needs an FPU
//with double-precision support - confirm for the target chip.
tst r14,#2
VSELEQ.F64 D2, D0, D1
tst r14,#1
VSELEQ.F32 S6, S4, S5
VMOV r11,S6
eors r0,r0,r11
tst r14,#8
VSELEQ.F64 D2, D0, D1
tst r14,#4
VSELEQ.F32 S6, S4, S5
VMOV r11,S6
eors r2,r2,r11
tst r14,#32
VSELEQ.F64 D2, D0, D1
tst r14,#16
VSELEQ.F32 S6, S4, S5
VMOV r11,S6
eors r4,r4,r11
tst r14,#128
VSELEQ.F64 D2, D0, D1
tst r14,#64
VSELEQ.F32 S6, S4, S5
VMOV r11,S6
eors r6,r6,r11
tst r14,#512
VSELEQ.F64 D2, D0, D1
tst r14,#256
VSELEQ.F32 S6, S4, S5
VMOV r11,S6
eors r8,r8,r11
//advance the bit counter; 140 means that the last mix has just been done
ldr r10,[sp,#16]
adds r10,#10
cmp r10,#140
beq drygascon128_f_v7m_fpu_x_mix128_exit
drygascon128_f_v7m_fpu_x_mix128_coreround:
str r10,[sp,#16]
movs r11,#0xf0
//state:
//r0 to r9: c
//r11: constant to add as round constant
// addition of round constant
//C2L ^= round constant;
eors r4,r4,r11
// substitution layer, lower half
eors r0,r0,r8
eors r8,r8,r6
eors r4,r4,r2
mvns r10,r0
mvns r11,r6
mvns r12,r8
ands r10,r10,r2
ands r11,r11,r8
eors r8,r8,r10
ands r12,r12,r0
mvns r10,r4
ands r10,r10,r6
eors r6,r6,r12
mvns r12,r2
ands r12,r12,r4
eors r4,r4,r11
eors r6,r6,r4
mvns r4,r4
eors r0,r0,r12
eors r2,r2,r10
eors r2,r2,r0
eors r0,r0,r8
// substitution layer, upper half
eors r1,r1,r9
eors r9,r9,r7
eors r5,r5,r3
mvns r10,r1
mvns r11,r7
mvns r12,r9
ands r10,r10,r3
ands r11,r11,r9
eors r9,r9,r10
ands r12,r12,r1
mvns r10,r5
ands r10,r10,r7
eors r7,r7,r12
mvns r12,r3
ands r12,r12,r5
eors r5,r5,r11
eors r7,r7,r5
mvns r5,r5
eors r1,r1,r12
eors r3,r3,r10
eors r3,r3,r1
eors r1,r1,r9
// linear diffusion layer
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 high part
rors r11,r9,#(20)
eors r9,r11,r9
rors r10,r8,#(4)
eors r9,r10,r9
//c4 low part
rors r11,r11,#((32-20+3)%32)
eors r11,r11,r8
rors r10,r8,#(20)
eors r8,r10,r11
//c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19);
//c0 high part
rors r11,r1,#(14)
eors r1,r11,r1
rors r10,r0,#(10)
eors r1,r10,r1
//c0 low part
rors r11,r11,#((32-14+9)%32)
eors r11,r11,r0
rors r10,r0,#(14)
eors r0,r10,r11
//c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61);
//c1 high part
rors r11,r3,#(19)
eors r3,r11,r3
rors r10,r2,#(31)
eors r3,r10,r3
//c1 low part
rors r11,r11,#((32-19+30)%32)
eors r11,r11,r2
rors r10,r2,#(19)
eors r2,r10,r11
//c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1);
//c2 high part
rors r11,r5,#(3)
eors r5,r11,r5
rors r10,r4,#(1)
eors r5,r10,r5
//c2 low part
rors r11,r11,#((32-3+0)%32)
eors r11,r11,r4
rors r10,r4,#(3)
eors r4,r10,r11
//c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17);
//c3 high part
rors r11,r7,#(5)
eors r7,r11,r7
rors r10,r6,#(9)
eors r7,r10,r7
//c3 low part
rors r11,r11,#((32-5+8)%32)
eors r11,r11,r6
rors r10,r6,#(5)
eors r6,r10,r11
//state:
//r0 to r9: c
//r10,r11,r12 destroyed
ldr r10,[sp,#16]
cmp r10,#130
bne drygascon128_f_v7m_fpu_x_mix128_main_loop
//prepare the last loop: load DS 2 msb
//(the two low bits of DS were already mixed together with input byte 15)
ldr r14,[sp,4]
lsr r14,r14,#2
b drygascon128_f_v7m_fpu_x_mix128_main_loop.core
drygascon128_f_v7m_fpu_x_mix128_exit:
//Turn the stack frame into the one used by the g round loop:
//slot 12 receives the round-constant base, slot 16 the round counter,
//then the two input/DS slots are dropped.
ldr r14,[sp,#12]
//round=r11=rounds-1;
subs r11,r14,#1
//base = round_cst+12-rounds
adr r10, round_cst
adds r10,r10,#12
subs r10,r10,r14
str r10,[sp,#12]
str r11,[sp,#16]
add sp,sp,#8
b drygascon128_g_v7m_fpu_x_main_loop
.align 2
//Round constants, one byte per round; the g round loop indexes this table
//from its end, so the last round always uses 0xf0.
round_cst:
.byte 0x4b
.byte 0x5a
.byte 0x69
.byte 0x78
.byte 0x87
.byte 0x96
.byte 0xa5
.byte 0xb4
.byte 0xc3
.byte 0xd2
.byte 0xe1
.byte 0xf0
.align 2
.size drygascon128_f_v7m_fpu_x, .-drygascon128_f_v7m_fpu_x
//drygascon128_g0_v7m_fpu_x: apply a single core round to the C words, without
//accumulating anything into R. No FPU register is used by this function.
//In:  r0 = pointer to state; the ten 32-bit C words are read from offset C0
//Out: the ten C words are written back to the same location
//r4-r12 are saved on entry and restored on exit; r14 is used as the C pointer.
.type drygascon128_g0_v7m_fpu_x, %function
drygascon128_g0_v7m_fpu_x:
//perform a single round without accumulate
//r0: state
push {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
//Load C
adds r14,r0,#C0
LDMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
//r0~r9: c
//(low/high 32-bit halves: r0/r1=c0, r2/r3=c1, r4/r5=c2, r6/r7=c3, r8/r9=c4)
//r14 keeps pointing at the C words (no write-back) and is reused by the final store
//r11 = ((0xf - 0) << 4) | 0;
movs r11,#0xf0
//state:
//r0 to r9: c
//r11: constant to add as round constant
// addition of round constant
//C2L ^= round constant;
eors r4,r4,r11
// substitution layer, lower half
//(r10, r11, r12 are scratch registers in both halves of the substitution layer)
eors r0,r0,r8
eors r8,r8,r6
eors r4,r4,r2
mvns r10,r0
mvns r11,r6
mvns r12,r8
ands r10,r10,r2
ands r11,r11,r8
eors r8,r8,r10
ands r12,r12,r0
mvns r10,r4
ands r10,r10,r6
eors r6,r6,r12
mvns r12,r2
ands r12,r12,r4
eors r4,r4,r11
eors r6,r6,r4
mvns r4,r4
eors r0,r0,r12
eors r2,r2,r10
eors r2,r2,r0
eors r0,r0,r8
// substitution layer, upper half
eors r1,r1,r9
eors r9,r9,r7
eors r5,r5,r3
mvns r10,r1
mvns r11,r7
mvns r12,r9
ands r10,r10,r3
ands r11,r11,r9
eors r9,r9,r10
ands r12,r12,r1
mvns r10,r5
ands r10,r10,r7
eors r7,r7,r12
mvns r12,r3
ands r12,r12,r5
eors r5,r5,r11
eors r7,r7,r5
mvns r5,r5
eors r1,r1,r12
eors r3,r3,r10
eors r3,r3,r1
eors r1,r1,r9
// linear diffusion layer
//In each word the high part is updated first; r11 keeps the rotated old high
//half and is rotated again to produce the term needed by the low part.
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 high part
rors r11,r9,#(20)
eors r9,r11,r9
rors r10,r8,#(4)
eors r9,r10,r9
//c4 low part
rors r11,r11,#((32-20+3)%32)
eors r11,r11,r8
rors r10,r8,#(20)
eors r8,r10,r11
//c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19);
//c0 high part
rors r11,r1,#(14)
eors r1,r11,r1
rors r10,r0,#(10)
eors r1,r10,r1
//c0 low part
rors r11,r11,#((32-14+9)%32)
eors r11,r11,r0
rors r10,r0,#(14)
eors r0,r10,r11
//c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61);
//c1 high part
rors r11,r3,#(19)
eors r3,r11,r3
rors r10,r2,#(31)
eors r3,r10,r3
//c1 low part
rors r11,r11,#((32-19+30)%32)
eors r11,r11,r2
rors r10,r2,#(19)
eors r2,r10,r11
//c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1);
//c2 high part
rors r11,r5,#(3)
eors r5,r11,r5
rors r10,r4,#(1)
eors r5,r10,r5
//c2 low part
rors r11,r11,#((32-3+0)%32)
eors r11,r11,r4
rors r10,r4,#(3)
eors r4,r10,r11
//c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17);
//c3 high part
rors r11,r7,#(5)
eors r7,r11,r7
rors r10,r6,#(9)
eors r7,r10,r7
//c3 low part
rors r11,r11,#((32-5+8)%32)
eors r11,r11,r6
rors r10,r6,#(5)
eors r6,r10,r11
//state:
//r0 to r9: c
//r10,r11,r12 destroyed
//update C
STMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
//restore the saved registers and return (pc is loaded with the saved lr)
pop {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc}
.size drygascon128_g0_v7m_fpu_x, .-drygascon128_g0_v7m_fpu_x
#endif
#ifndef __DRYGASCON_ARM_SELECTOR_H__
#define __DRYGASCON_ARM_SELECTOR_H__
/*
 * Optional selector: maps a chip-identification macro to the assembly
 * implementation used for the DryGASCON128 core functions. When no known
 * chip macro is defined, none of the DRYGASCON_*_OPT macros is defined.
 */

/* Chips that select the ARMv7-M implementation together with its FPU variant. */
#if defined(STM32H743xx) || defined(STM32F746xx)
#define __DRYGASCON_ARM_SELECTOR_V7M__
#define __DRYGASCON_ARM_SELECTOR_FPU__
#endif

/* Chips that select the ARMv7-M implementation without the FPU variant. */
#if defined(STM32F103xx) || defined(__SAM3X8E__)
#define __DRYGASCON_ARM_SELECTOR_V7M__
#endif

/* Chips that select the ARMv6-M implementation. */
#if defined(STM32L011xx)
#define __DRYGASCON_ARM_SELECTOR_V6M__
#endif

/* TODO: add more chips here */

/* Map the selected family to the entry points called by the C code. */
#if defined(__DRYGASCON_ARM_SELECTOR_V7M__) && defined(__DRYGASCON_ARM_SELECTOR_FPU__)
#define DRYGASCON_G_OPT drygascon128_g_v7m_fpu
#define DRYGASCON_F_OPT drygascon128_f_v7m_fpu
#define DRYGASCON_G0_OPT drygascon128_g0_v7m_fpu
#elif defined(__DRYGASCON_ARM_SELECTOR_V7M__)
#define DRYGASCON_G_OPT drygascon128_g_v7m
#define DRYGASCON_F_OPT drygascon128_f_v7m
#define DRYGASCON_G0_OPT drygascon128_g0_v7m
#endif

#if defined(__DRYGASCON_ARM_SELECTOR_V6M__)
#define DRYGASCON_G_OPT drygascon128_g_v6m
#define DRYGASCON_F_OPT drygascon128_f_v6m
/* DRYGASCON_G0_OPT is left undefined here (the drygascon128_g0_v6m mapping is disabled). */
#define DRYGASCON_ALIGN_INPUT_32
#endif

#endif /* __DRYGASCON_ARM_SELECTOR_H__ */
#include "drygascon.h"
/*
 * AEAD encryption entry point: forwards every argument, unchanged and in the
 * same order, to drygascon128_aead_encrypt() and returns its result.
 */
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
                        const unsigned char *m, unsigned long long mlen,
                        const unsigned char *ad, unsigned long long adlen,
                        const unsigned char *nsec,
                        const unsigned char *npub,
                        const unsigned char *k)
{
    const int status =
        drygascon128_aead_encrypt(c, clen, m, mlen, ad, adlen, nsec, npub, k);

    return status;
}
/*
 * AEAD decryption entry point: forwards every argument, unchanged and in the
 * same order, to drygascon128_aead_decrypt() and returns its result.
 */
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
                        unsigned char *nsec,
                        const unsigned char *c, unsigned long long clen,
                        const unsigned char *ad, unsigned long long adlen,
                        const unsigned char *npub,
                        const unsigned char *k)
{
    const int status =
        drygascon128_aead_decrypt(m, mlen, nsec, c, clen, ad, adlen, npub, k);

    return status;
}
/*
* Copyright (C) 2020 Southern Storm Software, Pty Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "internal-drysponge.h"
#include <string.h>
#if !defined(__AVR__)
/* Right rotations in bit-interleaved format */
/*
 * Right rotation of a 64-bit value held as two 32-bit halves, used for the
 * even rotation counts (intRightRotateN_64 with N = 2 * bits).
 * Each half is rotated by "bits" with rightRotate() and stays in its place.
 * Note: "x" is evaluated twice, so it must not have side effects.
 * rightRotate() is defined outside this file (presumably a 32-bit right
 * rotation - verify against its definition).
 */
#define intRightRotateEven(x,bits) \
(__extension__ ({ \
uint32_t _x0 = (uint32_t)(x); \
uint32_t _x1 = (uint32_t)((x) >> 32); \
_x0 = rightRotate(_x0, (bits)); \
_x1 = rightRotate(_x1, (bits)); \
_x0 | (((uint64_t)_x1) << 32); \
}))
/*
 * Right rotation of a 64-bit value held as two 32-bit halves, used for the
 * odd rotation counts (intRightRotateN_64 with N = 2 * bits + 1).
 * The low half is rotated by (bits + 1) % 32 and becomes the high half;
 * the high half is rotated by "bits" and becomes the low half.
 * Note: "x" is evaluated twice, so it must not have side effects.
 * rightRotate() is defined outside this file (presumably a 32-bit right
 * rotation - verify against its definition).
 */
#define intRightRotateOdd(x,bits) \
(__extension__ ({ \
uint32_t _x0 = (uint32_t)(x); \
uint32_t _x1 = (uint32_t)((x) >> 32); \
_x0 = rightRotate(_x0, ((bits) + 1) % 32); \
_x1 = rightRotate(_x1, (bits)); \
_x1 | (((uint64_t)_x0) << 32); \
}))
/*
 * Right rotation by 1 of a 64-bit value held as two 32-bit halves.
 * The low half goes through rightRotate1() and becomes the high half;
 * the high half becomes the low half unchanged.
 * Note: "x" is evaluated twice, so it must not have side effects.
 * rightRotate1() is defined outside this file.
 */
#define intRightRotate1_64(x) \
(__extension__ ({ \
uint32_t _x0 = (uint32_t)(x); \
uint32_t _x1 = (uint32_t)((x) >> 32); \
_x0 = rightRotate1(_x0); \
_x1 | (((uint64_t)_x0) << 32); \
}))
/* Named rotation helpers intRightRotateN_64(x) for N = 2 .. 63.
 * An even N expands to intRightRotateEven(x, N / 2) and an odd N expands
 * to intRightRotateOdd(x, (N - 1) / 2). */
#define intRightRotate2_64(x) (intRightRotateEven((x), 1))
#define intRightRotate3_64(x) (intRightRotateOdd((x), 1))
#define intRightRotate4_64(x) (intRightRotateEven((x), 2))
#define intRightRotate5_64(x) (intRightRotateOdd((x), 2))
#define intRightRotate6_64(x) (intRightRotateEven((x), 3))
#define intRightRotate7_64(x) (intRightRotateOdd((x), 3))
#define intRightRotate8_64(x) (intRightRotateEven((x), 4))
#define intRightRotate9_64(x) (intRightRotateOdd((x), 4))
#define intRightRotate10_64(x) (intRightRotateEven((x), 5))
#define intRightRotate11_64(x) (intRightRotateOdd((x), 5))
#define intRightRotate12_64(x) (intRightRotateEven((x), 6))
#define intRightRotate13_64(x) (intRightRotateOdd((x), 6))
#define intRightRotate14_64(x) (intRightRotateEven((x), 7))
#define intRightRotate15_64(x) (intRightRotateOdd((x), 7))
#define intRightRotate16_64(x) (intRightRotateEven((x), 8))
#define intRightRotate17_64(x) (intRightRotateOdd((x), 8))
#define intRightRotate18_64(x) (intRightRotateEven((x), 9))
#define intRightRotate19_64(x) (intRightRotateOdd((x), 9))
#define intRightRotate20_64(x) (intRightRotateEven((x), 10))
#define intRightRotate21_64(x) (intRightRotateOdd((x), 10))
#define intRightRotate22_64(x) (intRightRotateEven((x), 11))
#define intRightRotate23_64(x) (intRightRotateOdd((x), 11))
#define intRightRotate24_64(x) (intRightRotateEven((x), 12))
#define intRightRotate25_64(x) (intRightRotateOdd((x), 12))
#define intRightRotate26_64(x) (intRightRotateEven((x), 13))
#define intRightRotate27_64(x) (intRightRotateOdd((x), 13))
#define intRightRotate28_64(x) (intRightRotateEven((x), 14))
#define intRightRotate29_64(x) (intRightRotateOdd((x), 14))
#define intRightRotate30_64(x) (intRightRotateEven((x), 15))
#define intRightRotate31_64(x) (intRightRotateOdd((x), 15))
#define intRightRotate32_64(x) (intRightRotateEven((x), 16))
#define intRightRotate33_64(x) (intRightRotateOdd((x), 16))
#define intRightRotate34_64(x) (intRightRotateEven((x), 17))
#define intRightRotate35_64(x) (intRightRotateOdd((x), 17))
#define intRightRotate36_64(x) (intRightRotateEven((x), 18))
#define intRightRotate37_64(x) (intRightRotateOdd((x), 18))
#define intRightRotate38_64(x) (intRightRotateEven((x), 19))
#define intRightRotate39_64(x) (intRightRotateOdd((x), 19))
#define intRightRotate40_64(x) (intRightRotateEven((x), 20))
#define intRightRotate41_64(x) (intRightRotateOdd((x), 20))
#define intRightRotate42_64(x) (intRightRotateEven((x), 21))
#define intRightRotate43_64(x) (intRightRotateOdd((x), 21))
#define intRightRotate44_64(x) (intRightRotateEven((x), 22))
#define intRightRotate45_64(x) (intRightRotateOdd((x), 22))
#define intRightRotate46_64(x) (intRightRotateEven((x), 23))
#define intRightRotate47_64(x) (intRightRotateOdd((x), 23))
#define intRightRotate48_64(x) (intRightRotateEven((x), 24))
#define intRightRotate49_64(x) (intRightRotateOdd((x), 24))
#define intRightRotate50_64(x) (intRightRotateEven((x), 25))
#define intRightRotate51_64(x) (intRightRotateOdd((x), 25))
#define intRightRotate52_64(x) (intRightRotateEven((x), 26))
#define intRightRotate53_64(x) (intRightRotateOdd((x), 26))
#define intRightRotate54_64(x) (intRightRotateEven((x), 27))
#define intRightRotate55_64(x) (intRightRotateOdd((x), 27))
#define intRightRotate56_64(x) (intRightRotateEven((x), 28))
#define intRightRotate57_64(x) (intRightRotateOdd((x), 28))
#define intRightRotate58_64(x) (intRightRotateEven((x), 29))
#define intRightRotate59_64(x) (intRightRotateOdd((x), 29))
#define intRightRotate60_64(x) (intRightRotateEven((x), 30))
#define intRightRotate61_64(x) (intRightRotateOdd((x), 30))
#define intRightRotate62_64(x) (intRightRotateEven((x), 31))
#define intRightRotate63_64(x) (intRightRotateOdd((x), 31))
#ifdef DRYGASCON_G0_OPT
/* Prototype of the externally provided routine whose name is given by the
 * DRYGASCON_G0_OPT macro. */
void DRYGASCON_G0_OPT(drysponge128_state_t *state);
/* Forwards to the DRYGASCON_G0_OPT routine.
 * NOTE(review): presumably this matches the portable gascon128_g0(), i.e. a
 * single GASCON-128 core round with round number 0 applied to state->c --
 * confirm against the optimized implementation. */
static void gascon128_g0(drysponge128_state_t *state){
DRYGASCON_G0_OPT(state);
}
#else
/* Applies one GASCON-128 core round, in place, to the five 64-bit words of
 * the state.  The round constant XORed into the middle word is built from
 * "round": for round values 0..15 the low nibble is round and the high
 * nibble is 0x0F - round. */
void gascon128_core_round(gascon128_state_t *state, uint8_t round)
{
uint64_t t0, t1, t2, t3, t4;
/* Load the state into local variables */
/* Little-endian hosts read the 64-bit words directly; other hosts decode
 * them from the byte view with le_load_word64() */
#if defined(LW_UTIL_LITTLE_ENDIAN)
uint64_t x0 = state->S[0];
uint64_t x1 = state->S[1];
uint64_t x2 = state->S[2];
uint64_t x3 = state->S[3];
uint64_t x4 = state->S[4];
#else
uint64_t x0 = le_load_word64(state->B);
uint64_t x1 = le_load_word64(state->B + 8);
uint64_t x2 = le_load_word64(state->B + 16);
uint64_t x3 = le_load_word64(state->B + 24);
uint64_t x4 = le_load_word64(state->B + 32);
#endif
/* Add the round constant to the middle of the state */
x2 ^= ((0x0F - round) << 4) | round;
/* Substitution layer */
/* The statements below depend on each other; their order must be kept */
x0 ^= x4; x2 ^= x1; x4 ^= x3; t0 = (~x0) & x1; t1 = (~x1) & x2;
t2 = (~x2) & x3; t3 = (~x3) & x4; t4 = (~x4) & x0; x0 ^= t1;
x1 ^= t2; x2 ^= t3; x3 ^= t4; x4 ^= t0; x1 ^= x0; x3 ^= x2;
x0 ^= x4; x2 = ~x2;
/* Linear diffusion layer */
/* Each word is XORed with two rotated copies of itself */
x0 ^= intRightRotate19_64(x0) ^ intRightRotate28_64(x0);
x1 ^= intRightRotate61_64(x1) ^ intRightRotate38_64(x1);
x2 ^= intRightRotate1_64(x2) ^ intRightRotate6_64(x2);
x3 ^= intRightRotate10_64(x3) ^ intRightRotate17_64(x3);
x4 ^= intRightRotate7_64(x4) ^ intRightRotate40_64(x4);
/* Write the local variables back to the state */
#if defined(LW_UTIL_LITTLE_ENDIAN)
state->S[0] = x0;
state->S[1] = x1;
state->S[2] = x2;
state->S[3] = x3;
state->S[4] = x4;
#else
le_store_word64(state->B, x0);
le_store_word64(state->B + 8, x1);
le_store_word64(state->B + 16, x2);
le_store_word64(state->B + 24, x3);
le_store_word64(state->B + 32, x4);
#endif
}
/* Portable gascon128_g0(): runs a single GASCON-128 core round with round
 * number 0 on the capacity part (state->c) of the DrySPONGE128 state. */
static void gascon128_g0(drysponge128_state_t *state){
gascon128_core_round(&(state->c), 0);
}
#endif
/* Applies one GASCON-256 core round, in place, to the nine 64-bit words of
 * the state.  The round constant XORed into the middle word (x4) is built
 * from "round": for round values 0..15 the low nibble is round and the high
 * nibble is 0x0F - round. */
void gascon256_core_round(gascon256_state_t *state, uint8_t round)
{
uint64_t t0, t1, t2, t3, t4, t5, t6, t7, t8;
/* Load the state into local variables */
/* Little-endian hosts read the 64-bit words directly; other hosts decode
 * them from the byte view with le_load_word64() */
#if defined(LW_UTIL_LITTLE_ENDIAN)
uint64_t x0 = state->S[0];
uint64_t x1 = state->S[1];
uint64_t x2 = state->S[2];
uint64_t x3 = state->S[3];
uint64_t x4 = state->S[4];
uint64_t x5 = state->S[5];
uint64_t x6 = state->S[6];
uint64_t x7 = state->S[7];
uint64_t x8 = state->S[8];
#else
uint64_t x0 = le_load_word64(state->B);
uint64_t x1 = le_load_word64(state->B + 8);
uint64_t x2 = le_load_word64(state->B + 16);
uint64_t x3 = le_load_word64(state->B + 24);
uint64_t x4 = le_load_word64(state->B + 32);
uint64_t x5 = le_load_word64(state->B + 40);
uint64_t x6 = le_load_word64(state->B + 48);
uint64_t x7 = le_load_word64(state->B + 56);
uint64_t x8 = le_load_word64(state->B + 64);
#endif
/* Add the round constant to the middle of the state */
x4 ^= ((0x0F - round) << 4) | round;
/* Substitution layer */
/* The statements below depend on each other; their order must be kept */
x0 ^= x8; x2 ^= x1; x4 ^= x3; x6 ^= x5; x8 ^= x7; t0 = (~x0) & x1;
t1 = (~x1) & x2; t2 = (~x2) & x3; t3 = (~x3) & x4; t4 = (~x4) & x5;
t5 = (~x5) & x6; t6 = (~x6) & x7; t7 = (~x7) & x8; t8 = (~x8) & x0;
x0 ^= t1; x1 ^= t2; x2 ^= t3; x3 ^= t4; x4 ^= t5; x5 ^= t6; x6 ^= t7;
x7 ^= t8; x8 ^= t0; x1 ^= x0; x3 ^= x2; x5 ^= x4; x7 ^= x6; x0 ^= x8;
x4 = ~x4;
/* Linear diffusion layer */
/* Each word is XORed with two rotated copies of itself */
x0 ^= intRightRotate19_64(x0) ^ intRightRotate28_64(x0);
x1 ^= intRightRotate61_64(x1) ^ intRightRotate38_64(x1);
x2 ^= intRightRotate1_64(x2) ^ intRightRotate6_64(x2);
x3 ^= intRightRotate10_64(x3) ^ intRightRotate17_64(x3);
x4 ^= intRightRotate7_64(x4) ^ intRightRotate40_64(x4);
x5 ^= intRightRotate31_64(x5) ^ intRightRotate26_64(x5);
x6 ^= intRightRotate53_64(x6) ^ intRightRotate58_64(x6);
x7 ^= intRightRotate9_64(x7) ^ intRightRotate46_64(x7);
x8 ^= intRightRotate43_64(x8) ^ intRightRotate50_64(x8);
/* Write the local variables back to the state */
#if defined(LW_UTIL_LITTLE_ENDIAN)
state->S[0] = x0;
state->S[1] = x1;
state->S[2] = x2;
state->S[3] = x3;
state->S[4] = x4;
state->S[5] = x5;
state->S[6] = x6;
state->S[7] = x7;
state->S[8] = x8;
#else
le_store_word64(state->B, x0);
le_store_word64(state->B + 8, x1);
le_store_word64(state->B + 16, x2);
le_store_word64(state->B + 24, x3);
le_store_word64(state->B + 32, x4);
le_store_word64(state->B + 40, x5);
le_store_word64(state->B + 48, x6);
le_store_word64(state->B + 56, x7);
le_store_word64(state->B + 64, x8);
#endif
}
#ifdef DRYGASCON_G_OPT
/* Prototype of the externally provided routine whose name is given by the
 * DRYGASCON_G_OPT macro.  It receives the state as a raw uint64_t pointer. */
void DRYGASCON_G_OPT(uint64_t* state, uint32_t rounds);
//use state only to access c,r,x
/* Optimized G function for DrySPONGE128: hands the whole state, reinterpreted
 * as an array of 64-bit words, to the DRYGASCON_G_OPT routine together with
 * the number of rounds.
 * NOTE(review): the cast means the routine relies on the member layout of
 * drysponge128_state_t -- confirm before reordering that structure. */
static void drysponge128_g_impl(drysponge128_state_t *state,unsigned int rounds)
{
DRYGASCON_G_OPT((uint64_t*)state,rounds);
}
#else
/* Portable G function for DrySPONGE128.  Only the c, r and x members of the
 * state are meant to be used here; this version reads and writes c and
 * writes r.
 *
 * After every core round, rate word i is combined with c.W[i] and with
 * c.W[4 + ((i + 1) & 3)], i.e. the second group of four capacity words
 * taken with a rotation of one word.  The first round (round number 0)
 * always runs and overwrites r; the remaining rounds, numbered 1 up to
 * rounds - 1, accumulate into r by XOR. */
static void drysponge128_g_impl(drysponge128_state_t *state,unsigned int rounds)
{
    unsigned int step;
    unsigned int lane;

    /* Round 0 initialises the rate buffer */
    gascon128_core_round(&(state->c), 0);
    for (lane = 0; lane < 4; ++lane) {
        state->r.W[lane] =
            state->c.W[lane] ^ state->c.W[4 + ((lane + 1) & 3)];
    }

    /* Later rounds fold their output into the rate buffer */
    for (step = 1; step < rounds; ++step) {
        gascon128_core_round(&(state->c), step);
        for (lane = 0; lane < 4; ++lane) {
            state->r.W[lane] ^=
                state->c.W[lane] ^ state->c.W[4 + ((lane + 1) & 3)];
        }
    }
}
#endif
/* Public DrySPONGE128 G function: runs drysponge128_g_impl() with the round
 * count currently stored in the state. */
void drysponge128_g(drysponge128_state_t *state)
{
    const unsigned int rounds = state->rounds;

    drysponge128_g_impl(state, rounds);
}
/* DrySPONGE256 G function: runs state->rounds core rounds (at least one)
 * and squeezes the rate block out of the capacity.
 *
 * After every core round, rate word i is combined with one word from each
 * of the first four groups of four capacity words (64 bytes in total);
 * group g is taken with a rotation of g words:
 *     c.W[i], c.W[4 + ((i + 1) & 3)], c.W[8 + ((i + 2) & 3)],
 *     c.W[12 + ((i + 3) & 3)].
 * Round 0 overwrites r; later rounds accumulate into it by XOR. */
void drysponge256_g(drysponge256_state_t *state)
{
    unsigned step;
    unsigned lane;

    /* Round 0 initialises the rate buffer */
    gascon256_core_round(&(state->c), 0);
    for (lane = 0; lane < 4; ++lane) {
        state->r.W[lane] =
            state->c.W[lane] ^
            state->c.W[4 + ((lane + 1) & 3)] ^
            state->c.W[8 + ((lane + 2) & 3)] ^
            state->c.W[12 + ((lane + 3) & 3)];
    }

    /* Later rounds fold their output into the rate buffer */
    for (step = 1; step < state->rounds; ++step) {
        gascon256_core_round(&(state->c), step);
        for (lane = 0; lane < 4; ++lane) {
            state->r.W[lane] ^=
                state->c.W[lane] ^
                state->c.W[4 + ((lane + 1) & 3)] ^
                state->c.W[8 + ((lane + 2) & 3)] ^
                state->c.W[12 + ((lane + 3) & 3)];
        }
    }
}
#endif /* !__AVR__ */
#ifndef DRYGASCON_G_OPT
/* Runs state->rounds GASCON-128 core rounds, numbered from 0, on the
 * capacity without producing any rate output. */
void drysponge128_g_core(drysponge128_state_t *state)
{
    unsigned step = 0;

    while (step < state->rounds) {
        gascon128_core_round(&(state->c), step);
        ++step;
    }
}
#endif
/* Runs state->rounds GASCON-256 core rounds, numbered from 0, on the
 * capacity without producing any rate output. */
void drysponge256_g_core(drysponge256_state_t *state)
{
    unsigned step = 0;

    while (step < state->rounds) {
        gascon256_core_round(&(state->c), step);
        ++step;
    }
}
/**
 * \fn uint32_t drysponge_select_x(const uint32_t x[4], uint8_t index)
 * \brief Picks one of the four words of x without a data-dependent fetch.
 *
 * \param x Points to the four elements of x.
 * \param index Which element to extract, between 0 and 3.
 *
 * \return The selected element of x.
 */
#if defined(__HAS_CACHE__)
STATIC_INLINE uint32_t drysponge_select_x(const uint32_t x[4], uint8_t index)
{
    /* The index is derived from the data being processed, so a plain
     * x[index] would be a data-dependent memory access.  To stay clear
     * of cache timing issues every element of x is read and the three
     * unwanted ones are masked to zero.
     *
     * For each lane, (index ^ lane) is zero only for the wanted lane, so
     * (0x04 - (index ^ lane)) >> 2 is 1 for that lane and 0 for the others,
     * and negating it gives an all-ones or all-zeroes mask.
     *
     * Power analysis remains a possible side channel: the mask is all-ones
     * for exactly one lane, which may show up as a different power
     * consumption for that "x[lane] & mask" step and could in theory allow
     * the plaintext input to be read from the CPU's power trace.
     *
     * The DryGASCON specification acknowledges the possibility of plaintext
     * recovery in section 7.4 and suggests, as a software mitigation,
     * randomizing the indexes into c and x and the order in which words
     * are processed.  That is not done here yet; patches are welcome.
     */
    uint32_t selected = 0;
    int lane;

    for (lane = 0; lane < 4; ++lane) {
        uint32_t keep = -((uint32_t)((0x04 - (index ^ lane)) >> 2));
        selected ^= x[lane] & keep;
    }
    return selected;
}
#else
/* Without __HAS_CACHE__ the word is selected directly.  Targets such as AVR
 * are more or less immune to cache timing issues because they have nothing
 * like an L1 or L2 cache. */
#define drysponge_select_x(x, index) ((x)[(index)])
#endif
#ifndef DRYGASCON_F_OPT
/**
 * \brief Mixes one 10-bit group into the DrySPONGE128 capacity.
 *
 * \param state DrySPONGE128 state.
 * \param data The data to be mixed; only the bottom 10 bits are used.
 *
 * The group is consumed as five 2-bit selectors, lowest bits first.
 * Selector k picks a word of x that is XORed into capacity word 2 * k.
 */
static void drysponge128_mix_phase_round
    (drysponge128_state_t *state, uint32_t data)
{
    unsigned pair;

    for (pair = 0; pair < 5; ++pair) {
        unsigned selector = (data >> (2 * pair)) & 0x03;
        state->c.W[2 * pair] ^= drysponge_select_x(state->x.W, selector);
    }
}
#endif
/**
 * \brief Mixes one 18-bit group into the DrySPONGE256 capacity.
 *
 * \param state DrySPONGE256 state.
 * \param data The data to be mixed; only the bottom 18 bits are used.
 *
 * The group is consumed as nine 2-bit selectors, lowest bits first.
 * Selector k picks a word of x that is XORed into capacity word 2 * k.
 */
static void drysponge256_mix_phase_round
    (drysponge256_state_t *state, uint32_t data)
{
    unsigned pair;

    for (pair = 0; pair < 9; ++pair) {
        unsigned selector = (data >> (2 * pair)) & 0x03;
        state->c.W[2 * pair] ^= drysponge_select_x(state->x.W, selector);
    }
}
#ifndef DRYGASCON_F_OPT
/**
 * \brief Mixes an input block into a DrySPONGE128 state.
 *
 * \param state The DrySPONGE128 state.
 * \param data Full rate block containing the input data.
 * \param ds Domain separator: it is XORed with the last data byte to form
 * the second-to-last group, and ds >> 10 forms the last group.
 *
 * The block is fed in as fourteen groups, of which only the bottom 10 bits
 * of each are consumed.  A core round with round number 0 runs between
 * consecutive groups, but not after the last one.
 */
static void drysponge128_mix_phase
    (drysponge128_state_t *state, const unsigned char data[DRYSPONGE128_RATE],unsigned int ds)
{
    unsigned base;

    /* Bytes 0..14: every run of five bytes holds four 10-bit groups */
    for (base = 0; base < 15; base += 5) {
        const unsigned char *chunk = data + base;

        drysponge128_mix_phase_round
            (state, chunk[0] | (((uint32_t)(chunk[1])) << 8));
        gascon128_core_round(&(state->c), 0);

        drysponge128_mix_phase_round
            (state, (chunk[1] >> 2) | (((uint32_t)(chunk[2])) << 6));
        gascon128_core_round(&(state->c), 0);

        drysponge128_mix_phase_round
            (state, (chunk[2] >> 4) | (((uint32_t)(chunk[3])) << 4));
        gascon128_core_round(&(state->c), 0);

        drysponge128_mix_phase_round
            (state, (chunk[3] >> 6) | (((uint32_t)(chunk[4])) << 2));
        gascon128_core_round(&(state->c), 0);
    }

    /* Byte 15 combined with the domain separator */
    drysponge128_mix_phase_round(state, data[15] ^ ds);
    gascon128_core_round(&(state->c), 0);

    /* Remaining domain separator bits; no core round follows */
    drysponge128_mix_phase_round(state, ds >> 10);
}
#endif
/**
 * \brief Mixes an input block into a DrySPONGE256 state.
 *
 * \param state The DrySPONGE256 state.
 * \param data Full rate block containing the input data.
 *
 * The block is fed in as eight groups, of which only the bottom 18 bits of
 * each are consumed.  The last group holds the top two bits of the final
 * byte XORed with state->domain.  A core round with round number 0 runs
 * between consecutive groups, but not after the last one.  On return
 * state->domain has been reset to zero.
 */
static void drysponge256_mix_phase
    (drysponge256_state_t *state, const unsigned char data[DRYSPONGE256_RATE])
{
    uint32_t group;

    /* Bytes 0..8 hold four groups */
    group  = data[0];
    group |= ((uint32_t)(data[1])) << 8;
    group |= ((uint32_t)(data[2])) << 16;
    drysponge256_mix_phase_round(state, group);
    gascon256_core_round(&(state->c), 0);

    group  = data[2] >> 2;
    group |= ((uint32_t)(data[3])) << 6;
    group |= ((uint32_t)(data[4])) << 14;
    drysponge256_mix_phase_round(state, group);
    gascon256_core_round(&(state->c), 0);

    group  = data[4] >> 4;
    group |= ((uint32_t)(data[5])) << 4;
    group |= ((uint32_t)(data[6])) << 12;
    drysponge256_mix_phase_round(state, group);
    gascon256_core_round(&(state->c), 0);

    group  = data[6] >> 6;
    group |= ((uint32_t)(data[7])) << 2;
    group |= ((uint32_t)(data[8])) << 10;
    drysponge256_mix_phase_round(state, group);
    gascon256_core_round(&(state->c), 0);

    /* Bytes 9..15 hold three groups plus two leftover bits */
    group  = data[9];
    group |= ((uint32_t)(data[10])) << 8;
    group |= ((uint32_t)(data[11])) << 16;
    drysponge256_mix_phase_round(state, group);
    gascon256_core_round(&(state->c), 0);

    group  = data[11] >> 2;
    group |= ((uint32_t)(data[12])) << 6;
    group |= ((uint32_t)(data[13])) << 14;
    drysponge256_mix_phase_round(state, group);
    gascon256_core_round(&(state->c), 0);

    group  = data[13] >> 4;
    group |= ((uint32_t)(data[14])) << 4;
    group |= ((uint32_t)(data[15])) << 12;
    drysponge256_mix_phase_round(state, group);
    gascon256_core_round(&(state->c), 0);

    /* Leftover bits combined with the domain; no core round follows */
    group  = data[15] >> 6;
    group ^= state->domain;
    drysponge256_mix_phase_round(state, group);

    /* Revert to the default domain separator for the next block */
    state->domain = 0;
}
/* drygascon128_f_impl(): the DryGASCON128 F function on one full rate block.
 * state  - DrySPONGE128 state to update.
 * input  - the block to absorb.
 * ds     - domain separator for this block.
 * rounds - number of rounds for the G step.
 * NOTE(review): the optimized variant is static while the portable variant
 * has external linkage; no prototype for it is visible in the header --
 * confirm whether the portable one should be static as well. */
#ifdef DRYGASCON_F_OPT
/* Prototype of the externally provided routine named by DRYGASCON_F_OPT */
void DRYGASCON_F_OPT(drysponge128_state_t *state, const unsigned char *input,unsigned int ds, unsigned int rounds);
/* Optimized variant: forwards all arguments to the DRYGASCON_F_OPT routine */
static void drygascon128_f_impl(drysponge128_state_t *state, const unsigned char *input,unsigned int ds, unsigned int rounds){
DRYGASCON_F_OPT(state, input, ds, rounds);
}
#else
/* Portable variant: mix the block and domain separator into the capacity,
 * then run the G function for the requested number of rounds */
void drygascon128_f_impl(drysponge128_state_t *state, const unsigned char *input,unsigned int ds, unsigned int rounds){
drysponge128_mix_phase(state, input ,ds);
drysponge128_g_impl(state,rounds);
}
#endif
/* Runs the DryGASCON128 F function on one block of input.
 *
 * state - DrySPONGE128 state; its domain and rounds members select the
 *         domain separator and round count for this call.
 * input - the data to absorb.
 * len   - number of valid bytes in input.  A block shorter than
 *         DRYSPONGE128_RATE is copied and padded with a 0x01 byte followed
 *         by zeroes; anything else is treated as one full block.
 *
 * When DRYGASCON_ALIGN_INPUT_32 is defined a full block is also copied
 * into the local buffer first; otherwise it is used in place.
 * state->domain is reset to zero before returning. */
void drygascon128_f_wrap(drysponge128_state_t *state, const unsigned char *input, unsigned len){
    drysponge128_rate_t block; /* enforce alignment (if needed by f_impl) */
    const unsigned char *source = input;

    if (len < DRYSPONGE128_RATE) {
        memcpy(block.B, input, len);
        block.B[len] = 0x01;
        memset(block.B + len + 1, 0, DRYSPONGE128_RATE - len - 1);
        source = block.B;
    }
#ifdef DRYGASCON_ALIGN_INPUT_32
    else {
        memcpy(block.B, input, DRYSPONGE128_RATE);
        source = block.B;
    }
#endif

    drygascon128_f_impl(state, source, state->domain, state->rounds);

    /* Revert to the default domain separator for the next block */
    state->domain = 0;
}
/* Absorption phase of the DrySPONGE256 F function.
 *
 * A block of DRYSPONGE256_RATE bytes or more is mixed in directly from
 * input.  A shorter block is first copied into a local buffer and padded
 * with a 0x01 byte followed by zeroes. */
void drysponge256_f_absorb
    (drysponge256_state_t *state, const unsigned char *input, unsigned len)
{
    unsigned char block[DRYSPONGE256_RATE];

    /* Full block: nothing to pad */
    if (len >= DRYSPONGE256_RATE) {
        drysponge256_mix_phase(state, input);
        return;
    }

    memcpy(block, input, len);
    block[len] = 0x01;
    memset(block + len + 1, 0, DRYSPONGE256_RATE - len - 1);
    drysponge256_mix_phase(state, block);
}
/**
 * \brief Reports whether any two of the four words of an "x" value match.
 *
 * \param x Points to the "x" buffer to check.
 *
 * \return 1 if at least two of the words are equal, 0 if all four words
 * are distinct from each other.
 *
 * All six pairs are always examined and no branch depends on the values,
 * in an attempt to run in constant time and avoid giving away any
 * information about the value of the key.
 */
static int drysponge_x_words_are_same(const uint32_t x[4])
{
    int same = 0;
    unsigned first = 0;

    while (first < 3) {
        unsigned second = first + 1;
        while (second < 4) {
            /* diff is zero only for equal words; subtracting one then
             * wraps around and sets the top bit of the 64-bit value */
            uint64_t diff = (uint64_t)(x[first] ^ x[second]);
            same |= (int)((diff - 1) >> 63);
            ++second;
        }
        ++first;
    }
    return same;
}
/* Returns 1 when the "x" member of the state starts on a 16-byte boundary
 * (the low four address bits are all zero), otherwise 0. */
int drysponge128_safe_alignement(const drysponge128_state_t*state){
    const uintptr_t address = (uintptr_t)&(state->x);

    return (address & 0xF) == 0;
}
/* Sets up a DrySPONGE128 state from a key and a 16-byte nonce.
 *
 * state       - state to initialise.
 * key         - key material, keysize bytes long.
 * keysize     - selects how the key is used:
 *   DRYGASCON128_SAFEKEY_SIZE: the key supplies c and then x directly.
 *   DRYGASCON128_FASTKEY_SIZE: c is filled with repeated copies of the first
 *                              16 key bytes, x is the next 16 key bytes.
 *   DRYGASCON128_MINKEY_SIZE:  c is filled with repeated copies of the key
 *                              and x is derived from it.
 *   For any other value only c is filled; x is left untouched.
 * nonce       - 16 bytes of nonce, absorbed with DRYSPONGE128_INIT_ROUNDS.
 * final_block - non-zero to add DRYDOMAIN128_FINAL to the nonce domain.
 *
 * A caller-supplied x with two equal words is invalid and this function
 * deliberately never returns in that case.  On return state->rounds is
 * DRYSPONGE128_ROUNDS. */
void drysponge128_setup
    (drysponge128_state_t *state, const unsigned char *key, unsigned int keysize,
     const unsigned char *nonce, int final_block)
{
    if (DRYGASCON128_SAFEKEY_SIZE == keysize) {
        /* Fill C and X directly with the key */
        memcpy(state->c.B, key, sizeof(state->c));
        memcpy(state->x.B, key + sizeof(state->c), sizeof(state->x));
        if (drysponge_x_words_are_same(state->x.W)) {
            /* Block here if the key is not valid.  The loop has no
             * controlling expression on purpose: a side-effect-free loop
             * with a non-constant condition may be assumed to terminate
             * and be removed by the compiler (C11 6.8.5p6). */
            for (;;) {
            }
        }
    } else {
        /* Fill the GASCON-128 state with repeated copies of the key */
        memcpy(state->c.B, key, 16);
        memcpy(state->c.B + 16, key, 16);
        memcpy(state->c.B + 32, key, 8);
        if (DRYGASCON128_FASTKEY_SIZE == keysize) {
            /* Fill X with the 16 last bytes of the key */
            memcpy(state->x.B, key + 16, sizeof(state->x));
            if (drysponge_x_words_are_same(state->x.W)) {
                /* Block here if the key is not valid (see above) */
                for (;;) {
                }
            }
        } else if (DRYGASCON128_MINKEY_SIZE == keysize) {
            /* Generate the "x" value for the state.  All four words of "x"
             * must be unique because they will be used in
             * drysponge_select_x() as stand-ins for the bit pairs 00, 01,
             * 10, and 11.
             *
             * Run the core block operation over and over until "x" is
             * unique.  Technically the runtime here is key-dependent and
             * not constant.  If the input key is randomized, this should
             * only take 1 round on average so it is "almost constant time".
             */
            do {
                gascon128_g0(state);
            } while (drysponge_x_words_are_same(state->c.W));
            memcpy(state->x.W, state->c.W, sizeof(state->x));
            /* Replace the generated "x" value in the state with the key prefix */
            memcpy(state->c.W, key, sizeof(state->x));
        }
    }

    /* Absorb the nonce into the state with an increased number of rounds */
    state->rounds = DRYSPONGE128_INIT_ROUNDS;
    state->domain = DRYDOMAIN128_NONCE;
    if (final_block)
        state->domain |= DRYDOMAIN128_FINAL;
    drygascon128_f_wrap(state, nonce, 16);

    /* Set up the normal number of rounds for future operations */
    state->rounds = DRYSPONGE128_ROUNDS;
}
/* Sets up a DrySPONGE256 state from a 32-byte key and a 16-byte nonce.
 *
 * The capacity is first filled with repeated copies of the key.  Core
 * rounds with round number 0 are then applied until the first four words
 * of the capacity are all distinct; those words become "x" and are replaced
 * in the capacity by the key prefix.  Finally the nonce is absorbed using
 * DRYSPONGE256_INIT_ROUNDS, with DRYDOMAIN256_FINAL added to the domain
 * when final_block is non-zero.  On return state->rounds is
 * DRYSPONGE256_ROUNDS. */
void drysponge256_setup
    (drysponge256_state_t *state, const unsigned char *key,
     const unsigned char *nonce, int final_block)
{
    unsigned offset;
    uint32_t domain = DRYDOMAIN256_NONCE;

    /* Two full copies of the key followed by its first 8 bytes */
    for (offset = 0; offset < 64; offset += 32)
        memcpy(state->c.B + offset, key, 32);
    memcpy(state->c.B + 64, key, 8);

    /* Derive "x": keep permuting until its four words differ */
    do {
        gascon256_core_round(&(state->c), 0);
    } while (drysponge_x_words_are_same(state->c.W));
    memcpy(state->x.W, state->c.W, sizeof(state->x));

    /* The words taken for "x" are replaced by the key prefix */
    memcpy(state->c.W, key, sizeof(state->x));

    /* Nonce absorption uses the larger initialization round count */
    state->rounds = DRYSPONGE256_INIT_ROUNDS;
    if (final_block)
        domain |= DRYDOMAIN256_FINAL;
    state->domain = domain;
    drysponge256_f_absorb(state, nonce, 16);
    drysponge256_g(state);

    /* Normal round count for everything that follows */
    state->rounds = DRYSPONGE256_ROUNDS;
}
/*
* Copyright (C) 2020 Southern Storm Software, Pty Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef LW_INTERNAL_DRYSPONGE_H
#define LW_INTERNAL_DRYSPONGE_H
#include "drygascon.h"
#include "drygascon128_arm_selector.h"
#include "internal-util.h"
/**
* \file internal-drysponge.h
* \brief Internal implementation of DrySPONGE for the DryGASCON cipher.
*
* References: https://github.com/sebastien-riou/DryGASCON
*/
#ifdef __cplusplus
extern "C" {
#endif
/**
 * \brief Size of the GASCON-128 permutation state in bytes
 * (five 64-bit words).
 */
#define GASCON128_STATE_SIZE 40
/**
 * \brief Size of the GASCON-256 permutation state in bytes
 * (nine 64-bit words).
 */
#define GASCON256_STATE_SIZE 72
/**
 * \brief Rate of absorption and squeezing for DrySPONGE128, in bytes.
 */
#define DRYSPONGE128_RATE 16
/**
 * \brief Rate of absorption and squeezing for DrySPONGE256, in bytes.
 */
#define DRYSPONGE256_RATE 16
/**
 * \brief Size of the "x" value for DrySPONGE128, in bytes
 * (four 32-bit words).
 */
#define DRYSPONGE128_XSIZE 16
/**
 * \brief Size of the "x" value for DrySPONGE256, in bytes
 * (four 32-bit words).
 */
#define DRYSPONGE256_XSIZE 16
/**
 * \brief Normal number of rounds for DrySPONGE128 when absorbing
 * and squeezing data.
 *
 * drysponge128_setup() leaves this value in the state's rounds field.
 */
#define DRYSPONGE128_ROUNDS 7
/**
 * \brief Number of rounds for DrySPONGE128 during initialization.
 *
 * Used by drysponge128_setup() while the nonce is absorbed.
 */
#define DRYSPONGE128_INIT_ROUNDS 11
/**
 * \brief Normal number of rounds for DrySPONGE256 when absorbing
 * and squeezing data.
 *
 * drysponge256_setup() leaves this value in the state's rounds field.
 */
#define DRYSPONGE256_ROUNDS 8
/**
 * \brief Number of rounds for DrySPONGE256 during initialization.
 *
 * Used by drysponge256_setup() while the nonce is absorbed.
 */
#define DRYSPONGE256_INIT_ROUNDS 12
/* The DrySPONGE128 domain values come in two encodings.
 *
 * With DRYGASCON_F_OPT defined they occupy bits 0..3 and are handed
 * unchanged to the DRYGASCON_F_OPT routine.
 *
 * Without it they are shifted up by 8 bits to suit the portable
 * drysponge128_mix_phase(): that function XORs the domain separator with the
 * last data byte (so the padded and final bits land in bits 8 and 9 of that
 * 10-bit group) and then mixes the separator shifted right by 10 as the
 * last group. */
#ifdef DRYGASCON_F_OPT
/**
 * \brief DrySPONGE128 domain bit for a padded block.
 */
#define DRYDOMAIN128_PADDED (1 << 0)
/**
 * \brief DrySPONGE128 domain bit for a final block.
 */
#define DRYDOMAIN128_FINAL (1 << 1)
/**
 * \brief DrySPONGE128 domain value for processing the nonce.
 */
#define DRYDOMAIN128_NONCE (1 << 2)
/**
 * \brief DrySPONGE128 domain value for processing the associated data.
 */
#define DRYDOMAIN128_ASSOC_DATA (2 << 2)
/**
 * \brief DrySPONGE128 domain value for processing the message.
 */
#define DRYDOMAIN128_MESSAGE (3 << 2)
#else
/**
 * \brief DrySPONGE128 domain bit for a padded block.
 */
#define DRYDOMAIN128_PADDED (1 << 8)
/**
 * \brief DrySPONGE128 domain bit for a final block.
 */
#define DRYDOMAIN128_FINAL (1 << 9)
/**
 * \brief DrySPONGE128 domain value for processing the nonce.
 */
#define DRYDOMAIN128_NONCE (1 << 10)
/**
 * \brief DrySPONGE128 domain value for processing the associated data.
 */
#define DRYDOMAIN128_ASSOC_DATA (2 << 10)
/**
 * \brief DrySPONGE128 domain value for processing the message.
 */
#define DRYDOMAIN128_MESSAGE (3 << 10)
#endif
/* The DrySPONGE256 domain values start at bit 2: drysponge256_mix_phase()
 * XORs the domain with the top two bits of the last data byte, which occupy
 * bits 0 and 1 of the final group. */
/**
 * \brief DrySPONGE256 domain bit for a padded block.
 */
#define DRYDOMAIN256_PADDED (1 << 2)
/**
 * \brief DrySPONGE256 domain bit for a final block.
 */
#define DRYDOMAIN256_FINAL (1 << 3)
/**
 * \brief DrySPONGE256 domain value for processing the nonce.
 */
#define DRYDOMAIN256_NONCE (1 << 4)
/**
 * \brief DrySPONGE256 domain value for processing the associated data.
 */
#define DRYDOMAIN256_ASSOC_DATA (2 << 4)
/**
 * \brief DrySPONGE256 domain value for processing the message.
 */
#define DRYDOMAIN256_MESSAGE (3 << 4)
/**
 * \brief Internal state of the GASCON-128 permutation.
 *
 * The three members are views of the same GASCON128_STATE_SIZE bytes.
 */
typedef union
{
uint64_t S[GASCON128_STATE_SIZE / 8]; /**< 64-bit words of the state */
uint32_t W[GASCON128_STATE_SIZE / 4]; /**< 32-bit words of the state */
uint8_t B[GASCON128_STATE_SIZE]; /**< Bytes of the state */
} gascon128_state_t;
/**
 * \brief Internal state of the GASCON-256 permutation.
 *
 * The three members are views of the same GASCON256_STATE_SIZE bytes.
 */
typedef union
{
uint64_t S[GASCON256_STATE_SIZE / 8]; /**< 64-bit words of the state */
uint32_t W[GASCON256_STATE_SIZE / 4]; /**< 32-bit words of the state */
uint8_t B[GASCON256_STATE_SIZE]; /**< Bytes of the state */
} gascon256_state_t;
/**
 * \brief Structure of a rate block for DrySPONGE128.
 *
 * The three members are views of the same DRYSPONGE128_RATE bytes.
 */
typedef union
{
uint64_t S[DRYSPONGE128_RATE / 8]; /**< 64-bit words of the rate */
uint32_t W[DRYSPONGE128_RATE / 4]; /**< 32-bit words of the rate */
uint8_t B[DRYSPONGE128_RATE]; /**< Bytes of the rate */
} drysponge128_rate_t;
/**
 * \brief Structure of a rate block for DrySPONGE256.
 *
 * The three members are views of the same DRYSPONGE256_RATE bytes.
 */
typedef union
{
uint64_t S[DRYSPONGE256_RATE / 8]; /**< 64-bit words of the rate */
uint32_t W[DRYSPONGE256_RATE / 4]; /**< 32-bit words of the rate */
uint8_t B[DRYSPONGE256_RATE]; /**< Bytes of the rate */
} drysponge256_rate_t;
/**
 * \brief Structure of the "x" value for DrySPONGE128.
 *
 * The three members are views of the same DRYSPONGE128_XSIZE bytes.
 * The type is declared with 16-byte alignment.
 */
typedef union
{
uint64_t S[DRYSPONGE128_XSIZE / 8]; /**< 64-bit words of the "x" value */
uint32_t W[DRYSPONGE128_XSIZE / 4]; /**< 32-bit words of the "x" value */
uint8_t B[DRYSPONGE128_XSIZE]; /**< Bytes of the "x" value */
} __attribute__((aligned(16))) drysponge128_x_t;
/**
 * \brief Structure of the "x" value for DrySPONGE256.
 *
 * The three members are views of the same DRYSPONGE256_XSIZE bytes.
 */
typedef union
{
uint64_t S[DRYSPONGE256_XSIZE / 8]; /**< 64-bit words of the "x" value */
uint32_t W[DRYSPONGE256_XSIZE / 4]; /**< 32-bit words of the "x" value */
uint8_t B[DRYSPONGE256_XSIZE]; /**< Bytes of the "x" value */
} drysponge256_x_t;
/**
 * \brief Structure of the rolling DrySPONGE128 state.
 *
 * The structure is declared with 16-byte alignment.
 * NOTE(review): the member order differs from drysponge256_state_t (domain
 * and rounds sit between c and r here); presumably the optimized routines,
 * which receive the state as a raw pointer, depend on this layout --
 * confirm before reordering.
 */
typedef struct
{
gascon128_state_t c; /**< GASCON-128 state for the capacity */
uint32_t domain; /**< Domain value to mix on next F call */
uint32_t rounds; /**< Number of rounds for next G call */
drysponge128_rate_t r; /**< Buffer for a rate block of data */
drysponge128_x_t x; /**< "x" value for the sponge */
} __attribute__((aligned(16))) drysponge128_state_t;
/**
 * \brief Structure of the rolling DrySPONGE256 state.
 *
 * The domain field is reset to zero each time a block is mixed in.
 */
typedef struct
{
gascon256_state_t c; /**< GASCON-256 state for the capacity */
drysponge256_rate_t r; /**< Buffer for a rate block of data */
drysponge256_x_t x; /**< "x" value for the sponge */
uint32_t domain; /**< Domain value to mix on next F call */
uint32_t rounds; /**< Number of rounds for next G call */
} drysponge256_state_t;
/**
 * \brief Permutes the GASCON-128 state using one iteration of CoreRound.
 *
 * \param state The GASCON-128 state to be permuted.
 * \param round The round number, which determines the round constant
 * that is XORed into the middle word of the state.
 *
 * The input and output \a state will be in little-endian byte order.
 */
void gascon128_core_round(gascon128_state_t *state, uint8_t round);
/**
 * \brief Permutes the GASCON-256 state using one iteration of CoreRound.
 *
 * \param state The GASCON-256 state to be permuted.
 * \param round The round number, which determines the round constant
 * that is XORed into the middle word of the state.
 *
 * The input and output \a state will be in little-endian byte order.
 */
void gascon256_core_round(gascon256_state_t *state, uint8_t round);
/**
 * \brief Performs the DrySPONGE128 G function which runs the core
 * rounds and squeezes data out of the GASCON-128 state.
 *
 * \param state The DrySPONGE128 state; its rounds field gives the
 * number of rounds to run.
 *
 * The data that is squeezed out will be in state->r on exit.
 */
void drysponge128_g(drysponge128_state_t *state);
/**
 * \brief Performs the DrySPONGE256 G function which runs the core
 * rounds and squeezes data out of the GASCON-256 state.
 *
 * \param state The DrySPONGE256 state; its rounds field gives the
 * number of rounds to run.
 *
 * The data that is squeezed out will be in state->r on exit.
 */
void drysponge256_g(drysponge256_state_t *state);
/**
 * \brief Performs the DrySPONGE128 G function which runs the core
 * rounds but does not squeeze out any output.
 *
 * \param state The DrySPONGE128 state.
 *
 * \note The definition is only compiled when DRYGASCON_G_OPT is not defined.
 */
void drysponge128_g_core(drysponge128_state_t *state);
/**
 * \brief Performs the DrySPONGE256 G function which runs the core
 * rounds but does not squeeze out any output.
 *
 * \param state The DrySPONGE256 state; its rounds field gives the
 * number of rounds to run.
 */
void drysponge256_g_core(drysponge256_state_t *state);
/**
 * \brief Performs the absorption phase of the DrySPONGE256 F function.
 *
 * \param state The DrySPONGE256 state.
 * \param input The block of input data to incorporate into the state.
 * \param len The length of the input block, which must be less than
 * or equal to DRYSPONGE256_RATE. Smaller input blocks will be padded
 * with a 0x01 byte followed by zero bytes.
 *
 * This function must be followed by a call to drysponge256_g() or
 * drysponge256_g_core() to perform the full F operation.
 */
void drysponge256_f_absorb
(drysponge256_state_t *state, const unsigned char *input, unsigned len);
/**
 * \brief Performs the DryGASCON128 F function on one block of input.
 *
 * \param state The DrySPONGE128 state; its domain and rounds fields are
 * used for this call.
 * \param input The block of input data to incorporate into the state.
 * \param len The number of valid bytes in the input block. Blocks shorter
 * than DRYSPONGE128_RATE are padded with a 0x01 byte followed by zero
 * bytes; any other length is treated as one full block.
 *
 * The domain field of \a state is reset to zero on exit.
 */
void drygascon128_f_wrap(drysponge128_state_t *state, const unsigned char *input, unsigned len);
/**
 * \brief Determine if state alignment is safe vs timing attacks.
 *
 * \param state Points to the state to check.
 *
 * \return Non-zero if the alignment is safe, which is the case when the
 * "x" member of the state starts on a 16-byte boundary.
 *
 * We expect this to be completely optimized out by the compiler if the
 * alignment is enforced at build time.
 */
int drysponge128_safe_alignement(const drysponge128_state_t*state);
/**
 * \brief Set up a DrySPONGE128 state to begin encryption or decryption.
 *
 * \param state The DrySPONGE128 state.
 * \param key Points to the \a keysize bytes of the key.
 * \param keysize Size of the key in bytes. The sizes that are handled are
 * DRYGASCON128_MINKEY_SIZE, DRYGASCON128_FASTKEY_SIZE and
 * DRYGASCON128_SAFEKEY_SIZE.
 * \param nonce Points to the 16 bytes of the nonce.
 * \param final_block Non-zero if after key setup there will be no more blocks.
 */
void drysponge128_setup
(drysponge128_state_t *state, const unsigned char *key, unsigned int keysize,
const unsigned char *nonce, int final_block);
/**
 * \brief Set up a DrySPONGE256 state to begin encryption or decryption.
 *
 * \param state The DrySPONGE256 state.
 * \param key Points to the 32 bytes of the key.
 * \param nonce Points to the 16 bytes of the nonce.
 * \param final_block Non-zero if after key setup there will be no more blocks.
 *
 * On exit the nonce has been absorbed and the rounds field of \a state is
 * set to DRYSPONGE256_ROUNDS.
 */
void drysponge256_setup
(drysponge256_state_t *state, const unsigned char *key,
const unsigned char *nonce, int final_block);
#ifdef __cplusplus
}
#endif
#endif
/*
* Copyright (C) 2020 Southern Storm Software, Pty Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef LW_INTERNAL_UTIL_H
#define LW_INTERNAL_UTIL_H
#include <stdint.h>
/* Figure out how to inline functions using this compiler.
 *
 * STATIC_INLINE expands to the storage-class/inline specifiers to put in
 * front of small helper functions defined in headers:
 *   - C++ and C99-or-later compilers: "static inline"
 *   - GNU-compatible compilers in pre-C99 mode: "static __inline__"
 *   - Microsoft compilers in pre-C99 mode: "static __inline"
 *   - anything else: plain "static"
 *
 * __STDC_VERSION__ is tested with defined() first so that the check does
 * not trip -Wundef in C89 mode, and __STDC__ is not required because some
 * compilers only define it in strict-conformance mode. */
#if defined(__cplusplus) || \
    (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)
#define STATIC_INLINE static inline
#elif defined(__GNUC__) || defined(__clang__)
#define STATIC_INLINE static __inline__
#elif defined(_MSC_VER)
#define STATIC_INLINE static __inline
#else
#define STATIC_INLINE static
#endif
/* Try to figure out whether the CPU is little-endian or big-endian.
 * May need to modify this to include new compiler-specific defines.
 * Alternatively, define __LITTLE_ENDIAN__ or __BIG_ENDIAN__ in your
 * compiler flags when you compile this library.
 *
 * LW_UTIL_LITTLE_ENDIAN is defined to 1 on little-endian targets and is
 * left undefined on big-endian targets.
 *
 * Detection order:
 *   1. __BYTE_ORDER__ when the compiler provides it.  This must come
 *      before the architecture list because some of the listed
 *      architectures (ARM in particular) can run in either byte order.
 *   2. An explicit __LITTLE_ENDIAN__ or __BIG_ENDIAN__ define, plus the
 *      ARM big-endian markers __ARMEB__ / __AARCH64EB__.
 *   3. Architectures that are assumed to be little-endian when nothing
 *      above said otherwise. */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == 1234
#define LW_UTIL_LITTLE_ENDIAN 1
#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == 4321
/* Big endian */
#elif defined(__LITTLE_ENDIAN__)
#define LW_UTIL_LITTLE_ENDIAN 1
#elif defined(__BIG_ENDIAN__) || defined(__ARMEB__) || defined(__AARCH64EB__)
/* Big endian */
#elif defined(__x86_64) || defined(__x86_64__) || \
      defined(__i386) || defined(__i386__) || \
      defined(__AVR__) || defined(__arm) || defined(__arm__) || \
      defined(_M_AMD64) || defined(_M_X64) || defined(_M_IX86) || \
      defined(_M_IA64) || defined(_M_ARM) || defined(_M_ARM_FP)
#define LW_UTIL_LITTLE_ENDIAN 1
#else
#error "Cannot determine the endianess of this platform"
#endif
/* Helper macros to load and store values while converting endian-ness */
/* Assemble a 32-bit value from four bytes stored most-significant byte
 * first.  "ptr" is evaluated once per byte, so it must be free of side
 * effects. */
#define be_load_word32(ptr) \
    ( ((uint32_t)((ptr)[3]))        | \
     (((uint32_t)((ptr)[2])) <<  8) | \
     (((uint32_t)((ptr)[1])) << 16) | \
     (((uint32_t)((ptr)[0])) << 24))
/* Write the 32-bit value "x" into four bytes at "ptr", most-significant
 * byte first.  "x" is evaluated exactly once; "ptr" once per byte. */
#define be_store_word32(ptr, x) \
    do { \
        uint32_t _x = (x); \
        (ptr)[0] = (uint8_t)((_x >> 24) & 0xFFU); \
        (ptr)[1] = (uint8_t)((_x >> 16) & 0xFFU); \
        (ptr)[2] = (uint8_t)((_x >>  8) & 0xFFU); \
        (ptr)[3] = (uint8_t)(_x & 0xFFU); \
    } while (0)
/* Assemble a 32-bit value from four bytes stored least-significant byte
 * first.  "ptr" is evaluated once per byte, so it must be free of side
 * effects. */
#define le_load_word32(ptr) \
    ( ((uint32_t)((ptr)[0]))        | \
     (((uint32_t)((ptr)[1])) <<  8) | \
     (((uint32_t)((ptr)[2])) << 16) | \
     (((uint32_t)((ptr)[3])) << 24))
/* Write the 32-bit value "x" into four bytes at "ptr", least-significant
 * byte first.  "x" is evaluated exactly once; "ptr" once per byte. */
#define le_store_word32(ptr, x) \
    do { \
        uint32_t _x = (x); \
        (ptr)[0] = (uint8_t)(_x & 0xFFU); \
        (ptr)[1] = (uint8_t)((_x >>  8) & 0xFFU); \
        (ptr)[2] = (uint8_t)((_x >> 16) & 0xFFU); \
        (ptr)[3] = (uint8_t)((_x >> 24) & 0xFFU); \
    } while (0)
/* Assemble a 64-bit value from eight bytes stored most-significant byte
 * first.  "ptr" is evaluated once per byte, so it must be free of side
 * effects. */
#define be_load_word64(ptr) \
    ( ((uint64_t)((ptr)[7]))        | \
     (((uint64_t)((ptr)[6])) <<  8) | \
     (((uint64_t)((ptr)[5])) << 16) | \
     (((uint64_t)((ptr)[4])) << 24) | \
     (((uint64_t)((ptr)[3])) << 32) | \
     (((uint64_t)((ptr)[2])) << 40) | \
     (((uint64_t)((ptr)[1])) << 48) | \
     (((uint64_t)((ptr)[0])) << 56))
/* Write the 64-bit value "x" into eight bytes at "ptr", most-significant
 * byte first.  "x" is evaluated exactly once; "ptr" once per byte. */
#define be_store_word64(ptr, x) \
    do { \
        uint64_t _x = (x); \
        (ptr)[0] = (uint8_t)((_x >> 56) & 0xFFU); \
        (ptr)[1] = (uint8_t)((_x >> 48) & 0xFFU); \
        (ptr)[2] = (uint8_t)((_x >> 40) & 0xFFU); \
        (ptr)[3] = (uint8_t)((_x >> 32) & 0xFFU); \
        (ptr)[4] = (uint8_t)((_x >> 24) & 0xFFU); \
        (ptr)[5] = (uint8_t)((_x >> 16) & 0xFFU); \
        (ptr)[6] = (uint8_t)((_x >>  8) & 0xFFU); \
        (ptr)[7] = (uint8_t)(_x & 0xFFU); \
    } while (0)
/* Assemble a 64-bit value from eight bytes stored least-significant byte
 * first.  "ptr" is evaluated once per byte, so it must be free of side
 * effects. */
#define le_load_word64(ptr) \
    ( ((uint64_t)((ptr)[0]))        | \
     (((uint64_t)((ptr)[1])) <<  8) | \
     (((uint64_t)((ptr)[2])) << 16) | \
     (((uint64_t)((ptr)[3])) << 24) | \
     (((uint64_t)((ptr)[4])) << 32) | \
     (((uint64_t)((ptr)[5])) << 40) | \
     (((uint64_t)((ptr)[6])) << 48) | \
     (((uint64_t)((ptr)[7])) << 56))
/* Write the 64-bit value "x" into eight bytes at "ptr", least-significant
 * byte first.  "x" is evaluated exactly once; "ptr" once per byte. */
#define le_store_word64(ptr, x) \
    do { \
        uint64_t _x = (x); \
        (ptr)[0] = (uint8_t)(_x & 0xFFU); \
        (ptr)[1] = (uint8_t)((_x >>  8) & 0xFFU); \
        (ptr)[2] = (uint8_t)((_x >> 16) & 0xFFU); \
        (ptr)[3] = (uint8_t)((_x >> 24) & 0xFFU); \
        (ptr)[4] = (uint8_t)((_x >> 32) & 0xFFU); \
        (ptr)[5] = (uint8_t)((_x >> 40) & 0xFFU); \
        (ptr)[6] = (uint8_t)((_x >> 48) & 0xFFU); \
        (ptr)[7] = (uint8_t)((_x >> 56) & 0xFFU); \
    } while (0)
/* Assemble a 16-bit value from two bytes stored most-significant byte
 * first.  "ptr" is evaluated once per byte, so it must be free of side
 * effects. */
#define be_load_word16(ptr) \
    ( ((uint16_t)((ptr)[1])) | \
     (((uint16_t)((ptr)[0])) << 8))
/* Write the 16-bit value "x" into two bytes at "ptr", most-significant
 * byte first.  "x" is evaluated exactly once; "ptr" once per byte. */
#define be_store_word16(ptr, x) \
    do { \
        uint16_t _x = (x); \
        (ptr)[0] = (uint8_t)((_x >> 8) & 0xFFU); \
        (ptr)[1] = (uint8_t)(_x & 0xFFU); \
    } while (0)
/* Assemble a 16-bit value from two bytes stored least-significant byte
 * first.  "ptr" is evaluated once per byte, so it must be free of side
 * effects. */
#define le_load_word16(ptr) \
    ( ((uint16_t)((ptr)[0])) | \
     (((uint16_t)((ptr)[1])) << 8))
/* Write the 16-bit value "x" into two bytes at "ptr", least-significant
 * byte first.  "x" is evaluated exactly once; "ptr" once per byte. */
#define le_store_word16(ptr, x) \
    do { \
        uint16_t _x = (x); \
        (ptr)[0] = (uint8_t)(_x & 0xFFU); \
        (ptr)[1] = (uint8_t)((_x >> 8) & 0xFFU); \
    } while (0)
/* In-place XOR: dest[i] ^= src[i] for the first "len" bytes.
 * Each argument is evaluated exactly once; "len" is converted to
 * unsigned. */
#define lw_xor_block(dest, src, len) \
    do { \
        unsigned char *_dest = (dest); \
        const unsigned char *_src = (src); \
        unsigned _len = (len); \
        for (; _len != 0; --_len, ++_dest, ++_src) { \
            *_dest ^= *_src; \
        } \
    } while (0)
/* Three-operand XOR: dest[i] = src1[i] ^ src2[i] for the first "len"
 * bytes.  Each argument is evaluated exactly once; "len" is converted to
 * unsigned. */
#define lw_xor_block_2_src(dest, src1, src2, len) \
    do { \
        unsigned char *_dest = (dest); \
        const unsigned char *_src1 = (src1); \
        const unsigned char *_src2 = (src2); \
        unsigned _len = (len); \
        for (; _len != 0; --_len, ++_dest, ++_src1, ++_src2) { \
            *_dest = *_src1 ^ *_src2; \
        } \
    } while (0)
/* In-place XOR with a copy of the result: for the first "len" bytes,
 * dest[i] ^= src[i] and then dest2[i] receives the updated dest[i].
 * Each argument is evaluated exactly once; "len" is converted to
 * unsigned. */
#define lw_xor_block_2_dest(dest2, dest, src, len) \
    do { \
        unsigned char *_dest2 = (dest2); \
        unsigned char *_dest = (dest); \
        const unsigned char *_src = (src); \
        unsigned _len = (len); \
        for (; _len != 0; --_len, ++_dest2, ++_dest, ++_src) { \
            *_dest ^= *_src; \
            *_dest2 = *_dest; \
        } \
    } while (0)
/* XOR two byte buffers into "dest" while at the same time copying the
 * contents of "src2" into "dest2".  For each of the first "len" bytes:
 * dest2[i] = src2[i], then dest[i] = src1[i] ^ src2[i].  The src2 byte is
 * read before either destination is written. */
#define lw_xor_block_copy_src(dest2, dest, src1, src2, len) \
    do { \
        unsigned char *_dest2 = (dest2); \
        unsigned char *_dest = (dest); \
        const unsigned char *_src1 = (src1); \
        const unsigned char *_src2 = (src2); \
        unsigned _len = (len); \
        for (; _len != 0; --_len) { \
            unsigned char _temp = *_src2; \
            *_dest2 = _temp; \
            *_dest = *_src1 ^ _temp; \
            ++_src2; \
            ++_dest2; \
            ++_src1; \
            ++_dest; \
        } \
    } while (0)
/* XOR with swap: for each of the first "len" bytes, dest2[i] receives
 * dest[i] ^ src[i] and dest[i] is then overwritten with src[i] itself
 * (the source value is swapped into the "dest" buffer).  The source byte
 * is read before either destination is written. */
#define lw_xor_block_swap(dest2, dest, src, len) \
    do { \
        unsigned char *_dest2 = (dest2); \
        unsigned char *_dest = (dest); \
        const unsigned char *_src = (src); \
        unsigned _len = (len); \
        for (; _len != 0; --_len) { \
            unsigned char _temp = *_src; \
            ++_src; \
            *_dest2 = *_dest ^ _temp; \
            ++_dest2; \
            *_dest = _temp; \
            ++_dest; \
        } \
    } while (0)
/* On AVR the cheapest rotations are by 1 bit or by a multiple of 8 bits,
 * so for best performance every other 32-bit rotation count is built by
 * composing those.  LW_CRYPTO_ROTATE32_COMPOSED selects that strategy:
 * 1 when compiling for AVR, 0 everywhere else. */
#ifndef __AVR__
#define LW_CRYPTO_ROTATE32_COMPOSED 0
#else
#define LW_CRYPTO_ROTATE32_COMPOSED 1
#endif
/* Rotation macros for 32-bit arguments */
/* Generic left rotate of a 32-bit value.
 *
 * "a" is converted to uint32_t and evaluated once; "bits" is the rotation
 * count in the range 0..31 and is evaluated twice.  The complementary
 * shift count is reduced modulo 32 so that a count of 0 returns the value
 * unchanged instead of shifting by the full width of the type, which is
 * undefined behaviour in C.  Relies on GNU statement expressions. */
#define leftRotate(a, bits) \
    (__extension__ ({ \
        uint32_t _temp = (a); \
        (_temp << (bits)) | (_temp >> ((32 - (bits)) & 31)); \
    }))
/* Generic right rotate of a 32-bit value.
 *
 * "a" is converted to uint32_t and evaluated once; "bits" is the rotation
 * count in the range 0..31 and is evaluated twice.  The complementary
 * shift count is reduced modulo 32 so that a count of 0 returns the value
 * unchanged instead of shifting by the full width of the type, which is
 * undefined behaviour in C.  Relies on GNU statement expressions. */
#define rightRotate(a, bits) \
    (__extension__ ({ \
        uint32_t _temp = (a); \
        (_temp >> (bits)) | (_temp << ((32 - (bits)) & 31)); \
    }))
#if !LW_CRYPTO_ROTATE32_COMPOSED
/* Left rotate a 32-bit value by a specific number of bits (1..31).
 * Each macro simply forwards to the generic leftRotate() with a literal
 * bit count.  These macros may be replaced with more efficient ones on
 * platforms that lack a barrel shifter */
#define leftRotate1(a) (leftRotate((a), 1))
#define leftRotate2(a) (leftRotate((a), 2))
#define leftRotate3(a) (leftRotate((a), 3))
#define leftRotate4(a) (leftRotate((a), 4))
#define leftRotate5(a) (leftRotate((a), 5))
#define leftRotate6(a) (leftRotate((a), 6))
#define leftRotate7(a) (leftRotate((a), 7))
#define leftRotate8(a) (leftRotate((a), 8))
#define leftRotate9(a) (leftRotate((a), 9))
#define leftRotate10(a) (leftRotate((a), 10))
#define leftRotate11(a) (leftRotate((a), 11))
#define leftRotate12(a) (leftRotate((a), 12))
#define leftRotate13(a) (leftRotate((a), 13))
#define leftRotate14(a) (leftRotate((a), 14))
#define leftRotate15(a) (leftRotate((a), 15))
#define leftRotate16(a) (leftRotate((a), 16))
#define leftRotate17(a) (leftRotate((a), 17))
#define leftRotate18(a) (leftRotate((a), 18))
#define leftRotate19(a) (leftRotate((a), 19))
#define leftRotate20(a) (leftRotate((a), 20))
#define leftRotate21(a) (leftRotate((a), 21))
#define leftRotate22(a) (leftRotate((a), 22))
#define leftRotate23(a) (leftRotate((a), 23))
#define leftRotate24(a) (leftRotate((a), 24))
#define leftRotate25(a) (leftRotate((a), 25))
#define leftRotate26(a) (leftRotate((a), 26))
#define leftRotate27(a) (leftRotate((a), 27))
#define leftRotate28(a) (leftRotate((a), 28))
#define leftRotate29(a) (leftRotate((a), 29))
#define leftRotate30(a) (leftRotate((a), 30))
#define leftRotate31(a) (leftRotate((a), 31))
/* Right rotate a 32-bit value by a specific number of bits (1..31).
 * Each macro simply forwards to the generic rightRotate() with a literal
 * bit count.  These macros may be replaced with more efficient ones on
 * platforms that lack a barrel shifter */
#define rightRotate1(a) (rightRotate((a), 1))
#define rightRotate2(a) (rightRotate((a), 2))
#define rightRotate3(a) (rightRotate((a), 3))
#define rightRotate4(a) (rightRotate((a), 4))
#define rightRotate5(a) (rightRotate((a), 5))
#define rightRotate6(a) (rightRotate((a), 6))
#define rightRotate7(a) (rightRotate((a), 7))
#define rightRotate8(a) (rightRotate((a), 8))
#define rightRotate9(a) (rightRotate((a), 9))
#define rightRotate10(a) (rightRotate((a), 10))
#define rightRotate11(a) (rightRotate((a), 11))
#define rightRotate12(a) (rightRotate((a), 12))
#define rightRotate13(a) (rightRotate((a), 13))
#define rightRotate14(a) (rightRotate((a), 14))
#define rightRotate15(a) (rightRotate((a), 15))
#define rightRotate16(a) (rightRotate((a), 16))
#define rightRotate17(a) (rightRotate((a), 17))
#define rightRotate18(a) (rightRotate((a), 18))
#define rightRotate19(a) (rightRotate((a), 19))
#define rightRotate20(a) (rightRotate((a), 20))
#define rightRotate21(a) (rightRotate((a), 21))
#define rightRotate22(a) (rightRotate((a), 22))
#define rightRotate23(a) (rightRotate((a), 23))
#define rightRotate24(a) (rightRotate((a), 24))
#define rightRotate25(a) (rightRotate((a), 25))
#define rightRotate26(a) (rightRotate((a), 26))
#define rightRotate27(a) (rightRotate((a), 27))
#define rightRotate28(a) (rightRotate((a), 28))
#define rightRotate29(a) (rightRotate((a), 29))
#define rightRotate30(a) (rightRotate((a), 30))
#define rightRotate31(a) (rightRotate((a), 31))
#else /* LW_CRYPTO_ROTATE32_COMPOSED */
/* Composed rotation macros where 1 and 8 are fast, but others are slow.
 * Every count is expressed as one rotation by 0, 8, 16 or 24 bits (the
 * nearest multiple of 8) followed by up to four single-bit rotations in
 * whichever direction reaches the target count. */
/* Left rotate by 1 */
#define leftRotate1(a) (leftRotate((a), 1))
/* Left rotate by 2 */
#define leftRotate2(a) (leftRotate(leftRotate((a), 1), 1))
/* Left rotate by 3 */
#define leftRotate3(a) (leftRotate(leftRotate(leftRotate((a), 1), 1), 1))
/* Left rotate by 4 */
#define leftRotate4(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 1), 1), 1), 1))
/* Left rotate by 5: Rotate left by 8, then right by 3 */
#define leftRotate5(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 8), 1), 1), 1))
/* Left rotate by 6: Rotate left by 8, then right by 2 */
#define leftRotate6(a) (rightRotate(rightRotate(leftRotate((a), 8), 1), 1))
/* Left rotate by 7: Rotate left by 8, then right by 1 */
#define leftRotate7(a) (rightRotate(leftRotate((a), 8), 1))
/* Left rotate by 8 */
#define leftRotate8(a) (leftRotate((a), 8))
/* Left rotate by 9: Rotate left by 8, then left by 1 */
#define leftRotate9(a) (leftRotate(leftRotate((a), 8), 1))
/* Left rotate by 10: Rotate left by 8, then left by 2 */
#define leftRotate10(a) (leftRotate(leftRotate(leftRotate((a), 8), 1), 1))
/* Left rotate by 11: Rotate left by 8, then left by 3 */
#define leftRotate11(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 8), 1), 1), 1))
/* Left rotate by 12: Rotate left by 16, then right by 4 */
#define leftRotate12(a) (rightRotate(rightRotate(rightRotate(rightRotate(leftRotate((a), 16), 1), 1), 1), 1))
/* Left rotate by 13: Rotate left by 16, then right by 3 */
#define leftRotate13(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 16), 1), 1), 1))
/* Left rotate by 14: Rotate left by 16, then right by 2 */
#define leftRotate14(a) (rightRotate(rightRotate(leftRotate((a), 16), 1), 1))
/* Left rotate by 15: Rotate left by 16, then right by 1 */
#define leftRotate15(a) (rightRotate(leftRotate((a), 16), 1))
/* Left rotate by 16 */
#define leftRotate16(a) (leftRotate((a), 16))
/* Left rotate by 17: Rotate left by 16, then left by 1 */
#define leftRotate17(a) (leftRotate(leftRotate((a), 16), 1))
/* Left rotate by 18: Rotate left by 16, then left by 2 */
#define leftRotate18(a) (leftRotate(leftRotate(leftRotate((a), 16), 1), 1))
/* Left rotate by 19: Rotate left by 16, then left by 3 */
#define leftRotate19(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 16), 1), 1), 1))
/* Left rotate by 20: Rotate left by 16, then left by 4 */
#define leftRotate20(a) (leftRotate(leftRotate(leftRotate(leftRotate(leftRotate((a), 16), 1), 1), 1), 1))
/* Left rotate by 21: Rotate left by 24, then right by 3 */
#define leftRotate21(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 24), 1), 1), 1))
/* Left rotate by 22: Rotate left by 24, then right by 2 */
#define leftRotate22(a) (rightRotate(rightRotate(leftRotate((a), 24), 1), 1))
/* Left rotate by 23: Rotate left by 24, then right by 1 */
#define leftRotate23(a) (rightRotate(leftRotate((a), 24), 1))
/* Left rotate by 24 */
#define leftRotate24(a) (leftRotate((a), 24))
/* Left rotate by 25: Rotate left by 24, then left by 1 */
#define leftRotate25(a) (leftRotate(leftRotate((a), 24), 1))
/* Left rotate by 26: Rotate left by 24, then left by 2 */
#define leftRotate26(a) (leftRotate(leftRotate(leftRotate((a), 24), 1), 1))
/* Left rotate by 27: Rotate left by 24, then left by 3 */
#define leftRotate27(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 24), 1), 1), 1))
/* Left rotate by 28: Rotate right by 4 */
#define leftRotate28(a) (rightRotate(rightRotate(rightRotate(rightRotate((a), 1), 1), 1), 1))
/* Left rotate by 29: Rotate right by 3 */
#define leftRotate29(a) (rightRotate(rightRotate(rightRotate((a), 1), 1), 1))
/* Left rotate by 30: Rotate right by 2 */
#define leftRotate30(a) (rightRotate(rightRotate((a), 1), 1))
/* Left rotate by 31: Rotate right by 1 */
#define leftRotate31(a) (rightRotate((a), 1))
/* Define the 32-bit right rotations in terms of left rotations: a right
 * rotation by N bits is the same as a left rotation by (32 - N) bits, so
 * rightRotateN maps onto the composed leftRotate(32 - N) macro. */
#define rightRotate1(a) (leftRotate31((a)))
#define rightRotate2(a) (leftRotate30((a)))
#define rightRotate3(a) (leftRotate29((a)))
#define rightRotate4(a) (leftRotate28((a)))
#define rightRotate5(a) (leftRotate27((a)))
#define rightRotate6(a) (leftRotate26((a)))
#define rightRotate7(a) (leftRotate25((a)))
#define rightRotate8(a) (leftRotate24((a)))
#define rightRotate9(a) (leftRotate23((a)))
#define rightRotate10(a) (leftRotate22((a)))
#define rightRotate11(a) (leftRotate21((a)))
#define rightRotate12(a) (leftRotate20((a)))
#define rightRotate13(a) (leftRotate19((a)))
#define rightRotate14(a) (leftRotate18((a)))
#define rightRotate15(a) (leftRotate17((a)))
#define rightRotate16(a) (leftRotate16((a)))
#define rightRotate17(a) (leftRotate15((a)))
#define rightRotate18(a) (leftRotate14((a)))
#define rightRotate19(a) (leftRotate13((a)))
#define rightRotate20(a) (leftRotate12((a)))
#define rightRotate21(a) (leftRotate11((a)))
#define rightRotate22(a) (leftRotate10((a)))
#define rightRotate23(a) (leftRotate9((a)))
#define rightRotate24(a) (leftRotate8((a)))
#define rightRotate25(a) (leftRotate7((a)))
#define rightRotate26(a) (leftRotate6((a)))
#define rightRotate27(a) (leftRotate5((a)))
#define rightRotate28(a) (leftRotate4((a)))
#define rightRotate29(a) (leftRotate3((a)))
#define rightRotate30(a) (leftRotate2((a)))
#define rightRotate31(a) (leftRotate1((a)))
#endif /* LW_CRYPTO_ROTATE32_COMPOSED */
/* Rotation macros for 64-bit arguments */
/* Generic left rotate of a 64-bit value.
 *
 * "a" is converted to uint64_t and evaluated once; "bits" is the rotation
 * count in the range 0..63 and is evaluated twice.  The complementary
 * shift count is reduced modulo 64 so that a count of 0 returns the value
 * unchanged instead of shifting by the full width of the type, which is
 * undefined behaviour in C.  Relies on GNU statement expressions. */
#define leftRotate_64(a, bits) \
    (__extension__ ({ \
        uint64_t _temp = (a); \
        (_temp << (bits)) | (_temp >> ((64 - (bits)) & 63)); \
    }))
/* Generic right rotate of a 64-bit value.
 *
 * "a" is converted to uint64_t and evaluated once; "bits" is the rotation
 * count in the range 0..63 and is evaluated twice.  The complementary
 * shift count is reduced modulo 64 so that a count of 0 returns the value
 * unchanged instead of shifting by the full width of the type, which is
 * undefined behaviour in C.  Relies on GNU statement expressions. */
#define rightRotate_64(a, bits) \
    (__extension__ ({ \
        uint64_t _temp = (a); \
        (_temp >> (bits)) | (_temp << ((64 - (bits)) & 63)); \
    }))
/* Left rotate a 64-bit value by a specific number of bits (1..63).
 * Each macro simply forwards to the generic leftRotate_64() with a
 * literal bit count.  These macros may be replaced with more efficient
 * ones on platforms that lack a barrel shifter */
#define leftRotate1_64(a) (leftRotate_64((a), 1))
#define leftRotate2_64(a) (leftRotate_64((a), 2))
#define leftRotate3_64(a) (leftRotate_64((a), 3))
#define leftRotate4_64(a) (leftRotate_64((a), 4))
#define leftRotate5_64(a) (leftRotate_64((a), 5))
#define leftRotate6_64(a) (leftRotate_64((a), 6))
#define leftRotate7_64(a) (leftRotate_64((a), 7))
#define leftRotate8_64(a) (leftRotate_64((a), 8))
#define leftRotate9_64(a) (leftRotate_64((a), 9))
#define leftRotate10_64(a) (leftRotate_64((a), 10))
#define leftRotate11_64(a) (leftRotate_64((a), 11))
#define leftRotate12_64(a) (leftRotate_64((a), 12))
#define leftRotate13_64(a) (leftRotate_64((a), 13))
#define leftRotate14_64(a) (leftRotate_64((a), 14))
#define leftRotate15_64(a) (leftRotate_64((a), 15))
#define leftRotate16_64(a) (leftRotate_64((a), 16))
#define leftRotate17_64(a) (leftRotate_64((a), 17))
#define leftRotate18_64(a) (leftRotate_64((a), 18))
#define leftRotate19_64(a) (leftRotate_64((a), 19))
#define leftRotate20_64(a) (leftRotate_64((a), 20))
#define leftRotate21_64(a) (leftRotate_64((a), 21))
#define leftRotate22_64(a) (leftRotate_64((a), 22))
#define leftRotate23_64(a) (leftRotate_64((a), 23))
#define leftRotate24_64(a) (leftRotate_64((a), 24))
#define leftRotate25_64(a) (leftRotate_64((a), 25))
#define leftRotate26_64(a) (leftRotate_64((a), 26))
#define leftRotate27_64(a) (leftRotate_64((a), 27))
#define leftRotate28_64(a) (leftRotate_64((a), 28))
#define leftRotate29_64(a) (leftRotate_64((a), 29))
#define leftRotate30_64(a) (leftRotate_64((a), 30))
#define leftRotate31_64(a) (leftRotate_64((a), 31))
#define leftRotate32_64(a) (leftRotate_64((a), 32))
#define leftRotate33_64(a) (leftRotate_64((a), 33))
#define leftRotate34_64(a) (leftRotate_64((a), 34))
#define leftRotate35_64(a) (leftRotate_64((a), 35))
#define leftRotate36_64(a) (leftRotate_64((a), 36))
#define leftRotate37_64(a) (leftRotate_64((a), 37))
#define leftRotate38_64(a) (leftRotate_64((a), 38))
#define leftRotate39_64(a) (leftRotate_64((a), 39))
#define leftRotate40_64(a) (leftRotate_64((a), 40))
#define leftRotate41_64(a) (leftRotate_64((a), 41))
#define leftRotate42_64(a) (leftRotate_64((a), 42))
#define leftRotate43_64(a) (leftRotate_64((a), 43))
#define leftRotate44_64(a) (leftRotate_64((a), 44))
#define leftRotate45_64(a) (leftRotate_64((a), 45))
#define leftRotate46_64(a) (leftRotate_64((a), 46))
#define leftRotate47_64(a) (leftRotate_64((a), 47))
#define leftRotate48_64(a) (leftRotate_64((a), 48))
#define leftRotate49_64(a) (leftRotate_64((a), 49))
#define leftRotate50_64(a) (leftRotate_64((a), 50))
#define leftRotate51_64(a) (leftRotate_64((a), 51))
#define leftRotate52_64(a) (leftRotate_64((a), 52))
#define leftRotate53_64(a) (leftRotate_64((a), 53))
#define leftRotate54_64(a) (leftRotate_64((a), 54))
#define leftRotate55_64(a) (leftRotate_64((a), 55))
#define leftRotate56_64(a) (leftRotate_64((a), 56))
#define leftRotate57_64(a) (leftRotate_64((a), 57))
#define leftRotate58_64(a) (leftRotate_64((a), 58))
#define leftRotate59_64(a) (leftRotate_64((a), 59))
#define leftRotate60_64(a) (leftRotate_64((a), 60))
#define leftRotate61_64(a) (leftRotate_64((a), 61))
#define leftRotate62_64(a) (leftRotate_64((a), 62))
#define leftRotate63_64(a) (leftRotate_64((a), 63))
/* Right rotate a 64-bit value by a specific number of bits (1..63).
 * Each macro simply forwards to the generic rightRotate_64() with a
 * literal bit count.  These macros may be replaced with more efficient
 * ones on platforms that lack a barrel shifter */
#define rightRotate1_64(a) (rightRotate_64((a), 1))
#define rightRotate2_64(a) (rightRotate_64((a), 2))
#define rightRotate3_64(a) (rightRotate_64((a), 3))
#define rightRotate4_64(a) (rightRotate_64((a), 4))
#define rightRotate5_64(a) (rightRotate_64((a), 5))
#define rightRotate6_64(a) (rightRotate_64((a), 6))
#define rightRotate7_64(a) (rightRotate_64((a), 7))
#define rightRotate8_64(a) (rightRotate_64((a), 8))
#define rightRotate9_64(a) (rightRotate_64((a), 9))
#define rightRotate10_64(a) (rightRotate_64((a), 10))
#define rightRotate11_64(a) (rightRotate_64((a), 11))
#define rightRotate12_64(a) (rightRotate_64((a), 12))
#define rightRotate13_64(a) (rightRotate_64((a), 13))
#define rightRotate14_64(a) (rightRotate_64((a), 14))
#define rightRotate15_64(a) (rightRotate_64((a), 15))
#define rightRotate16_64(a) (rightRotate_64((a), 16))
#define rightRotate17_64(a) (rightRotate_64((a), 17))
#define rightRotate18_64(a) (rightRotate_64((a), 18))
#define rightRotate19_64(a) (rightRotate_64((a), 19))
#define rightRotate20_64(a) (rightRotate_64((a), 20))
#define rightRotate21_64(a) (rightRotate_64((a), 21))
#define rightRotate22_64(a) (rightRotate_64((a), 22))
#define rightRotate23_64(a) (rightRotate_64((a), 23))
#define rightRotate24_64(a) (rightRotate_64((a), 24))
#define rightRotate25_64(a) (rightRotate_64((a), 25))
#define rightRotate26_64(a) (rightRotate_64((a), 26))
#define rightRotate27_64(a) (rightRotate_64((a), 27))
#define rightRotate28_64(a) (rightRotate_64((a), 28))
#define rightRotate29_64(a) (rightRotate_64((a), 29))
#define rightRotate30_64(a) (rightRotate_64((a), 30))
#define rightRotate31_64(a) (rightRotate_64((a), 31))
#define rightRotate32_64(a) (rightRotate_64((a), 32))
#define rightRotate33_64(a) (rightRotate_64((a), 33))
#define rightRotate34_64(a) (rightRotate_64((a), 34))
#define rightRotate35_64(a) (rightRotate_64((a), 35))
#define rightRotate36_64(a) (rightRotate_64((a), 36))
#define rightRotate37_64(a) (rightRotate_64((a), 37))
#define rightRotate38_64(a) (rightRotate_64((a), 38))
#define rightRotate39_64(a) (rightRotate_64((a), 39))
#define rightRotate40_64(a) (rightRotate_64((a), 40))
#define rightRotate41_64(a) (rightRotate_64((a), 41))
#define rightRotate42_64(a) (rightRotate_64((a), 42))
#define rightRotate43_64(a) (rightRotate_64((a), 43))
#define rightRotate44_64(a) (rightRotate_64((a), 44))
#define rightRotate45_64(a) (rightRotate_64((a), 45))
#define rightRotate46_64(a) (rightRotate_64((a), 46))
#define rightRotate47_64(a) (rightRotate_64((a), 47))
#define rightRotate48_64(a) (rightRotate_64((a), 48))
#define rightRotate49_64(a) (rightRotate_64((a), 49))
#define rightRotate50_64(a) (rightRotate_64((a), 50))
#define rightRotate51_64(a) (rightRotate_64((a), 51))
#define rightRotate52_64(a) (rightRotate_64((a), 52))
#define rightRotate53_64(a) (rightRotate_64((a), 53))
#define rightRotate54_64(a) (rightRotate_64((a), 54))
#define rightRotate55_64(a) (rightRotate_64((a), 55))
#define rightRotate56_64(a) (rightRotate_64((a), 56))
#define rightRotate57_64(a) (rightRotate_64((a), 57))
#define rightRotate58_64(a) (rightRotate_64((a), 58))
#define rightRotate59_64(a) (rightRotate_64((a), 59))
#define rightRotate60_64(a) (rightRotate_64((a), 60))
#define rightRotate61_64(a) (rightRotate_64((a), 61))
#define rightRotate62_64(a) (rightRotate_64((a), 62))
#define rightRotate63_64(a) (rightRotate_64((a), 63))
/* Rotate a 16-bit value left by a number of bits.
 *
 * "a" is converted to uint16_t and evaluated once; "bits" is the rotation
 * count in the range 0..15 and is evaluated twice.
 *
 * The operand is promoted to a wider integer type before it is shifted,
 * so the bits rotated out of the top would otherwise remain set above
 * bit 15 of the result.  The result is therefore truncated back to
 * uint16_t.  The complementary shift count is reduced modulo 16 so that a
 * count of 0 is well defined even where int is only 16 bits wide.
 * Relies on GNU statement expressions. */
#define leftRotate_16(a, bits) \
    (__extension__ ({ \
        uint16_t _temp = (a); \
        (uint16_t)((_temp << (bits)) | (_temp >> ((16 - (bits)) & 15))); \
    }))
/* Rotate a 16-bit value right by a number of bits.
 *
 * "a" is converted to uint16_t and evaluated once; "bits" is the rotation
 * count in the range 0..15 and is evaluated twice.
 *
 * The operand is promoted to a wider integer type before it is shifted,
 * so the left-shifted half would otherwise leave stray bits set above
 * bit 15 of the result.  The result is therefore truncated back to
 * uint16_t.  The complementary shift count is reduced modulo 16 so that a
 * count of 0 is well defined even where int is only 16 bits wide.
 * Relies on GNU statement expressions. */
#define rightRotate_16(a, bits) \
    (__extension__ ({ \
        uint16_t _temp = (a); \
        (uint16_t)((_temp >> (bits)) | (_temp << ((16 - (bits)) & 15))); \
    }))
/* Left rotate a 16-bit value by a specific number of bits (1..15).
 * Each macro simply forwards to the generic leftRotate_16() with a
 * literal bit count.  These macros may be replaced with more efficient
 * ones on platforms that lack a barrel shifter */
#define leftRotate1_16(a) (leftRotate_16((a), 1))
#define leftRotate2_16(a) (leftRotate_16((a), 2))
#define leftRotate3_16(a) (leftRotate_16((a), 3))
#define leftRotate4_16(a) (leftRotate_16((a), 4))
#define leftRotate5_16(a) (leftRotate_16((a), 5))
#define leftRotate6_16(a) (leftRotate_16((a), 6))
#define leftRotate7_16(a) (leftRotate_16((a), 7))
#define leftRotate8_16(a) (leftRotate_16((a), 8))
#define leftRotate9_16(a) (leftRotate_16((a), 9))
#define leftRotate10_16(a) (leftRotate_16((a), 10))
#define leftRotate11_16(a) (leftRotate_16((a), 11))
#define leftRotate12_16(a) (leftRotate_16((a), 12))
#define leftRotate13_16(a) (leftRotate_16((a), 13))
#define leftRotate14_16(a) (leftRotate_16((a), 14))
#define leftRotate15_16(a) (leftRotate_16((a), 15))
/* Right rotate a 16-bit value by a specific number of bits (1..15).
 * Each macro simply forwards to the generic rightRotate_16() with a
 * literal bit count.  These macros may be replaced with more efficient
 * ones on platforms that lack a barrel shifter */
#define rightRotate1_16(a) (rightRotate_16((a), 1))
#define rightRotate2_16(a) (rightRotate_16((a), 2))
#define rightRotate3_16(a) (rightRotate_16((a), 3))
#define rightRotate4_16(a) (rightRotate_16((a), 4))
#define rightRotate5_16(a) (rightRotate_16((a), 5))
#define rightRotate6_16(a) (rightRotate_16((a), 6))
#define rightRotate7_16(a) (rightRotate_16((a), 7))
#define rightRotate8_16(a) (rightRotate_16((a), 8))
#define rightRotate9_16(a) (rightRotate_16((a), 9))
#define rightRotate10_16(a) (rightRotate_16((a), 10))
#define rightRotate11_16(a) (rightRotate_16((a), 11))
#define rightRotate12_16(a) (rightRotate_16((a), 12))
#define rightRotate13_16(a) (rightRotate_16((a), 13))
#define rightRotate14_16(a) (rightRotate_16((a), 14))
#define rightRotate15_16(a) (rightRotate_16((a), 15))
/* Rotate an 8-bit value left by a number of bits.
 *
 * "a" is converted to uint8_t and evaluated once; "bits" is the rotation
 * count in the range 0..7 and is evaluated twice.
 *
 * The operand is promoted to int before it is shifted, so the bits
 * rotated out of the top would otherwise remain set above bit 7 of the
 * result.  The result is therefore truncated back to uint8_t.  The
 * complementary shift count is reduced modulo 8.
 * Relies on GNU statement expressions. */
#define leftRotate_8(a, bits) \
    (__extension__ ({ \
        uint8_t _temp = (a); \
        (uint8_t)((_temp << (bits)) | (_temp >> ((8 - (bits)) & 7))); \
    }))
/* Rotate an 8-bit value right by a number of bits.
 *
 * "a" is converted to uint8_t and evaluated once; "bits" is the rotation
 * count in the range 0..7 and is evaluated twice.
 *
 * The operand is promoted to int before it is shifted, so the
 * left-shifted half would otherwise leave stray bits set above bit 7 of
 * the result.  The result is therefore truncated back to uint8_t.  The
 * complementary shift count is reduced modulo 8.
 * Relies on GNU statement expressions. */
#define rightRotate_8(a, bits) \
    (__extension__ ({ \
        uint8_t _temp = (a); \
        (uint8_t)((_temp >> (bits)) | (_temp << ((8 - (bits)) & 7))); \
    }))
/* Left rotate an 8-bit value by a specific number of bits (1..7).
 * Each macro simply forwards to the generic leftRotate_8() with a
 * literal bit count.  These macros may be replaced with more efficient
 * ones on platforms that lack a barrel shifter */
#define leftRotate1_8(a) (leftRotate_8((a), 1))
#define leftRotate2_8(a) (leftRotate_8((a), 2))
#define leftRotate3_8(a) (leftRotate_8((a), 3))
#define leftRotate4_8(a) (leftRotate_8((a), 4))
#define leftRotate5_8(a) (leftRotate_8((a), 5))
#define leftRotate6_8(a) (leftRotate_8((a), 6))
#define leftRotate7_8(a) (leftRotate_8((a), 7))
/* Right rotate an 8-bit value by a specific number of bits (1..7).
 * Each macro simply forwards to the generic rightRotate_8() with a
 * literal bit count.  These macros may be replaced with more efficient
 * ones on platforms that lack a barrel shifter */
#define rightRotate1_8(a) (rightRotate_8((a), 1))
#define rightRotate2_8(a) (rightRotate_8((a), 2))
#define rightRotate3_8(a) (rightRotate_8((a), 3))
#define rightRotate4_8(a) (rightRotate_8((a), 4))
#define rightRotate5_8(a) (rightRotate_8((a), 5))
#define rightRotate6_8(a) (rightRotate_8((a), 6))
#define rightRotate7_8(a) (rightRotate_8((a), 7))
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment