///////////////////////////////////////////////////////////////////////////////
// encrypt.c: Optimized C99 implementation of the AEAD algorithm SCHWAEMM. //
// This file is part of the SPARKLE submission to NIST's LW Crypto Project. //
// Version 1.1.2 (2020-10-30), see for updates. //
// Authors: The SPARKLE Group (C. Beierle, A. Biryukov, L. Cardoso dos //
// Santos, J. Groszschaedl, L. Perrin, A. Udovenko, V. Velichkov, Q. Wang). //
// License: GPLv3 (see LICENSE file), other licenses available upon request. //
// Copyright (C) 2019-2020 University of Luxembourg . //
// ------------------------------------------------------------------------- //
// This program is free software: you can redistribute it and/or modify it //
// under the terms of the GNU General Public License as published by the //
// Free Software Foundation, either version 3 of the License, or (at your //
// option) any later version. This program is distributed in the hope that //
// it will be useful, but WITHOUT ANY WARRANTY; without even the implied //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
// GNU General Public License for more details. You should have received a //
// copy of the GNU General Public License along with this program. If not, //
// see . //
///////////////////////////////////////////////////////////////////////////////
// This source code file should be compiled with the following set of flags:
// -std=c99 -Wall -Wextra -Wshadow -fsanitize=address,undefined -O2
// gencat_aead.c shall be used to generate the test vector output file. The
// test vector output file shall be provided in the corresponding
// crypto_aead/[algorithm]/ directory
#include // for size_t
#include // for memcpy, memset
#include "schwaemm_cfg.h"
#include "sparkle_opt.h"
typedef unsigned char UChar;
typedef unsigned long long int ULLInt;
#define KEY_WORDS (SCHWAEMM_KEY_LEN/32)
#define KEY_BYTES (SCHWAEMM_KEY_LEN/8)
#define NONCE_WORDS (SCHWAEMM_NONCE_LEN/32)
#define NONCE_BYTES (SCHWAEMM_NONCE_LEN/8)
#define TAG_WORDS (SCHWAEMM_TAG_LEN/32)
#define TAG_BYTES (SCHWAEMM_TAG_LEN/8)
#define STATE_BRANS (SPARKLE_STATE/64)
#define STATE_WORDS (SPARKLE_STATE/32)
#define STATE_BYTES (SPARKLE_STATE/8)
#define RATE_BRANS (SPARKLE_RATE/64)
#define RATE_WORDS (SPARKLE_RATE/32)
#define RATE_BYTES (SPARKLE_RATE/8)
#define CAP_BRANS (SPARKLE_CAPACITY/64)
#define CAP_WORDS (SPARKLE_CAPACITY/32)
#define CAP_BYTES (SPARKLE_CAPACITY/8)
#define CONST_A0 (((uint32_t) (0 ^ (1 << CAP_BRANS))) << 24)
#define CONST_A1 (((uint32_t) (1 ^ (1 << CAP_BRANS))) << 24)
#define CONST_M2 (((uint32_t) (2 ^ (1 << CAP_BRANS))) << 24)
#define CONST_M3 (((uint32_t) (3 ^ (1 << CAP_BRANS))) << 24)
///////////////////////////////////////////////////////////////////////////////
//// PREPROCESSOR DIRECTIVES TO REPLACE THE C CODE OF SPARKLE BY ASM CODE /////
///////////////////////////////////////////////////////////////////////////////
// When this file is compiled for an AVR microcontroller and SPARKLE_ASSEMBLER
// is defined in schwaemmconfig.h, then the AVR assembler implementation of the
// SPARKLE permutation is used. On the other hand, if SPARKLE_ASSEMBLER is not
// defined, then the C version (i.e. the function sparkle_opt) is used.
#if (defined(__AVR) || defined(__AVR__)) && defined(SPARKLE_ASSEMBLER)
extern void sparkle_avr(uint32_t *state, int brans, int steps);
#define sparkle_opt(state, brans, steps) sparkle_avr((state), (brans), (steps))
#endif // if defined(__AVR__) && ...
// When this file is compiled for a MSP430 (or a MSP430X) microcontroller and
// SPARKLE_ASSEMBLER is defined in schwaemmconfig.h, then the MSP430 assembler
// implementation of the SPARKLE permutation is used. On the other hand, if
// SPARKLE_ASSEMBLER is not defined, then the C version (i.e. the function
// sparkle_opt) is used.
#if (defined(MSP430) || defined(__MSP430__)) && defined(SPARKLE_ASSEMBLER)
extern void sparkle_msp(uint32_t *state, int brans, int steps);
#define sparkle_opt(state, brans, steps) sparkle_msp((state), (brans), (steps))
#endif // if (defined(MSP430) || ...
// When this file is compiled for an ARM microcontroller and SPARKLE_ASSEMBLER
// is defined in schwaemmconfig.h, then one of the three branch-unrolled ARMv7M
// assembler implementations of the SPARKLE permutation is used, depending on
// the concrete SCHWAEMM instance. On the other hand, if SPARKLE_ASSEMBLER is
// not defined, then the C version (i.e. the function sparkle_opt) is used.
#if (defined(__arm__) || defined(_M_ARM)) && defined(SPARKLE_ASSEMBLER)
#if defined(SCHWAEMM256_128) || defined(SCHWAEMM192_192)
extern void sparkle384_arm(uint32_t *state, int steps);
#define sparkle_opt(state, brans, steps) sparkle384_arm((state), (steps))
#elif defined(SCHWAEMM128_128)
extern void sparkle256_arm(uint32_t *state, int steps);
#define sparkle_opt(state, brans, steps) sparkle256_arm((state), (steps))
#elif defined(SCHWAEMM256_256)
extern void sparkle512_arm(uint32_t *state, int steps);
#define sparkle_opt(state, brans, steps) sparkle512_arm((state), (steps))
#endif // if defined(SCHWAEMM256_128) || ...
#endif // if defined(__arm__) && ...
///////////////////////////////////////////////////////////////////////////////
/////// HELPER FUNCTIONS AND MACROS (RHO1, RHO2, RATE-WHITENING, ETC.) ////////
///////////////////////////////////////////////////////////////////////////////
// The plaintext, associated data, and ciphertext are stored in arrays of type
// unsigned char. Casting such an unsigned-char-pointer to an uint32_t-pointer
// increases alignment requirements, i.e. the start address of the array has to
// be even on 16-bit architectures or a multiple of four (i.e. 4-byte aligned)
// on 32-bit and 64-bit platforms. The following preprocessor statements help
// to determine the alignment requirements for a uint32_t pointer.
#define MIN_SIZE(a, b) ((sizeof(a) < sizeof(b)) ? sizeof(a) : sizeof(b))
#if defined(_MSC_VER) && !defined(__clang__) && !defined(__ICL)
#define UI32_ALIGN_BYTES MIN_SIZE(unsigned __int32, size_t)
#else
#include
#define UI32_ALIGN_BYTES MIN_SIZE(uint32_t, uint_fast8_t)
#endif
// The rate-whitening for SCHWAEMM256_128 applies the "tweak" described in
// Section 2.3.2 of the specification. Therefore, the indices used to load the
// 32-bit words from the capacity-part of the state need to be reduced modulo
// CAP_WORDS, which the C implementation below does by ANDing the index with
// (CAP_WORDS - 1) = 3. Performing the modulo reduction in this way only works
// when CAP_WORDS is a power of 2, which is the case for SCHWAEMM256_128.
#if (RATE_WORDS > CAP_WORDS)
#define CAP_INDEX(i) ((i) & (CAP_WORDS-1))
#else // RATE_WORDS <= CAP_WORDS
#define CAP_INDEX(i) (i)
#endif
// Rho and rate-whitening for the authentication of associated data. The third
// parameter indicates whether the uint8_t-pointer 'in' is properly aligned to
// permit casting to a uint32_t-pointer. If this is the case then array 'in' is
// processed directly, otherwise it is first copied to an aligned buffer.
static void rho_whi_aut(uint32_t *state, const uint8_t *in, int aligned)
{
uint32_t buffer[RATE_WORDS];
uint32_t *in32;
uint32_t tmp;
int i, j;
if (aligned) { // 'in' can be casted to uint32_t pointer
in32 = (uint32_t *) in;
} else { // 'in' is not sufficiently aligned for casting
memcpy(buffer, in, RATE_BYTES);
in32 = (uint32_t *) &buffer;
}
for (i = 0, j = RATE_WORDS/2; i < RATE_WORDS/2; i++, j++) {
tmp = state[i];
state[i] = state[j] ^ in32[i] ^ state[RATE_WORDS+i];
state[j] ^= tmp ^ in32[j] ^ state[RATE_WORDS+CAP_INDEX(j)];
}
}
// Rho and rate-whitening for the authentication of the last associated-data
// block. Since this last block may require padding, it is always copied to a
// buffer.
static void rho_whi_aut_last(uint32_t *state, const uint8_t *in, size_t inlen)
{
uint32_t buffer[RATE_WORDS];
uint8_t *bufptr;
uint32_t tmp;
int i, j;
memcpy(buffer, in, inlen);
if (inlen < RATE_BYTES) { // padding
bufptr = ((uint8_t *) buffer) + inlen;
memset(bufptr, 0, (RATE_BYTES - inlen));
*bufptr = 0x80;
}
for (i = 0, j = RATE_WORDS/2; i < RATE_WORDS/2; i++, j++) {
tmp = state[i];
state[i] = state[j] ^ buffer[i] ^ state[RATE_WORDS+i];
state[j] ^= tmp ^ buffer[j] ^ state[RATE_WORDS+CAP_INDEX(j)];
}
}
// Rho and rate-whitening for the encryption of plaintext. The third parameter
// indicates whether the uint8_t-pointers 'in' and 'out' are properly aligned
// to permit casting to uint32_t-pointers. If this is the case then array 'in'
// and 'out' are processed directly, otherwise 'in' is copied to an aligned
// buffer.
static void rho_whi_enc(uint32_t *state, uint8_t *out, const uint8_t *in, \
int aligned)
{
uint32_t buffer[RATE_WORDS];
uint32_t *in32, *out32;
uint32_t tmp1, tmp2;
int i, j;
if (aligned) { // 'in' and 'out' can be casted to uint32_t pointer
in32 = (uint32_t *) in;
out32 = (uint32_t *) out;
} else { // 'in' or 'out' is not sufficiently aligned for casting
memcpy(buffer, in, RATE_BYTES);
in32 = out32 = (uint32_t *) buffer;
}
for (i = 0, j = RATE_WORDS/2; i < RATE_WORDS/2; i++, j++) {
tmp1 = state[i];
tmp2 = state[j];
state[i] = state[j] ^ in32[i] ^ state[RATE_WORDS+i];
state[j] ^= tmp1 ^ in32[j] ^ state[RATE_WORDS+CAP_INDEX(j)];
out32[i] = in32[i] ^ tmp1;
out32[j] = in32[j] ^ tmp2;
}
if (!aligned)
memcpy(out, buffer, RATE_BYTES);
}
// Rho and rate-whitening for the encryption of the last plaintext block. Since
// this last block may require padding, it is always copied to a buffer.
static void rho_whi_enc_last(uint32_t *state, uint8_t *out, const uint8_t *in, \
size_t inlen)
{
uint32_t buffer[RATE_WORDS];
uint32_t tmp1, tmp2;
uint8_t *bufptr;
int i, j;
memcpy(buffer, in, inlen);
if (inlen < RATE_BYTES) { // padding
bufptr = ((uint8_t *) buffer) + inlen;
memset(bufptr, 0, (RATE_BYTES - inlen));
*bufptr = 0x80;
}
for (i = 0, j = RATE_WORDS/2; i < RATE_WORDS/2; i++, j++) {
tmp1 = state[i];
tmp2 = state[j];
state[i] = state[j] ^ buffer[i] ^ state[RATE_WORDS+i];
state[j] ^= tmp1 ^ buffer[j] ^ state[RATE_WORDS+CAP_INDEX(j)];
buffer[i] ^= tmp1;
buffer[j] ^= tmp2;
}
memcpy(out, buffer, inlen);
}
// Rho and rate-whitening for the decryption of ciphertext. The third parameter
// indicates whether the uint8_t-pointers 'in' and 'out' are properly aligned
// to permit casting to uint32_t-pointers. If this is the case then array 'in'
// and 'out' are processed directly, otherwise 'in' is copied to an aligned
// buffer.
static void rho_whi_dec(uint32_t *state, uint8_t *out, const uint8_t *in, \
int aligned)
{
uint32_t buffer[RATE_WORDS];
uint32_t *in32, *out32;
uint32_t tmp1, tmp2;
int i, j;
if (aligned) { // 'in' and 'out' can be casted to uint32_t pointer
in32 = (uint32_t *) in;
out32 = (uint32_t *) out;
} else { // 'in' or 'out' is not sufficiently aligned for casting
memcpy(buffer, in, RATE_BYTES);
in32 = out32 = (uint32_t *) buffer;
}
for (i = 0, j = RATE_WORDS/2; i < RATE_WORDS/2; i++, j++) {
tmp1 = state[i];
tmp2 = state[j];
state[i] ^= state[j] ^ in32[i] ^ state[RATE_WORDS+i];
state[j] = tmp1 ^ in32[j] ^ state[RATE_WORDS+CAP_INDEX(j)];
out32[i] = in32[i] ^ tmp1;
out32[j] = in32[j] ^ tmp2;
}
if (!aligned)
memcpy(out, buffer, RATE_BYTES);
}
// Rho and rate-whitening for the decryption of the last ciphertext block.
// Since this last block may require padding, it is always copied to a buffer.
static void rho_whi_dec_last(uint32_t *state, uint8_t *out, const uint8_t *in, \
size_t inlen)
{
uint32_t buffer[RATE_WORDS];
uint32_t tmp1, tmp2;
uint8_t *bufptr;
int i, j;
memcpy(buffer, in, inlen);
if (inlen < RATE_BYTES) { // padding
bufptr = ((uint8_t *) buffer) + inlen;
memcpy(bufptr, (((uint8_t *) state) + inlen), (RATE_BYTES - inlen));
*bufptr ^= 0x80;
}
for (i = 0, j = RATE_WORDS/2; i < RATE_WORDS/2; i++, j++) {
tmp1 = state[i];
tmp2 = state[j];
state[i] ^= state[j] ^ buffer[i] ^ state[RATE_WORDS+i];
state[j] = tmp1 ^ buffer[j] ^ state[RATE_WORDS+CAP_INDEX(j)];
buffer[i] ^= tmp1;
buffer[j] ^= tmp2;
}
memcpy(out, buffer, inlen);
}
///////////////////////////////////////////////////////////////////////////////
///////////// LOW-LEVEL AEAD FUNCTIONS (FOR USE WITH FELICS-AEAD) /////////////
///////////////////////////////////////////////////////////////////////////////
// The Initialize function loads nonce and key into the state and executes the
// SPARKLE permutation with the big number of steps.
void Initialize(uint32_t *state, const uint8_t *key, const uint8_t *nonce)
{
// load nonce into the rate-part of the state
memcpy(state, nonce, NONCE_BYTES);
// load key into the capacity-part of the sate
memcpy((state + RATE_WORDS), key, KEY_BYTES);
// execute SPARKLE with big number of steps
sparkle_opt(state, STATE_BRANS, SPARKLE_STEPS_BIG);
}
// The ProcessAssocData function absorbs the associated data, which becomes
// only authenticated but not encrypted, into the state (in blocks of size
// RATE_BYTES). Note that this function MUST NOT be called when the length of
// the associated data is 0.
void ProcessAssocData(uint32_t *state, const uint8_t *in, size_t inlen)
{
// check whether 'in' can be casted to uint32_t pointer
int aligned = ((size_t) in) % UI32_ALIGN_BYTES == 0;
// printf("Address of 'in': %p\n", in);
// Main Authentication Loop
while (inlen > RATE_BYTES) {
// combined Rho and rate-whitening operation
rho_whi_aut(state, in, aligned);
// execute SPARKLE with slim number of steps
sparkle_opt(state, STATE_BRANS, SPARKLE_STEPS_SLIM);
inlen -= RATE_BYTES;
in += RATE_BYTES;
}
// Authentication of Last Block
// addition of constant A0 or A1 to the state
state[STATE_WORDS-1] ^= ((inlen < RATE_BYTES) ? CONST_A0 : CONST_A1);
// combined Rho and rate-whitening (incl. padding)
rho_whi_aut_last(state, in, inlen);
// execute SPARKLE with big number of steps
sparkle_opt(state, STATE_BRANS, SPARKLE_STEPS_BIG);
}
// The ProcessPlainText function encrypts the plaintext (in blocks of size
// RATE_BYTES) and generates the respective ciphertext. The uint8_t-array 'in'
// contains the plaintext and the ciphertext is written to uint8_t-array 'out'
// ('in' and 'out' can be the same array, i.e. they can have the same start
// address). Note that this function MUST NOT be called when the length of the
// plaintext is 0.
void ProcessPlainText(uint32_t *state, uint8_t *out, const uint8_t *in, \
size_t inlen)
{
// check whether 'in' and 'out' can be casted to uint32_t pointer
int aligned = (((size_t) in) | ((size_t) out)) % UI32_ALIGN_BYTES == 0;
// printf("Address of 'in' and 'out': %p, %p\n", in, out);
// Main Encryption Loop
while (inlen > RATE_BYTES) {
// combined Rho and rate-whitening operation
rho_whi_enc(state, out, in, aligned);
// execute SPARKLE with slim number of steps
sparkle_opt(state, STATE_BRANS, SPARKLE_STEPS_SLIM);
inlen -= RATE_BYTES;
out += RATE_BYTES;
in += RATE_BYTES;
}
// Encryption of Last Block
// addition of constant M2 or M3 to the state
state[STATE_WORDS-1] ^= ((inlen < RATE_BYTES) ? CONST_M2 : CONST_M3);
// combined Rho and rate-whitening (incl. padding)
rho_whi_enc_last(state, out, in, inlen);
// execute SPARKLE with big number of steps
sparkle_opt(state, STATE_BRANS, SPARKLE_STEPS_BIG);
}
// The Finalize function adds the key to the capacity part of the state.
void Finalize(uint32_t *state, const uint8_t *key)
{
uint32_t buffer[TAG_WORDS];
int i;
// to prevent (potentially) unaligned memory accesses
memcpy(buffer, key, KEY_BYTES);
// add key to the capacity-part of the state
for (i = 0; i < KEY_WORDS; i++)
state[RATE_WORDS+i] ^= buffer[i];
}
// The GenerateTag function generates an authentication tag.
void GenerateTag(uint32_t *state, uint8_t *tag)
{
memcpy(tag, (state + RATE_WORDS), TAG_BYTES);
}
// The VerifyTag function checks whether the given authentication tag is valid.
// It performs a simple constant-time comparison and returns 0 if the provided
// tag matches the computed tag and -1 otherwise.
int VerifyTag(uint32_t *state, const uint8_t *tag)
{
uint32_t buffer[TAG_WORDS], diff = 0;
int i;
// to prevent (potentially) unaligned memory accesses
memcpy(buffer, tag, TAG_BYTES);
// constant-time comparison: 0 if equal, -1 otherwise
for (i = 0; i < TAG_WORDS; i++)
diff |= (state[RATE_WORDS+i] ^ buffer[i]);
return (((int) (diff == 0)) - 1);
}
// The ProcessCipherText function decrypts the ciphertext (in blocks of size
// RATE_BYTES) and generates the respective plaintext. The uint8_t-array 'in'
// contains the ciphertext and the plaintext is written to uint8_t-array 'out'
// ('in' and 'out' can be the same array, i.e. they can have the same start
// address). Note that this function MUST NOT be called when the length of the
// ciphertext is 0.
void ProcessCipherText(uint32_t *state, uint8_t *out, const uint8_t *in, \
size_t inlen)
{
// check whether 'in' and 'out' can be casted to uint32_t pointer
int aligned = (((size_t) in) | ((size_t) out)) % UI32_ALIGN_BYTES == 0;
// printf("Address of 'in' and 'out': %p, %p\n", in, out);
// Main Decryption Loop
while (inlen > RATE_BYTES) {
// combined Rho and rate-whitening operation
rho_whi_dec(state, out, in, aligned);
// execute SPARKLE with slim number of steps
sparkle_opt(state, STATE_BRANS, SPARKLE_STEPS_SLIM);
inlen -= RATE_BYTES;
out += RATE_BYTES;
in += RATE_BYTES;
}
// Decryption of Last Block
// addition of constant M2 or M3 to the state
state[STATE_WORDS-1] ^= ((inlen < RATE_BYTES) ? CONST_M2 : CONST_M3);
// combined Rho and rate-whitening (incl. padding)
rho_whi_dec_last(state, out, in, inlen);
// execute SPARKLE with big number of steps
sparkle_opt(state, STATE_BRANS, SPARKLE_STEPS_BIG);
}
///////////////////////////////////////////////////////////////////////////////
////////////// HIGH-LEVEL AEAD FUNCTIONS (FOR USE WITH SUPERCOP) //////////////
///////////////////////////////////////////////////////////////////////////////
// High-level encryption function from SUPERCOP.
// nsec is kept for compatibility with SUPERCOP, but is not used.
int crypto_aead_encrypt(UChar *c, ULLInt *clen, const UChar *m, ULLInt mlen, \
const UChar *ad, ULLInt adlen, const UChar *nsec, const UChar *npub, \
const UChar *k)
{
uint32_t state[STATE_WORDS];
size_t msize = (size_t) mlen;
size_t adsize = (size_t) adlen;
Initialize(state, k, npub);
if (adsize) ProcessAssocData(state, ad, adsize);
if (msize) ProcessPlainText(state, c, m, msize);
Finalize(state, k);
GenerateTag(state, (c + msize));
*clen = msize;
*clen += TAG_BYTES;
return 0;
}
// High-level decryption function from SUPERCOP.
// nsec is kept for compatibility with SUPERCOP, but is not used.
int crypto_aead_decrypt(UChar *m, ULLInt *mlen, UChar *nsec, const UChar *c, \
ULLInt clen, const UChar *ad, ULLInt adlen, const UChar *npub, \
const UChar *k)
{
uint32_t state[STATE_WORDS];
size_t csize = (size_t) (clen - TAG_BYTES);
size_t adsize = (size_t) adlen;
int retval;
Initialize(state, k, npub);
if (adsize) ProcessAssocData(state, ad, adsize);
if (csize) ProcessCipherText(state, m, c, csize);
Finalize(state, k);
retval = VerifyTag(state, (c + csize));
*mlen = csize;
return retval;
}