Commit 008e386b by Enrico Pozzobon

Merge branch 'master' into patched-for-tester

parents ae78ab5e 121de979
/*
* Copyright (C) 2020 Southern Storm Software, Pty Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef LWCRYPTO_AEAD_COMMON_H
#define LWCRYPTO_AEAD_COMMON_H
#include <stddef.h>
/**
* \file aead-common.h
* \brief Definitions that are common across AEAD schemes.
*
* AEAD stands for "Authenticated Encryption with Associated Data".
* It is a standard API pattern for securely encrypting and
* authenticating packets of data.
*/
#ifdef __cplusplus
extern "C" {
#endif
/**
* \brief Encrypts and authenticates a packet with an AEAD scheme.
*
* \param c Buffer to receive the output.
* \param clen On exit, set to the length of the output which includes
* the ciphertext and the authentication tag.
* \param m Buffer that contains the plaintext message to encrypt.
* \param mlen Length of the plaintext message in bytes.
* \param ad Buffer that contains associated data to authenticate
* along with the packet but which does not need to be encrypted.
* \param adlen Length of the associated data in bytes.
* \param nsec Secret nonce - normally not used by AEAD schemes.
* \param npub Points to the public nonce for the packet.
* \param k Points to the key to use to encrypt the packet.
*
* \return 0 on success, or a negative value if there was an error in
* the parameters.
*/
typedef int (*aead_cipher_encrypt_t)
(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k);
/**
* \brief Decrypts and authenticates a packet with an AEAD scheme.
*
* \param m Buffer to receive the plaintext message on output.
* \param mlen Receives the length of the plaintext message on output.
* \param nsec Secret nonce - normally not used by AEAD schemes.
* \param c Buffer that contains the ciphertext and authentication
* tag to decrypt.
* \param clen Length of the input data in bytes, which includes the
* ciphertext and the authentication tag.
* \param ad Buffer that contains associated data to authenticate
* along with the packet but which does not need to be encrypted.
* \param adlen Length of the associated data in bytes.
* \param npub Points to the public nonce for the packet.
* \param k Points to the key to use to decrypt the packet.
*
* \return 0 on success, -1 if the authentication tag was incorrect,
* or some other negative number if there was an error in the parameters.
*/
typedef int (*aead_cipher_decrypt_t)
(unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k);
/**
* \brief Hashes a block of input data.
*
* \param out Buffer to receive the hash output.
* \param in Points to the input data to be hashed.
* \param inlen Length of the input data in bytes.
*
* \return Returns zero on success or -1 if there was an error in the
* parameters.
*/
typedef int (*aead_hash_t)
(unsigned char *out, const unsigned char *in, unsigned long long inlen);
/**
* \brief Initializes the state for a hashing operation.
*
* \param state Hash state to be initialized.
*/
typedef void (*aead_hash_init_t)(void *state);
/**
* \brief Updates a hash state with more input data.
*
* \param state Hash state to be updated.
* \param in Points to the input data to be incorporated into the state.
* \param inlen Length of the input data to be incorporated into the state.
*/
typedef void (*aead_hash_update_t)
(void *state, const unsigned char *in, unsigned long long inlen);
/**
* \brief Returns the final hash value from a hashing operation.
*
* \param Hash state to be finalized.
* \param out Points to the output buffer to receive the hash value.
*/
typedef void (*aead_hash_finalize_t)(void *state, unsigned char *out);
/**
* \brief Aborbs more input data into an XOF state.
*
* \param state XOF state to be updated.
* \param in Points to the input data to be absorbed into the state.
* \param inlen Length of the input data to be absorbed into the state.
*
* \sa ascon_xof_init(), ascon_xof_squeeze()
*/
typedef void (*aead_xof_absorb_t)
(void *state, const unsigned char *in, unsigned long long inlen);
/**
* \brief Squeezes output data from an XOF state.
*
* \param state XOF state to squeeze the output data from.
* \param out Points to the output buffer to receive the squeezed data.
* \param outlen Number of bytes of data to squeeze out of the state.
*/
typedef void (*aead_xof_squeeze_t)
(void *state, unsigned char *out, unsigned long long outlen);
/**
* \brief No special AEAD features.
*/
#define AEAD_FLAG_NONE 0x0000
/**
* \brief The natural byte order of the AEAD cipher is little-endian.
*
* If this flag is not present, then the natural byte order of the
* AEAD cipher should be assumed to be big-endian.
*
* The natural byte order may be useful when formatting packet sequence
* numbers as nonces. The application needs to know whether the sequence
* number should be packed into the leading or trailing bytes of the nonce.
*/
#define AEAD_FLAG_LITTLE_ENDIAN 0x0001
/**
* \brief Meta-information about an AEAD cipher.
*/
typedef struct
{
const char *name; /**< Name of the cipher */
unsigned key_len; /**< Length of the key in bytes */
unsigned nonce_len; /**< Length of the nonce in bytes */
unsigned tag_len; /**< Length of the tag in bytes */
unsigned flags; /**< Flags for extra features */
aead_cipher_encrypt_t encrypt; /**< AEAD encryption function */
aead_cipher_decrypt_t decrypt; /**< AEAD decryption function */
unsigned char *expected; /**< AEAD encryption benchmark expected result */
} aead_cipher_t;
/**
* \brief Meta-information about a hash algorithm that is related to an AEAD.
*
* Regular hash algorithms should provide the "hash", "init", "update",
* and "finalize" functions. Extensible Output Functions (XOF's) should
* proivde the "hash", "init", "absorb", and "squeeze" functions.
*/
typedef struct
{
const char *name; /**< Name of the hash algorithm */
size_t state_size; /**< Size of the incremental state structure */
unsigned hash_len; /**< Length of the hash in bytes */
unsigned flags; /**< Flags for extra features */
aead_hash_t hash; /**< All in one hashing function */
aead_hash_init_t init; /**< Incremental hash/XOF init function */
aead_hash_update_t update; /**< Incremental hash update function */
aead_hash_finalize_t finalize; /**< Incremental hash finalize function */
aead_xof_absorb_t absorb; /**< Incremental XOF absorb function */
aead_xof_squeeze_t squeeze; /**< Incremental XOF squeeze function */
} aead_hash_algorithm_t;
/**
* \brief Check an authentication tag in constant time.
*
* \param plaintext Points to the plaintext data.
* \param plaintext_len Length of the plaintext in bytes.
* \param tag1 First tag to compare.
* \param tag2 Second tag to compare.
* \param tag_len Length of the tags in bytes.
*
* \return Returns -1 if the tag check failed or 0 if the check succeeded.
*
* If the tag check fails, then the \a plaintext will also be zeroed to
* prevent it from being used accidentally by the application when the
* ciphertext was invalid.
*/
int aead_check_tag
(unsigned char *plaintext, unsigned long long plaintext_len,
const unsigned char *tag1, const unsigned char *tag2,
unsigned tag_len);
/**
* \brief Check an authentication tag in constant time with a previous check.
*
* \param plaintext Points to the plaintext data.
* \param plaintext_len Length of the plaintext in bytes.
* \param tag1 First tag to compare.
* \param tag2 Second tag to compare.
* \param tag_len Length of the tags in bytes.
* \param precheck Set to -1 if previous check succeeded or 0 if it failed.
*
* \return Returns -1 if the tag check failed or 0 if the check succeeded.
*
* If the tag check fails, then the \a plaintext will also be zeroed to
* prevent it from being used accidentally by the application when the
* ciphertext was invalid.
*
* This version can be used to incorporate other information about the
* correctness of the plaintext into the final result.
*/
int aead_check_tag_precheck
(unsigned char *plaintext, unsigned long long plaintext_len,
const unsigned char *tag1, const unsigned char *tag2,
unsigned tag_len, int precheck);
#ifdef __cplusplus
}
#endif
#endif
#ifndef __DRYGASCON_ARM_SELECTOR_H__
#define __DRYGASCON_ARM_SELECTOR_H__
//Optional file to select the best implementation for each chip
#ifdef STM32H743xx
#define __DRYGASCON_ARM_SELECTOR_V7M__
#define __DRYGASCON_ARM_SELECTOR_FPU__
#endif
#ifdef STM32F746xx
#define __DRYGASCON_ARM_SELECTOR_V7M__
#define __DRYGASCON_ARM_SELECTOR_FPU__
#endif
#ifdef STM32F103xx
#define __DRYGASCON_ARM_SELECTOR_V7M__
#endif
#ifdef STM32L011xx
#define __DRYGASCON_ARM_SELECTOR_V6M__
#endif
#ifdef __SAM3X8E__
#define __DRYGASCON_ARM_SELECTOR_V7M__
#endif
//TODO: add more chips here
#ifdef __DRYGASCON_ARM_SELECTOR_V7M__
#ifdef __DRYGASCON_ARM_SELECTOR_FPU__
#define DRYGASCON_G_OPT drygascon128_g_v7m_fpu
#define DRYGASCON_F_OPT drygascon128_f_v7m_fpu
#define DRYGASCON_G0_OPT drygascon128_g0_v7m_fpu
#else
#define DRYGASCON_G_OPT drygascon128_g_v7m
#define DRYGASCON_F_OPT drygascon128_f_v7m
#define DRYGASCON_G0_OPT drygascon128_g0_v7m
#endif
#endif
#ifdef __DRYGASCON_ARM_SELECTOR_V6M__
#define DRYGASCON_G_OPT drygascon128_g_v6m
#define DRYGASCON_F_OPT drygascon128_f_v6m
//#define DRYGASCON_G0_OPT drygascon128_g0_v6m
#define DRYGASCON_ALIGN_INPUT_32
#endif
#endif
#include "drygascon.h"
int crypto_aead_encrypt
(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k)
{
return drygascon128_aead_encrypt
(c, clen, m, mlen, ad, adlen, nsec, npub, k);
}
int crypto_aead_decrypt
(unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k)
{
return drygascon128_aead_decrypt
(m, mlen, nsec, c, clen, ad, adlen, npub, k);
}
#ifndef KNOT_API_H
#define KNOT_API_H
//k=n=tag=128 b=256 r=64 c=192
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#endif
#ifndef KNOT_CRYPTO_AEAD_H
#define KNOT_CRYPTO_AEAD_H
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k);
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k);
#endif
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "crypto_aead.h"
#include "api.h"
#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
#define KNOT_CIPHER 1
#if defined(KNOT_CIPHER) && (KNOT_CIPHER == 1)
unsigned char constant6[63] = {
0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06,
0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29,
0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28,
0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24,
0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37,
0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26,
0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f,
0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20 };
/* State
* w4 w0
* w5 w1
* w6 w2
* w7 w3
*
* Sbox
t1 = ~a;
t2 = b & t1;
t3 = c ^ t2;
h = d ^ t3;
t5 = b | c;
t6 = d ^ t1;
g = t5 ^ t6;
t8 = b ^ d;
t9 = t3 & t6;
e = t8 ^ t9;
t11 = g & t8;
f = t3 ^ t11;
*
* Sbox after change
a = ~a;
s0 = b & a;
s0 = c ^ s0;
c = b | c;
a = d ^ a;
c = c ^ a;
s1 = b ^ d;
d = d ^ s0;
a = s0 & a;
a = s1 ^ a;
b = c & s1;
b = s0 ^ b;
*/
static void permutation256(unsigned char *in, int rounds, unsigned char *rc) {
uint32_t w0, w1, w2, w3, w4, w5, w6, w7;
uint32_t s0, s1, s2;
uint32_t one = 0x1;
uint32_t ff = 0xff;
__asm volatile(
"ldr w0, [in] \n\t"
"ldr w4, [in, #4] \n\t"
"ldr w1, [in, #8] \n\t"
"ldr w5, [in, #12] \n\t"
"ldr w2, [in, #16] \n\t"
"ldr w6, [in, #20] \n\t"
"ldr w3, [in, #24] \n\t"
"ldr w7, [in, #28] \n\t"
"mov s0, 0xfff \n\t"
"mov s2, 0x1fff \n\t"
"lsl s2, s2, #12 \n\t"
"eors s2, s2, s0 \n\t"
"enc_loop: \n\t"
"/*add round const*/ \n\t"
"ldrb s0, [rc] \n\t"
"eors w0, w0, s0 \n\t"
"/*sbox first column*/ \n\t"
"mvns w0, w0 \n\t"
"ands s0, w1, w0 \n\t"
"eors s0, w2, s0 \n\t"
"orrs w2, w1, w2 \n\t"
"eors w0, w3, w0 \n\t"
"eors w2, w2, w0 \n\t"
"eors s1, w1, w3 \n\t"
"eors w3, w3, s0 \n\t"
"ands w0, s0, w0 \n\t"
"eors w0, s1, w0 \n\t"
"ands w1, w2, s1 \n\t"
"eors w1, s0, w1 \n\t"
"/*sbox second column*/ \n\t"
"mvns w4, w4 \n\t"
"ands s0, w5, w4 \n\t"
"eors s0, w6, s0 \n\t"
"orrs w6, w5, w6 \n\t"
"eors w4, w7, w4 \n\t"
"eors w6, w6, w4 \n\t"
"eors s1, w5, w7 \n\t"
"eors w7, w7, s0 \n\t"
"ands w4, s0, w4 \n\t"
"eors w4, s1, w4 \n\t"
"ands w5, w6, s1 \n\t"
"eors w5, s0, w5 \n\t"
"/*rotate shift left 1 bit*/ \n\t"
"ror s0, w1, #31 \n\t"
"ands s0, s0, one \n\t"
"lsl w1, w1, #1 \n\t"
"ror s1, w5, #31 \n\t"
"ands s1, s1, one \n\t"
"eors w1, w1, s1 \n\t"
"lsl w5, w5, #1 \n\t"
"eors w5, w5, s0 \n\t"
"/*rotate shift left 8 bits*/ \n\t"
"ror s0, w2, #24 \n\t"
"ands s0, s0, ff \n\t"
"lsl w2, w2, #8 \n\t"
"ror s1, w6, #24 \n\t"
"ands s1, s1, ff \n\t"
"eors w2, w2, s1 \n\t"
"lsl w6, w6, #8 \n\t"
"eors w6, w6, s0 \n\t"
"/*rotate shift left 25 bits*/ \n\t"
"ror s0, w3, #7 \n\t"
"ands s0, s0, s2 \n\t"
"lsl w3, w3, #25 \n\t"
"ror s1, w7, #7 \n\t"
"ands s1, s1, s2 \n\t"
"eors w3, w3, s1 \n\t"
"lsl w7, w7, #25 \n\t"
"eors w7, w7, s0 \n\t"
"/*loop control*/ \n\t"
"adds rc, rc, #1 \n\t"
"subs rounds, rounds, #1 \n\t"
"bne enc_loop \n\t"
"str w0, [in] \n\t"
"str w4, [in, #4] \n\t"
"str w1, [in, #8] \n\t"
"str w5, [in, #12] \n\t"
"str w2, [in, #16] \n\t"
"str w6, [in, #20] \n\t"
"str w3, [in, #24] \n\t"
"str w7, [in, #28] \n\t"
);
}
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k) {
unsigned int u = 0;
unsigned int v = 0;
unsigned int v1 = 0;
unsigned int i;
unsigned int last_index = 0;
unsigned char *A = NULL;
unsigned char *M = NULL;
unsigned char S[32];
unsigned int *A32 = NULL;
unsigned int *M32 = NULL;
unsigned int *S32 = NULL;
unsigned int *C32 = NULL;
// pad associated data
if (adlen != 0) {
u = (adlen + 8) >> 3;
A = malloc(u << 3);
if (A == NULL) {
return -1;
}
memset(A, 0, u << 3);
memcpy(A, ad, adlen);
A[adlen] = 0x01;
A32 = (unsigned int *)A;
}
// pad plaintext data
if (mlen != 0) {
v = (mlen + 8) >> 3;
M = malloc(v << 3);
if (M == NULL) {
free(A);
return -1;
}
memset(M, 0, v << 3);
memcpy(M, m, mlen);
M[mlen] = 0x01;
M32 = (unsigned int *)M;
}
// initalization
memcpy(S, npub, CRYPTO_NPUBBYTES);
memcpy(S + CRYPTO_NPUBBYTES, k, CRYPTO_KEYBYTES);
permutation256(S, 52, constant6);
S32 = (unsigned int *)S;
// processiong associated data
if (adlen != 0) {
for (i = 0; i < u; i++) {
S32[0] ^= A32[0];
S32[1] ^= A32[1];
A32 = A32 + 2;
permutation256(S, 28, constant6);
}
}
S[31] ^= 0x80;
// Encryption processiong plaintext data
if (mlen != 0) {
C32 = (unsigned int *)c;
for (i = 0; i < v - 1; i++) {
S32[0] ^= M32[0];
S32[1] ^= M32[1];
M32 = M32 + 2;
C32[0] = S32[0];
C32[1] = S32[1];
C32 = C32 + 2;
permutation256(S, 28, constant6);
}
v1 = mlen % 8;
last_index = (v - 1) << 3;
for (i = 0; i < v1; i++) {
S[i] ^= M[last_index + i];
c[last_index + i] = S[i];
}
S[i] ^= 0x01;
}
// finalization
permutation256(S, 32, constant6);
// return tag
memcpy(c + mlen, S, CRYPTO_ABYTES);
*clen = mlen + CRYPTO_ABYTES;
if (A != NULL) {
free(A);
}
if (M != NULL) {
free(M);
}
return 0;
}
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k)
{
unsigned int u;
unsigned int v = ((clen - CRYPTO_ABYTES) >> 3) + 1;
unsigned int v1;
unsigned int last_index;
unsigned int i;
unsigned char *A = NULL;
unsigned char S[32];
unsigned int *A32 = NULL;
unsigned int *M32 = NULL;
unsigned int *S32 = NULL;
unsigned int *C32 = NULL;
*mlen = 0;
if (clen < CRYPTO_ABYTES) {
return -1;
}
// pad associated data
if (adlen != 0) {
u = (adlen + 8) >> 3;
A = malloc(u << 3);
if (A == NULL) {
return -1;
}
memset(A, 0, u << 3);
memcpy(A, ad, adlen);
A[adlen] = 0x01;
A32 = (unsigned int *)A;
}
M32 = (unsigned int *)m;
C32 = (unsigned int *)c;
// initalization
memcpy(S, npub, CRYPTO_NPUBBYTES);
memcpy(S + CRYPTO_NPUBBYTES, k, CRYPTO_KEYBYTES);
permutation256(S, 52, constant6);
S32 = (unsigned int *)S;
// processiong associated data
if (adlen != 0) {
for (i = 0; i < u; i++) {
S32[0] ^= A32[0];
S32[1] ^= A32[1];
A32 = A32 + 2;
permutation256(S, 28, constant6);
}
}
S[31] ^= 0x80;
// Encryption processiong ciphertext data
if (clen != CRYPTO_ABYTES) {
C32 = (unsigned int *)c;
for (i = 0; i < v - 1; i++) {
M32[0] = S32[0] ^ C32[0];
M32[1] = S32[1] ^ C32[1];
S32[0] = C32[0];
S32[1] = C32[1];
M32 = M32 + 2;
C32 = C32 + 2;
permutation256(S, 28, constant6);
}
v1 = (clen - CRYPTO_ABYTES) % 8;
last_index = (v - 1) << 3;
for (i = 0; i < v1; i++) {
m[last_index + i] = S[i] ^ c[last_index + i];
S[i] = c[last_index + i];
}
S[i] ^= 0x01;
}
// finalization
permutation256(S, 32, constant6);
// return -1 if verification fails
for (i = 0; i < CRYPTO_ABYTES; i++) {
if (c[clen - CRYPTO_ABYTES + i] != S[i]) {
memset(m, 0, clen - CRYPTO_ABYTES);
return -1;
}
}
*mlen = clen - CRYPTO_ABYTES;
if (A != NULL) {
free(A);
}
return 0;
}
#else
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k) {
return 0;
}
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k) {
return 0;
}
#endif
//k=n=tag=128 b=256 r=64 c=192
#define CRYPTO_KEYBYTES 16 //
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include"crypto_aead.h"
#include"api.h"
#include <string.h>
#define U32BIG(x) (x)
#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n))))
#define sbox(a, b, c, d, e, f, g, h) \
{ \
t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; e = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
}
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
#define packFormat(out,in) {\
t1 = U32BIG(((u32*)in)[0]); \
t2 = U32BIG(((u32*)in)[1]); \
t3 = (t1 ^ (t1 >> 1)) & 0x22222222, t1 ^= t3 ^ (t3 << 1); \
t3 = (t1 ^ (t1 >> 2)) & 0x0C0C0C0C, t1 ^= t3 ^ (t3 << 2); \
t3 = (t1 ^ (t1 >> 4)) & 0x00F000F0, t1 ^= t3 ^ (t3 << 4); \
t3 = (t1 ^ (t1 >> 8)) & 0x0000FF00, t1 ^= t3 ^ (t3 << 8); \
t5 = (t2 ^ (t2 >> 1)) & 0x22222222, t2 ^= t5 ^ (t5 << 1); \
t5 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t5 ^ (t5 << 2); \
t5 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t5 ^ (t5 << 4); \
t5 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t5 ^ (t5 << 8); \
out[0] = (t2 & 0xFFFF0000) | (t1 >> 16); \
out[1] = (t2 << 16) | (t1 & 0x0000FFFF); \
}
#define unpackFormat(out, in) {\
t2 = (in[0] & 0xFFFF0000) | (in[1] >> 16); \
t1 = (in[1] & 0x0000FFFF) | (in[0] << 16); \
t3 = (t1 ^ (t1 >> 8)) & 0x0000FF00, t1 ^= t3 ^ (t3 << 8); \
t3 = (t1 ^ (t1 >> 4)) & 0x00F000F0, t1 ^= t3 ^ (t3 << 4); \
t3 = (t1 ^ (t1 >> 2)) & 0x0C0C0C0C, t1 ^= t3 ^ (t3 << 2); \
t3 = (t1 ^ (t1 >> 1)) & 0x22222222, t1 ^= t3 ^ (t3 << 1); \
t5 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t5 ^ (t5 << 8); \
t5 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t5 ^ (t5 << 4); \
t5 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t5 ^ (t5 << 2); \
t5 = (t2 ^ (t2 >> 1)) & 0x22222222, t2 ^= t5 ^ (t5 << 1); \
*((u64*)out) = ((u64)t2 << 32 | t1); \
}
#define getU32Format(out, in) {\
t1, t2 = U32BIG(((u32*)in)[0]); \
t1 = (t2 ^ (t2 >> 1)) & 0x22222222, t2 ^= t1 ^ (t1 << 1); \
t1 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t1 ^ (t1 << 2); \
t1 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t1 ^ (t1 << 4); \
t1 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t1 ^ (t1 << 8); \
*out = t2; \
}
#define ROUND256( constant6Format,lunNum) {\
s[0] ^= constant6Format[lunNum]>> 4;\
s[1] ^= constant6Format[lunNum]& 0x0f;\
sbox(s[0], s[2], s[4], s[6], s_temp[0], s_temp[2], s_temp[4], s_temp[6]);\
sbox(s[1], s[3], s[5], s[7], s_temp[1], s_temp[3], s_temp[5], s_temp[7]);\
s[0] = s_temp[0];\
s[1] = s_temp[1];\
s[2] = s_temp[3];\
s[3] = LOTR32(s_temp[2], 1);\
s[4] = LOTR32(s_temp[4], 4);\
s[5] = LOTR32(s_temp[5], 4);\
s[6] = LOTR32(s_temp[7], 12);\
s[7] = LOTR32(s_temp[6], 13);\
}
void printfFormat(char name[], u32 * in);
void printU8(char name[], u8 var[], long len, int offset);
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#include"auxFormat.h"
#define RATE (64 / 8)
#define PR0_ROUNDS 52
#define PR_ROUNDS 28
#define PRF_ROUNDS 32
unsigned char constant6Format[63] = {
/*constant6_aead_128v1:*/
0x1,
0x10,
0x2,
0x20,
0x4,
0x41,
0x11,
0x12,
0x22,
0x24,
0x45,
0x50,
0x3,
0x30,
0x6,
0x61,
0x15,
0x53,
0x33,
0x36,
0x67,
0x74,
0x46,
0x60,
0x5,
0x51,
0x13,
0x32,
0x26,
0x65,
0x54,
0x42,
0x21,
0x14,
0x43,
0x31,
0x16,
0x63,
0x35,
0x57,
0x72,
0x27,
0x75,
0x56,
0x62,
0x25,
0x55,
0x52,
0x23,
0x34,
0x47,
0x70,
0x7,
0x71,
0x17,
0x73,
0x37,
0x77,
0x76,
0x66,
0x64,
0x44,
0x40,
};
static void permutation256(unsigned int *in, int rounds, unsigned char *rc) {
uint32_t w0, w1, w2, w3, w4, w5, w6, w7;
uint32_t s0, s1, s2;
uint32_t one = 0x1;
uint32_t i=0;
uint32_t ff = 0xff;
__asm volatile(
"ldr w0, [in] \n\t"
"ldr w4, [in, #4] \n\t"
"ldr w1, [in, #8] \n\t"
"ldr w5, [in, #12] \n\t"
"ldr w2, [in, #16] \n\t"
"ldr w6, [in, #20] \n\t"
"ldr w3, [in, #24] \n\t"
"ldr w7, [in, #28] \n\t"
"enc_loop: \n\t"
"/*add round const s0 s1*/ \n\t"
"ldrb s0, [rc] \n\t"
"LSR s1, s0, #4 \n\t"
"and s0, s0, 0xf \n\t"
"eors w4, w4, s0 \n\t"
"eors w0, w0, s1 \n\t"
"/*sbox first column*/ \n\t"
"mvns w0, w0 \n\t"
"ands s0, w1, w0 \n\t"
"eors s0, w2, s0 \n\t"
"orrs w2, w1, w2 \n\t"
"eors w0, w3, w0 \n\t"
"eors w2, w2, w0 \n\t"
"eors s1, w1, w3 \n\t"
"eors w3, w3, s0 \n\t"
"ands w0, s0, w0 \n\t"
"eors w0, s1, w0 \n\t"
"ands w1, w2, s1 \n\t"
"eors w1, s0, w1 \n\t"
"/*sbox second column*/ \n\t"
"mvns w4, w4 \n\t"
"ands s0, w5, w4 \n\t"
"eors s0, w6, s0 \n\t"
"orrs w6, w5, w6 \n\t"
"eors w4, w7, w4 \n\t"
"eors w6, w6, w4 \n\t"
"eors s1, w5, w7 \n\t"
"eors w7, w7, s0 \n\t"
"ands w4, s0, w4 \n\t"
"eors w4, s1, w4 \n\t"
"ands w5, w6, s1 \n\t"
"eors w5, s0, w5 \n\t"
"/*rotate shift left 1 bit*/ \n\t"
"mov s0, w5 \n\t"
"ROR w5, w1, #31 \n\t"
"mov w1, s0 \n\t"
"/*rotate shift left 8 bits*/ \n\t"
"ROR w2, w2, #28 \n\t"
"ROR w6, w6, #28 \n\t"
"/*rotate shift left 25 bits*/ \n\t"
"mov s0, w3 \n\t"
"ROR w3, w7, #20 \n\t"
"ROR w7, s0, #19 \n\t"
"/*loop control*/ \n\t"
"adds rc, rc, #1 \n\t"
"subs rounds, rounds, #1 \n\t"
"bne enc_loop \n\t"
"str w0, [in] \n\t"
"str w4, [in, #4] \n\t"
"str w1, [in, #8] \n\t"
"str w5, [in, #12] \n\t"
"str w2, [in, #16] \n\t"
"str w6, [in, #20] \n\t"
"str w3, [in, #24] \n\t"
"str w7, [in, #28] \n\t"
);
}
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k) {
unsigned int i, j;
u32 s[8] = { 0 };
u32 dataFormat[2] = { 0 };
u8 tempData[8];
u32 s_temp[8] = { 0 };
u32 t1, t2, t3, t5, t6, t8, t9, t11;
*clen = mlen + CRYPTO_ABYTES;
//initialization
packFormat(s, npub);
packFormat((s + 2), (npub + 8));
packFormat((s + 4), k);
packFormat((s + 6), (k + 8));
permutation256(s,PR0_ROUNDS,constant6Format);
// process associated data
if (adlen) {
while (adlen >= RATE) {
packFormat(dataFormat, ad);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
permutation256(s,PR_ROUNDS,constant6Format);
adlen -= RATE;
ad += RATE;
}
memset(tempData, 0, sizeof(tempData));
memcpy(tempData, ad, adlen * sizeof(unsigned char));
tempData[adlen] = 0x01;
packFormat(dataFormat, tempData);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
permutation256(s,PR_ROUNDS,constant6Format);
}
s[6] ^= 0x80000000;
if (mlen) {
while (mlen >= RATE) {
packFormat(dataFormat, m);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
unpackFormat(c, s);
permutation256(s,PR_ROUNDS,constant6Format);
mlen -= RATE;
m += RATE;
c += RATE;
}
memset(tempData, 0, sizeof(tempData));
memcpy(tempData, m, mlen * sizeof(unsigned char));
tempData[mlen]= 0x01;
packFormat(dataFormat, tempData);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
unpackFormat(tempData, s);
memcpy(c, tempData, mlen * sizeof(unsigned char));
c +=mlen;
}
// finalization
permutation256(s,PRF_ROUNDS,constant6Format);
// return tag
unpackFormat(tempData, s);
memcpy(c, tempData, sizeof(tempData));
unpackFormat(tempData,(s + 2));
memcpy(c+8, tempData, sizeof(tempData));
// unpackFormat((c), s);
// unpackFormat((c+8),(s + 2));
return 0;
}
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
unsigned char *nsec, const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k) {
u8 i, j;
// initialization
u32 s[8] = { 0 };
u32 dataFormat[4] = { 0 };
u32 dataFormat_1[2] = { 0 };
u8 tempU8[32] = { 0 };
u8 tempData[8];
u32 s_temp[8] = { 0 };
u32 t1, t2, t3, t5, t6, t8, t9, t11;
*mlen = clen - CRYPTO_ABYTES;
if (clen < CRYPTO_ABYTES)
return -1;
//initialization
packFormat(s, npub);
packFormat((s + 2), (npub + 8));
packFormat((s + 4), k);
packFormat((s + 6), (k + 8));
permutation256(s,PR0_ROUNDS,constant6Format);
// process associated data
if (adlen) {
while (adlen >= RATE) {
packFormat(dataFormat, ad);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
permutation256(s,PR_ROUNDS,constant6Format);
adlen -= RATE;
ad += RATE;
}
memset(tempData, 0, sizeof(tempData));
memcpy(tempData, ad, adlen * sizeof(unsigned char));
tempData[adlen] = 0x01;
packFormat(dataFormat, tempData);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
permutation256(s,PR_ROUNDS,constant6Format);
}
s[6] ^= 0x80000000;
// process c
clen = clen - CRYPTO_KEYBYTES;
if (clen) {
while (clen >= RATE) {
packFormat(dataFormat, c);
dataFormat_1[0] = s[0] ^ dataFormat[0];
dataFormat_1[1] = s[1] ^ dataFormat[1];
unpackFormat(m, dataFormat_1);
s[0] = dataFormat[0];
s[1] = dataFormat[1];
permutation256(s,PR_ROUNDS,constant6Format);
clen -= RATE;
m += RATE;
c += RATE;
}
unpackFormat(tempU8, s);
for (i = 0; i < clen; ++i, ++m, ++c)
{
*m = tempU8[i]^ *c;
tempU8[i] = *c;
}
tempU8[i] ^= 0x01;
packFormat(s, tempU8);
}
// finalization
permutation256(s,PRF_ROUNDS,constant6Format);
// return tag
packFormat(dataFormat, c);
packFormat((dataFormat + 2), (c +8));
if (dataFormat[0] != s[0] || dataFormat[1] != s[1] || dataFormat[2] != s[2] || dataFormat[3] != s[3]) {
return -1;
}
return 0;
}
#ifndef KNOT_API_H
#define KNOT_API_H
//k=n=tag=128 b=384 r=192 c=192
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#endif
#ifndef KNOT_CRYPTO_AEAD_H
#define KNOT_CRYPTO_AEAD_H
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k);
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k);
#endif
......@@ -3,3 +3,5 @@
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
//#include<malloc.h>
#include"crypto_aead.h"
#include"api.h"
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#define U32BIG(x) (x)
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n))))
//////////////////puck begin
//&:5 <<:4 |:4
#define puckU32ToThree(x){\
x &= 0x92492492;\
x = (x | (x << 2)) & 0xc30c30c3;\
x = (x | (x << 4)) & 0xf00f00f0;\
x = (x | (x << 8)) & 0xff0000ff;\
x = (x | (x << 16)) & 0xfff00000;\
}
#define unpuckU32ToThree(x){\
x &= 0xfff00000;\
x = (x | (x >> 16)) & 0xff0000ff;\
x = (x | (x >> 8)) & 0xf00f00f0;\
x = (x | (x >> 4)) & 0xc30c30c3;\
x = (x | (x >> 2)) & 0x92492492;\
}
//ʹ u8 t2_64, t2_65;u32 temp2[3];t2;
#define packU32FormatToThreePacket( out, in) {\
t2 = U32BIG(((u32*)in)[0]); \
t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \
t2 = t2 << 2; \
temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \
puckU32ToThree(temp2[0]); \
puckU32ToThree(temp2[1]); \
puckU32ToThree(temp2[2]); \
out[0] = (temp2[0] >> 22); \
out[1] = (((u32)t2_64) << 10) | (temp2[1] >> 22); \
out[2] =(((u32)t2_65) << 10) | (temp2[2] >> 22); \
}
//t9 t1 t2 t1_32 t2_64 t2_65 temp0[3] temp1[3] temp2[3]
#define packU96FormatToThreePacket(out, in) {\
t9 = U32BIG(((u32*)in)[2]); \
t1 = U32BIG(((u32*)in)[1]); \
t2 = U32BIG(((u32*)in)[0]); \
t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \
t1 = t1 << 1; \
t2 = t2 << 2; \
temp0[0] = t9; temp0[1] = t9 << 1; temp0[2] = t9 << 2; \
puckU32ToThree(temp0[0]); \
puckU32ToThree(temp0[1]); \
puckU32ToThree(temp0[2]); \
temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; \
puckU32ToThree(temp1[0]); \
puckU32ToThree(temp1[1]); \
puckU32ToThree(temp1[2]); \
temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \
puckU32ToThree(temp2[0]); \
puckU32ToThree(temp2[1]); \
puckU32ToThree(temp2[2]); \
out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); \
out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); \
out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); \
}
//ʹ u8 t2_64, t2_65;u32 temp2[3];t2;
#define unpackU32FormatToThreePacket(out, in) {\
temp2[0] = (in[0] & 0x000003ff) << 22; \
t2_64 = ((in[1] & 0x00000400) << 21); \
temp2[1] = (in[1] & 0x000003ff) << 22; \
t2_65 = ((in[2] & 0x00000400) << 20); \
temp2[2] = (in[2] & 0x000003ff) << 22; \
unpuckU32ToThree(temp2[0]); \
unpuckU32ToThree(temp2[1]); \
unpuckU32ToThree(temp2[2]); \
t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \
*(u32*)(out) = U32BIG(t2); \
}
//u32 temp0[3] = { 0 };u32 temp1[3] = { 0 };u32 temp2[3] = { 0 };u32 t1_32, t2_64, t2_65;t9,t1,t2,
#define unpackU96FormatToThreePacket( out, in) {\
temp0[0] = in[0] & 0xffe00000; \
temp1[0] = (in[0] & 0x001ffc00) << 11; \
temp2[0] = (in[0] & 0x000003ff) << 22; \
temp0[1] = in[1] & 0xffe00000; \
temp1[1] = (in[1] & 0x001ff800) << 11; \
t2_64 = ((in[1] & 0x00000400) << 21); \
temp2[1] = (in[1] & 0x000003ff) << 22; \
temp0[2] = in[2] & 0xffc00000; \
t1_32 = ((in[2] & 0x00200000) << 10); \
temp1[2] = (in[2] & 0x001ff800) << 11; \
t2_65 = ((in[2] & 0x00000400) << 20); \
temp2[2] = (in[2] & 0x000003ff) << 22; \
unpuckU32ToThree(temp0[0]); \
unpuckU32ToThree(temp0[1]); \
unpuckU32ToThree(temp0[2]); \
t9 = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; \
unpuckU32ToThree(temp1[0]); \
unpuckU32ToThree(temp1[1]); \
unpuckU32ToThree(temp1[2]); \
t1 = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); \
unpuckU32ToThree(temp2[0]); \
unpuckU32ToThree(temp2[1]); \
unpuckU32ToThree(temp2[2]); \
t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \
*(u32*)(out) = U32BIG(t2); \
*(u32*)(out + 4) = U32BIG(t1); \
*(u32*)(out + 8) = U32BIG(t9); \
}
#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
#define sbox(a, b, c, d, e, f, g, h) \
{ \
t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; e = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
}
#define U96_BIT_LOTR32_1(t0,t1,t2,t3,t4,t5){\
t3= t1;\
t4 = t2;\
t5 = LOTR32(t0, 1); \
}
#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\
t3= LOTR32(t2, 2);\
t4 =LOTR32(t0, 3);\
t5 = LOTR32(t1, 3); \
}
//55=3*18+1
#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\
t3= LOTR32(t1, 18); \
t4 = LOTR32(t2, 18);\
t5 = LOTR32(t0, 19); \
}
/*
s0 s1 s2
s3 s4 s5
s6 s7 s8
s9 s10 s11
*/
void printU32State(char name[], u32* var, long len);
void printfU96Format(char name[], u32 * s);
//////////////////puck end
void printU8(char name[], u8 var[], int len, int offset);
void printfU96Format(char name[], u32 * s);
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#ifndef KNOT_API_H
#define KNOT_API_H
//k=n=tag=128 b=384 r=96 c=288
#define CRYPTO_KEYBYTES 24
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 24
#define CRYPTO_ABYTES 24
#define CRYPTO_NOOVERLAP 1
#endif
#ifndef KNOT_CRYPTO_AEAD_H
#define KNOT_CRYPTO_AEAD_H
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k);
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k);
#endif
#define CRYPTO_KEYBYTES 24
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 24
#define CRYPTO_ABYTES 24
#define CRYPTO_NOOVERLAP 1
//#include<malloc.h>
#include"crypto_aead.h"
#include"api.h"
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#define U32BIG(x) (x)
#define U16BIG(x) (x)
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long long u64;
#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n))))
#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
#define sbox(a, b, c, d, e, f, g, h) \
{ \
t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; e = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
}
//////////////////puck begin
//&:5 <<:4 |:4
#define puckU32ToThree(x){\
x &= 0x92492492;\
x = (x | (x << 2)) & 0xc30c30c3;\
x = (x | (x << 4)) & 0xf00f00f0;\
x = (x | (x << 8)) & 0xff0000ff;\
x = (x | (x << 16)) & 0xfff00000;\
}
#define unpuckU32ToThree(x){\
x &= 0xfff00000;\
x = (x | (x >> 16)) & 0xff0000ff;\
x = (x | (x >> 8)) & 0xf00f00f0;\
x = (x | (x >> 4)) & 0xc30c30c3;\
x = (x | (x >> 2)) & 0x92492492;\
}
//u32 t1 u32 t2 u8 t2_64 , t2_65 ;u32 temp1[3] = { 0 };u32 temp2[3] = { 0 };
#define packU48FormatToThreePacket( out, in) {\
t1 = (u32)U16BIG(*(u16*)(in + 4)); \
t2 = U32BIG(*(u32*)(in)); \
t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \
t1 = t1 << 1; \
t2 = t2 << 2; \
temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; \
puckU32ToThree(temp1[0]); \
puckU32ToThree(temp1[1]); \
puckU32ToThree(temp1[2]); \
temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \
puckU32ToThree(temp2[0]); \
puckU32ToThree(temp2[1]); \
puckU32ToThree(temp2[2]); \
out[0] = (temp1[0] >> 11) | (temp2[0] >> 22); \
out[1] = (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); \
out[2] = (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); \
}
/*
void packU96FormatToThreePacket(u32 * out, u8 * in) {
u32 t0 = U32BIG(((u32*)in)[2]);
u32 t1 = U32BIG(((u32*)in)[1]);
u32 t2 = U32BIG(((u32*)in)[0]);
u8 t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6;
t1 = t1 << 1;
t2 = t2 << 2;
u32 temp0[3] = { 0 };
temp0[0] = t0; temp0[1] = t0 << 1; temp0[2] = t0 << 2;
puckU32ToThree(temp0[0]);
puckU32ToThree(temp0[1]);
puckU32ToThree(temp0[2]);
u32 temp1[3] = { 0 };
temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2;
puckU32ToThree(temp1[0]);
puckU32ToThree(temp1[1]);
puckU32ToThree(temp1[2]);
u32 temp2[3] = { 0 };
temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2;
puckU32ToThree(temp2[0]);
puckU32ToThree(temp2[1]);
puckU32ToThree(temp2[2]);
out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22);
out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22);
out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22);
}
*/
//t9 t1 t2 t1_32 t2_64 t2_65 temp0[3] temp1[3] temp2[3]
#define packU96FormatToThreePacket(out, in) {\
t9 = U32BIG(((u32*)in)[2]); \
t1 = U32BIG(((u32*)in)[1]); \
t2 = U32BIG(((u32*)in)[0]); \
t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \
t1 = t1 << 1; \
t2 = t2 << 2; \
temp0[0] = t9; temp0[1] = t9 << 1; temp0[2] = t9 << 2; \
puckU32ToThree(temp0[0]); \
puckU32ToThree(temp0[1]); \
puckU32ToThree(temp0[2]); \
temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; \
puckU32ToThree(temp1[0]); \
puckU32ToThree(temp1[1]); \
puckU32ToThree(temp1[2]); \
temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \
puckU32ToThree(temp2[0]); \
puckU32ToThree(temp2[1]); \
puckU32ToThree(temp2[2]); \
out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); \
out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); \
out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); \
}
/*
void unpackU96FormatToThreePacket(u8 * out, u32 * in) {
u32 temp0[3] = { 0 };
u32 temp1[3] = { 0 };
u32 temp2[3] = { 0 };
u32 t1_32, t2_64, t2_65;
u32 t0, t1, t2;
temp0[0] = in[0] & 0xffe00000;
temp1[0] = (in[0] & 0x001ffc00) << 11;
temp2[0] = (in[0] & 0x000003ff) << 22;
temp0[1] = in[1] & 0xffe00000;
temp1[1] = (in[1] & 0x001ff800) << 11;
t2_64 = ((in[1] & 0x00000400) << 21);
temp2[1] = (in[1] & 0x000003ff) << 22;
temp0[2] = in[2] & 0xffc00000;
t1_32 = ((in[2] & 0x00200000) << 10);
temp1[2] = (in[2] & 0x001ff800) << 11;
t2_65 = ((in[2] & 0x00000400) << 20);
temp2[2] = (in[2] & 0x000003ff) << 22;
unpuckU32ToThree(temp0[0]);
unpuckU32ToThree(temp0[1]);
unpuckU32ToThree(temp0[2]);
t0 = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2;
unpuckU32ToThree(temp1[0]);
unpuckU32ToThree(temp1[1]);
unpuckU32ToThree(temp1[2]);
t1 = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1);
unpuckU32ToThree(temp2[0]);
unpuckU32ToThree(temp2[1]);
unpuckU32ToThree(temp2[2]);
t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2);
*(u32*)(out) = U32BIG(t2);
*(u32*)(out + 4) = U32BIG(t1);
*(u32*)(out + 8) = U32BIG(t0);
}
*/
//u32 temp0[3] = { 0 };u32 temp1[3] = { 0 };u32 temp2[3] = { 0 };u32 t1_32, t2_64, t2_65;t9,t1,t2,
#define unpackU96FormatToThreePacket( out, in) {\
temp0[0] = in[0] & 0xffe00000; \
temp1[0] = (in[0] & 0x001ffc00) << 11; \
temp2[0] = (in[0] & 0x000003ff) << 22; \
temp0[1] = in[1] & 0xffe00000; \
temp1[1] = (in[1] & 0x001ff800) << 11; \
t2_64 = ((in[1] & 0x00000400) << 21); \
temp2[1] = (in[1] & 0x000003ff) << 22; \
temp0[2] = in[2] & 0xffc00000; \
t1_32 = ((in[2] & 0x00200000) << 10); \
temp1[2] = (in[2] & 0x001ff800) << 11; \
t2_65 = ((in[2] & 0x00000400) << 20); \
temp2[2] = (in[2] & 0x000003ff) << 22; \
unpuckU32ToThree(temp0[0]); \
unpuckU32ToThree(temp0[1]); \
unpuckU32ToThree(temp0[2]); \
t9 = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; \
unpuckU32ToThree(temp1[0]); \
unpuckU32ToThree(temp1[1]); \
unpuckU32ToThree(temp1[2]); \
t1 = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); \
unpuckU32ToThree(temp2[0]); \
unpuckU32ToThree(temp2[1]); \
unpuckU32ToThree(temp2[2]); \
t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \
*(u32*)(out) = U32BIG(t2); \
*(u32*)(out + 4) = U32BIG(t1); \
*(u32*)(out + 8) = U32BIG(t9); \
}
#define U96_BIT_LOTR32_1(t0,t1,t2,t3,t4,t5){\
t3= t1;\
t4 = t2;\
t5 = LOTR32(t0, 1); \
}
#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\
t3= LOTR32(t2, 2);\
t4 =LOTR32(t0, 3);\
t5 = LOTR32(t1, 3); \
}
//55=3*18+1
#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\
t3= LOTR32(t1, 18); \
t4 = LOTR32(t2, 18);\
t5 = LOTR32(t0, 19); \
}
/*
s0 s1 s2
s3 s4 s5
s6 s7 s8
s9 s10 s11
*/
#define ROUND384(lunNum) {\
s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;\
s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;\
s[2] ^= constant7Format[lunNum] & 0x7;\
sbox(s[0], s[3], s[6], s[9] , s_temp[0], s_temp[3], s_temp[6], s_temp[9]);\
sbox(s[1], s[4], s[7], s[10], s_temp[1], s_temp[4], s_temp[7], s_temp[10]);\
sbox(s[2], s[5], s[8], s[11], s_temp[2], s_temp[5], s_temp[8], s_temp[11]);\
s[0] = s_temp[0], s[1] = s_temp[1], s[2] = s_temp[2];\
U96_BIT_LOTR32_1(s_temp[3], s_temp [4], s_temp[ 5], s[3], s[4], s[5]);\
U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\
U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\
}
#define ROUND384Full(lunNum) {\
printf(" constant7Format[%d]=%08x\n", lunNum, constant7Format[lunNum]);\
s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;\
s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;\
s[2] ^= constant7Format[lunNum] & 0x7;\
printfU96Format("addition of round constant output",s);\
sbox(s[0], s[3], s[6], s[9] , s_temp[0], s_temp[3], s_temp[6], s_temp[9]);\
sbox(s[1], s[4], s[7], s[10], s_temp[1], s_temp[4], s_temp[7], s_temp[10]);\
sbox(s[2], s[5], s[8], s[11], s_temp[2], s_temp[5], s_temp[8], s_temp[11]);\
printfU96Format("substitution layer output",s_temp);\
s[0] = s_temp[0], s[1] = s_temp[1], s[2] = s_temp[2];\
U96_BIT_LOTR32_1(s_temp[3], s_temp [4], s_temp[ 5], s[3], s[4], s[5]);\
U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\
U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\
printfU96Format("linear diffusion layer output", s);\
}
void printBinarySimp(unsigned char * str, u8 *a, int len);
//void packU96FormatToThreePacketFull(unsigned int * out, u8 * in);
//void unpackU96FormatToThreePacketFull(u8 * out, unsigned int * in);
//void packU96FormatToThreePacket(u32 * out, u8 * in);
//void unpackU96FormatToThreePacket(u8 * out, u32 * in);
void printU32State(char name[], unsigned int* var, long len);
void printfU96Format(char name[], unsigned int * s);
//////////////////puck end
void printU8(char name[], u8 var[], int len, int offset);
void printfU96Format(char name[], u32 * s);
////////////constant begin//
//unsigned char constant7Format[127];
void puckU8FormatToThreePacket(u8 in, u8 *out);
//void test_puckU8FormatToThreePacket();
////////////constant end//
static void permutation384(unsigned int *in, int rounds, unsigned char *rc);
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#include"auxFormat.h"
#define aead_RATE (96 / 8)
#define PR0_ROUNDS 76
#define PR_ROUNDS 40
#define PRF_ROUNDS 44
unsigned char constant7Format[127] = {
/*constant7Format[127]:*/
0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90,
0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2,
0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99,
0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b,
0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92,
0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8,
0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,0xcf,0x5e,0xd3,0x9f,
0xdc,0xc6,0x16,0x91,0x8d,0x4c,0x43,0x1a,0xd0,0x87,0x1c,0xc1,
0x0f,0x59,0xca,0x57,0x9b,0xdd,0xce,0x56,0x93,0x9d,0xcc,0x46,
0x13,0x98,0xc5,0x0e,0x51,0x8a,0x55,0x8b,0x5d,0xcb,0x5f,0xdb,
0xdf,0xde,0xd6,0x96,0x94,0x84,0x04, };
/* State
* w8 w4 w0
* w9 w5 w1
* w10 w6 w2
* w11 w7 w3
*/
static void permutation384(unsigned int *in, int rounds, unsigned char *rc) {
uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11;
uint32_t s0, s1, s2;
uint32_t i=0;
__asm volatile(
"ldr w0, [in] \n\t"
"ldr w4, [in, #4] \n\t"
"ldr w8, [in, #8] \n\t"
"ldr w1, [in, #12] \n\t"
"ldr w5, [in, #16] \n\t"
"ldr w9, [in, #20] \n\t"
"ldr w2, [in, #24] \n\t"
"ldr w6, [in, #28] \n\t"
"ldr w10, [in, #32] \n\t"
"ldr w3, [in, #36] \n\t"
"ldr w7, [in, #40] \n\t"
"ldr w11, [in, #44] \n\t"
"enc_loop: \n\t"
"/*add round const s0 s1*/ \n\t"
"ldrb s0, [rc] \n\t"
"LSR s1, s0, #6 \n\t"
"and s1, s1, 0x3 \n\t"
"LSR s2, s0, #3 \n\t"
"and s2, s2, 0x7 \n\t"
"and s0, s0, 0x7 \n\t"
"eors w8, w8, s0 \n\t"
"eors w4, w4, s2 \n\t"
"eors w0, w0, s1 \n\t"
"/*sbox first column*/ \n\t"
"mvns w0, w0 \n\t"
"ands s0, w1, w0 \n\t"
"eors s0, w2, s0 \n\t"
"orrs w2, w1, w2 \n\t"
"eors w0, w3, w0 \n\t"
"eors w2, w2, w0 \n\t"
"eors s1, w1, w3 \n\t"
"eors w3, w3, s0 \n\t"
"ands w0, s0, w0 \n\t"
"eors w0, s1, w0 \n\t"
"ands w1, w2, s1 \n\t"
"eors w1, s0, w1 \n\t"
"/*sbox second column*/ \n\t"
"mvns w4, w4 \n\t"
"ands s0, w5, w4 \n\t"
"eors s0, w6, s0 \n\t"
"orrs w6, w5, w6 \n\t"
"eors w4, w7, w4 \n\t"
"eors w6, w6, w4 \n\t"
"eors s1, w5, w7 \n\t"
"eors w7, w7, s0 \n\t"
"ands w4, s0, w4 \n\t"
"eors w4, s1, w4 \n\t"
"ands w5, w6, s1 \n\t"
"eors w5, s0, w5 \n\t"
"/*sbox third column*/ \n\t"
"mvns w8, w8 \n\t"
"ands s0, w9, w8 \n\t"
"eors s0, w10, s0 \n\t"
"orrs w10, w9, w10 \n\t"
"eors w8, w11, w8 \n\t"
"eors w10, w10, w8 \n\t"
"eors s1, w9, w11 \n\t"
"eors w11, w11, s0 \n\t"
"ands w8, s0, w8 \n\t"
"eors w8, s1, w8 \n\t"
"ands w9, w10, s1 \n\t"
"eors w9, s0, w9 \n\t"
"/*rotate shift left 1 bit [w9 w5 w1-> (w1,1) w9 w5] */ \n\t"
"mov s0, w1 \n\t"
"mov w1, w5 \n\t"
"mov w5, w9 \n\t"
"ROR w9, s0, #31 \n\t"
"/*rotate shift left 8 bits [w10 w6 w2-> w6,3) (w2,3) ( w10,2)]*/ \n\t"
"mov s0, w10 \n\t"
"ROR w10, w6 , #29 \n\t"
"ROR w6, w2 , #29 \n\t"
"ROR w2, s0, #30 \n\t"
"/*rotate shift left 55 bit [w11 w7 w3-> w3,13) (w11,14) ( w7,14)] */ \n\t"
"mov s0, w3 \n\t"
"ROR w3, w7 , #14 \n\t"
"ROR w7, w11 , #14 \n\t"
"ROR w11, s0, #13 \n\t"
"/*loop control*/ \n\t"
"adds rc, rc, #1 \n\t"
"subs rounds, rounds, #1 \n\t"
"bne enc_loop \n\t"
"str w0, [in] \n\t"
"str w4, [in, #4] \n\t"
"str w8, [in, #8] \n\t"
"str w1, [in, #12] \n\t"
"str w5, [in, #16] \n\t"
"str w9, [in, #20] \n\t"
"str w2, [in, #24] \n\t"
"str w6, [in, #28] \n\t"
"str w10, [in, #32] \n\t"
"str w3, [in, #36] \n\t"
"str w7, [in, #40] \n\t"
"str w11, [in, #44] \n\t"
);
}
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k) {
u8 i;
u32 s[12] = { 0 };
u32 dataFormat[3] = { 0 };
u8 tempData[12] = { 0 };
u32 s_temp[12] = { 0 };
u32 t1, t2, t3, t5, t6, t8, t9, t11;
u32 t1_32, t2_64, t2_65;
u32 temp0[3] = { 0 };
u32 temp1[3] = { 0 };
u32 temp2[3] = { 0 };
*clen = mlen + CRYPTO_ABYTES;
// initialization
packU96FormatToThreePacket(s, npub);
packU96FormatToThreePacket((s + 3), (npub + 12));
packU96FormatToThreePacket((s + 6), k);
packU96FormatToThreePacket((s + 9), (k + 12));
permutation384(s,PR0_ROUNDS,constant7Format);
// process associated data
if (adlen) {
while (adlen >= aead_RATE) {
packU96FormatToThreePacket(dataFormat, ad);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
s[2] ^= dataFormat[2];
permutation384(s,PR_ROUNDS,constant7Format);
adlen -= aead_RATE;
ad += aead_RATE;
}
memset(tempData, 0, sizeof(tempData));
memcpy(tempData, ad, adlen);
tempData[adlen] = 0x01;
packU96FormatToThreePacket(dataFormat, tempData);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
s[2] ^= dataFormat[2];
permutation384(s,PR_ROUNDS,constant7Format);
}
s[9] ^= 0x80000000;
if (mlen) {
while (mlen >= aead_RATE) {
packU96FormatToThreePacket(dataFormat, m);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
s[2] ^= dataFormat[2];
unpackU96FormatToThreePacket(c, s);
permutation384(s,PR_ROUNDS,constant7Format);
mlen -= aead_RATE;
m += aead_RATE;
c += aead_RATE;
}
memset(tempData, 0, sizeof(tempData));
memcpy(tempData, m, mlen);
tempData[mlen] = 0x01;
packU96FormatToThreePacket(dataFormat, tempData);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
s[2] ^= dataFormat[2];
unpackU96FormatToThreePacket(tempData, s);
memcpy(c, tempData, mlen);
c += mlen;
}
// finalization
permutation384(s,PRF_ROUNDS,constant7Format);
// return tag
unpackU96FormatToThreePacket(c, s);
unpackU96FormatToThreePacket((c + 12), (s + 3));
return 0;
}
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
unsigned char *nsec, const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k) {
u8 i, j;
u32 s[12] = { 0 };
u32 dataFormat[6] = { 0 };
u32 dataFormat_1[3] = { 0 };
u8 tempData[12] = { 0 };
u8 tempU8[48] = { 0 };
u32 s_temp[12] = { 0 };
u32 t1, t2, t3, t5, t6, t8, t9, t11;
u32 t1_32, t2_64, t2_65;
u32 temp0[3] = { 0 };
u32 temp1[3] = { 0 };
u32 temp2[3] = { 0 };
*mlen = clen - CRYPTO_ABYTES;
if (clen < CRYPTO_ABYTES)
return -1;
// initialization
packU96FormatToThreePacket(s, npub);
packU96FormatToThreePacket((s + 3), (npub + 12));
packU96FormatToThreePacket((s + 6), k);
packU96FormatToThreePacket((s + 9), (k + 12));
permutation384(s,PR0_ROUNDS,constant7Format);
// process associated data
if (adlen) {
while (adlen >= aead_RATE) {
packU96FormatToThreePacket(dataFormat, ad);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
s[2] ^= dataFormat[2];
permutation384(s,PR_ROUNDS,constant7Format);
adlen -= aead_RATE;
ad += aead_RATE;
}
memset(tempData, 0, sizeof(tempData));
memcpy(tempData, ad, adlen);
tempData[adlen] = 0x01;
packU96FormatToThreePacket(dataFormat, tempData);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
s[2] ^= dataFormat[2];
permutation384(s,PR_ROUNDS,constant7Format);
}
s[9] ^= 0x80000000;
clen -= CRYPTO_ABYTES;
if (clen) {
while (clen >= aead_RATE) {
packU96FormatToThreePacket(dataFormat, c);
dataFormat_1[0] = s[0] ^ dataFormat[0];
dataFormat_1[1] = s[1] ^ dataFormat[1];
dataFormat_1[2] = s[2] ^ dataFormat[2];
unpackU96FormatToThreePacket(m, dataFormat_1);
s[0] = dataFormat[0];
s[1] = dataFormat[1];
s[2] = dataFormat[2];
permutation384(s,PR_ROUNDS,constant7Format);
clen -= aead_RATE;
m += aead_RATE;
c += aead_RATE;
}
unpackU96FormatToThreePacket(tempU8, s);
for (i = 0; i < clen; ++i, ++m, ++c)
{
*m = tempU8[i] ^ *c;
tempU8[i] = *c;
}
tempU8[i] ^= 0x01;
packU96FormatToThreePacket(s, tempU8);
}
// finalization
permutation384(s,PRF_ROUNDS,constant7Format);
// return tag
packU96FormatToThreePacket(dataFormat, c);
packU96FormatToThreePacket((dataFormat + 3), (c + 12));
if (dataFormat[0] != s[0] || dataFormat[1] != s[1] || dataFormat[2] != s[2] || dataFormat[3] != s[3]
|| dataFormat[4] != s[4] || dataFormat[5] != s[5]) {
return -1;
}
return 0;
}
#ifndef KNOT_API_H
#define KNOT_API_H
//k=n=tag=256 b=512 r=128 c=384
#define CRYPTO_KEYBYTES 32
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 32
#define CRYPTO_ABYTES 32
#define CRYPTO_NOOVERLAP 1
#endif
#ifndef KNOT_CRYPTO_AEAD_H
#define KNOT_CRYPTO_AEAD_H
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k);
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k);
#endif
#define CRYPTO_KEYBYTES 32 //256/8=32
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 32
#define CRYPTO_ABYTES 32
#define CRYPTO_NOOVERLAP 1
//#include<malloc.h>
#include"crypto_aead.h"
#include"api.h"
#include <string.h>
#include <stdio.h>
#include <stdint.h>
#define U32BIG(x) (x)
#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n))))
#define sbox(a, b, c, d, e, f, g, h) \
{ \
t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; e = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
}
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
void printfU128Format(char name[], u32 * in);
void printU8(char name[], u8 var[], long len, int offset);
//ʹt9
#define puck32(in)\
{\
t9 = (in ^ (in >> 1)) & 0x22222222; in ^= t9 ^ (t9 << 1);\
t9 = (in ^ (in >> 2)) & 0x0C0C0C0C; in ^= t9 ^ (t9 << 2);\
t9 = (in ^ (in >> 4)) & 0x00F000F0; in ^= t9 ^ (t9 << 4);\
t9 = (in ^ (in >> 8)) & 0x0000FF00; in ^= t9 ^ (t9 << 8);\
}
//ʹt9
#define unpuck32(t0){\
t9 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= t9 ^ (t9 << 8); \
t9 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= t9 ^ (t9 << 4); \
t9 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= t9 ^ (t9 << 2); \
t9 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= t9 ^ (t9 << 1); \
}
//u32 t1, t2, t3,t8,
#define packU128FormatToFourPacket(out,in) {\
t8 = U32BIG(((u32*)in)[0]); \
t1 = U32BIG(((u32*)in)[1]); \
t2 = U32BIG(((u32*)in)[2]); \
t3 = U32BIG(((u32*)in)[3]); \
puck32(t8); puck32(t8); \
puck32(t1); puck32(t1); \
puck32(t2); puck32(t2); \
puck32(t3); puck32(t3); \
out[3] = t3 & 0xff000000 | ((t2 >> 8) & 0x00ff0000) | ((t1 >> 16) & 0x0000ff00) | (t8 >> 24); \
out[2] = ((t3 << 8) & 0xff000000) | (t2 & 0x00ff0000) | ((t1 >> 8) & 0x0000ff00) | ((t8 >> 16) & 0x000000ff); \
out[1] = ((t3 << 16) & 0xff000000) | ((t2 << 8) & 0x00ff0000) | (t1 & 0x0000ff00) | ((t8 >> 8) & 0x000000ff); \
out[0] = ((t3 << 24) & 0xff000000) | ((t2 << 16) & 0x00ff0000) | ((t1 << 8) & 0x0000ff00) | (t8 & 0x000000ff); \
}
//u32 dataFormat[4],u32 t1, t2, t3,t8,
#define unpackU128FormatToFourPacket( out, in) {\
memcpy(dataFormat, in, sizeof(unsigned int) * 4); \
t3 = dataFormat[3] & 0xff000000 | ((dataFormat[2] >> 8) & 0x00ff0000) | ((dataFormat[1] >> 16) & 0x0000ff00) | (dataFormat[0] >> 24); \
t2 = ((dataFormat[3] << 8) & 0xff000000) | (dataFormat[2] & 0x00ff0000) | ((dataFormat[1] >> 8) & 0x0000ff00) | ((dataFormat[0] >> 16) & 0x000000ff); \
t1 = ((dataFormat[3] << 16) & 0xff000000) | ((dataFormat[2] << 8) & 0x00ff0000) | (dataFormat[1] & 0x0000ff00) | ((dataFormat[0] >> 8) & 0x000000ff); \
t8 = ((dataFormat[3] << 24) & 0xff000000) | ((dataFormat[2] << 16) & 0x00ff0000) | ((dataFormat[1] << 8) & 0x0000ff00) | (dataFormat[0] & 0x000000ff); \
unpuck32(t8); unpuck32(t8); \
unpuck32(t1); unpuck32(t1); \
unpuck32(t2); unpuck32(t2); \
unpuck32(t3); unpuck32(t3); \
((u32*)out)[0] = U32BIG(t8); \
((u32*)out)[1] = U32BIG(t1); \
((u32*)out)[2] = U32BIG(t2); \
((u32*)out)[3] = U32BIG(t3); \
}
//u32 t1 ;u32 t2 =
#define packU64FormatToFourPacket( out, in) {\
t1 = U32BIG(((u32*)in)[0]); \
t2 = U32BIG(((u32*)in)[1]); \
puck32(t1); \
puck32(t1); \
puck32(t2); \
puck32(t2); \
out[3] = ((t2 >> 16) & 0x0000ff00) | ((t1 >> 24)); \
out[2] = ((t2 >> 8) & 0x0000ff00) | ((t1 >> 16) & 0x000000ff); \
out[1] = (t2 & 0x0000ff00) | ((t1 >> 8) & 0x000000ff); \
out[0] = ((t2 << 8) & 0x0000ff00) | (t1 & 0x000000ff); \
}
#define BIT_LOTR32_1(t0,t1,t2,t3,t4,t5,t6,t7){\
t4= LOTR32(t3, 1);\
t5 = t0;\
t6 = t1; \
t7 = t2; \
}
#define BIT_LOTR32_16(t0,t1,t2,t3,t4,t5,t6,t7){\
t4= LOTR32(t0, 4);\
t5 = LOTR32(t1, 4);\
t6 = LOTR32(t2, 4); \
t7 = LOTR32(t3, 4); \
}
#define BIT_LOTR32_25(t0,t1,t2,t3,t4,t5,t6,t7){\
t4= LOTR32(t3, 7);\
t5 = LOTR32(t0, 6);\
t6 = LOTR32(t1, 6); \
t7 = LOTR32(t2, 6); \
}
#define ROUND512( arr,lunNum) {\
s[3] ^= (arr[lunNum] >> 6) & 0x3;\
s[2] ^= (arr[lunNum] >> 4) & 0x3;\
s[1] ^= (arr[lunNum] >> 2) & 0x3;\
s[0] ^= arr[lunNum] & 0x3;\
sbox(s[0], s[4], s[8], s[12], s_temp[0], s_temp[4], s_temp[8], s_temp[12]);\
sbox(s[1], s[5], s[9], s[13], s_temp[1], s_temp[5], s_temp[9], s_temp[13]);\
sbox(s[2], s[6], s[10], s[14], s_temp[2], s_temp[6], s_temp[10], s_temp[14]);\
sbox(s[3], s[7], s[11], s[15], s_temp[3], s_temp[7], s_temp[11], s_temp[15]);\
s[0] = s_temp[0], s[1] = s_temp[1], s[2] = s_temp[2], s[3] = s_temp[3];\
BIT_LOTR32_1(s_temp[4], s_temp[5], s_temp[6], s_temp[7], s[4], s[5], s[6], s[7]);\
BIT_LOTR32_16(s_temp[8], s_temp[9], s_temp[10], s_temp[11], s[8], s[9], s[10], s[11]);\
BIT_LOTR32_25(s_temp[12], s_temp[13], s_temp[14], s_temp[15], s[12], s[13], s[14], s[15]);\
}
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
This source diff could not be displayed because it is too large. You can view the blob instead.
//API required by the NIST for the LWC competition
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k);
//API required by the NIST for the LWC competition
int crypto_aead_decrypt(unsigned char *m, unsigned long long *outputmlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k);
#ifndef ROMULUSN1_H_
#define ROMULUSN1_H_
#include "skinny128.h"
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned int u64;
typedef struct {
u8 tk1[16]; //to manipulate tk1 byte-wise
u32 rtk1[4*16]; //to avoid tk schedule recomputations
u32 rtk[4*SKINNY128_384_ROUNDS]; //all round tweakeys
} skinny_128_384_tks;
#define TAGBYTES 16
#define KEYBYTES 16
#define BLOCKBYTES 16
#define SET_DOMAIN(tks, domain) ((tks).tk1[7] = (domain))
//G as defined in the Romulus specification in a 32-bit word-wise manner
#define G(x,y) ({ \
tmp = ((u32*)(y))[0]; \
((u32*)(x))[0] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[1]; \
((u32*)(x))[1] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[2]; \
((u32*)(x))[2] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[3]; \
((u32*)(x))[3] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
})
//update the counter in tk1 in a 32-bit word-wise manner
#define UPDATE_CTR(tk1) ({ \
tmp = ((u32*)(tk1))[1]; \
((u32*)(tk1))[1] = (tmp << 1) & 0x00ffffff; \
((u32*)(tk1))[1] |= (((u32*)(tk1))[0] >> 31); \
((u32*)(tk1))[1] |= tmp & 0xff000000; \
((u32*)(tk1))[0] <<= 1; \
if ((tmp >> 23) & 0x01) \
((u32*)(tk1))[0] ^= 0x95; \
})
//x <- y ^ z for 128-bit blocks
#define XOR_BLOCK(x,y,z) ({ \
((u32*)(x))[0] = ((u32*)(y))[0] ^ ((u32*)(z))[0]; \
((u32*)(x))[1] = ((u32*)(y))[1] ^ ((u32*)(z))[1]; \
((u32*)(x))[2] = ((u32*)(y))[2] ^ ((u32*)(z))[2]; \
((u32*)(x))[3] = ((u32*)(y))[3] ^ ((u32*)(z))[3]; \
})
//Rho as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO(x,y,z) ({ \
G(pad,x); \
XOR_BLOCK(y, pad, z); \
XOR_BLOCK(x, x, z); \
})
//Rho inverse as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO_INV(x, y, z) ({ \
G(pad, x); \
XOR_BLOCK(z, pad, y); \
XOR_BLOCK(x, x, z); \
})
#endif // ROMULUSN1_H_
\ No newline at end of file
#ifndef SKINNY128_H_
#define SKINNY128_H_
typedef unsigned char u8;
typedef unsigned int u32;
#define SKINNY128_384_ROUNDS 40
extern void skinny128_384(u8* ctext, const u32* tk, const u8* ptext, const u32* rtk1);
extern void tkschedule_lfsr(u32* rtk, const u8* tk2, const u8* tk3, const int rounds);
extern void tkschedule_perm(u32* rtk);
extern void tkschedule_perm_tk1(u32* rtk1, const u8* tk1);
#endif // SKINNY128_H_
\ No newline at end of file
typedef struct ___skinny_ctrl {
unsigned char roundKeys[704]; // number of round : 40
void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K);
} skinny_ctrl;
extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
\ No newline at end of file
#ifndef ROMULUSN1_H_
#define ROMULUSN1_H_
#include "skinny128.h"
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned int u64;
typedef struct {
u8 tk1[16]; //to manipulate tk1 byte-wise
u32 rtk1[4*16]; //to avoid tk schedule recomputations
u32 rtk2_3[4*SKINNY128_384_ROUNDS]; //all round tweakeys
} skinny_128_384_tks;
#define TAGBYTES 16
#define KEYBYTES 16
#define BLOCKBYTES 16
#define SET_DOMAIN(tks, domain) ((tks).tk1[7] = (domain))
//G as defined in the Romulus specification in a 32-bit word-wise manner
#define G(x,y) ({ \
tmp = ((u32*)(y))[0]; \
((u32*)(x))[0] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[1]; \
((u32*)(x))[1] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[2]; \
((u32*)(x))[2] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[3]; \
((u32*)(x))[3] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
})
//update the counter in tk1 in a 32-bit word-wise manner
#define UPDATE_CTR(tk1) ({ \
tmp = ((u32*)(tk1))[1]; \
((u32*)(tk1))[1] = (tmp << 1) & 0x00ffffff; \
((u32*)(tk1))[1] |= (((u32*)(tk1))[0] >> 31); \
((u32*)(tk1))[1] |= tmp & 0xff000000; \
((u32*)(tk1))[0] <<= 1; \
if ((tmp >> 23) & 0x01) \
((u32*)(tk1))[0] ^= 0x95; \
})
//x <- y ^ z for 128-bit blocks
#define XOR_BLOCK(x,y,z) ({ \
((u32*)(x))[0] = ((u32*)(y))[0] ^ ((u32*)(z))[0]; \
((u32*)(x))[1] = ((u32*)(y))[1] ^ ((u32*)(z))[1]; \
((u32*)(x))[2] = ((u32*)(y))[2] ^ ((u32*)(z))[2]; \
((u32*)(x))[3] = ((u32*)(y))[3] ^ ((u32*)(z))[3]; \
})
//Rho as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO(x,y,z) ({ \
G(pad,x); \
XOR_BLOCK(y, pad, z); \
XOR_BLOCK(x, x, z); \
})
//Rho inverse as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO_INV(x, y, z) ({ \
G(pad, x); \
XOR_BLOCK(z, pad, y); \
XOR_BLOCK(x, x, z); \
})
#endif // ROMULUSN1_H_
\ No newline at end of file
/******************************************************************************
* Constant-time implementation of the SKINNY tweakable block ciphers.
*
* This implementation doesn't compute the ShiftRows operation. Some masks and
* shifts are applied during the MixColumns operation so that the proper bits
* are XORed together. Moreover, the row permutation within the MixColumns
* is omitted, as well as the bit permutation at the end of the Sbox. The rows
* are synchronized with the classical after only 4 rounds. Therefore, this
* implementation relies on a "QUADRUPLE_ROUND" routine.
*
* The Sbox computation takes advantage of some symmetry in the 8-bit Sbox to
* turn it into a 4-bit S-box computation. Although the last bit permutation
* within the Sbox is not computed, the bit ordering is synchronized with the
* classical representation after 2 calls.
*
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
*
* @date May 2020
******************************************************************************/
#include <stdio.h>
#include <string.h>
#include "skinny128.h"
#include "tk_schedule.h"
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 0
******************************************************************************/
void mixcolumns_0(u32* state) {
u32 tmp;
for(int i = 0; i < 4; i++) {
tmp = ROR(state[i],24) & 0x0c0c0c0c;
state[i] ^= ROR(tmp,30);
tmp = ROR(state[i],16) & 0xc0c0c0c0;
state[i] ^= ROR(tmp,4);
tmp = ROR(state[i],8) & 0x0c0c0c0c;
state[i] ^= ROR(tmp,2);
}
}
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 1
******************************************************************************/
void mixcolumns_1(u32* state) {
u32 tmp;
for(int i = 0; i < 4; i++) {
tmp = ROR(state[i],16) & 0x30303030;
state[i] ^= ROR(tmp,30);
tmp = state[i] & 0x03030303;
state[i] ^= ROR(tmp,28);
tmp = ROR(state[i],16) & 0x30303030;
state[i] ^= ROR(tmp,2);
}
}
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 2
******************************************************************************/
void mixcolumns_2(u32* state) {
u32 tmp;
for(int i = 0; i < 4; i++) {
tmp = ROR(state[i],8) & 0xc0c0c0c0;
state[i] ^= ROR(tmp,6);
tmp = ROR(state[i],16) & 0x0c0c0c0c;
state[i] ^= ROR(tmp,28);
tmp = ROR(state[i],24) & 0xc0c0c0c0;
state[i] ^= ROR(tmp,2);
}
}
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 3
******************************************************************************/
void mixcolumns_3(u32* state) {
u32 tmp;
for(int i = 0; i < 4; i++) {
tmp = state[i] & 0x03030303;
state[i] ^= ROR(tmp,30);
tmp = state[i] & 0x30303030;
state[i] ^= ROR(tmp,4);
tmp = state[i] & 0x03030303;
state[i] ^= ROR(tmp,26);
}
}
/******************************************************************************
* Encryption of a single block without any operation mode using SKINNY-128-384.
* RTK1 and RTK2_3 are given separately to take advantage of the fact that
* TK2 and TK3 remains the same through the entire data encryption/decryption.
******************************************************************************/
void skinny128_384_plus(u8* ctext, const u8* ptext, const u32* rtk1,
const u32* rtk2_3) {
u32 tmp; // used in SWAPMOVE macro
u32 state[4]; // 128-bit state
packing(state, ptext); // from byte to bitsliced representation
QUADRUPLE_ROUND(state, rtk1, rtk2_3);
QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+16);
QUADRUPLE_ROUND(state, rtk1+32, rtk2_3+32);
QUADRUPLE_ROUND(state, rtk1+48, rtk2_3+48);
QUADRUPLE_ROUND(state, rtk1, rtk2_3+64);
QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+80);
QUADRUPLE_ROUND(state, rtk1+32, rtk2_3+96);
QUADRUPLE_ROUND(state, rtk1+48, rtk2_3+112);
QUADRUPLE_ROUND(state, rtk1, rtk2_3+128);
QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+144);
unpacking(ctext, state); // from bitsliced to byte representation
}
\ No newline at end of file
#ifndef SKINNY128_H_
#define SKINNY128_H_
typedef unsigned char u8;
typedef unsigned int u32;
void skinny128_384_plus(u8* ctext, const u8* ptext, const u32* rtk1, const u32* rtk2_3);
#define SKINNY128_384_ROUNDS 40
#define QUADRUPLE_ROUND(state, rtk1, rtk2_3) ({ \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= (state[2] | state[3]); \
SWAPMOVE(state[3], state[0], 0x55555555, 0); \
state[0] ^= (rtk1)[0]; \
state[1] ^= (rtk1)[1]; \
state[2] ^= (rtk1)[2]; \
state[3] ^= (rtk1)[3]; \
state[0] ^= (rtk2_3)[0]; \
state[1] ^= (rtk2_3)[1]; \
state[2] ^= (rtk2_3)[2]; \
state[3] ^= (rtk2_3)[3]; \
mixcolumns_0(state); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= (state[0] | state[1]); \
SWAPMOVE(state[1], state[2], 0x55555555, 0); \
state[0] ^= (rtk1)[4]; \
state[1] ^= (rtk1)[5]; \
state[2] ^= (rtk1)[6]; \
state[3] ^= (rtk1)[7]; \
state[0] ^= (rtk2_3)[4]; \
state[1] ^= (rtk2_3)[5]; \
state[2] ^= (rtk2_3)[6]; \
state[3] ^= (rtk2_3)[7]; \
mixcolumns_1(state); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= (state[2] | state[3]); \
SWAPMOVE(state[3], state[0], 0x55555555, 0); \
state[0] ^= (rtk1)[8]; \
state[1] ^= (rtk1)[9]; \
state[2] ^= (rtk1)[10]; \
state[3] ^= (rtk1)[11]; \
state[0] ^= (rtk2_3)[8]; \
state[1] ^= (rtk2_3)[9]; \
state[2] ^= (rtk2_3)[10]; \
state[3] ^= (rtk2_3)[11]; \
mixcolumns_2(state); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= (state[0] | state[1]); \
SWAPMOVE(state[1], state[2], 0x55555555, 0); \
state[0] ^= (rtk1)[12]; \
state[1] ^= (rtk1)[13]; \
state[2] ^= (rtk1)[14]; \
state[3] ^= (rtk1)[15]; \
state[0] ^= (rtk2_3)[12]; \
state[1] ^= (rtk2_3)[13]; \
state[2] ^= (rtk2_3)[14]; \
state[3] ^= (rtk2_3)[15]; \
mixcolumns_3(state); \
})
#endif // SKINNY128_H_
\ No newline at end of file
#ifndef TK_SCHEDULE_H_
#define TK_SCHEDULE_H_
typedef unsigned char u8;
typedef unsigned int u32;
void packing(u32* out, const u8* in);
void unpacking(u8* out, u32 *in);
void precompute_rtk2_3(u32* rtk, const u8* tk2, const u8* tk3);
void precompute_rtk1(u32* rtk1, const u8* tk1);
#define ROR(x,y) (((x) >> (y)) | ((x) << (32 - (y))))
#define XOR_BLOCKS(x,y) ({ \
(x)[0] ^= (y)[0]; \
(x)[1] ^= (y)[1]; \
(x)[2] ^= (y)[2]; \
(x)[3] ^= (y)[3]; \
})
#define SWAPMOVE(a, b, mask, n) ({ \
tmp = (b ^ (a >> n)) & mask; \
b ^= tmp; \
a ^= (tmp << n); \
})
#define LE_LOAD(x, y) \
*(x) = (((u32)(y)[3] << 24) | \
((u32)(y)[2] << 16) | \
((u32)(y)[1] << 8) | \
(y)[0]);
#define LE_STORE(x, y) \
(x)[0] = (y) & 0xff; \
(x)[1] = ((y) >> 8) & 0xff; \
(x)[2] = ((y) >> 16) & 0xff; \
(x)[3] = (y) >> 24;
#endif // TK_SCHEDULE_H_
\ No newline at end of file
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
//API required by the NIST for the LWC competition
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k);
//API required by the NIST for the LWC competition
int crypto_aead_decrypt(unsigned char *m, unsigned long long *outputmlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k);
#ifndef ROMULUSN1_H_
#define ROMULUSN1_H_
#include "skinny128.h"
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned int u64;
typedef struct {
u8 tk1[16]; //to manipulate tk1 byte-wise
u32 rtk1[4*16]; //to avoid tk schedule recomputations
u32 rtk[4*SKINNY128_384_ROUNDS]; //all round tweakeys
} skinny_128_384_tks;
#define TAGBYTES 16
#define KEYBYTES 16
#define BLOCKBYTES 16
#define SET_DOMAIN(tks, domain) ((tks).tk1[7] = (domain))
//G as defined in the Romulus specification in a 32-bit word-wise manner
#define G(x,y) ({ \
tmp = ((u32*)(y))[0]; \
((u32*)(x))[0] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[1]; \
((u32*)(x))[1] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[2]; \
((u32*)(x))[2] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[3]; \
((u32*)(x))[3] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
})
//update the counter in tk1 in a 32-bit word-wise manner
#define UPDATE_CTR(tk1) ({ \
tmp = ((u32*)(tk1))[1]; \
((u32*)(tk1))[1] = (tmp << 1) & 0x00ffffff; \
((u32*)(tk1))[1] |= (((u32*)(tk1))[0] >> 31); \
((u32*)(tk1))[1] |= tmp & 0xff000000; \
((u32*)(tk1))[0] <<= 1; \
if ((tmp >> 23) & 0x01) \
((u32*)(tk1))[0] ^= 0x95; \
})
//x <- y ^ z for 128-bit blocks
#define XOR_BLOCK(x,y,z) ({ \
((u32*)(x))[0] = ((u32*)(y))[0] ^ ((u32*)(z))[0]; \
((u32*)(x))[1] = ((u32*)(y))[1] ^ ((u32*)(z))[1]; \
((u32*)(x))[2] = ((u32*)(y))[2] ^ ((u32*)(z))[2]; \
((u32*)(x))[3] = ((u32*)(y))[3] ^ ((u32*)(z))[3]; \
})
//Rho as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO(x,y,z) ({ \
G(pad,x); \
XOR_BLOCK(y, pad, z); \
XOR_BLOCK(x, x, z); \
})
//Rho inverse as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO_INV(x, y, z) ({ \
G(pad, x); \
XOR_BLOCK(z, pad, y); \
XOR_BLOCK(x, x, z); \
})
#endif // ROMULUSN1_H_
\ No newline at end of file
#ifndef SKINNY128_H_
#define SKINNY128_H_
typedef unsigned char u8;
typedef unsigned int u32;
#define SKINNY128_128_ROUNDS 40
#define SKINNY128_256_ROUNDS 48
#define SKINNY128_384_ROUNDS 56
extern void skinny128_384(u8* ctext, const u32* tk, const u8* ptext, const u32* rtk1);
extern void tkschedule_lfsr(u32* rtk, const u8* tk2, const u8* tk3, const int rounds);
extern void tkschedule_perm(u32* rtk);
extern void tkschedule_perm_tk1(u32* rtk1, const u8* tk1);
#endif // SKINNY128_H_
\ No newline at end of file
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
\ No newline at end of file
#ifndef ROMULUSN1_H_
#define ROMULUSN1_H_
#include "skinny128.h"
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned int u64;
typedef struct {
u8 tk1[16]; //to manipulate tk1 byte-wise
u32 rtk1[4*16]; //to avoid tk schedule recomputations
u32 rtk2_3[4*SKINNY128_384_ROUNDS]; //all round tweakeys
} skinny_128_384_tks;
#define TAGBYTES 16
#define KEYBYTES 16
#define BLOCKBYTES 16
#define SET_DOMAIN(tks, domain) ((tks).tk1[7] = (domain))
//G as defined in the Romulus specification in a 32-bit word-wise manner
#define G(x,y) ({ \
tmp = ((u32*)(y))[0]; \
((u32*)(x))[0] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[1]; \
((u32*)(x))[1] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[2]; \
((u32*)(x))[2] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[3]; \
((u32*)(x))[3] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
})
//update the counter in tk1 in a 32-bit word-wise manner
#define UPDATE_CTR(tk1) ({ \
tmp = ((u32*)(tk1))[1]; \
((u32*)(tk1))[1] = (tmp << 1) & 0x00ffffff; \
((u32*)(tk1))[1] |= (((u32*)(tk1))[0] >> 31); \
((u32*)(tk1))[1] |= tmp & 0xff000000; \
((u32*)(tk1))[0] <<= 1; \
if ((tmp >> 23) & 0x01) \
((u32*)(tk1))[0] ^= 0x95; \
})
//x <- y ^ z for 128-bit blocks
#define XOR_BLOCK(x,y,z) ({ \
((u32*)(x))[0] = ((u32*)(y))[0] ^ ((u32*)(z))[0]; \
((u32*)(x))[1] = ((u32*)(y))[1] ^ ((u32*)(z))[1]; \
((u32*)(x))[2] = ((u32*)(y))[2] ^ ((u32*)(z))[2]; \
((u32*)(x))[3] = ((u32*)(y))[3] ^ ((u32*)(z))[3]; \
})
//Rho as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO(x,y,z) ({ \
G(pad,x); \
XOR_BLOCK(y, pad, z); \
XOR_BLOCK(x, x, z); \
})
//Rho inverse as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO_INV(x, y, z) ({ \
G(pad, x); \
XOR_BLOCK(z, pad, y); \
XOR_BLOCK(x, x, z); \
})
#endif // ROMULUSN1_H_
\ No newline at end of file
/******************************************************************************
* Constant-time implementation of the SKINNY tweakable block ciphers.
*
* This implementation doesn't compute the ShiftRows operation. Some masks and
* shifts are applied during the MixColumns operation so that the proper bits
* are XORed together. Moreover, the row permutation within the MixColumns
* is omitted, as well as the bit permutation at the end of the Sbox. The rows
* are synchronized with the classical after only 4 rounds. Therefore, this
* implementation relies on a "QUADRUPLE_ROUND" routine.
*
* The Sbox computation takes advantage of some symmetry in the 8-bit Sbox to
* turn it into a 4-bit S-box computation. Although the last bit permutation
* within the Sbox is not computed, the bit ordering is synchronized with the
* classical representation after 2 calls.
*
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
*
* @date May 2020
******************************************************************************/
#include <stdio.h>
#include <string.h>
#include "skinny128.h"
#include "tk_schedule.h"
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 0
******************************************************************************/
void mixcolumns_0(u32* state) {
u32 tmp;
for(int i = 0; i < 4; i++) {
tmp = ROR(state[i],24) & 0x0c0c0c0c;
state[i] ^= ROR(tmp,30);
tmp = ROR(state[i],16) & 0xc0c0c0c0;
state[i] ^= ROR(tmp,4);
tmp = ROR(state[i],8) & 0x0c0c0c0c;
state[i] ^= ROR(tmp,2);
}
}
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 1
******************************************************************************/
void mixcolumns_1(u32* state) {
u32 tmp;
for(int i = 0; i < 4; i++) {
tmp = ROR(state[i],16) & 0x30303030;
state[i] ^= ROR(tmp,30);
tmp = state[i] & 0x03030303;
state[i] ^= ROR(tmp,28);
tmp = ROR(state[i],16) & 0x30303030;
state[i] ^= ROR(tmp,2);
}
}
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 2
******************************************************************************/
void mixcolumns_2(u32* state) {
u32 tmp;
for(int i = 0; i < 4; i++) {
tmp = ROR(state[i],8) & 0xc0c0c0c0;
state[i] ^= ROR(tmp,6);
tmp = ROR(state[i],16) & 0x0c0c0c0c;
state[i] ^= ROR(tmp,28);
tmp = ROR(state[i],24) & 0xc0c0c0c0;
state[i] ^= ROR(tmp,2);
}
}
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 3
******************************************************************************/
void mixcolumns_3(u32* state) {
u32 tmp;
for(int i = 0; i < 4; i++) {
tmp = state[i] & 0x03030303;
state[i] ^= ROR(tmp,30);
tmp = state[i] & 0x30303030;
state[i] ^= ROR(tmp,4);
tmp = state[i] & 0x03030303;
state[i] ^= ROR(tmp,26);
}
}
/******************************************************************************
* Encryption of a single block without any operation mode using SKINNY-128-384.
* RTK1 and RTK2_3 are given separately to take advantage of the fact that
* TK2 and TK3 remains the same through the entire data encryption/decryption.
******************************************************************************/
void skinny128_384(u8* ctext, const u8* ptext, const u32* rtk1, const u32* rtk2_3) {
u32 tmp; // used in SWAPMOVE macro
u32 state[4]; // 128-bit state
packing(state, ptext); // from byte to bitsliced representation
QUADRUPLE_ROUND(state, rtk1, rtk2_3);
QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+16);
QUADRUPLE_ROUND(state, rtk1+32, rtk2_3+32);
QUADRUPLE_ROUND(state, rtk1+48, rtk2_3+48);
QUADRUPLE_ROUND(state, rtk1, rtk2_3+64);
QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+80);
QUADRUPLE_ROUND(state, rtk1+32, rtk2_3+96);
QUADRUPLE_ROUND(state, rtk1+48, rtk2_3+112);
QUADRUPLE_ROUND(state, rtk1, rtk2_3+128);
QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+144);
QUADRUPLE_ROUND(state, rtk1+32, rtk2_3+160);
QUADRUPLE_ROUND(state, rtk1+48, rtk2_3+176);
QUADRUPLE_ROUND(state, rtk1, rtk2_3+192);
QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+208);
unpacking(ctext, state); // from bitsliced to byte representation
}
\ No newline at end of file
#ifndef SKINNY128_H_
#define SKINNY128_H_
typedef unsigned char u8;
typedef unsigned int u32;
#define SKINNY128_384_ROUNDS 56
#define QUADRUPLE_ROUND(state, rtk1, rtk2_3) ({ \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= (state[2] | state[3]); \
SWAPMOVE(state[3], state[0], 0x55555555, 0); \
state[0] ^= (rtk1)[0]; \
state[1] ^= (rtk1)[1]; \
state[2] ^= (rtk1)[2]; \
state[3] ^= (rtk1)[3]; \
state[0] ^= (rtk2_3)[0]; \
state[1] ^= (rtk2_3)[1]; \
state[2] ^= (rtk2_3)[2]; \
state[3] ^= (rtk2_3)[3]; \
mixcolumns_0(state); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= (state[0] | state[1]); \
SWAPMOVE(state[1], state[2], 0x55555555, 0); \
state[0] ^= (rtk1)[4]; \
state[1] ^= (rtk1)[5]; \
state[2] ^= (rtk1)[6]; \
state[3] ^= (rtk1)[7]; \
state[0] ^= (rtk2_3)[4]; \
state[1] ^= (rtk2_3)[5]; \
state[2] ^= (rtk2_3)[6]; \
state[3] ^= (rtk2_3)[7]; \
mixcolumns_1(state); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= (state[2] | state[3]); \
SWAPMOVE(state[3], state[0], 0x55555555, 0); \
state[0] ^= (rtk1)[8]; \
state[1] ^= (rtk1)[9]; \
state[2] ^= (rtk1)[10]; \
state[3] ^= (rtk1)[11]; \
state[0] ^= (rtk2_3)[8]; \
state[1] ^= (rtk2_3)[9]; \
state[2] ^= (rtk2_3)[10]; \
state[3] ^= (rtk2_3)[11]; \
mixcolumns_2(state); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= (state[0] | state[1]); \
SWAPMOVE(state[1], state[2], 0x55555555, 0); \
state[0] ^= (rtk1)[12]; \
state[1] ^= (rtk1)[13]; \
state[2] ^= (rtk1)[14]; \
state[3] ^= (rtk1)[15]; \
state[0] ^= (rtk2_3)[12]; \
state[1] ^= (rtk2_3)[13]; \
state[2] ^= (rtk2_3)[14]; \
state[3] ^= (rtk2_3)[15]; \
mixcolumns_3(state); \
})
void skinny128_384(u8* ctext, const u8* ptext, const u32* rtk1, const u32* rtk2_3);
#endif // SKINNY128_H_
\ No newline at end of file
#ifndef TK_SCHEDULE_H_
#define TK_SCHEDULE_H_
typedef unsigned char u8;
typedef unsigned int u32;
void packing(u32* out, const u8* in);
void unpacking(u8* out, u32 *in);
void precompute_rtk2_3(u32* rtk, const u8* tk2, const u8* tk3);
void precompute_rtk1(u32* rtk1, const u8* tk1);
#define ROR(x,y) (((x) >> (y)) | ((x) << (32 - (y))))
#define XOR_BLOCKS(x,y) ({ \
(x)[0] ^= (y)[0]; \
(x)[1] ^= (y)[1]; \
(x)[2] ^= (y)[2]; \
(x)[3] ^= (y)[3]; \
})
#define SWAPMOVE(a, b, mask, n) ({ \
tmp = (b ^ (a >> n)) & mask; \
b ^= tmp; \
a ^= (tmp << n); \
})
#define LE_LOAD(x, y) \
*(x) = (((u32)(y)[3] << 24) | \
((u32)(y)[2] << 16) | \
((u32)(y)[1] << 8) | \
(y)[0]);
#define LE_STORE(x, y) \
(x)[0] = (y) & 0xff; \
(x)[1] = ((y) >> 8) & 0xff; \
(x)[2] = ((y) >> 16) & 0xff; \
(x)[3] = (y) >> 24;
#endif // TK_SCHEDULE_H_
\ No newline at end of file
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment