Commit da92cb38 by Enrico Pozzobon

Merge branch 'email-submissions'

parents 90acf8b3 f9e2581f
//API required by NIST for the LWC competition
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k);
//API required by NIST for the LWC competition
int crypto_aead_decrypt(unsigned char *m, unsigned long long *outputmlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k);
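/*
 * Illustrative caller-side sketch (not part of the submission): how the NIST
 * LWC AEAD API declared above is typically driven. Sizes follow the CRYPTO_*
 * constants used elsewhere in this commit (16-byte key, nonce and tag); the
 * helper name and buffers are hypothetical.
 */
#include <stddef.h> // for NULL (sketch only)
static int aead_roundtrip_demo(void) {
    unsigned char k[16] = {0}, npub[16] = {0};  // key and public nonce
    unsigned char ad[8] = "header7";            // associated data
    unsigned char m[32] = {0}, out[32];         // message and decrypted output
    unsigned char c[32 + 16];                   // ciphertext || 16-byte tag
    unsigned long long clen, outlen;
    crypto_aead_encrypt(c, &clen, m, sizeof m, ad, sizeof ad, NULL, npub, k);
    // returns 0 (and fills 'out') only if the tag verifies
    return crypto_aead_decrypt(out, &outlen, NULL, c, clen, ad, sizeof ad, npub, k);
}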
#ifndef ROMULUSN1_H_
#define ROMULUSN1_H_
#include "skinny128.h"
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
typedef struct {
u8 tk1[16]; //to manipulate tk1 byte-wise
u32 rtk1[4*16]; //to avoid tk schedule recomputations
u32 rtk[4*SKINNY128_384_ROUNDS]; //all round tweakeys
} skinny_128_384_tks;
#define TAGBYTES 16
#define KEYBYTES 16
#define BLOCKBYTES 16
#define SET_DOMAIN(tks, domain) ((tks).tk1[7] = (domain))
//G as defined in the Romulus specification in a 32-bit word-wise manner
#define G(x,y) ({ \
tmp = ((u32*)(y))[0]; \
((u32*)(x))[0] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[1]; \
((u32*)(x))[1] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[2]; \
((u32*)(x))[2] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[3]; \
((u32*)(x))[3] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
})
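/*
 * Byte-wise reference for the G macro above (illustrative sketch, not used by
 * the implementation): per byte b, bits 7..1 shift down by one position and
 * the new top bit is b7 ^ b0, which is exactly what the masked 32-bit version
 * computes for four bytes at once.
 */
static unsigned char g_byte(unsigned char b) {
    return (unsigned char)((b >> 1) | (((b ^ (b >> 7)) & 1) << 7));
}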
//update the counter in tk1 in a 32-bit word-wise manner
#define UPDATE_CTR(tk1) ({ \
tmp = ((u32*)(tk1))[1]; \
((u32*)(tk1))[1] = (tmp << 1) & 0x00ffffff; \
((u32*)(tk1))[1] |= (((u32*)(tk1))[0] >> 31); \
((u32*)(tk1))[1] |= tmp & 0xff000000; \
((u32*)(tk1))[0] <<= 1; \
if ((tmp >> 23) & 0x01) \
((u32*)(tk1))[0] ^= 0x95; \
})
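/*
 * Illustrative byte-wise equivalent of UPDATE_CTR (a sketch; assumes the
 * little-endian u32 view that the macro itself relies on): the 56-bit LFSR
 * counter lives in tk1[0..6], while tk1[7] holds the domain-separation byte
 * and must stay untouched.
 */
static void update_ctr_bytes(unsigned char tk1[16]) {
    unsigned char msb = tk1[6] >> 7;   // outgoing bit 55
    for (int i = 6; i > 0; i--)        // shift the 56-bit counter left by one
        tk1[i] = (unsigned char)((tk1[i] << 1) | (tk1[i - 1] >> 7));
    tk1[0] = (unsigned char)(tk1[0] << 1);
    if (msb)
        tk1[0] ^= 0x95;                // LFSR feedback taps
}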
//x <- y ^ z for 128-bit blocks
#define XOR_BLOCK(x,y,z) ({ \
((u32*)(x))[0] = ((u32*)(y))[0] ^ ((u32*)(z))[0]; \
((u32*)(x))[1] = ((u32*)(y))[1] ^ ((u32*)(z))[1]; \
((u32*)(x))[2] = ((u32*)(y))[2] ^ ((u32*)(z))[2]; \
((u32*)(x))[3] = ((u32*)(y))[3] ^ ((u32*)(z))[3]; \
})
//Rho as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO(x,y,z) ({ \
G(pad,x); \
XOR_BLOCK(y, pad, z); \
XOR_BLOCK(x, x, z); \
})
//Rho inverse as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO_INV(x, y, z) ({ \
G(pad, x); \
XOR_BLOCK(z, pad, y); \
XOR_BLOCK(x, x, z); \
})
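/*
 * Plain-C expansion of RHO for one block (illustrative sketch): with state S,
 * message block M and ciphertext block C, rho computes C = G(S) ^ M and then
 * S' = S ^ M, as in the Romulus specification. g_byte is the byte-wise G
 * sketched above.
 */
static void rho_bytes(unsigned char S[16], unsigned char C[16],
                      const unsigned char M[16]) {
    for (int i = 0; i < 16; i++) {
        C[i] = (unsigned char)(g_byte(S[i]) ^ M[i]);
        S[i] ^= M[i];
    }
}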
#endif // ROMULUSN1_H_
#ifndef SKINNY128_H_
#define SKINNY128_H_
typedef unsigned char u8;
typedef unsigned int u32;
#define SKINNY128_384_ROUNDS 40
extern void skinny128_384(u8* ctext, const u32* tk, const u8* ptext, const u32* rtk1);
extern void tkschedule_lfsr(u32* rtk, const u8* tk2, const u8* tk3, const int rounds);
extern void tkschedule_perm(u32* rtk);
extern void tkschedule_perm_tk1(u32* rtk1, const u8* tk1);
#endif // SKINNY128_H_
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#ifndef ROMULUSN1_H_
#define ROMULUSN1_H_
#include "skinny128.h"
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
typedef struct {
u8 tk1[16]; //to manipulate tk1 byte-wise
u32 rtk1[4*16]; //to avoid tk schedule recomputations
u32 rtk2_3[4*SKINNY128_384_ROUNDS]; //all round tweakeys
} skinny_128_384_tks;
#define TAGBYTES 16
#define KEYBYTES 16
#define BLOCKBYTES 16
#define SET_DOMAIN(tks, domain) ((tks).tk1[7] = (domain))
//G as defined in the Romulus specification in a 32-bit word-wise manner
#define G(x,y) ({ \
tmp = ((u32*)(y))[0]; \
((u32*)(x))[0] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[1]; \
((u32*)(x))[1] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[2]; \
((u32*)(x))[2] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[3]; \
((u32*)(x))[3] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
})
//update the counter in tk1 in a 32-bit word-wise manner
#define UPDATE_CTR(tk1) ({ \
tmp = ((u32*)(tk1))[1]; \
((u32*)(tk1))[1] = (tmp << 1) & 0x00ffffff; \
((u32*)(tk1))[1] |= (((u32*)(tk1))[0] >> 31); \
((u32*)(tk1))[1] |= tmp & 0xff000000; \
((u32*)(tk1))[0] <<= 1; \
if ((tmp >> 23) & 0x01) \
((u32*)(tk1))[0] ^= 0x95; \
})
//x <- y ^ z for 128-bit blocks
#define XOR_BLOCK(x,y,z) ({ \
((u32*)(x))[0] = ((u32*)(y))[0] ^ ((u32*)(z))[0]; \
((u32*)(x))[1] = ((u32*)(y))[1] ^ ((u32*)(z))[1]; \
((u32*)(x))[2] = ((u32*)(y))[2] ^ ((u32*)(z))[2]; \
((u32*)(x))[3] = ((u32*)(y))[3] ^ ((u32*)(z))[3]; \
})
//Rho as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO(x,y,z) ({ \
G(pad,x); \
XOR_BLOCK(y, pad, z); \
XOR_BLOCK(x, x, z); \
})
//Rho inverse as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO_INV(x, y, z) ({ \
G(pad, x); \
XOR_BLOCK(z, pad, y); \
XOR_BLOCK(x, x, z); \
})
#endif // ROMULUSN1_H_
/******************************************************************************
* Constant-time implementation of the SKINNY tweakable block ciphers.
*
* This implementation doesn't compute the ShiftRows operation. Some masks and
* shifts are applied during the MixColumns operation so that the proper bits
* are XORed together. Moreover, the row permutation within the MixColumns
* is omitted, as well as the bit permutation at the end of the Sbox. The rows
* are synchronized with the classical representation after only 4 rounds.
* Therefore, this implementation relies on a "QUADRUPLE_ROUND" routine.
*
* The Sbox computation takes advantage of some symmetry in the 8-bit Sbox to
* turn it into a 4-bit S-box computation. Although the last bit permutation
* within the Sbox is not computed, the bit ordering is synchronized with the
* classical representation after 2 calls.
*
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
*
* @date May 2020
******************************************************************************/
#include <stdio.h>
#include <string.h>
#include "skinny128.h"
#include "tk_schedule.h"
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 0
******************************************************************************/
void mixcolumns_0(u32* state) {
    u32 tmp;
    for(int i = 0; i < 4; i++) {
        tmp = ROR(state[i],24) & 0x0c0c0c0c;
        state[i] ^= ROR(tmp,30);
        tmp = ROR(state[i],16) & 0xc0c0c0c0;
        state[i] ^= ROR(tmp,4);
        tmp = ROR(state[i],8) & 0x0c0c0c0c;
        state[i] ^= ROR(tmp,2);
    }
}
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 1
******************************************************************************/
void mixcolumns_1(u32* state) {
    u32 tmp;
    for(int i = 0; i < 4; i++) {
        tmp = ROR(state[i],16) & 0x30303030;
        state[i] ^= ROR(tmp,30);
        tmp = state[i] & 0x03030303;
        state[i] ^= ROR(tmp,28);
        tmp = ROR(state[i],16) & 0x30303030;
        state[i] ^= ROR(tmp,2);
    }
}
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 2
******************************************************************************/
void mixcolumns_2(u32* state) {
    u32 tmp;
    for(int i = 0; i < 4; i++) {
        tmp = ROR(state[i],8) & 0xc0c0c0c0;
        state[i] ^= ROR(tmp,6);
        tmp = ROR(state[i],16) & 0x0c0c0c0c;
        state[i] ^= ROR(tmp,28);
        tmp = ROR(state[i],24) & 0xc0c0c0c0;
        state[i] ^= ROR(tmp,2);
    }
}
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 3
******************************************************************************/
void mixcolumns_3(u32* state) {
    u32 tmp;
    for(int i = 0; i < 4; i++) {
        tmp = state[i] & 0x03030303;
        state[i] ^= ROR(tmp,30);
        tmp = state[i] & 0x30303030;
        state[i] ^= ROR(tmp,4);
        tmp = state[i] & 0x03030303;
        state[i] ^= ROR(tmp,26);
    }
}
/******************************************************************************
* Encryption of a single block without any mode of operation using
* SKINNY-128-384. RTK1 and RTK2_3 are given separately to take advantage of
* the fact that TK2 and TK3 remain the same throughout the entire data
* encryption/decryption.
******************************************************************************/
void skinny128_384_plus(u8* ctext, const u8* ptext, const u32* rtk1,
        const u32* rtk2_3) {
    u32 tmp; // used in SWAPMOVE macro
    u32 state[4]; // 128-bit state
    packing(state, ptext); // from byte to bitsliced representation
    QUADRUPLE_ROUND(state, rtk1, rtk2_3);
    QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+16);
    QUADRUPLE_ROUND(state, rtk1+32, rtk2_3+32);
    QUADRUPLE_ROUND(state, rtk1+48, rtk2_3+48);
    QUADRUPLE_ROUND(state, rtk1, rtk2_3+64);
    QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+80);
    QUADRUPLE_ROUND(state, rtk1+32, rtk2_3+96);
    QUADRUPLE_ROUND(state, rtk1+48, rtk2_3+112);
    QUADRUPLE_ROUND(state, rtk1, rtk2_3+128);
    QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+144);
    unpacking(ctext, state); // from bitsliced to byte representation
}
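/*
 * Illustrative call sequence (a sketch; the demo helper name is hypothetical):
 * the TK2/TK3 round tweakeys are precomputed once and reused for every block,
 * which is the property the Romulus-N mode exploits. The TK1 schedule only
 * needs 16 rounds' worth of words since its permutation has period 16.
 */
static void skinny_demo(u8 ctext[16], const u8 ptext[16],
                        const u8 tk1[16], const u8 tk2[16], const u8 tk3[16]) {
    u32 rtk1[4*16];                      // TK1 schedule, period 16 rounds
    u32 rtk2_3[4*SKINNY128_384_ROUNDS];  // TK2/TK3 schedule, all rounds
    precompute_rtk1(rtk1, tk1);
    precompute_rtk2_3(rtk2_3, tk2, tk3);
    skinny128_384_plus(ctext, ptext, rtk1, rtk2_3);
}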
#ifndef SKINNY128_H_
#define SKINNY128_H_
typedef unsigned char u8;
typedef unsigned int u32;
void skinny128_384_plus(u8* ctext, const u8* ptext, const u32* rtk1, const u32* rtk2_3);
#define SKINNY128_384_ROUNDS 40
#define QUADRUPLE_ROUND(state, rtk1, rtk2_3) ({ \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= (state[2] | state[3]); \
SWAPMOVE(state[3], state[0], 0x55555555, 0); \
state[0] ^= (rtk1)[0]; \
state[1] ^= (rtk1)[1]; \
state[2] ^= (rtk1)[2]; \
state[3] ^= (rtk1)[3]; \
state[0] ^= (rtk2_3)[0]; \
state[1] ^= (rtk2_3)[1]; \
state[2] ^= (rtk2_3)[2]; \
state[3] ^= (rtk2_3)[3]; \
mixcolumns_0(state); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= (state[0] | state[1]); \
SWAPMOVE(state[1], state[2], 0x55555555, 0); \
state[0] ^= (rtk1)[4]; \
state[1] ^= (rtk1)[5]; \
state[2] ^= (rtk1)[6]; \
state[3] ^= (rtk1)[7]; \
state[0] ^= (rtk2_3)[4]; \
state[1] ^= (rtk2_3)[5]; \
state[2] ^= (rtk2_3)[6]; \
state[3] ^= (rtk2_3)[7]; \
mixcolumns_1(state); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= (state[2] | state[3]); \
SWAPMOVE(state[3], state[0], 0x55555555, 0); \
state[0] ^= (rtk1)[8]; \
state[1] ^= (rtk1)[9]; \
state[2] ^= (rtk1)[10]; \
state[3] ^= (rtk1)[11]; \
state[0] ^= (rtk2_3)[8]; \
state[1] ^= (rtk2_3)[9]; \
state[2] ^= (rtk2_3)[10]; \
state[3] ^= (rtk2_3)[11]; \
mixcolumns_2(state); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= (state[0] | state[1]); \
SWAPMOVE(state[1], state[2], 0x55555555, 0); \
state[0] ^= (rtk1)[12]; \
state[1] ^= (rtk1)[13]; \
state[2] ^= (rtk1)[14]; \
state[3] ^= (rtk1)[15]; \
state[0] ^= (rtk2_3)[12]; \
state[1] ^= (rtk2_3)[13]; \
state[2] ^= (rtk2_3)[14]; \
state[3] ^= (rtk2_3)[15]; \
mixcolumns_3(state); \
})
#endif // SKINNY128_H_
#ifndef TK_SCHEDULE_H_
#define TK_SCHEDULE_H_
typedef unsigned char u8;
typedef unsigned int u32;
void packing(u32* out, const u8* in);
void unpacking(u8* out, u32 *in);
void precompute_rtk2_3(u32* rtk, const u8* tk2, const u8* tk3);
void precompute_rtk1(u32* rtk1, const u8* tk1);
#define ROR(x,y) (((x) >> (y)) | ((x) << (32 - (y))))
#define XOR_BLOCKS(x,y) ({ \
(x)[0] ^= (y)[0]; \
(x)[1] ^= (y)[1]; \
(x)[2] ^= (y)[2]; \
(x)[3] ^= (y)[3]; \
})
#define SWAPMOVE(a, b, mask, n) ({ \
tmp = (b ^ (a >> n)) & mask; \
b ^= tmp; \
a ^= (tmp << n); \
})
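/*
 * SWAPMOVE is the classic bitsliced-permutation idiom: it exchanges the bits
 * of b selected by 'mask' with the bits of a selected by 'mask << n'. A tiny
 * illustrative wrapper (hypothetical; note the local 'tmp' the macro needs):
 */
static void swapmove_demo(u32 *a, u32 *b) {
    u32 tmp, x = *a, y = *b;
    SWAPMOVE(x, y, 0x0f0f0f0f, 4); // swap low nibbles of y with high nibbles of x
    *a = x; *b = y;
}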
#define LE_LOAD(x, y) \
*(x) = (((u32)(y)[3] << 24) | \
((u32)(y)[2] << 16) | \
((u32)(y)[1] << 8) | \
(y)[0]);
#define LE_STORE(x, y) \
(x)[0] = (y) & 0xff; \
(x)[1] = ((y) >> 8) & 0xff; \
(x)[2] = ((y) >> 16) & 0xff; \
(x)[3] = (y) >> 24;
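/*
 * Illustrative round-trip for LE_LOAD/LE_STORE (a sketch): both macros are
 * endianness-independent byte (de)serialization, so storing then loading
 * returns the original word on any platform.
 */
static u32 le_roundtrip_demo(u32 w) {
    u8 buf[4];
    u32 out;
    LE_STORE(buf, w);  // serialize w as 4 little-endian bytes
    LE_LOAD(&out, buf); // parse them back
    return out;        // == w
}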
#endif // TK_SCHEDULE_H_
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
//API required by NIST for the LWC competition
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k);
//API required by NIST for the LWC competition
int crypto_aead_decrypt(unsigned char *m, unsigned long long *outputmlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k);
#ifndef ROMULUSN1_H_
#define ROMULUSN1_H_
#include "skinny128.h"
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
typedef struct {
u8 tk1[16]; //to manipulate tk1 byte-wise
u32 rtk1[4*16]; //to avoid tk schedule recomputations
u32 rtk[4*SKINNY128_384_ROUNDS]; //all round tweakeys
} skinny_128_384_tks;
#define TAGBYTES 16
#define KEYBYTES 16
#define BLOCKBYTES 16
#define SET_DOMAIN(tks, domain) ((tks).tk1[7] = (domain))
//G as defined in the Romulus specification in a 32-bit word-wise manner
#define G(x,y) ({ \
tmp = ((u32*)(y))[0]; \
((u32*)(x))[0] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[1]; \
((u32*)(x))[1] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[2]; \
((u32*)(x))[2] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[3]; \
((u32*)(x))[3] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
})
//update the counter in tk1 in a 32-bit word-wise manner
#define UPDATE_CTR(tk1) ({ \
tmp = ((u32*)(tk1))[1]; \
((u32*)(tk1))[1] = (tmp << 1) & 0x00ffffff; \
((u32*)(tk1))[1] |= (((u32*)(tk1))[0] >> 31); \
((u32*)(tk1))[1] |= tmp & 0xff000000; \
((u32*)(tk1))[0] <<= 1; \
if ((tmp >> 23) & 0x01) \
((u32*)(tk1))[0] ^= 0x95; \
})
//x <- y ^ z for 128-bit blocks
#define XOR_BLOCK(x,y,z) ({ \
((u32*)(x))[0] = ((u32*)(y))[0] ^ ((u32*)(z))[0]; \
((u32*)(x))[1] = ((u32*)(y))[1] ^ ((u32*)(z))[1]; \
((u32*)(x))[2] = ((u32*)(y))[2] ^ ((u32*)(z))[2]; \
((u32*)(x))[3] = ((u32*)(y))[3] ^ ((u32*)(z))[3]; \
})
//Rho as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO(x,y,z) ({ \
G(pad,x); \
XOR_BLOCK(y, pad, z); \
XOR_BLOCK(x, x, z); \
})
//Rho inverse as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO_INV(x, y, z) ({ \
G(pad, x); \
XOR_BLOCK(z, pad, y); \
XOR_BLOCK(x, x, z); \
})
#endif // ROMULUSN1_H_
#ifndef SKINNY128_H_
#define SKINNY128_H_
typedef unsigned char u8;
typedef unsigned int u32;
#define SKINNY128_128_ROUNDS 40
#define SKINNY128_256_ROUNDS 48
#define SKINNY128_384_ROUNDS 56
extern void skinny128_384(u8* ctext, const u32* tk, const u8* ptext, const u32* rtk1);
extern void tkschedule_lfsr(u32* rtk, const u8* tk2, const u8* tk3, const int rounds);
extern void tkschedule_perm(u32* rtk);
extern void tkschedule_perm_tk1(u32* rtk1, const u8* tk1);
#endif // SKINNY128_H_
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#ifndef ROMULUSN1_H_
#define ROMULUSN1_H_
#include "skinny128.h"
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
typedef struct {
u8 tk1[16]; //to manipulate tk1 byte-wise
u32 rtk1[4*16]; //to avoid tk schedule recomputations
u32 rtk2_3[4*SKINNY128_384_ROUNDS]; //all round tweakeys
} skinny_128_384_tks;
#define TAGBYTES 16
#define KEYBYTES 16
#define BLOCKBYTES 16
#define SET_DOMAIN(tks, domain) ((tks).tk1[7] = (domain))
//G as defined in the Romulus specification in a 32-bit word-wise manner
#define G(x,y) ({ \
tmp = ((u32*)(y))[0]; \
((u32*)(x))[0] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[1]; \
((u32*)(x))[1] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[2]; \
((u32*)(x))[2] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[3]; \
((u32*)(x))[3] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
})
//update the counter in tk1 in a 32-bit word-wise manner
#define UPDATE_CTR(tk1) ({ \
tmp = ((u32*)(tk1))[1]; \
((u32*)(tk1))[1] = (tmp << 1) & 0x00ffffff; \
((u32*)(tk1))[1] |= (((u32*)(tk1))[0] >> 31); \
((u32*)(tk1))[1] |= tmp & 0xff000000; \
((u32*)(tk1))[0] <<= 1; \
if ((tmp >> 23) & 0x01) \
((u32*)(tk1))[0] ^= 0x95; \
})
//x <- y ^ z for 128-bit blocks
#define XOR_BLOCK(x,y,z) ({ \
((u32*)(x))[0] = ((u32*)(y))[0] ^ ((u32*)(z))[0]; \
((u32*)(x))[1] = ((u32*)(y))[1] ^ ((u32*)(z))[1]; \
((u32*)(x))[2] = ((u32*)(y))[2] ^ ((u32*)(z))[2]; \
((u32*)(x))[3] = ((u32*)(y))[3] ^ ((u32*)(z))[3]; \
})
//Rho as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO(x,y,z) ({ \
G(pad,x); \
XOR_BLOCK(y, pad, z); \
XOR_BLOCK(x, x, z); \
})
//Rho inverse as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO_INV(x, y, z) ({ \
G(pad, x); \
XOR_BLOCK(z, pad, y); \
XOR_BLOCK(x, x, z); \
})
#endif // ROMULUSN1_H_
/******************************************************************************
* Constant-time implementation of the SKINNY tweakable block ciphers.
*
* This implementation doesn't compute the ShiftRows operation. Some masks and
* shifts are applied during the MixColumns operation so that the proper bits
* are XORed together. Moreover, the row permutation within the MixColumns
* is omitted, as well as the bit permutation at the end of the Sbox. The rows
* are synchronized with the classical representation after only 4 rounds.
* Therefore, this implementation relies on a "QUADRUPLE_ROUND" routine.
*
* The Sbox computation takes advantage of some symmetry in the 8-bit Sbox to
* turn it into a 4-bit S-box computation. Although the last bit permutation
* within the Sbox is not computed, the bit ordering is synchronized with the
* classical representation after 2 calls.
*
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
*
* @date May 2020
******************************************************************************/
#include <stdio.h>
#include <string.h>
#include "skinny128.h"
#include "tk_schedule.h"
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 0
******************************************************************************/
void mixcolumns_0(u32* state) {
    u32 tmp;
    for(int i = 0; i < 4; i++) {
        tmp = ROR(state[i],24) & 0x0c0c0c0c;
        state[i] ^= ROR(tmp,30);
        tmp = ROR(state[i],16) & 0xc0c0c0c0;
        state[i] ^= ROR(tmp,4);
        tmp = ROR(state[i],8) & 0x0c0c0c0c;
        state[i] ^= ROR(tmp,2);
    }
}
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 1
******************************************************************************/
void mixcolumns_1(u32* state) {
    u32 tmp;
    for(int i = 0; i < 4; i++) {
        tmp = ROR(state[i],16) & 0x30303030;
        state[i] ^= ROR(tmp,30);
        tmp = state[i] & 0x03030303;
        state[i] ^= ROR(tmp,28);
        tmp = ROR(state[i],16) & 0x30303030;
        state[i] ^= ROR(tmp,2);
    }
}
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 2
******************************************************************************/
void mixcolumns_2(u32* state) {
    u32 tmp;
    for(int i = 0; i < 4; i++) {
        tmp = ROR(state[i],8) & 0xc0c0c0c0;
        state[i] ^= ROR(tmp,6);
        tmp = ROR(state[i],16) & 0x0c0c0c0c;
        state[i] ^= ROR(tmp,28);
        tmp = ROR(state[i],24) & 0xc0c0c0c0;
        state[i] ^= ROR(tmp,2);
    }
}
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 3
******************************************************************************/
void mixcolumns_3(u32* state) {
    u32 tmp;
    for(int i = 0; i < 4; i++) {
        tmp = state[i] & 0x03030303;
        state[i] ^= ROR(tmp,30);
        tmp = state[i] & 0x30303030;
        state[i] ^= ROR(tmp,4);
        tmp = state[i] & 0x03030303;
        state[i] ^= ROR(tmp,26);
    }
}
/******************************************************************************
* Encryption of a single block without any mode of operation using
* SKINNY-128-384. RTK1 and RTK2_3 are given separately to take advantage of
* the fact that TK2 and TK3 remain the same throughout the entire data
* encryption/decryption.
******************************************************************************/
void skinny128_384(u8* ctext, const u8* ptext, const u32* rtk1, const u32* rtk2_3) {
    u32 tmp; // used in SWAPMOVE macro
    u32 state[4]; // 128-bit state
    packing(state, ptext); // from byte to bitsliced representation
    QUADRUPLE_ROUND(state, rtk1, rtk2_3);
    QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+16);
    QUADRUPLE_ROUND(state, rtk1+32, rtk2_3+32);
    QUADRUPLE_ROUND(state, rtk1+48, rtk2_3+48);
    QUADRUPLE_ROUND(state, rtk1, rtk2_3+64);
    QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+80);
    QUADRUPLE_ROUND(state, rtk1+32, rtk2_3+96);
    QUADRUPLE_ROUND(state, rtk1+48, rtk2_3+112);
    QUADRUPLE_ROUND(state, rtk1, rtk2_3+128);
    QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+144);
    QUADRUPLE_ROUND(state, rtk1+32, rtk2_3+160);
    QUADRUPLE_ROUND(state, rtk1+48, rtk2_3+176);
    QUADRUPLE_ROUND(state, rtk1, rtk2_3+192);
    QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+208);
    unpacking(ctext, state); // from bitsliced to byte representation
}
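/*
 * Same usage sketch as for the 40-round variant (hypothetical demo helper):
 * only the number of quadruple rounds differs; rtk1 still wraps with period
 * 16 rounds while rtk2_3 covers all SKINNY128_384_ROUNDS (56 here).
 */
static void skinny384_demo(u8 ctext[16], const u8 ptext[16],
                           const u8 tk1[16], const u8 tk2[16], const u8 tk3[16]) {
    u32 rtk1[4*16];
    u32 rtk2_3[4*SKINNY128_384_ROUNDS];
    precompute_rtk1(rtk1, tk1);
    precompute_rtk2_3(rtk2_3, tk2, tk3);
    skinny128_384(ctext, ptext, rtk1, rtk2_3);
}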
#ifndef SKINNY128_H_
#define SKINNY128_H_
typedef unsigned char u8;
typedef unsigned int u32;
#define SKINNY128_384_ROUNDS 56
#define QUADRUPLE_ROUND(state, rtk1, rtk2_3) ({ \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= (state[2] | state[3]); \
SWAPMOVE(state[3], state[0], 0x55555555, 0); \
state[0] ^= (rtk1)[0]; \
state[1] ^= (rtk1)[1]; \
state[2] ^= (rtk1)[2]; \
state[3] ^= (rtk1)[3]; \
state[0] ^= (rtk2_3)[0]; \
state[1] ^= (rtk2_3)[1]; \
state[2] ^= (rtk2_3)[2]; \
state[3] ^= (rtk2_3)[3]; \
mixcolumns_0(state); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= (state[0] | state[1]); \
SWAPMOVE(state[1], state[2], 0x55555555, 0); \
state[0] ^= (rtk1)[4]; \
state[1] ^= (rtk1)[5]; \
state[2] ^= (rtk1)[6]; \
state[3] ^= (rtk1)[7]; \
state[0] ^= (rtk2_3)[4]; \
state[1] ^= (rtk2_3)[5]; \
state[2] ^= (rtk2_3)[6]; \
state[3] ^= (rtk2_3)[7]; \
mixcolumns_1(state); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= (state[2] | state[3]); \
SWAPMOVE(state[3], state[0], 0x55555555, 0); \
state[0] ^= (rtk1)[8]; \
state[1] ^= (rtk1)[9]; \
state[2] ^= (rtk1)[10]; \
state[3] ^= (rtk1)[11]; \
state[0] ^= (rtk2_3)[8]; \
state[1] ^= (rtk2_3)[9]; \
state[2] ^= (rtk2_3)[10]; \
state[3] ^= (rtk2_3)[11]; \
mixcolumns_2(state); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= (state[0] | state[1]); \
SWAPMOVE(state[1], state[2], 0x55555555, 0); \
state[0] ^= (rtk1)[12]; \
state[1] ^= (rtk1)[13]; \
state[2] ^= (rtk1)[14]; \
state[3] ^= (rtk1)[15]; \
state[0] ^= (rtk2_3)[12]; \
state[1] ^= (rtk2_3)[13]; \
state[2] ^= (rtk2_3)[14]; \
state[3] ^= (rtk2_3)[15]; \
mixcolumns_3(state); \
})
void skinny128_384(u8* ctext, const u8* ptext, const u32* rtk1, const u32* rtk2_3);
#endif // SKINNY128_H_
#ifndef TK_SCHEDULE_H_
#define TK_SCHEDULE_H_
typedef unsigned char u8;
typedef unsigned int u32;
void packing(u32* out, const u8* in);
void unpacking(u8* out, u32 *in);
void precompute_rtk2_3(u32* rtk, const u8* tk2, const u8* tk3);
void precompute_rtk1(u32* rtk1, const u8* tk1);
#define ROR(x,y) (((x) >> (y)) | ((x) << (32 - (y))))
#define XOR_BLOCKS(x,y) ({ \
(x)[0] ^= (y)[0]; \
(x)[1] ^= (y)[1]; \
(x)[2] ^= (y)[2]; \
(x)[3] ^= (y)[3]; \
})
#define SWAPMOVE(a, b, mask, n) ({ \
tmp = (b ^ (a >> n)) & mask; \
b ^= tmp; \
a ^= (tmp << n); \
})
#define LE_LOAD(x, y) \
*(x) = (((u32)(y)[3] << 24) | \
((u32)(y)[2] << 16) | \
((u32)(y)[1] << 8) | \
(y)[0]);
#define LE_STORE(x, y) \
(x)[0] = (y) & 0xff; \
(x)[1] = ((y) >> 8) & 0xff; \
(x)[2] = ((y) >> 16) & 0xff; \
(x)[3] = (y) >> 24;
#endif // TK_SCHEDULE_H_
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
//API required by NIST for the LWC competition
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k);
//API required by NIST for the LWC competition
int crypto_aead_decrypt(unsigned char *m, unsigned long long *outputmlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k);
#ifndef ROMULUSN1_H_
#define ROMULUSN1_H_
#include "skinny128.h"
typedef unsigned char u8;
typedef unsigned int u32;
typedef struct {
u8 tk1[16]; //to manipulate tk1 in a byte-wise manner
u32 rtk1[32]; //to avoid recomputation of the tk schedule
u32 rtk[4*SKINNY128_384_ROUNDS];//all round tweakeys
} skinny_128_384_tks;
#define TAGBYTES 16
#define KEYBYTES 16
#define BLOCKBYTES 16
#define SET_DOMAIN(tks, domain) ((tks).tk1[7] = (domain))
//G as defined in the Romulus specification in a 32-bit word-wise manner
#define G(x,y) ({ \
tmp = ((u32*)(y))[0]; \
((u32*)(x))[0] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[1]; \
((u32*)(x))[1] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[2]; \
((u32*)(x))[2] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[3]; \
((u32*)(x))[3] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
})
//update the counter in tk1 in a 32-bit word-wise manner
#define UPDATE_CTR(tk1) ({ \
tmp = ((u32*)(tk1))[1]; \
((u32*)(tk1))[1] = (tmp << 1) & 0x00ffffff; \
((u32*)(tk1))[1] |= (((u32*)(tk1))[0] >> 31); \
((u32*)(tk1))[1] |= tmp & 0xff000000; \
((u32*)(tk1))[0] <<= 1; \
if ((tmp >> 23) & 0x01) \
((u32*)(tk1))[0] ^= 0x95; \
})
//x <- y ^ z for 128-bit blocks
#define XOR_BLOCK(x,y,z) ({ \
((u32*)(x))[0] = ((u32*)(y))[0] ^ ((u32*)(z))[0]; \
((u32*)(x))[1] = ((u32*)(y))[1] ^ ((u32*)(z))[1]; \
((u32*)(x))[2] = ((u32*)(y))[2] ^ ((u32*)(z))[2]; \
((u32*)(x))[3] = ((u32*)(y))[3] ^ ((u32*)(z))[3]; \
})
//Rho as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO(x,y,z) ({ \
G(pad,x); \
XOR_BLOCK(y, pad, z); \
XOR_BLOCK(x, x, z); \
})
//Rho inverse as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO_INV(x, y, z) ({ \
G(pad, x); \
XOR_BLOCK(z, pad, y); \
XOR_BLOCK(x, x, z); \
})
#endif // ROMULUSN1_H_
#ifndef SKINNY128_H_
#define SKINNY128_H_
typedef unsigned char u8;
typedef unsigned int u32;
#define SKINNY128_384_ROUNDS 40
extern void skinny128_384(u8* ctext, const u32* tk, const u8* ptext, const u32* rtk1);
extern void tkschedule_lfsr(u32* rtk, const u8* tk2, const u8* tk3, const int rounds);
extern void tkschedule_perm(u32* rtk);
extern void tkschedule_perm_tk1(u32* rtk1, const u8* tk1);
#endif // SKINNY128_H_
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#ifndef ROMULUSN1_H_
#define ROMULUSN1_H_
#include "skinny128.h"
typedef unsigned char u8;
typedef unsigned int u32;
typedef struct {
u8 tk1[16]; //to manipulate tk1 byte-wise
u32 rtk1[4*16]; //to avoid tk schedule recomputations
u32 rtk2_3[4*SKINNY128_384_ROUNDS]; //all round tweakeys
} skinny_128_384_tks;
#define TAGBYTES 16
#define KEYBYTES 16
#define BLOCKBYTES 16
#define SET_DOMAIN(tks, domain) ((tks).tk1[7] = (domain))
//G as defined in the Romulus specification in a 32-bit word-wise manner
#define G(x,y) ({ \
tmp = ((u32*)(y))[0]; \
((u32*)(x))[0] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[1]; \
((u32*)(x))[1] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[2]; \
((u32*)(x))[2] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[3]; \
((u32*)(x))[3] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
})
//update the counter in tk1 in a 32-bit word-wise manner
#define UPDATE_CTR(tk1) ({ \
tmp = ((u32*)(tk1))[1]; \
((u32*)(tk1))[1] = (tmp << 1) & 0x00ffffff; \
((u32*)(tk1))[1] |= (((u32*)(tk1))[0] >> 31); \
((u32*)(tk1))[1] |= tmp & 0xff000000; \
((u32*)(tk1))[0] <<= 1; \
if ((tmp >> 23) & 0x01) \
((u32*)(tk1))[0] ^= 0x95; \
})
//x <- y ^ z for 128-bit blocks
#define XOR_BLOCK(x,y,z) ({ \
((u32*)(x))[0] = ((u32*)(y))[0] ^ ((u32*)(z))[0]; \
((u32*)(x))[1] = ((u32*)(y))[1] ^ ((u32*)(z))[1]; \
((u32*)(x))[2] = ((u32*)(y))[2] ^ ((u32*)(z))[2]; \
((u32*)(x))[3] = ((u32*)(y))[3] ^ ((u32*)(z))[3]; \
})
//Rho as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO(x,y,z) ({ \
G(pad,x); \
XOR_BLOCK(y, pad, z); \
XOR_BLOCK(x, x, z); \
})
//Rho inverse as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO_INV(x, y, z) ({ \
G(pad, x); \
XOR_BLOCK(z, pad, y); \
XOR_BLOCK(x, x, z); \
})
#endif // ROMULUSN1_H_
/******************************************************************************
* Constant-time implementation of the SKINNY tweakable block ciphers.
*
* This implementation doesn't compute the ShiftRows operation. Some masks and
* shifts are applied during the MixColumns operation so that the proper bits
* are XORed together. Moreover, the row permutation within the MixColumns
* is omitted, as well as the bit permutation at the end of the Sbox. The rows
* are synchronized with the classical representation after only 4 rounds.
* Therefore, this implementation relies on a "QUADRUPLE_ROUND" routine.
*
* The Sbox computation takes advantage of some symmetry in the 8-bit Sbox to
* turn it into a 4-bit S-box computation. Although the last bit permutation
* within the Sbox is not computed, the bit ordering is synchronized with the
* classical representation after 2 calls.
*
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
*
* @date May 2020
******************************************************************************/
#include <stdio.h>
#include <string.h>
#include "skinny128.h"
#include "tk_schedule.h"
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 0
******************************************************************************/
void mixcolumns_0(u32* state) {
    u32 tmp;
    for(int i = 0; i < 4; i++) {
        tmp = ROR(state[i],24) & 0x0c0c0c0c;
        state[i] ^= ROR(tmp,30);
        tmp = ROR(state[i],16) & 0xc0c0c0c0;
        state[i] ^= ROR(tmp,4);
        tmp = ROR(state[i],8) & 0x0c0c0c0c;
        state[i] ^= ROR(tmp,2);
    }
}
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 1
******************************************************************************/
void mixcolumns_1(u32* state) {
    u32 tmp;
    for(int i = 0; i < 4; i++) {
        tmp = ROR(state[i],16) & 0x30303030;
        state[i] ^= ROR(tmp,30);
        tmp = state[i] & 0x03030303;
        state[i] ^= ROR(tmp,28);
        tmp = ROR(state[i],16) & 0x30303030;
        state[i] ^= ROR(tmp,2);
    }
}
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 2
******************************************************************************/
void mixcolumns_2(u32* state) {
    u32 tmp;
    for(int i = 0; i < 4; i++) {
        tmp = ROR(state[i],8) & 0xc0c0c0c0;
        state[i] ^= ROR(tmp,6);
        tmp = ROR(state[i],16) & 0x0c0c0c0c;
        state[i] ^= ROR(tmp,28);
        tmp = ROR(state[i],24) & 0xc0c0c0c0;
        state[i] ^= ROR(tmp,2);
    }
}
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 3
******************************************************************************/
void mixcolumns_3(u32* state) {
    u32 tmp;
    for(int i = 0; i < 4; i++) {
        tmp = state[i] & 0x03030303;
        state[i] ^= ROR(tmp,30);
        tmp = state[i] & 0x30303030;
        state[i] ^= ROR(tmp,4);
        tmp = state[i] & 0x03030303;
        state[i] ^= ROR(tmp,26);
    }
}
/******************************************************************************
* Encryption of a single block without any mode of operation using
* SKINNY-128-384. RTK1 and RTK2_3 are given separately to take advantage of
* the fact that TK2 and TK3 remain the same throughout the entire data
* encryption/decryption.
******************************************************************************/
void skinny128_384_plus(u8* ctext, const u8* ptext, const u32* rtk1,
        const u32* rtk2_3) {
    u32 tmp; // used in SWAPMOVE macro
    u32 state[4]; // 128-bit state
    packing(state, ptext); // from byte to bitsliced representation
    QUADRUPLE_ROUND(state, rtk1, rtk2_3);
    QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+16);
    QUADRUPLE_ROUND(state, rtk1+32, rtk2_3+32);
    QUADRUPLE_ROUND(state, rtk1+48, rtk2_3+48);
    QUADRUPLE_ROUND(state, rtk1, rtk2_3+64);
    QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+80);
    QUADRUPLE_ROUND(state, rtk1+32, rtk2_3+96);
    QUADRUPLE_ROUND(state, rtk1+48, rtk2_3+112);
    QUADRUPLE_ROUND(state, rtk1, rtk2_3+128);
    QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+144);
    unpacking(ctext, state); // from bitsliced to byte representation
}
#ifndef SKINNY128_H_
#define SKINNY128_H_
typedef unsigned char u8;
typedef unsigned int u32;
void skinny128_384_plus(u8* ctext, const u8* ptext, const u32* rtk1, const u32* rtk2_3);
#define SKINNY128_384_ROUNDS 40
#define QUADRUPLE_ROUND(state, rtk1, rtk2_3) ({ \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= (state[2] | state[3]); \
SWAPMOVE(state[3], state[0], 0x55555555, 0); \
state[0] ^= (rtk1)[0]; \
state[1] ^= (rtk1)[1]; \
state[2] ^= (rtk1)[2]; \
state[3] ^= (rtk1)[3]; \
state[0] ^= (rtk2_3)[0]; \
state[1] ^= (rtk2_3)[1]; \
state[2] ^= (rtk2_3)[2]; \
state[3] ^= (rtk2_3)[3]; \
mixcolumns_0(state); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= (state[0] | state[1]); \
SWAPMOVE(state[1], state[2], 0x55555555, 0); \
state[0] ^= (rtk1)[4]; \
state[1] ^= (rtk1)[5]; \
state[2] ^= (rtk1)[6]; \
state[3] ^= (rtk1)[7]; \
state[0] ^= (rtk2_3)[4]; \
state[1] ^= (rtk2_3)[5]; \
state[2] ^= (rtk2_3)[6]; \
state[3] ^= (rtk2_3)[7]; \
mixcolumns_1(state); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= (state[2] | state[3]); \
SWAPMOVE(state[3], state[0], 0x55555555, 0); \
state[0] ^= (rtk1)[8]; \
state[1] ^= (rtk1)[9]; \
state[2] ^= (rtk1)[10]; \
state[3] ^= (rtk1)[11]; \
state[0] ^= (rtk2_3)[8]; \
state[1] ^= (rtk2_3)[9]; \
state[2] ^= (rtk2_3)[10]; \
state[3] ^= (rtk2_3)[11]; \
mixcolumns_2(state); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= (state[0] | state[1]); \
SWAPMOVE(state[1], state[2], 0x55555555, 0); \
state[0] ^= (rtk1)[12]; \
state[1] ^= (rtk1)[13]; \
state[2] ^= (rtk1)[14]; \
state[3] ^= (rtk1)[15]; \
state[0] ^= (rtk2_3)[12]; \
state[1] ^= (rtk2_3)[13]; \
state[2] ^= (rtk2_3)[14]; \
state[3] ^= (rtk2_3)[15]; \
mixcolumns_3(state); \
})
#endif // SKINNY128_H_
#ifndef TK_SCHEDULE_H_
#define TK_SCHEDULE_H_
typedef unsigned char u8;
typedef unsigned int u32;
void packing(u32* out, const u8* in);
void unpacking(u8* out, u32 *in);
void precompute_rtk2_3(u32* rtk, const u8* tk2, const u8* tk3);
void precompute_rtk1(u32* rtk1, const u8* tk1);
#define ROR(x,y) (((x) >> (y)) | ((x) << (32 - (y))))
#define XOR_BLOCKS(x,y) ({ \
(x)[0] ^= (y)[0]; \
(x)[1] ^= (y)[1]; \
(x)[2] ^= (y)[2]; \
(x)[3] ^= (y)[3]; \
})
#define SWAPMOVE(a, b, mask, n) ({ \
tmp = (b ^ (a >> n)) & mask; \
b ^= tmp; \
a ^= (tmp << n); \
})
#define LE_LOAD(x, y) \
*(x) = (((u32)(y)[3] << 24) | \
((u32)(y)[2] << 16) | \
((u32)(y)[1] << 8) | \
(y)[0]);
#define LE_STORE(x, y) \
(x)[0] = (y) & 0xff; \
(x)[1] = ((y) >> 8) & 0xff; \
(x)[2] = ((y) >> 16) & 0xff; \
(x)[3] = (y) >> 24;
#endif // TK_SCHEDULE_H_
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
//API required by NIST for the LWC competition
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k);
//API required by NIST for the LWC competition
int crypto_aead_decrypt(unsigned char *m, unsigned long long *outputmlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k);
#ifndef ROMULUSN1_H_
#define ROMULUSN1_H_
#include "skinny128.h"
typedef unsigned char u8;
typedef unsigned int u32;
typedef struct {
u8 tk1[16]; //to manipulate tk1 in a byte-wise manner
u32 rtk1[32]; //to avoid recomputation of the tk schedule
u32 rtk[4*SKINNY128_384_ROUNDS];//all round tweakeys
} skinny_128_384_tks;
#define TAGBYTES 16
#define KEYBYTES 16
#define BLOCKBYTES 16
#define SET_DOMAIN(tks, domain) ((tks).tk1[7] = (domain))
//G as defined in the Romulus specification in a 32-bit word-wise manner
#define G(x,y) ({ \
tmp = ((u32*)(y))[0]; \
((u32*)(x))[0] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[1]; \
((u32*)(x))[1] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[2]; \
((u32*)(x))[2] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[3]; \
((u32*)(x))[3] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
})
//update the counter in tk1 in a 32-bit word-wise manner
#define UPDATE_CTR(tk1) ({ \
tmp = ((u32*)(tk1))[1]; \
((u32*)(tk1))[1] = (tmp << 1) & 0x00ffffff; \
((u32*)(tk1))[1] |= (((u32*)(tk1))[0] >> 31); \
((u32*)(tk1))[1] |= tmp & 0xff000000; \
((u32*)(tk1))[0] <<= 1; \
if ((tmp >> 23) & 0x01) \
((u32*)(tk1))[0] ^= 0x95; \
})
//x <- y ^ z for 128-bit blocks
#define XOR_BLOCK(x,y,z) ({ \
((u32*)(x))[0] = ((u32*)(y))[0] ^ ((u32*)(z))[0]; \
((u32*)(x))[1] = ((u32*)(y))[1] ^ ((u32*)(z))[1]; \
((u32*)(x))[2] = ((u32*)(y))[2] ^ ((u32*)(z))[2]; \
((u32*)(x))[3] = ((u32*)(y))[3] ^ ((u32*)(z))[3]; \
})
//Rho as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO(x,y,z) ({ \
G(pad,x); \
XOR_BLOCK(y, pad, z); \
XOR_BLOCK(x, x, z); \
})
//Rho inverse as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO_INV(x, y, z) ({ \
G(pad, x); \
XOR_BLOCK(z, pad, y); \
XOR_BLOCK(x, x, z); \
})
#endif // ROMULUSN1_H_
#ifndef SKINNY128_H_
#define SKINNY128_H_
typedef unsigned char u8;
typedef unsigned int u32;
#define SKINNY128_128_ROUNDS 40
#define SKINNY128_256_ROUNDS 48
#define SKINNY128_384_ROUNDS 56
extern void skinny128_384(u8* ctext, const u32* tk, const u8* ptext, const u32* rtk1);
extern void tkschedule_lfsr(u32* rtk, const u8* tk2, const u8* tk3, const int rounds);
extern void tkschedule_perm(u32* rtk);
extern void tkschedule_perm_tk1(u32* rtk1, const u8* tk1);
#endif // SKINNY128_H_
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#include "skinny128.h"
#include "tk_schedule.h"
#include "romulus.h"
#include <string.h>
#include <stdio.h>
//Encryption and authentication using Romulus-N1
int crypto_aead_encrypt
    (unsigned char *c, unsigned long long *clen,
     const unsigned char *m, unsigned long long mlen,
     const unsigned char *ad, unsigned long long adlen,
     const unsigned char *nsec,
     const unsigned char *npub,
     const unsigned char *k) {
    int i;
    u32 tmp;
    skinny_128_384_tks tks;
    u8 state[BLOCKBYTES], pad[BLOCKBYTES];
    (void)nsec;
    // ----------------- Initialization -----------------
    *clen = mlen + TAGBYTES;
    memset(tks.tk1, 0x00, KEYBYTES);
    memset(state, 0x00, BLOCKBYTES);
    tks.tk1[0] = 0x01; //56-bit LFSR counter
    // ----------------- Initialization -----------------
    // ----------------- Process the associated data -----------------
    //Handle the special case of no associated data
    if (adlen == 0) {
        UPDATE_CTR(tks.tk1);
        SET_DOMAIN(tks, 0x1A);
        precompute_rtk2_3(tks.rtk2_3, npub, k);
        precompute_rtk1(tks.rtk1, tks.tk1);
        skinny128_384(state, state, tks.rtk1, tks.rtk2_3);
    } else {
        // Process all double blocks except the last
        SET_DOMAIN(tks, 0x08);
        while (adlen > 2*BLOCKBYTES) {
            UPDATE_CTR(tks.tk1);
            XOR_BLOCK(state, state, ad);
            precompute_rtk2_3(tks.rtk2_3, ad + BLOCKBYTES, k);
            precompute_rtk1(tks.rtk1, tks.tk1);
            skinny128_384(state, state, tks.rtk1, tks.rtk2_3);
            UPDATE_CTR(tks.tk1);
            ad += 2*BLOCKBYTES;
            adlen -= 2*BLOCKBYTES;
        }
        //Pad and process the left-over blocks
        UPDATE_CTR(tks.tk1);
        if (adlen == 2*BLOCKBYTES) {
            // Left-over complete double block
            XOR_BLOCK(state, state, ad);
            precompute_rtk2_3(tks.rtk2_3, ad + BLOCKBYTES, k);
            precompute_rtk1(tks.rtk1, tks.tk1);
            skinny128_384(state, state, tks.rtk1, tks.rtk2_3);
            UPDATE_CTR(tks.tk1);
            SET_DOMAIN(tks, 0x18);
        } else if (adlen > BLOCKBYTES) {
            // Left-over partial double block
            adlen -= BLOCKBYTES;
            XOR_BLOCK(state, state, ad);
            memcpy(pad, ad + BLOCKBYTES, adlen);
            memset(pad + adlen, 0x00, 15 - adlen);
            pad[15] = adlen;
            precompute_rtk2_3(tks.rtk2_3, pad, k);
            precompute_rtk1(tks.rtk1, tks.tk1);
            skinny128_384(state, state, tks.rtk1, tks.rtk2_3);
            UPDATE_CTR(tks.tk1);
            SET_DOMAIN(tks, 0x1A);
        } else if (adlen == BLOCKBYTES) {
            // Left-over complete single block
            XOR_BLOCK(state, state, ad);
            SET_DOMAIN(tks, 0x18);
        } else {
            // Left-over partial single block
            for(i = 0; i < (int)adlen; i++)
                state[i] ^= ad[i];
            state[15] ^= adlen;
            SET_DOMAIN(tks, 0x1A);
        }
        precompute_rtk2_3(tks.rtk2_3, npub, k);
        precompute_rtk1(tks.rtk1, tks.tk1);
        skinny128_384(state, state, tks.rtk1, tks.rtk2_3);
    }
    // ----------------- Process the associated data -----------------
    // ----------------- Process the plaintext -----------------
    memset(tks.tk1, 0, KEYBYTES);
    tks.tk1[0] = 0x01; //init the 56-bit LFSR counter
    if (mlen == 0) {
        UPDATE_CTR(tks.tk1);
        SET_DOMAIN(tks, 0x15);
        precompute_rtk1(tks.rtk1, tks.tk1);
        skinny128_384(state, state, tks.rtk1, tks.rtk2_3);
    } else {
        //process all blocks except the last
        SET_DOMAIN(tks, 0x04);
        while (mlen > BLOCKBYTES) {
            RHO(state,c,m);
            UPDATE_CTR(tks.tk1);
            precompute_rtk1(tks.rtk1, tks.tk1);
            skinny128_384(state, state, tks.rtk1, tks.rtk2_3);
            c += BLOCKBYTES;
            m += BLOCKBYTES;
            mlen -= BLOCKBYTES;
        }
        //pad and process the last block
        UPDATE_CTR(tks.tk1);
        if (mlen < BLOCKBYTES) {
            for(i = 0; i < (int)mlen; i++) {
                tmp = m[i]; //use of tmp variable just in case 'c = m'
                c[i] = m[i] ^ (state[i] >> 1) ^ (state[i] & 0x80) ^ (state[i] << 7);
                state[i] ^= (u8)tmp;
            }
            state[15] ^= (u8)mlen; //padding
            SET_DOMAIN(tks, 0x15);
        } else {
            RHO(state,c,m);
            SET_DOMAIN(tks, 0x14);
        }
        precompute_rtk1(tks.rtk1, tks.tk1);
        skinny128_384(state, state, tks.rtk1, tks.rtk2_3);
        c += mlen;
    }
    // ----------------- Process the plaintext -----------------
    // ----------------- Generate the tag -----------------
    G(state,state);
    memcpy(c, state, TAGBYTES);
    // ----------------- Generate the tag -----------------
    return 0;
}
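/*
 * Domain-separation bytes used above (written into tk1[7] via SET_DOMAIN):
 * 0x08 for intermediate AD double blocks, 0x18/0x1A to close a complete or
 * partial (or empty) AD, and 0x04 for intermediate message blocks with
 * 0x14/0x15 closing a complete or partial (or empty) message.
 */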
//Decryption and tag verification using Romulus-N1
int crypto_aead_decrypt
    (unsigned char *m, unsigned long long *mlen,
     unsigned char *nsec,
     const unsigned char *c, unsigned long long clen,
     const unsigned char *ad, unsigned long long adlen,
     const unsigned char *npub,
     const unsigned char *k) {
    int i;
    u32 tmp;
    skinny_128_384_tks tks;
    u8 state[BLOCKBYTES], pad[BLOCKBYTES];
    (void)nsec;
    if (clen < TAGBYTES)
        return -1;
    // ----------------- Initialization -----------------
    *mlen = clen - TAGBYTES;
    memset(tks.tk1, 0x00, KEYBYTES);
    memset(state, 0x00, BLOCKBYTES);
    tks.tk1[0] = 0x01; //56-bit LFSR counter
    // ----------------- Initialization -----------------
    // ----------------- Process the associated data -----------------
    //Handle the special case of no associated data
    if (adlen == 0) {
        UPDATE_CTR(tks.tk1);
        SET_DOMAIN(tks, 0x1A);
        precompute_rtk2_3(tks.rtk2_3, npub, k);
        precompute_rtk1(tks.rtk1, tks.tk1);
        skinny128_384(state, state, tks.rtk1, tks.rtk2_3);
    } else {
        // Process all double blocks except the last
        SET_DOMAIN(tks, 0x08);
        while (adlen > 2*BLOCKBYTES) {
            UPDATE_CTR(tks.tk1);
            XOR_BLOCK(state, state, ad);
            precompute_rtk2_3(tks.rtk2_3, ad + BLOCKBYTES, k);
            precompute_rtk1(tks.rtk1, tks.tk1);
            skinny128_384(state, state, tks.rtk1, tks.rtk2_3);
            UPDATE_CTR(tks.tk1);
            ad += 2*BLOCKBYTES;
            adlen -= 2*BLOCKBYTES;
        }
        //Pad and process the left-over blocks
        UPDATE_CTR(tks.tk1);
        if (adlen == 2*BLOCKBYTES) {
            // Left-over complete double block
            XOR_BLOCK(state, state, ad);
            precompute_rtk2_3(tks.rtk2_3, ad + BLOCKBYTES, k);
            precompute_rtk1(tks.rtk1, tks.tk1);
            skinny128_384(state, state, tks.rtk1, tks.rtk2_3);
            UPDATE_CTR(tks.tk1);
            SET_DOMAIN(tks, 0x18);
        } else if (adlen > BLOCKBYTES) {
            // Left-over partial double block
            adlen -= BLOCKBYTES;
            XOR_BLOCK(state, state, ad);
            memcpy(pad, ad + BLOCKBYTES, adlen);
            memset(pad + adlen, 0x00, 15 - adlen);
            pad[15] = adlen;
            precompute_rtk2_3(tks.rtk2_3, pad, k);
            precompute_rtk1(tks.rtk1, tks.tk1);
            skinny128_384(state, state, tks.rtk1, tks.rtk2_3);
            UPDATE_CTR(tks.tk1);
            SET_DOMAIN(tks, 0x1A);
        } else if (adlen == BLOCKBYTES) {
            // Left-over complete single block
            XOR_BLOCK(state, state, ad);
            SET_DOMAIN(tks, 0x18);
        } else {
            // Left-over partial single block
            for(i = 0; i < (int)adlen; i++)
                state[i] ^= ad[i];
            state[15] ^= adlen;
            SET_DOMAIN(tks, 0x1A);
        }
        precompute_rtk2_3(tks.rtk2_3, npub, k);
        precompute_rtk1(tks.rtk1, tks.tk1);
        skinny128_384(state, state, tks.rtk1, tks.rtk2_3);
    }
    // ----------------- Process the associated data -----------------
    // ----------------- Process the ciphertext -----------------
    clen -= TAGBYTES;
    memset(tks.tk1, 0, KEYBYTES);
    tks.tk1[0] = 0x01; //init the 56-bit LFSR counter
    if (clen == 0) {
        UPDATE_CTR(tks.tk1);
        SET_DOMAIN(tks, 0x15);
        precompute_rtk1(tks.rtk1, tks.tk1);
        skinny128_384(state, state, tks.rtk1, tks.rtk2_3);
    } else {
        //process all blocks except the last
        SET_DOMAIN(tks, 0x04);
        while (clen > BLOCKBYTES) {
            RHO_INV(state,c,m);
            UPDATE_CTR(tks.tk1);
            precompute_rtk1(tks.rtk1, tks.tk1);
            skinny128_384(state, state, tks.rtk1, tks.rtk2_3);
            c += BLOCKBYTES;
            m += BLOCKBYTES;
            clen -= BLOCKBYTES;
        }
        //pad and process the last block
        UPDATE_CTR(tks.tk1);
        if (clen < BLOCKBYTES) {
            for(i = 0; i < (int)clen; i++) {
                m[i] = c[i] ^ (state[i] >> 1) ^ (state[i] & 0x80) ^ (state[i] << 7);
                state[i] ^= m[i];
            }
            state[15] ^= (u8)clen; //padding
            SET_DOMAIN(tks, 0x15);
        } else {
            RHO_INV(state,c,m);
            SET_DOMAIN(tks, 0x14);
        }
        precompute_rtk1(tks.rtk1, tks.tk1);
        skinny128_384(state, state, tks.rtk1, tks.rtk2_3);
    }
    // ----------------- Process the ciphertext -----------------
    // ----------------- Generate and check the tag -----------------
    G(state,state);
    tmp = 0;
    for(i = 0; i < TAGBYTES; i++)
        tmp |= state[i] ^ c[clen+i]; //constant-time tag comparison
    // ----------------- Generate and check the tag -----------------
    return tmp ? -1 : 0; //the NIST API expects -1 on tag mismatch
}
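/*
 * The tag check above is the standard constant-time comparison: XOR each byte
 * pair and OR the differences together, so timing does not depend on where a
 * mismatch occurs. Stand-alone illustrative version (hypothetical helper):
 */
static int ct_tag_eq(const unsigned char *a, const unsigned char *b, int len) {
    unsigned char d = 0;
    for (int i = 0; i < len; i++)
        d |= (unsigned char)(a[i] ^ b[i]);
    return d == 0; // 1 if equal, 0 otherwise; no data-dependent branches
}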
#ifndef ROMULUSN1_H_
#define ROMULUSN1_H_
#include "skinny128.h"
typedef unsigned char u8;
typedef unsigned int u32;
typedef struct {
u8 tk1[16]; //to manipulate tk1 byte-wise
u32 rtk1[4*16]; //to avoid tk schedule recomputations
u32 rtk2_3[4*SKINNY128_384_ROUNDS]; //all round tweakeys
} skinny_128_384_tks;
#define TAGBYTES 16
#define KEYBYTES 16
#define BLOCKBYTES 16
#define SET_DOMAIN(tks, domain) ((tks).tk1[7] = (domain))
//G as defined in the Romulus specification in a 32-bit word-wise manner
#define G(x,y) ({ \
tmp = ((u32*)(y))[0]; \
((u32*)(x))[0] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[1]; \
((u32*)(x))[1] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[2]; \
((u32*)(x))[2] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
tmp = ((u32*)(y))[3]; \
((u32*)(x))[3] = (tmp >> 1 & 0x7f7f7f7f) ^ ((tmp ^ (tmp << 7)) & 0x80808080); \
})
//update the counter in tk1 in a 32-bit word-wise manner
#define UPDATE_CTR(tk1) ({ \
tmp = ((u32*)(tk1))[1]; \
((u32*)(tk1))[1] = (tmp << 1) & 0x00ffffff; \
((u32*)(tk1))[1] |= (((u32*)(tk1))[0] >> 31); \
((u32*)(tk1))[1] |= tmp & 0xff000000; \
((u32*)(tk1))[0] <<= 1; \
if ((tmp >> 23) & 0x01) \
((u32*)(tk1))[0] ^= 0x95; \
})
//x <- y ^ z for 128-bit blocks
#define XOR_BLOCK(x,y,z) ({ \
((u32*)(x))[0] = ((u32*)(y))[0] ^ ((u32*)(z))[0]; \
((u32*)(x))[1] = ((u32*)(y))[1] ^ ((u32*)(z))[1]; \
((u32*)(x))[2] = ((u32*)(y))[2] ^ ((u32*)(z))[2]; \
((u32*)(x))[3] = ((u32*)(y))[3] ^ ((u32*)(z))[3]; \
})
//Rho as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO(x,y,z) ({ \
G(pad,x); \
XOR_BLOCK(y, pad, z); \
XOR_BLOCK(x, x, z); \
})
//Rho inverse as defined in the Romulus specification
//use pad as a tmp variable in case y = z
#define RHO_INV(x, y, z) ({ \
G(pad, x); \
XOR_BLOCK(z, pad, y); \
XOR_BLOCK(x, x, z); \
})
#endif // ROMULUSN1_H_
/******************************************************************************
* Constant-time implementation of the SKINNY tweakable block ciphers.
*
* This implementation doesn't compute the ShiftRows operation. Some masks and
* shifts are applied during the MixColumns operation so that the proper bits
* are XORed together. Moreover, the row permutation within the MixColumns
* is omitted, as well as the bit permutation at the end of the Sbox. The rows
* are synchronized with the classical representation after only 4 rounds.
* Therefore, this implementation relies on a "QUADRUPLE_ROUND" routine.
*
* The Sbox computation takes advantage of some symmetry in the 8-bit Sbox to
* turn it into a 4-bit S-box computation. Although the last bit permutation
* within the Sbox is not computed, the bit ordering is synchronized with the
* classical representation after 2 calls.
*
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
*
* @date May 2020
******************************************************************************/
#include <stdio.h>
#include <string.h>
#include "skinny128.h"
#include "tk_schedule.h"
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 0
******************************************************************************/
void mixcolumns_0(u32* state) {
u32 tmp;
for(int i = 0; i < 4; i++) {
tmp = ROR(state[i],24) & 0x0c0c0c0c;
state[i] ^= ROR(tmp,30);
tmp = ROR(state[i],16) & 0xc0c0c0c0;
state[i] ^= ROR(tmp,4);
tmp = ROR(state[i],8) & 0x0c0c0c0c;
state[i] ^= ROR(tmp,2);
}
}
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 1
******************************************************************************/
void mixcolumns_1(u32* state) {
u32 tmp;
for(int i = 0; i < 4; i++) {
tmp = ROR(state[i],16) & 0x30303030;
state[i] ^= ROR(tmp,30);
tmp = state[i] & 0x03030303;
state[i] ^= ROR(tmp,28);
tmp = ROR(state[i],16) & 0x30303030;
state[i] ^= ROR(tmp,2);
}
}
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 2
******************************************************************************/
void mixcolumns_2(u32* state) {
u32 tmp;
for(int i = 0; i < 4; i++) {
tmp = ROR(state[i],8) & 0xc0c0c0c0;
state[i] ^= ROR(tmp,6);
tmp = ROR(state[i],16) & 0x0c0c0c0c;
state[i] ^= ROR(tmp,28);
tmp = ROR(state[i],24) & 0xc0c0c0c0;
state[i] ^= ROR(tmp,2);
}
}
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 3
******************************************************************************/
void mixcolumns_3(u32* state) {
u32 tmp;
for(int i = 0; i < 4; i++) {
tmp = state[i] & 0x03030303;
state[i] ^= ROR(tmp,30);
tmp = state[i] & 0x30303030;
state[i] ^= ROR(tmp,4);
tmp = state[i] & 0x03030303;
state[i] ^= ROR(tmp,26);
}
}
/******************************************************************************
 * Encryption of a single block (no mode of operation) using SKINNY-128-384.
 * RTK1 and RTK2_3 are given separately to take advantage of the fact that
 * TK2 and TK3 remain the same throughout the data encryption/decryption.
******************************************************************************/
void skinny128_384(u8* ctext, const u8* ptext, const u32* rtk1, const u32* rtk2_3) {
u32 tmp; // used in SWAPMOVE macro
u32 state[4]; // 128-bit state
packing(state, ptext); // from byte to bitsliced representation
QUADRUPLE_ROUND(state, rtk1, rtk2_3);
QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+16);
QUADRUPLE_ROUND(state, rtk1+32, rtk2_3+32);
QUADRUPLE_ROUND(state, rtk1+48, rtk2_3+48);
QUADRUPLE_ROUND(state, rtk1, rtk2_3+64);
QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+80);
QUADRUPLE_ROUND(state, rtk1+32, rtk2_3+96);
QUADRUPLE_ROUND(state, rtk1+48, rtk2_3+112);
QUADRUPLE_ROUND(state, rtk1, rtk2_3+128);
QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+144);
QUADRUPLE_ROUND(state, rtk1+32, rtk2_3+160);
QUADRUPLE_ROUND(state, rtk1+48, rtk2_3+176);
QUADRUPLE_ROUND(state, rtk1, rtk2_3+192);
QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+208);
unpacking(ctext, state); // from bitsliced to byte representation
}
\ No newline at end of file
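/******************************************************************************
 * Editor's sketch (hypothetical caller, not part of the submission): minimal
 * single-block usage of skinny128_384, assuming the precompute_rtk1 and
 * precompute_rtk2_3 helpers declared in tk_schedule.h.
 ******************************************************************************/
static void skinny128_384_example(void) {
    u8 tk1[16] = {0}, tk2[16] = {0}, tk3[16] = {0}; // 384-bit tweakey = TK1|TK2|TK3
    u8 ptext[16] = {0}, ctext[16];
    u32 rtk1[4*16];                      // TK1 round tweakeys (period of 16 rounds)
    u32 rtk2_3[4*SKINNY128_384_ROUNDS];  // TK2/TK3 round tweakeys, all 56 rounds
    precompute_rtk2_3(rtk2_3, tk2, tk3); // once per key/nonce
    precompute_rtk1(rtk1, tk1);          // once per TK1 value (per block in Romulus)
    skinny128_384(ctext, ptext, rtk1, rtk2_3);
}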
#ifndef SKINNY128_H_
#define SKINNY128_H_
typedef unsigned char u8;
typedef unsigned int u32;
#define SKINNY128_384_ROUNDS 56
#define QUADRUPLE_ROUND(state, rtk1, rtk2_3) ({ \
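/* S-box layer, computed as a 4-bit S-box via SWAPMOVEs (see file header) */ \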
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= (state[2] | state[3]); \
SWAPMOVE(state[3], state[0], 0x55555555, 0); \
state[0] ^= (rtk1)[0]; \
state[1] ^= (rtk1)[1]; \
state[2] ^= (rtk1)[2]; \
state[3] ^= (rtk1)[3]; \
state[0] ^= (rtk2_3)[0]; \
state[1] ^= (rtk2_3)[1]; \
state[2] ^= (rtk2_3)[2]; \
state[3] ^= (rtk2_3)[3]; \
mixcolumns_0(state); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= (state[0] | state[1]); \
SWAPMOVE(state[1], state[2], 0x55555555, 0); \
state[0] ^= (rtk1)[4]; \
state[1] ^= (rtk1)[5]; \
state[2] ^= (rtk1)[6]; \
state[3] ^= (rtk1)[7]; \
state[0] ^= (rtk2_3)[4]; \
state[1] ^= (rtk2_3)[5]; \
state[2] ^= (rtk2_3)[6]; \
state[3] ^= (rtk2_3)[7]; \
mixcolumns_1(state); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= (state[2] | state[3]); \
SWAPMOVE(state[3], state[0], 0x55555555, 0); \
state[0] ^= (rtk1)[8]; \
state[1] ^= (rtk1)[9]; \
state[2] ^= (rtk1)[10]; \
state[3] ^= (rtk1)[11]; \
state[0] ^= (rtk2_3)[8]; \
state[1] ^= (rtk2_3)[9]; \
state[2] ^= (rtk2_3)[10]; \
state[3] ^= (rtk2_3)[11]; \
mixcolumns_2(state); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= (state[0] | state[1]); \
SWAPMOVE(state[1], state[2], 0x55555555, 0); \
state[0] ^= (rtk1)[12]; \
state[1] ^= (rtk1)[13]; \
state[2] ^= (rtk1)[14]; \
state[3] ^= (rtk1)[15]; \
state[0] ^= (rtk2_3)[12]; \
state[1] ^= (rtk2_3)[13]; \
state[2] ^= (rtk2_3)[14]; \
state[3] ^= (rtk2_3)[15]; \
mixcolumns_3(state); \
})
void skinny128_384(u8* ctext, const u8* ptext, const u32* rtk1, const u32* rtk2_3);
#endif // SKINNY128_H_
\ No newline at end of file
#ifndef TK_SCHEDULE_H_
#define TK_SCHEDULE_H_
typedef unsigned char u8;
typedef unsigned int u32;
void packing(u32* out, const u8* in);
void unpacking(u8* out, u32 *in);
void precompute_rtk2_3(u32* rtk, const u8* tk2, const u8* tk3);
void precompute_rtk1(u32* rtk1, const u8* tk1);
#define ROR(x,y) (((x) >> (y)) | ((x) << (32 - (y))))
#define XOR_BLOCKS(x,y) ({ \
(x)[0] ^= (y)[0]; \
(x)[1] ^= (y)[1]; \
(x)[2] ^= (y)[2]; \
(x)[3] ^= (y)[3]; \
})
#define SWAPMOVE(a, b, mask, n) ({ \
tmp = (b ^ (a >> n)) & mask; \
b ^= tmp; \
a ^= (tmp << n); \
})
#define LE_LOAD(x, y) \
*(x) = (((u32)(y)[3] << 24) | \
((u32)(y)[2] << 16) | \
((u32)(y)[1] << 8) | \
(y)[0]);
#define LE_STORE(x, y) \
(x)[0] = (y) & 0xff; \
(x)[1] = ((y) >> 8) & 0xff; \
(x)[2] = ((y) >> 16) & 0xff; \
(x)[3] = (y) >> 24;
#endif // TK_SCHEDULE_H_
\ No newline at end of file
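/******************************************************************************
 * Editor's note (hypothetical example, not part of the submission): SWAPMOVE
 * is the classical bitslicing trick used throughout. It exchanges the bits of
 * 'b' selected by 'mask' with the bits of 'a' selected by 'mask << n'.
 ******************************************************************************/
static void swapmove_example(void) {
    u32 tmp;                  // scratch word expected by the macro
    u32 a = 0xab, b = 0xcd;
    SWAPMOVE(a, b, 0x0f, 4);  // swap a's high nibble with b's low nibble
    // now a == 0xdb and b == 0xca
}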
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
\ No newline at end of file
/******************************************************************************
* Constant-time implementation of SKINNY-AEAD-M1 (v1.1).
*
* Two blocks are treated in parallel with SKINNY-128-384 whenever possible.
*
* For more details, see the paper at: https://
*
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
*
* @date May 2020
******************************************************************************/
#include "skinny128.h"
#include "skinnyaead.h"
#include <string.h>
#include <stdio.h>
/******************************************************************************
 * x ^= y where x and y are 128-bit blocks (16-byte arrays).
******************************************************************************/
static void xor_block(u8 * x, const u8* y) {
for(int i = 0; i < BLOCKBYTES; i++)
x[i] ^= y[i];
}
/******************************************************************************
* Encryption and authentication using SKINNY-AEAD-M1
******************************************************************************/
int crypto_aead_encrypt (unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k) {
u64 i,lfsr = 1;
u8 feedback;
u32 rtk1[4*16];
u32 rtk2_3[4*SKINNY128_384_ROUNDS];
u8 tmp[BLOCKBYTES], auth[BLOCKBYTES], sum[BLOCKBYTES];
(void)nsec;
// ----------------- Initialization -----------------
*clen = mlen + TAGBYTES;
tkschedule_lfsr(rtk2_3, npub, k, SKINNY128_384_ROUNDS);
tkschedule_perm(rtk2_3);
memset(tmp, 0x00, BLOCKBYTES);
memset(auth, 0x00, BLOCKBYTES);
memset(sum, 0x00, BLOCKBYTES);
// ----------------- Initialization -----------------
// ----------------- Process the plaintext -----------------
while (mlen >= BLOCKBYTES) { // while entire blocks to process
LE_STR_64(tmp, lfsr);
tkschedule_perm_tk1(rtk1, tmp); // precompute RTK1 given the LFSR
skinny128_384(c, rtk2_3, m, rtk1);
xor_block(sum, m); // sum for tag computation
mlen -= BLOCKBYTES;
c += BLOCKBYTES;
m += BLOCKBYTES;
UPDATE_LFSR(lfsr); // update lfsr for next block
}
SET_DOMAIN(tmp, 0x04); // domain for tag computation
if (mlen > 0) { // last block is partial
LE_STR_64(tmp, lfsr); // lfsr for last block
SET_DOMAIN(tmp, 0x01); // domain for padding
for(i = 0; i < mlen; i++)
sum[i] ^= m[i]; // sum for tag computation
sum[i] ^= 0x80; // padding
tkschedule_perm_tk1(rtk1, tmp);
skinny128_384(auth, rtk2_3, auth, rtk1); // encrypt 'auth' = 0^16
for(i = 0; i < mlen; i++)
c[i] = auth[i] ^ m[i]; // encrypted padded block
c += mlen;
SET_DOMAIN(tmp, 0x05); // domain for tag computation
UPDATE_LFSR(lfsr);
}
LE_STR_64(tmp, lfsr); // lfsr for tag computation
tkschedule_perm_tk1(rtk1, tmp);
skinny128_384(sum, rtk2_3, sum, rtk1); // compute the tag
memcpy(c, sum, TAGBYTES);
// ----------------- Process the plaintext -----------------
// ----------------- Process the associated data -----------------
lfsr = 1;
SET_DOMAIN(tmp, 0x02);
memset(auth, 0x00, BLOCKBYTES);
while (adlen >= BLOCKBYTES) {
LE_STR_64(tmp, lfsr);
tkschedule_perm_tk1(rtk1, tmp);
skinny128_384(sum, rtk2_3, ad, rtk1); // use 'sum' as tmp array
xor_block(auth, sum);
adlen -= BLOCKBYTES;
ad += BLOCKBYTES;
UPDATE_LFSR(lfsr);
}
if (adlen > 0) {
LE_STR_64(tmp, lfsr);
SET_DOMAIN(tmp, 0x03); // domain for padding ad
tkschedule_perm_tk1(rtk1, tmp);
memset(tmp, 0x00, BLOCKBYTES); // padding
memcpy(tmp, ad, adlen); // padding
tmp[adlen] = 0x80; // padding
skinny128_384(tmp, rtk2_3, tmp, rtk1);
xor_block(auth, tmp);
}
xor_block(c, auth); // XOR for tag computation
// ----------------- Process the associated data -----------------
return 0;
}
/******************************************************************************
 * Decryption and verification using SKINNY-AEAD-M1
******************************************************************************/
int crypto_aead_decrypt (unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k) {
u64 i,lfsr = 1;
u8 feedback;
u32 rtk1[4*16];
u32 rtk2_3[4*SKINNY128_384_ROUNDS];
u8 tmp[2*BLOCKBYTES], auth[BLOCKBYTES], sum[BLOCKBYTES];
(void)nsec;
if (clen < TAGBYTES)
return -1;
// ----------------- Initialization -----------------
clen -= TAGBYTES;
*mlen = clen;
tkschedule_lfsr(rtk2_3, npub, k, SKINNY128_384_ROUNDS);
tkschedule_perm(rtk2_3);
memset(tmp, 0x00, 2*BLOCKBYTES);
memset(auth, 0x00, BLOCKBYTES);
memset(sum, 0x00, BLOCKBYTES);
// ----------------- Initialization -----------------
// ----------------- Process the ciphertext -----------------
while (clen >= BLOCKBYTES) { // while entire blocks to process
LE_STR_64(tmp, lfsr);
tkschedule_perm_tk1(rtk1, tmp); // precompute RTK1 given the LFSR
skinny128_384_inv(m, rtk2_3, c, rtk1);
xor_block(sum, m); // sum for tag computation
clen -= BLOCKBYTES;
c += BLOCKBYTES;
m += BLOCKBYTES;
UPDATE_LFSR(lfsr); // update LFSR for the next block
}
SET_DOMAIN(tmp, 0x04); // domain for tag computation
if (clen > 0) { // last block is partial
LE_STR_64(tmp, lfsr); // lfsr for last block
SET_DOMAIN(tmp, 0x01); // domain for padding
tkschedule_perm_tk1(rtk1, tmp);
skinny128_384(auth, rtk2_3, auth, rtk1);
for(i = 0; i < clen; i++) {
m[i] = auth[i] ^ c[i]; // decrypted partial block
sum[i] ^= m[i]; // sum for tag computation
}
sum[i] ^= 0x80; // padding
c += clen;
SET_DOMAIN(tmp, 0x05); // domain for tag computation
UPDATE_LFSR(lfsr);
}
LE_STR_64(tmp, lfsr); // lfsr for tag computation
tkschedule_perm_tk1(rtk1, tmp);
skinny128_384(sum, rtk2_3, sum, rtk1); // compute the tag
// ----------------- Process the ciphertext -----------------
// ----------------- Process the associated data -----------------
lfsr = 1;
SET_DOMAIN(tmp, 0x02);
memset(auth, 0x00, BLOCKBYTES);
while (adlen >= BLOCKBYTES) {
LE_STR_64(tmp, lfsr);
tkschedule_perm_tk1(rtk1, tmp);
skinny128_384(tmp + BLOCKBYTES, rtk2_3, ad, rtk1);
xor_block(auth, tmp + BLOCKBYTES);
adlen -= BLOCKBYTES;
ad += BLOCKBYTES;
UPDATE_LFSR(lfsr);
}
if (adlen > 0) {
LE_STR_64(tmp, lfsr);
SET_DOMAIN(tmp, 0x03); // domain for padding ad
tkschedule_perm_tk1(rtk1, tmp);
memset(tmp, 0x00, BLOCKBYTES); // padding
memcpy(tmp, ad, adlen); // padding
tmp[adlen] ^= 0x80; // padding
skinny128_384(tmp, rtk2_3, tmp, rtk1);
xor_block(auth, tmp);
}
xor_block(sum, auth); // XOR for tag computation
// ----------------- Process the associated data -----------------
feedback = 0;
for(i = 0; i < TAGBYTES; i++)
feedback |= sum[i] ^ c[i]; // constant-time tag verification
return feedback; // nonzero on tag mismatch
}
\ No newline at end of file
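/******************************************************************************
 * Editor's sketch (hypothetical caller, not part of the submission): a
 * round-trip through the NIST LWC AEAD API implemented above. Buffer sizes
 * follow CRYPTO_ABYTES = 16: the ciphertext is the message plus a 16-byte tag.
 ******************************************************************************/
#include <string.h>
static int aead_roundtrip_sketch(void) {
    unsigned char k[16] = {0}, npub[16] = {0};  // key and public nonce
    unsigned char ad[10] = {0};                 // associated data
    unsigned char m[42] = {0}, m2[42];          // message and decrypted copy
    unsigned char c[42 + 16];                   // ciphertext + tag
    unsigned long long clen, m2len;
    crypto_aead_encrypt(c, &clen, m, sizeof(m), ad, sizeof(ad), NULL, npub, k);
    if (crypto_aead_decrypt(m2, &m2len, NULL, c, clen, ad, sizeof(ad), npub, k) != 0)
        return -1;                              // tag mismatch
    return memcmp(m, m2, sizeof(m));            // expected to be 0
}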
#ifndef SKINNY128_H_
#define SKINNY128_H_
typedef unsigned char u8;
typedef unsigned int u32;
#define SKINNY128_384_ROUNDS 40
extern void skinny128_384(u8* ctext, const u32* rtk2_3, const u8* ptext, const u32* rtk1);
extern void skinny128_384_inv(u8* ptext, const u32* rtk2_3, const u8* ctext, const u32* rtk1);
extern void tkschedule_lfsr(u32* rtk2_3, const u8* tk2, const u8* tk3, const int rounds);
extern void tkschedule_perm(u32* rtk2_3);
extern void tkschedule_perm_tk1(u32* rtk1, const u8* tk1);
#endif // SKINNY128_H_
\ No newline at end of file
#ifndef SKINNYAEADM1_H_
#define SKINNYAEADM1_H_
#include "skinny128.h"
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
#define TAGBYTES 16
#define KEYBYTES 16
#define BLOCKBYTES 16
#define SET_DOMAIN(ptr, domain) ((ptr)[15] = (domain))
#define UPDATE_LFSR(lfsr) ({ \
feedback = ((lfsr) & (1ULL << 63)) ? 0x1B : 0x00; \
(lfsr) = ((lfsr) << 1) ^ feedback; \
})
#define LE_STR_64(ptr, x) ({ \
(ptr)[0] = (u8)(x); \
(ptr)[1] = (u8)((x) >> 8); \
(ptr)[2] = (u8)((x) >> 16); \
(ptr)[3] = (u8)((x) >> 24); \
(ptr)[4] = (u8)((x) >> 32); \
(ptr)[5] = (u8)((x) >> 40); \
(ptr)[6] = (u8)((x) >> 48); \
(ptr)[7] = (u8)((x) >> 56); \
})
//x ^= y with x, y 128-bit blocks
#define XOR_BLOCK(x,y) ({ \
((u32*)(x))[0] ^= ((u32*)(y))[0]; \
((u32*)(x))[1] ^= ((u32*)(y))[1]; \
((u32*)(x))[2] ^= ((u32*)(y))[2]; \
((u32*)(x))[3] ^= ((u32*)(y))[3]; \
})
#endif // SKINNYAEADM1_H_
\ No newline at end of file
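/******************************************************************************
 * Editor's note (illustration only): UPDATE_LFSR doubles the 64-bit block
 * counter in GF(2^64); the conditional 0x1B feedback is the reduction by the
 * polynomial x^64 + x^4 + x^3 + x + 1, as the macro above encodes it.
 ******************************************************************************/
static void lfsr_example(void) {
    u8 feedback;        // scratch byte expected by the macro
    u64 lfsr = 1;
    UPDATE_LFSR(lfsr);  // lfsr == 2
    UPDATE_LFSR(lfsr);  // lfsr == 4
    // once bit 63 is set, the next update also folds in the 0x1B feedback
}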
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
//API required by the NIST for the LWC competition
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k);
//API required by the NIST for the LWC competition
int crypto_aead_decrypt(unsigned char *m, unsigned long long *outputmlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k);
#ifndef SKINNY128_H_
#define SKINNY128_H_
typedef unsigned char u8;
typedef unsigned int u32;
#define SKINNY128_384_ROUNDS 40
extern void skinny128_384(u8* ctext, u8* ctext_bis, const u8* ptext, const u8* ptext_bis, const u32* rtk1, const u32* rtk2_3);
extern void skinny128_384_inv(u8* ptext, u8* ptext_bis, const u8* ctext, const u8* ctext_bis, const u32* rtk1, const u32* rtk2_3);
extern void tkschedule_lfsr_2(u32* rtk, const u8* tk2, const u8* tk2_bis, const int rounds);
extern void pack_tk1(u32* rtk, const u8* tk2, const u8* tk2_bis, const int rounds);
extern void tkschedule_lfsr_3(u32* rtk, const u8* tk3, const u8* tk3_bis, const int rounds);
extern void tkschedule_perm(u32* rtk);
extern void tkschedule_perm_tk1(u32* rtk1, const u8* tk1, const u8* tk1_bis);
#endif // SKINNY128_H_
\ No newline at end of file
#ifndef SKINNYAEADM1_H_
#define SKINNYAEADM1_H_
#include "skinny128.h"
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
#define TAGBYTES 16
#define KEYBYTES 16
#define BLOCKBYTES 16
#define SET_DOMAIN(ptr, domain) ((ptr)[15] = (domain))
#define UPDATE_LFSR(lfsr) ({ \
feedback = ((lfsr) & (1ULL << 63)) ? 0x1B : 0x00; \
(lfsr) = ((lfsr) << 1) ^ feedback; \
})
#define LE_STR_64(ptr, x) ({ \
(ptr)[0] = (u8)(x); \
(ptr)[1] = (u8)((x) >> 8); \
(ptr)[2] = (u8)((x) >> 16); \
(ptr)[3] = (u8)((x) >> 24); \
(ptr)[4] = (u8)((x) >> 32); \
(ptr)[5] = (u8)((x) >> 40); \
(ptr)[6] = (u8)((x) >> 48); \
(ptr)[7] = (u8)((x) >> 56); \
})
//x ^= y with x, y 128-bit blocks
#define XOR_BLOCK(x,y) ({ \
((u32*)(x))[0] ^= ((u32*)(y))[0]; \
((u32*)(x))[1] ^= ((u32*)(y))[1]; \
((u32*)(x))[2] ^= ((u32*)(y))[2]; \
((u32*)(x))[3] ^= ((u32*)(y))[3]; \
})
#endif // SKINNYAEADM1_H_
\ No newline at end of file
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
\ No newline at end of file
/******************************************************************************
* Constant-time implementation of SKINNY-AEAD-M1 (v1.1).
*
* Two blocks are treated in parallel with SKINNY-128-384 whenever possible.
*
* For more details, see the paper at: https://
*
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
*
* @date May 2020
******************************************************************************/
#include "skinny128.h"
#include "skinnyaead.h"
#include <string.h>
#include <stdio.h>
/******************************************************************************
 * x ^= y where x and y are 128-bit blocks (16-byte arrays).
******************************************************************************/
static void xor_block(u8 * x, const u8* y) {
for(int i = 0; i < BLOCKBYTES; i++)
x[i] ^= y[i];
}
/******************************************************************************
* Encryption and authentication using SKINNY-AEAD-M1
******************************************************************************/
int crypto_aead_encrypt (unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k) {
u64 i,lfsr = 1;
u8 feedback;
u32 rtk1[4*16];
u32 rtk2_3[4*SKINNY128_384_ROUNDS];
u8 tmp[2*BLOCKBYTES], auth[BLOCKBYTES];
(void)nsec;
// ----------------- Initialization -----------------
*clen = mlen + TAGBYTES;
precompute_rtk2_3(rtk2_3, npub, k);
memset(tmp, 0x00, 2*BLOCKBYTES);
memset(auth, 0x00, BLOCKBYTES);
memset(c + mlen, 0x00, BLOCKBYTES); // checksum accumulator at the tag position (c + mlen is loop-invariant)
// ----------------- Initialization -----------------
// ----------------- Process the plaintext -----------------
while (mlen >= BLOCKBYTES) { // while entire blocks to process
LE_STR_64(tmp, lfsr);
precompute_rtk1(rtk1, tmp); // precompute RTK1 given the LFSR
skinny128_384_plus_encrypt(c, m, rtk1, rtk2_3);
xor_block(c + mlen, m); // sum for tag computation
mlen -= BLOCKBYTES;
c += BLOCKBYTES;
m += BLOCKBYTES;
UPDATE_LFSR(lfsr); // update lfsr for next block
}
SET_DOMAIN(tmp, 0x04); // domain for tag computation
if (mlen > 0) { // last block is partial
LE_STR_64(tmp, lfsr); // lfsr for last block
SET_DOMAIN(tmp, 0x01); // domain for padding
for(i = 0; i < mlen; i++)
c[mlen + i] ^= m[i]; // sum for tag computation
c[mlen + i] ^= 0x80; // padding
precompute_rtk1(rtk1, tmp);
skinny128_384_plus_encrypt(auth, auth, rtk1, rtk2_3);
for(i = 0; i < mlen; i++)
c[i] = auth[i] ^ m[i]; // encrypted padded block
c += mlen;
SET_DOMAIN(tmp, 0x05); // domain for tag computation
UPDATE_LFSR(lfsr);
}
LE_STR_64(tmp, lfsr); // lfsr for tag computation
precompute_rtk1(rtk1, tmp);
skinny128_384_plus_encrypt(c, c, rtk1, rtk2_3); // compute the tag
// ----------------- Process the plaintext -----------------
// ----------------- Process the associated data -----------------
lfsr = 1;
SET_DOMAIN(tmp, 0x02);
memset(auth, 0x00, BLOCKBYTES);
while (adlen >= BLOCKBYTES) {
LE_STR_64(tmp, lfsr);
precompute_rtk1(rtk1, tmp);
skinny128_384_plus_encrypt(tmp + BLOCKBYTES, ad, rtk1, rtk2_3);
xor_block(auth, tmp + BLOCKBYTES);
adlen -= BLOCKBYTES;
ad += BLOCKBYTES;
UPDATE_LFSR(lfsr);
}
if (adlen > 0) {
LE_STR_64(tmp, lfsr);
SET_DOMAIN(tmp, 0x03); // domain for padding ad
precompute_rtk1(rtk1, tmp);
memset(tmp, 0x00, BLOCKBYTES); // padding
memcpy(tmp, ad, adlen); // padding
tmp[adlen] ^= 0x80; // padding
skinny128_384_plus_encrypt(tmp, tmp, rtk1, rtk2_3);
xor_block(auth, tmp);
}
xor_block(c, auth); // XOR for tag computation
// ----------------- Process the associated data -----------------
return 0;
}
/******************************************************************************
 * Decryption and verification using SKINNY-AEAD-M1
******************************************************************************/
int crypto_aead_decrypt (unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k) {
u64 i,lfsr = 1;
u8 feedback;
u32 rtk1[4*16];
u32 rtk2_3[4*SKINNY128_384_ROUNDS];
u8 tmp[2*BLOCKBYTES], auth[BLOCKBYTES], sum[BLOCKBYTES];
(void)nsec;
if (clen < TAGBYTES)
return -1;
// ----------------- Initialization -----------------
clen -= TAGBYTES;
*mlen = clen;
precompute_rtk2_3(rtk2_3, npub, k);
memset(tmp, 0x00, 2*BLOCKBYTES);
memset(auth, 0x00, BLOCKBYTES);
memset(sum, 0x00, BLOCKBYTES);
// ----------------- Initialization -----------------
// ----------------- Process the ciphertext -----------------
while (clen >= BLOCKBYTES) { // while entire blocks to process
LE_STR_64(tmp, lfsr);
precompute_rtk1(rtk1, tmp); // precompute RTK1 given the LFSR
skinny128_384_plus_decrypt(m, c, rtk1, rtk2_3);
xor_block(sum, m); // sum for tag computation
clen -= BLOCKBYTES;
c += BLOCKBYTES;
m += BLOCKBYTES;
UPDATE_LFSR(lfsr); // update LFSR for the next block
}
SET_DOMAIN(tmp, 0x04); // domain for tag computation
if (clen > 0) { // last block is partial
LE_STR_64(tmp, lfsr); // lfsr for last block
SET_DOMAIN(tmp, 0x01); // domain for padding
precompute_rtk1(rtk1, tmp);
skinny128_384_plus_encrypt(auth, auth, rtk1, rtk2_3);
for(i = 0; i < clen; i++) {
m[i] = auth[i] ^ c[i]; // decrypted partial block
sum[i] ^= m[i]; // sum for tag computation
}
sum[i] ^= 0x80; // padding
c += clen;
SET_DOMAIN(tmp, 0x05); // domain for tag computation
UPDATE_LFSR(lfsr);
}
LE_STR_64(tmp, lfsr); // lfsr for tag computation
precompute_rtk1(rtk1, tmp);
skinny128_384_plus_encrypt(sum, sum, rtk1, rtk2_3); // compute the tag
// ----------------- Process the ciphertext -----------------
// ----------------- Process the associated data -----------------
lfsr = 1;
SET_DOMAIN(tmp, 0x02);
memset(auth, 0x00, BLOCKBYTES);
while (adlen >= BLOCKBYTES) {
LE_STR_64(tmp, lfsr);
precompute_rtk1(rtk1, tmp);
skinny128_384_plus_encrypt(tmp + BLOCKBYTES, ad, rtk1, rtk2_3);
xor_block(auth, tmp + BLOCKBYTES);
adlen -= BLOCKBYTES;
ad += BLOCKBYTES;
UPDATE_LFSR(lfsr);
}
if (adlen > 0) {
LE_STR_64(tmp, lfsr);
SET_DOMAIN(tmp, 0x03); // domain for padding ad
precompute_rtk1(rtk1, tmp);
memset(tmp, 0x00, BLOCKBYTES); // padding
memcpy(tmp, ad, adlen); // padding
tmp[adlen] ^= 0x80; // padding
skinny128_384_plus_encrypt(tmp, tmp, rtk1, rtk2_3);
xor_block(auth, tmp);
}
xor_block(sum, auth); // XOR for tag computation
// ----------------- Process the associated data -----------------
feedback = 0;
for(i = 0; i < TAGBYTES; i++)
feedback |= sum[i] ^ c[i]; // constant-time tag verification
return feedback; // nonzero on tag mismatch
}
\ No newline at end of file
/******************************************************************************
* Constant-time implementation of the SKINNY tweakable block ciphers.
*
* This implementation doesn't compute the ShiftRows operation. Some masks and
* shifts are applied during the MixColumns operation so that the proper bits
* are XORed together. Moreover, the row permutation within the MixColumns
* is omitted, as well as the bit permutation at the end of the Sbox. The rows
 * are synchronized with the classical representation after only 4 rounds.
 * Therefore, this
* implementation relies on a "QUADRUPLE_ROUND" routine.
*
* The Sbox computation takes advantage of some symmetry in the 8-bit Sbox to
* turn it into a 4-bit S-box computation. Although the last bit permutation
* within the Sbox is not computed, the bit ordering is synchronized with the
* classical representation after 2 calls.
*
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
*
* @date May 2020
******************************************************************************/
#include <stdio.h>
#include <string.h>
#include "skinny128.h"
#include "tk_schedule.h"
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 0
******************************************************************************/
void mixcolumns_0(u32* state) {
u32 tmp;
for(int i = 0; i < 4; i++) {
tmp = ROR(state[i],24) & 0x0c0c0c0c;
state[i] ^= ROR(tmp,30);
tmp = ROR(state[i],16) & 0xc0c0c0c0;
state[i] ^= ROR(tmp,4);
tmp = ROR(state[i],8) & 0x0c0c0c0c;
state[i] ^= ROR(tmp,2);
}
}
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 1
******************************************************************************/
void mixcolumns_1(u32* state) {
u32 tmp;
for(int i = 0; i < 4; i++) {
tmp = ROR(state[i],16) & 0x30303030;
state[i] ^= ROR(tmp,30);
tmp = state[i] & 0x03030303;
state[i] ^= ROR(tmp,28);
tmp = ROR(state[i],16) & 0x30303030;
state[i] ^= ROR(tmp,2);
}
}
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 2
******************************************************************************/
void mixcolumns_2(u32* state) {
u32 tmp;
for(int i = 0; i < 4; i++) {
tmp = ROR(state[i],8) & 0xc0c0c0c0;
state[i] ^= ROR(tmp,6);
tmp = ROR(state[i],16) & 0x0c0c0c0c;
state[i] ^= ROR(tmp,28);
tmp = ROR(state[i],24) & 0xc0c0c0c0;
state[i] ^= ROR(tmp,2);
}
}
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 3
******************************************************************************/
void mixcolumns_3(u32* state) {
u32 tmp;
for(int i = 0; i < 4; i++) {
tmp = state[i] & 0x03030303;
state[i] ^= ROR(tmp,30);
tmp = state[i] & 0x30303030;
state[i] ^= ROR(tmp,4);
tmp = state[i] & 0x03030303;
state[i] ^= ROR(tmp,26);
}
}
/******************************************************************************
* The inverse MixColumns operation for rounds i such that (i % 4) == 0
******************************************************************************/
void inv_mixcolumns_0(u32* state) {
u32 tmp;
for(int i = 0; i < 4; i++) {
tmp = ROR(state[i],8) & 0x0c0c0c0c;
state[i] ^= ROR(tmp,2);
tmp = ROR(state[i],16) & 0xc0c0c0c0;
state[i] ^= ROR(tmp,4);
tmp = ROR(state[i],24) & 0x0c0c0c0c;
state[i] ^= ROR(tmp,30);
}
}
/******************************************************************************
 * The inverse MixColumns operation for rounds i such that (i % 4) == 1
******************************************************************************/
void inv_mixcolumns_1(u32* state) {
u32 tmp;
for(int i = 0; i < 4; i++) {
tmp = ROR(state[i],16) & 0x30303030;
state[i] ^= ROR(tmp,2);
tmp = state[i] & 0x03030303;
state[i] ^= ROR(tmp,28);
tmp = ROR(state[i],16) & 0x30303030;
state[i] ^= ROR(tmp,30);
}
}
/******************************************************************************
 * The inverse MixColumns operation for rounds i such that (i % 4) == 2
******************************************************************************/
void inv_mixcolumns_2(u32* state) {
u32 tmp;
for(int i = 0; i < 4; i++) {
tmp = ROR(state[i],24) & 0xc0c0c0c0;
state[i] ^= ROR(tmp,2);
tmp = ROR(state[i],16) & 0x0c0c0c0c;
state[i] ^= ROR(tmp,28);
tmp = ROR(state[i],8) & 0xc0c0c0c0;
state[i] ^= ROR(tmp,6);
}
}
/******************************************************************************
 * The inverse MixColumns operation for rounds i such that (i % 4) == 3
******************************************************************************/
void inv_mixcolumns_3(u32* state) {
u32 tmp;
for(int i = 0; i < 4; i++) {
tmp = state[i] & 0x03030303;
state[i] ^= ROR(tmp,26);
tmp = state[i] & 0x30303030;
state[i] ^= ROR(tmp,4);
tmp = state[i] & 0x03030303;
state[i] ^= ROR(tmp,30);
}
}
/******************************************************************************
 * Encryption of a single block (no mode of operation) using SKINNY-128-384+.
 * RTK1 and RTK2_3 are given separately to take advantage of the fact that
 * TK2 and TK3 remain the same throughout the data encryption/decryption.
******************************************************************************/
void skinny128_384_plus_encrypt(u8* ctext, const u8* ptext, const u32* rtk1,
const u32* rtk2_3) {
u32 tmp; // used in SWAPMOVE macro
u32 state[4]; // 128-bit state
packing(state, ptext); // from byte to bitsliced representation
QUADRUPLE_ROUND(state, rtk1, rtk2_3);
QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+16);
QUADRUPLE_ROUND(state, rtk1+32, rtk2_3+32);
QUADRUPLE_ROUND(state, rtk1+48, rtk2_3+48);
QUADRUPLE_ROUND(state, rtk1, rtk2_3+64);
QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+80);
QUADRUPLE_ROUND(state, rtk1+32, rtk2_3+96);
QUADRUPLE_ROUND(state, rtk1+48, rtk2_3+112);
QUADRUPLE_ROUND(state, rtk1, rtk2_3+128);
QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+144);
unpacking(ctext, state); // from bitsliced to byte representation
}
/******************************************************************************
 * Decryption of a single block (no mode of operation) using SKINNY-128-384+.
 * RTK1 and RTK2_3 are given separately to take advantage of the fact that
 * TK2 and TK3 remain the same throughout the data encryption/decryption.
******************************************************************************/
void skinny128_384_plus_decrypt(u8* ptext, const u8* ctext, const u32* rtk1,
const u32* rtk2_3) {
u32 tmp; // used in SWAPMOVE macro
u32 state[4]; // 128-bit state
packing(state, ctext); // from byte to bitsliced representation
INV_QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+144);
INV_QUADRUPLE_ROUND(state, rtk1, rtk2_3+128);
INV_QUADRUPLE_ROUND(state, rtk1+48, rtk2_3+112);
INV_QUADRUPLE_ROUND(state, rtk1+32, rtk2_3+96);
INV_QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+80);
INV_QUADRUPLE_ROUND(state, rtk1, rtk2_3+64);
INV_QUADRUPLE_ROUND(state, rtk1+48, rtk2_3+48);
INV_QUADRUPLE_ROUND(state, rtk1+32, rtk2_3+32);
INV_QUADRUPLE_ROUND(state, rtk1+16, rtk2_3+16);
INV_QUADRUPLE_ROUND(state, rtk1, rtk2_3);
unpacking(ptext, state); // from bitsliced to byte representation
}
\ No newline at end of file
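/******************************************************************************
 * Editor's sketch (hypothetical self-test, not part of the submission): the
 * inverse quadruple rounds consume the round tweakeys in reverse order, so
 * decryption undoes encryption block-wise with the same precomputed schedule.
 ******************************************************************************/
static void skinny128_384_plus_selftest(void) {
    u8 tk1[16] = {0}, tk2[16] = {0}, tk3[16] = {0};
    u8 ptext[16] = {0}, ctext[16], check[16];
    u32 rtk1[4*16];
    u32 rtk2_3[4*SKINNY128_384_ROUNDS];
    precompute_rtk2_3(rtk2_3, tk2, tk3);
    precompute_rtk1(rtk1, tk1);
    skinny128_384_plus_encrypt(ctext, ptext, rtk1, rtk2_3);
    skinny128_384_plus_decrypt(check, ctext, rtk1, rtk2_3);
    // check == ptext, byte for byte, is expected
}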
#ifndef SKINNY128_H_
#define SKINNY128_H_
#include "tk_schedule.h"
void skinny128_384_plus_encrypt(u8* ctext, const u8* ptext, const u32* rtk1, const u32* rtk2_3);
void skinny128_384_plus_decrypt(u8* ptext, const u8* ctext, const u32* rtk1, const u32* rtk2_3);
#define SKINNY128_384_ROUNDS 40
#define QUADRUPLE_ROUND(state, rtk1, rtk2_3) ({ \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= (state[2] | state[3]); \
SWAPMOVE(state[3], state[0], 0x55555555, 0); \
state[0] ^= (rtk1)[0]; \
state[1] ^= (rtk1)[1]; \
state[2] ^= (rtk1)[2]; \
state[3] ^= (rtk1)[3]; \
state[0] ^= (rtk2_3)[0]; \
state[1] ^= (rtk2_3)[1]; \
state[2] ^= (rtk2_3)[2]; \
state[3] ^= (rtk2_3)[3]; \
mixcolumns_0(state); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= (state[0] | state[1]); \
SWAPMOVE(state[1], state[2], 0x55555555, 0); \
state[0] ^= (rtk1)[4]; \
state[1] ^= (rtk1)[5]; \
state[2] ^= (rtk1)[6]; \
state[3] ^= (rtk1)[7]; \
state[0] ^= (rtk2_3)[4]; \
state[1] ^= (rtk2_3)[5]; \
state[2] ^= (rtk2_3)[6]; \
state[3] ^= (rtk2_3)[7]; \
mixcolumns_1(state); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= (state[2] | state[3]); \
SWAPMOVE(state[3], state[0], 0x55555555, 0); \
state[0] ^= (rtk1)[8]; \
state[1] ^= (rtk1)[9]; \
state[2] ^= (rtk1)[10]; \
state[3] ^= (rtk1)[11]; \
state[0] ^= (rtk2_3)[8]; \
state[1] ^= (rtk2_3)[9]; \
state[2] ^= (rtk2_3)[10]; \
state[3] ^= (rtk2_3)[11]; \
mixcolumns_2(state); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
state[3] ^= (state[0] | state[1]); \
SWAPMOVE(state[1], state[2], 0x55555555, 0); \
state[0] ^= (rtk1)[12]; \
state[1] ^= (rtk1)[13]; \
state[2] ^= (rtk1)[14]; \
state[3] ^= (rtk1)[15]; \
state[0] ^= (rtk2_3)[12]; \
state[1] ^= (rtk2_3)[13]; \
state[2] ^= (rtk2_3)[14]; \
state[3] ^= (rtk2_3)[15]; \
mixcolumns_3(state); \
})
#define INV_QUADRUPLE_ROUND(state, rtk1, rtk2_3) ({ \
inv_mixcolumns_3(state); \
state[0] ^= (rtk1)[12]; \
state[1] ^= (rtk1)[13]; \
state[2] ^= (rtk1)[14]; \
state[3] ^= (rtk1)[15]; \
state[0] ^= (rtk2_3)[12]; \
state[1] ^= (rtk2_3)[13]; \
state[2] ^= (rtk2_3)[14]; \
state[3] ^= (rtk2_3)[15]; \
SWAPMOVE(state[1], state[2], 0x55555555, 0); \
state[3] ^= (state[0] | state[1]); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
inv_mixcolumns_2(state); \
state[0] ^= (rtk1)[8]; \
state[1] ^= (rtk1)[9]; \
state[2] ^= (rtk1)[10]; \
state[3] ^= (rtk1)[11]; \
state[0] ^= (rtk2_3)[8]; \
state[1] ^= (rtk2_3)[9]; \
state[2] ^= (rtk2_3)[10]; \
state[3] ^= (rtk2_3)[11]; \
SWAPMOVE(state[3], state[0], 0x55555555, 0); \
state[1] ^= (state[2] | state[3]); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
inv_mixcolumns_1(state); \
state[0] ^= (rtk1)[4]; \
state[1] ^= (rtk1)[5]; \
state[2] ^= (rtk1)[6]; \
state[3] ^= (rtk1)[7]; \
state[0] ^= (rtk2_3)[4]; \
state[1] ^= (rtk2_3)[5]; \
state[2] ^= (rtk2_3)[6]; \
state[3] ^= (rtk2_3)[7]; \
SWAPMOVE(state[1], state[2], 0x55555555, 0); \
state[3] ^= (state[0] | state[1]); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
inv_mixcolumns_0(state); \
state[0] ^= (rtk1)[0]; \
state[1] ^= (rtk1)[1]; \
state[2] ^= (rtk1)[2]; \
state[3] ^= (rtk1)[3]; \
state[0] ^= (rtk2_3)[0]; \
state[1] ^= (rtk2_3)[1]; \
state[2] ^= (rtk2_3)[2]; \
state[3] ^= (rtk2_3)[3]; \
SWAPMOVE(state[3], state[0], 0x55555555, 0); \
state[1] ^= (state[2] | state[3]); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
SWAPMOVE(state[0], state[3], 0x55555555, 1); \
SWAPMOVE(state[1], state[0], 0x55555555, 1); \
state[1] ^= ~(state[2] | state[3]); \
SWAPMOVE(state[3], state[2], 0x55555555, 1); \
SWAPMOVE(state[2], state[1], 0x55555555, 1); \
state[3] ^= ~(state[0] | state[1]); \
})
#endif // SKINNY128_H_
\ No newline at end of file
#ifndef SKINNYAEADM1_H_
#define SKINNYAEADM1_H_
#include "skinny128.h"
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
#define TAGBYTES 16
#define KEYBYTES 16
#define BLOCKBYTES 16
#define SET_DOMAIN(ptr, domain) ((ptr)[15] = (domain))
#define UPDATE_LFSR(lfsr) ({ \
feedback = ((lfsr) & (1ULL << 63)) ? 0x1B : 0x00; \
(lfsr) = ((lfsr) << 1) ^ feedback; \
})
#define LE_STR_64(ptr, x) ({ \
(ptr)[0] = (u8)(x); \
(ptr)[1] = (u8)((x) >> 8); \
(ptr)[2] = (u8)((x) >> 16); \
(ptr)[3] = (u8)((x) >> 24); \
(ptr)[4] = (u8)((x) >> 32); \
(ptr)[5] = (u8)((x) >> 40); \
(ptr)[6] = (u8)((x) >> 48); \
(ptr)[7] = (u8)((x) >> 56); \
})
#endif // SKINNYAEADM1_H_
\ No newline at end of file
#ifndef TK_SCHEDULE_H_
#define TK_SCHEDULE_H_
typedef unsigned char u8;
typedef unsigned int u32;
void packing(u32* out, const u8* in);
void unpacking(u8* out, u32 *in);
void precompute_rtk2_3(u32* rtk, const u8* tk2, const u8* tk3);
void precompute_rtk1(u32* rtk1, const u8* tk1);
#define ROR(x,y) (((x) >> (y)) | ((x) << (32 - (y))))
#define XOR_BLOCKS(x,y) ({ \
(x)[0] ^= (y)[0]; \
(x)[1] ^= (y)[1]; \
(x)[2] ^= (y)[2]; \
(x)[3] ^= (y)[3]; \
})
#define SWAPMOVE(a, b, mask, n) ({ \
tmp = (b ^ (a >> n)) & mask; \
b ^= tmp; \
a ^= (tmp << n); \
})
#define LE_LOAD(x, y) \
*(x) = (((u32)(y)[3] << 24) | \
((u32)(y)[2] << 16) | \
((u32)(y)[1] << 8) | \
(y)[0]);
#define LE_STORE(x, y) \
(x)[0] = (y) & 0xff; \
(x)[1] = ((y) >> 8) & 0xff; \
(x)[2] = ((y) >> 16) & 0xff; \
(x)[3] = (y) >> 24;
#endif // TK_SCHEDULE_H_
\ No newline at end of file
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
\ No newline at end of file
/******************************************************************************
* Fixsliced implementation of SKINNY-128-384.
* Two blocks are processed in parallel.
*
* This implementation doesn't compute the ShiftRows operation. Some masks and
* shifts are applied during the MixColumns operation so that the proper bits
* are XORed together. Moreover, the row permutation within the MixColumns
* is omitted, as well as the bit permutation at the end of the Sbox. The rows
 * are synchronized with the classical representation after only 4 rounds.
 * However, the Sbox permutation requires 8 rounds for synchronization. To
 * limit the impact on code size, we compute the permutation every 4 rounds.
 * Therefore, this
* implementation relies on a "QUADRUPLE_ROUND" routine.
*
* For more details, see the paper at: https://
*
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
*
* @date May 2020
******************************************************************************/
#include <stdio.h>
#include <string.h>
#include "skinny128.h"
#include "tk_schedule.h"
/****************************************************************************
* The MixColumns operation for rounds i such that (i % 4) == 0.
****************************************************************************/
void mixcolumns_0(u32* state) {
u32 tmp;
for(int i = 0; i < 8; i++) {
tmp = ROR(state[i],24) & 0x0c0c0c0c;
state[i] ^= ROR(tmp,30);
tmp = ROR(state[i],16) & 0xc0c0c0c0;
state[i] ^= ROR(tmp,4);
tmp = ROR(state[i],8) & 0x0c0c0c0c;
state[i] ^= ROR(tmp,2);
}
}
/****************************************************************************
* The MixColumns operation for rounds i such that (i % 4) == 1.
****************************************************************************/
void mixcolumns_1(u32* state) {
u32 tmp;
for(int i = 0; i < 8; i++) {
tmp = ROR(state[i],16) & 0x30303030;
state[i] ^= ROR(tmp,30);
tmp = state[i] & 0x03030303;
state[i] ^= ROR(tmp,28);
tmp = ROR(state[i],16) & 0x30303030;
state[i] ^= ROR(tmp,2);
}
}
/****************************************************************************
* The MixColumns operation for rounds i such that (i % 4) == 2.
****************************************************************************/
void mixcolumns_2(u32* state) {
u32 tmp;
for(int i = 0; i < 8; i++) {
tmp = ROR(state[i],8) & 0xc0c0c0c0;
state[i] ^= ROR(tmp,6);
tmp = ROR(state[i],16) & 0x0c0c0c0c;
state[i] ^= ROR(tmp,28);
tmp = ROR(state[i],24) & 0xc0c0c0c0;
state[i] ^= ROR(tmp,2);
}
}
/****************************************************************************
* The MixColumns operation for rounds i such that (i % 4) == 3.
****************************************************************************/
void mixcolumns_3(u32* state) {
u32 tmp;
for(int i = 0; i < 8; i++) {
tmp = state[i] & 0x03030303;
state[i] ^= ROR(tmp,30);
tmp = state[i] & 0x30303030;
state[i] ^= ROR(tmp,4);
tmp = state[i] & 0x03030303;
state[i] ^= ROR(tmp,26);
}
}
/****************************************************************************
 * The inverse MixColumns operation for rounds i such that (i % 4) == 0
****************************************************************************/
void inv_mixcolumns_0(u32* state) {
u32 tmp;
for(int i = 0; i < 8; i++) {
tmp = ROR(state[i],8) & 0x0c0c0c0c;
state[i] ^= ROR(tmp,2);
tmp = ROR(state[i],16) & 0xc0c0c0c0;
state[i] ^= ROR(tmp,4);
tmp = ROR(state[i],24) & 0x0c0c0c0c;
state[i] ^= ROR(tmp,30);
}
}
/****************************************************************************
 * The inverse MixColumns operation for rounds i such that (i % 4) == 1
****************************************************************************/
void inv_mixcolumns_1(u32* state) {
u32 tmp;
for(int i = 0; i < 8; i++) {
tmp = ROR(state[i],16) & 0x30303030;
state[i] ^= ROR(tmp,2);
tmp = state[i] & 0x03030303;
state[i] ^= ROR(tmp,28);
tmp = ROR(state[i],16) & 0x30303030;
state[i] ^= ROR(tmp,30);
}
}
/****************************************************************************
 * The inverse MixColumns operation for rounds i such that (i % 4) == 2
****************************************************************************/
void inv_mixcolumns_2(u32* state) {
u32 tmp;
for(int i = 0; i < 8; i++) {
tmp = ROR(state[i],24) & 0xc0c0c0c0;
state[i] ^= ROR(tmp,2);
tmp = ROR(state[i],16) & 0x0c0c0c0c;
state[i] ^= ROR(tmp,28);
tmp = ROR(state[i],8) & 0xc0c0c0c0;
state[i] ^= ROR(tmp,6);
}
}
/****************************************************************************
 * The inverse MixColumns operation for rounds i such that (i % 4) == 3
****************************************************************************/
void inv_mixcolumns_3(u32* state) {
u32 tmp;
for(int i = 0; i < 8; i++) {
tmp = state[i] & 0x03030303;
state[i] ^= ROR(tmp,26);
tmp = state[i] & 0x30303030;
state[i] ^= ROR(tmp,4);
tmp = state[i] & 0x03030303;
state[i] ^= ROR(tmp,30);
}
}
/****************************************************************************
* Adds the tweakey (including the round constants) to the state.
****************************************************************************/
void add_tweakey(u32* state, const u32* rtk1, const u32* rtk2_3) {
state[0] ^= rtk1[0] ^ rtk2_3[0];
state[1] ^= rtk1[1] ^ rtk2_3[1];
state[2] ^= rtk1[2] ^ rtk2_3[2];
state[3] ^= rtk1[3] ^ rtk2_3[3];
state[4] ^= rtk1[4] ^ rtk2_3[4];
state[5] ^= rtk1[5] ^ rtk2_3[5];
state[6] ^= rtk1[6] ^ rtk2_3[6];
state[7] ^= rtk1[7] ^ rtk2_3[7];
}
/****************************************************************************
* Encryption of 2 blocks in parallel using SKINNY-128-384.
 * The round tweakeys 'rtk1' and 'rtk2_3' within 'tk' are given separately to
 * avoid unnecessary recomputations of the entire tk schedule during
 * SKINNY-AEAD-M1.
****************************************************************************/
void skinny128_384_plus_encrypt(u8* ctext, u8* ctext_bis, const u8* ptext,
const u8* ptext_bis, const tweakey tk) {
u32 state[8];
packing(state, ptext, ptext_bis);
QUADRUPLE_ROUND(state, tk.rtk1, tk.rtk2_3);
QUADRUPLE_ROUND(state, tk.rtk1+32, tk.rtk2_3+32);
QUADRUPLE_ROUND(state, tk.rtk1+64, tk.rtk2_3+64);
QUADRUPLE_ROUND(state, tk.rtk1+96, tk.rtk2_3+96);
QUADRUPLE_ROUND(state, tk.rtk1, tk.rtk2_3+128);
QUADRUPLE_ROUND(state, tk.rtk1+32, tk.rtk2_3+160);
QUADRUPLE_ROUND(state, tk.rtk1+64, tk.rtk2_3+192);
QUADRUPLE_ROUND(state, tk.rtk1+96, tk.rtk2_3+224);
QUADRUPLE_ROUND(state, tk.rtk1, tk.rtk2_3+256);
QUADRUPLE_ROUND(state, tk.rtk1+32, tk.rtk2_3+288);
unpacking(ctext, ctext_bis, state);
}
/****************************************************************************
* Decryption of 2 blocks in parallel using SKINNY-128-384.
 * The round tweakeys 'rtk1' and 'rtk2_3' within 'tk' are given separately to
 * avoid unnecessary recomputations of the entire tk schedule during
 * SKINNY-AEAD-M1.
****************************************************************************/
void skinny128_384_plus_decrypt(u8* ptext, u8* ptext_bis, const u8* ctext,
const u8* ctext_bis, const tweakey tk) {
u32 state[8];
packing(state, ctext, ctext_bis);
INV_QUADRUPLE_ROUND(state, tk.rtk1+32, tk.rtk2_3+288);
INV_QUADRUPLE_ROUND(state, tk.rtk1, tk.rtk2_3+256);
INV_QUADRUPLE_ROUND(state, tk.rtk1+96, tk.rtk2_3+224);
INV_QUADRUPLE_ROUND(state, tk.rtk1+64, tk.rtk2_3+192);
INV_QUADRUPLE_ROUND(state, tk.rtk1+32, tk.rtk2_3+160);
INV_QUADRUPLE_ROUND(state, tk.rtk1, tk.rtk2_3+128);
INV_QUADRUPLE_ROUND(state, tk.rtk1+96, tk.rtk2_3+96);
INV_QUADRUPLE_ROUND(state, tk.rtk1+64, tk.rtk2_3+64);
INV_QUADRUPLE_ROUND(state, tk.rtk1+32, tk.rtk2_3+32);
INV_QUADRUPLE_ROUND(state, tk.rtk1, tk.rtk2_3);
unpacking(ptext, ptext_bis, state);
}
\ No newline at end of file
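/******************************************************************************
 * Editor's sketch (hypothetical caller, not part of the submission): both
 * 128-bit blocks go through a single fixsliced pass. The 'tweakey' struct is
 * defined in tk_schedule.h for this implementation (not shown here); it is
 * assumed to carry the precomputed rtk1/rtk2_3 arrays accessed above.
 ******************************************************************************/
static void skinny128_384_plus_parallel_example(const tweakey tk) {
    u8 p0[16] = {0}, p1[16] = {0};  // two independent plaintext blocks
    u8 c0[16], c1[16];
    skinny128_384_plus_encrypt(c0, c1, p0, p1, tk);  // one call, two blocks
}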
#ifndef SKINNY128_H_
#define SKINNY128_H_
#include "tk_schedule.h"
void skinny128_384_plus_encrypt(u8* ctext, u8* ctext_bis, const u8* ptext,
const u8* ptext_bis, const tweakey tk);
void skinny128_384_plus_decrypt(u8* ctext, u8* ctext_bis, const u8* ptext,
const u8* ptext_bis, const tweakey tk);
#define SKINNY128_384_ROUNDS 40
#define ROR(x,y) (((x) >> (y)) | ((x) << (32 - (y))))
#define QUADRUPLE_ROUND(state, rtk1, rtk2_3) ({ \
state[3] ^= (state[0] | state[1]); \
state[7] ^= (state[4] | state[5]); \
state[1] ^= (state[6] | state[5]); \
state[2] ^= (state[3] & state[7]); \
state[6] ^= (~state[7] | state[4]); \
state[0] ^= (state[2] | ~state[1]); \
state[4] ^= (~state[3] | state[2]); \
state[5] ^= (state[6] & state[0]); \
add_tweakey(state, rtk1, rtk2_3); \
mixcolumns_0(state); \
state[4] ^= (state[2] | state[3]); \
state[5] ^= (state[6] | state[1]); \
state[3] ^= (state[0] | state[1]); \
state[7] ^= (state[4] & state[5]); \
state[0] ^= (~state[5] | state[6]); \
state[2] ^= (state[7] | ~state[3]); \
state[6] ^= (~state[4] | state[7]); \
state[1] ^= (state[0] & state[2]); \
add_tweakey(state, rtk1+8, rtk2_3+8); \
mixcolumns_1(state); \
state[6] ^= (state[7] | state[4]); \
state[1] ^= (state[0] | state[3]); \
state[4] ^= (state[2] | state[3]); \
state[5] ^= (state[6] & state[1]); \
state[2] ^= (~state[1] | state[0]); \
state[7] ^= (state[5] | ~state[4]); \
state[0] ^= (~state[6] | state[5]); \
state[3] ^= (state[2] & state[7]); \
add_tweakey(state, rtk1+16, rtk2_3+16); \
mixcolumns_2(state); \
state[0] ^= (state[5] | state[6]); \
state[3] ^= (state[2] | state[4]); \
state[6] ^= (state[7] | state[4]); \
state[1] ^= (state[0] & state[3]); \
state[7] ^= (~state[3] | state[2]); \
state[5] ^= (state[1] | ~state[6]); \
state[2] ^= (~state[0] | state[1]); \
state[4] ^= (state[7] & state[5]); \
add_tweakey(state, rtk1+24, rtk2_3+24); \
mixcolumns_3(state); \
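/* XOR-swap the word pairs (0,1), (2,3), (4,7), (5,6) */ \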
state[0] ^= state[1]; \
state[1] ^= state[0]; \
state[0] ^= state[1]; \
state[2] ^= state[3]; \
state[3] ^= state[2]; \
state[2] ^= state[3]; \
state[4] ^= state[7]; \
state[7] ^= state[4]; \
state[4] ^= state[7]; \
state[5] ^= state[6]; \
state[6] ^= state[5]; \
state[5] ^= state[6]; \
})
#define INV_QUADRUPLE_ROUND(state, rtk1, rtk2_3) ({ \
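/* first undo the word swaps performed at the end of the forward rounds */ \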
state[0] ^= state[1]; \
state[1] ^= state[0]; \
state[0] ^= state[1]; \
state[2] ^= state[3]; \
state[3] ^= state[2]; \
state[2] ^= state[3]; \
state[4] ^= state[7]; \
state[7] ^= state[4]; \
state[4] ^= state[7]; \
state[5] ^= state[6]; \
state[6] ^= state[5]; \
state[5] ^= state[6]; \
inv_mixcolumns_3(state); \
add_tweakey(state, rtk1+24, rtk2_3+24); \
state[4] ^= (state[7] & state[5]); \
state[2] ^= (~state[0] | state[1]); \
state[5] ^= (state[1] | ~state[6]); \
state[7] ^= (~state[3] | state[2]); \
state[1] ^= (state[0] & state[3]); \
state[6] ^= (state[7] | state[4]); \
state[3] ^= (state[2] | state[4]); \
state[0] ^= (state[5] | state[6]); \
inv_mixcolumns_2(state); \
add_tweakey(state, rtk1+16, rtk2_3+16); \
state[3] ^= (state[2] & state[7]); \
state[0] ^= (~state[6] | state[5]); \
state[7] ^= (state[5] | ~state[4]); \
state[2] ^= (~state[1] | state[0]); \
state[5] ^= (state[6] & state[1]); \
state[4] ^= (state[2] | state[3]); \
state[1] ^= (state[0] | state[3]); \
state[6] ^= (state[7] | state[4]); \
inv_mixcolumns_1(state); \
add_tweakey(state, rtk1+8, rtk2_3+8); \
state[1] ^= (state[0] & state[2]); \
state[6] ^= (~state[4] | state[7]); \
state[2] ^= (state[7] | ~state[3]); \
state[0] ^= (~state[5] | state[6]); \
state[7] ^= (state[4] & state[5]); \
state[3] ^= (state[0] | state[1]); \
state[5] ^= (state[6] | state[1]); \
state[4] ^= (state[2] | state[3]); \
inv_mixcolumns_0(state); \
add_tweakey(state, rtk1, rtk2_3); \
state[5] ^= (state[6] & state[0]); \
state[4] ^= (~state[3] | state[2]); \
state[0] ^= (state[2] | ~state[1]); \
state[6] ^= (~state[7] | state[4]); \
state[2] ^= (state[3] & state[7]); \
state[1] ^= (state[6] | state[5]); \
state[7] ^= (state[4] | state[5]); \
state[3] ^= (state[0] | state[1]); \
})
#endif // SKINNY128_H_
\ No newline at end of file