Commit 04be7d51 by Enrico Pozzobon

Merge branch 'email-submissions'

parents c5473c21 6ec7ff92
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#ifndef __BYTES_UTILS_H__
#define __BYTES_UTILS_H__
#include <stdio.h>
#include <stdint.h>
#include <string.h>
//#ifndef bytes_utiles_printf
//#define bytes_utiles_printf printf
//#endif
#ifndef bytes_utiles_printf
#define bytes_utiles_printf printf
#endif
//replace 0 by .
// Print one byte as two hex nibbles, showing '.' for a zero nibble so
// that differences stand out, then print the separator string `sep`.
static void print_diff_byte(uint8_t d, const char *sep){
    unsigned int hi = (d >> 4) & 0xF;
    unsigned int lo = d & 0xF;
    if (hi) bytes_utiles_printf("%X", hi); else bytes_utiles_printf(".");
    if (lo) bytes_utiles_printf("%X", lo); else bytes_utiles_printf(".");
    bytes_utiles_printf("%s", sep);
}
// Print `msg`, then `size` bytes of `vbuf` in diff notation (zero
// nibbles shown as '.'), separated by `sep`, terminated by `m2`.
static void print_diff_bytes_sep(const char *msg,const void *vbuf, unsigned int size, const char *m2, const char *sep){
    const uint8_t *bytes = (const uint8_t*)vbuf;
    bytes_utiles_printf("%s", msg);
    for (unsigned int i = 0; i < size; i++) {
        // no separator after the final byte
        print_diff_byte(bytes[i], (i + 1 < size) ? sep : "");
    }
    bytes_utiles_printf("%s", m2);
}
// Print `msg`, then `size` bytes of `vbuf` as uppercase hex pairs
// separated by `sep`, terminated by `m2`.
static void print_bytes_sep(const char *msg,const void *vbuf, unsigned int size, const char *m2, const char *sep){
    const uint8_t *bytes = (const uint8_t*)vbuf;
    bytes_utiles_printf("%s", msg);
    for (unsigned int i = 0; i < size; i++) {
        bytes_utiles_printf("%02X", bytes[i]);
        // no separator after the final byte
        if (i + 1 < size) bytes_utiles_printf("%s", sep);
    }
    bytes_utiles_printf("%s", m2);
}
// Shorthand: print bytes with a single space between them.
static void print_bytes(const char *m,const void *buf, unsigned int size, const char *m2){
    print_bytes_sep(m, buf, size, m2, " ");
}
// Shorthand: print bytes followed by a newline.
static void println_bytes(const char *m,const void *buf, unsigned int size){
    print_bytes(m, buf, size, "\n");
}
// Print a 128-bit value as four 4-byte groups separated by '_',
// prefixed by `m` and terminated by `m2`.
static void print_128(const char *m, const uint8_t a[16], const char *m2){
    for (unsigned int group = 0; group < 4; group++) {
        const char *prefix = (group == 0) ? m : "";
        const char *suffix = (group == 3) ? m2 : "_";
        print_bytes_sep(prefix, a + 4 * group, 4, suffix, "");
    }
}
// Shorthand: print a 128-bit value followed by a newline.
static void println_128(const char m[], const uint8_t a[16]){
    print_128(m, a, "\n");
}
// XOR `size` bytes of `s` into `d` in place.
static void xor_bytes( uint8_t *d, const uint8_t *s, size_t size ){
    size_t i = 0;
    while (i < size) {
        d[i] ^= s[i];
        i++;
    }
}
// Map an ASCII hex digit to its value 0..15; return -1 for any other
// character.
static int hexdigit_value(char c){
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return -1;
}
// Nonzero when `c` is an ASCII hex digit.
static int is_hexdigit(char c){
    return hexdigit_value(c) != -1;
}
// Convert an even run of hex digits into bytes; the first digit of each
// pair is the high nibble. At most min(dst_size, strlen(hexstr)/2)
// bytes are written. Returns the number of bytes written.
// NOTE(review): hexstr is assumed pre-validated (see cleanup_hexstr);
// a non-hex character would inject garbage nibbles because
// hexdigit_value()'s -1 is truncated to 0xFF.
static size_t hexstr_to_bytes(uint8_t *dst, size_t dst_size, const char *const hexstr){
    size_t len = strlen(hexstr); /* was unsigned int: truncated long inputs */
    if (dst_size > (len / 2))
        dst_size = (len / 2);
    memset(dst, 0, dst_size);
    for (size_t i = 0; i < dst_size * 2; i++) {
        unsigned int shift = 4 - 4 * (i & 1); /* high nibble first */
        char c = hexstr[i];
        uint8_t nibble = (uint8_t)hexdigit_value(c);
        dst[i / 2] |= (uint8_t)(nibble << shift);
    }
    return dst_size;
}
// Render nBytes of `bytes` as uppercase hex into `dst`.
// dst must hold at least 2*nBytes+1 chars; each sprintf call
// NUL-terminates, so the final string is terminated when nBytes > 0.
static void bytes_to_hexstr(char *dst,uint8_t *bytes, unsigned int nBytes){
    char *out = dst;
    for (unsigned int i = 0; i < nBytes; i++, out += 2) {
        sprintf(out, "%02X", bytes[i]);
    }
}
// Copy the hex digits of str[0..str_size) into hexstr, dropping every
// non-hex character and removing "0x"/"0X" prefixes (the leading '0'
// was already copied when the 'x' is seen, so cnt is rewound by one).
// hexstr and str may alias: user_hexstr_to_bytes() calls this in place.
// Returns the digit count; hexstr is always NUL-terminated on return.
// NOTE(review): on overflow this returns -1, which wraps to SIZE_MAX
// because the return type is size_t -- callers must treat values
// >= hexstr_size as errors; user_hexstr_to_bytes() currently does not.
static size_t cleanup_hexstr(char *hexstr, size_t hexstr_size, char *str, size_t str_size){
size_t cnt=0;
int lastIs0=0;
for(unsigned int j = 0;j<str_size;j++){
char c = str[j];
if(is_hexdigit(c)){
if(cnt==hexstr_size-1){//need final char for null.
printf("Too many hex digits. hexstr=%s\n",hexstr);
hexstr[cnt]=0;
return -1;
}
hexstr[cnt++]=c;
} else if(lastIs0) {
// non-digit right after '0': treat as an 0x/0X prefix, undo the '0'
if('x'==c) cnt--;
if('X'==c) cnt--;
}
lastIs0 = '0'==c;
}
hexstr[cnt]=0;
return cnt;
}
// Parse a user-supplied string (which may contain separators and 0x
// prefixes) into bytes. `str` is cleaned up IN PLACE, then converted.
// Returns the number of bytes written (at most out_size).
// NOTE(review): a cleanup_hexstr() overflow returns (size_t)-1, which is
// not detected here -- hexstr_size/2 then clamps to out_size and the
// truncated digits are silently converted; confirm callers size str
// so that overflow cannot happen.
static size_t user_hexstr_to_bytes(uint8_t*out, size_t out_size, char *str, size_t str_size){
size_t hexstr_size = cleanup_hexstr(str,str_size,str,str_size);
size_t conv_size = (hexstr_size/2) < out_size ? hexstr_size/2 : out_size;
return hexstr_to_bytes(out,conv_size,str);
}
// Reference otherwise-unused static helpers so that translation units
// including this header but using only part of it do not trigger
// -Wunused-function. Never called for effect.
static void bytes_utils_remove_unused_warnings(void){
(void)println_bytes;
(void)println_128;
(void)xor_bytes;
(void)bytes_to_hexstr;
(void)user_hexstr_to_bytes;
(void)print_diff_bytes_sep;
}
#endif
/**
DryGascon128
Sebastien Riou, January 27th 2019
c99 ref implementation meant to fit in the supercop framework
*/
#ifndef __DRYGASCON128_H__
#define __DRYGASCON128_H__
#define DRYSPONGE_DBG_EN 0
//#define DRYSPONGE_ACCUMULATE_SAFE_AND_SLOW
#define DRYSPONGE_KEYSIZE 16
#define DRYSPONGE_NONCESIZE 16
#define DRYSPONGE_BLOCKSIZE 16
#define DRYSPONGE_CAPACITYSIZE (5*64/8)
#define DRYSPONGE_XSIZE (4*32/8)
//remove one round because Mix does 1 round merely for processing the upper
//2 bits of the domain separator (because 128+4 mod 10 is 2)
#define DRYSPONGE_INIT_ROUNDS (12-1)
#define DRYSPONGE_ROUNDS (8-1)
#define DRYSPONGE_ACCUMULATE_FACTOR 2
#define DRYSPONGE_MPR_INPUT_WIDTH 10
#include "drygascon_le32.h"
#endif
/**
DryGascon_le32
Sebastien Riou, January 6th 2019
c99 little endian 32 bit implementation meant to fit in the supercop framework
*/
#ifndef __DRYGASCON_H__
#define __DRYGASCON_H__
#include <stdint.h>
typedef uint64_t DRYSPONGE_EXT_t;
#define DRYSPONGE_EXT
#include "drysponge_common.h"
//input width for one round of MixPhaseRound
#define DRYSPONGE_MPR_INPUT_MASK ((((uint64_t)1)<<DRYSPONGE_MPR_INPUT_WIDTH)-1)
#define DRYSPONGE_MPR_ROUNDS DRYSPONGE_DIVUP((DRYSPONGE_BLOCKSIZE*8)+4,DRYSPONGE_MPR_INPUT_WIDTH)
#if (DRYSPONGE_MPR_ROUNDS*DRYSPONGE_MPR_INPUT_WIDTH-4)<(DRYSPONGE_BLOCKSIZE*8)
#error "(DRYSPONGE_MPR_ROUNDS*DRYSPONGE_MPR_INPUT_WIDTH-4)<(DRYSPONGE_BLOCKSIZE*8)"
#endif
#if DRYSPONGE_XSIZE32>16
#error "DRYSPONGE_XSIZE32>16"
#endif
#if DRYSPONGE_XSIZE32 == 4
#define DRYSPONGE_X_IDX_WIDTH 2
#endif
#if DRYSPONGE_MPR_INPUT_WIDTH == 10
#define DRYSPONGE_RANK_BYTES 2
typedef uint32_t permut_rank_t;
#endif
#if DRYSPONGE_MPR_INPUT_WIDTH == 18
#define DRYSPONGE_RANK_BYTES 3
typedef uint32_t permut_rank_t;
#endif
#define DRYSPONGE_X_IDX_MASK ((1<<DRYSPONGE_X_IDX_WIDTH)-1)
#ifndef DRYSPONGE_OPT_F
// Record the domain-separation bits in *ext, pre-shifted so that the
// final DRYSPONGE_MixPhaseRound can XOR them above the last
// (BLOCKSIZE*8)%MPR_INPUT_WIDTH message bits of the block.
DRYSPONGE_FUNC void DRYSPONGE_DomainSeparator(
DRYSPONGE_EXT_t *const ext,
unsigned int dsinfo
){
*ext = dsinfo;
*ext = *ext<<((DRYSPONGE_BLOCKSIZE*8)%DRYSPONGE_MPR_INPUT_WIDTH);
}
// One round of the mix phase: extract DRYSPONGE_MPR_INPUT_WIDTH bits of
// the input block starting at bit `bitidx`, XOR in pending
// domain-separation bits (`ext`), then use the resulting rank to select,
// for each capacity word, which 32-bit word of x is XORed into it.
// NOTE(review): x64 is read through a uint32_t* cast and bytes of r are
// filled through a uint8_t* cast -- both rely on the little-endian
// target this implementation is declared for (see file header).
DRYSPONGE_FUNC void DRYSPONGE_MixPhaseRound(
DRYSPONGE_EXT_t ext,
uint64_t *const c64,
uint64_t *const x64,
const uint8_t *const in,
unsigned int bitidx,
unsigned int insize
){
uint32_t *const x32 = (uint32_t*const)x64;
unsigned int bi = bitidx/8;
unsigned int shift = bitidx%8;
permut_rank_t r=0;
uint8_t *rb = (uint8_t*)&r;
// gather up to DRYSPONGE_RANK_BYTES bytes, stopping at end of input
for(unsigned int i=0;i<DRYSPONGE_RANK_BYTES;i++){
if(bi+i==insize) break;
rb[i]=in[bi+i];
}
r = (r>>shift) & DRYSPONGE_MPR_INPUT_MASK;
r^=ext;
// consume DRYSPONGE_X_IDX_WIDTH bits of the rank per capacity word;
// the XOR affects the low 32 bits of each 64-bit capacity word
for(unsigned int j=0;j<DRYSPONGE_CAPACITYSIZE64;j++){
unsigned int i = r & DRYSPONGE_X_IDX_MASK;
r = r >> DRYSPONGE_X_IDX_WIDTH;
c64[j]^=x32[i];
}
}
#endif
struct DRYSPONGE_struct_t;
typedef struct DRYSPONGE_struct_t DRYSPONGE_t ;
DRYSPONGE_FUNC void DRYSPONGE_MixPhase(
DRYSPONGE_t *const ctx,
const uint8_t *const in
);
DRYSPONGE_FUNC void DRYSPONGE_CoreRound(
DRYSPONGE_t *const ctx,
unsigned int r
);
#include "drysponge_le32.h"
#ifndef DRYSPONGE_OPT_F
// Absorb one padded input block into the capacity ("mix phase").
// The block is consumed DRYSPONGE_MPR_INPUT_WIDTH bits at a time; each
// chunk (combined with pending domain-separation bits when applicable)
// drives one DRYSPONGE_MixPhaseRound followed by one core round. The
// final MixPhaseRound is not followed by a round here: DRYSPONGE_g()
// runs next.
// Fix: debug printfs used %lu for `unsigned int` values (undefined
// behavior per the C format-string rules); now %u with explicit casts
// where the operand is a macro integer expression.
DRYSPONGE_FUNC void DRYSPONGE_MixPhase(
    DRYSPONGE_t *const ctx,
    const uint8_t *const in
){
    unsigned int bitidx = 0;
#if DRYSPONGE_MPR_ROUNDS > 1
    for (unsigned int i = 0; i < DRYSPONGE_MPR_ROUNDS - 1; i++) {
#if DRYSPONGE_DBG_EN >= 4
        printf("Mix phase MixPhaseRound entry %u:\n", i); /* was %lu */
        DRYSPONGE_print_state(ctx);
#endif
        DRYSPONGE_EXT_t ext = 0;
#if ((DRYSPONGE_MPR_ROUNDS-1)*(DRYSPONGE_MPR_INPUT_WIDTH))>(DRYSPONGE_BLOCKSIZE*8)
        if ((ctx->ext) && (i == (DRYSPONGE_MPR_ROUNDS - 2))) {
            // DS info is split across this round and the last one
            ext = ctx->ext;
            ctx->ext = ctx->ext >> ((DRYSPONGE_BLOCKSIZE*8)%DRYSPONGE_MPR_INPUT_WIDTH);
            ctx->ext = ctx->ext >> ((((DRYSPONGE_MPR_ROUNDS-1)*DRYSPONGE_MPR_INPUT_WIDTH))-(DRYSPONGE_BLOCKSIZE*8));
        }
#endif
        DRYSPONGE_MixPhaseRound(ext, ctx->c, ctx->x, in, bitidx, DRYSPONGE_BLOCKSIZE);
        bitidx += DRYSPONGE_MPR_INPUT_WIDTH;
#if DRYSPONGE_DBG_EN >= 4
        printf("Mix phase CoreRound entry %u:\n", i); /* was %lu */
        DRYSPONGE_print_state(ctx);
#endif
        DRYSPONGE_CoreRound(ctx, 0);
    }
#endif
#if DRYSPONGE_DBG_EN >= 4
    printf("Mix phase MixPhaseRound entry %u:\n", (unsigned)(DRYSPONGE_MPR_ROUNDS - 1)); /* was %lu */
    DRYSPONGE_print_state(ctx);
#endif
    DRYSPONGE_MixPhaseRound(ctx->ext, ctx->c, ctx->x, in, bitidx, DRYSPONGE_BLOCKSIZE);
    ctx->ext = 0; // domain-separation bits are consumed exactly once
}
#endif
//#ifndef DRYSPONGE_OPT_G //keep for now, needed for key init
// Bit-sliced S-box layer over nw 64-bit words (Ascon-style chi layer
// generalized to nw words): input mixing of even-indexed words, the
// nonlinear step t[i] = ~x[i] & x[i+1], output mixing, and a final
// inversion of the middle word.
// Scratch array t is sized for the largest supported state.
DRYSPONGE_FUNC void gascon_sboxes(uint64_t * const x, unsigned int nw){
uint64_t t[DRYSPONGE_CAPACITYSIZE64];
const unsigned int mid = nw/2;
// x[2i] ^= x[2i-1 mod nw]
for(unsigned int i=0;i<mid+1;i++){
unsigned int dst = 2*i;
unsigned int src = (nw+dst-1) % nw;
x[dst] ^= x[src];
}
// chi nonlinearity: t[i] = ~x[i] & x[i+1 mod nw]
for(unsigned int i=0;i<nw;i++){
t[i] = (x[i] ^ 0xFFFFFFFFFFFFFFFFull) & x[(i+1)%nw];
}
for(unsigned int i=0;i<nw;i++){
x[i] ^= t[(i+1)%nw];
}
// x[2i+1 mod nw] ^= x[2i]
for(unsigned int i=0;i<mid+1;i++){
unsigned int src = 2*i;
unsigned int dst = (src+1) % nw;
x[dst] ^= x[src];
}
x[mid] ^= 0XFFFFFFFFFFFFFFFFull;
}
// Rotate a 64-bit word right by `shift` when it is stored bit-interleaved
// as two 32-bit halves: an even shift rotates both halves by shift/2; an
// odd shift additionally swaps the halves, rotating one of them by an
// extra position. memcpy is used to avoid aliasing violations.
DRYSPONGE_FUNC uint64_t gascon_rotr64_interleaved(uint64_t in, unsigned int shift){
uint32_t i[2];
memcpy(i,&in,sizeof(i));
unsigned int shift2 = shift/2;
if(shift & 1){
uint32_t tmp = DRYSPONGE_ROTR32(i[1],shift2);
i[1] = DRYSPONGE_ROTR32(i[0],(shift2+1)%32);
i[0] = tmp;
}else{
i[0] = DRYSPONGE_ROTR32(i[0],shift2);
i[1] = DRYSPONGE_ROTR32(i[1],shift2);
}
uint64_t out;
memcpy(&out,i,sizeof(i));
return out;
}
// XOR the round constant ((0xf - r) << 4) | r into the middle capacity
// word (Ascon-style constant schedule).
// Fix: the original local `unsigned int rounds = 12;` made
// `12 - rounds + round` identically `round` -- the dead variable and
// the no-op arithmetic are removed; behavior is unchanged.
DRYSPONGE_FUNC void gascon_add_cst(uint64_t* x, unsigned int round) {
    const unsigned int mid = DRYSPONGE_CAPACITYSIZE64 / 2;
    const unsigned int r = round;
    // addition of round constant
    x[mid] ^= ((0xfull - r) << 4) | r;
}
// Linear diffusion layer: each word is XORed with two rotations of
// itself; the rotation pairs are the Gascon constants and the rotations
// are performed on the bit-interleaved representation.
DRYSPONGE_FUNC void gascon_lin_layer(uint64_t* x) {
// linear diffusion layer
x[0] ^= gascon_rotr64_interleaved(x[0], 19) ^ gascon_rotr64_interleaved(x[0], 28);
x[1] ^= gascon_rotr64_interleaved(x[1], 61) ^ gascon_rotr64_interleaved(x[1], 38);
x[2] ^= gascon_rotr64_interleaved(x[2], 1) ^ gascon_rotr64_interleaved(x[2], 6);
x[3] ^= gascon_rotr64_interleaved(x[3], 10) ^ gascon_rotr64_interleaved(x[3], 17);
x[4] ^= gascon_rotr64_interleaved(x[4], 7) ^ gascon_rotr64_interleaved(x[4], 40);
#if DRYSPONGE_CAPACITYSIZE64 > 5
// wider-state variants diffuse four additional words
x[5] ^= gascon_rotr64_interleaved(x[5], 31) ^ gascon_rotr64_interleaved(x[5], 26);
x[6] ^= gascon_rotr64_interleaved(x[6], 53) ^ gascon_rotr64_interleaved(x[6], 58);
x[7] ^= gascon_rotr64_interleaved(x[7], 9) ^ gascon_rotr64_interleaved(x[7], 46);
x[8] ^= gascon_rotr64_interleaved(x[8], 43) ^ gascon_rotr64_interleaved(x[8], 50);
#endif
}
// One full Gascon permutation round: round-constant addition, S-box
// layer, linear diffusion. The (void) cast references DRYSPONGE_rotr64
// so it does not trigger -Wunused-function in this configuration.
DRYSPONGE_FUNC void gascon_permutation_round(uint64_t* S, unsigned int round) {
(void)DRYSPONGE_rotr64;
// addition of round constant
gascon_add_cst(S, round);
// substitution layer
gascon_sboxes(S,DRYSPONGE_CAPACITYSIZE64);
// linear diffusion layer
gascon_lin_layer(S);
}
// Run permutation round `r` on the capacity (generic fallback used when
// no optimized F routine is configured).
DRYSPONGE_FUNC void DRYSPONGE_CoreRound(
DRYSPONGE_t *const ctx,
unsigned int r
){
gascon_permutation_round(ctx->c, r);
}
#endif
#ifndef __DRYSPONGE_COMMON_H__
#define __DRYSPONGE_COMMON_H__
#ifndef DRYSPONGE_FUNC
#define DRYSPONGE_FUNC inline static
#endif
//convention:
// width means length in bits
// size means length in bytes
#include <stdint.h>
#include <string.h>
#include <assert.h>
#if DRYSPONGE_DBG_EN
#include "bytes_utils.h"
#endif
#define DRYSPONGE_PASS 0
#define DRYSPONGE_DS 2
#define DRYSPONGE_DD 1
#define DRYSPONGE_DA 2
#define DRYSPONGE_DM 3
#define DRYSPONGE_STATESIZE (DRYSPONGE_CAPACITYSIZE+DRYSPONGE_BLOCKSIZE)
#define DRYSPONGE_DIGESTSIZE (DRYSPONGE_KEYSIZE*2)
#define DRYSPONGE_TAGSIZE DRYSPONGE_KEYSIZE
#define DRYSPONGE_KEYMAXSIZE (DRYSPONGE_CAPACITYSIZE+DRYSPONGE_XSIZE)
#define DRYSPONGE_DIVUP(a,b) (((a)+(b)-1)/(b))
#define DRYSPONGE_ROTR32(x,n) (0xFFFFFFFF & (((x)>>(n))|((x)<<(0x1F & (32-(n))))))
#define DRYSPONGE_ROTR64(x,n) (0xFFFFFFFFFFFFFFFF & (((x)>>(n))|((x)<<(0x3F & (64-(n))))))
#define DRYSPONGE_STATESIZE32 DRYSPONGE_DIVUP(DRYSPONGE_STATESIZE,4)
#define DRYSPONGE_CE_SIZE32 DRYSPONGE_DIVUP(DRYSPONGE_CE_SIZE,4)
#define DRYSPONGE_BLOCKSIZE32 DRYSPONGE_DIVUP(DRYSPONGE_BLOCKSIZE,4)
#define DRYSPONGE_CAPACITYSIZE32 DRYSPONGE_DIVUP(DRYSPONGE_CAPACITYSIZE,4)
#define DRYSPONGE_XSIZE32 DRYSPONGE_DIVUP(DRYSPONGE_XSIZE,4)
#define DRYSPONGE_KEYSIZE32 DRYSPONGE_DIVUP(DRYSPONGE_KEYSIZE,4)
#define DRYSPONGE_STATESIZE64 DRYSPONGE_DIVUP(DRYSPONGE_STATESIZE,8)
#define DRYSPONGE_CE_SIZE64 DRYSPONGE_DIVUP(DRYSPONGE_CE_SIZE,8)
#define DRYSPONGE_BLOCKSIZE64 DRYSPONGE_DIVUP(DRYSPONGE_BLOCKSIZE,8)
#define DRYSPONGE_CAPACITYSIZE64 DRYSPONGE_DIVUP(DRYSPONGE_CAPACITYSIZE,8)
#define DRYSPONGE_XSIZE64 DRYSPONGE_DIVUP(DRYSPONGE_XSIZE,8)
#define DRYSPONGE_KEYSIZE64 DRYSPONGE_DIVUP(DRYSPONGE_KEYSIZE,8)
#define DRYSPONGE_TAGSIZE64 DRYSPONGE_DIVUP(DRYSPONGE_TAGSIZE,8)
#define DRYSPONGE_KEYMAXSIZE64 DRYSPONGE_DIVUP(DRYSPONGE_KEYMAXSIZE,8)
#define DRYSPONGE_NONCESIZE64 DRYSPONGE_DIVUP(DRYSPONGE_NONCESIZE,8)
#if DRYSPONGE_NONCESIZE < 12
#error "DRYSPONGE_NONCESIZE < 12"
#endif
#if DRYSPONGE_KEYSIZE < 16
#error "DRYSPONGE_KEYSIZE < 16"
#endif
#if DRYSPONGE_DIGESTSIZE < 2*DRYSPONGE_KEYSIZE
#error "DRYSPONGE_DIGESTSIZE < 2*DRYSPONGE_KEYSIZE"
#endif
#if DRYSPONGE_ACCUMULATE_FACTOR > ((DRYSPONGE_CAPACITYSIZE/4)/DRYSPONGE_BLOCKSIZE32)
#error "DRYSPONGE_ACCUMULATE_FACTOR > ((DRYSPONGE_CAPACITYSIZE/4)/DRYSPONGE_BLOCKSIZE32)"
#endif
#ifdef DRYSPONGE_EXT
#define DRYSPONGE_EXT_ARG (&(ctx->ext))
#else
#define DRYSPONGE_EXT_ARG 0
#endif
// Pack domain-separation info: bit0 = padded flag, bit1 = finalize flag,
// bits 2+ = domain identifier (DRYSPONGE_DD / DA / DM / DS).
// Fix: the debug printf used %d for the unsigned arguments padded and
// finalize (format/argument mismatch); all three now use %u.
DRYSPONGE_FUNC unsigned int DRYSPONGE_DSINFO(unsigned int padded, unsigned int domain, unsigned int finalize){
#if DRYSPONGE_DBG_EN
    bytes_utiles_printf(" Adding DS: padded=%u, domain=%u, finalize=%u\n",padded,domain,finalize);
#endif
    return padded+(finalize<<1)+(domain<<2);
}
// Rotate x right by n bits, n < 32. The underlying macro masks the left
// shift count with 0x1F, so n == 0 is safe (no shift by 32).
DRYSPONGE_FUNC uint32_t DRYSPONGE_rotr32(uint32_t x, unsigned int n){
assert(n<32);
return DRYSPONGE_ROTR32(x,n);
}
// Rotate x right by n bits, n < 64. The underlying macro masks the left
// shift count with 0x3F, so n == 0 is safe (no shift by 64).
DRYSPONGE_FUNC uint64_t DRYSPONGE_rotr64(uint64_t x, unsigned int n){
assert(n<64);
return DRYSPONGE_ROTR64(x,n);
}
// y = a XOR b over exactly one block (DRYSPONGE_BLOCKSIZE bytes),
// byte-wise so no alignment is required.
DRYSPONGE_FUNC void DRYSPONGE_xor(
    const uint8_t *const a,//exactly one block of input
    const uint8_t *const b,
    uint8_t *const y
){
    unsigned int i = 0;
    while (i < DRYSPONGE_BLOCKSIZE) {
        y[i] = a[i] ^ b[i];
        i++;
    }
}
// Little-endian load of 2 bytes into *x.
DRYSPONGE_FUNC void DRYSPONGE_load16(uint16_t* x, const uint8_t*const in) {
    uint16_t lo = in[0];
    uint16_t hi = in[1];
    *x = (uint16_t)(lo | (hi << 8));
}
// Little-endian load of 4 bytes into *x.
DRYSPONGE_FUNC void DRYSPONGE_load32(uint32_t* x, const uint8_t*const in) {
    uint32_t v = 0;
    for (int i = 3; i >= 0; i--) {
        v = (v << 8) | in[i];
    }
    *x = v;
}
// Little-endian store of x into out[0..3].
DRYSPONGE_FUNC void DRYSPONGE_store32(uint8_t* out, uint32_t x) {
    out[0] = (uint8_t)(x);
    out[1] = (uint8_t)(x >> 8);
    out[2] = (uint8_t)(x >> 16);
    out[3] = (uint8_t)(x >> 24);
}
// Little-endian load of 8 bytes into *x.
// Fix: `in` is now const, consistent with DRYSPONGE_load16/load32
// (backward compatible -- callers may still pass non-const buffers).
DRYSPONGE_FUNC void DRYSPONGE_load64(uint64_t* x, const uint8_t* in) {
    *x = 0;
    for (unsigned int i = 0; i < 8; i++) {
        uint64_t b = in[i];
        *x = *x | (b << (8 * i));
    }
}
// Little-endian store of x into out[0..7]. The (void) casts reference
// helpers unused in some configurations to keep -Wunused-function quiet.
DRYSPONGE_FUNC void DRYSPONGE_store64(uint8_t* out, uint64_t x) {
    (void)DRYSPONGE_rotr32;
    (void)DRYSPONGE_load16;
    (void)DRYSPONGE_store32;
    for (unsigned int i = 0; i < 8; i++, x >>= 8) {
        out[i] = (uint8_t)x;
    }
}
#endif
#ifndef __DRYSPONGE_DBG_SUPPORT_H__
#define __DRYSPONGE_DBG_SUPPORT_H__
#define DRYSPONGE_DBG_NONE 0
#define DRYSPONGE_DBG_ALG_IO 1
#define DRYSPONGE_DBG_F_IO 2
#define DRYSPONGE_DBG_ROUND_IO 3
#define DRYSPONGE_DBG_FULL 4
#if DRYSPONGE_DBG_EN
#define DRYSPONGE_DBG(a) a;
#else
#define DRYSPONGE_DBG(a)
#endif
#if DRYSPONGE_DBG_EN
#include <assert.h>
#include <stdio.h>
#include "bytes_utils.h"
// Debug-only helper: dump the capacity C, the x words and the rate R in
// hex, wrapping at `linesize` bytes per printed line.
static void DRYSPONGE_print_state(
DRYSPONGE_t *const ctx
){
// reference unused helpers so -Wunused-function stays quiet
(void)xor_bytes;
(void)println_128;
(void)bytes_utils_remove_unused_warnings;
unsigned int linesize = 32;
if(linesize<DRYSPONGE_BLOCKSIZE) linesize = DRYSPONGE_BLOCKSIZE;
unsigned int remaining = DRYSPONGE_CAPACITYSIZE;
const uint8_t*const c = (const uint8_t*const)ctx->c;
for(unsigned int i=0;i<DRYSPONGE_DIVUP(DRYSPONGE_CAPACITYSIZE,linesize);i++){
bytes_utiles_printf( "  C[%2u] = ",i);
unsigned int len = linesize < remaining ? linesize : remaining;
print_bytes_sep("",c+i*linesize,len,"\n","");
remaining -= len;
}
remaining = DRYSPONGE_XSIZE;
const uint8_t*const x = (const uint8_t*const)ctx->x;
for(unsigned int i=0;i<DRYSPONGE_DIVUP(DRYSPONGE_XSIZE,linesize);i++){
bytes_utiles_printf( "  X[%2u] = ",i);
unsigned int len = linesize < remaining ? linesize : remaining;
print_bytes_sep("",x+i*linesize,len,"\n","");
remaining -= len;
}
print_bytes_sep("  R     = ",ctx->r,DRYSPONGE_BLOCKSIZE,"\n","");
}
#endif
#endif
/**
DrySponge
Sebastien Riou, January 6th 2019
c99 little endian 32 bit implementation meant to fit in the supercop framework
Note: although this is faster than the ref implementation we noticed that it is
still several times slower compared to what can be done with assembly.
*/
#ifndef __DRYSPONGE_H__
#define __DRYSPONGE_H__
#include "drysponge_common.h"
//assume 32-bit alignment is enough to access uint64_t since we target 32-bit CPUs
#define ALIGN64 4
//#define ALIGN64 8
#ifndef DRYSPONGE_DBG_EN
#define DRYSPONGE_DBG_EN 0
#endif
// DrySponge state.
typedef struct DRYSPONGE_struct_t {
uint64_t c[DRYSPONGE_CAPACITYSIZE64]; // capacity (permutation state)
uint64_t r[DRYSPONGE_BLOCKSIZE64]; // rate: accumulated output block
uint64_t x[DRYSPONGE_XSIZE64]; // key-derived words used by the mix phase
uint8_t *obuf; // current output write pointer
uint64_t fcnt; // F/G invocation counter (debug traces only)
#ifdef DRYSPONGE_EXT
DRYSPONGE_EXT_t ext; // pending domain-separation bits
#endif
unsigned int rounds; // permutation rounds per G call
} DRYSPONGE_t;
#include "drysponge_dbg_support.h"
// y = a XOR b over exactly one block, processed as 64-bit words
// (requires suitably aligned buffers).
static void DRYSPONGE_xor64(
    const uint64_t *const a,//exactly one block of input
    const uint64_t *const b,
    uint64_t *const y
){
    unsigned int w = 0;
    while (w < DRYSPONGE_BLOCKSIZE64) {
        y[w] = a[w] ^ b[w];
        w++;
    }
}
//static void DRYSPONGE_xor32(
// const uint32_t *const a,//exactly one block of input
// const uint32_t *const b,
// uint32_t *const y
//){
// for(unsigned int i=0;i<DRYSPONGE_BLOCKSIZE32;i++){
// y[i] = a[i] ^ b[i];
// }
//}
#ifdef DRYSPONGE_OPT_G
// Optimized G: delegate to the external routine bound to
// DRYSPONGE_OPT_G (declared below as drygascon128_g).
void drygascon128_g(uint64_t* x, uint32_t rounds);
static void DRYSPONGE_g(
DRYSPONGE_t *const ctx
){
DRYSPONGE_OPT_G((uint64_t*)&(ctx->c),ctx->rounds);
}
#else
// The round function G: zero the rate r, then run ctx->rounds core
// permutation rounds, after each one accumulating (squeezing) capacity
// material into r: DRYSPONGE_ACCUMULATE_FACTOR block-sized chunks of c,
// each rotated by k 32-bit words before the XOR.
// Fix: debug printfs used %lu for the uint64_t fcnt (wrong on 32-bit and
// LLP64 targets) and %d for the unsigned loop counter j.
static void DRYSPONGE_g(
    DRYSPONGE_t *const ctx
){
#if DRYSPONGE_DBG_EN
    printf("   G entry %llu:\n",(unsigned long long)ctx->fcnt); /* was %lu */
    DRYSPONGE_print_state(ctx);
#endif
    ctx->fcnt++;
    DRYSPONGE_xor64(ctx->r,ctx->r,ctx->r);//r=0
    for(unsigned int j = 0;j<ctx->rounds;j++){
#if DRYSPONGE_DBG_EN >= DRYSPONGE_DBG_ROUND_IO
        printf("     CoreRound entry %u:\n",j); /* was %d */
        DRYSPONGE_print_state(ctx);
#endif
        DRYSPONGE_CoreRound(ctx,j);
        // accumulate rotated block-sized chunks of the capacity into r
        uint32_t r32[DRYSPONGE_BLOCKSIZE32];
        uint32_t cpart[DRYSPONGE_BLOCKSIZE32];
        memcpy(r32,ctx->r,sizeof(r32));
        for(unsigned int k=0;k<DRYSPONGE_ACCUMULATE_FACTOR;k++){
            memcpy(cpart,ctx->c+k*DRYSPONGE_BLOCKSIZE64,sizeof(cpart));
            for(unsigned int i=0;i<DRYSPONGE_BLOCKSIZE32;i++){
                r32[i]^=cpart[(i+k)%DRYSPONGE_BLOCKSIZE32];
            }
        }
        memcpy(ctx->r,r32,sizeof(r32));
    }
}
#endif
#ifdef DRYSPONGE_OPT_F
// OPT_F variant: the optimized F consumes the raw DS bits itself, so no
// pre-shift is applied (contrast with the generic DomainSeparator).
static void DRYSPONGE_DomainSeparator(
DRYSPONGE_EXT_t *const ext,
unsigned int dsinfo
){
*ext = dsinfo;
}
// Optimized F: delegate to the external routine bound to
// DRYSPONGE_OPT_F; pending DS bits are passed (truncated to 32 bits)
// and cleared, matching the consume-once contract of the generic path.
void drygascon128_f(uint64_t* x, uint32_t*in,uint32_t ds,uint32_t rounds);
static void DRYSPONGE_f(
DRYSPONGE_t *const ctx,
const uint8_t *const i
){
DRYSPONGE_OPT_F((uint64_t*)&(ctx->c),(uint32_t*)i,(uint32_t)ctx->ext,ctx->rounds);
ctx->ext=0;
}
#else
// F = MixPhase (absorb one block `i`) followed by G (permute + squeeze).
// Fix: the debug printf used %lu for the uint64_t fcnt, which is wrong
// on 32-bit and LLP64 targets; now %llu with an explicit widening cast.
static void DRYSPONGE_f(
    DRYSPONGE_t *const ctx,
    const uint8_t *const i
){
#if DRYSPONGE_DBG_EN
    printf("  F entry %llu:\n",(unsigned long long)ctx->fcnt); /* was %lu */
    DRYSPONGE_print_state(ctx);
    print_bytes_sep("  I     = ",i,DRYSPONGE_BLOCKSIZE,"\n","");
#endif
    DRYSPONGE_MixPhase(ctx,i);
#if DRYSPONGE_DBG_EN >= DRYSPONGE_DBG_ROUND_IO
    printf("   After mix phase:\n");
    DRYSPONGE_print_state(ctx);
#endif
    DRYSPONGE_g(ctx);
}
#endif
// Load the key into the sponge state. Three key lengths are supported:
// - CAPACITYSIZE+XSIZE: c and x loaded directly from the key;
// - KEYSIZE+XSIZE: c filled by repeating the 16-byte key, x from the
//   tail (the caller guarantees the x words are pairwise distinct);
// - KEYSIZE: c filled by repeating the key, then permutation rounds
//   derive x (rerun until its words are pairwise distinct), and c's
//   first XSIZE bytes are finally restored from the key.
static void DRYSPONGE_set_key(
DRYSPONGE_t *const ctx,
const uint8_t *const key,
const unsigned int keylen
){
assert(DRYSPONGE_KEYSIZE<=keylen);
const unsigned int midkeysize = DRYSPONGE_KEYSIZE+DRYSPONGE_XSIZE;
const unsigned int fullkeysize = DRYSPONGE_CAPACITYSIZE+DRYSPONGE_XSIZE;
if(DRYSPONGE_KEYSIZE!=keylen){//all words for x assumed to be different
if(fullkeysize == keylen){
memcpy(ctx->c,key,DRYSPONGE_CAPACITYSIZE);
memcpy(ctx->x,key+DRYSPONGE_CAPACITYSIZE,DRYSPONGE_XSIZE);
} else {
uint8_t c[DRYSPONGE_CAPACITYSIZE];
uint8_t x[DRYSPONGE_XSIZE];
assert(midkeysize==keylen);
// fill the capacity by repeating the key
for(unsigned int i=0;i<DRYSPONGE_CAPACITYSIZE;i++){
c[i] = key[i%DRYSPONGE_KEYSIZE];
}
for(unsigned int i=0;i<DRYSPONGE_XSIZE;i++){
x[i] = key[DRYSPONGE_KEYSIZE+i];
}
memcpy(ctx->c,c,DRYSPONGE_CAPACITYSIZE);
memcpy(ctx->x,x,DRYSPONGE_XSIZE);
}
}else{
uint8_t c[DRYSPONGE_CAPACITYSIZE];
for(unsigned int i=0;i<DRYSPONGE_CAPACITYSIZE;i++){
c[i] = key[i%DRYSPONGE_KEYSIZE];
}
memcpy(ctx->c,c,DRYSPONGE_CAPACITYSIZE);
DRYSPONGE_CoreRound(ctx,0);
//need to fixup x such that all words are different
unsigned int modified=1;
while(modified){
uint32_t c32[DRYSPONGE_CAPACITYSIZE32];
memcpy(c32,ctx->c,DRYSPONGE_CAPACITYSIZE);
modified=0;
// rerun a round until the first XSIZE32 words are pairwise distinct
for(unsigned int i=0;i<DRYSPONGE_XSIZE32-1;i++){
for(unsigned int j=i+1;j<DRYSPONGE_XSIZE32;j++){
if(c32[i]==c32[j]){
DRYSPONGE_CoreRound(ctx,0);
modified=1;
break;
}
}
if(modified) break;
}
}
memcpy(ctx->x,ctx->c,DRYSPONGE_XSIZE);
memcpy(ctx->c,key,DRYSPONGE_XSIZE);
}
uint32_t x32[DRYSPONGE_XSIZE32];// = (uint32_t *const)ctx->x;
memcpy(x32,ctx->x,DRYSPONGE_XSIZE);
//sanity check: all words in x shall be different
for(unsigned int i=0;i<DRYSPONGE_XSIZE32-1;i++){
for(unsigned int j=i+1;j<DRYSPONGE_XSIZE32;j++){
assert(x32[i]!=x32[j]);
}
}
}
// Copy up to one block from ib into ob; when iblen < BLOCKSIZE, append
// the 0x01 marker and zero-fill the remainder.
// Returns 1 when padding was applied, 0 for a full block.
static unsigned int DRYSPONGE_padding(
    const uint8_t *const ib,//one block of input or less
    uintptr_t iblen,
    uint8_t *const ob//exactly one block
){
    assert(iblen<=DRYSPONGE_BLOCKSIZE);
    memcpy(ob, ib, iblen);
    if (iblen >= DRYSPONGE_BLOCKSIZE) {
        return 0; // full block: nothing to pad
    }
    ob[iblen] = 0x01;
    // zero-length memset is well defined when iblen+1 == BLOCKSIZE
    memset(ob + iblen + 1, 0, DRYSPONGE_BLOCKSIZE - iblen - 1);
    return 1;
}
// Absorb `alen` bytes in domain `ds`: all full blocks through F, then a
// padded final block carrying the domain-separation info. With
// alen == 0 a single padded empty block is still absorbed.
static void DRYSPONGE_absorb_only(
DRYSPONGE_t *const ctx,
const uint8_t *const ad,
size_t alen,
unsigned int ds,
unsigned int finalize
){
const uint8_t *iad = ad;
size_t a = (alen + DRYSPONGE_BLOCKSIZE - 1) / DRYSPONGE_BLOCKSIZE; // block count
if(a){
for(size_t i = 0; i<a-1; i++){//process all blocks except last one
DRYSPONGE_f(ctx,iad);
iad+=DRYSPONGE_BLOCKSIZE;
}
}
uint8_t last_block[DRYSPONGE_BLOCKSIZE];
uintptr_t remaining = ad+alen-iad;
uint8_t apad = DRYSPONGE_padding(iad,remaining,last_block);
DRYSPONGE_DomainSeparator(DRYSPONGE_EXT_ARG,DRYSPONGE_DSINFO(apad,ds,finalize));
DRYSPONGE_f(ctx,last_block);
}
// Copy `remaining` bytes of output from the rate, running G between
// successive blocks but not after the last one.
static void DRYSPONGE_squeez_only(
    DRYSPONGE_t *const ctx,
    uint8_t *out,
    unsigned int remaining
){
    while (remaining) {
        unsigned int chunk = (remaining > DRYSPONGE_BLOCKSIZE)
            ? DRYSPONGE_BLOCKSIZE : remaining;
        memcpy(out, ctx->r, chunk);
        out += chunk;
        remaining -= chunk;
        if (remaining) DRYSPONGE_g(ctx);
    }
}
// Reset the per-message parts of the state: pending DS bits, the F
// counter and the rate. c and x are loaded later by DRYSPONGE_set_key().
static void DRYSPONGE_init_ctx(
DRYSPONGE_t *const ctx
){
#ifdef DRYSPONGE_EXT
memset(DRYSPONGE_EXT_ARG,0,sizeof(DRYSPONGE_EXT_t));
#endif
ctx->fcnt=0;
memset(ctx->r,0x00,DRYSPONGE_BLOCKSIZE);
}
// Hash `mlen` bytes of `message` into a DRYSPONGE_DIGESTSIZE digest:
// key the sponge with the fixed constant CST_H (only the first
// KEYSIZE+XSIZE bytes are consumed), absorb the message in the hash
// domain with finalize set, then squeeze the digest.
// Fix: the debug printf used %lu for the size_t mlen; %zu is the
// correct conversion for size_t.
static void DRYSPONGE_hash(
    const uint8_t *const message,
    const size_t mlen,
    uint8_t *const digest
){
    DRYSPONGE_t ctx_storage;
    DRYSPONGE_t *const ctx = &ctx_storage;
    DRYSPONGE_init_ctx(ctx);
    ctx->rounds=DRYSPONGE_ROUNDS;
#if DRYSPONGE_DBG_EN
    printf("Hashing %zu bytes message: ",mlen); /* was %lu */
    print_bytes_sep("",message,mlen,"\n","");
#endif
    // fixed hashing key material
    const uint64_t CST_H[] = {
        0xd308a385886a3f24,
        0x447370032e8a1913,
        0xd0319f29223809a4,
        0x896c4eec98fa2e08,
        0x7713d038e6212845,
        0x6c0ce934cf6654be,
        0xdd507cc9b729acc0,
        0x170947b5b5d5843f,
        0x1bfb7989d9d51692,
        0xacb5df98a60b31d1,
        0xb7df1ad0db72fd2f,
        0x967e266aedafe1b8,
        0x997f2cf145907cba,
        0xf76c91b34799a124,
        0x16fc8e85e2f20108,
        0x694e5771d8206963,
    };
    DRYSPONGE_set_key(ctx,(const uint8_t*)CST_H,DRYSPONGE_KEYSIZE+DRYSPONGE_XSIZE);
    DRYSPONGE_absorb_only(ctx,message,mlen,DRYSPONGE_DS,1);
    DRYSPONGE_squeez_only(ctx,digest,DRYSPONGE_DIGESTSIZE);
#if DRYSPONGE_DBG_EN
    printf(" Final state:\n");
    DRYSPONGE_print_state(ctx);
    print_bytes_sep(" Digest: ",digest,DRYSPONGE_DIGESTSIZE,"\n","");
#endif
}
// Start an AEAD operation: reset state, load the key, then absorb the
// nonce (domain DD) using the longer INIT round count, and record the
// output buffer pointer. Normal round count is restored at the end.
// NOTE(review): in the small-nonce path the first DomainSeparator call
// below is overwritten by the second before any F consumes it -- it
// appears redundant; confirm against the reference specification.
static void DRYSPONGE_init(
DRYSPONGE_t *const ctx,
const uint8_t *const key,
const unsigned int klen,
const uint8_t *const nonce,
uint8_t *out_buffer,//output buffer
unsigned int finalize
){
DRYSPONGE_init_ctx(ctx);
ctx->rounds=DRYSPONGE_ROUNDS;
DRYSPONGE_set_key(ctx,key,klen);
ctx->obuf = out_buffer;
DRYSPONGE_DomainSeparator(DRYSPONGE_EXT_ARG,DRYSPONGE_DSINFO(0,DRYSPONGE_DD,finalize));
ctx->rounds=DRYSPONGE_INIT_ROUNDS;
#if DRYSPONGE_NONCESIZE>DRYSPONGE_BLOCKSIZE
assert(0==(DRYSPONGE_NONCESIZE%DRYSPONGE_BLOCKSIZE));
unsigned int nloops = DRYSPONGE_DIVUP(DRYSPONGE_NONCESIZE,DRYSPONGE_BLOCKSIZE);
for(unsigned int i=0;i<nloops-1;i++){
DRYSPONGE_f(ctx,nonce+i*DRYSPONGE_BLOCKSIZE);
}
DRYSPONGE_DomainSeparator(DRYSPONGE_EXT_ARG,DRYSPONGE_DSINFO(0,DRYSPONGE_DD,finalize));
DRYSPONGE_f(ctx,nonce+(nloops-1)*DRYSPONGE_BLOCKSIZE);
#else
uint8_t block[DRYSPONGE_BLOCKSIZE] = {0};
memcpy(block,nonce,DRYSPONGE_NONCESIZE);
DRYSPONGE_DomainSeparator(DRYSPONGE_EXT_ARG,DRYSPONGE_DSINFO(0,DRYSPONGE_DD,finalize));
DRYSPONGE_f(ctx,block);
#endif
ctx->rounds=DRYSPONGE_ROUNDS;
}
// Encrypt one full block (alignment-safe path): ciphertext = rate XOR
// plaintext, then absorb the plaintext block and advance obuf.
static void DRYSPONGE_enc_core(
DRYSPONGE_t *const ctx,
const uint64_t *const ib//exactly one block of input
){
DRYSPONGE_xor((uint8_t *)ctx->r,(uint8_t *)ib,ctx->obuf);
DRYSPONGE_f(ctx,(uint8_t *)ib);
ctx->obuf+=DRYSPONGE_BLOCKSIZE;
}
// Encrypt one full block using 64-bit XORs (both buffers aligned).
// NOTE(review): the assert checks 8-byte alignment, but the caller
// (DRYSPONGE_enc_blocks) only guarantees ALIGN64 == 4 -- on a
// 4-but-not-8-aligned obuf this assert can fire in debug builds;
// confirm intended alignment contract.
static void DRYSPONGE_enc_core_aligned(
DRYSPONGE_t *const ctx,
const uint64_t *const ib//exactly one block of input
){
assert((((uintptr_t)ctx->obuf)%8) == 0);
DRYSPONGE_xor64(ctx->r,ib,(uint64_t*const)ctx->obuf);
DRYSPONGE_f(ctx,(uint8_t *)ib);
ctx->obuf+=DRYSPONGE_BLOCKSIZE;
}
// Encrypt m full blocks starting at im; returns the pointer just past
// the consumed input. Takes the 64-bit path when both input and output
// are ALIGN64-aligned, otherwise stages each block through buf64.
static const uint8_t* DRYSPONGE_enc_blocks(
DRYSPONGE_t *const ctx,
const uint8_t *im,//whole message
size_t m
){
// reference unused load/store helpers to silence -Wunused-function
(void)DRYSPONGE_load32;
(void)DRYSPONGE_store32;
(void)DRYSPONGE_load64;
(void)DRYSPONGE_store64;
uint64_t buf64[DRYSPONGE_BLOCKSIZE64];
const uint64_t *ib64;
#if DRYSPONGE_BLOCKSIZE % ALIGN64
unsigned int input_aligned = 0;
unsigned int output_aligned = 0;
#else
unsigned int input_aligned = 0==(((uintptr_t)im)%ALIGN64);
unsigned int output_aligned = 0==(((uintptr_t)ctx->obuf)%ALIGN64);
#endif
if(input_aligned && output_aligned){
for(size_t i = 0; i<m; i++){
ib64 = (const uint64_t*)im;
DRYSPONGE_enc_core_aligned(ctx,ib64);
im+=DRYSPONGE_BLOCKSIZE;
}
}else{
ib64 = buf64;
for(size_t i = 0; i<m; i++){
memcpy(buf64,im,DRYSPONGE_BLOCKSIZE);
DRYSPONGE_enc_core(ctx,ib64);//input is now aligned but output may not
im+=DRYSPONGE_BLOCKSIZE;
}
}
return im;
}
// Decrypt one full block (alignment-safe path): plaintext = rate XOR
// ciphertext, then absorb the recovered plaintext and advance obuf.
static void DRYSPONGE_dec_core(
DRYSPONGE_t *const ctx,
const uint8_t *const ib//exactly one block of input
){
DRYSPONGE_xor((uint8_t *)ctx->r,ib,ctx->obuf);
DRYSPONGE_f(ctx,ctx->obuf);
ctx->obuf+=DRYSPONGE_BLOCKSIZE;
}
// Decrypt one full block using 64-bit XORs (aligned buffers).
// NOTE(review): unlike DRYSPONGE_enc_core_aligned this variant has no
// alignment assert -- presumably intentional, but confirm.
static void DRYSPONGE_dec_core_aligned(
DRYSPONGE_t *const ctx,
const uint64_t *const ib//exactly one block of input
){
DRYSPONGE_xor64(ctx->r,ib,(uint64_t*const)ctx->obuf);
DRYSPONGE_f(ctx,ctx->obuf);
ctx->obuf+=DRYSPONGE_BLOCKSIZE;
}
// Decrypt m full blocks starting at im; returns the pointer just past
// the consumed ciphertext. Aligned buffers take the 64-bit path; the
// fallback works directly on the byte pointers.
static const uint8_t* DRYSPONGE_dec_blocks(
DRYSPONGE_t *const ctx,
const uint8_t *im,//whole message
size_t m
){
const uint64_t *ib64;
#if DRYSPONGE_BLOCKSIZE % ALIGN64
unsigned int input_aligned = 0;
unsigned int output_aligned = 0;
#else
unsigned int input_aligned = 0==(((uintptr_t)im)%ALIGN64);
unsigned int output_aligned = 0==(((uintptr_t)ctx->obuf)%ALIGN64);
#endif
if(input_aligned && output_aligned){
for(size_t i = 0; i<m; i++){
ib64 = (const uint64_t*)im;
DRYSPONGE_dec_core_aligned(ctx,ib64);
im+=DRYSPONGE_BLOCKSIZE;
}
}else{
for(size_t i = 0; i<m; i++){
DRYSPONGE_dec_core(ctx,im);
im+=DRYSPONGE_BLOCKSIZE;
}
}
return im;
}
// AEAD encrypt: init with key/nonce, absorb AD (domain DA), encrypt the
// message block-by-block (domain DM for the padded last block), then
// append the DRYSPONGE_TAGSIZE tag. *clen = mlen + tag size.
static void DRYSPONGE_enc(
const uint8_t *const key,
const unsigned int klen,
const uint8_t *const nonce,
const uint8_t *const message,
const size_t mlen,
const uint8_t * const ad,
const size_t alen,
uint8_t *ciphertext,
size_t *clen
){
const uint8_t *im = message;
DRYSPONGE_t ctx_storage;
DRYSPONGE_t *const ctx = &ctx_storage;
unsigned int finalize = (mlen|alen) ? 0 : 1; // nothing else to absorb?
DRYSPONGE_init(
ctx,
key,
klen,
nonce,
ciphertext,
finalize
);
if(alen){
finalize = mlen ? 0 : 1; // AD block is final only when no message follows
DRYSPONGE_absorb_only(ctx,ad,alen,DRYSPONGE_DA,finalize);
}
if(mlen){
size_t m = (mlen + DRYSPONGE_BLOCKSIZE - 1) / DRYSPONGE_BLOCKSIZE;
im=DRYSPONGE_enc_blocks(ctx,im,m-1);//process all blocks except the last one
uint64_t last_block64[DRYSPONGE_BLOCKSIZE64];
uint8_t*last_block=(uint8_t*)last_block64;
unsigned int remaining = message+mlen-im;
uint8_t mpad = DRYSPONGE_padding(im,remaining,last_block);
DRYSPONGE_DomainSeparator(DRYSPONGE_EXT_ARG,DRYSPONGE_DSINFO(mpad,DRYSPONGE_DM,1));
DRYSPONGE_enc_core(ctx,last_block64);//writing full block is fine since we still have the area reserved for the tag
ctx->obuf = ciphertext + mlen;//fix the size
}
DRYSPONGE_squeez_only(ctx,ctx->obuf,DRYSPONGE_TAGSIZE);
*clen = mlen+DRYSPONGE_TAGSIZE;
#if DRYSPONGE_DBG_EN
printf(" Final state:\n");
DRYSPONGE_print_state(ctx);
print_bytes_sep(" CipherText: ",ciphertext,*clen,"\n","");
#endif
}
//WARNING: this function writes plaintext into "message" before checking the tag.
//It is the responsibility of the caller to ensure that the "message" buffer is
//not accessible by anything else until this function has returned.
// AEAD decrypt: mirrors DRYSPONGE_enc, writing plaintext into `message`
// BEFORE verifying the tag (see warning above); on tag mismatch the
// plaintext is erased and ~DRYSPONGE_PASS is returned.
// Returns DRYSPONGE_PASS (0) on success, nonzero on failure.
static int DRYSPONGE_dec(
const uint8_t *const key,
const unsigned int klen,
const uint8_t *const nonce,
const uint8_t *const ciphertext,
const size_t clen,
const uint8_t * const ad,
const size_t alen,
uint8_t *message
){
if(clen<DRYSPONGE_TAGSIZE) return -1;
size_t mlen = clen - DRYSPONGE_TAGSIZE;
const uint8_t *im = ciphertext;
DRYSPONGE_t ctx_storage;
DRYSPONGE_t *const ctx = &ctx_storage;
unsigned int finalize = (mlen|alen) ? 0 : 1;
DRYSPONGE_init(
ctx,
key,
klen,
nonce,
message,
finalize
);
if(alen){
finalize = mlen ? 0 : 1;
DRYSPONGE_absorb_only(ctx,ad,alen,DRYSPONGE_DA,finalize);
}
if(mlen){
size_t m = (mlen + DRYSPONGE_BLOCKSIZE - 1) / DRYSPONGE_BLOCKSIZE;
im=DRYSPONGE_dec_blocks(ctx,im,m-1);
uint64_t last_block64[DRYSPONGE_BLOCKSIZE64];
uint8_t*last_block=(uint8_t*)last_block64;
unsigned int remaining = ciphertext+mlen-im;
memcpy(last_block,im,remaining);
// recover the (partial) plaintext of the last block: pt = rate ^ ct
DRYSPONGE_xor64(ctx->r,last_block64,last_block64);
// NOTE(review): padding() here copies last_block onto itself
// (memcpy with identical src/dst) -- works in practice but is
// formally undefined; consider memmove or skipping the copy.
uint8_t mpad = DRYSPONGE_padding(last_block,remaining,last_block);
im+=remaining;
DRYSPONGE_DomainSeparator(DRYSPONGE_EXT_ARG,DRYSPONGE_DSINFO(mpad,DRYSPONGE_DM,1));
memcpy(ctx->obuf,last_block,remaining);
DRYSPONGE_f(ctx,last_block);
}
uint64_t tag64[DRYSPONGE_TAGSIZE64];
uint8_t*tag = (uint8_t*)tag64;
DRYSPONGE_squeez_only(ctx,tag,DRYSPONGE_TAGSIZE);
DRYSPONGE_DBG(print_bytes_sep("expected tag=",im,DRYSPONGE_TAGSIZE,"\n",""));
DRYSPONGE_DBG(print_bytes_sep("computed tag=",tag,DRYSPONGE_TAGSIZE,"\n",""));
if(memcmp(tag,im,DRYSPONGE_TAGSIZE)){
memset(message,0,mlen);//erase all output
return ~DRYSPONGE_PASS;
}
return DRYSPONGE_PASS;
}
#endif
#include "crypto_aead.h"
#define DRYSPONGE_OPT_G drygascon128_g
#define DRYSPONGE_OPT_F drygascon128_f
#include "drysponge.h"
/**
generating a ciphertext c[0],c[1],...,c[*clen-1]
from a plaintext m[0],m[1],...,m[mlen-1]
and associated data ad[0],ad[1],...,ad[adlen-1]
and nonce npub[0],npub[1],...
and secret key k[0],k[1],...
the implementation shall not use nsec
*/
// supercop/NIST-LWC AEAD encryption entry point for DryGascon128.
// nsec is unused per the API contract. Always returns 0.
// NOTE(review): mlen/adlen (unsigned long long) are narrowed to size_t
// inside DRYSPONGE_enc on 32-bit targets -- confirm inputs fit.
int crypto_aead_encrypt(
unsigned char *c,unsigned long long *clen,
const unsigned char *m,unsigned long long mlen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
){
(void) nsec; //avoid warning
(void) DRYSPONGE_hash; //avoid warning
size_t impl_clen;
DRYSPONGE_enc(k,DRYSPONGE_KEYSIZE,npub,m,mlen,ad,adlen,c,&impl_clen);
*clen = impl_clen; // widen size_t result to the API's unsigned long long
return 0;
}
/**
the code for the AEAD implementation goes here,
generating a plaintext m[0],m[1],...,m[*mlen-1]
and secret message number nsec[0],nsec[1],...
from a ciphertext c[0],c[1],...,c[clen-1]
and associated data ad[0],ad[1],...,ad[adlen-1]
and nonce number npub[0],npub[1],...
and secret key k[0],k[1],...
*/
// supercop/NIST-LWC AEAD decryption entry point for DryGascon128.
// Returns 0 and sets *mlen on success, -1 on authentication failure
// (DRYSPONGE_dec zeroes any partial plaintext in that case).
int crypto_aead_decrypt(
unsigned char *m,unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c,unsigned long long clen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
){
(void) nsec; //avoid warning
if(DRYSPONGE_PASS!=DRYSPONGE_dec(k,DRYSPONGE_KEYSIZE,npub,c,clen,ad,adlen,m))
return -1;
*mlen = clen - DRYSPONGE_TAGSIZE;
return 0;
}
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#ifndef GIFT_COFB_H_
#define GIFT_COFB_H_
#define TAG_SIZE 16
#define COFB_ENCRYPT 1
#define COFB_DECRYPT 0
#define XOR_BLOCK(x, y, z) ({ \
(x)[0] = (y)[0] ^ (z)[0]; \
(x)[1] = (y)[1] ^ (z)[1]; \
(x)[2] = (y)[2] ^ (z)[2]; \
(x)[3] = (y)[3] ^ (z)[3]; \
})
#define XOR_TOP_BAR_BLOCK(x, y) ({ \
(x)[0] ^= (y)[0]; \
(x)[1] ^= (y)[1]; \
})
#endif // GIFT_COFB_H_
\ No newline at end of file
#include <string.h>
#include "api.h"
#include "cofb.h"
#include "giftb128.h"
/*
 * Write the 16-byte padded block built from the first 'no_of_bytes' bytes
 * of 's' into 'd'. A full block is copied verbatim; a partial (possibly
 * empty) block is 10*-padded: data, then a 0x80 byte, then zeros. The
 * byte layout matches the original word-masking version on the
 * little-endian targets this code assumes.
 *
 * Unlike the word-wise version, this reads exactly 'no_of_bytes' bytes
 * from 's', so no out-of-bounds read happens on the final partial word.
 */
static inline void padding(u32* d, const u32* s, const u32 no_of_bytes){
    if (no_of_bytes >= GIFT128_BLOCK_SIZE) { /* full block: plain copy */
        d[0] = s[0];
        d[1] = s[1];
        d[2] = s[2];
        d[3] = s[3];
    }
    else { /* partial block: data || 0x80 || 0x00... */
        u8 *db = (u8 *)d; /* byte access through char type: no aliasing issue */
        memset(db, 0x00, GIFT128_BLOCK_SIZE);
        memcpy(db, s, no_of_bytes);
        db[no_of_bytes] = 0x80;
    }
}
// Double the 64-bit COFB offset L in place (multiply by x in GF(2^64)).
// Each word holds bytes in the order produced by giftb128_encrypt_block;
// the "(& 0x7f7f7f7f) << 1 | (& 0x80808080) >> 15" pattern shifts the
// whole 64-bit value left one bit across byte boundaries, and the bit
// carried out of the top is folded back in with the constant 27 (0x1b) —
// presumably the GF(2^64) reduction polynomial; see the GIFT-COFB spec.
static inline void double_half_block(u32* x) {
u32 tmp0;
tmp0 = (x)[0];
(x)[0] = (((x)[0] & 0x7f7f7f7f) << 1) | (((x)[0] & 0x80808080) >> 15);
(x)[0] |= ((x)[1] & 0x80808080) << 17;
(x)[1] = (((x)[1] & 0x7f7f7f7f) << 1) | (((x)[1] & 0x80808080) >> 15);
(x)[1] ^= (((tmp0 >> 7) & 1) * 27) << 24;
}
// Triple the 64-bit COFB offset in place: 3*L = 2*L ^ L.
// The first five statements replicate double_half_block(); the final
// two XORs add the saved original value back in.
static inline void triple_half_block(u32* x) {
u32 tmp0, tmp1;
tmp0 = (x)[0];
tmp1 = (x)[1];
(x)[0] = (((x)[0] & 0x7f7f7f7f) << 1) | (((x)[0] & 0x80808080) >> 15);
(x)[0] |= ((x)[1] & 0x80808080) << 17;
(x)[1] = (((x)[1] & 0x7f7f7f7f) << 1) | (((x)[1] & 0x80808080) >> 15);
(x)[1] ^= (((tmp0 >> 7) & 1) * 27) << 24;
(x)[0] ^= tmp0;
(x)[1] ^= tmp1;
}
// COFB feedback function G applied in place to the 128-bit state x:
// the new first half is the old second half, and the new second half is
// the old first half rotated left by one bit (same cross-byte shift
// pattern as double_half_block).
static inline void g(u32 *x) {
u32 tmp0, tmp1;
tmp0 = (x)[0];
tmp1 = (x)[1];
(x)[0] = (x)[2];
(x)[1] = (x)[3];
(x)[2] = ((tmp0 & 0x7f7f7f7f) << 1) | ((tmp0 & 0x80808080) >> 15);
(x)[2] |= ((tmp1 & 0x80808080) << 17);
(x)[3] = ((tmp1 & 0x7f7f7f7f) << 1) | ((tmp1 & 0x80808080) >> 15);
(x)[3] |= ((tmp0 & 0x80808080) << 17);
}
// rho1: d = G(y) ^ pad(m), where m is an n-byte (possibly partial) block.
// NOTE: g() rotates y in place, so the caller's y is updated as a side effect.
static inline void rho1(u32* d, u32* y, u32* m, u32 n) {
g(y);
padding(d,m,n);
XOR_BLOCK(d, d, y);
}
// Encryption feedback: ciphertext c = y ^ m, then next input x = G(y) ^ pad(m).
static inline void rho(u32* y, u32* m, u32* x, u32* c, u32 n) {
XOR_BLOCK(c, y, m);
rho1(x, y, m, n);
}
// Decryption feedback: plaintext m = y ^ c, then next input x = G(y) ^ pad(m).
static inline void rho_prime(u32* y, u32*c, u32* x, u32* m, u32 n) {
XOR_BLOCK(m, y, c);
rho1(x, y, m, n);
}
/****************************************************************************
* Constant-time implementation of the GIFT-COFB authenticated cipher based on
* fixsliced GIFTb-128. Encryption/decryption is handled by the same function,
* depending on the 'mode' parameter (1/0).
****************************************************************************/
int giftcofb_crypt(u8* out, const u8* key, const u8* nonce, const u8* ad,
u32 ad_len, const u8* in, u32 in_len, const int encrypting) {
// Single code path for GIFT-COFB encryption (encrypting != 0) and
// decryption-with-verification (encrypting == 0).
//   Encrypt: out receives in_len ciphertext bytes followed by the
//            TAG_SIZE-byte tag; always returns 0.
//   Decrypt: in holds ciphertext||tag; out receives the plaintext.
//            Returns -1 if in_len < TAG_SIZE, 0 if the tag verifies,
//            and a nonzero OR of tag-byte differences otherwise (the
//            comparison loop has no early exit).
// NOTE(review): ad/in/Y are accessed through u32* casts — assumes a
// little-endian target that tolerates this alignment; confirm elsewhere.
u32 tmp0, tmp1, emptyA, emptyM, offset[2];
u32 input[4], rkey[80];
u8 Y[GIFT128_BLOCK_SIZE];
// For decryption, the trailing TAG_SIZE bytes of 'in' are the tag.
if (!encrypting) {
if (in_len < TAG_SIZE)
return -1;
in_len -= TAG_SIZE;
}
if(ad_len == 0)
emptyA = 1;
else
emptyA = 0;
if(in_len == 0)
emptyM =1;
else
emptyM = 0;
// Initialization: Y = E_K(nonce); the offset is the top half of Y.
gift128_keyschedule(key, rkey);
giftb128_encrypt_block(Y, rkey, nonce);
offset[0] = ((u32*)Y)[0];
offset[1] = ((u32*)Y)[1];
// Process all full associated-data blocks except the last.
while(ad_len > GIFT128_BLOCK_SIZE){
rho1(input, (u32*)Y, (u32*)ad, GIFT128_BLOCK_SIZE);
double_half_block(offset);
XOR_TOP_BAR_BLOCK(input, offset);
giftb128_encrypt_block(Y, rkey, (u8*)input);
ad += GIFT128_BLOCK_SIZE;
ad_len -= GIFT128_BLOCK_SIZE;
}
// Last AD block: offset is tripled once, twice more if AD is partial/empty,
// and twice more again if there is no message at all (domain separation).
triple_half_block(offset);
if((ad_len % GIFT128_BLOCK_SIZE != 0) || (emptyA))
triple_half_block(offset);
if(emptyM) {
triple_half_block(offset);
triple_half_block(offset);
}
rho1(input, (u32*)Y, (u32*)ad, ad_len);
XOR_TOP_BAR_BLOCK(input, offset);
giftb128_encrypt_block(Y, rkey, (u8*)input);
// Process all full message blocks except the last.
while (in_len > GIFT128_BLOCK_SIZE){
double_half_block(offset);
if (encrypting)
rho((u32*)Y, (u32*)in, input, (u32*)out, GIFT128_BLOCK_SIZE);
else
rho_prime((u32*)Y, (u32*)in, input, (u32*)out, GIFT128_BLOCK_SIZE);
XOR_TOP_BAR_BLOCK(input, offset);
giftb128_encrypt_block(Y, rkey, (u8*)input);
in += GIFT128_BLOCK_SIZE;
out += GIFT128_BLOCK_SIZE;
in_len -= GIFT128_BLOCK_SIZE;
}
// Last message block (skipped entirely when the message is empty).
if(!emptyM){
triple_half_block(offset);
if(in_len % GIFT128_BLOCK_SIZE != 0)
triple_half_block(offset);
if (encrypting) {
rho((u32*)Y, (u32*)in, input, (u32*)out, in_len);
out += in_len; // 'out' now points at where the tag goes
}
else {
rho_prime((u32*)Y, (u32*)in, input, (u32*)out, in_len);
in += in_len; // 'in' now points at the received tag
}
XOR_TOP_BAR_BLOCK(input, offset);
giftb128_encrypt_block(Y, rkey, (u8*)input);
}
if (encrypting) { // encryption mode
memcpy(out, Y, TAG_SIZE); // the tag is the first TAG_SIZE bytes of Y
return 0;
}
// decrypting
tmp0 = 0;
for(tmp1 = 0; tmp1 < TAG_SIZE; tmp1++)
tmp0 |= in[tmp1] ^ Y[tmp1]; // constant-time tag comparison
return tmp0;
}
/*
 * NIST LWC AEAD encryption wrapper: c = ciphertext || TAG_SIZE-byte tag,
 * *clen = mlen + TAG_SIZE. giftcofb_crypt() always returns 0 when
 * encrypting, so this cannot fail.
 * NOTE(review): mlen/adlen are narrowed from unsigned long long to u32 by
 * giftcofb_crypt's parameters — inputs of 2^32 bytes or more would be
 * truncated; confirm this is out of scope for the LWC use case.
 */
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
const unsigned char* m, unsigned long long mlen,
const unsigned char* ad, unsigned long long adlen,
const unsigned char* nsec, const unsigned char* npub,
const unsigned char* k) {
(void)nsec; //secret message numbers are unused
*clen = mlen + TAG_SIZE;
return giftcofb_crypt(c, k, npub, ad, adlen, m, mlen, COFB_ENCRYPT);
}
/*
 * NIST LWC AEAD decryption wrapper around giftcofb_crypt().
 * Returns 0 on success, -1 on failure (input shorter than the tag, or
 * tag mismatch). *mlen is written only on success, so it can no longer
 * underflow to a huge value when clen < TAG_SIZE, and it is never set
 * on authentication failure.
 */
int crypto_aead_decrypt(unsigned char* m, unsigned long long *mlen,
    unsigned char* nsec, const unsigned char* c,
    unsigned long long clen, const unsigned char* ad,
    unsigned long long adlen, const unsigned char* npub,
    const unsigned char *k) {
    (void)nsec;
    if (clen < TAG_SIZE)
        return -1; // too short to even contain the tag
    if (giftcofb_crypt(m, k, npub, ad, adlen, c, clen, COFB_DECRYPT) != 0)
        return -1; // authentication failed
    *mlen = clen - TAG_SIZE;
    return 0;
}
\ No newline at end of file
/* giftb128.h: interface to the fixsliced GIFT-128 routines implemented in
 * ARM assembly below. */
#ifndef GIFT128_H_
#define GIFT128_H_
#define KEY_SIZE 16
#define GIFT128_BLOCK_SIZE 16
typedef unsigned char u8;
typedef unsigned int u32;
// Expands the 16-byte key into 80 round-key words (rkey must hold 80 u32).
extern void gift128_keyschedule(const u8* key, u32* rkey);
// Encrypts one 16-byte block with GIFTb-128 using precomputed round keys.
extern void giftb128_encrypt_block(u8* out_block, const u32* rkey, const u8* in_block);
#endif // GIFT128_H_
\ No newline at end of file
/****************************************************************************
* Balanced ARM assembly implementation of the GIFT-128 block cipher. This
* implementation provides efficiency with limited impact on the code size.
* See "Fixslicing: A New GIFT Representation" paper available at
* https://eprint.iacr.org/2020/412 for more details.
****************************************************************************/
.syntax unified
.thumb
/*****************************************************************************
* Round constants look-up table according to the fixsliced representation.
*****************************************************************************/
.align 2
.type rconst,%object
rconst:
.word 0x10000008, 0x80018000, 0x54000002, 0x01010181
.word 0x8000001f, 0x10888880, 0x6001e000, 0x51500002
.word 0x03030180, 0x8000002f, 0x10088880, 0x60016000
.word 0x41500002, 0x03030080, 0x80000027, 0x10008880
.word 0x4001e000, 0x11500002, 0x03020180, 0x8000002b
.word 0x10080880, 0x60014000, 0x01400002, 0x02020080
.word 0x80000021, 0x10000080, 0x0001c000, 0x51000002
.word 0x03010180, 0x8000002e, 0x10088800, 0x60012000
.word 0x40500002, 0x01030080, 0x80000006, 0x10008808
.word 0xc001a000, 0x14500002, 0x01020181, 0x8000001a
.align 2
classical_key_update:
and r2, r10, r7, lsr #12
and r3, r7, r9
orr r2, r2, r3, lsl #4
and r3, r12, r7, lsr #2
orr r2, r2, r3
and r7, r7, #0x00030000
orr r7, r2, r7, lsl #14
str.w r7, [r1, #4] //1st classical key update
str.w r5, [r1], #8 //1st classical key update
and r2, r10, r6, lsr #12
and r3, r6, r9
orr r2, r2, r3, lsl #4
and r3, r12, r6, lsr #2
orr r2, r2, r3
and r6, r6, #0x00030000
orr r6, r2, r6, lsl #14
str.w r6, [r1, #4] //2nd classical key update
str.w r4, [r1], #8 //2nd classical key update
and r2, r10, r5, lsr #12
and r3, r5, r9
orr r2, r2, r3, lsl #4
and r3, r12, r5, lsr #2
orr r2, r2, r3
and r5, r5, #0x00030000
orr r5, r2, r5, lsl #14
str.w r5, [r1, #4] //3rd classical key update
str.w r7, [r1], #8 //3rd classical key update
and r2, r10, r4, lsr #12
and r3, r4, r9
orr r2, r2, r3, lsl #4
and r3, r12, r4, lsr #2
orr r2, r2, r3
and r4, r4, #0x00030000
orr r4, r2, r4, lsl #14
str.w r4, [r1, #4] //4th classical key update
str.w r6, [r1], #8 //4th classical key update
bx lr
.align 2
rearrange_rkey_0:
ldrd r6, r4, [r1]
eor r12, r6, r6, lsr #9
and r12, r12, r3
eor r6, r12
eor r6, r6, r12, lsl #9 //SWAPMOVE(r6, r6, 0x00550055, 9);
eor r12, r4, r4, lsr #9
and r12, r12, r3
eor r4, r12
eor r4, r4, r12, lsl #9 //SWAPMOVE(r4, r4, 0x00550055, 9);
eor r12, r6, r6, lsr #18
and r12, r12, r10
eor r6, r12
eor r6, r6, r12, lsl #18 //SWAPMOVE(r6, r6, 0x3333, 18);
eor r12, r4, r4, lsr #18
and r12, r12, r10
eor r4, r12
eor r4, r4, r12, lsl #18 //SWAPMOVE(r4, r4, 0x3333, 18);
eor r12, r6, r6, lsr #12
and r12, r12, r11
eor r6, r12
eor r6, r6, r12, lsl #12 //SWAPMOVE(r6, r6, 0x000f000f, 12);
eor r12, r4, r4, lsr #12
and r12, r12, r11
eor r4, r12
eor r4, r4, r12, lsl #12 //SWAPMOVE(r4, r4, 0x000f000f, 12);
eor r12, r6, r6, lsr #24
and r12, r12, #0xff
eor r6, r12
eor r6, r6, r12, lsl #24 //SWAPMOVE(r6, r6, 0x000000ff, 24);
eor r12, r4, r4, lsr #24
and r12, r12, #0xff
eor r4, r12
eor r4, r4, r12, lsl #24 //SWAPMOVE(r4, r4, 0x000000ff, 24);
str.w r6, [r1]
str.w r4, [r1, #4]
bx lr
.align 2
rearrange_rkey_1:
ldrd r5, r7, [r1]
eor r8, r7, r7, lsr #3
and r8, r8, r3
eor r7, r8
eor r7, r7, r8, lsl #3 //SWAPMOVE(r7, r7, 0x11111111, 3);
eor r8, r5, r5, lsr #3
and r8, r8, r3
eor r5, r8
eor r5, r5, r8, lsl #3 //SWAPMOVE(r5, r5, 0x11111111, 3);
eor r8, r7, r7, lsr #6
and r8, r8, r10
eor r7, r8
eor r7, r7, r8, lsl #6 //SWAPMOVE(r7, r7, 0x03030303, 6);
eor r8, r5, r5, lsr #6
and r8, r8, r10
eor r5, r8
eor r5, r5, r8, lsl #6 //SWAPMOVE(r5, r5, 0x03030303, 6);
eor r8, r7, r7, lsr #12
and r8, r8, r11
eor r7, r8
eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12);
eor r8, r5, r5, lsr #12
and r8, r8, r11
eor r5, r8
eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12);
eor r8, r7, r7, lsr #24
and r8, r8, #0xff
eor r7, r8
eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x000000ff, 24);
eor r8, r5, r5, lsr #24
and r8, r8, #0xff
eor r5, r8
eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24);
str.w r5, [r1]
str.w r7, [r1, #4]
bx lr
.align 2
@ rearrange_rkey_2: SWAPMOVE-based bit reordering of the round-key word
@ pair at [r1] into the fixsliced representation. Expects the masks
@ r3 = 0x0000aaaa, r10 = 0x00003333, r11 = 0x0000f0f0 (loaded by the
@ caller); clobbers r5, r7, r8; leaves r1 unchanged.
rearrange_rkey_2:
ldrd r5, r7, [r1]
eor r8, r7, r7, lsr #15
and r8, r8, r3
eor r7, r8
eor r7, r7, r8, lsl #15 //SWAPMOVE(r7, r7, 0x0000aaaa, 15);
eor r8, r5, r5, lsr #15
and r8, r8, r3
eor r5, r8
eor r5, r5, r8, lsl #15 //SWAPMOVE(r5, r5, 0x0000aaaa, 15);
eor r8, r7, r7, lsr #18
and r8, r8, r10
eor r7, r8
eor r7, r7, r8, lsl #18 //SWAPMOVE(r7, r7, 0x00003333, 18);
eor r8, r5, r5, lsr #18
and r8, r8, r10
eor r5, r8
eor r5, r5, r8, lsl #18 //SWAPMOVE(r5, r5, 0x00003333, 18);
eor r8, r7, r7, lsr #12
and r8, r8, r11
eor r7, r8
eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x0000f0f0, 12);
eor r8, r5, r5, lsr #12
and r8, r8, r11
eor r5, r8
eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x0000f0f0, 12);
eor r8, r7, r7, lsr #24
and r8, r8, #0xff
eor r7, r8
eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x000000ff, 24);
eor r8, r5, r5, lsr #24
and r8, r8, #0xff
eor r5, r8
eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24);
str.w r5, [r1]
str.w r7, [r1, #4]
bx lr
.align 2
rearrange_rkey_3:
ldrd r5, r7, [r1]
eor r8, r7, r7, lsr #3
and r8, r8, r3
eor r7, r8
eor r7, r7, r8, lsl #3 //SWAPMOVE(r7, r7, 0x0a0a0a0a, 3);
eor r8, r5, r5, lsr #3
and r8, r8, r3
eor r5, r8
eor r5, r5, r8, lsl #3 //SWAPMOVE(r5, r5, 0x0a0a0a0a, 3);
eor r8, r7, r7, lsr #6
and r8, r8, r10
eor r7, r8
eor r7, r7, r8, lsl #6 //SWAPMOVE(r7, r7, 0x00cc00cc, 6);
eor r8, r5, r5, lsr #6
and r8, r8, r10
eor r5, r8
eor r5, r5, r8, lsl #6 //SWAPMOVE(r5, r5, 0x00cc00cc, 6);
eor r8, r7, r7, lsr #12
and r8, r8, r11
eor r7, r8
eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12);
eor r8, r5, r5, lsr #12
and r8, r8, r11
eor r5, r8
eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12);
eor r8, r7, r7, lsr #24
and r8, r8, #0xff
eor r7, r8
eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x000000ff, 24);
eor r8, r5, r5, lsr #24
and r8, r8, #0xff
eor r5, r8
eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24);
str.w r5, [r1]
str.w r7, [r1, #4]
bx lr
.align 2
key_update_0:
ldrd r4, r5, [r1], #80
and r2, r12, r4, ror #24
and r4, r4, r11
orr r4, r2, r4, ror #16 //KEY_TRIPLE_UPDATE_1(r4)
eor r2, r4, r4, lsr #1
and r2, r2, r8
eor r4, r4, r2
eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x55551100, 1)
eor r2, r5, r5, lsr #16
and r2, r2, r10
eor r5, r5, r2
eor r5, r5, r2, lsl #16 //SWAPMOVE(r5, r5, 0x00003333, 16)
eor r2, r5, r5, lsr #1
and r2, r2, r9
eor r5, r5, r2
eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x555544444, 1)
str.w r4, [r1, #4]
str.w r5, [r1], #80
and r2, r12, r5, ror #24
and r5, r5, r11
orr r5, r2, r5, ror #16 //KEY_TRIPLE_UPDATE_1(r5)
eor r2, r5, r5, lsr #1
and r2, r2, r8
eor r5, r5, r2
eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x55551100, 1)
eor r2, r4, r4, lsr #16
and r2, r2, r10
eor r4, r4, r2
eor r4, r4, r2, lsl #16 //SWAPMOVE(r4, r4, 0x00003333, 16)
eor r2, r4, r4, lsr #1
and r2, r2, r9
eor r4, r4, r2
eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x555544444, 1)
str.w r5, [r1, #4]
str.w r4, [r1], #80
and r2, r12, r4, ror #24
and r4, r4, r11
orr r4, r2, r4, ror #16 //KEY_TRIPLE_UPDATE_1(r4)
eor r2, r4, r4, lsr #1
and r2, r2, r8
eor r4, r4, r2
eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x55551100, 1)
eor r2, r5, r5, lsr #16
and r2, r2, r10
eor r5, r5, r2
eor r5, r5, r2, lsl #16 //SWAPMOVE(r5, r5, 0x00003333, 16)
eor r2, r5, r5, lsr #1
and r2, r2, r9
eor r5, r5, r2
eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x555544444, 1)
str.w r4, [r1, #4]
str.w r5, [r1], #80
bx lr
.align 2
key_update_1:
ldrd r4, r5, [r1], #80
and r2, r9, r4, lsr #6
and r3, r4, r10, lsl #8
orr r2, r2, r3, lsl #2
and r3, r8, r4, lsr #5
orr r2, r2, r3
and r4, r4, r7
orr r4, r2, r4, lsl #3 //KEY_TRIPLE_UPDATE_2(r4)
and r2, r12, r5, lsr #4
and r3, r5, r12
orr r2, r2, r3, lsl #4
and r3, r11, r5, lsr #6
orr r2, r2, r3
and r5, r5, r10
orr r5, r2, r5, lsl #2 //KEY_DOUBLE_UPDATE_2(r5)
str.w r4, [r1, #4]
str.w r5, [r1], #80
and r2, r9, r5, lsr #6
and r3, r5, r10, lsl #8
orr r2, r2, r3, lsl #2
and r3, r8, r5, lsr #5
orr r2, r2, r3
and r5, r5, r7
orr r5, r2, r5, lsl #3 //KEY_TRIPLE_UPDATE_2(r5)
and r2, r12, r4, lsr #4
and r3, r4, r12
orr r2, r2, r3, lsl #4
and r3, r11, r4, lsr #6
orr r2, r2, r3
and r4, r4, r10
orr r4, r2, r4, lsl #2 //KEY_DOUBLE_UPDATE_2(r4)
str.w r5, [r1, #4]
str.w r4, [r1], #80
and r2, r9, r4, lsr #6
and r3, r4, r10, lsl #8
orr r2, r2, r3, lsl #2
and r3, r8, r4, lsr #5
orr r2, r2, r3
and r4, r4, r7
orr r4, r2, r4, lsl #3 //KEY_TRIPLE_UPDATE_2(r4)
and r2, r12, r5, lsr #4
and r3, r5, r12
orr r2, r2, r3, lsl #4
and r3, r11, r5, lsr #6
orr r2, r2, r3
and r5, r5, r10
orr r5, r2, r5, lsl#2 //KEY_DOUBLE_UPDATE_2(r5)
str.w r4, [r1, #4]
str.w r5, [r1], #80
bx lr
.align 2
@ key_update_2: fixsliced key-state update applied to three round-key
@ word pairs stored 80 bytes apart. Expects r12 = 0x55555555 and
@ r11 = 0xaaaaaaaa (loaded by the caller); clobbers r2, r4, r5 and
@ advances r1 by 240 bytes.
key_update_2:
ldrd r4, r5, [r1], #80
and r2, r12, r4, ror #24
and r4, r11, r4, ror #20
orr r4, r4, r2 //KEY_TRIPLE_UPDATE_2(r4)
and r2, r11, r5, ror #24
and r5, r12, r5, ror #16
orr r5, r5, r2 //KEY_DOUBLE_UPDATE_2(r5)
str.w r4, [r1, #4]
str.w r5, [r1], #80
and r2, r12, r5, ror #24
and r5, r11, r5, ror #20
orr r5, r5, r2 //KEY_TRIPLE_UPDATE_2(r5)
and r2, r11, r4, ror #24
and r4, r12, r4, ror #16
orr r4, r4, r2 //KEY_DOUBLE_UPDATE_2(r4)
str.w r5, [r1, #4]
str.w r4, [r1], #80
and r2, r12, r4, ror #24
and r4, r11, r4, ror #20
orr r4, r4, r2 //KEY_TRIPLE_UPDATE_2(r4)
and r2, r11, r5, ror #24
and r5, r12, r5, ror #16
orr r5, r5, r2 //KEY_DOUBLE_UPDATE_2(r5)
str.w r4, [r1, #4]
str.w r5, [r1], #80
bx lr
.align 2
key_update_3:
ldrd r4, r5, [r1], #80
and r2, r10, r4, lsr #18
and r3, r4, r7, lsr #4
orr r2, r2, r3, lsl #3
and r3, r11, r4, lsr #14
orr r2, r2, r3
and r3, r4, r12, lsr #11
orr r2, r2, r3, lsl #15
and r3, r12, r4, lsr #1
orr r2, r2, r3
and r4, r4, r7, lsr #16
orr r4, r2, r4, lsl #19 //KEY_TRIPLE_UPDATE_4(r4)
and r2, r9, r5, lsr #2
and r3, r9, r5
orr r2, r2, r3, lsl #2
and r3, r8, r5, lsr #1
orr r2, r2, r3
and r5, r5, r7
orr r5, r2, r5, lsl #3 //KEY_DOUBLE_UPDATE_4(r5)
str.w r4, [r1, #4]
str.w r5, [r1], #80
and r2, r10, r5, lsr #18
and r3, r5, r7, lsr #4
orr r2, r2, r3, lsl #3
and r3, r11, r5, lsr #14
orr r2, r2, r3
and r3, r5, r12, lsr #11
orr r2, r2, r3, lsl #15
and r3, r12, r5, lsr #1
orr r2, r2, r3
and r5, r5, r7, lsr #16
orr r5, r2, r5, lsl #19 //KEY_TRIPLE_UPDATE_4(r5)
and r2, r9, r4, lsr #2
and r3, r9, r4
orr r2, r2, r3, lsl #2
and r3, r8, r4, lsr #1
orr r2, r2, r3
and r4, r4, r7
orr r4, r2, r4, lsl #3 //KEY_DOUBLE_UPDATE_4(r4)
str.w r5, [r1, #4]
str.w r4, [r1], #80
and r2, r10, r4, lsr #18
and r3, r4, r7, lsr #4
orr r2, r2, r3, lsl #3
and r3, r11, r4, lsr #14
orr r2, r2, r3
and r3, r4, r12, lsr #11
orr r2, r2, r3, lsl #15
and r3, r12, r4, lsr #1
orr r2, r2, r3
and r4, r4, r7, lsr #16
orr r4, r2, r4, lsl #19 //KEY_TRIPLE_UPDATE_4(r4)
and r2, r9, r5, lsr #2
and r3, r9, r5
orr r2, r2, r3, lsl #2
and r3, r8, r5, lsr #1
orr r2, r2, r3
and r5, r5, r7
orr r5, r2, r5, lsl #3 //KEY_DOUBLE_UPDATE_4(r5)
str.w r4, [r1, #4]
str.w r5, [r1], #80
bx lr
.align 2
key_update_4:
ldrd r4, r5, [r1], #80
and r2, r7, r4, lsr #6
and r3, r4, #0x003f0000
orr r2, r2, r3, lsl #10
and r3, r12, r4, lsr #4
orr r2, r2, r3
and r4, r4, #0x000f
orr r4, r2, r4, lsl #12 //KEY_TRIPLE_UPDATE_4(r4)
and r2, r10, r5, lsr #4
and r3, r5, #0x000f0000
orr r2, r2, r3, lsl #12
and r3, r8, r5, lsr #8
orr r2, r2, r3
and r5, r5, r8
orr r5, r2, r5, lsl #8 //KEY_DOUBLE_UPDATE_4(r5)
str.w r4, [r1, #4]
str.w r5, [r1], #80
and r2, r7, r5, lsr #6
and r3, r5, #0x003f0000
orr r2, r2, r3, lsl #10
and r3, r12, r5, lsr #4
orr r2, r2, r3
and r5, r5, #0x000f
orr r5, r2, r5, lsl #12 //KEY_TRIPLE_UPDATE_4(r5)
and r2, r10, r4, lsr #4
and r3, r4, #0x000f0000
orr r2, r2, r3, lsl #12
and r3, r8, r4, lsr #8
orr r2, r2, r3
and r4, r4, r8
orr r4, r2, r4, lsl #8 //KEY_DOUBLE_UPDATE_4(r4)
str.w r5, [r1, #4]
str.w r4, [r1], #80
and r2, r7, r4, lsr #6
and r3, r4, #0x003f0000
orr r2, r2, r3, lsl #10
and r3, r12, r4, lsr #4
orr r2, r2, r3
and r4, r4, #0x000f
orr r4, r2, r4, lsl #12 //KEY_TRIPLE_UPDATE_4(r4)
and r2, r10, r5, lsr #4
and r3, r5, #0x000f0000
orr r2, r2, r3, lsl #12
and r3, r8, r5, lsr #8
orr r2, r2, r3
and r5, r5, r8
orr r5, r2, r5, lsl #8 //KEY_DOUBLE_UPDATE_4(r5)
str.w r4, [r1, #4]
str.w r5, [r1], #80
bx lr
/*****************************************************************************
* Balanced implementation of the GIFT-128 key schedule according to the
* fixsliced representation.
*****************************************************************************/
.align 2
@ void gift128_keyschedule(const u8* key, u32* rkey) {
.global gift128_keyschedule
.type gift128_keyschedule,%function
gift128_keyschedule:
push {r1-r12, r14}
ldm r0, {r4-r7} //load key words
rev r4, r4 //endianness (could be skipped with another representation)
rev r5, r5 //endianness (could be skipped with another representation)
rev r6, r6 //endianness (could be skipped with another representation)
rev r7, r7 //endianness (could be skipped with another representation)
str.w r5, [r1, #4]
str.w r7, [r1], #8 //the first rkeys are not updated
str.w r4, [r1, #4]
str.w r6, [r1], #8 //the first rkeys are not updated
movw r12, #0x3fff
lsl r12, r12, #16 //r12<- 0x3fff0000
movw r10, #0x000f //r10<- 0x0000000f
movw r9, #0x0fff //r9 <- 0x00000fff
bl classical_key_update //keyschedule using classical representation (10 rounds)
bl classical_key_update //keyschedule using classical representation (20 rounds)
sub.w r1, r1, #80
movw r3, #0x0055
movt r3, #0x0055 //r3 <- 0x00550055
movw r10, #0x3333 //r10<- 0x00003333
movw r11, #0x000f
movt r11, #0x000f //r11<- 0x000f000f
bl rearrange_rkey_0 //fixslice the rkeys
add.w r1, r1, #40
bl rearrange_rkey_0 //fixslice the rkeys
sub.w r1, r1, #32
movw r3, #0x1111
movt r3, #0x1111 //r3 <- 0x11111111
movw r10, #0x0303
movt r10, #0x0303 //r10<- 0x03030303
bl rearrange_rkey_1 //fixslice the rkeys
add.w r1, r1, #40
bl rearrange_rkey_1 //fixslice the rkeys
sub.w r1, r1, #32
movw r3, #0xaaaa //r3 <- 0x0000aaaa
movw r10, #0x3333 //r10<- 0x00003333
movw r11, #0xf0f0 //r11<- 0x0000f0f0
bl rearrange_rkey_2 //fixslice the rkeys
add.w r1, r1, #40
bl rearrange_rkey_2 //fixslice the rkeys
sub.w r1, r1, #32
movw r3, #0x0a0a
movt r3, #0x0a0a //r3 <- 0x0a0a0a0a
movw r10, #0x00cc
movt r10, #0x00cc //r10<- 0x00cc00cc
bl rearrange_rkey_3 //fixslice the rkeys
add.w r1, r1, #40
bl rearrange_rkey_3 //fixslice the rkeys
sub.w r1, r1, #64
movw r10, #0x3333 //r10<- 0x00003333
eor r12, r10, r10, lsl #16 //r12<- 0w33333333
mvn r11, r12 //r11<- 0xcccccccc
movw r9, #0x4444
movt r9, #0x5555 //r9 <- 0x55554444
movw r8, #0x1100
movt r8, #0x5555 //r8 <- 0x55551100
bl key_update_0 //keyschedule according to fixslicing
sub.w r1, r1, #280
bl key_update_0 //keyschedule according to fixslicing
sub.w r1, r1, #352
movw r12, #0x0f00
movt r12, #0x0f00 //r12<- 0x0f000f00
movw r11, #0x0003
movt r11, #0x0003 //r11<- 0x00030003
movw r10, #0x003f
movt r10, #0x003f //r10<- 0x003f003f
lsl r9, r11, #8 //r9 <- 0x03000300
and r8, r10, r10, lsr #3 //r8 <- 0x00070007
orr r7, r8, r8, lsl #2 //r7 <- 0x001f001f
bl key_update_1 //keyschedule according to fixslicing
sub.w r1, r1, #280
bl key_update_1 //keyschedule according to fixslicing
sub.w r1, r1, #352
movw r12, #0x5555
movt r12, #0x5555 //r12<- 0x55555555
mvn r11, r12 //r11<- 0xaaaaaaaa
bl key_update_2 //keyschedule according to fixslicing
sub.w r1, r1, #280
bl key_update_2 //keyschedule according to fixslicing
sub.w r1, r1, #352
orr r12, r8, r8, lsl #8 //r12<- 0x07070707
movw r11, #0xc0c0 //r11<- 0x0000c0c0
movw r10, #0x3030 //r10<- 0x00003030
and r9, r12, r12, lsr #1 //r9 <- 0x03030303
lsl r8, r12, #4 //r8 <- 0x70707070
eor r7, r8, r9, lsl #5 //r7 <- 0x10101010
movw r6, #0xf0f0 //r6 <- 0x0000f0f0
bl key_update_3 //keyschedule according to fixslicing
sub.w r1, r1, #280
bl key_update_3 //keyschedule according to fixslicing
sub.w r1, r1, #352
movw r12, #0x0fff
lsl r10, r12, #16
movw r8, #0x00ff //r8 <- 0x000000ff
movw r7, #0x03ff //r7 <- 0x000003ff
lsl r7, r7, #16
bl key_update_4 //keyschedule according to fixslicing
sub.w r1, r1, #280
bl key_update_4 //keyschedule according to fixslicing
pop {r1-r12,r14}
bx lr
.align 2
quintuple_round:
str.w r14, [sp]
ldr.w r5, [r0], #4
ldr.w r6, [r1], #4 //load rkey
ldr.w r7, [r1], #4 //load rkey
and r8, r11, r9 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r9, r8
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
and r8, r4, r12, lsr #1 //permutation layer
and r12, r12, r2
orr r12, r8, r12, lsl #3 //r12<- NIBBLE_ROR(r12, 1)
and r8, r4, r11
and r11, r2, r11, lsr #3
orr r11, r11, r8, lsl #1 //r11<- NIBBLE_ROR(r11, 3)
orr r14, r2, r2, lsl #1 //r14 <- 0x33333333
and r8, r14, r10, lsr #2
and r10, r10, r14
orr r10, r8, r10, lsl #2 //r10<- NIBBLE_ROR(r10, 2)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r9, r9, r5 //add rconst
ldr.w r5, [r0], #4
ldr.w r6, [r1], #4 //load rkey
ldr.w r7, [r1], #4 //load rkey
and r8, r12, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r12, r8
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9
mvn r14, r3, lsl #12 //r0 <- 0x0fff0fff
and r8, r14, r9, lsr #4
and r9, r9, r3
orr r9, r8, r9, lsl #12 //r9 <- HALF_ROR(r9, 4)
and r8, r3, r11, lsr #12
and r11, r11, r14
orr r11, r8, r11, lsl #4 //r11<- HALF_ROR(r11, 12)
rev16 r10, r10 //r10<- HALF_ROR(r10, 8)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r12, r12, r5 //add rconst
ldr.w r5, [r0], #4
ldr.w r6, [r1], #4 //load rkey
ldr.w r7, [r1], #4 //load rkey
and r8, r9, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r9, r8
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
orr r14, r2, r2, lsl #2 //r14 <- 0x55555555 for SWAPMOVE
eor r8, r10, r10, lsr #1
and r8, r8, r14
eor r10, r10, r8
eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1)
eor r8, r12, r12, lsr #1
and r8, r8, r14, lsr #16
eor r12, r12, r8
eor r12, r12, r8, lsl #1 //SWAPMOVE(r12, r12, 0x55550000, 1)
eor r8, r11, r11, lsr #1
and r8, r8, r14, lsl #16
eor r11, r11, r8
eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x00005555, 1)
eor r10, r10, r6 //add 1st keyword
eor r11, r7, r11, ror #16 //add 2nd keyword
eor r9, r9, r5 //add rconst
ldr.w r5, [r0], #4
ldr.w r6, [r1], #4 //load rkey
ldr.w r7, [r1], #4 //load rkey
and r8, r11, r12, ror #16 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r8, r12, ror #16
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9
eor r14, r3, r3, lsl #8 //r14 <- 0x0f0f0f0f for BYTE_ROR
and r8, r14, r10, lsr #4
and r10, r10, r14
orr r10, r8, r10, lsl #4 //r10<- BYTE_ROR(r10, 4)
orr r14, r14, r14, lsl #2 //r14 <- 0x3f3f3f3f for BYTE_ROR
mvn r8, r14 //r8 <- 0xc0c0c0c0 for BYTE_ROR
and r8, r8, r11, lsl #6
and r11, r14, r11, lsr #2
orr r11, r11, r8 //r11<- BYTE_ROR(r11, 2)
mvn r8, r14, lsr #6
and r8, r8, r9, lsr #6
and r9, r14, r9
orr r9, r8, r9, lsl #2 //r9 <- BYTE_ROR(r9, 6)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r12, r12, r5 //add rconst
ldr.w r5, [r0], #4
ldr.w r6, [r1], #4 //load rkey
ldr.w r7, [r1], #4 //load rkey
ldr.w lr, [sp] //restore link register
and r8, r9, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r9, r8
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12, ror #24
eor r10, r6, r10, ror #16 //add 1st keyword
eor r11, r7, r11, ror #8 //add 2nd keyword
eor r9, r9, r5 //add rconst
eor r9, r9, r12 //swap r9 with r12
eor r12, r12, r9 //swap r9 with r12
eor r9, r9, r12 //swap r9 with r12
bx lr
/*****************************************************************************
* Balanced ARM assembly implementation of the GIFTb-128 block cipher.
* This function simply encrypts a 128-bit block, without any operation mode.
*****************************************************************************/
.align 2
@ void giftb128_encrypt_block(u8 *out, const u32* rkey, const u8 *block)
.global giftb128_encrypt_block
.type giftb128_encrypt_block,%function
@ Encrypts one 16-byte block with GIFTb-128: 40 rounds executed as eight
@ calls to 'quintuple_round'. The 'rev' pairs byte-swap the words on load
@ and store to match the representation used by the round function.
giftb128_encrypt_block:
push {r0,r2-r12,r14}
sub.w sp, #4 //to store 'lr' when calling 'quintuple_round'
ldm r2, {r9-r12} // load plaintext words
rev r9, r9
rev r10, r10
rev r11, r11
rev r12, r12
movw r2, #0x1111
movt r2, #0x1111 //r2 <- 0x11111111 (for NIBBLE_ROR)
movw r3, #0x000f
movt r3, #0x000f //r3 <- 0x000f000f (for HALF_ROR)
mvn r4, r2, lsl #3 //r4 <- 0x77777777 (for NIBBLE_ROR)
adr r0, rconst //r0 <- 'rconst' address
bl quintuple_round
bl quintuple_round
bl quintuple_round
bl quintuple_round
bl quintuple_round
bl quintuple_round
bl quintuple_round
bl quintuple_round
ldr.w r0, [sp ,#4] //restore 'ctext' address
rev r9, r9
rev r10, r10
rev r11, r11
rev r12, r12
stm r0, {r9-r12}
add.w sp, #4
pop {r0,r2-r12,r14}
bx lr
\ No newline at end of file
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#ifndef GIFT_COFB_H_
#define GIFT_COFB_H_

/* GIFT-COFB mode parameters. */
#define TAG_SIZE     16 /* authentication tag length in bytes */
#define COFB_ENCRYPT 1  /* 'encrypting' argument of giftcofb_crypt() */
#define COFB_DECRYPT 0

/*
 * XOR two 128-bit blocks (four 32-bit words): x = y ^ z.
 * Written as do/while(0) instead of a GCC statement expression so the
 * macro is portable ISO C and still behaves as a single statement.
 */
#define XOR_BLOCK(x, y, z) do { \
    (x)[0] = (y)[0] ^ (z)[0]; \
    (x)[1] = (y)[1] ^ (z)[1]; \
    (x)[2] = (y)[2] ^ (z)[2]; \
    (x)[3] = (y)[3] ^ (z)[3]; \
} while (0)

/* XOR a 64-bit half-block (two words) into the top half of block x. */
#define XOR_TOP_BAR_BLOCK(x, y) do { \
    (x)[0] ^= (y)[0]; \
    (x)[1] ^= (y)[1]; \
} while (0)

#endif // GIFT_COFB_H_
\ No newline at end of file
#include <string.h>
#include "api.h"
#include "cofb.h"
#include "giftb128.h"
/*
 * Write the 16-byte padded block built from the first 'no_of_bytes' bytes
 * of 's' into 'd'. A full block is copied verbatim; a partial (possibly
 * empty) block is 10*-padded: data, then a 0x80 byte, then zeros. The
 * byte layout matches the original word-masking version on the
 * little-endian targets this code assumes.
 *
 * Unlike the word-wise version, this reads exactly 'no_of_bytes' bytes
 * from 's', so no out-of-bounds read happens on the final partial word.
 */
static inline void padding(u32* d, const u32* s, const u32 no_of_bytes){
    if (no_of_bytes >= GIFT128_BLOCK_SIZE) { /* full block: plain copy */
        d[0] = s[0];
        d[1] = s[1];
        d[2] = s[2];
        d[3] = s[3];
    }
    else { /* partial block: data || 0x80 || 0x00... */
        u8 *db = (u8 *)d; /* byte access through char type: no aliasing issue */
        memset(db, 0x00, GIFT128_BLOCK_SIZE);
        memcpy(db, s, no_of_bytes);
        db[no_of_bytes] = 0x80;
    }
}
static inline void double_half_block(u32* x) {
u32 tmp0;
tmp0 = (x)[0];
(x)[0] = (((x)[0] & 0x7f7f7f7f) << 1) | (((x)[0] & 0x80808080) >> 15);
(x)[0] |= ((x)[1] & 0x80808080) << 17;
(x)[1] = (((x)[1] & 0x7f7f7f7f) << 1) | (((x)[1] & 0x80808080) >> 15);
(x)[1] ^= (((tmp0 >> 7) & 1) * 27) << 24;
}
static inline void triple_half_block(u32* x) {
u32 tmp0, tmp1;
tmp0 = (x)[0];
tmp1 = (x)[1];
(x)[0] = (((x)[0] & 0x7f7f7f7f) << 1) | (((x)[0] & 0x80808080) >> 15);
(x)[0] |= ((x)[1] & 0x80808080) << 17;
(x)[1] = (((x)[1] & 0x7f7f7f7f) << 1) | (((x)[1] & 0x80808080) >> 15);
(x)[1] ^= (((tmp0 >> 7) & 1) * 27) << 24;
(x)[0] ^= tmp0;
(x)[1] ^= tmp1;
}
static inline void g(u32 *x) {
u32 tmp0, tmp1;
tmp0 = (x)[0];
tmp1 = (x)[1];
(x)[0] = (x)[2];
(x)[1] = (x)[3];
(x)[2] = ((tmp0 & 0x7f7f7f7f) << 1) | ((tmp0 & 0x80808080) >> 15);
(x)[2] |= ((tmp1 & 0x80808080) << 17);
(x)[3] = ((tmp1 & 0x7f7f7f7f) << 1) | ((tmp1 & 0x80808080) >> 15);
(x)[3] |= ((tmp0 & 0x80808080) << 17);
}
static inline void rho1(u32* d, u32* y, u32* m, u32 n) {
g(y);
padding(d,m,n);
XOR_BLOCK(d, d, y);
}
static inline void rho(u32* y, u32* m, u32* x, u32* c, u32 n) {
XOR_BLOCK(c, y, m);
rho1(x, y, m, n);
}
static inline void rho_prime(u32* y, u32*c, u32* x, u32* m, u32 n) {
XOR_BLOCK(m, y, c);
rho1(x, y, m, n);
}
/****************************************************************************
* Constant-time implementation of the GIFT-COFB authenticated cipher based on
* fixsliced GIFTb-128. Encryption/decryption is handled by the same function,
* depending on the 'mode' parameter (1/0).
****************************************************************************/
int giftcofb_crypt(u8* out, const u8* key, const u8* nonce, const u8* ad,
u32 ad_len, const u8* in, u32 in_len, const int encrypting) {
u32 tmp0, tmp1, emptyA, emptyM, offset[2];
u32 input[4], rkey[80];
u8 Y[GIFT128_BLOCK_SIZE];
if (!encrypting) {
if (in_len < TAG_SIZE)
return -1;
in_len -= TAG_SIZE;
}
if(ad_len == 0)
emptyA = 1;
else
emptyA = 0;
if(in_len == 0)
emptyM =1;
else
emptyM = 0;
gift128_keyschedule(key, rkey);
giftb128_encrypt_block(Y, rkey, nonce);
offset[0] = ((u32*)Y)[0];
offset[1] = ((u32*)Y)[1];
while(ad_len > GIFT128_BLOCK_SIZE){
rho1(input, (u32*)Y, (u32*)ad, GIFT128_BLOCK_SIZE);
double_half_block(offset);
XOR_TOP_BAR_BLOCK(input, offset);
giftb128_encrypt_block(Y, rkey, (u8*)input);
ad += GIFT128_BLOCK_SIZE;
ad_len -= GIFT128_BLOCK_SIZE;
}
triple_half_block(offset);
if((ad_len % GIFT128_BLOCK_SIZE != 0) || (emptyA))
triple_half_block(offset);
if(emptyM) {
triple_half_block(offset);
triple_half_block(offset);
}
rho1(input, (u32*)Y, (u32*)ad, ad_len);
XOR_TOP_BAR_BLOCK(input, offset);
giftb128_encrypt_block(Y, rkey, (u8*)input);
while (in_len > GIFT128_BLOCK_SIZE){
double_half_block(offset);
if (encrypting)
rho((u32*)Y, (u32*)in, input, (u32*)out, GIFT128_BLOCK_SIZE);
else
rho_prime((u32*)Y, (u32*)in, input, (u32*)out, GIFT128_BLOCK_SIZE);
XOR_TOP_BAR_BLOCK(input, offset);
giftb128_encrypt_block(Y, rkey, (u8*)input);
in += GIFT128_BLOCK_SIZE;
out += GIFT128_BLOCK_SIZE;
in_len -= GIFT128_BLOCK_SIZE;
}
if(!emptyM){
triple_half_block(offset);
if(in_len % GIFT128_BLOCK_SIZE != 0)
triple_half_block(offset);
if (encrypting) {
rho((u32*)Y, (u32*)in, input, (u32*)out, in_len);
out += in_len;
}
else {
rho_prime((u32*)Y, (u32*)in, input, (u32*)out, in_len);
in += in_len;
}
XOR_TOP_BAR_BLOCK(input, offset);
giftb128_encrypt_block(Y, rkey, (u8*)input);
}
if (encrypting) { // encryption mode
memcpy(out, Y, TAG_SIZE);
return 0;
}
// decrypting
tmp0 = 0;
for(tmp1 = 0; tmp1 < TAG_SIZE; tmp1++)
tmp0 |= in[tmp1] ^ Y[tmp1];
return tmp0;
}
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
const unsigned char* m, unsigned long long mlen,
const unsigned char* ad, unsigned long long adlen,
const unsigned char* nsec, const unsigned char* npub,
const unsigned char* k) {
(void)nsec;
*clen = mlen + TAG_SIZE;
return giftcofb_crypt(c, k, npub, ad, adlen, m, mlen, COFB_ENCRYPT);
}
/*
 * NIST LWC AEAD decryption wrapper around giftcofb_crypt().
 * Returns 0 on success, -1 on failure (input shorter than the tag, or
 * tag mismatch). *mlen is written only on success, so it can no longer
 * underflow to a huge value when clen < TAG_SIZE, and it is never set
 * on authentication failure.
 */
int crypto_aead_decrypt(unsigned char* m, unsigned long long *mlen,
    unsigned char* nsec, const unsigned char* c,
    unsigned long long clen, const unsigned char* ad,
    unsigned long long adlen, const unsigned char* npub,
    const unsigned char *k) {
    (void)nsec;
    if (clen < TAG_SIZE)
        return -1; // too short to even contain the tag
    if (giftcofb_crypt(m, k, npub, ad, adlen, c, clen, COFB_DECRYPT) != 0)
        return -1; // authentication failed
    *mlen = clen - TAG_SIZE;
    return 0;
}
#ifndef GIFT128_H_
#define GIFT128_H_
// GIFT-128 key length in bytes (128-bit key)
#define KEY_SIZE 16
// GIFT-128 block length in bytes (128-bit block)
#define GIFT128_BLOCK_SIZE 16
typedef unsigned char u8;
typedef unsigned int u32; // assumes 'int' is 32 bits on the target -- TODO confirm
// Expands the 16-byte key into the round-key array (80 words, see rkey[80]
// in the callers); implemented in assembly.
extern void gift128_keyschedule(const u8* key, u32* rkey);
// Encrypts one 16-byte block with GIFTb-128 using precomputed round keys;
// implemented in assembly.
extern void giftb128_encrypt_block(u8* out_block, const u32* rkey, const u8* in_block);
#endif // GIFT128_H_
\ No newline at end of file
/****************************************************************************
* Compact ARM assembly implementation of the GIFT-128 block cipher. This
* implementation focuses on code size rather than speed.
* See "Fixslicing: A New GIFT Representation" paper available at
* https://eprint.iacr.org/2020/412 for more details.
****************************************************************************/
.syntax unified
.thumb
/*****************************************************************************
* Round constants look-up table according to the fixsliced representation.
*****************************************************************************/
.align 2
.type rconst,%object
// 40 32-bit round constants (8 quintuple rounds x 5 rounds), one word
// consumed per round by 'quintuple_round'
rconst:
.word 0x10000008, 0x80018000, 0x54000002, 0x01010181
.word 0x8000001f, 0x10888880, 0x6001e000, 0x51500002
.word 0x03030180, 0x8000002f, 0x10088880, 0x60016000
.word 0x41500002, 0x03030080, 0x80000027, 0x10008880
.word 0x4001e000, 0x11500002, 0x03020180, 0x8000002b
.word 0x10080880, 0x60014000, 0x01400002, 0x02020080
.word 0x80000021, 0x10000080, 0x0001c000, 0x51000002
.word 0x03010180, 0x8000002e, 0x10088800, 0x60012000
.word 0x40500002, 0x01030080, 0x80000006, 0x10008808
.word 0xc001a000, 0x14500002, 0x01020181, 0x8000001a
.align 2
/* Performs four successive GIFT-128 key-state updates (key state in r4-r7)
 * in the classical representation and stores the two freshly updated words
 * to the round-key array after each update ([r1] advances by 8 per store).
 * Caller must preset the masks r9 <- 0x00000fff, r10 <- 0x0000000f and
 * r12 <- 0x3fff0000 (used to build the 16-bit rotations of the update);
 * clobbers r2-r3. */
key_update:
and r2, r10, r7, lsr #12
and r3, r7, r9
orr r2, r2, r3, lsl #4
and r3, r12, r7, lsr #2
orr r2, r2, r3
and r7, r7, #0x00030000
orr r7, r2, r7, lsl #14
strd r5, r7, [r1], #8 //store rkeys after 1st key update
and r2, r10, r6, lsr #12
and r3, r6, r9
orr r2, r2, r3, lsl #4
and r3, r12, r6, lsr #2
orr r2, r2, r3
and r6, r6, #0x00030000
orr r6, r2, r6, lsl #14
strd r4, r6, [r1], #8 //store rkeys after 2nd key update
and r2, r10, r5, lsr #12
and r3, r5, r9
orr r2, r2, r3, lsl #4
and r3, r12, r5, lsr #2
orr r2, r2, r3
and r5, r5, #0x00030000
orr r5, r2, r5, lsl #14
strd r7, r5, [r1], #8 //store rkeys after 3rd key update
and r2, r10, r4, lsr #12
and r3, r4, r9
orr r2, r2, r3, lsl #4
and r3, r12, r4, lsr #2
orr r2, r2, r3
and r4, r4, #0x00030000
orr r4, r2, r4, lsl #14
strd r6, r4, [r1], #8 //store rkeys after 4th key update
bx lr
.align 2
/* Rearranges the pair of round-key words at [r1] into the fixsliced
 * representation used for the 1st round of each group of 4, as a sequence
 * of SWAPMOVE operations. Caller must preset the masks r3 <- 0x00550055,
 * r10 <- 0x00003333, r11 <- 0x000f000f; clobbers r4, r6, r12. */
rearrange_rkey_0:
ldrd r6, r4, [r1]
eor r12, r6, r6, lsr #9
and r12, r12, r3
eor r6, r12
eor r6, r6, r12, lsl #9 //SWAPMOVE(r6, r6, 0x00550055, 9);
eor r12, r4, r4, lsr #9
and r12, r12, r3
eor r4, r12
eor r4, r4, r12, lsl #9 //SWAPMOVE(r4, r4, 0x00550055, 9);
eor r12, r6, r6, lsr #18
and r12, r12, r10
eor r6, r12
eor r6, r6, r12, lsl #18 //SWAPMOVE(r6, r6, 0x3333, 18);
eor r12, r4, r4, lsr #18
and r12, r12, r10
eor r4, r12
eor r4, r4, r12, lsl #18 //SWAPMOVE(r4, r4, 0x3333, 18);
eor r12, r6, r6, lsr #12
and r12, r12, r11
eor r6, r12
eor r6, r6, r12, lsl #12 //SWAPMOVE(r6, r6, 0x000f000f, 12);
eor r12, r4, r4, lsr #12
and r12, r12, r11
eor r4, r12
eor r4, r4, r12, lsl #12 //SWAPMOVE(r4, r4, 0x000f000f, 12);
eor r12, r6, r6, lsr #24
and r12, r12, #0xff
eor r6, r12
eor r6, r6, r12, lsl #24 //SWAPMOVE(r6, r6, 0x000000ff, 24);
eor r12, r4, r4, lsr #24
and r12, r12, #0xff
eor r4, r12
eor r4, r4, r12, lsl #24 //SWAPMOVE(r4, r4, 0x000000ff, 24);
strd r6, r4, [r1]
bx lr
.align 2
/* Rearranges the pair of round-key words at [r1] into the fixsliced
 * representation used for the 2nd round of each group of 4, as a sequence
 * of SWAPMOVE operations. Caller must preset the masks r3 <- 0x11111111,
 * r10 <- 0x03030303, r11 <- 0x000f000f; clobbers r5, r7, r8. */
rearrange_rkey_1:
ldrd r5, r7, [r1]
eor r8, r7, r7, lsr #3
and r8, r8, r3
eor r7, r8
eor r7, r7, r8, lsl #3 //SWAPMOVE(r7, r7, 0x11111111, 3);
eor r8, r5, r5, lsr #3
and r8, r8, r3
eor r5, r8
eor r5, r5, r8, lsl #3 //SWAPMOVE(r5, r5, 0x11111111, 3);
eor r8, r7, r7, lsr #6
and r8, r8, r10
eor r7, r8
eor r7, r7, r8, lsl #6 //SWAPMOVE(r7, r7, 0x03030303, 6);
eor r8, r5, r5, lsr #6
and r8, r8, r10
eor r5, r8
eor r5, r5, r8, lsl #6 //SWAPMOVE(r5, r5, 0x03030303, 6);
eor r8, r7, r7, lsr #12
and r8, r8, r11
eor r7, r8
eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12);
eor r8, r5, r5, lsr #12
and r8, r8, r11
eor r5, r8
eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12);
eor r8, r7, r7, lsr #24
and r8, r8, #0xff
eor r7, r8
eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x000000ff, 24);
eor r8, r5, r5, lsr #24
and r8, r8, #0xff
eor r5, r8
eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24);
strd r5, r7, [r1]
bx lr
.align 2
/* Rearranges the pair of round-key words at [r1] into the fixsliced
 * representation used for the 3rd round of each group of 4, as a sequence
 * of SWAPMOVE operations. Caller must preset the masks r3 <- 0x0000aaaa,
 * r10 <- 0x00003333, r11 <- 0x0000f0f0; clobbers r5, r7, r8. */
rearrange_rkey_2:
ldrd r5, r7, [r1]
eor r8, r7, r7, lsr #15
and r8, r8, r3
eor r7, r8
eor r7, r7, r8, lsl #15 //SWAPMOVE(r7, r7, 0x0000aaaa, 15);
eor r8, r5, r5, lsr #15
and r8, r8, r3
eor r5, r8
eor r5, r5, r8, lsl #15 //SWAPMOVE(r5, r5, 0x0000aaaa, 15);
eor r8, r7, r7, lsr #18
and r8, r8, r10
eor r7, r8
eor r7, r7, r8, lsl #18 //SWAPMOVE(r7, r7, 0x00003333, 18);
eor r8, r5, r5, lsr #18
and r8, r8, r10
eor r5, r8
eor r5, r5, r8, lsl #18 //SWAPMOVE(r5, r5, 0x00003333, 18);
eor r8, r7, r7, lsr #12
and r8, r8, r11
eor r7, r8
eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x0000f0f0, 12); (r11 = 0x0000f0f0 here)
eor r8, r5, r5, lsr #12
and r8, r8, r11
eor r5, r8
eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x0000f0f0, 12);
eor r8, r7, r7, lsr #24
and r8, r8, #0xff
eor r7, r8
eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x000000ff, 24);
eor r8, r5, r5, lsr #24
and r8, r8, #0xff
eor r5, r8
eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24);
strd r5, r7, [r1]
bx lr
.align 2
/* Rearranges the pair of round-key words at [r1] into the fixsliced
 * representation used for the 4th round of each group of 4, as a sequence
 * of SWAPMOVE operations. Caller must preset r3 <- 0x0a0a0a0a and
 * r10 <- 0x00cc00cc; r11 is expected to still hold 0x0000f0f0 from the
 * rearrange_rkey_2 setup (the keyschedule does not reload it).
 * Clobbers r5, r7, r8. */
rearrange_rkey_3:
ldrd r5, r7, [r1]
eor r8, r7, r7, lsr #3
and r8, r8, r3
eor r7, r8
eor r7, r7, r8, lsl #3 //SWAPMOVE(r7, r7, 0x0a0a0a0a, 3);
eor r8, r5, r5, lsr #3
and r8, r8, r3
eor r5, r8
eor r5, r5, r8, lsl #3 //SWAPMOVE(r5, r5, 0x0a0a0a0a, 3);
eor r8, r7, r7, lsr #6
and r8, r8, r10
eor r7, r8
eor r7, r7, r8, lsl #6 //SWAPMOVE(r7, r7, 0x00cc00cc, 6);
eor r8, r5, r5, lsr #6
and r8, r8, r10
eor r5, r8
eor r5, r5, r8, lsl #6 //SWAPMOVE(r5, r5, 0x00cc00cc, 6);
eor r8, r7, r7, lsr #12
and r8, r8, r11
eor r7, r8
eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x0000f0f0, 12); (r11 = 0x0000f0f0 here)
eor r8, r5, r5, lsr #12
and r8, r8, r11
eor r5, r8
eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x0000f0f0, 12);
eor r8, r7, r7, lsr #24
and r8, r8, #0xff
eor r7, r8
eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x000000ff, 24);
eor r8, r5, r5, lsr #24
and r8, r8, #0xff
eor r5, r8
eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24);
strd r5, r7, [r1]
bx lr
/*****************************************************************************
* Code size optimized implementation of the GIFTb-128 key schedule.
* Compute the key schedule in the normal representation and then rearrange all
* the round keys in their respective fixsliced representations.
*****************************************************************************/
.align 2
@ void gift128_keyschedule(const u8* key, u32* rkey)
.global gift128_keyschedule
.type gift128_keyschedule,%function
gift128_keyschedule:
push {r1-r12, r14}
ldm r0, {r4-r7} //load key words
rev r4, r4
rev r5, r5
rev r6, r6
rev r7, r7
strd r7, r5, [r1], #8 //the first rkeys are not updated
strd r6, r4, [r1], #8 //the first rkeys are not updated
// keyschedule using classical representation for the first 20 rounds
movw r12, #0x3fff
lsl r12, r12, #16 //r12<- 0x3fff0000
movw r10, #0x000f //r10<- 0x0000000f
movw r9, #0x0fff //r9 <- 0x00000fff
// 9 calls x 4 updates each -> round keys for rounds 2 to 37
bl key_update
bl key_update
bl key_update
bl key_update
bl key_update
bl key_update
bl key_update
bl key_update
bl key_update
// the last two updates are done inline (rounds 38 and 39)
and r2, r10, r7, lsr #12
and r3, r7, r9
orr r2, r2, r3, lsl #4
and r3, r12, r7, lsr #2
orr r2, r2, r3
and r7, r7, #0x00030000
orr r7, r2, r7, lsl #14
strd r5, r7, [r1], #8 //penultimate key update
and r2, r10, r6, lsr #12
and r3, r6, r9
orr r2, r2, r3, lsl #4
and r3, r12, r6, lsr #2
orr r2, r2, r3
and r6, r6, #0x00030000
orr r6, r2, r6, lsl #14
strd r4, r6, [r1], #8 //ultimate key update
sub.w r1, r1, #320 //rewind r1 to rkey[0] (80 words = 320 bytes)
// rearrange the rkeys to their respective new representations
// (8 calls per representation, stepping 40 bytes = 5 rounds each time)
movw r3, #0x0055
movt r3, #0x0055 //r3 <- 0x00550055
movw r10, #0x3333 //r10<- 0x00003333
movw r11, #0x000f
movt r11, #0x000f //r11<- 0x000f000f
bl rearrange_rkey_0
add.w r1, r1, #40 //next rkey pair in this representation (5 rounds apart)
bl rearrange_rkey_0
add.w r1, r1, #40
bl rearrange_rkey_0
add.w r1, r1, #40
bl rearrange_rkey_0
add.w r1, r1, #40
bl rearrange_rkey_0
add.w r1, r1, #40
bl rearrange_rkey_0
add.w r1, r1, #40
bl rearrange_rkey_0
add.w r1, r1, #40
bl rearrange_rkey_0
sub.w r1, r1, #272 //r1 <- rkey + 8 (1st pair handled by rearrange_rkey_1)
movw r3, #0x1111
movt r3, #0x1111 //r3 <- 0x11111111
movw r10, #0x0303
movt r10, #0x0303 //r10<- 0x03030303
bl rearrange_rkey_1
add.w r1, r1, #40
bl rearrange_rkey_1
add.w r1, r1, #40
bl rearrange_rkey_1
add.w r1, r1, #40
bl rearrange_rkey_1
add.w r1, r1, #40
bl rearrange_rkey_1
add.w r1, r1, #40
bl rearrange_rkey_1
add.w r1, r1, #40
bl rearrange_rkey_1
add.w r1, r1, #40
bl rearrange_rkey_1
sub.w r1, r1, #272 //r1 <- rkey + 16 (1st pair handled by rearrange_rkey_2)
movw r3, #0xaaaa //r3 <- 0x0000aaaa
movw r10, #0x3333 //r10<- 0x00003333
movw r11, #0xf0f0 //r11<- 0x0000f0f0
bl rearrange_rkey_2
add.w r1, r1, #40
bl rearrange_rkey_2
add.w r1, r1, #40
bl rearrange_rkey_2
add.w r1, r1, #40
bl rearrange_rkey_2
add.w r1, r1, #40
bl rearrange_rkey_2
add.w r1, r1, #40
bl rearrange_rkey_2
add.w r1, r1, #40
bl rearrange_rkey_2
add.w r1, r1, #40
bl rearrange_rkey_2
sub.w r1, r1, #272 //r1 <- rkey + 24 (1st pair handled by rearrange_rkey_3)
movw r3, #0x0a0a
movt r3, #0x0a0a //r3 <- 0x0a0a0a0a
movw r10, #0x00cc
movt r10, #0x00cc //r10<- 0x00cc00cc
bl rearrange_rkey_3
add.w r1, r1, #40
bl rearrange_rkey_3
add.w r1, r1, #40
bl rearrange_rkey_3
add.w r1, r1, #40
bl rearrange_rkey_3
add.w r1, r1, #40
bl rearrange_rkey_3
add.w r1, r1, #40
bl rearrange_rkey_3
add.w r1, r1, #40
bl rearrange_rkey_3
add.w r1, r1, #40
bl rearrange_rkey_3
pop {r1-r12, r14}
bx lr
.align 2
/* Applies 5 consecutive GIFT-128 rounds in fixsliced representation.
 * Cipher state in r9-r12; r0 points to the round constants and r1 to the
 * round keys (both advanced by 5 entries / 5 key pairs on return).
 * Caller must preset r2 <- 0x11111111, r3 <- 0x000f000f, r4 <- 0x77777777.
 * r14 is used as a scratch register, so lr is saved to [sp] on entry and
 * reloaded before the final round. Clobbers r5-r8, r14. */
quintuple_round:
str.w r14, [sp]
ldr.w r5, [r0], #4
ldr.w r6, [r1], #4 //load rkey
ldr.w r7, [r1], #4 //load rkey
and r8, r11, r9 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r9, r8
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
and r8, r4, r12, lsr #1 //permutation layer
and r12, r12, r2
orr r12, r8, r12, lsl #3 //r12<- NIBBLE_ROR(r12, 1)
and r8, r4, r11
and r11, r2, r11, lsr #3
orr r11, r11, r8, lsl #1 //r11<- NIBBLE_ROR(r11, 3)
orr r14, r2, r2, lsl #1 //r14 <- 0x33333333
and r8, r14, r10, lsr #2
and r10, r10, r14
orr r10, r8, r10, lsl #2 //r10<- NIBBLE_ROR(r10, 2)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r9, r9, r5 //add rconst
ldr.w r5, [r0], #4
ldr.w r6, [r1], #4 //load rkey
ldr.w r7, [r1], #4 //load rkey
and r8, r12, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r12, r8
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9
mvn r14, r3, lsl #12 //r14<- 0x0fff0fff
and r8, r14, r9, lsr #4
and r9, r9, r3
orr r9, r8, r9, lsl #12 //r9 <- HALF_ROR(r9, 4)
and r8, r3, r11, lsr #12
and r11, r11, r14
orr r11, r8, r11, lsl #4 //r11<- HALF_ROR(r11, 12)
rev16 r10, r10 //r10<- HALF_ROR(r10, 8)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r12, r12, r5 //add rconst
ldr.w r5, [r0], #4
ldr.w r6, [r1], #4 //load rkey
ldr.w r7, [r1], #4 //load rkey
and r8, r9, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r9, r8
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
orr r14, r2, r2, lsl #2 //r14 <- 0x55555555 for SWAPMOVE
eor r8, r10, r10, lsr #1
and r8, r8, r14
eor r10, r10, r8
eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1)
eor r8, r12, r12, lsr #1
and r8, r8, r14, lsr #16
eor r12, r12, r8
eor r12, r12, r8, lsl #1 //SWAPMOVE(r12, r12, 0x55550000, 1)
eor r8, r11, r11, lsr #1
and r8, r8, r14, lsl #16
eor r11, r11, r8
eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x00005555, 1)
eor r10, r10, r6 //add 1st keyword
eor r11, r7, r11, ror #16 //add 2nd keyword
eor r9, r9, r5 //add rconst
ldr.w r5, [r0], #4
ldr.w r6, [r1], #4 //load rkey
ldr.w r7, [r1], #4 //load rkey
and r8, r11, r12, ror #16 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r8, r12, ror #16
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9
eor r14, r3, r3, lsl #8 //r14 <- 0x0f0f0f0f for BYTE_ROR
and r8, r14, r10, lsr #4
and r10, r10, r14
orr r10, r8, r10, lsl #4 //r10<- BYTE_ROR(r10, 4)
orr r14, r14, r14, lsl #2 //r14 <- 0x3f3f3f3f for BYTE_ROR
mvn r8, r14 //r8 <- 0xc0c0c0c0 for BYTE_ROR
and r8, r8, r11, lsl #6
and r11, r14, r11, lsr #2
orr r11, r11, r8 //r11<- BYTE_ROR(r11, 2)
mvn r8, r14, lsr #6
and r8, r8, r9, lsr #6
and r9, r14, r9
orr r9, r8, r9, lsl #2 //r9 <- BYTE_ROR(r9, 6)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r12, r12, r5 //add rconst
ldr.w r5, [r0], #4
ldr.w r6, [r1], #4 //load rkey
ldr.w r7, [r1], #4 //load rkey
ldr.w lr, [sp] //restore link register
and r8, r9, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r9, r8
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12, ror #24
eor r10, r6, r10, ror #16 //add 1st keyword
eor r11, r7, r11, ror #8 //add 2nd keyword
eor r9, r9, r5 //add rconst
eor r9, r9, r12 //swap r9 with r12
eor r12, r12, r9 //swap r9 with r12
eor r9, r9, r12 //swap r9 with r12
bx lr
/*****************************************************************************
* Code size optimized implementation of the GIFTb-128 block cipher.
* This function simply encrypts a 128-bit block, without any operation mode.
*****************************************************************************/
.align 2
@ void giftb128_encrypt_block(u8 *out, const u32* rkey, const u8 *block)
.global giftb128_encrypt_block
.type giftb128_encrypt_block,%function
giftb128_encrypt_block:
push {r0,r2-r12,r14} //r0 ('out') is saved so it can be reloaded after the rounds
sub.w sp, #4 //to store 'lr' when calling 'quintuple_round'
ldm r2, {r9-r12} // load plaintext words
rev r9, r9 //byte-swap each 32-bit state word
rev r10, r10
rev r11, r11
rev r12, r12
movw r2, #0x1111
movt r2, #0x1111 //r2 <- 0x11111111 (for NIBBLE_ROR)
movw r3, #0x000f
movt r3, #0x000f //r3 <- 0x000f000f (for HALF_ROR)
mvn r4, r2, lsl #3 //r4 <- 0x77777777 (for NIBBLE_ROR)
adr r0, rconst //r0 <- 'rconst' address
// 8 quintuple rounds = 40 rounds in total
bl quintuple_round
bl quintuple_round
bl quintuple_round
bl quintuple_round
bl quintuple_round
bl quintuple_round
bl quintuple_round
bl quintuple_round
ldr.w r0, [sp ,#4] //restore 'ctext' address
rev r9, r9 //byte-swap back before storing
rev r10, r10
rev r11, r11
rev r12, r12
stm r0, {r9-r12}
add.w sp, #4
pop {r0,r2-r12,r14}
bx lr
\ No newline at end of file
// NIST LWC API parameters for GIFT-COFB
#define CRYPTO_KEYBYTES 16 // 128-bit key
#define CRYPTO_NSECBYTES 0 // no secret message number
#define CRYPTO_NPUBBYTES 16 // 128-bit public nonce
#define CRYPTO_ABYTES 16 // 128-bit authentication tag
#define CRYPTO_NOOVERLAP 1
#ifndef GIFT_COFB_H_
#define GIFT_COFB_H_
// Authentication tag length in bytes
#define TAG_SIZE 16
// Values for the 'encrypting' parameter of giftcofb_crypt
#define COFB_ENCRYPT 1
#define COFB_DECRYPT 0
// NOTE: the macros below are GNU statement expressions; they use the
// caller-declared scratch variables tmp0/tmp1 and operate on arrays of
// 32-bit words holding the block in little-endian byte order.
// Doubling (multiplication by 2) of the 64-bit half-block x[0..1]; the
// masks/shifts implement the bit-shift across the little-endian byte
// layout, with reduction constant 27 folded into the last byte.
#define DOUBLE_HALF_BLOCK(x) ({ \
tmp0 = (x)[0]; \
(x)[0] = (((x)[0] & 0x7f7f7f7f) << 1) | (((x)[0] & 0x80808080) >> 15); \
(x)[0] |= ((x)[1] & 0x80808080) << 17; \
(x)[1] = (((x)[1] & 0x7f7f7f7f) << 1) | (((x)[1] & 0x80808080) >> 15); \
(x)[1] ^= (((tmp0 >> 7) & 1) * 27) << 24; \
})
// Tripling of the 64-bit half-block: 3*x = 2*x ^ x
#define TRIPLE_HALF_BLOCK(x) ({ \
tmp0 = (x)[0]; \
tmp1 = (x)[1]; \
(x)[0] = (((x)[0] & 0x7f7f7f7f) << 1) | (((x)[0] & 0x80808080) >> 15); \
(x)[0] |= ((x)[1] & 0x80808080) << 17; \
(x)[1] = (((x)[1] & 0x7f7f7f7f) << 1) | (((x)[1] & 0x80808080) >> 15); \
(x)[1] ^= (((tmp0 >> 7) & 1) * 27) << 24; \
(x)[0] ^= tmp0; \
(x)[1] ^= tmp1; \
})
// COFB feedback function G: swaps the two 64-bit halves of x and replaces
// the (old) first half by its 1-bit left rotation
#define G(x) ({ \
tmp0 = (x)[0]; \
tmp1 = (x)[1]; \
(x)[0] = (x)[2]; \
(x)[1] = (x)[3]; \
(x)[2] = ((tmp0 & 0x7f7f7f7f) << 1) | ((tmp0 & 0x80808080) >> 15); \
(x)[2] |= ((tmp1 & 0x80808080) << 17); \
(x)[3] = ((tmp1 & 0x7f7f7f7f) << 1) | ((tmp1 & 0x80808080) >> 15); \
(x)[3] |= ((tmp0 & 0x80808080) << 17); \
})
// 128-bit XOR: x <- y ^ z
#define XOR_BLOCK(x, y, z) ({ \
(x)[0] = (y)[0] ^ (z)[0]; \
(x)[1] = (y)[1] ^ (z)[1]; \
(x)[2] = (y)[2] ^ (z)[2]; \
(x)[3] = (y)[3] ^ (z)[3]; \
})
// XOR the 64-bit offset into the first half of block x
#define XOR_TOP_BAR_BLOCK(x, y) ({ \
(x)[0] ^= (y)[0]; \
(x)[1] ^= (y)[1]; \
})
// rho1: y <- G(y) (in place), then d <- pad(m, n bytes) ^ y
#define RHO1(d, y, m, n) ({ \
G(y); \
padding(d,m,n); \
XOR_BLOCK(d, d, y); \
})
// rho (encryption): c <- y ^ m, then apply rho1
#define RHO(y, m, x, c, n) ({ \
XOR_BLOCK(c, y, m); \
RHO1(x, y, m, n); \
})
// rho' (decryption): m <- y ^ c, then apply rho1
#define RHO_PRIME(y, c, x, m, n) ({ \
XOR_BLOCK(m, y, c); \
RHO1(x, y, m, n); \
})
#endif // GIFT_COFB_H_
\ No newline at end of file
#include <string.h>
#include "cofb.h"
#include "giftb128.h"
/*
 * Pads a partial input block of 'no_of_bytes' valid bytes into the full
 * 16-byte block d (10* padding in the little-endian word layout):
 * the valid bytes are copied, a single 0x80 byte is appended, and the
 * remainder of the block is zeroed. A full block is copied unchanged.
 */
static inline void padding(u32* d, const u32* s, const u32 no_of_bytes){
    if (no_of_bytes == 0) {
        /* Empty input: the block is just the padding byte. */
        d[0] = 0x00000080; // little-endian
        d[1] = 0x00000000;
        d[2] = 0x00000000;
        d[3] = 0x00000000;
    } else if (no_of_bytes < GIFT128_BLOCK_SIZE) {
        const u32 last  = no_of_bytes / 4;        /* word receiving 0x80  */
        const u32 shift = (no_of_bytes % 4) * 8;  /* bit offset of 0x80   */
        u32 i;
        for (i = 0; i <= last; i++)
            d[i] = s[i];
        /* Clear the bytes past the message and insert the 0x80 marker. */
        d[last] = (d[last] & ~(0xffffffffL << shift)) | (0x00000080L << shift);
        for (i = last + 1; i < 4; i++)
            d[i] = 0x00000000;
    } else {
        /* Full block: plain word-wise copy. */
        d[0] = s[0];
        d[1] = s[1];
        d[2] = s[2];
        d[3] = s[3];
    }
}
/****************************************************************************
* Constant-time implementation of the GIFT-COFB authenticated cipher based on
* fixsliced GIFTb-128. Encryption/decryption is handled by the same function,
* depending on the 'mode' parameter (1/0).
***************************************************************************/
/*
 * Core GIFT-COFB routine shared by encryption and decryption ('encrypting'
 * selects the direction). For encryption, 'in' holds the message and 'out'
 * receives ciphertext||tag; for decryption, 'in' holds ciphertext||tag and
 * 'out' receives the recovered message. Returns 0 on success, -1 if the
 * ciphertext is shorter than the tag, and a nonzero value on tag mismatch.
 */
int giftcofb_crypt(u8* out, const u8* key, const u8* nonce, const u8* ad,
u32 ad_len, const u8* in, u32 in_len, const int encrypting) {
u32 tmp0, tmp1, emptyA, emptyM, offset[2];
u32 input[4], rkey[80];
u8 Y[GIFT128_BLOCK_SIZE];
// When decrypting, 'in' carries the tag after the ciphertext: strip it
if (!encrypting) {
if (in_len < TAG_SIZE)
return -1;
in_len -= TAG_SIZE;
}
// Flags for empty associated data / empty message (drive the offset tweaks)
if(ad_len == 0)
emptyA = 1;
else
emptyA = 0;
if(in_len == 0)
emptyM =1;
else
emptyM = 0;
// Y <- E_K(nonce); the offset is initialised with the first half of Y.
// NOTE(review): the (u32*)Y casts assume u8[16] may be read as u32 words
// (alignment/aliasing) -- inherited from the reference implementation.
gift128_keyschedule(key, rkey);
giftb128_encrypt_block(Y, rkey, nonce);
offset[0] = ((u32*)Y)[0];
offset[1] = ((u32*)Y)[1];
// Absorb all complete associated-data blocks except the last one
while(ad_len > GIFT128_BLOCK_SIZE){
RHO1(input, (u32*)Y, (u32*)ad, GIFT128_BLOCK_SIZE);
DOUBLE_HALF_BLOCK(offset);
XOR_TOP_BAR_BLOCK(input, offset);
giftb128_encrypt_block(Y, rkey, (u8*)input);
ad += GIFT128_BLOCK_SIZE;
ad_len -= GIFT128_BLOCK_SIZE;
}
// Last AD block: triple the offset once, once more if the block is
// partial or the AD is empty, and twice more if the message is empty
TRIPLE_HALF_BLOCK(offset);
if((ad_len % GIFT128_BLOCK_SIZE != 0) || (emptyA))
TRIPLE_HALF_BLOCK(offset);
if(emptyM) {
TRIPLE_HALF_BLOCK(offset);
TRIPLE_HALF_BLOCK(offset);
}
RHO1(input, (u32*)Y, (u32*)ad, ad_len);
XOR_TOP_BAR_BLOCK(input, offset);
giftb128_encrypt_block(Y, rkey, (u8*)input);
// Process all complete message blocks except the last one
while (in_len > GIFT128_BLOCK_SIZE){
DOUBLE_HALF_BLOCK(offset);
if (encrypting)
RHO((u32*)Y, (u32*)in, input, (u32*)out, GIFT128_BLOCK_SIZE);
else
RHO_PRIME((u32*)Y, (u32*)in, input, (u32*)out, GIFT128_BLOCK_SIZE);
XOR_TOP_BAR_BLOCK(input, offset);
giftb128_encrypt_block(Y, rkey, (u8*)input);
in += GIFT128_BLOCK_SIZE;
out += GIFT128_BLOCK_SIZE;
in_len -= GIFT128_BLOCK_SIZE;
}
// Last message block (skipped entirely when the message is empty)
if(!emptyM){
TRIPLE_HALF_BLOCK(offset);
if(in_len % GIFT128_BLOCK_SIZE != 0)
TRIPLE_HALF_BLOCK(offset);
if (encrypting) {
RHO((u32*)Y, (u32*)in, input, (u32*)out, in_len);
out += in_len;
}
else {
RHO_PRIME((u32*)Y, (u32*)in, input, (u32*)out, in_len);
in += in_len;
}
XOR_TOP_BAR_BLOCK(input, offset);
giftb128_encrypt_block(Y, rkey, (u8*)input);
}
if (encrypting) { // encryption mode
memcpy(out, Y, TAG_SIZE);
return 0;
}
// decrypting
// Constant-time tag check: OR-accumulate the XOR of all tag bytes so the
// result is 0 iff the computed tag equals the received one ('in' now
// points at the received tag)
tmp0 = 0;
for(tmp1 = 0; tmp1 < TAG_SIZE; tmp1++)
tmp0 |= in[tmp1] ^ Y[tmp1];
return tmp0;
}
/*
 * NIST LWC AEAD encryption entry point for GIFT-COFB.
 * Encrypts m (mlen bytes) under key k and nonce npub, authenticating the
 * associated data ad (adlen bytes). Writes ciphertext||tag to c and the
 * total output length (mlen + TAG_SIZE) to *clen. Returns 0 on success.
 */
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k) {
    (void)nsec; /* GIFT-COFB does not use a secret message number */
    /* output = ciphertext || 16-byte tag */
    *clen = mlen + TAG_SIZE;
    return giftcofb_crypt(c, k, npub, ad, adlen, m, mlen, COFB_ENCRYPT);
}
/*
 * NIST LWC AEAD decryption entry point for GIFT-COFB.
 * Verifies and decrypts ciphertext||tag c (clen bytes) under key k and
 * nonce npub, authenticating ad (adlen bytes). On success writes the
 * message to m, its length to *mlen, and returns 0; returns nonzero on
 * tag mismatch or if clen is too short to hold a tag.
 */
int crypto_aead_decrypt(unsigned char* m, unsigned long long *mlen,
                        unsigned char* nsec, const unsigned char* c,
                        unsigned long long clen, const unsigned char* ad,
                        unsigned long long adlen, const unsigned char* npub,
                        const unsigned char *k) {
    (void)nsec; /* GIFT-COFB does not use a secret message number */
    /* Reject before computing *mlen: clen - TAG_SIZE would wrap around on
     * the unsigned type and store a huge bogus plaintext length even
     * though giftcofb_crypt itself rejects the short input. */
    if (clen < TAG_SIZE)
        return -1;
    *mlen = clen - TAG_SIZE;
    return giftcofb_crypt(m, k, npub, ad, adlen, c, clen, COFB_DECRYPT);
}
#ifndef GIFT128_H_
#define GIFT128_H_
// GIFT-128 key length in bytes (128-bit key)
#define KEY_SIZE 16
// GIFT-128 block length in bytes (128-bit block)
#define GIFT128_BLOCK_SIZE 16
typedef unsigned char u8;
typedef unsigned int u32; // assumes 'int' is 32 bits on the target -- TODO confirm
// Expands the 16-byte key into the round-key array (80 words, see rkey[80]
// in the callers); implemented in assembly.
extern void gift128_keyschedule(const u8* key, u32* rkey);
// Encrypts one 16-byte block with GIFTb-128 using precomputed round keys;
// implemented in assembly.
extern void giftb128_encrypt_block(u8* out_block, const u32* rkey, const u8* in_block);
#endif // GIFT128_H_
\ No newline at end of file
/****************************************************************************
* Fully unrolled ARM assembly implementation of the GIFTn-128 block cipher.
* This implementation focuses on speed, at the cost of a large code size.
* See "Fixslicing: A New GIFT Representation" paper available at
* https://eprint.iacr.org/2020/412 for more details.
*
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
* @date March 2020
****************************************************************************/
.syntax unified
.thumb
/*****************************************************************************
* Fully unrolled implementation of the GIFT-128 key schedule according to the
* fixsliced representation.
*****************************************************************************/
@ void gift128_keyschedule(const u8* key, u32* rkey)
.global gift128_keyschedule
.type gift128_keyschedule,%function
gift128_keyschedule:
push {r2-r12, r14}
ldm r0, {r4-r7} //load key words
rev r4, r4
rev r5, r5
rev r6, r6
rev r7, r7
str.w r6, [r1, #8]
str.w r4, [r1, #12]
str.w r7, [r1]
str.w r5, [r1, #4]
// keyschedule using classical representation for the first 20 rounds
movw r12, #0x3fff
lsl r12, r12, #16 //r12<- 0x3fff0000
movw r10, #0x000f //r10<- 0x0000000f
movw r9, #0x0fff //r9 <- 0x00000fff
// 1st classical key update
and r2, r10, r7, lsr #12
and r3, r7, r9
orr r2, r2, r3, lsl #4
and r3, r12, r7, lsr #2
orr r2, r2, r3
and r7, r7, #0x00030000
orr r7, r2, r7, lsl #14
str.w r5, [r1, #16]
str.w r7, [r1, #20]
// 2nd classical key update
and r2, r10, r6, lsr #12
and r3, r6, r9
orr r2, r2, r3, lsl #4
and r3, r12, r6, lsr #2
orr r2, r2, r3
and r6, r6, #0x00030000
orr r6, r2, r6, lsl #14
str.w r4, [r1, #24]
str.w r6, [r1, #28]
// 3rd classical key update
and r2, r10, r5, lsr #12
and r3, r5, r9
orr r2, r2, r3, lsl #4
and r3, r12, r5, lsr #2
orr r2, r2, r3
and r5, r5, #0x00030000
orr r5, r2, r5, lsl #14
str.w r7, [r1, #32]
str.w r5, [r1, #36]
// 4th classical key update
and r2, r10, r4, lsr #12
and r3, r4, r9
orr r2, r2, r3, lsl #4
and r3, r12, r4, lsr #2
orr r2, r2, r3
and r4, r4, #0x00030000
orr r4, r2, r4, lsl #14
str.w r6, [r1, #40]
str.w r4, [r1, #44]
// 5th classical key update
and r2, r10, r7, lsr #12
and r3, r7, r9
orr r2, r2, r3, lsl #4
and r3, r12, r7, lsr #2
orr r2, r2, r3
and r7, r7, #0x00030000
orr r7, r2, r7, lsl #14
str.w r5, [r1, #48]
str.w r7, [r1, #52]
// 6th classical key update
and r2, r10, r6, lsr #12
and r3, r6, r9
orr r2, r2, r3, lsl #4
and r3, r12, r6, lsr #2
orr r2, r2, r3
and r6, r6, #0x00030000
orr r6, r2, r6, lsl #14
str.w r4, [r1, #56]
str.w r6, [r1, #60]
// 7th classical key update
and r2, r10, r5, lsr #12
and r3, r5, r9
orr r2, r2, r3, lsl #4
and r3, r12, r5, lsr #2
orr r2, r2, r3
and r5, r5, #0x00030000
orr r5, r2, r5, lsl #14
str.w r7, [r1, #64]
str.w r5, [r1, #68]
// 8th classical key update
and r2, r10, r4, lsr #12
and r3, r4, r9
orr r2, r2, r3, lsl #4
and r3, r12, r4, lsr #2
orr r2, r2, r3
and r4, r4, #0x00030000
orr r4, r2, r4, lsl #14
str.w r6, [r1, #72]
str.w r4, [r1, #76]
// rearrange the rkeys to their respective new representations
// REARRANGE_RKEY_0
movw r3, #0x0055
movt r3, #0x0055 //r3 <- 0x00550055
movw r10, #0x3333 //r10<- 0x00003333
movw r11, #0x000f
movt r11, #0x000f //r11<- 0x000f000f
ldrd r6, r4, [r1]
eor r12, r6, r6, lsr #9
and r12, r12, r3
eor r6, r12
eor r6, r6, r12, lsl #9 //SWAPMOVE(r6, r6, 0x00550055, 9);
eor r12, r4, r4, lsr #9
and r12, r12, r3
eor r4, r12
eor r4, r4, r12, lsl #9 //SWAPMOVE(r4, r4, 0x00550055, 9);
eor r12, r6, r6, lsr #18
and r12, r12, r10
eor r6, r12
eor r6, r6, r12, lsl #18 //SWAPMOVE(r6, r6, 0x3333, 18);
eor r12, r4, r4, lsr #18
and r12, r12, r10
eor r4, r12
eor r4, r4, r12, lsl #18 //SWAPMOVE(r4, r4, 0x3333, 18);
eor r12, r6, r6, lsr #12
and r12, r12, r11
eor r6, r12
eor r6, r6, r12, lsl #12 //SWAPMOVE(r6, r6, 0x000f000f, 12);
eor r12, r4, r4, lsr #12
and r12, r12, r11
eor r4, r12
eor r4, r4, r12, lsl #12 //SWAPMOVE(r4, r4, 0x000f000f, 12);
eor r12, r6, r6, lsr #24
and r12, r12, #0xff
eor r6, r12
eor r6, r6, r12, lsl #24 //SWAPMOVE(r6, r6, 0x000000ff, 24);
eor r12, r4, r4, lsr #24
and r12, r12, #0xff
eor r4, r12
eor r4, r4, r12, lsl #24 //SWAPMOVE(r4, r4, 0x000000ff, 24);
strd r6, r4, [r1]
ldrd r6, r4, [r1, #40]
eor r12, r6, r6, lsr #9
and r12, r12, r3
eor r6, r12
eor r6, r6, r12, lsl #9 //SWAPMOVE(r6, r6, 0x00550055, 9);
eor r12, r4, r4, lsr #9
and r12, r12, r3
eor r4, r12
eor r4, r4, r12, lsl #9 //SWAPMOVE(r4, r4, 0x00550055, 9);
eor r12, r6, r6, lsr #18
and r12, r12, r10
eor r6, r12
eor r6, r6, r12, lsl #18 //SWAPMOVE(r6, r6, 0x3333, 18);
eor r12, r4, r4, lsr #18
and r12, r12, r10
eor r4, r12
eor r4, r4, r12, lsl #18 //SWAPMOVE(r4, r4, 0x3333, 18);
eor r12, r6, r6, lsr #12
and r12, r12, r11
eor r6, r12
eor r6, r6, r12, lsl #12 //SWAPMOVE(r6, r6, 0x000f000f, 12);
eor r12, r4, r4, lsr #12
and r12, r12, r11
eor r4, r12
eor r4, r4, r12, lsl #12 //SWAPMOVE(r4, r4, 0x000f000f, 12);
eor r12, r6, r6, lsr #24
and r12, r12, #0xff
eor r6, r12
eor r6, r6, r12, lsl #24 //SWAPMOVE(r6, r6, 0x000000ff, 24);
eor r12, r4, r4, lsr #24
and r12, r12, #0xff
eor r4, r12
eor r4, r4, r12, lsl #24 //SWAPMOVE(r4, r4, 0x000000ff, 24);
str.w r6, [r1, #40]
str.w r4, [r1, #44]
// REARRANGE_RKEY_1
movw r3, #0x1111
movt r3, #0x1111
movw r10, #0x0303
movt r10, #0x0303
ldrd r5, r7, [r1, #8]
eor r8, r7, r7, lsr #3
and r8, r8, r3
eor r7, r8
eor r7, r7, r8, lsl #3 //SWAPMOVE(r7, r7, 0x11111111, 3);
eor r8, r5, r5, lsr #3
and r8, r8, r3
eor r5, r8
eor r5, r5, r8, lsl #3 //SWAPMOVE(r5, r5, 0x11111111, 3);
eor r8, r7, r7, lsr #6
and r8, r8, r10
eor r7, r8
eor r7, r7, r8, lsl #6 //SWAPMOVE(r7, r7, 0x03030303, 6);
eor r8, r5, r5, lsr #6
and r8, r8, r10
eor r5, r8
eor r5, r5, r8, lsl #6 //SWAPMOVE(r5, r5, 0x03030303, 6);
eor r8, r7, r7, lsr #12
and r8, r8, r11
eor r7, r8
eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12);
eor r8, r5, r5, lsr #12
and r8, r8, r11
eor r5, r8
eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12);
eor r8, r7, r7, lsr #24
and r8, r8, #0xff
eor r7, r8
eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x000000ff, 24);
eor r8, r5, r5, lsr #24
and r8, r8, #0xff
eor r5, r8
eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24);
ldr.w r12, [r1, #48]
ldr.w r14, [r1, #52]
str.w r5, [r1, #8]
str.w r7, [r1, #12]
eor r8, r14, r14, lsr #3
and r8, r8, r3
eor r14, r8
eor r14, r14, r8, lsl #3 //SWAPMOVE(r7, r7, 0x11111111, 3);
eor r8, r12, r12, lsr #3
and r8, r8, r3
eor r12, r8
eor r12, r12, r8, lsl #3 //SWAPMOVE(r5, r5, 0x11111111, 3);
eor r8, r14, r14, lsr #6
and r8, r8, r10
eor r14, r8
eor r14, r14, r8, lsl #6 //SWAPMOVE(r7, r7, 0x03030303, 6);
eor r8, r12, r12, lsr #6
and r8, r8, r10
eor r12, r8
eor r12, r12, r8, lsl #6 //SWAPMOVE(r5, r5, 0x03030303, 6);
eor r8, r14, r14, lsr #12
and r8, r8, r11
eor r14, r8
eor r14, r14, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12);
eor r8, r12, r12, lsr #12
and r8, r8, r11
eor r12, r8
eor r12, r12, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12);
eor r8, r14, r14, lsr #24
and r8, r8, #0xff
eor r14, r8
eor r14, r14, r8, lsl #24 //SWAPMOVE(r7, r7, 0x000000ff, 24);
eor r8, r12, r12, lsr #24
and r8, r8, #0xff
eor r12, r8
eor r12, r12, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24);
str.w r12, [r1, #48]
str.w r14, [r1, #52]
// REARRANGE_RKEY_2
movw r3, #0xaaaa
movw r10, #0x3333
movw r11, #0xf0f0
ldrd r5, r7, [r1, #16]
eor r8, r7, r7, lsr #15
and r8, r8, r3
eor r7, r8
eor r7, r7, r8, lsl #15 //SWAPMOVE(r7, r7, 0x0000aaaa, 15);
eor r8, r5, r5, lsr #15
and r8, r8, r3
eor r5, r8
eor r5, r5, r8, lsl #15 //SWAPMOVE(r5, r5, 0x0000aaaa, 15);
eor r8, r7, r7, lsr #18
and r8, r8, r10
eor r7, r8
eor r7, r7, r8, lsl #18 //SWAPMOVE(r7, r7, 0x00003333, 18);
eor r8, r5, r5, lsr #18
and r8, r8, r10
eor r5, r8
eor r5, r5, r8, lsl #18 //SWAPMOVE(r5, r5, 0x00003333, 18);
eor r8, r7, r7, lsr #12
and r8, r8, r11
eor r7, r8
eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12);
eor r8, r5, r5, lsr #12
and r8, r8, r11
eor r5, r8
eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12);
eor r8, r7, r7, lsr #24
and r8, r8, #0xff
eor r7, r8
eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x00000ff, 24);
eor r8, r5, r5, lsr #24
and r8, r8, #0xff
eor r5, r8
eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24);
strd r5, r7, [r1, #16]
ldrd r5, r7, [r1, #56]
eor r8, r7, r7, lsr #15
and r8, r8, r3
eor r7, r8
eor r7, r7, r8, lsl #15 //SWAPMOVE(r7, r7, 0x0000aaaa, 15);
eor r8, r5, r5, lsr #15
and r8, r8, r3
eor r5, r8
eor r5, r5, r8, lsl #15 //SWAPMOVE(r5, r5, 0x0000aaaa, 15);
eor r8, r7, r7, lsr #18
and r8, r8, r10
eor r7, r8
eor r7, r7, r8, lsl #18 //SWAPMOVE(r7, r7, 0x00003333, 18);
eor r8, r5, r5, lsr #18
and r8, r8, r10
eor r5, r8
eor r5, r5, r8, lsl #18 //SWAPMOVE(r5, r5, 0x00003333, 18);
eor r8, r7, r7, lsr #12
and r8, r8, r11
eor r7, r8
eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12);
eor r8, r5, r5, lsr #12
and r8, r8, r11
eor r5, r8
eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12);
eor r8, r7, r7, lsr #24
and r8, r8, #0xff
eor r7, r8
eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x000000ff, 24);
eor r8, r5, r5, lsr #24
and r8, r8, #0xff
eor r5, r8
eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24);
str.w r5, [r1, #56]
str.w r7, [r1, #60]
// REARRANGE_RKEY_3
movw r3, #0x0a0a
movt r3, #0x0a0a //r3 <- 0x0a0a0a0a
movw r10, #0x00cc
movt r10, #0x00cc //r10<- 0x00cc00cc
ldrd r5, r7, [r1, #24]
eor r8, r7, r7, lsr #3
and r8, r8, r3
eor r7, r8
eor r7, r7, r8, lsl #3 //SWAPMOVE(r7, r7, 0x0a0a0a0a, 3);
eor r8, r5, r5, lsr #3
and r8, r8, r3
eor r5, r8
eor r5, r5, r8, lsl #3 //SWAPMOVE(r5, r5, 0x0a0a0a0a, 3);
eor r8, r7, r7, lsr #6
and r8, r8, r10
eor r7, r8
eor r7, r7, r8, lsl #6 //SWAPMOVE(r7, r7, 0x00cc00cc, 6);
eor r8, r5, r5, lsr #6
and r8, r8, r10
eor r5, r8
eor r5, r5, r8, lsl #6 //SWAPMOVE(r5, r5, 0x00cc00cc, 6);
eor r8, r7, r7, lsr #12
and r8, r8, r11
eor r7, r8
eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12);
eor r8, r5, r5, lsr #12
and r8, r8, r11
eor r5, r8
eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12);
eor r8, r7, r7, lsr #24
and r8, r8, #0xff
eor r7, r8
eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x000000ff, 24);
eor r8, r5, r5, lsr #24
and r8, r8, #0xff
eor r5, r8
eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24);
strd r5, r7, [r1, #24]
ldrd r5, r7, [r1, #64]
eor r8, r7, r7, lsr #3
and r8, r8, r3
eor r7, r8
eor r7, r7, r8, lsl #3 //SWAPMOVE(r7, r7, 0x0a0a0a0a, 3);
eor r8, r5, r5, lsr #3
and r8, r8, r3
eor r5, r8
eor r5, r5, r8, lsl #3 //SWAPMOVE(r5, r5, 0x0a0a0a0a, 3);
eor r8, r7, r7, lsr #6
and r8, r8, r10
eor r7, r8
eor r7, r7, r8, lsl #6 //SWAPMOVE(r7, r7, 0x00cc00cc, 6);
eor r8, r5, r5, lsr #6
and r8, r8, r10
eor r5, r8
eor r5, r5, r8, lsl #6 //SWAPMOVE(r5, r5, 0x00cc00cc, 6);
eor r8, r7, r7, lsr #12
and r8, r8, r11
eor r7, r8
eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12);
eor r8, r5, r5, lsr #12
and r8, r8, r11
eor r5, r8
eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12);
eor r8, r7, r7, lsr #24
and r8, r8, #0xff
eor r7, r8
eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x0000ff00, 24);
eor r8, r5, r5, lsr #24
and r8, r8, #0xff
eor r5, r8
eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x0000ff00, 24);
str.w r5, [r1, #64]
str.w r7, [r1, #68]
//keyschedule according to the new representations
// KEY_DOUBLE/TRIPLE_UPDATE_0
movw r10, #0x3333
eor r12, r10, r10, lsl #16
mvn r11, r12
movw r9, #0x4444
movt r9, #0x5555
movw r8, #0x1100
movt r8, #0x5555
ldrd r4, r5, [r1]
and r2, r12, r4, ror #24
and r4, r4, r11
orr r4, r2, r4, ror #16 //KEY_TRIPLE_UPDATE_1(r4)
eor r2, r4, r4, lsr #1
and r2, r2, r8
eor r4, r4, r2
eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x55551100, 1)
eor r2, r5, r5, lsr #16
and r2, r2, r10
eor r5, r5, r2
eor r5, r5, r2, lsl #16 //SWAPMOVE(r5, r5, 0x00003333, 16)
eor r2, r5, r5, lsr #1
and r2, r2, r9
eor r5, r5, r2
eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x555544444, 1)
str.w r5, [r1, #80]
str.w r4, [r1, #84]
and r2, r12, r5, ror #24
and r5, r5, r11
orr r5, r2, r5, ror #16 //KEY_TRIPLE_UPDATE_1(r5)
eor r2, r5, r5, lsr #1
and r2, r2, r8
eor r5, r5, r2
eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x55551100, 1)
eor r2, r4, r4, lsr #16
and r2, r2, r10
eor r4, r4, r2
eor r4, r4, r2, lsl #16 //SWAPMOVE(r4, r4, 0x00003333, 16)
eor r2, r4, r4, lsr #1
and r2, r2, r9
eor r4, r4, r2
eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x555544444, 1)
str.w r4, [r1, #160]
str.w r5, [r1, #164]
and r2, r12, r4, ror #24
and r4, r4, r11
orr r4, r2, r4, ror #16 //KEY_TRIPLE_UPDATE_1(r4)
eor r2, r4, r4, lsr #1
and r2, r2, r8
eor r4, r4, r2
eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x55551100, 1)
eor r2, r5, r5, lsr #16
and r2, r2, r10
eor r5, r5, r2
eor r5, r5, r2, lsl #16 //SWAPMOVE(r5, r5, 0x00003333, 16)
eor r2, r5, r5, lsr #1
and r2, r2, r9
eor r5, r5, r2
eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x555544444, 1)
strd r5, r4, [r1, #240]
ldrd r4, r5, [r1, #40]
and r2, r12, r4, ror #24
and r4, r4, r11
orr r4, r2, r4, ror #16 //KEY_TRIPLE_UPDATE_1(r4)
eor r2, r4, r4, lsr #1
and r2, r2, r8
eor r4, r4, r2
eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x55551100, 1)
eor r2, r5, r5, lsr #16
and r2, r2, r10
eor r5, r5, r2
eor r5, r5, r2, lsl #16 //SWAPMOVE(r5, r5, 0x00003333, 16)
eor r2, r5, r5, lsr #1
and r2, r2, r9
eor r5, r5, r2
eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x555544444, 1)
str.w r5, [r1, #120]
str.w r4, [r1, #124]
and r2, r12, r5, ror #24
and r5, r5, r11
orr r5, r2, r5, ror #16 //KEY_TRIPLE_UPDATE_1(r5)
eor r2, r5, r5, lsr #1
and r2, r2, r8
eor r5, r5, r2
eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x55551100, 1)
eor r2, r4, r4, lsr #16
and r2, r2, r10
eor r4, r4, r2
eor r4, r4, r2, lsl #16 //SWAPMOVE(r4, r4, 0x00003333, 16)
eor r2, r4, r4, lsr #1
and r2, r2, r9
eor r4, r4, r2
eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x555544444, 1)
str.w r4, [r1, #200]
str.w r5, [r1, #204]
and r2, r12, r4, ror #24
and r4, r4, r11
orr r4, r2, r4, ror #16 //KEY_TRIPLE_UPDATE_1(r4)
eor r2, r4, r4, lsr #1
and r2, r2, r8
eor r4, r4, r2
eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x55551100, 1)
eor r2, r5, r5, lsr #16
and r2, r2, r10
eor r5, r5, r2
eor r5, r5, r2, lsl #16 //SWAPMOVE(r5, r5, 0x00003333, 16)
eor r2, r5, r5, lsr #1
and r2, r2, r9
eor r5, r5, r2
eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x555544444, 1)
str.w r5, [r1, #280]
str.w r4, [r1, #284]
// KEY_DOUBLE/TRIPLE_UPDATE_2
// masks
movw r12, #0x0f00
movt r12, #0x0f00
movw r11, #0x0003
movt r11, #0x0003
movw r10, #0x003f
movt r10, #0x003f
lsl r9, r11, #8 //r9 <- 0x03000300
and r8, r10, r10, lsr #3 //r8 <- 0x00070007
orr r7, r8, r8, lsl #2 //r7 <- 0x001f001f
ldrd r4, r5, [r1, #8]
and r2, r9, r4, lsr #6
and r3, r4, r10, lsl #8
orr r2, r2, r3, lsl #2
and r3, r8, r4, lsr #5
orr r2, r2, r3
and r4, r4, r7
orr r4, r2, r4, lsl #3 //KEY_TRIPLE_UPDATE_2(r4)
and r2, r12, r5, lsr #4
and r3, r5, r12
orr r2, r2, r3, lsl #4
and r3, r11, r5, lsr #6
orr r2, r2, r3
and r5, r5, r10
orr r5, r2, r5, lsl #2 //KEY_DOUBLE_UPDATE_2(r5)
str.w r5, [r1, #88]
str.w r4, [r1, #92]
and r2, r9, r5, lsr #6
and r3, r5, r10, lsl #8
orr r2, r2, r3, lsl #2
and r3, r8, r5, lsr #5
orr r2, r2, r3
and r5, r5, r7
orr r5, r2, r5, lsl #3 //KEY_TRIPLE_UPDATE_2(r5)
and r2, r12, r4, lsr #4
and r3, r4, r12
orr r2, r2, r3, lsl #4
and r3, r11, r4, lsr #6
orr r2, r2, r3
and r4, r4, r10
orr r4, r2, r4, lsl #2 //KEY_DOUBLE_UPDATE_2(r4)
str.w r4, [r1, #168]
str.w r5, [r1, #172]
and r2, r9, r4, lsr #6
and r3, r4, r10, lsl #8
orr r2, r2, r3, lsl #2
and r3, r8, r4, lsr #5
orr r2, r2, r3
and r4, r4, r7
orr r4, r2, r4, lsl #3 //KEY_TRIPLE_UPDATE_2(r4)
and r2, r12, r5, lsr #4
and r3, r5, r12
orr r2, r2, r3, lsl #4
and r3, r11, r5, lsr #6
orr r2, r2, r3
and r5, r5, r10
orr r5, r2, r5, lsl#2 //KEY_DOUBLE_UPDATE_2(r5)
strd r5, r4, [r1, #248]
ldrd r4, r5, [r1, #48]
and r2, r9, r4, lsr #6
and r3, r4, r10, lsl #8
orr r2, r2, r3, lsl #2
and r3, r8, r4, lsr #5
orr r2, r2, r3
and r4, r4, r7
orr r4, r2, r4, lsl #3 //KEY_TRIPLE_UPDATE_2(r4)
and r2, r12, r5, lsr #4
and r3, r5, r12
orr r2, r2, r3, lsl #4
and r3, r11, r5, lsr #6
orr r2, r2, r3
and r5, r5, r10
orr r5, r2, r5, lsl #2 //KEY_DOUBLE_UPDATE_2(r5)
str.w r5, [r1, #128]
str.w r4, [r1, #132]
and r2, r9, r5, lsr #6
and r3, r5, r10, lsl #8
orr r2, r2, r3, lsl #2
and r3, r8, r5, lsr #5
orr r2, r2, r3
and r5, r5, r7
orr r5, r2, r5, lsl #3 //KEY_TRIPLE_UPDATE_2(r5)
and r2, r12, r4, lsr #4
and r3, r4, r12
orr r2, r2, r3, lsl #4
and r3, r11, r4, lsr #6
orr r2, r2, r3
and r4, r4, r10
orr r4, r2, r4, lsl #2 //KEY_DOUBLE_UPDATE_2(r4)
str.w r4, [r1, #208]
str.w r5, [r1, #212]
and r2, r9, r4, lsr #6
and r3, r4, r10, lsl #8
orr r2, r2, r3, lsl #2
and r3, r8, r4, lsr #5
orr r2, r2, r3
and r4, r4, r7
orr r4, r2, r4, lsl #3 //KEY_TRIPLE_UPDATE_2(r4)
and r2, r12, r5, lsr #4
and r3, r5, r12
orr r2, r2, r3, lsl #4
and r3, r11, r5, lsr #6
orr r2, r2, r3
and r5, r5, r10
orr r5, r2, r5, lsl#2 //KEY_DOUBLE_UPDATE_2(r5)
str.w r5, [r1, #288]
str.w r4, [r1, #292]
// KEY_DOUBLE/TRIPLE_UPDATE_2
// masks
movw r12, #0x5555
movt r12, #0x5555
mvn r11, r12
ldrd r4, r5, [r1, #16]
and r2, r12, r4, ror #24
and r4, r11, r4, ror #20
orr r4, r4, r2 //KEY_TRIPLE_UPDATE_2(r4)
and r2, r11, r5, ror #24
and r5, r12, r5, ror #16
orr r5, r5, r2 //KEY_DOUBLE_UPDATE_2(r5)
str.w r5, [r1, #96]
str.w r4, [r1, #100]
and r2, r12, r5, ror #24
and r5, r11, r5, ror #20
orr r5, r5, r2 //KEY_TRIPLE_UPDATE_2(r5)
and r2, r11, r4, ror #24
and r4, r12, r4, ror #16
orr r4, r4, r2 //KEY_DOUBLE_UPDATE_2(r4)
str.w r4, [r1, #176]
str.w r5, [r1, #180]
and r2, r12, r4, ror #24
and r4, r11, r4, ror #20
orr r4, r4, r2 //KEY_TRIPLE_UPDATE_2(r4)
and r2, r11, r5, ror #24
and r5, r12, r5, ror #16
orr r5, r5, r2 //KEY_DOUBLE_UPDATE_2(r5)
strd r5, r4, [r1, #256]
ldrd r4, r5, [r1, #56]
and r2, r12, r4, ror #24
and r4, r11, r4, ror #20
orr r4, r4, r2 //KEY_TRIPLE_UPDATE_2(r5)
and r2, r11, r5, ror #24
and r5, r12, r5, ror #16
orr r5, r5, r2 //KEY_DOUBLE_UPDATE_2(r4)
str.w r5, [r1, #136]
str.w r4, [r1, #140]
and r2, r12, r5, ror #24
and r5, r11, r5, ror #20
orr r5, r5, r2 //KEY_TRIPLE_UPDATE_2(r4)
and r2, r11, r4, ror #24
and r4, r12, r4, ror #16
orr r4, r4, r2 //KEY_DOUBLE_UPDATE_2(r5)
str.w r4, [r1, #216]
str.w r5, [r1, #220]
and r2, r12, r4, ror #24
and r4, r11, r4, ror #20
orr r4, r4, r2 //KEY_TRIPLE_UPDATE_2(r5)
and r2, r11, r5, ror #24
and r5, r12, r5, ror #16
orr r5, r5, r2 //KEY_DOUBLE_UPDATE_2(r4)
str.w r5, [r1, #296]
str.w r4, [r1, #300]
// KEY_DOUBLE/TRIPLE_UPDATE_3
// masks
orr r12, r8, r8, lsl #8 //r12<- 0x07070707
movw r11, #0xc0c0
movw r10, #0x3030
and r9, r12, r12, lsr #1 //r9 <- 0x03030303
lsl r8, r12, #4
eor r7, r8, r9, lsl #5
movw r6, #0xf0f0
ldrd r4, r5, [r1, #24]
and r2, r10, r4, lsr #18
and r3, r4, r7, lsr #4
orr r2, r2, r3, lsl #3
and r3, r11, r4, lsr #14
orr r2, r2, r3
and r3, r4, r12, lsr #11
orr r2, r2, r3, lsl #15
and r3, r12, r4, lsr #1
orr r2, r2, r3
and r4, r4, r7, lsr #16
orr r4, r2, r4, lsl #19 //KEY_TRIPLE_UPDATE_4(r4)
and r2, r9, r5, lsr #2
and r3, r9, r5
orr r2, r2, r3, lsl #2
and r3, r8, r5, lsr #1
orr r2, r2, r3
and r5, r5, r7
orr r5, r2, r5, lsl #3 //KEY_DOUBLE_UPDATE_4(r5)
str.w r5, [r1, #104]
str.w r4, [r1, #108]
and r2, r10, r5, lsr #18
and r3, r5, r7, lsr #4
orr r2, r2, r3, lsl #3
and r3, r11, r5, lsr #14
orr r2, r2, r3
and r3, r5, r12, lsr #11
orr r2, r2, r3, lsl #15
and r3, r12, r5, lsr #1
orr r2, r2, r3
and r5, r5, r7, lsr #16
orr r5, r2, r5, lsl #19 //KEY_TRIPLE_UPDATE_4(r5)
and r2, r9, r4, lsr #2
and r3, r9, r4
orr r2, r2, r3, lsl #2
and r3, r8, r4, lsr #1
orr r2, r2, r3
and r4, r4, r7
orr r4, r2, r4, lsl #3 //KEY_DOUBLE_UPDATE_4(r4)
str.w r4, [r1, #184]
str.w r5, [r1, #188]
and r2, r10, r4, lsr #18
and r3, r4, r7, lsr #4
orr r2, r2, r3, lsl #3
and r3, r11, r4, lsr #14
orr r2, r2, r3
and r3, r4, r12, lsr #11
orr r2, r2, r3, lsl #15
and r3, r12, r4, lsr #1
orr r2, r2, r3
and r4, r4, r7, lsr #16
orr r4, r2, r4, lsl #19 //KEY_TRIPLE_UPDATE_4(r4)
and r2, r9, r5, lsr #2
and r3, r9, r5
orr r2, r2, r3, lsl #2
and r3, r8, r5, lsr #1
orr r2, r2, r3
and r5, r5, r7
orr r5, r2, r5, lsl #3 //KEY_DOUBLE_UPDATE_4(r5)
strd r5, r4, [r1, #264]
ldrd r4, r5, [r1, #64]
and r2, r10, r4, lsr #18
and r3, r4, r7, lsr #4
orr r2, r2, r3, lsl #3
and r3, r11, r4, lsr #14
orr r2, r2, r3
and r3, r4, r12, lsr #11
orr r2, r2, r3, lsl #15
and r3, r12, r4, lsr #1
orr r2, r2, r3
and r4, r4, r7, lsr #16
orr r4, r2, r4, lsl #19 //KEY_TRIPLE_UPDATE_4(r4)
and r2, r9, r5, lsr #2
and r3, r9, r5
orr r2, r2, r3, lsl #2
and r3, r8, r5, lsr #1
orr r2, r2, r3
and r5, r5, r7
orr r5, r2, r5, lsl #3 //KEY_DOUBLE_UPDATE_4(r5)
str.w r5, [r1, #144]
str.w r4, [r1, #148]
and r2, r10, r5, lsr #18
and r3, r5, r7, lsr #4
orr r2, r2, r3, lsl #3
and r3, r11, r5, lsr #14
orr r2, r2, r3
and r3, r5, r12, lsr #11
orr r2, r2, r3, lsl #15
and r3, r12, r5, lsr #1
orr r2, r2, r3
and r5, r5, r7, lsr #16
orr r5, r2, r5, lsl #19 //KEY_TRIPLE_UPDATE_4(r5)
and r2, r9, r4, lsr #2
and r3, r9, r4
orr r2, r2, r3, lsl #2
and r3, r8, r4, lsr #1
orr r2, r2, r3
and r4, r4, r7
orr r4, r2, r4, lsl #3 //KEY_DOUBLE_UPDATE_4(r4)
str.w r4, [r1, #224]
str.w r5, [r1, #228]
and r2, r10, r4, lsr #18
and r3, r4, r7, lsr #4
orr r2, r2, r3, lsl #3
and r3, r11, r4, lsr #14
orr r2, r2, r3
and r3, r4, r12, lsr #11
orr r2, r2, r3, lsl #15
and r3, r12, r4, lsr #1
orr r2, r2, r3
and r4, r4, r7, lsr #16
orr r4, r2, r4, lsl #19 //KEY_TRIPLE_UPDATE_4(r4)
and r2, r9, r5, lsr #2
and r3, r9, r5
orr r2, r2, r3, lsl #2
and r3, r8, r5, lsr #1
orr r2, r2, r3
and r5, r5, r7
orr r5, r2, r5, lsl #3 //KEY_DOUBLE_UPDATE_4(r5)
str.w r5, [r1, #304]
str.w r4, [r1, #308]
// KEY_DOUBLE/TRIPLE_UPDATE_4
// masks
movw r12, #0x0fff
lsl r10, r12, #16
movw r8, #0x00ff
movw r7, #0x03ff
lsl r7, r7, #16
ldrd r4, r5, [r1, #32]
and r2, r7, r4, lsr #6
and r3, r4, #0x003f0000
orr r2, r2, r3, lsl #10
and r3, r12, r4, lsr #4
orr r2, r2, r3
and r4, r4, #0x000f
orr r4, r2, r4, lsl #12 //KEY_TRIPLE_UPDATE_4(r4)
and r2, r10, r5, lsr #4
and r3, r5, #0x000f0000
orr r2, r2, r3, lsl #12
and r3, r8, r5, lsr #8
orr r2, r2, r3
and r5, r5, r8
orr r5, r2, r5, lsl #8 //KEY_DOUBLE_UPDATE_4(r5)
str.w r5, [r1, #112]
str.w r4, [r1, #116]
and r2, r7, r5, lsr #6
and r3, r5, #0x003f0000
orr r2, r2, r3, lsl #10
and r3, r12, r5, lsr #4
orr r2, r2, r3
and r5, r5, #0x000f
orr r5, r2, r5, lsl #12 //KEY_TRIPLE_UPDATE_4(r5)
and r2, r10, r4, lsr #4
and r3, r4, #0x000f0000
orr r2, r2, r3, lsl #12
and r3, r8, r4, lsr #8
orr r2, r2, r3
and r4, r4, r8
orr r4, r2, r4, lsl #8 //KEY_DOUBLE_UPDATE_4(r4)
str.w r4, [r1, #192]
str.w r5, [r1, #196]
and r2, r7, r4, lsr #6
and r3, r4, #0x003f0000
orr r2, r2, r3, lsl #10
and r3, r12, r4, lsr #4
orr r2, r2, r3
and r4, r4, #0x000f
orr r4, r2, r4, lsl #12 //KEY_TRIPLE_UPDATE_4(r4)
and r2, r10, r5, lsr #4
and r3, r5, #0x000f0000
orr r2, r2, r3, lsl #12
and r3, r8, r5, lsr #8
orr r2, r2, r3
and r5, r5, r8
orr r5, r2, r5, lsl #8 //KEY_DOUBLE_UPDATE_4(r5)
strd r5, r4, [r1, #272]
ldrd r4, r5, [r1, #72]
and r2, r7, r4, lsr #6
and r3, r4, #0x003f0000
orr r2, r2, r3, lsl #10
and r3, r12, r4, lsr #4
orr r2, r2, r3
and r4, r4, #0x000f
orr r4, r2, r4, lsl #12 //KEY_TRIPLE_UPDATE_4(r4)
and r2, r10, r5, lsr #4
and r3, r5, #0x000f0000
orr r2, r2, r3, lsl #12
and r3, r8, r5, lsr #8
orr r2, r2, r3
and r5, r5, r8
orr r5, r2, r5, lsl #8 //KEY_DOUBLE_UPDATE_4(r5)
str.w r5, [r1, #152]
str.w r4, [r1, #156]
and r2, r7, r5, lsr #6
and r3, r5, #0x003f0000
orr r2, r2, r3, lsl #10
and r3, r12, r5, lsr #4
orr r2, r2, r3
and r5, r5, #0x000f
orr r5, r2, r5, lsl #12 //KEY_TRIPLE_UPDATE_4(r5)
and r2, r10, r4, lsr #4
and r3, r4, #0x000f0000
orr r2, r2, r3, lsl #12
and r3, r8, r4, lsr #8
orr r2, r2, r3
and r4, r4, r8
orr r4, r2, r4, lsl #8 //KEY_DOUBLE_UPDATE_4(r4)
str.w r4, [r1, #232]
str.w r5, [r1, #236]
and r2, r7, r4, lsr #6
and r3, r4, #0x003f0000
orr r2, r2, r3, lsl #10
and r3, r12, r4, lsr #4
orr r2, r2, r3
and r4, r4, #0x000f
orr r4, r2, r4, lsl #12 //KEY_TRIPLE_UPDATE_4(r4)
and r2, r10, r5, lsr #4
and r3, r5, #0x000f0000
orr r2, r2, r3, lsl #12
and r3, r8, r5, lsr #8
orr r2, r2, r3
and r5, r5, r8
orr r5, r2, r5, lsl #8 //KEY_DOUBLE_UPDATE_4(r5)
str.w r5, [r1, #312]
str.w r4, [r1, #316]
pop {r2-r12,r14}
bx lr
/*****************************************************************************
* Fully unrolled ARM assembly implementation of the GIFTb-128 block cipher.
* This function simply encrypts a 128-bit block, without any operation mode.
*****************************************************************************/
@ void giftb128_encrypt_block(u8 *out, const u32* rkey, const u8 *block)
.global giftb128_encrypt_block
.type giftb128_encrypt_block,%function
giftb128_encrypt_block:
push {r2-r12,r14}
// load plaintext blocks
ldm r2, {r9-r12}
// endianness
rev r9, r9
rev r10, r10
rev r11, r11
rev r12, r12
// masks for HALF/BYTE/NIBBLE rotations
movw r2, #0x1111
movt r2, #0x1111 //for NIBBLE_ROR
movw r3, #0x000f
movt r3, #0x000f //for HALF_ROR
mvn r4, r2, lsl #3 //0x7777777 for NIBBLE_ROR
// ------------------ 1st QUINTUPLE ROUND ------------------
// 1st round
movw r5, 0x0008
movt r5, 0x1000 //load rconst
ldrd r6, r7, [r1] //load rkey
and r8, r9, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r9, r8
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
and r8, r4, r12, lsr #1
and r12, r12, r2
orr r12, r8, r12, lsl #3 //NIBBLE_ROR(r12, 1)
and r8, r4, r11
and r11, r2, r11, lsr #3
orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 3)
orr r14, r2, r2, lsl #1 //0x33333333 for NIBBLE_ROR
and r8, r14, r10, lsr #2
and r10, r10, r14
orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r9, r9, r5 //add rconst
// 2nd round
movw r5, 0x8000
movt r5, 0x8001 //load rconst
ldrd r6, r7, [r1, #8] //load rkey
and r8, r12, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r12, r8
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9
mvn r14, r3, lsl #12 //0x0fff0fff for HALF_ROR
and r8, r14, r9, lsr #4
and r9, r9, r3
orr r9, r8, r9, lsl #12 //HALF_ROR(r9, 4)
and r8, r3, r11, lsr #12
and r11, r11, r14
orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12)
rev16 r10, r10 //HALF_ROR(r10, 8)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r12, r12, r5 //add rconst
// 3rd round
movw r5, 0x0002
movt r5, 0x5400 //load rconst
ldrd r6, r7, [r1, #16] //load rkey
and r8, r9, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r9, r8
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE
eor r8, r10, r10, lsr #1
and r8, r8, r14
eor r10, r10, r8
eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1)
eor r8, r12, r12, lsr #1
and r8, r8, r14, lsr #16
eor r12, r12, r8
eor r12, r12, r8, lsl #1 //SWAPMOVE(r12, r12, 0x55550000, 1)
eor r8, r11, r11, lsr #1
and r8, r8, r14, lsl #16
eor r11, r11, r8
eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x00005555, 1)
eor r10, r10, r6 //add 1st keyword
eor r11, r7, r11, ror #16 //add 2nd keyword
eor r9, r9, r5 //add rconst
// 4th round
movw r5, 0x0181
movt r5, 0x0101 //load rconst
ldrd r6, r7, [r1, #24] //load rkey
and r8, r11, r12, ror #16 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r8, r12, ror #16
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9
eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR
and r8, r14, r10, lsr #4
and r10, r10, r14
orr r10, r8, r10, lsl #4 //BYTE_ROR(r10, 4)
orr r14, r14, r14, lsl #2 //0x3f3f3f3f for BYTE_ROR
mvn r8, r14
and r8, r8, r11, lsl #6
and r11, r14, r11, lsr #2
orr r11, r11, r8 //BYTE_ROR(r11, 2)
mvn r8, r14, lsr #6
and r8, r8, r9, lsr #6
and r9, r14, r9
orr r9, r8, r9, lsl #2 //BYTE_ROR(r9, 6)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r12, r12, r5 //add rconst
// 5th round
movw r5, 0x001f
movt r5, 0x8000 //load rconst
ldrd r6, r7, [r1, #32] //load rkey
and r8, r9, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r9, r8
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
eor r10, r6, r10, ror #16 //add 1st keyword
eor r11, r7, r11, ror #8 //add 2nd keyword
eor r9, r9, r5 //add rconst
// ------------------ 2nd QUINTUPLE ROUND ------------------
// 1st round
movw r5, 0x8880
movt r5, 0x1088 //load rconst
ldrd r6, r7, [r1, #40] //load rkey
and r8, r11, r12, ror #24 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r8, r12, ror #24
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9
and r8, r4, r9, lsr #1
and r9, r9, r2
orr r9, r8, r9, lsl #3 //NIBBLE_ROR(r9, 1)
and r8, r4, r11
and r11, r2, r11, lsr #3
orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 3)
orr r14, r2, r2, lsl #1 //0x33333333 for NIBBLE_ROR
and r8, r14, r10, lsr #2
and r10, r10, r14
orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r12, r12, r5 //add rconst
// 2nd round
movw r5, 0xe000
movt r5, 0x6001 //load rconst
ldrd r6, r7, [r1, #48] //load rkey
and r8, r9, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r9, r8
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
mvn r14, r3, lsl #12 //0x0fff0fff for HALF_ROR
and r8, r14, r12, lsr #4
and r12, r12, r3
orr r12, r8, r12, lsl #12 //HALF_ROR(r12, 4)
and r8, r3, r11, lsr #12
and r11, r11, r14
orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12)
rev16 r10, r10 //HALF_ROR(r10, 8)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r9, r9, r5 //add rconst
// 3rd round
movw r5, 0x0002
movt r5, 0x5150 //load rconst
ldrd r6, r7, [r1, #56] //load rkey
and r8, r12, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r12, r8
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9
orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE
eor r8, r10, r10, lsr #1
and r8, r8, r14
eor r10, r10, r8
eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1)
eor r8, r9, r9, lsr #1
and r8, r8, r14, lsr #16
eor r9, r9, r8
eor r9, r9, r8, lsl #1 //SWAPMOVE(r9, r9, 0x00005555, 1)
eor r8, r11, r11, lsr #1
and r8, r8, r14, lsl #16
eor r11, r11, r8
eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x55550000, 1)
eor r10, r10, r6 //add 1st keyword
eor r11, r7, r11, ror #16 //add 2nd keyword
eor r12, r12, r5 //add rconst
// 4th round
movw r5, 0x0180
movt r5, 0x0303 //load rconst
ldrd r6, r7, [r1, #64] //load rkey
and r8, r11, r9, ror #16 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r8, r9, ror #16
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR
and r8, r14, r10, lsr #4
and r10, r10, r14
orr r10, r8, r10, lsl #4 //BYTE_ROR(r10, 4)
orr r14, r14, r14, lsl #2 //0x3f3f3f3f for BYTE_ROR
mvn r8, r14
and r8, r8, r11, lsl #6
and r11, r14, r11, lsr #2
orr r11, r11, r8 //BYTE_ROR(r11, 2)
mvn r8, r14, lsr #6
and r8, r8, r12, lsr #6
and r12, r14, r12
orr r12, r8, r12, lsl #2 //BYTE_ROR(r12, 6)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r9, r9, r5 //add rconst
// 5th round
movw r5, 0x002f
movt r5, 0x8000 //load rconst
ldrd r6, r7, [r1, #72] //load rkey
and r8, r12, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r12, r8
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9
eor r10, r6, r10, ror #16 //add 1st keyword
eor r11, r7, r11, ror #8 //add 2nd keyword
eor r12, r12, r5 //add rconst
// ------------------ 3rd QUINTUPLE ROUND ------------------
// 1st round
movw r5, 0x8880
movt r5, 0x1008 //load rconst
ldrd r6, r7, [r1, #80] //load rkey
and r8, r11, r9, ror #24 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r8, r9, ror #24
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
and r8, r4, r12, lsr #1
and r12, r12, r2
orr r12, r8, r12, lsl #3 //NIBBLE_ROR(r12, 1)
and r8, r4, r11
and r11, r2, r11, lsr #3
orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 3)
orr r14, r2, r2, lsl #1 //0x33333333 for NIBBLE_ROR
and r8, r14, r10, lsr #2
and r10, r10, r14
orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r9, r9, r5 //add rconst
// 2nd round
movw r5, 0x6000
movt r5, 0x6001 //load rconst
ldrd r6, r7, [r1, #88] //load rkey
and r8, r12, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r12, r8
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9
mvn r14, r3, lsl #12 //0x0fff0fff for HALF_ROR
and r8, r14, r9, lsr #4
and r9, r9, r3
orr r9, r8, r9, lsl #12 //HALF_ROR(r9, 4)
and r8, r3, r11, lsr #12
and r11, r11, r14
orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12)
rev16 r10, r10 //HALF_ROR(r10, 8)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r12, r12, r5 //add rconst
// 3rd round
movw r5, 0x0002
movt r5, 0x4150 //load rconst
ldrd r6, r7, [r1, #96] //load rkey
and r8, r9, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r9, r8
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE
eor r8, r10, r10, lsr #1
and r8, r8, r14
eor r10, r10, r8
eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1)
eor r8, r12, r12, lsr #1
and r8, r8, r14, lsr #16
eor r12, r12, r8
eor r12, r12, r8, lsl #1 //SWAPMOVE(r12, r12, 0x00005555, 1)
eor r8, r11, r11, lsr #1
and r8, r8, r14, lsl #16
eor r11, r11, r8
eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x55550000, 1)
eor r10, r10, r6 //add 1st keyword
eor r11, r7, r11, ror #16 //add 2nd keyword
eor r9, r9, r5 //add rconst
// 4th round
movw r5, 0x0080
movt r5, 0x0303 //load rconst
ldrd r6, r7, [r1, #104] //load rkey
and r8, r11, r12, ror #16 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r8, r12, ror #16
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9
eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR
and r8, r14, r10, lsr #4
and r10, r10, r14
orr r10, r8, r10, lsl #4 //BYTE_ROR(r10, 4)
orr r14, r14, r14, lsl #2 //0x3f3f3f3f for BYTE_ROR
mvn r8, r14
and r8, r8, r11, lsl #6
and r11, r14, r11, lsr #2
orr r11, r11, r8 //BYTE_ROR(r11, 2)
mvn r8, r14, lsr #6
and r8, r8, r9, lsr #6
and r9, r14, r9
orr r9, r8, r9, lsl #2 //BYTE_ROR(r9, 6)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r12, r12, r5 //add rconst
// 5th round
movw r5, 0x0027
movt r5, 0x8000 //load rconst
ldrd r6, r7, [r1, #112] //load rkey
and r8, r9, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r9, r8
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
eor r10, r6, r10, ror #16 //add 1st keyword
eor r11, r7, r11, ror #8 //add 2nd keyword
eor r9, r9, r5 //add rconst
// ------------------ 4th QUINTUPLE ROUND ------------------
// 1st round
movw r5, 0x8880
movt r5, 0x1000 //load rconst
ldrd r6, r7, [r1, #120] //load rkey
and r8, r11, r12, ror #24 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r8, r12, ror #24
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9
and r8, r4, r9, lsr #1
and r9, r9, r2
orr r9, r8, r9, lsl #3 //NIBBLE_ROR(r9, 1)
and r8, r4, r11
and r11, r2, r11, lsr #3
orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 3)
orr r14, r2, r2, lsl #1 //0x33333333 for NIBBLE_ROR
and r8, r14, r10, lsr #2
and r10, r10, r14
orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r12, r12, r5 //add rconst
// 2nd round
movw r5, 0xe000
movt r5, 0x4001 //load rconst
ldrd r6, r7, [r1, #128] //load rkey
and r8, r9, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r9, r8
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
mvn r14, r3, lsl #12 //0x0fff0fff for HALF_ROR
and r8, r14, r12, lsr #4
and r12, r12, r3
orr r12, r8, r12, lsl #12 //HALF_ROR(r12, 4)
and r8, r3, r11, lsr #12
and r11, r11, r14
orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12)
rev16 r10, r10 //HALF_ROR(r10, 8)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r9, r9, r5 //add rconst
// 3rd round
movw r5, 0x0002
movt r5, 0x1150 //load rconst
ldrd r6, r7, [r1, #136] //load rkey
and r8, r12, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r12, r8
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9
orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE
eor r8, r10, r10, lsr #1
and r8, r8, r14
eor r10, r10, r8
eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1)
eor r8, r9, r9, lsr #1
and r8, r8, r14, lsr #16
eor r9, r9, r8
eor r9, r9, r8, lsl #1 //SWAPMOVE(r9, r9, 0x00005555, 1)
eor r8, r11, r11, lsr #1
and r8, r8, r14, lsl #16
eor r11, r11, r8
eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x55550000, 1)
eor r10, r10, r6 //add 1st keyword
eor r11, r7, r11, ror #16 //add 2nd keyword
eor r12, r12, r5 //add rconst
// 4th round
movw r5, 0x0180
movt r5, 0x0302 //load rconst
ldrd r6, r7, [r1, #144] //load rkey
and r8, r11, r9, ror #16 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r8, r9, ror #16
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR
and r8, r14, r10, lsr #4
and r10, r10, r14
orr r10, r8, r10, lsl #4 //BYTE_ROR(r10, 4)
orr r14, r14, r14, lsl #2 //0x3f3f3f3f for BYTE_ROR
mvn r8, r14
and r8, r8, r11, lsl #6
and r11, r14, r11, lsr #2
orr r11, r11, r8 //BYTE_ROR(r11, 2)
mvn r8, r14, lsr #6
and r8, r8, r12, lsr #6
and r12, r14, r12
orr r12, r8, r12, lsl #2 //BYTE_ROR(r12, 6)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r9, r9, r5 //add rconst
// 5th round
movw r5, 0x002b
movt r5, 0x8000 //load rconst
ldrd r6, r7, [r1, #152] //load rkey
and r8, r12, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r12, r8
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9
eor r10, r6, r10, ror #16 //add 1st keyword
eor r11, r7, r11, ror #8 //add 2nd keyword
eor r12, r12, r5 //add rconst
// ------------------ 5th QUINTUPLE ROUND ------------------
// 1st round
movw r5, 0x0880
movt r5, 0x1008 //load rconst
ldrd r6, r7, [r1, #160] //load rkey
and r8, r11, r9, ror #24 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r8, r9, ror #24
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
and r8, r4, r12, lsr #1
and r12, r12, r2
orr r12, r8, r12, lsl #3 //NIBBLE_ROR(r12, 1)
and r8, r4, r11
and r11, r2, r11, lsr #3
orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 3)
orr r14, r2, r2, lsl #1 //0x33333333 for NIBBLE_ROR
and r8, r14, r10, lsr #2
and r10, r10, r14
orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r9, r9, r5 //add rconst
// 2nd round
movw r5, 0x4000
movt r5, 0x6001 //load rconst
ldrd r6, r7, [r1, #168] //load rkey
and r8, r12, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r12, r8
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9
mvn r14, r3, lsl #12 //0x0fff0fff for HALF_ROR
and r8, r14, r9, lsr #4
and r9, r9, r3
orr r9, r8, r9, lsl #12 //HALF_ROR(r9, 4)
and r8, r3, r11, lsr #12
and r11, r11, r14
orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12)
rev16 r10, r10 //HALF_ROR(r10, 8)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r12, r12, r5 //add rconst
// 3rd round
movw r5, 0x0002
movt r5, 0x0140 //load rconst
ldrd r6, r7, [r1, #176] //load rkey
and r8, r9, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r9, r8
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE
eor r8, r10, r10, lsr #1
and r8, r8, r14
eor r10, r10, r8
eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1)
eor r8, r12, r12, lsr #1
and r8, r8, r14, lsr #16
eor r12, r12, r8
eor r12, r12, r8, lsl #1 //SWAPMOVE(r12, r12, 0x00005555, 1)
eor r8, r11, r11, lsr #1
and r8, r8, r14, lsl #16
eor r11, r11, r8
eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x55550000, 1)
eor r10, r10, r6 //add 1st keyword
eor r11, r7, r11, ror #16 //add 2nd keyword
eor r9, r9, r5 //add rconst
// 4th round
movw r5, 0x0080
movt r5, 0x0202 //load rconst
ldrd r6, r7, [r1, #184] //load rkey
and r8, r11, r12, ror #16 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r8, r12, ror #16
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9
eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR
and r8, r14, r10, lsr #4
and r10, r10, r14
orr r10, r8, r10, lsl #4 //BYTE_ROR(r10, 4)
orr r14, r14, r14, lsl #2 //0x3f3f3f3f for BYTE_ROR
mvn r8, r14
and r8, r8, r11, lsl #6
and r11, r14, r11, lsr #2
orr r11, r11, r8 //BYTE_ROR(r11, 2)
mvn r8, r14, lsr #6
and r8, r8, r9, lsr #6
and r9, r14, r9
orr r9, r8, r9, lsl #2 //BYTE_ROR(r9, 6)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r12, r12, r5 //add rconst
// 5th round
movw r5, 0x0021
movt r5, 0x8000 //load rconst
ldrd r6, r7, [r1, #192] //load rkey
and r8, r9, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r9, r8
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
eor r10, r6, r10, ror #16 //add 1st keyword
eor r11, r7, r11, ror #8 //add 2nd keyword
eor r9, r9, r5 //add rconst
// ------------------ 6th QUINTUPLE ROUND ------------------
// 1st round
movw r5, 0x0080
movt r5, 0x1000 //load rconst
ldrd r6, r7, [r1, #200] //load rkey
and r8, r11, r12, ror #24 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r8, r12, ror #24
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9
and r8, r4, r9, lsr #1
and r9, r9, r2
orr r9, r8, r9, lsl #3 //NIBBLE_ROR(r9, 1)
and r8, r4, r11
and r11, r2, r11, lsr #3
orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 3)
orr r14, r2, r2, lsl #1 //0x33333333 for NIBBLE_ROR
and r8, r14, r10, lsr #2
and r10, r10, r14
orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r12, r12, r5 //add rconst
// 2nd round
movw r5, 0xc000
movt r5, 0x0001 //load rconst
ldrd r6, r7, [r1, #208] //load rkey
and r8, r9, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r9, r8
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
mvn r14, r3, lsl #12 //0x0fff0fff for HALF_ROR
and r8, r14, r12, lsr #4
and r12, r12, r3
orr r12, r8, r12, lsl #12 //HALF_ROR(r12, 4)
and r8, r3, r11, lsr #12
and r11, r11, r14
orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12)
rev16 r10, r10 //HALF_ROR(r10, 8)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r9, r9, r5 //add rconst
// 3rd round
movw r5, 0x0002
movt r5, 0x5100 //load rconst
ldrd r6, r7, [r1, #216] //load rkey
and r8, r12, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r12, r8
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9
orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE
eor r8, r10, r10, lsr #1
and r8, r8, r14
eor r10, r10, r8
eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1)
eor r8, r9, r9, lsr #1
and r8, r8, r14, lsr #16
eor r9, r9, r8
eor r9, r9, r8, lsl #1 //SWAPMOVE(r9, r9, 0x00005555, 1)
eor r8, r11, r11, lsr #1
and r8, r8, r14, lsl #16
eor r11, r11, r8
eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x55550000, 1)
eor r10, r10, r6 //add 1st keyword
eor r11, r7, r11, ror #16 //add 2nd keyword
eor r12, r12, r5 //add rconst
// 4th round
movw r5, 0x0180
movt r5, 0x0301 //load rconst
ldrd r6, r7, [r1, #224] //load rkey
and r8, r11, r9, ror #16 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r8, r9, ror #16
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR
and r8, r14, r10, lsr #4
and r10, r10, r14
orr r10, r8, r10, lsl #4 //BYTE_ROR(r10, 4)
orr r14, r14, r14, lsl #2 //0x3f3f3f3f for BYTE_ROR
mvn r8, r14
and r8, r8, r11, lsl #6
and r11, r14, r11, lsr #2
orr r11, r11, r8 //BYTE_ROR(r11, 2)
mvn r8, r14, lsr #6
and r8, r8, r12, lsr #6
and r12, r14, r12
orr r12, r8, r12, lsl #2 //BYTE_ROR(r12, 6)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r9, r9, r5 //add rconst
// 5th round
movw r5, 0x002e
movt r5, 0x8000 //load rconst
ldrd r6, r7, [r1, #232] //load rkey
and r8, r12, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r12, r8
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9
eor r10, r6, r10, ror #16 //add 1st keyword
eor r11, r7, r11, ror #8 //add 2nd keyword
eor r12, r12, r5 //add rconst
// ------------------ 7th QUINTUPLE ROUND ------------------
// 1st round
movw r5, 0x8800
movt r5, 0x1008 //load rconst
ldrd r6, r7, [r1, #240] //load rkey
and r8, r11, r9, ror #24 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r8, r9, ror #24
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
and r8, r4, r12, lsr #1
and r12, r12, r2
orr r12, r8, r12, lsl #3 //NIBBLE_ROR(r12, 1)
and r8, r4, r11
and r11, r2, r11, lsr #3
orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 3)
orr r14, r2, r2, lsl #1 //0x33333333 for NIBBLE_ROR
and r8, r14, r10, lsr #2
and r10, r10, r14
orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r9, r9, r5 //add rconst
// 2nd round
movw r5, 0x2000
movt r5, 0x6001 //load rconst
ldrd r6, r7, [r1, #248] //load rkey
and r8, r12, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r12, r8
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9
mvn r14, r3, lsl #12 //0x0fff0fff for HALF_ROR
and r8, r14, r9, lsr #4
and r9, r9, r3
orr r9, r8, r9, lsl #12 //HALF_ROR(r9, 4)
and r8, r3, r11, lsr #12
and r11, r11, r14
orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12)
rev16 r10, r10 //HALF_ROR(r10, 8)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r12, r12, r5 //add rconst
// 3rd round
movw r5, 0x0002
movt r5, 0x4050 //load rconst
ldrd r6, r7, [r1, #256] //load rkey
and r8, r9, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r9, r8
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE
eor r8, r10, r10, lsr #1
and r8, r8, r14
eor r10, r10, r8
eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1)
eor r8, r12, r12, lsr #1
and r8, r8, r14, lsr #16
eor r12, r12, r8
eor r12, r12, r8, lsl #1 //SWAPMOVE(r12, r12, 0x00005555, 1)
eor r8, r11, r11, lsr #1
and r8, r8, r14, lsl #16
eor r11, r11, r8
eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x55550000, 1)
eor r10, r10, r6 //add 1st keyword
eor r11, r7, r11, ror #16 //add 2nd keyword
eor r9, r9, r5 //add rconst
// 4th round
movw r5, 0x0080
movt r5, 0x0103 //load rconst
ldrd r6, r7, [r1, #264] //load rkey
and r8, r11, r12, ror #16 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r8, r12, ror #16
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9
eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR
and r8, r14, r10, lsr #4
and r10, r10, r14
orr r10, r8, r10, lsl #4 //BYTE_ROR(r10, 4)
orr r14, r14, r14, lsl #2 //0x3f3f3f3f for BYTE_ROR
mvn r8, r14
and r8, r8, r11, lsl #6
and r11, r14, r11, lsr #2
orr r11, r11, r8 //BYTE_ROR(r11, 2)
mvn r8, r14, lsr #6
and r8, r8, r9, lsr #6
and r9, r14, r9
orr r9, r8, r9, lsl #2 //BYTE_ROR(r9, 6)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r12, r12, r5 //add rconst
// 5th round
movw r5, 0x0006
movt r5, 0x8000 //load rconst
ldrd r6, r7, [r1, #272] //load rkey
and r8, r9, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r9, r8
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
eor r10, r6, r10, ror #16 //add 1st keyword
eor r11, r7, r11, ror #8 //add 2nd keyword
eor r9, r9, r5 //add rconst
// ------------------ 8th QUINTUPLE ROUND ------------------
// 1st round
movw r5, 0x8808
movt r5, 0x1000 //load rconst
ldrd r6, r7, [r1, #280] //load rkey
and r8, r11, r12, ror #24 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r8, r12, ror #24
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9
and r8, r4, r9, lsr #1
and r9, r9, r2
orr r9, r8, r9, lsl #3 //NIBBLE_ROR(r9, 1)
and r8, r4, r11
and r11, r2, r11, lsr #3
orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 3)
orr r14, r2, r2, lsl #1 //0x33333333 for NIBBLE_ROR
and r8, r14, r10, lsr #2
and r10, r10, r14
orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r12, r12, r5 //add rconst
// 2nd round
movw r5, 0xa000
movt r5, 0xc001 //load rconst
ldrd r6, r7, [r1, #288] //load rkey
and r8, r9, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r9, r8
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
mvn r14, r3, lsl #12 //0x0fff0fff for HALF_ROR
and r8, r14, r12, lsr #4
and r12, r12, r3
orr r12, r8, r12, lsl #12 //HALF_ROR(r12, 4)
and r8, r3, r11, lsr #12
and r11, r11, r14
orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12)
rev16 r10, r10 //HALF_ROR(r10, 8)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r9, r9, r5 //add rconst
// 3rd round
movw r5, 0x0002
movt r5, 0x1450 //load rconst
ldrd r6, r7, [r1, #296] //load rkey
and r8, r12, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r12, r8
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9
orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE
eor r8, r10, r10, lsr #1
and r8, r8, r14
eor r10, r10, r8
eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1)
eor r8, r9, r9, lsr #1
and r8, r8, r14, lsr #16
eor r9, r9, r8
eor r9, r9, r8, lsl #1 //SWAPMOVE(r9, r9, 0x00005555, 1)
eor r8, r11, r11, lsr #1
and r8, r8, r14, lsl #16
eor r11, r11, r8
eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x55550000, 1)
eor r10, r10, r6 //add 1st keyword
eor r11, r7, r11, ror #16 //add 2nd keyword
eor r12, r12, r5 //add rconst
// 4th round
movw r5, 0x0181
movt r5, 0x0102 //load rconst
ldrd r6, r7, [r1, #304] //load rkey
and r8, r11, r9, ror #16 //sbox layer
eor r10, r10, r8
and r8, r10, r12
eor r9, r8, r9, ror #16
orr r8, r9, r10
eor r11, r11, r8
eor r12, r12, r11
eor r10, r10, r12
and r8, r9, r10
eor r11, r11, r8
mvn r12, r12
eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR
and r8, r14, r10, lsr #4
and r10, r10, r14
orr r10, r8, r10, lsl #4 //BYTE_ROR(r10, 4)
orr r14, r14, r14, lsl #2 //0x3f3f3f3f for BYTE_ROR
mvn r8, r14
and r8, r8, r11, lsl #6
and r11, r14, r11, lsr #2
orr r11, r11, r8 //BYTE_ROR(r11, 2)
mvn r8, r14, lsr #6
and r8, r8, r12, lsr #6
and r12, r14, r12
orr r12, r8, r12, lsl #2 //BYTE_ROR(r12, 6)
eor r10, r10, r6 //add 1st keyword
eor r11, r11, r7 //add 2nd keyword
eor r9, r9, r5 //add rconst
// 5th round
movw r5, 0x001a
movt r5, 0x8000 //load rconst
ldrd r6, r7, [r1, #312] //load rkey
and r8, r12, r11 //sbox layer
eor r10, r10, r8
and r8, r10, r9
eor r12, r12, r8
orr r8, r12, r10
eor r11, r11, r8
eor r9, r9, r11
eor r10, r10, r9
and r8, r12, r10
eor r11, r11, r8
mvn r9, r9, ror #24
eor r10, r6, r10, ror #16 //add 1st keyword
eor r11, r7, r11, ror #8 //add 2nd keyword
eor r12, r12, r5 //add rconst
// endianness
rev r9, r9
rev r10, r10
rev r11, r11
rev r12, r12
stm r0, {r9-r12}
pop {r2-r12,r14}
bx lr
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#ifndef COFB_H_
#define COFB_H_
/*
 * GIFT-COFB mode-level helper macros.
 *
 * All of these are GCC/Clang statement expressions and operate on arrays of
 * u32 words. They deliberately use the variables `tmp0` and `tmp1` from the
 * ENCLOSING scope (declared in giftcofb_crypt) as scratch — the macros cannot
 * be used in a function that does not declare them.
 */
/* Multiply the 64-bit top half-block (x)[0..1] by 2 (doubling).
 * Per-byte left shift with cross-byte carries reassembled by the masked
 * shifts; the `* 27` term is the field feedback constant (0x1B) applied when
 * the top bit falls out — presumably GF(2^64) doubling per the GIFT-COFB
 * spec; TODO confirm bit ordering against the reference document. */
#define DOUBLE_HALF_BLOCK(x) ({ \
tmp0 = (x)[0]; \
(x)[0] = (((x)[0] & 0x7f7f7f7f) << 1) | (((x)[0] & 0x80808080) >> 15); \
(x)[0] |= ((x)[1] & 0x80808080) << 17; \
(x)[1] = (((x)[1] & 0x7f7f7f7f) << 1) | (((x)[1] & 0x80808080) >> 15); \
(x)[1] ^= (((tmp0 >> 7) & 1) * 27) << 24; \
})
/* Multiply the 64-bit top half-block by 3: doubling (as above) followed by
 * XOR of the saved original value (3·x = 2·x ^ x). */
#define TRIPLE_HALF_BLOCK(x) ({ \
tmp0 = (x)[0]; \
tmp1 = (x)[1]; \
(x)[0] = (((x)[0] & 0x7f7f7f7f) << 1) | (((x)[0] & 0x80808080) >> 15); \
(x)[0] |= ((x)[1] & 0x80808080) << 17; \
(x)[1] = (((x)[1] & 0x7f7f7f7f) << 1) | (((x)[1] & 0x80808080) >> 15); \
(x)[1] ^= (((tmp0 >> 7) & 1) * 27) << 24; \
(x)[0] ^= tmp0; \
(x)[1] ^= tmp1; \
})
/* Feedback function G over a 128-bit block (x)[0..3]: the bottom half moves
 * to the top, and the old top half — doubled the same way as above — becomes
 * the new bottom half. */
#define G(x) ({ \
tmp0 = (x)[0]; \
tmp1 = (x)[1]; \
(x)[0] = (x)[2]; \
(x)[1] = (x)[3]; \
(x)[2] = ((tmp0 & 0x7f7f7f7f) << 1) | ((tmp0 & 0x80808080) >> 15); \
(x)[2] |= ((tmp1 & 0x80808080) << 17); \
(x)[3] = ((tmp1 & 0x7f7f7f7f) << 1) | ((tmp1 & 0x80808080) >> 15); \
(x)[3] |= ((tmp0 & 0x80808080) << 17); \
})
/* 128-bit XOR: x = y ^ z, word by word. x may alias y or z. */
#define XOR_BLOCK(x, y, z) ({ \
(x)[0] = (y)[0] ^ (z)[0]; \
(x)[1] = (y)[1] ^ (z)[1]; \
(x)[2] = (y)[2] ^ (z)[2]; \
(x)[3] = (y)[3] ^ (z)[3]; \
})
/* XOR the 64-bit offset y into the top half of block x only. */
#define XOR_TOP_BAR_BLOCK(x, y) ({ \
(x)[0] ^= (y)[0]; \
(x)[1] ^= (y)[1]; \
})
/* rho1: d = pad(m, n) ^ G(y). Note G mutates y in place. */
#define RHO1(d, y, m, n) ({ \
G(y); \
padding(d,m,n); \
XOR_BLOCK(d, d, y); \
})
/* Encryption feedback: ciphertext c = y ^ m, then next input x via rho1. */
#define RHO(y, m, x, c, n) ({ \
XOR_BLOCK(c, y, m); \
RHO1(x, y, m, n); \
})
/* Decryption feedback: recovered message m = y ^ c, then next input x. */
#define RHO_PRIME(y, c, x, m, n) ({ \
XOR_BLOCK(m, y, c); \
RHO1(x, y, m, n); \
})
#endif // COFB_H_
\ No newline at end of file
/*******************************************************************************
* Constant-time 32-bit implementation of the GIFT-COFB authenticated cipher.
*
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
* @date January 2020
*******************************************************************************/
#include <string.h> //for memcpy
#include "api.h"
#include "cofb.h"
#include "giftb128.h"
#define TAGBYTES CRYPTO_ABYTES
#define BLOCKBYTES CRYPTO_ABYTES
#define COFB_ENCRYPT 1
#define COFB_DECRYPT 0
/****************************************************************************
* 32-bit padding implementation.
****************************************************************************/
/*
 * Copy a partial block into a full 16-byte block with 10* padding, 32 bits at
 * a time. d receives exactly four words:
 *   - no_of_bytes == 0:      block is 0x80 00 ... 00 (little-endian words);
 *   - 0 < n < BLOCKBYTES:    the n source bytes, then a 0x80 byte, then zeros;
 *   - n >= BLOCKBYTES:       a plain 4-word copy of s.
 * Note: for a partial block the last touched source word is read in full,
 * so s must be readable up to the next word boundary.
 */
static inline void padding(u32* d, const u32* s, const u32 no_of_bytes){
    u32 w;
    if (no_of_bytes == 0) {
        /* empty input: only the 0x80 terminator byte, little-endian */
        d[0] = 0x00000080;
        d[1] = 0x00000000;
        d[2] = 0x00000000;
        d[3] = 0x00000000;
        return;
    }
    if (no_of_bytes >= BLOCKBYTES) {
        /* full block: straight copy */
        d[0] = s[0];
        d[1] = s[1];
        d[2] = s[2];
        d[3] = s[3];
        return;
    }
    /* partial block: copy whole words covering the data ... */
    for (w = 0; w < no_of_bytes/4 + 1; w++)
        d[w] = s[w];
    /* ... clear the bytes past the data in the last word, insert 0x80 ... */
    d[w-1] &= ~(0xffffffffL << (no_of_bytes % 4)*8);
    d[w-1] |= 0x00000080L << (no_of_bytes % 4)*8;
    /* ... and zero the remaining words. */
    while (w < 4)
        d[w++] = 0x00000000;
}
/****************************************************************************
* Constant-time implementation of the GIFT-COFB authenticated cipher based on
* fixsliced GIFTb-128. Encryption/decryption is handled by the same function,
* depending on the 'encrypting' parameter (1/0).
****************************************************************************/
/*
 * GIFT-COFB core routine: encrypts (encrypting == COFB_ENCRYPT) or decrypts
 * in/in_len under key/nonce with associated data ad/ad_len, writing the
 * result to out. On encryption the 16-byte tag is appended to out; on
 * decryption the tag is expected after the ciphertext in `in`.
 * Returns 0 on success, -1 if a decryption input is shorter than the tag,
 * and a nonzero value (OR of tag byte differences, constant-time compare)
 * on tag mismatch.
 *
 * NOTE(review): the (u32*) casts on Y/ad/in/out assume 4-byte alignment of
 * caller buffers and rely on type punning — verify for strict-alignment
 * targets (same pattern the caller-facing NIST API passes through).
 */
int giftcofb_crypt(u8* out, const u8* key, const u8* nonce, const u8* ad,
u32 ad_len, const u8* in, u32 in_len, const int encrypting) {
// tmp0/tmp1 are also the scratch registers used by the cofb.h macros
u32 tmp0, tmp1, emptyA, emptyM;
u32 offset[2], input[4], rkey[80];
u8 Y[16];
// decryption: strip the trailing tag from the input length
if (!encrypting) {
if (in_len < TAGBYTES)
return -1;
in_len -= TAGBYTES;
}
// flags for the empty-AD / empty-message domain separation below
if (ad_len == 0)
emptyA = 1;
else
emptyA = 0;
if (in_len == 0)
emptyM =1;
else
emptyM = 0;
// initialization: Y = E_K(nonce); offset = top half of Y
precompute_rkeys(rkey, key);
giftb128(Y, nonce, rkey);
offset[0] = ((u32*)Y)[0];
offset[1] = ((u32*)Y)[1];
// process all full (non-final) associated-data blocks; offset doubles per block
while (ad_len > BLOCKBYTES) {
RHO1(input, (u32*)Y, (u32*)ad, BLOCKBYTES);
DOUBLE_HALF_BLOCK(offset);
XOR_TOP_BAR_BLOCK(input, offset);
giftb128(Y, (u8*)input, rkey);
ad += BLOCKBYTES;
ad_len -= BLOCKBYTES;
}
// final AD block: offset tripled once, twice more if partial/empty AD,
// and twice more again if the message is empty (domain separation)
TRIPLE_HALF_BLOCK(offset);
if ((ad_len % BLOCKBYTES != 0) || (emptyA))
TRIPLE_HALF_BLOCK(offset);
if (emptyM) {
TRIPLE_HALF_BLOCK(offset);
TRIPLE_HALF_BLOCK(offset);
}
RHO1(input, (u32*)Y, (u32*)ad, ad_len);
XOR_TOP_BAR_BLOCK(input, offset);
giftb128(Y, (u8*)input, rkey);
// process all full (non-final) message blocks
while (in_len > BLOCKBYTES) {
DOUBLE_HALF_BLOCK(offset);
if (encrypting)
RHO((u32*)Y, (u32*)in, input, (u32*)out, BLOCKBYTES);
else
RHO_PRIME((u32*)Y, (u32*)in, input, (u32*)out, BLOCKBYTES);
XOR_TOP_BAR_BLOCK(input, offset);
giftb128(Y, (u8*)input, rkey);
in += BLOCKBYTES;
out += BLOCKBYTES;
in_len -= BLOCKBYTES;
}
// final message block (skipped entirely for an empty message)
if (!emptyM) {
TRIPLE_HALF_BLOCK(offset);
if(in_len % BLOCKBYTES != 0)
TRIPLE_HALF_BLOCK(offset);
if (encrypting) {
RHO((u32*)Y, (u32*)in, input, (u32*)out, in_len);
out += in_len;
}
else {
RHO_PRIME((u32*)Y, (u32*)in, input, (u32*)out, in_len);
in += in_len;
}
XOR_TOP_BAR_BLOCK(input, offset);
giftb128(Y, (u8*)input, rkey);
}
// tag handling: Y now holds the tag
if (encrypting) {
memcpy(out, Y, TAGBYTES);
return 0;
}
// decrypting
// constant-time tag comparison: accumulate byte differences
tmp0 = 0;
for(tmp1 = 0; tmp1 < TAGBYTES; tmp1++)
tmp0 |= in[tmp1] ^ Y[tmp1];
return tmp0;
}
/****************************************************************************
* API required by the NIST for the LWC competition.
****************************************************************************/
/*
 * NIST LWC AEAD encryption entry point for GIFT-COFB.
 * c receives the ciphertext followed by the TAGBYTES tag; *clen is set to
 * mlen + TAGBYTES. nsec is unused (CRYPTO_NSECBYTES == 0).
 * Returns the giftcofb_crypt status (0 on success).
 */
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
const unsigned char* m, unsigned long long mlen,
const unsigned char* ad, unsigned long long adlen,
const unsigned char* nsec, const unsigned char* npub,
const unsigned char* k) {
(void)nsec;
*clen = mlen + TAGBYTES;
return giftcofb_crypt(c, k, npub, ad, adlen, m, mlen, COFB_ENCRYPT);
}
/****************************************************************************
* API required by the NIST for the LWC competition.
****************************************************************************/
/*
 * NIST LWC AEAD decryption entry point for GIFT-COFB.
 * On success writes clen - TAGBYTES plaintext bytes to m and sets *mlen.
 * Returns 0 on success, nonzero on failure (short input or tag mismatch).
 *
 * Fix: validate clen before computing *mlen. Previously
 * `*mlen = clen - TAGBYTES` ran first, so a ciphertext shorter than the tag
 * underflowed the unsigned subtraction and left a huge bogus *mlen even
 * though giftcofb_crypt then rejected the input with -1.
 */
int crypto_aead_decrypt(unsigned char* m, unsigned long long *mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char *k) {
(void)nsec;
if (clen < TAGBYTES) {
*mlen = 0; // no plaintext recovered
return -1;
}
*mlen = clen - TAGBYTES;
return giftcofb_crypt(m, k, npub, ad, adlen, c, clen, COFB_DECRYPT);
}
#ifndef ENDIAN_H_
#define ENDIAN_H_
/* Byte-swap a 32-bit value (expression macro; evaluates x four times, so do
 * not pass expressions with side effects). */
#define U32BIG(x) \
((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \
(((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24))
/* Store the 32-bit value y into byte array x in big-endian order.
 * NOTE(review): expands to four statements and is NOT do-while(0)-wrapped —
 * it must not be used as an unbraced if/else or loop body. */
#define U8BIG(x, y) \
(x)[0] = (y) >> 24; \
(x)[1] = ((y) >> 16) & 0xff; \
(x)[2] = ((y) >> 8) & 0xff; \
(x)[3] = (y) & 0xff;
#endif // ENDIAN_H_
\ No newline at end of file
/*******************************************************************************
* Optimized constant-time implementation of the GIFTb-128 block cipher.
*
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
*
* @date January 2020
*******************************************************************************/
#include "endian.h"
#include "giftb128.h"
#include "key_schedule.h"
/*****************************************************************************
* The round constants according to the fixsliced representation.
*****************************************************************************/
// 40 round constants for GIFTb-128: 5 per quintuple round, 8 quintuple
// rounds, already transformed into the fixsliced bit ordering. Consumed as
// (rconst)[0..4] by QUINTUPLE_ROUND in giftb128().
const u32 rconst[40] = {
0x10000008, 0x80018000, 0x54000002, 0x01010181,
0x8000001f, 0x10888880, 0x6001e000, 0x51500002,
0x03030180, 0x8000002f, 0x10088880, 0x60016000,
0x41500002, 0x03030080, 0x80000027, 0x10008880,
0x4001e000, 0x11500002, 0x03020180, 0x8000002b,
0x10080880, 0x60014000, 0x01400002, 0x02020080,
0x80000021, 0x10000080, 0x0001c000, 0x51000002,
0x03010180, 0x8000002e, 0x10088800, 0x60012000,
0x40500002, 0x01030080, 0x80000006, 0x10008808,
0xc001a000, 0x14500002, 0x01020181, 0x8000001a
};
/*****************************************************************************
* The first 20 rkeys are computed using the classical representation before
* being rearranged into fixsliced representations depending on round numbers.
* The 60 remaining rkeys are directly computed in fixscliced representations.
*****************************************************************************/
/*
 * Expand the 16-byte GIFT-128 key into 80 round-key words (rkey[0..79]).
 * The first 20 words are produced with the classical key schedule and then
 * rearranged into fixsliced form; the remaining 60 are derived directly in
 * fixsliced form from earlier fixsliced words. The update/rearrange macros
 * come from key_schedule.h; `tmp` is the scratch word SWAPMOVE expands into.
 *
 * NOTE(review): ((u32*)key) assumes the key buffer is 4-byte aligned —
 * confirm for strict-alignment targets.
 */
void precompute_rkeys(u32* rkey, const u8* key) {
u32 tmp;
//classical initialization
// load the four big-endian key words in the order the schedule consumes them
rkey[0] = U32BIG(((u32*)key)[3]);
rkey[1] = U32BIG(((u32*)key)[1]);
rkey[2] = U32BIG(((u32*)key)[2]);
rkey[3] = U32BIG(((u32*)key)[0]);
// classical keyschedule
// each step copies one word forward and rotates the other via KEY_UPDATE
for(int i = 0; i < 16; i+=2) {
rkey[i+4] = rkey[i+1];
rkey[i+5] = KEY_UPDATE(rkey[i]);
}
// transposition to fixsliced representations
// (pairs of words use the rearrangement matching their round's slicing)
for(int i = 0; i < 20; i+=10) {
rkey[i] = REARRANGE_RKEY_0(rkey[i]);
rkey[i + 1] = REARRANGE_RKEY_0(rkey[i + 1]);
rkey[i + 2] = REARRANGE_RKEY_1(rkey[i + 2]);
rkey[i + 3] = REARRANGE_RKEY_1(rkey[i + 3]);
rkey[i + 4] = REARRANGE_RKEY_2(rkey[i + 4]);
rkey[i + 5] = REARRANGE_RKEY_2(rkey[i + 5]);
rkey[i + 6] = REARRANGE_RKEY_3(rkey[i + 6]);
rkey[i + 7] = REARRANGE_RKEY_3(rkey[i + 7]);
}
// keyschedule according to fixsliced representations
// each new block of 10 words is an update of words 11..20 positions back
for(int i = 20; i < 80; i+=10) {
rkey[i] = rkey[i-19];
rkey[i+1] = KEY_TRIPLE_UPDATE_0(rkey[i-20]);
rkey[i+2] = KEY_DOUBLE_UPDATE_1(rkey[i-17]);
rkey[i+3] = KEY_TRIPLE_UPDATE_1(rkey[i-18]);
rkey[i+4] = KEY_DOUBLE_UPDATE_2(rkey[i-15]);
rkey[i+5] = KEY_TRIPLE_UPDATE_2(rkey[i-16]);
rkey[i+6] = KEY_DOUBLE_UPDATE_3(rkey[i-13]);
rkey[i+7] = KEY_TRIPLE_UPDATE_3(rkey[i-14]);
rkey[i+8] = KEY_DOUBLE_UPDATE_4(rkey[i-11]);
rkey[i+9] = KEY_TRIPLE_UPDATE_4(rkey[i-12]);
SWAPMOVE(rkey[i], rkey[i], 0x00003333, 16);
SWAPMOVE(rkey[i], rkey[i], 0x55554444, 1);
SWAPMOVE(rkey[i+1], rkey[i+1], 0x55551100, 1);
}
}
/*****************************************************************************
* Encryption of a single 128-bit block with GIFTb-128 (used in GIFT-COFB).
*****************************************************************************/
/*
 * Encrypt one 128-bit block with GIFTb-128 (fixsliced, 40 rounds executed as
 * 8 quintuple rounds). `rkey` holds the 80 precomputed round-key words;
 * `rconst` supplies 5 constants per quintuple round. Input and output are
 * big-endian byte strings.
 */
void giftb128(u8* ctext, const u8* ptext, const u32* rkey) {
    u32 tmp, state[4]; /* tmp is the scratch word used by SWAPMOVE */
    /* load the four state words, byte-swapped to the internal order */
    for (int w = 0; w < 4; w++)
        state[w] = U32BIG(((u32*)ptext)[w]);
    /* 8 quintuple rounds, each consuming 10 rkey words and 5 constants */
    for (int q = 0; q < 8; q++) {
        QUINTUPLE_ROUND(state, rkey + 10*q, rconst + 5*q);
    }
    /* store the state back as big-endian bytes */
    for (int w = 0; w < 4; w++) {
        U8BIG(ctext + 4*w, state[w]);
    }
}
#ifndef GIFT128_H_
#define GIFT128_H_
typedef unsigned char u8;
typedef unsigned int u32;
extern void precompute_rkeys(u32* rkeys, const u8* key);
extern void giftb128(u8* out, const u8* in, const u32* rkeys);
/* 32-bit rotate right. All call sites in this header use constant shifts in
 * {8,16,24}; a shift of 0 or 32 would be undefined behavior. */
#define ROR(x,y) \
(((x) >> (y)) | ((x) << (32 - (y))))
/* Rotate every byte of x right by 2 / 4 / 6 bits independently. */
#define BYTE_ROR_2(x) \
((((x) >> 2) & 0x3f3f3f3f) | (((x) & 0x03030303) << 6))
#define BYTE_ROR_4(x) \
((((x) >> 4) & 0x0f0f0f0f) | (((x) & 0x0f0f0f0f) << 4))
#define BYTE_ROR_6(x) \
((((x) >> 6) & 0x03030303) | (((x) & 0x3f3f3f3f) << 2))
/* Rotate each 16-bit half of x right by 4 / 8 / 12 bits. */
#define HALF_ROR_4(x) \
((((x) >> 4) & 0x0fff0fff) | (((x) & 0x000f000f) << 12))
#define HALF_ROR_8(x) \
((((x) >> 8) & 0x00ff00ff) | (((x) & 0x00ff00ff) << 8))
#define HALF_ROR_12(x) \
((((x) >> 12)& 0x000f000f) | (((x) & 0x0fff0fff) << 4))
/* Rotate every 4-bit nibble of x right by 1 / 2 / 3 bits. */
#define NIBBLE_ROR_1(x) \
((((x) >> 1) & 0x77777777) | (((x) & 0x11111111) << 3))
#define NIBBLE_ROR_2(x) \
((((x) >> 2) & 0x33333333) | (((x) & 0x33333333) << 2))
#define NIBBLE_ROR_3(x) \
((((x) >> 3) & 0x11111111) | (((x) & 0x77777777) << 1))
/* Swap the bits of a selected by (mask << n) with the bits of b selected by
 * mask. NOTE(review): three statements, not do-while(0)-wrapped (must not be
 * an unbraced if/loop body), and it writes a `tmp` variable that the caller
 * must have in scope. */
#define SWAPMOVE(a, b, mask, n) \
tmp = (b ^ (a >> n)) & mask; \
b ^= tmp; \
a ^= (tmp << n);
/* Bitsliced GIFT S-box applied to four slice words (multi-statement macro;
 * same bracing caveat as SWAPMOVE). */
#define SBOX(s0, s1, s2, s3) \
s1 ^= s0 & s2; \
s0 ^= s1 & s3; \
s2 ^= s0 | s1; \
s3 ^= s2; \
s1 ^= s3; \
s3 ^= 0xffffffff; \
s2 ^= s0 & s1;
/* Five consecutive fixsliced GIFT rounds: each round is SBOX + that round's
 * linear layer (nibble/half/byte/word rotations), then round-key and
 * round-constant addition. The final three XORs swap state[0] and state[3]
 * to restore the slice order for the next quintuple round. */
#define QUINTUPLE_ROUND(state, rkey, rconst) ({ \
SBOX(state[0], state[1], state[2], state[3]); \
state[3] = NIBBLE_ROR_1(state[3]); \
state[1] = NIBBLE_ROR_2(state[1]); \
state[2] = NIBBLE_ROR_3(state[2]); \
state[1] ^= (rkey)[0]; \
state[2] ^= (rkey)[1]; \
state[0] ^= (rconst)[0]; \
SBOX(state[3], state[1], state[2], state[0]); \
state[0] = HALF_ROR_4(state[0]); \
state[1] = HALF_ROR_8(state[1]); \
state[2] = HALF_ROR_12(state[2]); \
state[1] ^= (rkey)[2]; \
state[2] ^= (rkey)[3]; \
state[3] ^= (rconst)[1]; \
SBOX(state[0], state[1], state[2], state[3]); \
state[3] = ROR(state[3], 16); \
state[2] = ROR(state[2], 16); \
SWAPMOVE(state[1], state[1], 0x55555555, 1); \
SWAPMOVE(state[2], state[2], 0x00005555, 1); \
SWAPMOVE(state[3], state[3], 0x55550000, 1); \
state[1] ^= (rkey)[4]; \
state[2] ^= (rkey)[5]; \
state[0] ^= (rconst)[2]; \
SBOX(state[3], state[1], state[2], state[0]); \
state[0] = BYTE_ROR_6(state[0]); \
state[1] = BYTE_ROR_4(state[1]); \
state[2] = BYTE_ROR_2(state[2]); \
state[1] ^= (rkey)[6]; \
state[2] ^= (rkey)[7]; \
state[3] ^= (rconst)[3]; \
SBOX(state[0], state[1], state[2], state[3]); \
state[3] = ROR(state[3], 24); \
state[1] = ROR(state[1], 16); \
state[2] = ROR(state[2], 8); \
state[1] ^= (rkey)[8]; \
state[2] ^= (rkey)[9]; \
state[0] ^= (rconst)[4]; \
state[0] ^= state[3]; \
state[3] ^= state[0]; \
state[0] ^= state[3]; \
})
#endif // GIFT128_H_
\ No newline at end of file
#ifndef KEYSCHEDULE_H_
#define KEYSCHEDULE_H_
/*
 * Fixsliced GIFT-128 key-schedule helpers (used by precompute_rkeys).
 * The REARRANGE_* macros are statement expressions built on SWAPMOVE and
 * therefore need a `tmp` variable in the caller's scope; each permutes a
 * classical round-key word into one of the four fixsliced bit orderings.
 */
#define REARRANGE_RKEY_0(x) ({ \
SWAPMOVE(x, x, 0x00550055, 9); \
SWAPMOVE(x, x, 0x000f000f, 12); \
SWAPMOVE(x, x, 0x00003333, 18); \
SWAPMOVE(x, x, 0x000000ff, 24); \
})
#define REARRANGE_RKEY_1(x) ({ \
SWAPMOVE(x, x, 0x11111111, 3); \
SWAPMOVE(x, x, 0x03030303, 6); \
SWAPMOVE(x, x, 0x000f000f, 12); \
SWAPMOVE(x, x, 0x000000ff, 24); \
})
#define REARRANGE_RKEY_2(x) ({ \
SWAPMOVE(x, x, 0x0000aaaa, 15); \
SWAPMOVE(x, x, 0x00003333, 18); \
SWAPMOVE(x, x, 0x0000f0f0, 12); \
SWAPMOVE(x, x, 0x000000ff, 24); \
})
#define REARRANGE_RKEY_3(x) ({ \
SWAPMOVE(x, x, 0x0a0a0a0a, 3); \
SWAPMOVE(x, x, 0x00cc00cc, 6); \
SWAPMOVE(x, x, 0x0000f0f0, 12); \
SWAPMOVE(x, x, 0x000000ff, 24); \
})
/* Classical GIFT key update: rotate the low half right by 12 and the high
 * half right by 2 (expression macro).
 * NOTE(review): the expansion has no outer parentheses — safe at the current
 * call site (plain assignment in precompute_rkeys) but fragile if embedded
 * in a larger expression. */
#define KEY_UPDATE(x) \
(((x) >> 12) & 0x0000000f) | (((x) & 0x00000fff) << 4) | \
(((x) >> 2) & 0x3fff0000) | (((x) & 0x00030000) << 14)
/* KEY_DOUBLE_UPDATE_n / KEY_TRIPLE_UPDATE_n: the classical update applied
 * twice / three times, expressed directly in the fixsliced ordering of
 * round class n (masked shifts and rotations precomputed per class). */
#define KEY_TRIPLE_UPDATE_0(x) \
(ROR((x) & 0x33333333, 24) | ROR((x) & 0xcccccccc, 16))
#define KEY_DOUBLE_UPDATE_1(x) \
((((x) >> 4) & 0x0f000f00) | (((x) & 0x0f000f00) << 4) | \
(((x) >> 6) & 0x00030003) | (((x) & 0x003f003f) << 2))
#define KEY_TRIPLE_UPDATE_1(x) \
((((x) >> 6) & 0x03000300) | (((x) & 0x3f003f00) << 2) | \
(((x) >> 5) & 0x00070007) | (((x) & 0x001f001f) << 3))
#define KEY_DOUBLE_UPDATE_2(x) \
(ROR((x) & 0xaaaaaaaa, 24) | ROR((x) & 0x55555555, 16))
#define KEY_TRIPLE_UPDATE_2(x) \
(ROR((x) & 0x55555555, 24) | ROR((x) & 0xaaaaaaaa, 20))
#define KEY_DOUBLE_UPDATE_3(x) \
((((x) >> 2) & 0x03030303) | (((x) & 0x03030303) << 2) | \
(((x) >> 1) & 0x70707070) | (((x) & 0x10101010) << 3))
#define KEY_TRIPLE_UPDATE_3(x) \
((((x) >> 18) & 0x00003030) | (((x) & 0x01010101) << 3) | \
(((x) >> 14) & 0x0000c0c0) | (((x) & 0x0000e0e0) << 15)| \
(((x) >> 1) & 0x07070707) | (((x) & 0x00001010) << 19))
#define KEY_DOUBLE_UPDATE_4(x) \
((((x) >> 4) & 0x0fff0000) | (((x) & 0x000f0000) << 12) | \
(((x) >> 8) & 0x000000ff) | (((x) & 0x000000ff) << 8))
#define KEY_TRIPLE_UPDATE_4(x) \
((((x) >> 6) & 0x03ff0000) | (((x) & 0x003f0000) << 10) | \
(((x) >> 4) & 0x00000fff) | (((x) & 0x0000000f) << 12))
#endif // KEYSCHEDULE_H_
\ No newline at end of file
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
/* NIST LWC AEAD interface (buffer sizes per api.h).
 * Encrypt: writes mlen + CRYPTO_ABYTES bytes to c and sets *clen.
 * Returns 0 on success. */
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k);
/* Decrypt: on success writes clen - CRYPTO_ABYTES bytes to m and sets
 * *outputmlen. Returns 0 on success, nonzero on authentication failure. */
int crypto_aead_decrypt(unsigned char *m, unsigned long long *outputmlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k);
/*
* Date: 29 November 2018
* Contact: Thomas Peyrin - thomas.peyrin@gmail.com
* Mustafa Khairallah - mustafam001@e.ntu.edu.sg
*/
#include "crypto_aead.h"
#include "api.h"
#include "skinny.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * 10*-style padding for Romulus: fill mp[0..l-1] with the first len8 bytes of
 * m, zero the rest, and place (len8 & 0x0f) in the final byte of the block.
 * When len8 >= l the block is a plain copy and no length byte is written.
 */
void pad (const unsigned char* m, unsigned char* mp, int l, int len8) {
    for (int j = 0; j < l; j++) {
        unsigned char v;
        if (j < len8) {
            v = m[j];                           /* message byte */
        }
        else if (j == l - 1) {
            v = (unsigned char)(len8 & 0x0f);   /* length marker in last byte */
        }
        else {
            v = 0x00;                           /* zero padding */
        }
        mp[j] = v;
    }
}
/*
 * Romulus state function G: applies the per-byte map
 *   b -> ((b >> 1) & 0x7f) ^ (b & 0x80) ^ ((b << 7) & 0x80)
 * to all 16 bytes of s, writing the result to c. Implemented on 32-bit words
 * for speed (each byte is transformed independently, so word order does not
 * affect the result).
 *
 * Fix: the previous `*(unsigned int *)&s[i]` loads/stores were undefined
 * behavior (strict aliasing) and crash on strict-alignment targets when the
 * caller passes an unaligned buffer; memcpy-based word access compiles to
 * the same loads/stores on mainstream compilers without the UB.
 */
void g8A (unsigned char* s, unsigned char* c) {
    unsigned int tmps[4];
    unsigned int tmpc[4];
    memcpy(tmps, s, 16);            /* alignment-safe word load */
    for (int i = 0; i < 4; i++) {
        /* per-byte: (b>>1)&0x7f ^ b&0x80 ^ (b<<7)&0x80, four bytes at once */
        tmpc[i] = ((tmps[i] >> 1) & 0x7f7f7f7f)
                ^ (tmps[i] & 0x80808080)
                ^ ((tmps[i] << 7) & 0x80808080);
    }
    memcpy(c, tmpc, 16);            /* alignment-safe word store */
}
/*
 * Same per-byte G map as g8A, but the output is stored byte-by-byte because
 * the tag buffer c supplied by the API caller is not guaranteed to be
 * word-aligned (see original comment).
 *
 * Fix: the input loads used `*(unsigned int *)&s[i]`, which is a strict-
 * aliasing violation and an unaligned access hazard; replaced with memcpy.
 * The byte-wise output stores are kept unchanged.
 */
void g8A_for_Tag_Generation (unsigned char* s, unsigned char* c) {
    unsigned int tmps[4];
    unsigned int tmpc[4];
    memcpy(tmps, s, 16);            /* alignment-safe word load */
    for (int i = 0; i < 4; i++) {
        /* per-byte: (b>>1)&0x7f ^ b&0x80 ^ (b<<7)&0x80 */
        tmpc[i] = ((tmps[i] >> 1) & 0x7f7f7f7f)
                ^ (tmps[i] & 0x80808080)
                ^ ((tmps[i] << 7) & 0x80808080);
    }
    /* byte stores: c is not always word-aligned */
    for (int i = 0; i < 4; i++) {
        c[4*i]     =  tmpc[i]        & 0xFF;
        c[4*i + 1] = (tmpc[i] >> 8)  & 0xFF;
        c[4*i + 2] = (tmpc[i] >> 16) & 0xFF;
        c[4*i + 3] = (tmpc[i] >> 24) & 0xFF;
    }
}
/*
 * Absorb one full 16-byte associated-data block: s ^= m.
 *
 * Fix: m points into the caller's AD buffer, which has arbitrary alignment,
 * so the previous `*(unsigned int *)&m[i]` word XORs were unaligned accesses
 * and strict-aliasing UB. A byte-wise XOR is exactly equivalent.
 */
void rho_ad_eqov16 (const unsigned char* m,
                    unsigned char* s) {
    for (int i = 0; i < 16; i++)
        s[i] ^= m[i];
}
/*
 * Absorb the final (partial, len8 < 16) associated-data block: pad it to a
 * full block, then s ^= padded block.
 *
 * Fix: the word XORs on s and the local mp were strict-aliasing UB and an
 * alignment hazard; the byte-wise XOR below is exactly equivalent.
 */
void rho_ad_ud16 (const unsigned char* m,
                  unsigned char* s,
                  int len8) {
    unsigned char mp [16];
    pad(m,mp,16,len8);
    for (int i = 0; i < 16; i++)
        s[i] ^= mp[i];
}
/*
 * Encrypt one full 16-byte message block: c = G(s) ^ m and s ^= m.
 * g8A writes G(s) into c; the loop then folds the message into both the
 * running state and the ciphertext.
 *
 * Fix: m and c are caller buffers with arbitrary alignment, so the previous
 * `*(unsigned int *)` word XORs were unaligned accesses and strict-aliasing
 * UB; the byte-wise form is exactly equivalent.
 */
void rho_eqov16 (const unsigned char* m,
                 unsigned char* c,
                 unsigned char* s) {
    g8A(s,c);
    for (int i = 0; i < 16; i++) {
        s[i] ^= m[i];
        c[i] ^= m[i];
    }
}
/*
 * Encrypt the final (partial) message block. The padded message is folded
 * into the whole 16-byte state; ciphertext bytes are produced only for the
 * len8 real message bytes, the remaining output bytes up to ver are zeroed.
 *
 * Fix: the word XORs on s/mp (`*(unsigned int *)`) were strict-aliasing UB
 * and an alignment hazard; the byte-wise loop is exactly equivalent
 * (the original also XORed all four words of mp into s regardless of ver).
 */
void rho_ud16 (const unsigned char* m,
               unsigned char* c,
               unsigned char* s,
               int len8,
               int ver) {
    int i;
    unsigned char mp [16];
    pad(m,mp,ver,len8);
    g8A(s,c);
    /* state absorbs the full padded block */
    for (i = 0; i < 16; i++)
        s[i] ^= mp[i];
    /* ciphertext only for real message bytes; zero the padded tail */
    for (i = 0; i < ver; i++) {
        if (i < len8) {
            c[i] = c[i] ^ mp[i];
        }
        else {
            c[i] = 0;
        }
    }
}
/*
 * Inverse rho for decryption of a (possibly partial) ciphertext block.
 * g8A writes G(s) into m; then for real ciphertext bytes (i < len8) the
 * state absorbs cp[i] ^ m[i] and the plaintext byte becomes m[i] ^ cp[i];
 * padded positions only fold cp into the state and zero the output.
 * Within each branch the state update uses m[i] BEFORE it is overwritten.
 */
void irho (unsigned char* m,
           const unsigned char* c,
           unsigned char* s,
           int len8,
           int ver) {
    unsigned char cp [16];
    pad(c,cp,ver,len8);
    g8A(s,m);
    for (int i = 0; i < ver; i++) {
        if (i < len8) {
            s[i] = s[i] ^ cp[i] ^ m[i]; /* reads m[i] prior to the update below */
            m[i] = m[i] ^ cp[i];
        }
        else {
            s[i] = s[i] ^ cp[i];
            m[i] = 0;
        }
    }
}
/*
 * Initialize the 56-bit block counter to 1 (CNT[0] is the least-significant
 * byte; CNT[7] is the domain-separation byte slot, cleared here).
 *
 * Fix: the previous `*(unsigned int *)&CNT[i]` word stores assumed 4-byte
 * alignment and little-endian layout; byte stores are portable and produce
 * the identical bytes on little-endian builds.
 */
void reset_lfsr_gf56 (unsigned char* CNT) {
    CNT[0] = 0x01;
    for (int i = 1; i < 8; i++)
        CNT[i] = 0x00;
}
/*
 * Step the 56-bit LFSR counter held in CNT[0..6] (CNT[0] = LSB): shift left
 * by one bit and, when the outgoing bit 55 (CNT[6] bit 7) is set, XOR the
 * feedback constant 0x95 into the low byte.
 *
 * Fix: the previous implementation did the shift via two unaligned
 * little-endian `*(unsigned int *)` loads/stores (strict-aliasing UB,
 * endian-dependent). The byte-wise shift below is bit-identical on
 * little-endian builds, including the harmless carry into CNT[7], which is
 * overwritten with the domain byte before every block-cipher call.
 */
void lfsr_gf56 (unsigned char* CNT) {
    unsigned int fb0 = (CNT[6] & 0x80) ? 0x95 : 0x00;
    /* shift the whole 8-byte array left by one bit, MSB-first */
    for (int i = 7; i > 0; i--)
        CNT[i] = (unsigned char)((CNT[i] << 1) | (CNT[i - 1] >> 7));
    CNT[0] = (unsigned char)((CNT[0] << 1) ^ fb0);
}
// Run one SKINNY-128-384 encryption of the 16-byte state s, keyed with k and
// tweaked with (T, CNT), through the function pointer installed in
// p_skinny_ctrl (lets callers switch between enc-routine variants).
void block_cipher(unsigned char* s,
const unsigned char* k, unsigned char* T,
unsigned char* CNT,
skinny_ctrl* p_skinny_ctrl) {
p_skinny_ctrl->func_skinny_128_384_enc (s,p_skinny_ctrl,CNT,T,k);
}
/*
 * Encrypt the state with the nonce N as (part of the) tweak: copy N into the
 * 16-byte tweak buffer T, set the domain-separation byte D into CNT[7], and
 * invoke the block cipher.
 *
 * Fix: N is the caller-supplied nonce pointer with arbitrary alignment, so
 * the previous `*(unsigned int *)&N[i]` word copies were unaligned accesses
 * and strict-aliasing UB; memcpy is equivalent and safe.
 */
void nonce_encryption (const unsigned char* N,
                       unsigned char* CNT,
                       unsigned char*s, const unsigned char* k,
                       unsigned char D,
                       skinny_ctrl* p_skinny_ctrl) {
    unsigned char T [16];
    memcpy(T, N, 16);   /* alignment-safe copy of the nonce */
    CNT[7] = D;         /* domain-separation byte */
    block_cipher(s,k,T,CNT,p_skinny_ctrl);
}
// Finalization: derive the tag from the state s via the byte-store variant of
// G and write its 16 bytes at *c, then move *c forward by n and back by *clen
// — presumably repositioning *c to the start of the output buffer so the
// caller sees the whole ciphertext; confirm against the call site.
void generate_tag (unsigned char** c, unsigned char* s,
int n, unsigned long long* clen) {
g8A_for_Tag_Generation(s, *c);
*c = *c + n;
*c = *c - *clen;
}
/*
 * Encrypt one message block (full 16 bytes, or the final partial block):
 * rho folds the message into the state and emits ciphertext, both data
 * pointers advance, the block counter steps, and — unless this was the last
 * block — the state is re-encrypted under the nonce/counter tweak with
 * domain byte D. Returns the number of message bytes still to process.
 */
unsigned long long msg_encryption (const unsigned char** M, unsigned char** c,
const unsigned char* N,
unsigned char* CNT,
unsigned char*s, const unsigned char* k,
unsigned char D,
unsigned long long mlen,
skinny_ctrl* p_skinny_ctrl) {
int len8;
if (mlen >= 16) {
// full block
len8 = 16;
mlen = mlen - 16;
rho_eqov16(*M, *c, s);
}
else {
// final partial block (len8 < 16 fits in int)
len8 = mlen;
mlen = 0;
rho_ud16(*M, *c, s, len8, 16);
}
*c = *c + len8;
*M = *M + len8;
lfsr_gf56(CNT);
// no block-cipher call after the last message block
if (mlen != 0) {
nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl);
}
return mlen;
}
/*
 * Decrypt one ciphertext block (full 16 bytes, or the final partial block):
 * irho recovers the plaintext and updates the state, both pointers advance,
 * the counter steps, and the state is re-encrypted under the nonce/counter
 * tweak. Returns the number of ciphertext bytes still to process.
 * NOTE(review): unlike msg_encryption, nonce_encryption is called even after
 * the final block — presumably needed before tag verification; confirm
 * against the decrypt driver.
 */
unsigned long long msg_decryption (unsigned char** M, const unsigned char** c,
const unsigned char* N,
unsigned char* CNT,
unsigned char*s, const unsigned char* k,
unsigned char D,
unsigned long long clen,
skinny_ctrl* p_skinny_ctrl) {
int len8;
if (clen >= 16) {
// full block
len8 = 16;
clen = clen - 16;
}
else {
// final partial block
len8 = clen;
clen = 0;
}
irho(*M, *c, s, len8, 16);
*c = *c + len8;
*M = *M + len8;
lfsr_gf56(CNT);
nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl);
return clen;
}
/*
 * Absorb one message block through the tweak input (used when message bytes
 * are processed in the AD phase): a full 16-byte block is copied into T,
 * a final partial block is padded into T; the block cipher is run with
 * domain byte D and the counter steps. Returns the remaining byte count.
 *
 * Fix: the full-block path copied *M with `*(unsigned int *)` word accesses;
 * *M is caller data of arbitrary alignment, so that was unaligned access and
 * strict-aliasing UB. memcpy is equivalent and safe.
 */
unsigned long long ad2msg_encryption (const unsigned char** M,
                                      unsigned char* CNT,
                                      unsigned char*s, const unsigned char* k,
                                      unsigned char D,
                                      unsigned long long mlen,
                                      skinny_ctrl* p_skinny_ctrl) {
    unsigned char T [16];
    int len8;
    if (mlen <= 16) {
        /* final (possibly full) block: pad into the tweak buffer */
        len8 = mlen;
        mlen = 0;
        pad (*M,T,16,len8);
    }
    else {
        /* full block with more to come: alignment-safe copy */
        len8 = 16;
        mlen = mlen - 16;
        memcpy(T, *M, 16);
    }
    CNT[7] = D;
    block_cipher(s,k,T,CNT,p_skinny_ctrl);
    lfsr_gf56(CNT);
    *M = *M + len8;
    return mlen;
}
// Absorb up to 32 bytes of associated data: the first 16 bytes go into
// the state via rho_ad, the second 16 bytes (padded if short) go in
// through the tweak T of a SKINNY call.  The counter is clocked once
// per 16-byte half.  Returns the number of AD bytes remaining.
unsigned long long ad_encryption (const unsigned char** A, unsigned char* s,
const unsigned char* k, unsigned long long adlen,
unsigned char* CNT,
unsigned char D,
skinny_ctrl* p_skinny_ctrl) {
unsigned char T [16];
int len8;
if (adlen >= 16) {
len8 = 16;
adlen = adlen - 16;
rho_ad_eqov16(*A, s);
}
else {
// partial first half: absorb with padding
len8 = adlen;
adlen = 0;
rho_ad_ud16(*A, s, len8);
}
*A = *A + len8;
lfsr_gf56(CNT);
if (adlen != 0) {
// second half of the pair feeds the tweak input
if (adlen >= 16) {
len8 = 16;
adlen = adlen - 16;
unsigned char *pA = (unsigned char *)(*A);
*((unsigned int *)&T[0]) = *((unsigned int *)&pA[0]);
*((unsigned int *)&T[4]) = *((unsigned int *)&pA[4]);
*((unsigned int *)&T[8]) = *((unsigned int *)&pA[8]);
*((unsigned int *)&T[12]) = *((unsigned int *)&pA[12]);
}
else {
len8 = adlen;
adlen = 0;
pad(*A, T, 16, len8);
}
*A = *A + len8;
CNT[7] = D;
block_cipher(s,k,T,CNT,p_skinny_ctrl);
lfsr_gf56(CNT);
}
return adlen;
}
// AEAD encryption (ROMULUS-M style: MAC-then-encrypt).
// Phase 1 absorbs AD and the message to derive the tag; phase 2
// re-encrypts the message keyed by that tag.  Interface follows the
// NIST LWC/SUPERCOP crypto_aead API; nsec is unused.
// Returns 0 on success; *clen is set to mlen + 16 (tag appended).
int crypto_aead_encrypt (
unsigned char* c, unsigned long long* clen,
const unsigned char* m, unsigned long long mlen,
const unsigned char* ad, unsigned long long adlen,
const unsigned char* nsec,
const unsigned char* npub,
const unsigned char* k
)
{
  unsigned char s[16];
  unsigned char CNT[8]; // size 7 -> 8 for word access
  unsigned char T[16];
  const unsigned char* N;
  unsigned char w;
  unsigned long long xlen;
  skinny_ctrl l_skinny_ctrl;
  l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12;
  (void)nsec;
  N = npub;
  xlen = mlen;
  *((unsigned int *)&s[0])  = 0x00000000;
  *((unsigned int *)&s[4])  = 0x00000000;
  *((unsigned int *)&s[8])  = 0x00000000;
  *((unsigned int *)&s[12]) = 0x00000000;
  reset_lfsr_gf56(CNT);
  // Domain-separation byte w.  The AD-length contribution and the
  // message-length contribution are independent and combine by XOR,
  // so they are computed once each.  (This replaces five copies of an
  // identical xlen branch nested inside each adlen branch.)
  w = 48;
  if (adlen == 0)            w = w ^ 2;  // empty AD
  else if (adlen%(32) == 0)  w = w ^ 8;  // last AD pair even & complete
  else if (adlen%(32) < 16)  w = w ^ 2;  // last AD block odd & incomplete
  else if (adlen%(32) == 16) w = w ^ 0;  // last AD block odd & complete
  else                       w = w ^ 10; // last AD pair even & incomplete
  if (xlen == 0)             w = w ^ 1;  // empty message
  else if (xlen%(32) == 0)   w = w ^ 4;
  else if (xlen%(32) < 16)   w = w ^ 1;
  else if (xlen%(32) == 16)  w = w ^ 0;
  else                       w = w ^ 5;
  // Phase 1: absorb AD, then the message, to compute the tag.
  if (adlen == 0) { // AD is an empty string
    lfsr_gf56(CNT);
  }
  else while (adlen > 0) {
    adlen = ad_encryption(&ad,s,k,adlen,CNT,40,&l_skinny_ctrl);
  }
  // NOTE(review): bit 3 of w appears to record whether the AD filled an
  // even number of 16-byte blocks; when clear, the first message block
  // goes in through the free tweak slot — confirm against the spec.
  if ((w & 8) == 0) {
    xlen = ad2msg_encryption (&m,CNT,s,k,44,xlen,&l_skinny_ctrl);
  }
  else if (mlen == 0) {
    lfsr_gf56(CNT);
  }
  while (xlen > 0) {
    xlen = ad_encryption(&m,s,k,xlen,CNT,44,&l_skinny_ctrl);
  }
  nonce_encryption(N,CNT,s,k,w,&l_skinny_ctrl);
  // because, nonce_encryption is called at the last block of AD encryption
  l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1;
  // Tag generation
  g8A(s, T);
  m = m - mlen; // rewind: the absorb phase advanced m to its end
  reset_lfsr_gf56(CNT);
  // Re-key the state with the tag for the encryption pass.
  *((unsigned int *)&s[0])  = *((unsigned int *)&T[0]);
  *((unsigned int *)&s[4])  = *((unsigned int *)&T[4]);
  *((unsigned int *)&s[8])  = *((unsigned int *)&T[8]);
  *((unsigned int *)&s[12]) = *((unsigned int *)&T[12]);
  *clen = mlen + 16;
  // Phase 2: encrypt the message.
  if (mlen > 0) {
    nonce_encryption(N,CNT,s,k,36,&l_skinny_ctrl);
    while (mlen > 16) {
      mlen = msg_encryption(&m,&c,N,CNT,s,k,36,mlen,&l_skinny_ctrl);
    }
    rho_ud16(m, c, s, mlen, 16);
    c = c + mlen;
    m = m + mlen;
  }
  // Tag Concatenation
  // use byte access because of memory alignment.
  // c is not always in word(4 byte) alignment.
  for (int i = 0; i < 16; i = i + 1) {
    *(c + i) = T[i];
  }
  c = c - *clen;
  return 0;
}
// AEAD decryption matching crypto_aead_encrypt above: the received tag
// keys the decryption pass, then the tag is recomputed over AD and the
// recovered plaintext and compared.  Returns 0 on success, -1 on tag
// mismatch.  nsec is unused.
int crypto_aead_decrypt(
unsigned char *m,unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c,unsigned long long clen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
)
{
  unsigned char s[16];
  unsigned char CNT[8]; // size 7 -> 8 for word access
  unsigned char T[16];
  const unsigned char* N;
  unsigned char w;
  unsigned long long xlen;
  const unsigned char* mauth;
  skinny_ctrl l_skinny_ctrl;
  l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12;
  (void)nsec;
  mauth = m;
  N = npub;
  xlen = clen-16;
  reset_lfsr_gf56(CNT);
  // Load the received tag (last 16 bytes) and key the state with it.
  for (int i = 0; i < 16; i++) {
    T[i] = *(c + clen - 16 + i);
  }
  *((unsigned int *)&s[0])  = *((unsigned int *)&T[0]);
  *((unsigned int *)&s[4])  = *((unsigned int *)&T[4]);
  *((unsigned int *)&s[8])  = *((unsigned int *)&T[8]);
  *((unsigned int *)&s[12]) = *((unsigned int *)&T[12]);
  clen = clen - 16;
  *mlen = clen;
  // Phase 1: decrypt the ciphertext.
  if (clen > 0) {
    nonce_encryption(N,CNT,s,k,36,&l_skinny_ctrl);
    while (clen > 16) {
      clen = msg_decryption(&m,&c,N,CNT,s,k,36,clen,&l_skinny_ctrl);
    }
    irho(m, c, s, clen, 16);
    c = c + clen;
    m = m + clen;
  }
  // Phase 2: recompute the tag over AD and the recovered plaintext.
  *((unsigned int *)&s[0])  = 0x00000000;
  *((unsigned int *)&s[4])  = 0x00000000;
  *((unsigned int *)&s[8])  = 0x00000000;
  *((unsigned int *)&s[12]) = 0x00000000;
  reset_lfsr_gf56(CNT);
  // Domain-separation byte w: AD-length and message-length contributions
  // combine by XOR, computed once each (same folding as in
  // crypto_aead_encrypt, which had the identical xlen branch duplicated
  // five times).
  w = 48;
  if (adlen == 0)            w = w ^ 2;
  else if (adlen%(32) == 0)  w = w ^ 8;
  else if (adlen%(32) < 16)  w = w ^ 2;
  else if (adlen%(32) == 16) w = w ^ 0;
  else                       w = w ^ 10;
  if (xlen == 0)             w = w ^ 1;
  else if (xlen%(32) == 0)   w = w ^ 4;
  else if (xlen%(32) < 16)   w = w ^ 1;
  else if (xlen%(32) == 16)  w = w ^ 0;
  else                       w = w ^ 5;
  if (adlen == 0) { // AD is an empty string
    lfsr_gf56(CNT);
  }
  else while (adlen > 0) {
    adlen = ad_encryption(&ad,s,k,adlen,CNT,40,&l_skinny_ctrl);
  }
  if ((w & 8) == 0) {
    xlen = ad2msg_encryption (&mauth,CNT,s,k,44,xlen,&l_skinny_ctrl);
  }
  else if (clen == 0) {
    lfsr_gf56(CNT);
  }
  while (xlen > 0) {
    xlen = ad_encryption(&mauth,s,k,xlen,CNT,44,&l_skinny_ctrl);
  }
  nonce_encryption(N,CNT,s,k,w,&l_skinny_ctrl);
  // Tag generation
  g8A_for_Tag_Generation(s, T);
  // c now points at the received tag.
  // NOTE(review): early-exit comparison leaks timing information; a
  // constant-time compare would be preferable for a real deployment.
  for (int i = 0; i < 16; i++) {
    if (T[i] != (*(c+i))) {
      return -1;
    }
  }
  return 0;
}
//
// NIST-developed software is provided by NIST as a public service.
// You may use, copy and distribute copies of the software in any medium,
// provided that you keep intact this entire notice. You may improve,
// modify and create derivative works of the software or any portion of
// the software, and you may copy and distribute such modifications or
// works. Modified works should carry a notice stating that you changed
// the software and should note the date and nature of any such change.
// Please explicitly acknowledge the National Institute of Standards and
// Technology as the source of the software.
//
// NIST-developed software is expressly provided "AS IS." NIST MAKES NO
// WARRANTY OF ANY KIND, EXPRESS, IMPLIED, IN FACT OR ARISING BY OPERATION
// OF LAW, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT AND DATA ACCURACY. NIST
// NEITHER REPRESENTS NOR WARRANTS THAT THE OPERATION OF THE SOFTWARE WILL BE
// UNINTERRUPTED OR ERROR-FREE, OR THAT ANY DEFECTS WILL BE CORRECTED. NIST
// DOES NOT WARRANT OR MAKE ANY REPRESENTATIONS REGARDING THE USE OF THE SOFTWARE
// OR THE RESULTS THEREOF, INCLUDING BUT NOT LIMITED TO THE CORRECTNESS, ACCURACY,
// RELIABILITY, OR USEFULNESS OF THE SOFTWARE.
//
// You are solely responsible for determining the appropriateness of using and
// distributing the software and you assume all risks associated with its use,
// including but not limited to the risks and costs of program errors, compliance
// with applicable laws, damage to or loss of data, programs or equipment, and
// the unavailability or interruption of operation. This software is not intended
// to be used in any situation where a failure could cause risk of injury or
// damage to property. The software developed by NIST employees is not subject to
// copyright protection within the United States.
//
// disable deprecation for sprintf and fopen
#ifdef _MSC_VER
#define _CRT_SECURE_NO_WARNINGS
#endif
#include <stdio.h>
#include <string.h>
#include "crypto_aead.h"
#include "api.h"
#define KAT_SUCCESS 0
#define KAT_FILE_OPEN_ERROR -1
#define KAT_DATA_ERROR -3
#define KAT_CRYPTO_FAILURE -4
#define MAX_FILE_NAME 256
#define MAX_MESSAGE_LENGTH 32
#define MAX_ASSOCIATED_DATA_LENGTH 32
void init_buffer(unsigned char *buffer, unsigned long long numbytes);
void fprint_bstr(FILE *fp, const char *label, const unsigned char *data, unsigned long long length);
int generate_test_vectors();
// Entry point: run the KAT generator and report any failure on stderr.
int main()
{
  const int status = generate_test_vectors();
  if (status != KAT_SUCCESS) {
    fprintf(stderr, "test vector generation failed with code %d\n", status);
  }
  return status;
}
// Generate the NIST LWC AEAD known-answer-test file for every
// (mlen, adlen) combination up to the configured maxima, verifying that
// each ciphertext round-trips through decryption.
// Returns KAT_SUCCESS, KAT_FILE_OPEN_ERROR, or KAT_CRYPTO_FAILURE.
int generate_test_vectors()
{
  FILE *fp;
  char fileName[MAX_FILE_NAME];
  unsigned char key[CRYPTO_KEYBYTES];
  unsigned char nonce[CRYPTO_NPUBBYTES];
  unsigned char msg[MAX_MESSAGE_LENGTH];
  unsigned char msg2[MAX_MESSAGE_LENGTH];
  unsigned char ad[MAX_ASSOCIATED_DATA_LENGTH];
  unsigned char ct[MAX_MESSAGE_LENGTH + CRYPTO_ABYTES];
  // clen must be initialized: the debug printf below reads it before the
  // first call to crypto_aead_encrypt sets it (previously UB).
  unsigned long long clen = 0, mlen2;
  int count = 1;
  int func_ret, ret_val = KAT_SUCCESS;
  init_buffer(key, sizeof(key));
  init_buffer(nonce, sizeof(nonce));
  init_buffer(msg, sizeof(msg));
  init_buffer(ad, sizeof(ad));
  sprintf(fileName, "LWC_AEAD_KAT_%d_%d.txt", (CRYPTO_KEYBYTES * 8), (CRYPTO_NPUBBYTES * 8));
  if ((fp = fopen(fileName, "w")) == NULL) {
    fprintf(stderr, "Couldn't open <%s> for write\n", fileName);
    return KAT_FILE_OPEN_ERROR;
  }
  for (unsigned long long mlen = 0; (mlen <= MAX_MESSAGE_LENGTH) && (ret_val == KAT_SUCCESS); mlen++) {
    for (unsigned long long adlen = 0; adlen <= MAX_ASSOCIATED_DATA_LENGTH; adlen++) {
      // debug progress output: previous iteration's ciphertext length
      printf("%0d\n", (int)clen);
      fprintf(fp, "Count = %d\n", count++);
      printf("Count = %d\n", count - 1);
      fprint_bstr(fp, "Key = ", key, CRYPTO_KEYBYTES);
      fprint_bstr(fp, "Nonce = ", nonce, CRYPTO_NPUBBYTES);
      fprint_bstr(fp, "PT = ", msg, mlen);
      fprint_bstr(fp, "AD = ", ad, adlen);
      if ((func_ret = crypto_aead_encrypt(ct, &clen, msg, mlen, ad, adlen, NULL, nonce, key)) != 0) {
        fprintf(fp, "crypto_aead_encrypt returned <%d>\n", func_ret);
        ret_val = KAT_CRYPTO_FAILURE;
        break;
      }
      fprint_bstr(fp, "CT = ", ct, clen);
      fprintf(fp, "\n");
      // round-trip check: decrypt must succeed and recover the plaintext
      if ((func_ret = crypto_aead_decrypt(msg2, &mlen2, NULL, ct, clen, ad, adlen, nonce, key)) != 0) {
        fprintf(fp, "crypto_aead_decrypt returned <%d>\n", func_ret);
        ret_val = KAT_CRYPTO_FAILURE;
        break;
      }
      if (mlen != mlen2) {
        fprintf(fp, "crypto_aead_decrypt returned bad 'mlen': Got <%llu>, expected <%llu>\n", mlen2, mlen);
        ret_val = KAT_CRYPTO_FAILURE;
        break;
      }
      if (memcmp(msg, msg2, mlen)) {
        fprintf(fp, "crypto_aead_decrypt did not recover the plaintext\n");
        ret_val = KAT_CRYPTO_FAILURE;
        break;
      }
    }
  }
  fclose(fp);
  return ret_val;
}
// Emit one KAT line to fp: the label followed by the data bytes as
// uppercase hex (two digits per byte) and a trailing newline.
void fprint_bstr(FILE *fp, const char *label, const unsigned char *data, unsigned long long length)
{
  fputs(label, fp);
  for (unsigned long long idx = 0; idx < length; idx++) {
    fprintf(fp, "%02X", data[idx]);
  }
  fputc('\n', fp);
}
// Fill buffer with the deterministic byte pattern 0x00, 0x01, 0x02, ...
// (i.e. index modulo 256), as required by the NIST KAT harness.
void init_buffer(unsigned char *buffer, unsigned long long numbytes)
{
  unsigned long long pos = 0;
  while (pos < numbytes) {
    buffer[pos] = (unsigned char)pos;
    pos++;
  }
}
// Context for the SKINNY-128/384 primitive: the precomputed round keys
// plus a function pointer naming the variant to call next.  The variant
// names suggest they differ in how much of the tweakey schedule is
// recomputed per call (enc123 -> enc12 -> enc1) — TODO confirm against
// the skinny implementation.
// NOTE(review): "___skinny_ctrl" (leading double underscore) is a
// reserved identifier in C; renaming would be safer.
typedef struct ___skinny_ctrl {
unsigned char roundKeys[960]; // number of round : 56
void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K);
} skinny_ctrl;
extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k);
int crypto_aead_decrypt(unsigned char *m, unsigned long long *outputmlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k);
/*
* Date: 29 November 2018
* Contact: Thomas Peyrin - thomas.peyrin@gmail.com
* Mustafa Khairallah - mustafam001@e.ntu.edu.sg
*/
#include "crypto_aead.h"
#include "api.h"
#include "skinny.h"
#include <stdio.h>
#include <stdlib.h>
// 10*-style length padding: copy the len8 valid bytes of m into the
// l-byte block mp, zero-fill the remainder, and record len8 (mod 16) in
// the final byte — unless the block is already full, in which case the
// last byte is message data.
void pad (const unsigned char* m, unsigned char* mp, int l, int len8) {
  int j;
  for (j = 0; j < l; j++) {
    if (j < len8) {
      mp[j] = m[j];
    }
    else {
      mp[j] = (j == l - 1) ? (unsigned char)(len8 & 0x0f) : 0x00;
    }
  }
}
// Feedback function G of rho, applied to each byte of the state:
//   c[i] = (s[i] >> 1) ^ (s[i] & 0x80) ^ ((s[i] & 0x01) << 7)
// Processed four bytes at a time on 32-bit words for speed; the output
// is stored with word-wide writes (c assumed word-accessible here).
void g8A (unsigned char* s, unsigned char* c) {
  unsigned int *wc = (unsigned int *)c;
  int i;
  for (i = 0; i < 4; i++) {
    unsigned int w = *((unsigned int *)&s[4 * i]);
    wc[i] = ((w >> 1) & 0x7f7f7f7f) ^ (w & 0x80808080) ^ ((w << 7) & 0x80808080);
  }
}
// Same per-byte G function as g8A, but the result is written back one
// byte at a time because the tag destination c is caller-supplied and
// not guaranteed to be 4-byte aligned.
void g8A_for_Tag_Generation (unsigned char* s, unsigned char* c) {
  int i, j;
  for (i = 0; i < 4; i++) {
    unsigned int w = *((unsigned int *)&s[4 * i]);
    unsigned int g = ((w >> 1) & 0x7f7f7f7f) ^ (w & 0x80808080) ^ ((w << 7) & 0x80808080);
    for (j = 0; j < 4; j++) {
      c[4 * i + j] = (unsigned char)((g >> (8 * j)) & 0xFF);
    }
  }
}
void rho_ad_eqov16 (const unsigned char* m,
unsigned char* s) {
*((unsigned int *)&s[0]) ^= *((unsigned int *)&m[0]);
*((unsigned int *)&s[4]) ^= *((unsigned int *)&m[4]);
*((unsigned int *)&s[8]) ^= *((unsigned int *)&m[8]);
*((unsigned int *)&s[12]) ^= *((unsigned int *)&m[12]);
}
// Absorb a partial (< 16 byte) associated-data block: pad it to a full
// block first, then XOR it into the state word by word.
void rho_ad_ud16 (const unsigned char* m,
unsigned char* s,
int len8) {
unsigned char mp [16];
pad(m,mp,16,len8);
*((unsigned int *)&s[0]) ^= *((unsigned int *)&mp[0]);
*((unsigned int *)&s[4]) ^= *((unsigned int *)&mp[4]);
*((unsigned int *)&s[8]) ^= *((unsigned int *)&mp[8]);
*((unsigned int *)&s[12]) ^= *((unsigned int *)&mp[12]);
}
// rho for a full 16-byte message block: c = G(s) ^ m, s ^= m.
// g8A writes G(s) into c, then both state and ciphertext are XORed
// with the message word by word.
void rho_eqov16 (const unsigned char* m,
unsigned char* c,
unsigned char* s) {
g8A(s,c);
*((unsigned int *)&s[0]) ^= *((unsigned int *)&m[0]);
*((unsigned int *)&s[4]) ^= *((unsigned int *)&m[4]);
*((unsigned int *)&s[8]) ^= *((unsigned int *)&m[8]);
*((unsigned int *)&s[12]) ^= *((unsigned int *)&m[12]);
*((unsigned int *)&c[0]) ^= *((unsigned int *)&m[0]);
*((unsigned int *)&c[4]) ^= *((unsigned int *)&m[4]);
*((unsigned int *)&c[8]) ^= *((unsigned int *)&m[8]);
*((unsigned int *)&c[12]) ^= *((unsigned int *)&m[12]);
}
// rho for a partial message block: pad m to a full block, absorb the
// padded block into the state, and emit only len8 ciphertext bytes
// (c = G(s) ^ m); the remaining bytes of c are cleared.
void rho_ud16 (const unsigned char* m,
unsigned char* c,
unsigned char* s,
int len8,
int ver) {
int i;
unsigned char mp [16];
pad(m,mp,ver,len8);
g8A(s,c);
// state absorbs the padded block (word-wide)
*((unsigned int *)&s[0]) ^= *((unsigned int *)&mp[0]);
*((unsigned int *)&s[4]) ^= *((unsigned int *)&mp[4]);
*((unsigned int *)&s[8]) ^= *((unsigned int *)&mp[8]);
*((unsigned int *)&s[12]) ^= *((unsigned int *)&mp[12]);
// ciphertext is produced byte-wise: only the valid bytes survive
for (i = 0; i < ver; i++) {
if (i < len8) {
c[i] = c[i] ^ mp[i];
}
else {
c[i] = 0;
}
}
}
// Inverse rho for decryption: recover m = G(s) ^ c for the len8 valid
// bytes and update the state so it matches what rho produced during
// encryption (s absorbs the padded plaintext, reconstructed in place).
void irho (unsigned char* m,
const unsigned char* c,
unsigned char* s,
int len8,
int ver) {
int i;
unsigned char cp [16];
pad(c,cp,ver,len8);
g8A(s,m); // m temporarily holds G(s)
for (i = 0; i < ver; i++) {
// valid bytes: s ^= recovered plaintext byte (cp ^ m); padding
// bytes: s ^= cp only (the pad byte itself)
if (i < len8) {
s[i] = s[i] ^ cp[i] ^ m[i];
}
else {
s[i] = s[i] ^ cp[i];
}
// turn G(s) into plaintext for the valid bytes; clear the rest
if (i < len8) {
m[i] = m[i] ^ cp[i];
}
else {
m[i] = 0;
}
}
}
// Reset the 56-bit block counter to 1.  CNT is 8 bytes so it can be
// written as two 32-bit words; the top byte (CNT[7]) doubles as the
// domain byte and is cleared here.
void reset_lfsr_gf56 (unsigned char* CNT) {
*((unsigned int *)&CNT[0]) = 0x00000001;
*((unsigned int *)&CNT[4]) = 0x00000000;
}
// Advance the 56-bit block counter by one LFSR step.  The counter lives
// in CNT[0..6], handled as two 32-bit words; when the top counter bit
// (bit 23 of the high word, bit 55 overall) is set, the feedback
// constant 0x95 is folded into the low byte on the shift.
void lfsr_gf56 (unsigned char* CNT) {
  unsigned int lo = *((unsigned int *)&CNT[0]); // CNT3 CNT2 CNT1 CNT0
  unsigned int hi = *((unsigned int *)&CNT[4]); // CNT7 CNT6 CNT5 CNT4
  unsigned int fb = ((hi >> 23) & 0x01) ? 0x95 : 0x00;
  unsigned int new_hi = (hi << 1) | (lo >> 31);
  unsigned int new_lo = (lo << 1) ^ fb;
  *((unsigned int *)&CNT[0]) = new_lo;
  *((unsigned int *)&CNT[4]) = new_hi;
}
// Encrypt the 16-byte state s in place with SKINNY-128/384, using the
// tweakey (CNT, T, k) and whichever implementation variant is currently
// installed in p_skinny_ctrl->func_skinny_128_384_enc.
void block_cipher(unsigned char* s,
const unsigned char* k, unsigned char* T,
unsigned char* CNT,
skinny_ctrl* p_skinny_ctrl) {
p_skinny_ctrl->func_skinny_128_384_enc (s,p_skinny_ctrl,CNT,T,k);
}
// Encrypt the state with the nonce as tweak: copy the 16-byte nonce N
// into T (word-wide), place the domain byte D in CNT[7], and run one
// SKINNY call on s.
void nonce_encryption (const unsigned char* N,
unsigned char* CNT,
unsigned char*s, const unsigned char* k,
unsigned char D,
skinny_ctrl* p_skinny_ctrl) {
unsigned char T [16];
*((unsigned int *)&T[0]) = *((unsigned int *)&N[0]);
*((unsigned int *)&T[4]) = *((unsigned int *)&N[4]);
*((unsigned int *)&T[8]) = *((unsigned int *)&N[8]);
*((unsigned int *)&T[12]) = *((unsigned int *)&N[12]);
CNT[7] = D; // domain-separation byte rides in the top counter byte
block_cipher(s,k,T,CNT,p_skinny_ctrl);
}
// Derive the tag from the final state into *c, then rewind *c back to
// the start of the ciphertext (the encryption loop left *c n bytes
// before the end of the *clen-byte output).
void generate_tag (unsigned char** c, unsigned char* s,
int n, unsigned long long* clen) {
g8A_for_Tag_Generation(s, *c);
*c = *c + n;
*c = *c - *clen;
}
// Encrypt one full 16-byte message block: rho produces the ciphertext
// and updates the state, pointers advance, the counter is clocked and
// the nonce is re-encrypted with domain byte D for the next step.
// Returns the remaining message length.
unsigned long long msg_encryption_eqov16 (const unsigned char** M, unsigned char** c,
const unsigned char* N,
unsigned char* CNT,
unsigned char*s, const unsigned char* k,
unsigned char D,
unsigned long long mlen,
skinny_ctrl* p_skinny_ctrl) {
rho_eqov16(*M, *c, s);
*c = *c + 16;
*M = *M + 16;
lfsr_gf56(CNT);
nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl);
return mlen - 16;
}
// Encrypt the final, incomplete (< 16 byte) message block via the
// padding variant of rho, then clock the counter and encrypt the nonce
// for tag generation.  Always returns 0 (no message bytes remain).
unsigned long long msg_encryption_ud16 (const unsigned char** M, unsigned char** c,
const unsigned char* N,
unsigned char* CNT,
unsigned char*s, const unsigned char* k,
unsigned char D,
unsigned long long mlen,
skinny_ctrl* p_skinny_ctrl) {
rho_ud16(*M, *c, s, mlen, 16);
*c = *c + mlen;
*M = *M + mlen;
lfsr_gf56(CNT);
nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl);
return 0;
}
// Decrypt one ciphertext block (up to 16 bytes) via the inverse rho,
// advance both pointers, clock the counter, and encrypt the nonce with
// domain byte D for the next step.  Returns remaining ciphertext length.
unsigned long long msg_decryption (unsigned char** M, const unsigned char** c,
const unsigned char* N,
unsigned char* CNT,
unsigned char*s, const unsigned char* k,
unsigned char D,
unsigned long long clen,
skinny_ctrl* p_skinny_ctrl) {
int len8;
if (clen >= 16) {
len8 = 16;
clen = clen - 16;
}
else {
// final, possibly partial block
len8 = clen;
clen = 0;
}
irho(*M, *c, s, len8, 16);
*c = *c + len8;
*M = *M + len8;
lfsr_gf56(CNT);
nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl);
return clen;
}
// Absorb a full 32-byte pair of AD blocks: the first 16 bytes XOR into
// the state, the second 16 bytes enter through the tweak T of a SKINNY
// call.  The counter is clocked once per half.  Returns adlen - 32.
unsigned long long ad_encryption_eqov32 (const unsigned char** A, unsigned char* s,
const unsigned char* k, unsigned long long adlen,
unsigned char* CNT,
unsigned char D,
skinny_ctrl* p_skinny_ctrl) {
unsigned char T [16];
rho_ad_eqov16(*A, s);
*A = *A + 16;
lfsr_gf56(CNT);
//pad(*A, T, 16, 16);
// full second half: straight word-wide copy instead of pad()
*((unsigned int *)&T[0]) = *((unsigned int *)&(*A)[0]);
*((unsigned int *)&T[4]) = *((unsigned int *)&(*A)[4]);
*((unsigned int *)&T[8]) = *((unsigned int *)&(*A)[8]);
*((unsigned int *)&T[12]) = *((unsigned int *)&(*A)[12]);
*A = *A + 16;
CNT[7] = D;
block_cipher(s,k,T,CNT,p_skinny_ctrl);
lfsr_gf56(CNT);
return adlen - 32;
}
// Absorb the final AD pair when 16 < adlen < 32: the first 16 bytes XOR
// into the state, the remaining (partial) bytes are padded into the
// tweak T for a SKINNY call.  Always returns 0 (AD fully consumed).
unsigned long long ad_encryption_ov16 (const unsigned char** A, unsigned char* s,
const unsigned char* k, unsigned long long adlen,
unsigned char* CNT,
unsigned char D,
skinny_ctrl* p_skinny_ctrl) {
unsigned char T [16];
adlen = adlen - 16; // bytes left for the partial second half
rho_ad_eqov16(*A, s);
*A = *A + 16;
lfsr_gf56(CNT);
pad(*A, T, 16, adlen);
*A = *A + adlen;
CNT[7] = D;
block_cipher(s,k,T,CNT,p_skinny_ctrl);
lfsr_gf56(CNT);
return 0;
}
// Absorb the final AD block when adlen == 16 exactly: XOR it into the
// state and clock the counter; no SKINNY call is needed here (the
// caller performs the closing nonce encryption).  Returns 0.
unsigned long long ad_encryption_eq16 (const unsigned char** A, unsigned char* s,
unsigned char* CNT) {
rho_ad_eqov16(*A, s);
*A = *A + 16;
lfsr_gf56(CNT);
return 0;
}
// Absorb the final AD block when adlen < 16: pad and XOR it into the
// state, clock the counter.  Returns 0 (AD fully consumed).
unsigned long long ad_encryption_ud16 (const unsigned char** A, unsigned char* s,
unsigned long long adlen,
unsigned char* CNT) {
rho_ad_ud16(*A, s, adlen);
*A = *A + adlen;
lfsr_gf56(CNT);
return 0;
}
// AEAD encryption (ROMULUS-N style: online, single pass).
// AD is absorbed in 32-byte pairs, then the message is encrypted block
// by block; the domain byte passed to nonce_encryption encodes which
// padding case terminated each phase.  NIST LWC crypto_aead interface;
// nsec is unused.  Returns 0; *clen = mlen + 16 (tag appended).
int crypto_aead_encrypt (
unsigned char* c, unsigned long long* clen,
const unsigned char* m, unsigned long long mlen,
const unsigned char* ad, unsigned long long adlen,
const unsigned char* nsec,
const unsigned char* npub,
const unsigned char* k
)
{
unsigned char s[16];
// size 7 -> 8 for word access
unsigned char CNT[8];
const unsigned char* A;
const unsigned char* M;
const unsigned char* N;
skinny_ctrl l_skinny_ctrl;
(void) nsec;
A = ad;
M = m;
N = npub;
l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12;
// zero initial state
*((unsigned int *)&s[0]) = 0x00000000;
*((unsigned int *)&s[4]) = 0x00000000;
*((unsigned int *)&s[8]) = 0x00000000;
*((unsigned int *)&s[12]) = 0x00000000;
reset_lfsr_gf56(CNT);
// Phase 1: absorb AD; the domain byte distinguishes the padding case.
if (adlen == 0) { // AD is an empty string
lfsr_gf56(CNT);
nonce_encryption(N,CNT,s,k,0x1a,&l_skinny_ctrl);
}
else while (adlen > 0) {
if (adlen < 16) { // The last block of AD is odd and incomplete
adlen = ad_encryption_ud16(&A,s,adlen,CNT);
nonce_encryption(N,CNT,s,k,0x1a,&l_skinny_ctrl);
}
else if (adlen == 16) { // The last block of AD is odd and complete
adlen = ad_encryption_eq16(&A,s,CNT);
nonce_encryption(N,CNT,s,k,0x18,&l_skinny_ctrl);
}
else if (adlen < (32)) { // The last block of AD is even and incomplete
adlen = ad_encryption_ov16(&A,s,k,adlen,CNT,0x08,&l_skinny_ctrl);
nonce_encryption(N,CNT,s,k,0x1a,&l_skinny_ctrl);
}
else if (adlen == (32)) { // The last block of AD is even and complete
adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&l_skinny_ctrl);
nonce_encryption(N,CNT,s,k,0x18,&l_skinny_ctrl);
}
else { // A normal full pair of blocks of AD
adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&l_skinny_ctrl);
}
}
// because, nonce_encryption is called at the last block of AD encryption
l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1;
reset_lfsr_gf56(CNT);
*clen = mlen + 16;
// Phase 2: encrypt the message; again the domain byte encodes the case.
if (mlen == 0) { // M is an empty string
lfsr_gf56(CNT);
nonce_encryption(N,CNT,s,k,0x15,&l_skinny_ctrl);
}
else while (mlen > 0) {
if (mlen < 16) { // The last block of M is incomplete
mlen = msg_encryption_ud16(&M,&c,N,CNT,s,k,0x15,mlen,&l_skinny_ctrl);
}
else if (mlen == 16) { // The last block of M is complete
mlen = msg_encryption_eqov16(&M,&c,N,CNT,s,k,0x14,mlen,&l_skinny_ctrl);
}
else { // A normal full message block
mlen = msg_encryption_eqov16(&M,&c,N,CNT,s,k,0x04,mlen,&l_skinny_ctrl);
}
}
// Tag generation
generate_tag(&c,s,16,clen);
return 0;
}
// AEAD decryption matching crypto_aead_encrypt above: absorb AD, decrypt
// the ciphertext, recompute the tag and compare it with the 16 bytes
// following the ciphertext.  Returns 0 on success, -1 on tag mismatch.
// nsec is unused.
int crypto_aead_decrypt(
unsigned char *m,unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c,unsigned long long clen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
)
{
unsigned char s[16];
unsigned char T[16];
// size 7 -> 8 for word access
unsigned char CNT[8];
const unsigned char* A;
unsigned char* M;
const unsigned char* N;
unsigned int i;
skinny_ctrl l_skinny_ctrl;
(void) nsec;
A = ad;
M = m;
N = npub;
l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12;
// zero initial state
for (i = 0; i < 16; i++) {
s[i] = 0;
}
reset_lfsr_gf56(CNT);
// Phase 1: absorb AD exactly as in encryption.
if (adlen == 0) { // AD is an empty string
lfsr_gf56(CNT);
nonce_encryption(N,CNT,s,k,0x1a,&l_skinny_ctrl);
}
else while (adlen > 0) {
if (adlen < 16) { // The last block of AD is odd and incomplete
adlen = ad_encryption_ud16(&A,s,adlen,CNT);
nonce_encryption(N,CNT,s,k,0x1a,&l_skinny_ctrl);
}
else if (adlen == 16) { // The last block of AD is odd and complete
adlen = ad_encryption_eq16(&A,s,CNT);
nonce_encryption(N,CNT,s,k,0x18,&l_skinny_ctrl);
}
else if (adlen < (32)) { // The last block of AD is even and incomplete
adlen = ad_encryption_ov16(&A,s,k,adlen,CNT,0x08,&l_skinny_ctrl);
nonce_encryption(N,CNT,s,k,0x1a,&l_skinny_ctrl);
}
else if (adlen == (32)) { // The last block of AD is even and complete
adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&l_skinny_ctrl);
nonce_encryption(N,CNT,s,k,0x18,&l_skinny_ctrl);
}
else { // A normal full pair of blocks of AD
adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&l_skinny_ctrl);
}
}
reset_lfsr_gf56(CNT);
clen = clen - 16; // strip the tag; the remainder is ciphertext
*mlen = clen;
// Phase 2: decrypt the ciphertext blocks.
if (clen == 0) { // C is an empty string
lfsr_gf56(CNT);
nonce_encryption(N,CNT,s,k,0x15,&l_skinny_ctrl);
}
else while (clen > 0) {
if (clen < 16) { // The last block of C is incomplete
clen = msg_decryption(&M,&c,N,CNT,s,k,0x15,clen,&l_skinny_ctrl);
}
else if (clen == 16) { // The last block of C is complete
clen = msg_decryption(&M,&c,N,CNT,s,k,0x14,clen,&l_skinny_ctrl);
}
else { // A normal full message block
clen = msg_decryption(&M,&c,N,CNT,s,k,0x04,clen,&l_skinny_ctrl);
}
}
// Tag generation
g8A_for_Tag_Generation(s, T);
// c now points at the received tag.
// NOTE(review): early-exit compare leaks timing; a constant-time
// comparison would be preferable in a real deployment.
for (i = 0; i < 16; i++) {
if (T[i] != (*(c+i))) {
return -1;
}
}
return 0;
}
//
// NIST-developed software is provided by NIST as a public service.
// You may use, copy and distribute copies of the software in any medium,
// provided that you keep intact this entire notice. You may improve,
// modify and create derivative works of the software or any portion of
// the software, and you may copy and distribute such modifications or
// works. Modified works should carry a notice stating that you changed
// the software and should note the date and nature of any such change.
// Please explicitly acknowledge the National Institute of Standards and
// Technology as the source of the software.
//
// NIST-developed software is expressly provided "AS IS." NIST MAKES NO
// WARRANTY OF ANY KIND, EXPRESS, IMPLIED, IN FACT OR ARISING BY OPERATION
// OF LAW, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT AND DATA ACCURACY. NIST
// NEITHER REPRESENTS NOR WARRANTS THAT THE OPERATION OF THE SOFTWARE WILL BE
// UNINTERRUPTED OR ERROR-FREE, OR THAT ANY DEFECTS WILL BE CORRECTED. NIST
// DOES NOT WARRANT OR MAKE ANY REPRESENTATIONS REGARDING THE USE OF THE SOFTWARE
// OR THE RESULTS THEREOF, INCLUDING BUT NOT LIMITED TO THE CORRECTNESS, ACCURACY,
// RELIABILITY, OR USEFULNESS OF THE SOFTWARE.
//
// You are solely responsible for determining the appropriateness of using and
// distributing the software and you assume all risks associated with its use,
// including but not limited to the risks and costs of program errors, compliance
// with applicable laws, damage to or loss of data, programs or equipment, and
// the unavailability or interruption of operation. This software is not intended
// to be used in any situation where a failure could cause risk of injury or
// damage to property. The software developed by NIST employees is not subject to
// copyright protection within the United States.
//
// disable deprecation for sprintf and fopen
#ifdef _MSC_VER
#define _CRT_SECURE_NO_WARNINGS
#endif
#include <stdio.h>
#include <string.h>
#include "crypto_aead.h"
#include "api.h"
#define KAT_SUCCESS 0
#define KAT_FILE_OPEN_ERROR -1
#define KAT_DATA_ERROR -3
#define KAT_CRYPTO_FAILURE -4
#define MAX_FILE_NAME 256
#define MAX_MESSAGE_LENGTH 32
#define MAX_ASSOCIATED_DATA_LENGTH 32
void init_buffer(unsigned char *buffer, unsigned long long numbytes);
void fprint_bstr(FILE *fp, const char *label, const unsigned char *data, unsigned long long length);
int generate_test_vectors();
// Entry point: run the KAT generator and report any failure on stderr.
int main()
{
  const int status = generate_test_vectors();
  if (status != KAT_SUCCESS) {
    fprintf(stderr, "test vector generation failed with code %d\n", status);
  }
  return status;
}
// Generate the NIST LWC AEAD known-answer-test file for every
// (mlen, adlen) combination up to the configured maxima, verifying that
// each ciphertext round-trips through decryption.
// Returns KAT_SUCCESS, KAT_FILE_OPEN_ERROR, or KAT_CRYPTO_FAILURE.
int generate_test_vectors()
{
  FILE *fp;
  char fileName[MAX_FILE_NAME];
  unsigned char key[CRYPTO_KEYBYTES];
  unsigned char nonce[CRYPTO_NPUBBYTES];
  unsigned char msg[MAX_MESSAGE_LENGTH];
  unsigned char msg2[MAX_MESSAGE_LENGTH];
  unsigned char ad[MAX_ASSOCIATED_DATA_LENGTH];
  unsigned char ct[MAX_MESSAGE_LENGTH + CRYPTO_ABYTES];
  // clen must be initialized: the debug printf below reads it before the
  // first call to crypto_aead_encrypt sets it (previously UB).
  unsigned long long clen = 0, mlen2;
  int count = 1;
  int func_ret, ret_val = KAT_SUCCESS;
  init_buffer(key, sizeof(key));
  init_buffer(nonce, sizeof(nonce));
  init_buffer(msg, sizeof(msg));
  init_buffer(ad, sizeof(ad));
  sprintf(fileName, "LWC_AEAD_KAT_%d_%d.txt", (CRYPTO_KEYBYTES * 8), (CRYPTO_NPUBBYTES * 8));
  if ((fp = fopen(fileName, "w")) == NULL) {
    fprintf(stderr, "Couldn't open <%s> for write\n", fileName);
    return KAT_FILE_OPEN_ERROR;
  }
  for (unsigned long long mlen = 0; (mlen <= MAX_MESSAGE_LENGTH) && (ret_val == KAT_SUCCESS); mlen++) {
    for (unsigned long long adlen = 0; adlen <= MAX_ASSOCIATED_DATA_LENGTH; adlen++) {
      // debug progress output: previous iteration's ciphertext length
      printf("%0d\n", (int)clen);
      fprintf(fp, "Count = %d\n", count++);
      printf("Count = %d\n", count - 1);
      fprint_bstr(fp, "Key = ", key, CRYPTO_KEYBYTES);
      fprint_bstr(fp, "Nonce = ", nonce, CRYPTO_NPUBBYTES);
      fprint_bstr(fp, "PT = ", msg, mlen);
      fprint_bstr(fp, "AD = ", ad, adlen);
      if ((func_ret = crypto_aead_encrypt(ct, &clen, msg, mlen, ad, adlen, NULL, nonce, key)) != 0) {
        fprintf(fp, "crypto_aead_encrypt returned <%d>\n", func_ret);
        ret_val = KAT_CRYPTO_FAILURE;
        break;
      }
      fprint_bstr(fp, "CT = ", ct, clen);
      fprintf(fp, "\n");
      // round-trip check: decrypt must succeed and recover the plaintext
      if ((func_ret = crypto_aead_decrypt(msg2, &mlen2, NULL, ct, clen, ad, adlen, nonce, key)) != 0) {
        fprintf(fp, "crypto_aead_decrypt returned <%d>\n", func_ret);
        ret_val = KAT_CRYPTO_FAILURE;
        break;
      }
      if (mlen != mlen2) {
        fprintf(fp, "crypto_aead_decrypt returned bad 'mlen': Got <%llu>, expected <%llu>\n", mlen2, mlen);
        ret_val = KAT_CRYPTO_FAILURE;
        break;
      }
      if (memcmp(msg, msg2, mlen)) {
        fprintf(fp, "crypto_aead_decrypt did not recover the plaintext\n");
        ret_val = KAT_CRYPTO_FAILURE;
        break;
      }
    }
  }
  fclose(fp);
  return ret_val;
}
// Emit one KAT line to fp: the label followed by the data bytes as
// uppercase hex (two digits per byte) and a trailing newline.
void fprint_bstr(FILE *fp, const char *label, const unsigned char *data, unsigned long long length)
{
  fputs(label, fp);
  for (unsigned long long idx = 0; idx < length; idx++) {
    fprintf(fp, "%02X", data[idx]);
  }
  fputc('\n', fp);
}
// Fill buffer with the deterministic byte pattern 0x00, 0x01, 0x02, ...
// (i.e. index modulo 256), as required by the NIST KAT harness.
void init_buffer(unsigned char *buffer, unsigned long long numbytes)
{
  unsigned long long pos = 0;
  while (pos < numbytes) {
    buffer[pos] = (unsigned char)pos;
    pos++;
  }
}
// Context for the SKINNY-128/384 primitive: the precomputed round keys
// plus a function pointer naming the variant to call next.  The variant
// names suggest they differ in how much of the tweakey schedule is
// recomputed per call (enc123 -> enc12 -> enc1) — TODO confirm against
// the skinny implementation.
// NOTE(review): "___skinny_ctrl" (leading double underscore) is a
// reserved identifier in C; renaming would be safer.
typedef struct ___skinny_ctrl {
unsigned char roundKeys[960]; // number of round : 56
void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K);
} skinny_ctrl;
extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment