Commit 06f56ee8 by lwc-tester

Round 1 Candidates

parents

Too many changes to show.

To preserve performance only 1000 of 1000+ files are displayed.

This source diff could not be displayed because it is too large. You can view the blob instead.
# Build file for the AVX2 variant of the speed/self-test program.
# Alternative compiler settings kept for reference:
#CC=gcc -O2 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -msse3 -mmmx -mavx -mavx2
#CC=gcc -Wall -O2 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -msse2 -mmmx -mavx -mavx2
# Active setting: -O3, tuned for the build host, AVX and AVX2 enabled.
CC=gcc -Wall -O3 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -mavx -mavx2
#CC=gcc -O1 -fomit-frame-pointer -march=haswell -mtune=native -mavx
# Candidate -march values: ivybridge, skylake, sandybridge, haswell
# Default goal; aceavx_1 is an alias that simply depends on the aceavx binary.
all: aceavx_1
aceavx_1:aceavx
# Compile and link both sources in one step ($@ = target, $^ = all prerequisites).
aceavx: speed.c ace.c
$(CC) -o $@ $^
.PHONY: clean
# Remove the generated binary.
clean:
rm -fr aceavx
/* Reference implementation of ACE-128, AEAD
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
#ifndef ACE_H
#define ACE_H
#include<stdint.h>
#include<x86intrin.h>
//#include<smmintrin.h>
//#include <immintrin.h>
/* Size of one permutation state: 40 bytes = 320 bits. */
#define STATEBYTES 40
#define STATEDWORD 10 // 320/32 = 10 32-bit words per state
/* Round/step counts; presumably consumed by ace.c (not visible here). */
#define SIMECKROUND 8
#define NUMSTEPS 16
/* speed.c processes 8*PARAL_INST_BY8 state instances at a time. */
#define PARAL_INST_BY8 1
/* Constant tables, one entry per step index (16 entries each).
   They are static, so every translation unit including this header gets its own copy. */
static const unsigned char SC0[16]={0x50,0x5c,0x91,0x8d,0x53,0x60,0x68,0xe1,0xf6,0x9d,0x40,0x4f,0xbe,0x5b,0xe9,0x7f}; //Step constants (SC_{2i})
static const unsigned char SC1[16]={0x28,0xae,0x48,0xc6,0xa9,0x30,0x34,0x70,0x7b,0xce,0x20,0x27,0x5f,0xad,0x74,0x3f}; //Step constants (SC_{2i+1})
static const unsigned char SC2[16]={0x14,0x57,0x24,0x63,0x54,0x18,0x9a,0x38,0xbd,0x67,0x10,0x13,0x2f,0xd6,0xba,0x1f}; //Step constants (SC_{2i+2})
static const unsigned char RC0[16]={0x07,0x0a,0x9b,0xe0,0xd1,0x1a,0x22,0xf7,0x62,0x96,0x71,0xaa,0x2b,0xe9,0xcf,0xb7};//Round constants (RC_{2i})
static const unsigned char RC1[16]={0x53,0x5d,0x49,0x7f,0xbe,0x1d,0x28,0x6c,0x82,0x47,0x6b,0x88,0xdc,0x8b,0x59,0xc6};//Round constants (RC_{2i+1})
static const unsigned char RC2[16]={0x43,0xe4,0x5e,0xcc,0x32,0x4e,0x75,0x25,0xfd,0xf9,0x76,0xa0,0xb0,0x09,0x1e,0xad};//Round constants (RC_{2i+2})
/* Short integer aliases used throughout the implementation. */
typedef unsigned long long int u64;
typedef unsigned int u32;
/* NOTE(review): despite its name, u8 is an alias for unsigned int, not an 8-bit type;
   buffers declared as u8* therefore hold one unsigned int per element. Confirm this is intended. */
typedef unsigned int u8;
/* Rotate every 32-bit lane of x left by 5 bits (x is evaluated twice). */
#define ROT5(x) (_mm256_slli_epi32(x, 5) | _mm256_srli_epi32(x, 27))
/* Rotate every 32-bit lane of x left by 1 bit (x is evaluated twice). */
#define ROT1(x) (_mm256_slli_epi32(x, 1) | _mm256_srli_epi32(x, 31))
/* Gather the even-indexed 32-bit lanes (0,2,4,6) into the low 128 bits and the
   odd-indexed lanes (1,3,5,7) into the high 128 bits. */
#define SWAPREG1(x) (_mm256_permutevar8x32_epi32(x, _mm256_set_epi32(7, 5, 3, 1, 6, 4, 2, 0)))
/* Vector whose even 32-bit lanes hold 0xfffffffe^t1 and odd lanes hold 0xfffffffe^t2.
   The arguments are not parenthesised, so pass simple expressions only. */
#define RC(t1, t2) (_mm256_set_epi32(0xfffffffe^t2, 0xfffffffe^t1, 0xfffffffe^t2, 0xfffffffe^t1, 0xfffffffe^t2, 0xfffffffe^t1, 0xfffffffe^t2,0xfffffffe^t1))
/* Vector with lanes (low to high): 0xffffffff, 0xffffff00^t1, 0xffffffff, 0xffffff00^t2, repeated twice. */
#define SC(t1, t2) (_mm256_set_epi32(0xffffff00^t2, 0xffffffff, 0xffffff00^t1, 0xffffffff, 0xffffff00^t2, 0xffffffff, 0xffffff00^t1, 0xffffffff))
/* Swap the two 64-bit quadwords inside each 128-bit half. */
#define SWAPBLK(x) (_mm256_permute4x64_epi64(x, _MM_SHUFFLE(2,3,0,1)))
/* Reorder the four 64-bit quadwords q0,q1,q2,q3 of x into q0,q2,q1,q3. */
#define SWAPREG2(x) (_mm256_permutevar8x32_epi32(x, _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0)))
/* Swap adjacent 64-bit quadwords inside each 128-bit half.
   NOTE(review): the expansion reads a variable named xtmp and ignores the parameter x,
   so it only compiles where xtmp is in scope and always permutes xtmp. This looks like
   a typo for x; confirm against the callers in ace.c before changing. */
#define SWAPAC(x) (_mm256_permutevar8x32_epi32(xtmp, _mm256_set_epi32(5, 4, 7, 6, 1, 0, 3, 2)))
/* All-ones in the low four 32-bit lanes, zero in the high four. */
#define masklo (_mm256_set_epi32(0x0, 0x0, 0x0, 0x0, 0xffffffff, 0xffffffff, 0xffffffff,0xffffffff))
/* All-ones in the high four 32-bit lanes, zero in the low four. */
#define maskhi (_mm256_set_epi32(0xffffffff, 0xffffffff, 0xffffffff,0xffffffff,0x0, 0x0, 0x0, 0x0))
/* Build a 256-bit vector with v1 in the low 128 bits and v0 in the high 128 bits. */
#define _mm256_set_m128i(v0, v1) _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1)
/* One rotate-and-xor round on the lane pair (x, y), updating both in place:
     x_new = (ROT5(x) & x) ^ ROT1(x) ^ RC(t1, t2) ^ y
     y_new = x_old
   x and y must be assignable __m256i lvalues. The macro expands to a braced block
   and declares a temporary named x2tmp, so do not pass a variable with that name. */
#define ROAX(x, y, t1, t2)\
{\
__m256i x2tmp;\
x2tmp = x;\
x = (ROT5(x)&x)^ROT1(x)^RC(t1, t2)^y;\
y = x2tmp;\
}
/* De-interleave two vectors in place: afterwards x holds the even-indexed 32-bit
   lanes of the old x (low half) and old y (high half), and y holds the odd-indexed
   lanes in the same arrangement. Declares temporaries xtmp and ytmp, so do not pass
   variables with those names. */
#define PACK_SSb(x, y)\
{\
__m256i xtmp, ytmp;\
xtmp = SWAPREG1(x);\
ytmp = SWAPREG1(y);\
x = _mm256_permute2x128_si256(xtmp,ytmp,0x20);\
y = _mm256_permute2x128_si256(xtmp,ytmp, 0x31);\
}
/* Inverse of PACK_SSb: re-interleaves the 32-bit lanes of x and y in place, restoring
   the original lane order. Declares temporaries xtmp and ytmp. */
#define UNPACK_SSb(x, y)\
{\
__m256i xtmp, ytmp;\
xtmp = _mm256_unpacklo_epi32(x, y);\
ytmp = _mm256_unpackhi_epi32(x, y);\
x = _mm256_permute2x128_si256(xtmp, ytmp,0x20);\
y = _mm256_permute2x128_si256(xtmp, ytmp, 0x31);\
}
/* Regroup 64-bit quadwords of the pairs (x, z) and (y, w) in place.
   With q0..q3 the quadwords of each input:
     x <- x.q0, x.q2, z.q0, z.q2      z <- x.q1, x.q3, z.q1, z.q3
     y <- y.q0, y.q2, w.q0, w.q2      w <- y.q1, y.q3, w.q1, w.q3
   Declares temporaries x2tmp and x3tmp, so do not pass variables with those names. */
#define PACK(x, y, z, w)\
{\
__m256i x2tmp, x3tmp;\
x2tmp = SWAPREG2(x);\
x3tmp = SWAPREG2(z);\
x = _mm256_permute2x128_si256(x2tmp,x3tmp,0x20);\
z = _mm256_permute2x128_si256(x2tmp,x3tmp, 0x31);\
x2tmp = SWAPREG2(y);\
x3tmp = SWAPREG2(w);\
y = _mm256_permute2x128_si256(x2tmp,x3tmp,0x20);\
w = _mm256_permute2x128_si256(x2tmp,x3tmp, 0x31);\
}
/* Inverse of PACK: restores the original quadword order of x, y, z and w in place.
   Declares temporaries x2tmp and x3tmp. */
#define UNPACK(x,y,z,w)\
{\
__m256i x2tmp, x3tmp;\
x2tmp = _mm256_unpacklo_epi64(x, z);\
x3tmp = _mm256_unpackhi_epi64(x, z);\
x = _mm256_permute2x128_si256(x2tmp,x3tmp,0x20);\
z = _mm256_permute2x128_si256(x2tmp,x3tmp, 0x31);\
x2tmp = _mm256_unpacklo_epi64(y, w);\
x3tmp = _mm256_unpackhi_epi64(y, w);\
y = _mm256_permute2x128_si256(x2tmp,x3tmp,0x20);\
w = _mm256_permute2x128_si256(x2tmp,x3tmp, 0x31);\
}
/* Permutation over the parallel states, updated in place. speed.c passes a buffer of
   8*PARAL_INST_BY8 consecutive instances of STATEDWORD words each. Defined in ace.c. */
void ace320( u32 *state );
/* AEAD encryption/decryption over the parallel instances. speed.c passes mlen, adlen and
   tlen as counts of 32-bit words per instance and klen as 16.
   NOTE(review): return-value semantics are defined in ace.c and not visible here. */
int crypto_aead_encrypt( u32 *tag, u32 tlen, u32 *c, u32 *m, u32 mlen, u32 *ad, u32 adlen, u8 *k, u8 *npub, u32 klen );
int crypto_aead_decrypt( u32 *m, u32 *c, u32 mlen, u32 *tag, u32 tlen, u32 *ad, u32 adlen, u8 *k, u8 *npub, u32 klen );
#endif
#ifndef CLOCK_CYCLE_H
#define CLOCK_CYCLE_H
typedef unsigned long long int u64;

/*
 * Read the time-stamp counter at the START of a timed region.
 *
 * CPUID is executed first as a serialising instruction so that earlier
 * instructions have retired before RDTSC samples the counter.
 * Returns the 64-bit counter value (EDX:EAX).
 *
 * The functions are static inline because they are defined in a header:
 * with external linkage, including this file from two translation units
 * would produce duplicate-symbol link errors. The "memory" clobber stops
 * the compiler from moving loads/stores across the measurement point.
 */
static inline u64 start_rdtsc( void )
{
    unsigned high, low;
    __asm__ volatile("CPUID\n\t"
                     "RDTSC\n\t"
                     "mov %%edx, %0\n\t"
                     "mov %%eax, %1\n\t"
                     : "=r" (high), "=r" (low)
                     :
                     : "%rax", "%rbx", "%rcx", "%rdx", "memory");
    return ( ((u64)low) | (((u64)high) << 32));
}

/*
 * Read the time-stamp counter at the END of a timed region.
 *
 * RDTSCP samples the counter, the value is copied out of EDX:EAX, and a
 * trailing CPUID keeps later instructions from starting before the read.
 * Returns the 64-bit counter value.
 */
static inline u64 end_rdtsc( void )
{
    unsigned high, low;
    __asm__ volatile("RDTSCP\n\t"
                     "mov %%edx, %0\n\t"
                     "mov %%eax,%1\n\t"
                     "CPUID\n\t"
                     : "=r" (high), "=r" (low)
                     :
                     : "%rax", "%rbx", "%rcx", "%rdx", "memory");
    return ( ((u64)low) | (((u64)high) << 32));
}
#endif
/* Reference implementation of ACE-128, AEAD
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#include<math.h>
#include "ace.h"
#include "clock_cycle.h"
#define NUM_ITER 2000
#define NUM_TEST 500
/*
 * Dump all parallel permutation states to stdout.
 * One line is printed per instance (8*PARAL_INST_BY8 lines), each made of
 * STATEDWORD words written as 8 upper-case hex digits with no separator.
 */
void print_state ( u32 *state )
{
    const u32 *row = state;
    u32 inst = 0;

    while ( inst < 8*PARAL_INST_BY8 )
    {
        u32 w;
        for ( w = 0; w < STATEDWORD; ++w )
            printf("%.8X", row[w]);
        printf("\n");
        row += STATEDWORD;
        ++inst;
    }
}
/* Print `rows` lines of `cols` words each, every word as 8 upper-case hex digits. */
static void print_word_rows( const u32 *words, u32 rows, u32 cols )
{
    u32 r, c;
    for ( r = 0; r < rows; r++ )
    {
        for ( c = 0; c < cols; c++ )
            printf("%08X", words[r*cols+c]);
        printf("\n");
    }
}

/*
 * Benchmark and smoke-test driver (AVX2 build, 8*PARAL_INST_BY8 parallel instances).
 *
 *  1. Fills the states with a fixed pattern, runs ace320() once and prints the
 *     states before and after.
 *  2. Times NUM_TEST back-to-back ace320() calls, NUM_ITER+1 times, printing the
 *     cycles per byte of each run.
 *  3. Times crypto_aead_encrypt() NUM_ITER times, then prints plaintext,
 *     ciphertext and tag, calls crypto_aead_decrypt() and prints its output.
 *
 * Returns 0 on success, EXIT_FAILURE if a working buffer cannot be allocated.
 */
int main( void )
{
    const u32 num_parallel_inst = 8*PARAL_INST_BY8;
    const u32 adlen = 4;  // Associated data length = adlen*32 bits
    const u32 plen = 32;  // Message length = plen*32 bits
    const u32 tlen = 4;   // 128 = 32*4 bits
    const u32 klen = 4;   // 128 = 32*4 bits
    u32 *state = NULL;
    u32 *plaintext = NULL, *ciphertext = NULL, *tag = NULL;
    u32 *key = NULL, *nonce = NULL, *ad = NULL;
    u8 *k = NULL, *pubn = NULL;
    u64 count_cc;
    u32 i, j;
    int rc = EXIT_FAILURE;

    key = malloc(sizeof *key * klen * num_parallel_inst);
    nonce = malloc(sizeof *nonce * klen * num_parallel_inst);
    tag = malloc(sizeof *tag * tlen * num_parallel_inst);
    ad = malloc(sizeof *ad * adlen * num_parallel_inst);
    plaintext = malloc(sizeof *plaintext * plen * num_parallel_inst);
    ciphertext = malloc(sizeof *ciphertext * plen * num_parallel_inst);
    k = malloc(sizeof *k * 16 * num_parallel_inst);
    pubn = malloc(sizeof *pubn * 16 * num_parallel_inst);
    state = malloc(sizeof *state * num_parallel_inst * STATEDWORD);
    if ( !key || !nonce || !tag || !ad || !plaintext || !ciphertext || !k || !pubn || !state )
    {
        fprintf(stderr, "speed: memory allocation failed\n");
        goto cleanup;
    }

    // Deterministic test pattern: word w of every state is set to w.
    for ( i = 0; i < num_parallel_inst*STATEDWORD; i++ )
        state[i] = i%STATEDWORD;
    print_state(state);
    ace320( state );
    print_state(state);

    // key/nonce are only printed below; the AEAD calls receive k/pubn (all zero).
    for ( i = 0; i < num_parallel_inst*klen; i++ )
    {
        nonce[i] = 0x40404040;
        key[i] = 0x10101010;
    }
    for ( i = 0; i < num_parallel_inst; i++ )
    {
        for ( j = 0; j < plen; j++ )
            plaintext[plen*i+j] = j%plen;
    }
    for ( i = 0; i < num_parallel_inst*adlen; i++ )
        ad[i] = 0xffffffff;
    for ( i = 0; i < num_parallel_inst*16; i++ )
    {
        k[i] = 0x0;
        pubn[i] = 0;
    }

    // Permutation throughput.
    for ( i = 0; i < NUM_ITER+1; i++ )
    {
        count_cc = start_rdtsc();
        for ( j = 0; j < NUM_TEST; j++ )
            ace320( state );
        count_cc = end_rdtsc()-count_cc;
        printf("Cycles per byte = %f\n", (double)(count_cc)/(double)(num_parallel_inst*STATEBYTES*NUM_TEST));
    }
    printf("\n");

    //===================================================================================================================
    // AEAD: Encryption and Decryption Module//
    //===================================================================================================================
    printf("Nonce and Key:\n");
    for ( i = 0; i < num_parallel_inst; i++ )
    {
        printf( "%08X%08X%08X%08X", nonce[4*i+0], nonce[4*i+1], nonce[4*i+2], nonce[4*i+3]);
        printf( "%08X%08X%08X%08X\n", key[4*i+0], key[4*i+1], key[4*i+2], key[4*i+3]);
    }
    for ( i = 0; i < NUM_ITER; i++ )
    {
        count_cc = start_rdtsc();
        crypto_aead_encrypt( tag, tlen, ciphertext, plaintext, plen, ad, adlen, k, pubn, 16 );
        count_cc = end_rdtsc()-count_cc;
        printf("Encryption speed = %f cpb\n", (double)(count_cc)/(double)(num_parallel_inst*plen*4));
    }

    // Encrypt once more and show the values, then decrypt into the same plaintext buffer.
    crypto_aead_encrypt( tag, tlen, ciphertext, plaintext, plen, ad, adlen, k, pubn, 16 );
    printf("Original plaintext:\n");
    print_word_rows( plaintext, num_parallel_inst, plen );
    printf("Ciphertext:\n");
    print_word_rows( ciphertext, num_parallel_inst, plen );
    printf("Tag:\n");
    print_word_rows( tag, num_parallel_inst, tlen );
    crypto_aead_decrypt( plaintext, ciphertext, plen, tag, tlen, ad, adlen, k, pubn, 16 );
    printf("Decrypted plaintext:\n");
    print_word_rows( plaintext, num_parallel_inst, plen );
    printf("Tag:\n");
    print_word_rows( tag, num_parallel_inst, tlen );

    rc = 0;
cleanup:
    // free(NULL) is a no-op, so this path is safe after a partial allocation failure.
    free(state);
    free(plaintext);
    free(ciphertext);
    free(tag);
    free(key);
    free(nonce);
    free(ad);
    free(k);
    free(pubn);
    return rc;
}
# Build file for the SSE variant of the speed/self-test program.
# Alternative compiler settings kept for reference:
#CC=gcc -O2 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -msse3 -mmmx -mavx -mavx2
#CC=gcc -Wall -O2 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -msse2 -mmmx -mavx -mavx2
# Active setting: -O3, tuned for the build host, SSE2 enabled explicitly.
CC=gcc -Wall -O3 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -msse2
#CC=gcc -O1 -fomit-frame-pointer -march=haswell -mtune=native -mavx
# Candidate -march values: ivybridge, skylake, sandybridge, haswell
# Default goal; acesse2_1 is an alias that simply depends on the acesse2 binary.
all: acesse2_1
acesse2_1:acesse2
# Compile and link both sources in one step ($@ = target, $^ = all prerequisites).
acesse2: speed.c ace.c
$(CC) -o $@ $^
.PHONY: clean
# Remove the generated binary.
clean:
rm -fr acesse2
/* Reference implementation of ACE-128, AEAD
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
#ifndef ACE_H
#define ACE_H
#include<stdint.h>
#include<x86intrin.h>
#include<smmintrin.h>
/* Size of one permutation state: 40 bytes = 320 bits. */
#define STATEBYTES 40
#define STATEDWORD 10 // 320/32 = 10 32-bit words per state
/* Round/step counts; presumably consumed by ace.c (not visible here). */
#define SIMECKROUND 8
//#define NUMSTEPS 16
#define NUMSTEPS 16
/* speed.c processes 4*PARAL_INST_BY4 state instances at a time. */
#define PARAL_INST_BY4 1
/* Constant tables, one entry per step index (16 entries each).
   They are static, so every translation unit including this header gets its own copy. */
static const unsigned char SC0[16]={0x50,0x5c,0x91,0x8d,0x53,0x60,0x68,0xe1,0xf6,0x9d,0x40,0x4f,0xbe,0x5b,0xe9,0x7f}; //Step constants (SC_{2i})
static const unsigned char SC1[16]={0x28,0xae,0x48,0xc6,0xa9,0x30,0x34,0x70,0x7b,0xce,0x20,0x27,0x5f,0xad,0x74,0x3f}; //Step constants (SC_{2i+1})
static const unsigned char SC2[16]={0x14,0x57,0x24,0x63,0x54,0x18,0x9a,0x38,0xbd,0x67,0x10,0x13,0x2f,0xd6,0xba,0x1f}; //Step constants (SC_{2i+2})
static const unsigned char RC0[16]={0x07,0x0a,0x9b,0xe0,0xd1,0x1a,0x22,0xf7,0x62,0x96,0x71,0xaa,0x2b,0xe9,0xcf,0xb7};//Round constants (RC_{2i})
static const unsigned char RC1[16]={0x53,0x5d,0x49,0x7f,0xbe,0x1d,0x28,0x6c,0x82,0x47,0x6b,0x88,0xdc,0x8b,0x59,0xc6};//Round constants (RC_{2i+1})
static const unsigned char RC2[16]={0x43,0xe4,0x5e,0xcc,0x32,0x4e,0x75,0x25,0xfd,0xf9,0x76,0xa0,0xb0,0x09,0x1e,0xad};//Round constants (RC_{2i+2})
/* Short integer aliases used throughout the implementation. */
typedef unsigned long long int u64;
typedef unsigned int u32;
/* NOTE(review): despite its name, u8 is an alias for unsigned int, not an 8-bit type;
   buffers declared as u8* therefore hold one unsigned int per element. Confirm this is intended. */
typedef unsigned int u8;
/* Rotate every 32-bit lane of x left by 5 bits (x is evaluated twice). */
#define ROT5(x) (_mm_slli_epi32(x, 5) | _mm_srli_epi32(x, 27))
/* Rotate every 32-bit lane of x left by 1 bit (x is evaluated twice). */
#define ROT1(x) (_mm_slli_epi32(x, 1) | _mm_srli_epi32(x, 31))
/* Vector whose even 32-bit lanes hold 0xfffffffe^t1 and odd lanes hold 0xfffffffe^t2.
   The arguments are not parenthesised, so pass simple expressions only. */
#define RC(t1, t2) (_mm_set_epi32(0xfffffffe^t2, 0xfffffffe^t1, 0xfffffffe^t2, 0xfffffffe^t1))
/* Vector with lanes (low to high): 0xffffffff, 0xffffff00^t1, 0xffffffff, 0xffffff00^t2. */
#define SC(t1, t2) (_mm_set_epi32(0xffffff00^t2, 0xffffffff, 0xffffff00^t1, 0xffffffff ))
/* Swap the two middle 32-bit lanes: lanes 0,1,2,3 become 0,2,1,3. */
#define SWAPREG1(x) (_mm_shuffle_epi32(x, _MM_SHUFFLE(3, 1, 2, 0)))
/* Swap the low and high 64-bit halves of x (x is evaluated twice). */
#define SWAPBLK(x) (_mm_slli_si128(x, 8)|_mm_srli_si128(x, 8))
/* All-ones in the low two 32-bit lanes, zero in the high two. */
#define masklo (_mm_set_epi32(0x0, 0x0, 0xffffffff, 0xffffffff ))
/* All-ones in the high two 32-bit lanes, zero in the low two. */
#define maskhi (_mm_set_epi32(0xffffffff, 0xffffffff, 0x0, 0x0 ))
/* One rotate-and-xor round on the lane pair (x, y), updating both in place:
     x_new = (ROT5(x) & x) ^ ROT1(x) ^ RC(t1, t2) ^ y
     y_new = x_old
   x and y must be assignable __m128i lvalues. The macro expands to a braced block
   and declares a temporary named xtmp, so do not pass a variable with that name. */
#define ROAX(x, y, t1, t2)\
{\
__m128i xtmp;\
xtmp = x;\
x = (ROT5(x)&x)^ROT1(x)^RC(t1, t2)^y;\
y = xtmp;\
}
/* De-interleave two vectors in place: afterwards x holds the even-indexed 32-bit
   lanes (x0, x2, y0, y2) and y holds the odd-indexed lanes (x1, x3, y1, y3).
   Declares temporaries xtmp and ytmp, so do not pass variables with those names. */
#define PACK_SSb(x, y)\
{\
__m128i xtmp, ytmp;\
xtmp = SWAPREG1(x);\
ytmp = SWAPREG1(y);\
x = _mm_unpacklo_epi64(xtmp, ytmp);\
y = _mm_unpackhi_epi64(xtmp, ytmp);\
}
/* Interleave the 32-bit lanes of x and y in place: x receives the interleaved low
   halves (x0, y0, x1, y1) and y the interleaved high halves (x2, y2, x3, y3).
   Declares temporaries xtmp and ytmp. */
#define UNPACK_SSb(x, y)\
{\
__m128i xtmp, ytmp;\
xtmp = _mm_unpacklo_epi32(x, y);\
ytmp = _mm_unpackhi_epi32(x, y);\
x = xtmp;\
y = ytmp;\
}
/* Load four 128-bit blocks from state (unaligned loads at word offsets i1..i4) and
   split them by 64-bit half:
     x <- low halves of blocks i1, i2      z <- high halves of blocks i1, i2
     y <- low halves of blocks i3, i4      w <- high halves of blocks i3, i4
   Declares temporaries xtmp and ytmp, so do not pass variables with those names. */
#define PACK(x, y, z, w, state, i1, i2, i3, i4)\
{\
__m128i xtmp, ytmp;\
xtmp = _mm_loadu_si128((void *) (state + i1));\
ytmp = _mm_loadu_si128((void *) (state + i2));\
x = _mm_unpacklo_epi64(xtmp, ytmp);\
z = _mm_unpackhi_epi64(xtmp, ytmp);\
xtmp = _mm_loadu_si128((void *) (state + i3));\
ytmp = _mm_loadu_si128((void *) (state + i4));\
y = _mm_unpacklo_epi64(xtmp, ytmp);\
w = _mm_unpackhi_epi64(xtmp, ytmp);\
}
/* Register-only regrouping of 64-bit halves, in place:
     x <- (x.lo, z.lo)   z <- (x.hi, z.hi)   y <- (y.lo, w.lo)   w <- (y.hi, w.hi)
   Unlike PACK it performs no memory access. Declares temporaries xtmp and ytmp. */
#define UNPACK(x, y, z, w)\
{\
__m128i xtmp, ytmp;\
xtmp = _mm_unpacklo_epi64(x, z);\
ytmp = _mm_unpackhi_epi64(x, z);\
x = xtmp;\
z = ytmp;\
xtmp = _mm_unpacklo_epi64(y, w);\
ytmp = _mm_unpackhi_epi64(y, w);\
y = xtmp;\
w = ytmp;\
}
/* Permutation over the parallel states, updated in place. speed.c passes a buffer of
   4*PARAL_INST_BY4 consecutive instances of STATEDWORD words each. Defined in ace.c. */
void ace320( u32 *state );
/* NOTE(review): ace_encrypt is declared here but not called from speed.c; confirm it is still implemented. */
void ace_encrypt( u32 *tag, u32 tlen, u32 *ciphertext, u32 *plaintext, u32 plen, u32 *key, u32 *nonce, u32 klen );
/* AEAD encryption/decryption over the parallel instances. speed.c passes mlen, adlen and
   tlen as counts of 32-bit words per instance and klen as 16.
   NOTE(review): return-value semantics are defined in ace.c and not visible here. */
int crypto_aead_encrypt( u32 *tag, u32 tlen, u32 *c, u32 *m, u32 mlen, u32 *ad, u32 adlen, u8 *k, u8 *npub, u32 klen );
int crypto_aead_decrypt( u32 *m, u32 *c, u32 mlen, u32 *tag, u32 tlen, u32 *ad, u32 adlen, u8 *k, u8 *npub, u32 klen );
#endif
#ifndef CLOCK_CYCLE_H
#define CLOCK_CYCLE_H
typedef unsigned long long int u64;

/*
 * Read the time-stamp counter at the START of a timed region.
 *
 * CPUID is executed first as a serialising instruction so that earlier
 * instructions have retired before RDTSC samples the counter.
 * Returns the 64-bit counter value (EDX:EAX).
 *
 * The functions are static inline because they are defined in a header:
 * with external linkage, including this file from two translation units
 * would produce duplicate-symbol link errors. The "memory" clobber stops
 * the compiler from moving loads/stores across the measurement point.
 */
static inline u64 start_rdtsc( void )
{
    unsigned high, low;
    __asm__ volatile("CPUID\n\t"
                     "RDTSC\n\t"
                     "mov %%edx, %0\n\t"
                     "mov %%eax, %1\n\t"
                     : "=r" (high), "=r" (low)
                     :
                     : "%rax", "%rbx", "%rcx", "%rdx", "memory");
    return ( ((u64)low) | (((u64)high) << 32));
}

/*
 * Read the time-stamp counter at the END of a timed region.
 *
 * RDTSCP samples the counter, the value is copied out of EDX:EAX, and a
 * trailing CPUID keeps later instructions from starting before the read.
 * Returns the 64-bit counter value.
 */
static inline u64 end_rdtsc( void )
{
    unsigned high, low;
    __asm__ volatile("RDTSCP\n\t"
                     "mov %%edx, %0\n\t"
                     "mov %%eax,%1\n\t"
                     "CPUID\n\t"
                     : "=r" (high), "=r" (low)
                     :
                     : "%rax", "%rbx", "%rcx", "%rdx", "memory");
    return ( ((u64)low) | (((u64)high) << 32));
}
#endif
/* Reference implementation of ACE-128, AEAD
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#include<math.h>
#include "ace.h"
#include "clock_cycle.h"
#define NUM_ITER 2000
#define NUM_TEST 500
/*
 * Dump all parallel permutation states to stdout.
 * One line is printed per instance (4*PARAL_INST_BY4 lines), each made of
 * STATEDWORD words written as 8 upper-case hex digits with no separator.
 */
void print_state ( u32 *state )
{
    const u32 *row = state;
    u32 inst = 0;

    while ( inst < 4*PARAL_INST_BY4 )
    {
        u32 w;
        for ( w = 0; w < STATEDWORD; ++w )
            printf("%.8X", row[w]);
        printf("\n");
        row += STATEDWORD;
        ++inst;
    }
}
/* Print `rows` lines of `cols` words each, every word as 8 upper-case hex digits. */
static void print_word_rows( const u32 *words, u32 rows, u32 cols )
{
    u32 r, c;
    for ( r = 0; r < rows; r++ )
    {
        for ( c = 0; c < cols; c++ )
            printf("%08X", words[r*cols+c]);
        printf("\n");
    }
}

/*
 * Benchmark and smoke-test driver (SSE build, 4*PARAL_INST_BY4 parallel instances).
 *
 *  1. Fills the states with a fixed pattern, runs ace320() once and prints the
 *     states before and after.
 *  2. Times NUM_TEST back-to-back ace320() calls, NUM_ITER+1 times, printing the
 *     cycles per byte of each run, then prints the final states.
 *  3. Times crypto_aead_encrypt() NUM_ITER times, then prints plaintext,
 *     ciphertext and tag, calls crypto_aead_decrypt() and prints its output.
 *
 * Returns 0 on success, EXIT_FAILURE if a working buffer cannot be allocated.
 */
int main( void )
{
    const u32 num_parallel_inst = 4*PARAL_INST_BY4;
    const u32 adlen = 4;  // Associated data length = adlen*32 bits
    const u32 plen = 32;  // Message length = plen*32 bits
    const u32 tlen = 4;   // 128 = 32*4 bits
    const u32 klen = 4;   // 128 = 32*4 bits
    u32 *state = NULL;
    u32 *plaintext = NULL, *ciphertext = NULL, *tag = NULL;
    u32 *key = NULL, *nonce = NULL, *ad = NULL;
    u8 *k = NULL, *pubn = NULL;
    u64 count_cc;
    u32 i, j;
    int rc = EXIT_FAILURE;

    key = malloc(sizeof *key * klen * num_parallel_inst);
    nonce = malloc(sizeof *nonce * klen * num_parallel_inst);
    tag = malloc(sizeof *tag * tlen * num_parallel_inst);
    ad = malloc(sizeof *ad * adlen * num_parallel_inst);
    plaintext = malloc(sizeof *plaintext * plen * num_parallel_inst);
    ciphertext = malloc(sizeof *ciphertext * plen * num_parallel_inst);
    k = malloc(sizeof *k * 16 * num_parallel_inst);
    pubn = malloc(sizeof *pubn * 16 * num_parallel_inst);
    state = malloc(sizeof *state * num_parallel_inst * STATEDWORD);
    if ( !key || !nonce || !tag || !ad || !plaintext || !ciphertext || !k || !pubn || !state )
    {
        fprintf(stderr, "speed: memory allocation failed\n");
        goto cleanup;
    }

    // Deterministic test pattern: word w of every state is set to w.
    for ( i = 0; i < num_parallel_inst*STATEDWORD; i++ )
        state[i] = i%STATEDWORD;
    print_state(state);
    ace320( state );
    print_state(state);

    // key/nonce are only printed below; the AEAD calls receive k/pubn (all zero).
    for ( i = 0; i < num_parallel_inst*klen; i++ )
    {
        nonce[i] = 0x40404040;
        key[i] = 0x10101010;
    }
    for ( i = 0; i < num_parallel_inst; i++ )
    {
        for ( j = 0; j < plen; j++ )
            plaintext[i*plen+j] = j%plen;
    }
    for ( i = 0; i < num_parallel_inst*adlen; i++ )
        ad[i] = 0xffffffff;
    for ( i = 0; i < num_parallel_inst*16; i++ )
    {
        k[i] = 0x0;
        pubn[i] = 0x0;
    }

    // Permutation throughput.
    for ( i = 0; i < NUM_ITER+1; i++ )
    {
        count_cc = start_rdtsc();
        for ( j = 0; j < NUM_TEST; j++ )
            ace320( state );
        count_cc = end_rdtsc()-count_cc;
        printf("Cycles per byte = %f\n", (double)(count_cc)/(double)(num_parallel_inst*STATEBYTES*NUM_TEST));
    }
    print_state(state);
    printf("\n");

    //===================================================================================================================
    // AEAD: Encryption and Decryption Module//
    //===================================================================================================================
    printf("Nonce and Key:\n");
    for ( i = 0; i < num_parallel_inst; i++ )
    {
        printf( "%08X%08X%08X%08X", nonce[4*i+0], nonce[4*i+1], nonce[4*i+2], nonce[4*i+3]);
        printf( "%08X%08X%08X%08X\n", key[4*i+0], key[4*i+1], key[4*i+2], key[4*i+3]);
    }
    // NUM_ITER (2000) replaces the bare literal used previously; same iteration count.
    for ( i = 0; i < NUM_ITER; i++ )
    {
        count_cc = start_rdtsc();
        crypto_aead_encrypt( tag, tlen, ciphertext, plaintext, plen, ad, adlen, k, pubn, 16 );
        count_cc = end_rdtsc()-count_cc;
        printf("Encryption speed = %f cpb\n", (double)(count_cc)/(double)(num_parallel_inst*plen*4));
    }

    // Encrypt once more and show the values, then decrypt into the same plaintext buffer.
    crypto_aead_encrypt( tag, tlen, ciphertext, plaintext, plen, ad, adlen, k, pubn, 16 );
    printf("Original plaintext:\n");
    print_word_rows( plaintext, num_parallel_inst, plen );
    printf("Ciphertext:\n");
    print_word_rows( ciphertext, num_parallel_inst, plen );
    printf("Tag:\n");
    print_word_rows( tag, num_parallel_inst, tlen );
    crypto_aead_decrypt( plaintext, ciphertext, plen, tag, tlen, ad, adlen, k, pubn, 16 );
    printf("Decrypted plaintext:\n");
    print_word_rows( plaintext, num_parallel_inst, plen );
    printf("Tag:\n");
    print_word_rows( tag, num_parallel_inst, tlen );

    rc = 0;
cleanup:
    // free(NULL) is a no-op, so this path is safe after a partial allocation failure.
    free(state);
    free(plaintext);
    free(ciphertext);
    free(tag);
    free(key);
    free(nonce);
    free(ad);
    free(k);
    free(pubn);
    return rc;
}
-- This work is licensed under a Creative Commons
-- Attribution-NonCommercial-ShareAlike 4.0 International License.
-- http://creativecommons.org/licenses/by-nc-sa/4.0
-- Mark D. Aagaard
-- Riham AlTawy
-- Guang Gong
-- Kalikinkar Mandal
-- Raghvendra Rohit
-- Marat Sattarov
-- http://comsec.uwaterloo.ca
-- This is a human-readable summary of (and not a substitute for) the license.
-- You are free to:
-- Share — copy and redistribute the material in any medium or format
-- Adapt — remix, transform, and build upon the material
-- The licensor cannot revoke these freedoms as long as you follow
-- the license terms.
-- Under the following terms:
-- Attribution — You must give appropriate credit, provide a link to
-- the license, and indicate if changes were made. You may do so in
-- any reasonable manner, but not in any way that suggests the
-- licensor endorses you or your use.
-- NonCommercial — You may not use the material for commercial
-- purposes.
-- ShareAlike — If you remix, transform, or build upon the material,
-- you must distribute your contributions under the same license as
-- the original.
-- No additional restrictions — You may not apply legal terms or
-- technological measures that legally restrict others from doing
-- anything the license permits.
-- Structural top level: wires the datapath (dp) to the controller (ctl).
-- The controller drives the datapath through ctl_control and ctl_onehot;
-- every other connection is a top-level port passed straight through.
-- The previously declared but never referenced signals ctl_lfsr_en and
-- ctl_lfsr_reset have been removed.
architecture rtl of ace is
  signal ctl_control : ace_ctl_ty;  -- ctl.o_control -> dp.i_control
  signal ctl_onehot  : onehot_ty;   -- ctl.o_onehot  -> dp.i_onehot
begin

  -- Datapath: consumes i_data and produces o_data.
  u_dp :
    entity work.dp port map
      ( clk       => clk
      , reset     => reset
      , i_mode    => i_mode
      , i_control => ctl_control
      , i_onehot  => ctl_onehot
      , i_dom_sep => i_dom_sep
      , i_valid   => i_valid
      , i_data    => i_data
      , i_padding => i_padding
      , o_data    => o_data
      );

  -- Controller: produces the o_valid / o_ready outputs and the datapath controls.
  u_ctl :
    entity work.ctl port map
      ( clk       => clk
      , reset     => reset
      , i_mode    => i_mode
      , i_dom_sep => i_dom_sep
      , i_valid   => i_valid
      , i_padding => i_padding
      , o_valid   => o_valid
      , o_onehot  => ctl_onehot
      , o_ready   => o_ready
      , o_control => ctl_control
      );

end architecture rtl;
-- This work is licensed under a Creative Commons
-- Attribution-NonCommercial-ShareAlike 4.0 International License.
-- http://creativecommons.org/licenses/by-nc-sa/4.0
-- Mark D. Aagaard
-- Riham AlTawy
-- Guang Gong
-- Kalikinkar Mandal
-- Raghvendra Rohit
-- Marat Sattarov
-- http://comsec.uwaterloo.ca
-- This is a human-readable summary of (and not a substitute for) the license.
-- You are free to:
-- Share — copy and redistribute the material in any medium or format
-- Adapt — remix, transform, and build upon the material
-- The licensor cannot revoke these freedoms as long as you follow
-- the license terms.
-- Under the following terms:
-- Attribution — You must give appropriate credit, provide a link to
-- the license, and indicate if changes were made. You may do so in
-- any reasonable manner, but not in any way that suggests the
-- licensor endorses you or your use.
-- NonCommercial — You may not use the material for commercial
-- purposes.
-- ShareAlike — If you remix, transform, or build upon the material,
-- you must distribute your contributions under the same license as
-- the original.
-- No additional restrictions — You may not apply legal terms or
-- technological measures that legally restrict others from doing
-- anything the license permits.
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.ace_pkg.all;
-- Top-level interface of the ACE core.
-- Port types mode_ty, domsep_ty and word come from work.ace_pkg.
entity ace is
  port
    ( clk       : in  std_logic
    ; reset     : in  std_logic
    -- operation select and domain separator
    ; i_mode    : in  mode_ty
    ; i_dom_sep : in  domsep_ty
    -- input side
    ; i_valid   : in  std_logic
    ; i_data    : in  word
    ; i_padding : in  std_logic
    -- output side
    ; o_valid   : out std_logic
    ; o_ready   : out std_logic
    ; o_data    : out word
    );
end entity ace;
-- This work is licensed under a Creative Commons
-- Attribution-NonCommercial-ShareAlike 4.0 International License.
-- http://creativecommons.org/licenses/by-nc-sa/4.0
-- Mark D. Aagaard
-- Riham AlTawy
-- Guang Gong
-- Kalikinkar Mandal
-- Raghvendra Rohit
-- Marat Sattarov
-- http://comsec.uwaterloo.ca
-- This is a human-readable summary of (and not a substitute for) the license.
-- You are free to:
-- Share — copy and redistribute the material in any medium or format
-- Adapt — remix, transform, and build upon the material
-- The licensor cannot revoke these freedoms as long as you follow
-- the license terms.
-- Under the following terms:
-- Attribution — You must give appropriate credit, provide a link to
-- the license, and indicate if changes were made. You may do so in
-- any reasonable manner, but not in any way that suggests the
-- licensor endorses you or your use.
-- NonCommercial — You may not use the material for commercial
-- purposes.
-- ShareAlike — If you remix, transform, or build upon the material,
-- you must distribute your contributions under the same license as
-- the original.
-- No additional restrictions — You may not apply legal terms or
-- technological measures that legally restrict others from doing
-- anything the license permits.
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
-- Shared types, constants and helper-function declarations for the ACE design.
package ace_pkg is
-- constant-generator LFSR: lfsr_c_output is lfsr_c_sz+3 = 10 bits wide (indices 0 to 9)
constant lfsr_c_sz : integer := 7;
subtype lfsr_c_output is std_logic_vector(0 to lfsr_c_sz+2);
------------------------------------------------------------
-- data widths: a half word is 32 bits, a word is 64 bits; bit 0 is the leftmost index
constant half_word_sz : natural := 32;
constant word_sz : natural := 2*half_word_sz;
subtype half_word is std_logic_vector( 0 to half_word_sz - 1 );
subtype word is std_logic_vector( 0 to word_sz - 1 );
type word_vector is array( natural range <> ) of word;
type half_word_vector is array( natural range <> ) of half_word;
------------------------------------------------------------
-- 320-bit state viewed as five words A, B, C, D, E (indices 0 to 4)
-- or as ten half words (indices 0 to 9)
constant state_sz : natural := 320;
constant word_max_idx : natural := state_sz / word_sz - 1;
constant half_word_max_idx : natural := state_sz / half_word_sz - 1;
constant key_sz : natural := 128;
constant nonce_sz : natural := 128;
subtype word_state_ty is word_vector ( 0 to word_max_idx );
constant a_idx : natural := 0;
constant b_idx : natural := 1;
constant c_idx : natural := 2;
constant d_idx : natural := 3;
constant e_idx : natural := 4;
subtype half_word_data is half_word_vector ( 0 to 1 );
subtype half_word_state_ty is half_word_vector ( 0 to half_word_max_idx );
-- half-word positions: within each word X, half X1 sits at the even index
-- and half X0 at the following odd index
constant a0_idx : natural := 1;
constant a1_idx : natural := 0;
constant b0_idx : natural := 3;
constant b1_idx : natural := 2;
constant c0_idx : natural := 5;
constant c1_idx : natural := 4;
constant d0_idx : natural := 7;
constant d1_idx : natural := 6;
constant e0_idx : natural := 9;
constant e1_idx : natural := 8;
-- boolean to std_logic ('1' for true, '0' for false)
function b2x( b : boolean ) return std_logic;
-- conversions between the half-word and word views of the state
function half_words_to_words( st : half_word_state_ty ) return word_state_ty;
function words_to_half_words( st : word_state_ty ) return half_word_state_ty;
------------------------------------------------------------
-- mode
subtype mode_ty is std_logic_vector( 1 downto 0 ); -- top level input
constant encrypt_mode : mode_ty := ( 1 => '0', 0 => '0' );
constant decrypt_mode : mode_ty := ( 1 => '0', 0 => '1' );
constant absorb_mode : mode_ty := ( 1 => '1', 0 => '0' );
constant squeeze_mode : mode_ty := ( 1 => '1', 0 => '1' );
subtype domsep_ty is std_logic_vector( 1 downto 0 ); -- top level input
-- derived control (from counter and more): bit positions inside ace_ctl_ty
subtype ace_ctl_ty is std_logic_vector( 7 downto 0 );
constant absorb_idx : natural := 0;
constant replace_idx : natural := 1;
constant output_idx : natural := 2;
constant endstep_idx : natural := 3;
constant permoff_idx : natural := 4;
constant squeeze_idx : natural := 5;
constant lfsr_c_reset_idx : natural := 6;
constant lfsr_c_en_idx : natural := 7;
-- extra control for load, init, fin, tag, squeeze
subtype onehot_ty is std_logic_vector( 3 downto 0); -- extra control for load, init, fin, tag, squeeze
------------------------------------------------------------
-- round and step counters
-- use last bit for end ACE perm - for o_ready
-- -> i_valid will reset the counter!
-- counter only runs if msb = 0
constant bits_counter : natural := 8;
subtype count_ty is unsigned( bits_counter - 1 downto 0 );
----------------------------------------------------------------------
-- standard vhdl operators
-- function "sll"( a : half_word; n : natural ) return half_word;
-- rotate a one-hot vector by one position (top bit wraps to bit 0)
function onehot_rotate (a : onehot_ty) return onehot_ty;
-- conversions between one 64-bit word and a pair of half words
function vector_to_data ( st : half_word_data ) return word;
function data_to_vector ( st : word ) return half_word_data;
----------------------------------------------------------------------
end package;
----------------------------------------------------------------------
--
----------------------------------------------------------------------
package body ace_pkg is

  ------------------------------------------------------------
  -- control helpers
  ------------------------------------------------------------

  -- Rotate a one-hot vector by one position: every bit moves to the
  -- next-higher index and the top bit wraps around to index 0.
  function onehot_rotate (a : onehot_ty)
    return onehot_ty
  is
    variable z : onehot_ty;
  begin
    z(onehot_ty'high downto 1) := a(onehot_ty'high - 1 downto 0);
    z(0)                       := a(onehot_ty'high);
    return z;
  end function;

  -- Convert a boolean to std_logic: true -> '1', false -> '0'.
  function b2x( b : boolean ) return std_logic is
  begin
    if b then
      return '1';
    else
      return '0';
    end if;
  end function;

  ------------------------------------------------------------
  -- state functions
  ------------------------------------------------------------

  -- Pack the ten half words of the state into five words.
  -- Word i takes half word 2*i in bits 0 to 31 and half word 2*i+1 in
  -- bits 32 to 63.
  -- TODO(review): the original author flagged this mapping for checking.
  -- The loop parameter i is declared implicitly by the for statement; the
  -- separate "variable i" that it shadowed was never used and is removed.
  function half_words_to_words( st : half_word_state_ty )
    return word_state_ty
  is
    variable z : word_state_ty;
  begin
    main_loop : for i in 0 to word_max_idx loop
      z(i)(0 to half_word_sz - 1)       := st(2*i);
      z(i)(half_word_sz to word_sz - 1) := st(2*i+1);
    end loop;
    return z;
  end function;

  -- Split the five words of the state into ten half words
  -- (inverse of half_words_to_words).
  function words_to_half_words( st : word_state_ty )
    return half_word_state_ty
  is
    variable z : half_word_state_ty;
  begin
    main_loop : for i in 0 to word_max_idx loop
      z(2*i)   := st(i)(0 to half_word_sz - 1);
      z(2*i+1) := st(i)(half_word_sz to word_sz - 1);
    end loop;
    return z;
  end function;

  -- Split one 64-bit word into two half words:
  -- element 0 = bits 0 to 31, element 1 = bits 32 to 63.
  function data_to_vector( st : word )
    return half_word_data
  is
    variable z : half_word_data;
  begin
    z(0) := st(0 to half_word_sz - 1);
    z(1) := st(half_word_sz to word_sz - 1);
    return z;
  end function;

  -- Join two half words into one 64-bit word (inverse of data_to_vector).
  function vector_to_data( st : half_word_data )
    return word
  is
    variable z : word;
  begin
    z(0 to half_word_sz - 1)       := st(0);
    z(half_word_sz to word_sz - 1) := st(1);
    return z;
  end function;

end package body;
# Simulator do-script: sets up the wave window (GUI runs only), records a VCD
# dump of the whole simulation, and exits automatically in batch runs.
# Expects the variables gui_mode and sim_mode to be set by the caller.
if { $gui_mode } {
    # Top-level ports of the testbench.
    foreach port {clk reset i_mode i_dom_sep o_ready i_valid i_data i_padding o_valid o_data} {
        add wave $port
    }

    if { $sim_mode eq "PROG_MODE" } then {
        # Handshake overview.
        add wave -noupdate -divider -height 32 STUFF
        foreach sig {
            /uut/u_ctl/state
            /uut/u_ctl/o_ready
            /uut/u_ctl/i_valid
            /uut/u_dp/i_data
            /uut/u_ctl/o_valid
            /uut/u_dp/o_data
        } {
            add wave $sig
        }

        # Datapath internals.
        add wave -noupdate -divider -height 32 DP
        add wave -radix binary /uut/u_dp/ctl_const
        foreach sig {i_data o_data} {
            add wave /uut/u_dp/$sig
        }
        add wave -radix binary /uut/u_dp/ctl_const
        foreach sig {
            lfsr_c_en lfsr_c_reset permoff endstep absorb replace output
            dsxor post_input pre_round post_round post_xor post_step_const
            post_linear ace_path ace_state
        } {
            add wave /uut/u_dp/$sig
        }

        # Controller internals.
        add wave -noupdate -divider -height 32 CTL
        add wave /uut/u_ctl/state
        add wave -radix unsigned /uut/u_ctl/count
        foreach sig {i_valid o_valid o_ready} {
            add wave /uut/u_ctl/$sig
        }
        add wave -radix binary /uut/u_ctl/onehot
        add wave /uut/u_ctl/lfsr_c_reset
        foreach sig {i_mode i_dom_sep} {
            add wave -radix binary /uut/u_ctl/$sig
        }
    }
}

# Record value changes to ace.vcd for the complete run.
vcd file ace.vcd
vcd add /ace_tb/uut/*
vcd add -r *
vcd on
run -all
vcd checkpoint
vcd off
vcd flush

# Interactive runs keep the window open; batch runs terminate the simulator.
if { $gui_mode } {
    wave zoom full
} else {
    exit
}
-- This work is licensed under a Creative Commons
-- Attribution-NonCommercial-ShareAlike 4.0 International License.
-- http://creativecommons.org/licenses/by-nc-sa/4.0
-- Mark D. Aagaard
-- Riham AlTawy
-- Guang Gong
-- Kalikinkar Mandal
-- Raghvendra Rohit
-- Marat Sattarov
-- http://comsec.uwaterloo.ca
-- This is a human-readable summary of (and not a substitute for) the license.
-- You are free to:
-- Share — copy and redistribute the material in any medium or format
-- Adapt — remix, transform, and build upon the material
-- The licensor cannot revoke these freedoms as long as you follow
-- the license terms.
-- Under the following terms:
-- Attribution — You must give appropriate credit, provide a link to
-- the license, and indicate if changes were made. You may do so in
-- any reasonable manner, but not in any way that suggests the
-- licensor endorses you or your use.
-- NonCommercial — You may not use the material for commercial
-- purposes.
-- ShareAlike — If you remix, transform, or build upon the material,
-- you must distribute your contributions under the same license as
-- the original.
-- No additional restrictions — You may not apply legal terms or
-- technological measures that legally restrict others from doing
-- anything the license permits.
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.ace_pkg.all;
-- Datapath of the ACE core: holds the permutation state register and the
-- combinational logic that computes its next value.
-- NOTE(review): reset, i_mode, i_onehot and i_padding are not read by the rtl
-- architecture that follows in this file, and i_data is only converted to an
-- internal vector there -- presumably this is the reduced "pure datapath"
-- variant; confirm.
entity dp is
port
( clk : in std_logic -- state register is updated on the rising edge
; reset : in std_logic
; i_mode : in mode_ty
; i_control : in ace_ctl_ty -- control bits, decoded by index in the architecture
; i_onehot : in onehot_ty
; i_dom_sep : in domsep_ty -- two domain-separator bits XORed into state half-word e0
; i_valid : in std_logic -- gates the domain-separator XOR
; i_data : in word
; i_padding : in std_logic
; o_data : out word
);
end entity;
-- Combinational next-state logic of the ACE step function plus the state
-- register. Data flows
--   ace_state -> post_input -> pre_round -> post_round -> post_xor
--             -> post_step_const -> post_linear -> ace_path -> ace_state
architecture rtl of dp is
-- Individual control bits extracted from i_control.
signal permoff, endstep, squeeze,
absorb, replace, output,
lfsr_reset, lfsr_en : std_logic;
-- State held as half-words (register) and after the input stage.
signal ace_state, post_input : half_word_state_ty;
-- State held as words at each stage of the step function.
signal pre_round, post_round,
post_xor, post_step_const,
post_linear, ace_path : word_state_ty;
-- Domain separator extended to half-word width.
signal dsxor : half_word;
signal i_data_vector, o_data_vector : half_word_data;
-- Constant bits delivered by the LFSR (round-constant and step-constant bits).
signal ctl_const : lfsr_c_output;
begin
-- Constant generator.
-- NOTE(review): the LFSR entity visible elsewhere in this file is named lfsr_c
-- with ports lfsr_c_en / lfsr_c_reset, whereas this instantiation refers to
-- work.lfsr with ports reset / lfsr_en -- confirm which lfsr.vhd this binds to.
u_lfsr :
entity work.lfsr port map
( clk => clk
, reset => lfsr_reset
, lfsr_en => lfsr_en
, o_const => ctl_const
);
i_data_vector <= data_to_vector( i_data );
o_data <= vector_to_data( o_data_vector );
-- Decode the control word. Only endstep, lfsr_reset and lfsr_en are read
-- further down in this architecture; the other strobes are assigned but unused here.
absorb <= i_control( absorb_idx );
replace <= i_control( replace_idx );
output <= i_control( output_idx );
endstep <= i_control( endstep_idx );
permoff <= i_control( permoff_idx );
squeeze <= i_control( squeeze_idx );
lfsr_reset <= i_control( lfsr_reset_idx );
lfsr_en <= i_control( lfsr_en_idx );
----------------------------------------------------------------------
-- post input: do input and domain separator and replace
-- In this architecture every half-word except e0 passes through unchanged.
post_input( a1_idx ) <= ace_state( a1_idx );
post_input( a0_idx ) <= ace_state( a0_idx );
post_input( b0_idx ) <= ace_state( b0_idx );
post_input( b1_idx ) <= ace_state( b1_idx );
post_input( c0_idx ) <= ace_state( c0_idx );
post_input( c1_idx ) <= ace_state( c1_idx );
post_input( d0_idx ) <= ace_state( d0_idx );
post_input( d1_idx ) <= ace_state( d1_idx );
post_input( e1_idx ) <= ace_state( e1_idx );
-- dsxor is all zeros except its last two positions, which carry i_dom_sep.
dsxor( 0 to half_word_sz - 3 ) <= ( others => '0' );
dsxor( half_word_sz - 2) <= i_dom_sep(1);
dsxor( half_word_sz - 1) <= i_dom_sep(0);
-- The domain separator is applied to e0 only while i_valid = '1'.
post_input( e0_idx ) <= dsxor xor ace_state( e0_idx ) when (i_valid = '1')
else ace_state( e0_idx );
----------------------------------------------------------------------
-- Output: element 0 of the output vector is state half-word a1. No other
-- element of o_data_vector is driven in this architecture.
o_data_vector(0) <= ace_state(a1_idx);
---------------------------------------------------------------------
-- sb 64 ==> post round
-- Words a, c and e go through one sb_64 instance each, with one constant bit
-- from ctl_const; words b and d are passed through.
pre_round <= half_words_to_words( post_input );
a_sb_64 :
entity work.sb_64 port map
( i_state => pre_round( a_idx )
, i_rc => ctl_const( lfsr_c_sz + 2 ) --rc0
, o_state => post_round( a_idx )
);
post_round( b_idx ) <= pre_round( b_idx );
c_sb_64 :
entity work.sb_64 port map
( i_state => pre_round( c_idx )
, i_rc => ctl_const( lfsr_c_sz + 1 ) --rc1
, o_state => post_round( c_idx )
);
post_round( d_idx ) <= pre_round( d_idx );
e_sb_64 :
entity work.sb_64 port map
( i_state => pre_round( e_idx )
, i_rc => ctl_const( lfsr_c_sz ) -- rc2
, o_state => post_round( e_idx )
);
----------------------------------------------------------------------
-- XORs to the left ==> post xor
-- b ^= c, d ^= e, e ^= a (all operands taken after the s-box stage).
post_xor( a_idx ) <= post_round( a_idx );
post_xor( c_idx ) <= post_round( c_idx );
post_xor( b_idx ) <= post_round( b_idx ) xor post_round( c_idx );
post_xor( d_idx ) <= post_round( d_idx ) xor post_round( e_idx );
post_xor( e_idx ) <= post_round( e_idx ) xor post_round( a_idx );
----------------------------------------------------------------------
-- XOR with step constant ==> post step const
-- For b, d and e: positions 0 to 55 are inverted and positions 56 to 63 are
-- XORed with a slice of ctl_const. The slices must be 8 bits wide, so
-- lfsr_c_sz is presumably 7 -- confirm in ace_pkg.
post_step_const( a_idx ) <= post_xor( a_idx );
post_step_const( c_idx ) <= post_xor( c_idx );
post_step_const( b_idx)( 0 to 55) <= not post_xor( b_idx )( 0 to 55 );
post_step_const( b_idx)( 56 to 63) <= post_xor( b_idx )( 56 to 63 ) xor ctl_const( 2 to lfsr_c_sz + 2 ); -- sc0
post_step_const( d_idx)( 0 to 55) <= not post_xor( d_idx )( 0 to 55 );
post_step_const( d_idx)( 56 to 63) <= post_xor( d_idx )( 56 to 63 ) xor ctl_const( 1 to lfsr_c_sz + 1 ); -- sc1
post_step_const( e_idx)( 0 to 55) <= not post_xor( e_idx )( 0 to 55 );
post_step_const( e_idx)( 56 to 63) <= post_xor( e_idx )( 56 to 63 ) xor ctl_const( 0 to lfsr_c_sz ); --sc2
------------------------------------------------------------
-- post linear layer pi = (3,2,0,4,1) ==> post linear
-- New (a, b, c, d, e) = old (d, c, a, e, b).
post_linear( a_idx ) <= post_step_const( d_idx );
post_linear( b_idx ) <= post_step_const( c_idx );
post_linear( c_idx ) <= post_step_const( a_idx );
post_linear( d_idx ) <= post_step_const( e_idx );
post_linear( e_idx ) <= post_step_const( b_idx );
------------------------------------------------------------
-- update state
-- When endstep = '1' the full step result (XORs, constants, permutation) is
-- taken; otherwise only the s-box round result is fed back.
ace_path( a_idx ) <= post_linear( a_idx ) when endstep = '1' else post_round( a_idx );
ace_path( b_idx ) <= post_linear( b_idx ) when endstep = '1' else post_round( b_idx );
ace_path( c_idx ) <= post_linear( c_idx ) when endstep = '1' else post_round( c_idx );
ace_path( d_idx ) <= post_linear( d_idx ) when endstep = '1' else post_round( d_idx );
ace_path( e_idx ) <= post_linear( e_idx ) when endstep = '1' else post_round( e_idx );
-- State register: loaded on every rising clock edge; this process has no
-- reset and no enable.
process
begin
wait until rising_edge( clk );
ace_state <= words_to_half_words(ace_path);
end process;
end architecture;
-- This work is licensed under a Creative Commons
-- Attribution-NonCommercial-ShareAlike 4.0 International License.
-- http://creativecommons.org/licenses/by-nc-sa/4.0
-- Mark D. Aagaard
-- Riham AlTawy
-- Guang Gong
-- Kalikinkar Mandal
-- Raghvendra Rohit
-- Marat Sattarov
-- http://comsec.uwaterloo.ca
-- This is a human-readable summary of (and not a substitute for) the license.
-- You are free to:
-- Share — copy and redistribute the material in any medium or format
-- Adapt — remix, transform, and build upon the material
-- The licensor cannot revoke these freedoms as long as you follow
-- the license terms.
-- Under the following terms:
-- Attribution — You must give appropriate credit, provide a link to
-- the license, and indicate if changes were made. You may do so in
-- any reasonable manner, but not in any way that suggests the
-- licensor endorses you or your use.
-- NonCommercial — You may not use the material for commercial
-- purposes.
-- ShareAlike — If you remix, transform, or build upon the material,
-- you must distribute your contributions under the same license as
-- the original.
-- No additional restrictions — You may not apply legal terms or
-- technological measures that legally restrict others from doing
-- anything the license permits.
library ieee;
use ieee.std_logic_1164.all;
use work.ace_pkg.all;
-- LFSR that produces the constant bits consumed by the datapath.
entity lfsr_c is
port
( clk : in std_logic -- register updates on the rising edge
; lfsr_c_en : in std_logic -- advance the register when '1'
; lfsr_c_reset : in std_logic -- synchronous reset, takes priority over lfsr_c_en
; o_const : out lfsr_c_output -- register bits plus three feedback bits
);
end lfsr_c;
-- sa is the lfsr_c_sz-bit register. xa extends it by three combinational
-- feedback bits, so that one enabled clock edge advances the register by
-- three positions.
architecture rtl of lfsr_c is
signal sa: std_logic_vector(lfsr_c_sz - 1 downto 0);
signal xa: std_logic_vector(lfsr_c_sz + 2 downto 0);
begin
-- 10 output bits for the constants
o_const <= xa; -- "to" type <= "downto" type. Index flip intended
-- just rename signal
xa(lfsr_c_sz-1 downto 0) <= sa(lfsr_c_sz-1 downto 0);
-- for updates and outputs
-- Each of the three feedback bits is the XOR of two adjacent low-order bits.
xa(lfsr_c_sz + 2 downto lfsr_c_sz) <= xa(3 downto 1) xor xa(2 downto 0);
-- One clocked process per register bit.
lfsr_shift: for i in lfsr_c_sz-1 downto 0 generate
lfsr_step: process(clk) begin
if rising_edge(clk) then
if lfsr_c_reset ='1' then
-- Synchronous reset loads all ones.
sa(i) <= '1';
elsif lfsr_c_en ='1' then
-- Bit i takes the value three positions above it.
sa(i) <= xa(i+3);
end if;
end if;
end process;
end generate lfsr_shift;
end;
------------ ACE readme file ---------------
----- list of files for ACE synthesis: -----
ace_pkg.vhd -- main package
sb_64.vhd -- s-box with simeck
lfsr.vhd -- lfsr for step / round constant generation
ctl.vhd -- control (FSM)
dp.vhd -- datapath
ace.vhd -- top level entity declaration
ace-rtl.vhd -- top level architecture
----- additional files for simulation: -----
util_unsynth.vhd -- functions used in TB (general purpose)
ace_unsynth.vhd -- specific ACE functions and procedures used in TB
ace_tb.vhd -- ACE testbench
-------------- pure datapath ---------------
dp_pure.vhd -- datapath with most input/output multiplexers removed
----------- TB info (ace_tb.vhd): ----------
********
EDH is a 3-bit constant used to select which modes to test
"100" - encryption only
"010" - decryption only
"001" - hash only
"110" - encryption and decryption
etc.
********
stim_file_path -- stimulus file
output_file_path -- output file
********
------------ stimulus file format --------------
1 file = 1 set of Key, Nonce, AD, Plaintext and Ciphertext
K 00111122335588DD00111122335588DD <--- 128 bits of Key (all 128 bits in a single line)
N 111122335588DD00111122335588DD00 <--- 128 bits of Nonce (all 128 bits in a single line)
A 1122335588DD00111122335588DD00 <--- from 4 to 128 bits of AD
P 335588DD00111122335588DD001111 <--- from 4 to 128 bits of Plaintext
C F9362385DC213A07CEFEF38C34CEFF <--- from 4 to 128 bits of Ciphertext
--- padding is done by testbench
--- multiple lines for AD, Plaintext and Ciphertext are supported
-- This work is licensed under a Creative Commons
-- Attribution-NonCommercial-ShareAlike 4.0 International License.
-- http://creativecommons.org/licenses/by-nc-sa/4.0
-- Mark D. Aagaard
-- Riham AlTawy
-- Guang Gong
-- Kalikinkar Mandal
-- Raghvendra Rohit
-- Marat Sattarov
-- http://comsec.uwaterloo.ca
-- This is a human-readable summary of (and not a substitute for) the license.
-- You are free to:
-- Share — copy and redistribute the material in any medium or format
-- Adapt — remix, transform, and build upon the material
-- The licensor cannot revoke these freedoms as long as you follow
-- the license terms.
-- Under the following terms:
-- Attribution — You must give appropriate credit, provide a link to
-- the license, and indicate if changes were made. You may do so in
-- any reasonable manner, but not in any way that suggests the
-- licensor endorses you or your use.
-- NonCommercial — You may not use the material for commercial
-- purposes.
-- ShareAlike — If you remix, transform, or build upon the material,
-- you must distribute your contributions under the same license as
-- the original.
-- No additional restrictions — You may not apply legal terms or
-- technological measures that legally restrict others from doing
-- anything the license permits.
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.ace_pkg.all;
-- Combinational block applying one Simeck-style round to a 64-bit word.
entity sb_64 is
port
( i_state : in word -- input word, split into two half-words
; i_rc : in std_logic -- round-constant bit for this round
; o_state : out word -- output word
);
end entity;
-- One Feistel-like round, purely combinational:
--   z1 = (rot5(x1) and x1) xor rot1(x1) xor x0 xor rc
--   z0 = x1
architecture rtl of sb_64 is
signal x0, x1, z0, z1 : half_word;
signal rc : half_word;
begin
-- x1 is the first half of i_state, x0 the second half.
x1 <= i_state( 0 to half_word_sz-1 );
x0 <= i_state( half_word_sz to word_sz - 1 );
-- Constant word: all ones except the last position, which carries i_rc.
rc <= ( 0 to half_word_sz - 2 => '1', half_word_sz - 1 => i_rc );
z0 <= x1;
-- The two concatenations move the first 5 (resp. 1) elements of x1 to the end,
-- i.e. a rotation by 5 (resp. 1) towards index 0.
z1 <= ( ( x1(5 to half_word_sz - 1) & x1 (0 to 4) ) and x1)
xor ( x1(1 to half_word_sz - 1) & x1 (0) )
xor x0
xor rc;
o_state <= z1 & z0;
end architecture;
# Builds the ace128 binary from the KAT generator, the AEAD mode and the
# permutation sources.
CC=gcc
# C99 with extra warnings; the binary is instrumented with the address and
# undefined-behaviour sanitizers.
NISTGCCFLAGS=-std=c99 -Wall -Wextra -Wshadow -fsanitize=address,undefined -O2
LFLAGS=-lm
all: ace128_1
ace128_1: ace128
ace128: genkat_aead.c encrypt.c ace.c
$(CC) $(NISTGCCFLAGS) -o $@ $^ $(LFLAGS)
.PHONY: clean
# The leading '-' lets 'make clean' succeed even when ace128 does not exist.
clean:
-rm ace128
/* Reference implementation of the ace-320 permutation
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
#include<stdio.h>
#include<math.h>
#include<stdlib.h>
#include<stdint.h>
#include "ace.h"
/*
 * Per-step constant tables, indexed by the step number i in ace_permutation():
 *   RC0[i], RC1[i], RC2[i] are passed to simeck64_box() for blocks A, C and E;
 *   SC1[i], SC0[i], SC2[i] are XORed into the last byte of the new A, E and D.
 * NOTE(review): the subscripts in the comments below (2i, 2i+1, 2i+2) differ
 * from those used in the comments of ace_permutation() (3i, 3i+1, 3i+2) --
 * confirm against the specification.
 */
static const unsigned char SC0[16]={0x50,0x5c,0x91,0x8d,0x53,0x60,0x68,0xe1,0xf6,0x9d,0x40,0x4f,0xbe,0x5b,0xe9,0x7f}; //Step constants (SC_{2i})
static const unsigned char SC1[16]={0x28,0xae,0x48,0xc6,0xa9,0x30,0x34,0x70,0x7b,0xce,0x20,0x27,0x5f,0xad,0x74,0x3f}; //Step constants (SC_{2i+1})
static const unsigned char SC2[16]={0x14,0x57,0x24,0x63,0x54,0x18,0x9a,0x38,0xbd,0x67,0x10,0x13,0x2f,0xd6,0xba,0x1f}; //Step constants (SC_{2i+2})
static const unsigned char RC0[16]={0x07,0x0a,0x9b,0xe0,0xd1,0x1a,0x22,0xf7,0x62,0x96,0x71,0xaa,0x2b,0xe9,0xcf,0xb7};//Round constants (RC_{2i})
static const unsigned char RC1[16]={0x53,0x5d,0x49,0x7f,0xbe,0x1d,0x28,0x6c,0x82,0x47,0x6b,0x88,0xdc,0x8b,0x59,0xc6};//Round constants (RC_{2i+1})
static const unsigned char RC2[16]={0x43,0xe4,0x5e,0xcc,0x32,0x4e,0x75,0x25,0xfd,0xf9,0x76,0xa0,0xb0,0x09,0x1e,0xad};//Round constants (RC_{2i+2})
/*
 * Byte-wise building block of a multi-byte left rotation: shift x left by
 * `shift` bits and fill the vacated low bits with the top `shift` bits of y.
 * The result is truncated to one byte. `shift` must not exceed 8.
 */
unsigned char rotl8 ( const unsigned char x, const unsigned char y, const unsigned char shift )
{
	const int shifted_out = x << shift;
	const int carried_in = y >> (8 - shift);

	return (unsigned char)(shifted_out | carried_in);
}
/***********************************************************
******* ACE permutation implementation ********************
*********************************************************/
/* Print the STATEBYTES bytes of `state` as upper-case hex on a single line. */
void ace_print_state( const unsigned char *state )
{
	const unsigned char *cursor = state;
	const unsigned char *const end = state + STATEBYTES;

	while ( cursor != end )
		printf("%02X", *cursor++);
	printf("\n");
}
/*
 * Print `xlen` bytes starting at `x` as lower-case hex, each followed by a
 * space, and terminate the line.
 */
void ace_print_data(const uint8_t *x, const uint32_t xlen )
{
	const uint8_t *cursor = x;
	uint32_t remaining = xlen;

	while ( remaining != 0 )
	{
		printf("%.2x ", *cursor);
		cursor++;
		remaining--;
	}
	printf("\n");
}
/*
 * Apply SIMECKROUND rounds of the Simeck-style round function to one
 * SIMECKBYTES-byte block.
 *
 *   output : receives the transformed block (SIMECKBYTES bytes)
 *   input  : block to transform (SIMECKBYTES bytes); may be the same buffer
 *            as `output`, because the input is copied before anything is
 *            written
 *   rc     : round-constant byte; bit r (counting from the least significant
 *            bit) is used in round r
 *
 * Bytes 0..3 form the half that is rotated and mixed, bytes 4..7 the other
 * half. Each round computes
 *   new[0..3] = rot1(cur[0..3]) ^ (rot5(cur[0..3]) & cur[0..3])
 *               ^ cur[4..7] ^ 0xff,0xff,0xff,(0xfe ^ rc bit)
 *   new[4..7] = cur[0..3]
 *
 * All scratch space lives on the stack: the buffers are a few bytes each, so
 * no heap allocation (and no unchecked allocation failure) is involved.
 */
void simeck64_box( unsigned char *output, const unsigned char *input, const unsigned char rc )
{
	unsigned char block[SIMECKBYTES];
	unsigned char mixed[4];
	unsigned char round, j;

	for ( j = 0; j < SIMECKBYTES; j++ )
		block[j] = input[j];

	for ( round = 0; round < SIMECKROUND; round++ )
	{
		const unsigned char rc_bit = (unsigned char)((rc >> round) & 1);

		for ( j = 0; j < 4; j++ )
		{
			/* The 32-bit rotation wraps around from byte 3 to byte 0. */
			const unsigned char next = block[(j + 1) % 4];
			const unsigned char rot1 = rotl8(block[j], next, 1);
			const unsigned char rot5 = rotl8(block[j], next, 5);

			mixed[j] = (unsigned char)(rot1 ^ (rot5 & block[j]) ^ block[4 + j] ^ 0xff);
		}
		/* The last byte uses 0xfe ^ rc_bit instead of 0xff. */
		mixed[3] = (unsigned char)(mixed[3] ^ 0x01 ^ rc_bit);

		for ( j = 0; j < 4; j++ )
		{
			block[4 + j] = block[j];
			block[j] = mixed[j];
		}
	}

	for ( j = 0; j < SIMECKBYTES; j++ )
		output[j] = block[j];
}
/*
 * Apply the ACE permutation in place to the STATEBYTES-byte buffer `input`.
 *
 * The state is viewed as five consecutive SIMECKBYTES-byte blocks A, B, C, D
 * and E. Each of the NUMSTEPS steps computes F(A), F(C) and F(E) with
 * simeck64_box() and then sets
 *   A <- D ^ F(E) ^ (0xff.. , SC1[step])
 *   E <- B ^ F(C) ^ (0xff.. , SC0[step])
 *   B <- F(C)
 *   C <- F(A)
 *   D <- F(A) ^ F(E) ^ (0xff.. , SC2[step])
 * where the step constant replaces 0xff in the last byte of the block.
 *
 * Working storage is kept on the stack, so the function cannot fail on
 * allocation and has nothing to free.
 */
void ace_permutation( unsigned char *input )
{
	unsigned char state[STATEBYTES];
	unsigned char fa[SIMECKBYTES], fc[SIMECKBYTES], fe[SIMECKBYTES];
	unsigned char *const blk_a = state;
	unsigned char *const blk_b = state + 1*SIMECKBYTES;
	unsigned char *const blk_c = state + 2*SIMECKBYTES;
	unsigned char *const blk_d = state + 3*SIMECKBYTES;
	unsigned char *const blk_e = state + 4*SIMECKBYTES;
	unsigned char step, j;

	for ( j = 0; j < STATEBYTES; j++ )
		state[j] = input[j];

	for ( step = 0; step < NUMSTEPS; step++ )
	{
		simeck64_box( fa, blk_a, RC0[step] );
		simeck64_box( fc, blk_c, RC1[step] );
		simeck64_box( fe, blk_e, RC2[step] );

		for ( j = 0; j < SIMECKBYTES; j++ )
		{
			/* Only the last byte of a block receives the step constant. */
			const int last = ( j == SIMECKBYTES-1 );
			const unsigned char add_a = last ? SC1[step] : 0xff;
			const unsigned char add_e = last ? SC0[step] : 0xff;
			const unsigned char add_d = last ? SC2[step] : 0xff;

			/* Order matters: A reads the old D and E reads the old B. */
			blk_a[j] = (unsigned char)(blk_d[j] ^ fe[j] ^ add_a);
			blk_e[j] = (unsigned char)(blk_b[j] ^ fc[j] ^ add_e);
			blk_b[j] = fc[j];
			blk_c[j] = fa[j];
			blk_d[j] = (unsigned char)(fa[j] ^ fe[j] ^ add_d);
		}
	}

	for ( j = 0; j < STATEBYTES; j++ )
		input[j] = state[j];
}
/*
 * Overwrite `state` with STATEBYTES zero bytes, print that initial state and
 * then run the permutation on it in place.
 */
void ace_permutation_ALLZERO ( unsigned char *state )
{
	unsigned char remaining = STATEBYTES;

	while ( remaining != 0 )
	{
		remaining--;
		state[remaining] = 0x0;
	}
	ace_print_state(state);
	ace_permutation(state);
}
/*
 * Overwrite `state` with STATEBYTES bytes of 0xff and run the permutation on
 * it in place. Nothing is printed.
 */
void ace_permutation_ALLONE ( unsigned char *state )
{
	unsigned char remaining = STATEBYTES;

	while ( remaining != 0 )
	{
		remaining--;
		state[remaining] = 0xff;
	}
	ace_permutation(state);
}
/* Reference implementation of the ACE permutation
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
/* Public interface of the byte-oriented ACE permutation (ace.c). */
#ifndef ACE_H
#define ACE_H
#include<math.h>
#include<stdlib.h>
#include<stdint.h>
#define STATEBYTES 40 //Number OF BYTES = 320/8 = 40
#define SIMECKBYTES 8 //Number of Simeck BYTES = 64/8 = 8
#define SIMECKROUND 8 //Number of rounds
#define NUMSTEPS 16 //Number of steps
typedef unsigned long long u64;
/* Shift x left by `shift` bits, filling the low bits from the top of y. */
unsigned char rotl8 ( const unsigned char x, const unsigned char y, const unsigned char shift );
/* Print xlen bytes of x as hex, separated by spaces. */
void ace_print_data(const unsigned char *x, const uint32_t xlen );
/* NOTE(review): no definition of simeck_print_data is visible in ace.c; the only
   reference there is commented out -- confirm whether this prototype is still needed. */
void simeck_print_data(const unsigned char *y, const unsigned char ylen );
/* SIMECKROUND rounds of the Simeck-style box on one SIMECKBYTES-byte block. */
void simeck64_box( unsigned char *output, const unsigned char *input, const unsigned char rc );
/* In-place ACE permutation of a STATEBYTES-byte state. */
void ace_permutation( unsigned char *input );
/* Print the STATEBYTES-byte state as hex on one line. */
void ace_print_state( const unsigned char *state );
/* Fill the state with 0x00 (resp. 0xff) and permute it; the ALLZERO variant
   also prints the initial state. */
void ace_permutation_ALLZERO ( unsigned char *state );
void ace_permutation_ALLONE ( unsigned char *state );
#endif
/* AEAD parameter sizes, in bytes, used by encrypt.c and the KAT generator. */
#define CRYPTO_KEYBYTES 16 /* key length */
#define CRYPTO_NSECBYTES 0 /* presumably: no secret message number is used -- confirm */
#define CRYPTO_NPUBBYTES 16 /* nonce length */
#define CRYPTO_ABYTES 16 /* tag length appended to the ciphertext */
#define CRYPTO_NOOVERLAP 1 /* presumably: input and output buffers must not overlap -- confirm */
/* Reference implementation of ACE-128 AEAD
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
#ifndef CRYPTO_AEAD_H
#define CRYPTO_AEAD_H

typedef unsigned long long u64;

/*
 * Load key `k` and nonce `npub` into `state` and run the initialization
 * phase. Returns 0 on success, a negative KAT_* code otherwise.
 */
int ace_init(
unsigned char *state,
const unsigned char *npub,
const unsigned char *k
);

/*
 * Absorb `adlen` bytes of associated data `ad` into `state`.
 * Returns 0 on success.
 */
int ace_ad(
unsigned char *state,
const unsigned char *ad,
const u64 adlen
);

/*
 * Run the finalization phase with key `k` and write `tlen` tag bytes to
 * `tag`. Returns 0 on success, a negative KAT_* code for an unsupported
 * key/tag length pair.
 */
int ace_gentag(
unsigned char *tag,
const unsigned char tlen,
unsigned char *state,
const unsigned char *k
);

/* Declared for API compatibility; no definition is provided in encrypt.c. */
int crypto_encrypt(
unsigned char *c,unsigned long long *clen,
const unsigned char *m,unsigned long long mlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);

/* Declared for API compatibility; no definition is provided in encrypt.c. */
int crypto_decrypt(
unsigned char *m,unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c,unsigned long long clen,
const unsigned char *npub,
const unsigned char *k
);

/*
 * Encrypt `mlen` bytes of `m` with associated data `ad`; writes the
 * ciphertext followed by the tag to `c` and its total length to `*clen`.
 * Returns 0 on success.
 */
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);

/*
 * Decrypt and verify `clen` bytes of `c` (ciphertext followed by the tag);
 * writes the plaintext to `m` and its length to `*mlen`.
 * Returns 0 on success, non-zero when verification fails.
 */
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);

#endif /* CRYPTO_AEAD_H */
/* Reference implementation of ACE-128 AEAD
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
#include<stdio.h>
#include<math.h>
#include<stdlib.h>
#include<stdint.h>
#include "ace.h"
#include "crypto_aead.h"
#include "api.h"
/* Status codes returned by the functions in this file. */
#define KAT_SUCCESS 0
#define KAT_FILE_OPEN_ERROR -1
#define KAT_DATA_ERROR -3
#define KAT_CRYPTO_FAILURE -4
/*
 * State byte positions through which data enters and leaves the state: the
 * first four bytes of block A (offset 0) and the first four bytes of block C
 * (offset 16). One 8-byte data block maps onto these eight positions.
 */
const unsigned char rate_bytes[8] = {0,1,2,3,16,17,18,19};
/*
 * Initialization phase.
 *
 * Clears the state, loads the 16-byte key into blocks A and C and the 16-byte
 * nonce into blocks B and E, permutes, and then absorbs the two key halves
 * through the rate positions, each followed by a permutation.
 *
 * Returns KAT_SUCCESS, or KAT_CRYPTO_FAILURE when the configured key/nonce
 * sizes are not 16/16 (the state is still cleared in that case).
 */
int ace_init(
	unsigned char *state,
	const unsigned char *npub,
	const unsigned char *k
	)
{
	unsigned char pos, half;

	//Initialize the state to all-ZERO
	for ( pos = 0; pos < STATEBYTES; pos++ )
		state[pos] = 0x0;

	if ( !(CRYPTO_KEYBYTES == 16 && CRYPTO_NPUBBYTES == 16) )
		return KAT_CRYPTO_FAILURE;

	for ( pos = 0; pos < 8; pos++ )
	{
		//Key goes to A[0..7] and C[0..7]
		state[pos] = k[pos];
		state[16+pos] = k[8+pos];
		//Nonce goes to B[0..7] and E[0..7]
		state[8+pos] = npub[pos];
		state[32+pos] = npub[8+pos];
	}
	ace_permutation(state);

	//Absorb the first, then the second key half
	for ( half = 0; half < 2; half++ )
	{
		for ( pos = 0; pos < 8; pos++ )
			state[rate_bytes[pos]] ^= k[8*half+pos];
		ace_permutation(state);
	}
	return KAT_SUCCESS;
}
/*
 * Absorb `adlen` bytes of associated data into the state.
 *
 * Nothing happens when adlen is 0. Otherwise every complete 8-byte block is
 * XORed into the rate positions, followed by the domain separator 0x01 and a
 * permutation. A final padded block is always processed: it holds the
 * remaining adlen%8 bytes (possibly none) followed by the 0x80 padding byte.
 *
 * Always returns KAT_SUCCESS.
 */
int ace_ad(
	unsigned char *state,
	const unsigned char *ad,
	const u64 adlen
	)
{
	const u64 full_blocks = adlen/8;
	const unsigned char tail_len = (unsigned char)(adlen%8);
	const unsigned char *block = ad;
	u64 remaining;
	unsigned char pos;

	if ( adlen == 0 )
		return KAT_SUCCESS;

	//Absorbing the complete blocks
	for ( remaining = full_blocks; remaining != 0; remaining-- )
	{
		for ( pos = 0; pos < 8; pos++ )
			state[rate_bytes[pos]] ^= block[pos];
		//Domain separator
		state[STATEBYTES-1] ^= (0x01);
		ace_permutation(state);
		block += 8;
	}

	//Last block: leftover bytes (if any), then the 10* padding
	for ( pos = 0; pos < tail_len; pos++ )
		state[rate_bytes[pos]] ^= block[pos];
	state[rate_bytes[tail_len]] ^= (0x80);
	//Domain separator
	state[STATEBYTES-1] ^= (0x01);
	ace_permutation(state);

	return KAT_SUCCESS;
}
/*
 * Finalization phase: absorb both key halves (each followed by a
 * permutation) and copy the 16-byte tag out of blocks A and C.
 *
 * Only a 16-byte key with a 16-byte tag is supported; any other combination
 * prints a message and returns KAT_CRYPTO_FAILURE without touching the state.
 */
int ace_gentag(
	unsigned char *tag,
	const unsigned char tlen,
	unsigned char *state,
	const unsigned char *k
	)
{
	unsigned char pos, half;

	if ( !(CRYPTO_KEYBYTES == 16 && tlen == 16) )
	{
		printf("Invalid key and tag length pair.\n");
		return KAT_CRYPTO_FAILURE;
	}

	for ( half = 0; half < 2; half++ )
	{
		for ( pos = 0; pos < 8; pos++ )
			state[rate_bytes[pos]] ^= k[8*half+pos];
		ace_permutation(state);
	}

	//Tag = A[0..7] || C[0..7]
	for ( pos = 0; pos < 8; pos++ )
		tag[pos] = state[pos];
	for ( pos = 0; pos < 8; pos++ )
		tag[8+pos] = state[16+pos];

	return KAT_SUCCESS;
}
/*
 * ACE-128 authenticated encryption.
 *
 *   c, clen  : output buffer for ciphertext || tag (mlen + CRYPTO_ABYTES
 *              bytes) and the number of bytes written
 *   m, mlen  : plaintext and its length in bytes (mlen may be 0)
 *   ad, adlen: associated data and its length (ad is not read when adlen is 0)
 *   nsec     : unused
 *   npub     : CRYPTO_NPUBBYTES-byte nonce
 *   k        : CRYPTO_KEYBYTES-byte key
 *
 * Returns KAT_SUCCESS, or KAT_CRYPTO_FAILURE when initialization, AD
 * processing or tag generation reports an error.
 *
 * The state and the tag are small fixed-size buffers and are kept on the
 * stack, so no allocation can fail and nothing leaks on the error paths.
 */
int crypto_aead_encrypt(
	unsigned char *c,unsigned long long *clen,
	const unsigned char *m,unsigned long long mlen,
	const unsigned char *ad,unsigned long long adlen,
	const unsigned char *nsec,
	const unsigned char *npub,
	const unsigned char *k
	)
{
	unsigned char state[STATEBYTES];
	unsigned char tag[CRYPTO_ABYTES];
	const u64 m64len = mlen/8;
	const unsigned char lblen = (unsigned char)(mlen%8);
	unsigned char i;
	u64 j;

	(void)nsec; /* part of the API, not used by this scheme */

	//Initialize state with "key" and "nonce" and then absorb "key" again
	if ( ace_init(state, npub, k) != KAT_SUCCESS )
		return(KAT_CRYPTO_FAILURE);

	//Absorbing "ad"
	if ( adlen != 0 )
	{
		if ( ace_ad( state, ad, adlen) != KAT_SUCCESS )
			return(KAT_CRYPTO_FAILURE);
	}

	//Encrypting the complete 64-bit blocks of "m"; the ciphertext block
	//replaces the rate part of the state
	for ( j = 0; j < m64len; j++ )
	{
		for ( i = 0; i < 8; i++ )
		{
			c[8*j+((u64)i)] = m[8*j+((u64)i)]^state[rate_bytes[i]];
			state[rate_bytes[i]] = c[8*j+((u64)i)];
		}
		//Domain separator
		state[STATEBYTES-1]^=(0x02);
		ace_permutation(state);
	}

	//Final padded block: the remaining mlen%8 bytes (none when mlen is a
	//multiple of 8, including mlen == 0) followed by the 10* padding
	for ( i = 0; i < lblen; i++ )
	{
		c[8*m64len+((u64)i)] = m[8*m64len+((u64)i)]^state[rate_bytes[i]];
		state[rate_bytes[i]] = c[8*m64len+((u64)i)];
	}
	state[rate_bytes[lblen]]^=(0x80); //Padding: 10*
	//Domain separator
	state[STATEBYTES-1]^=(0x02);
	ace_permutation(state);

	//Appending tag to the end of ciphertext
	if ( ace_gentag( tag, CRYPTO_ABYTES, state, k ) != KAT_SUCCESS )
		return(KAT_CRYPTO_FAILURE);
	for ( i = 0; i < CRYPTO_ABYTES; i++ )
		c[mlen+(u64)i] = tag[i];
	*clen = mlen+CRYPTO_ABYTES;

	return KAT_SUCCESS;
}
/*
 * ACE-128 authenticated decryption.
 *
 *   m, mlen  : output buffer for the plaintext (clen - CRYPTO_ABYTES bytes)
 *              and the number of bytes written; *mlen is set on success only
 *   nsec     : unused
 *   c, clen  : ciphertext || tag and its total length in bytes
 *   ad, adlen: associated data and its length (ad is not read when adlen is 0)
 *   npub     : CRYPTO_NPUBBYTES-byte nonce
 *   k        : CRYPTO_KEYBYTES-byte key
 *
 * Returns KAT_SUCCESS when the tag verifies. Returns KAT_CRYPTO_FAILURE when
 * the input is shorter than a tag, when an internal phase fails, or when the
 * tag does not match.
 *
 * The state and the tag are small fixed-size buffers and are kept on the
 * stack, so no allocation can fail and nothing leaks on the error paths.
 */
int crypto_aead_decrypt(
	unsigned char *m,unsigned long long *mlen,
	unsigned char *nsec,
	const unsigned char *c,unsigned long long clen,
	const unsigned char *ad,unsigned long long adlen,
	const unsigned char *npub,
	const unsigned char *k
	)
{
	unsigned char state[STATEBYTES];
	unsigned char tag[CRYPTO_ABYTES];
	unsigned char i, lblen, diff;
	u64 j, clen1, c64len;

	(void)nsec; /* part of the API, not used by this scheme */

	//An input shorter than the tag cannot be valid; without this check the
	//subtraction below would wrap around
	if ( clen < CRYPTO_ABYTES )
		return(KAT_CRYPTO_FAILURE);

	clen1 = clen-CRYPTO_ABYTES;
	c64len = clen1/8;
	lblen = (unsigned char)(clen1%8);

	//Initialize state with "key" and "nonce" and then absorb "key" again
	if ( ace_init(state, npub, k) != KAT_SUCCESS )
		return(KAT_CRYPTO_FAILURE);

	//Absorbing "ad"
	if ( adlen != 0 )
	{
		if ( ace_ad( state, ad, adlen) != KAT_SUCCESS )
			return(KAT_CRYPTO_FAILURE);
	}

	//Decrypting the complete 64-bit blocks; the ciphertext block replaces
	//the rate part of the state
	for ( j = 0; j < c64len; j++ )
	{
		for ( i = 0; i < 8; i++ )
		{
			m[8*j+((u64)i)] = c[8*j+((u64)i)]^state[rate_bytes[i]];
			state[rate_bytes[i]] = c[8*j+((u64)i)];
		}
		//Domain separator
		state[STATEBYTES-1]^=(0x02);
		ace_permutation(state);
	}

	//Final padded block: the remaining clen1%8 bytes (none when clen1 is a
	//multiple of 8, including clen1 == 0) followed by the 10* padding
	for ( i = 0; i < lblen; i++ )
	{
		m[8*c64len+((u64)i)] = c[8*c64len+((u64)i)]^state[rate_bytes[i]];
		state[rate_bytes[i]] = c[8*c64len+((u64)i)];
	}
	state[rate_bytes[lblen]]^=(0x80); //Padding: 10*
	//Domain separator
	state[STATEBYTES-1]^=(0x02);
	ace_permutation(state);

	//Generating and verifying the tag
	if ( ace_gentag( tag, CRYPTO_ABYTES, state, k ) != KAT_SUCCESS )
		return(KAT_CRYPTO_FAILURE);

	//Accumulate the differences over the whole tag instead of stopping at
	//the first mismatch, so the run time does not depend on where it occurs
	diff = 0;
	for ( i = 0; i < CRYPTO_ABYTES; i++ )
		diff = (unsigned char)(diff | (c[clen1+(u64)i]^tag[i]));
	if ( diff != 0 )
		return(KAT_CRYPTO_FAILURE);

	*mlen = clen1;
	return KAT_SUCCESS;
}
//
// NIST-developed software is provided by NIST as a public service.
// You may use, copy and distribute copies of the software in any medium,
// provided that you keep intact this entire notice. You may improve,
// modify and create derivative works of the software or any portion of
// the software, and you may copy and distribute such modifications or
// works. Modified works should carry a notice stating that you changed
// the software and should note the date and nature of any such change.
// Please explicitly acknowledge the National Institute of Standards and
// Technology as the source of the software.
//
// NIST-developed software is expressly provided "AS IS." NIST MAKES NO
// WARRANTY OF ANY KIND, EXPRESS, IMPLIED, IN FACT OR ARISING BY OPERATION
// OF LAW, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT AND DATA ACCURACY. NIST
// NEITHER REPRESENTS NOR WARRANTS THAT THE OPERATION OF THE SOFTWARE WILL BE
// UNINTERRUPTED OR ERROR-FREE, OR THAT ANY DEFECTS WILL BE CORRECTED. NIST
// DOES NOT WARRANT OR MAKE ANY REPRESENTATIONS REGARDING THE USE OF THE SOFTWARE
// OR THE RESULTS THEREOF, INCLUDING BUT NOT LIMITED TO THE CORRECTNESS, ACCURACY,
// RELIABILITY, OR USEFULNESS OF THE SOFTWARE.
//
// You are solely responsible for determining the appropriateness of using and
// distributing the software and you assume all risks associated with its use,
// including but not limited to the risks and costs of program errors, compliance
// with applicable laws, damage to or loss of data, programs or equipment, and
// the unavailability or interruption of operation. This software is not intended
// to be used in any situation where a failure could cause risk of injury or
// damage to property. The software developed by NIST employees is not subject to
// copyright protection within the United States.
//
// disable deprecation for sprintf and fopen
#ifdef _MSC_VER
#define _CRT_SECURE_NO_WARNINGS
#endif
#include <stdio.h>
#include <string.h>
#include "crypto_aead.h"
#include "api.h"
/* Status codes returned by the generator. */
#define KAT_SUCCESS 0
#define KAT_FILE_OPEN_ERROR -1
#define KAT_DATA_ERROR -3
#define KAT_CRYPTO_FAILURE -4
/* Size of the buffer holding the output file name. */
#define MAX_FILE_NAME 256
/* Largest message / associated-data length (in bytes) covered by the vectors. */
#define MAX_MESSAGE_LENGTH 32
#define MAX_ASSOCIATED_DATA_LENGTH 32
/* Fill buffer[i] with the low byte of i. */
void init_buffer(unsigned char *buffer, unsigned long long numbytes);
/* Write label followed by the data as upper-case hex and a newline. */
void fprint_bstr(FILE *fp, const char *label, const unsigned char *data, unsigned long long length);
/* Produce the known-answer-test file; returns a KAT_* status code. */
int generate_test_vectors();
/*
 * Entry point: generate the test vectors and use the resulting status as the
 * process exit code, reporting any failure on stderr.
 */
int main()
{
	const int status = generate_test_vectors();

	if (status == KAT_SUCCESS)
		return status;

	fprintf(stderr, "test vector generation failed with code %d\n", status);
	return status;
}
/*
 * Write the known-answer-test file for every combination of message length
 * 0..MAX_MESSAGE_LENGTH and associated-data length
 * 0..MAX_ASSOCIATED_DATA_LENGTH.
 *
 * Each vector is encrypted, written out, decrypted again and compared with
 * the original plaintext. The first failure is reported in the output file
 * and stops the generation.
 *
 * Returns KAT_SUCCESS, KAT_FILE_OPEN_ERROR when the output file cannot be
 * created, or KAT_CRYPTO_FAILURE when a vector fails.
 */
int generate_test_vectors()
{
	FILE *kat;
	char kat_path[MAX_FILE_NAME];
	unsigned char key[CRYPTO_KEYBYTES];
	unsigned char nonce[CRYPTO_NPUBBYTES];
	unsigned char plain[MAX_MESSAGE_LENGTH];
	unsigned char recovered[MAX_MESSAGE_LENGTH];
	unsigned char ad[MAX_ASSOCIATED_DATA_LENGTH];
	unsigned char cipher[MAX_MESSAGE_LENGTH + CRYPTO_ABYTES];
	unsigned long long cipher_len, recovered_len;
	unsigned long long mlen, adlen;
	int vector_no = 0;
	int rc;
	int status = KAT_SUCCESS;

	init_buffer(key, sizeof(key));
	init_buffer(nonce, sizeof(nonce));
	init_buffer(plain, sizeof(plain));
	init_buffer(ad, sizeof(ad));

	sprintf(kat_path, "../LWC_AEAD_KAT_%d_%d.txt", (CRYPTO_KEYBYTES * 8), (CRYPTO_NPUBBYTES * 8));
	kat = fopen(kat_path, "w");
	if (kat == NULL) {
		fprintf(stderr, "Couldn't open <%s> for write\n", kat_path);
		return KAT_FILE_OPEN_ERROR;
	}

	/* Both loops stop as soon as a vector has failed. */
	for (mlen = 0; (status == KAT_SUCCESS) && (mlen <= MAX_MESSAGE_LENGTH); mlen++) {
		for (adlen = 0; (status == KAT_SUCCESS) && (adlen <= MAX_ASSOCIATED_DATA_LENGTH); adlen++) {
			fprintf(kat, "Count = %d\n", ++vector_no);
			fprint_bstr(kat, "Key = ", key, CRYPTO_KEYBYTES);
			fprint_bstr(kat, "Nonce = ", nonce, CRYPTO_NPUBBYTES);
			fprint_bstr(kat, "PT = ", plain, mlen);
			fprint_bstr(kat, "AD = ", ad, adlen);

			rc = crypto_aead_encrypt(cipher, &cipher_len, plain, mlen, ad, adlen, NULL, nonce, key);
			if (rc != 0) {
				fprintf(kat, "crypto_aead_encrypt returned <%d>\n", rc);
				status = KAT_CRYPTO_FAILURE;
				continue;
			}
			fprint_bstr(kat, "CT = ", cipher, cipher_len);
			fprintf(kat, "\n");

			rc = crypto_aead_decrypt(recovered, &recovered_len, NULL, cipher, cipher_len, ad, adlen, nonce, key);
			if (rc != 0) {
				fprintf(kat, "crypto_aead_decrypt returned <%d>\n", rc);
				status = KAT_CRYPTO_FAILURE;
			} else if (mlen != recovered_len) {
				fprintf(kat, "crypto_aead_decrypt returned bad 'mlen': Got <%llu>, expected <%llu>\n", recovered_len, mlen);
				status = KAT_CRYPTO_FAILURE;
			} else if (memcmp(plain, recovered, mlen) != 0) {
				fprintf(kat, "crypto_aead_decrypt did not recover the plaintext\n");
				status = KAT_CRYPTO_FAILURE;
			}
		}
	}

	fclose(kat);
	return status;
}
/*
 * Write `label`, then `length` bytes of `data` as two-digit upper-case hex
 * without separators, then a newline, to `fp`.
 */
void fprint_bstr(FILE *fp, const char *label, const unsigned char *data, unsigned long long length)
{
	const unsigned char *cursor = data;
	unsigned long long remaining = length;

	fputs(label, fp);
	while (remaining != 0) {
		fprintf(fp, "%02X", *cursor);
		cursor++;
		remaining--;
	}
	fputc('\n', fp);
}
/*
 * Fill `buffer` with the pattern 0, 1, 2, ...: byte i receives the low eight
 * bits of i, so the pattern repeats every 256 bytes.
 */
void init_buffer(unsigned char *buffer, unsigned long long numbytes)
{
	unsigned long long idx = 0;

	while (idx != numbytes) {
		buffer[idx] = (unsigned char)idx;
		++idx;
	}
}
This source diff could not be displayed because it is too large. You can view the blob instead.
# Builds the aceavx benchmark binary from speed.c and ace.c.
# The commented-out CC lines are alternative optimisation / instruction-set
# settings; exactly one CC definition is active.
#CC=gcc -O2 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -msse3 -mmmx -mavx -mavx2
#CC=gcc -Wall -O2 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -msse2 -mmmx -mavx -mavx2
CC=gcc -Wall -O2 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -mavx -mavx2
#CC=gcc -O1 -fomit-frame-pointer -march=haswell -mtune=native -mavx
#ivybridgei, skylake, sandybridge, haswell
all: aceavx_1
aceavx_1:aceavx
aceavx: speed.c ace.c
$(CC) -o $@ $^
.PHONY: clean
# -f keeps 'make clean' quiet when the binary does not exist.
clean:
rm -fr aceavx
/* Reference implementation of ACE-Hash256
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
#ifndef ACE_H
#define ACE_H
#include<stdint.h>
#include<x86intrin.h>
#include<smmintrin.h>
#include<immintrin.h>
#define STATEBYTES 40 // state size in bytes
#define STATEDWORD 10 // state size in 32-bit words: 320/32 = 10
#define SIMECKROUND 8
//#define NUMSTEPS 16
#define NUMSTEPS 16
#define PARAL_INST_BY8 1
//New
/*
 * Step-constant (SC*) and round-constant (RC*) tables, 16 entries each.
 * NOTE(review): being static definitions in a header, every translation unit
 * that includes this file gets its own copy.
 */
static const unsigned char SC0[16]={0x50,0x5c,0x91,0x8d,0x53,0x60,0x68,0xe1,0xf6,0x9d,0x40,0x4f,0xbe,0x5b,0xe9,0x7f}; //Step constants (SC_{2i})
static const unsigned char SC1[16]={0x28,0xae,0x48,0xc6,0xa9,0x30,0x34,0x70,0x7b,0xce,0x20,0x27,0x5f,0xad,0x74,0x3f}; //Step constants (SC_{2i+1})
static const unsigned char SC2[16]={0x14,0x57,0x24,0x63,0x54,0x18,0x9a,0x38,0xbd,0x67,0x10,0x13,0x2f,0xd6,0xba,0x1f}; //Step constants (SC_{2i+2})
static const unsigned char RC0[16]={0x07,0x0a,0x9b,0xe0,0xd1,0x1a,0x22,0xf7,0x62,0x96,0x71,0xaa,0x2b,0xe9,0xcf,0xb7};//Round constants (RC_{2i})
static const unsigned char RC1[16]={0x53,0x5d,0x49,0x7f,0xbe,0x1d,0x28,0x6c,0x82,0x47,0x6b,0x88,0xdc,0x8b,0x59,0xc6};//Round constants (RC_{2i+1})
static const unsigned char RC2[16]={0x43,0xe4,0x5e,0xcc,0x32,0x4e,0x75,0x25,0xfd,0xf9,0x76,0xa0,0xb0,0x09,0x1e,0xad};//Round constants (RC_{2i+2})
typedef unsigned long long int u64;
typedef unsigned int u32;
/* NOTE(review): despite its name, u8 is an unsigned int, not an 8-bit type --
   confirm this is intended before relying on its width. */
typedef unsigned int u8;
// Rotate every 32-bit lane of a 256-bit vector left by 5 / by 1 bit.
#define ROT5(x) (_mm256_slli_epi32(x, 5) | _mm256_srli_epi32(x, 27))
#define ROT1(x) (_mm256_slli_epi32(x, 1) | _mm256_srli_epi32(x, 31))
// Reorder the eight 32-bit lanes to (0,2,4,6,1,3,5,7): even lanes first, then odd lanes.
#define SWAPREG1(x) (_mm256_permutevar8x32_epi32(x, _mm256_set_epi32(7, 5, 3, 1, 6, 4, 2, 0)))
// Constant vector whose even lanes hold 0xfffffffe^t1 and odd lanes hold 0xfffffffe^t2.
#define RC(t1, t2) (_mm256_set_epi32(0xfffffffe^t2, 0xfffffffe^t1, 0xfffffffe^t2, 0xfffffffe^t1, 0xfffffffe^t2, 0xfffffffe^t1, 0xfffffffe^t2,0xfffffffe^t1))
// Constant vector: even lanes are all ones; odd lanes alternate 0xffffff00^t1 and 0xffffff00^t2.
#define SC(t1, t2) (_mm256_set_epi32(0xffffff00^t2, 0xffffffff, 0xffffff00^t1, 0xffffffff, 0xffffff00^t2, 0xffffffff, 0xffffff00^t1, 0xffffffff))
// Swap the two 64-bit elements inside each 128-bit half.
#define SWAPBLK(x) (_mm256_permute4x64_epi64(x, _MM_SHUFFLE(2,3,0,1)))
// Reorder the 32-bit lanes to (0,1,4,5,2,3,6,7).
#define SWAPREG2(x) (_mm256_permutevar8x32_epi32(x, _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0)))
// NOTE(review): this macro ignores its parameter x and permutes a variable named
// `xtmp` from the calling scope instead — confirm against the callers in ace.c
// whether `x` was intended.
#define SWAPAC(x) (_mm256_permutevar8x32_epi32(xtmp, _mm256_set_epi32(5, 4, 7, 6, 1, 0, 3, 2)))
// Masks selecting the low / high 128 bits of a 256-bit vector.
#define masklo (_mm256_set_epi32(0x0, 0x0, 0x0, 0x0, 0xffffffff, 0xffffffff, 0xffffffff,0xffffffff))
#define maskhi (_mm256_set_epi32(0xffffffff, 0xffffffff, 0xffffffff,0xffffffff,0x0, 0x0, 0x0, 0x0))
// Build a 256-bit vector with v1 in the low 128 bits and v0 in the high 128 bits.
#define _mm256_set_m128i(v0, v1) _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1)
// One round update on the pair (x, y), in place:
// x becomes (ROT5(x) & x) ^ ROT1(x) ^ RC(t1, t2) ^ y, and y becomes the old x.
#define ROAX(x, y, t1, t2)\
{\
__m256i x2tmp;\
x2tmp = x;\
x = (ROT5(x)&x)^ROT1(x)^RC(t1, t2)^y;\
y = x2tmp;\
}
// In place: apply SWAPREG1 to x and y, then x receives the low 128-bit halves of
// both results and y receives the high halves.
#define PACK_SSb(x, y)\
{\
__m256i xtmp, ytmp;\
xtmp = SWAPREG1(x);\
ytmp = SWAPREG1(y);\
x = _mm256_permute2x128_si256(xtmp,ytmp,0x20);\
y = _mm256_permute2x128_si256(xtmp,ytmp, 0x31);\
}
// In place: interleave the 32-bit lanes of x and y (unpacklo/unpackhi), then
// regroup the 128-bit halves; presumably the inverse of PACK_SSb — verify.
#define UNPACK_SSb(x, y)\
{\
__m256i xtmp, ytmp;\
xtmp = _mm256_unpacklo_epi32(x, y);\
ytmp = _mm256_unpackhi_epi32(x, y);\
x = _mm256_permute2x128_si256(xtmp, ytmp,0x20);\
y = _mm256_permute2x128_si256(xtmp, ytmp, 0x31);\
}
// In place on the pairs (x, z) and (y, w): apply SWAPREG2 to each register, then
// the first of the pair receives the low 128-bit halves and the second the high halves.
#define PACK(x, y, z, w)\
{\
__m256i x2tmp, x3tmp;\
x2tmp = SWAPREG2(x);\
x3tmp = SWAPREG2(z);\
x = _mm256_permute2x128_si256(x2tmp,x3tmp,0x20);\
z = _mm256_permute2x128_si256(x2tmp,x3tmp, 0x31);\
x2tmp = SWAPREG2(y);\
x3tmp = SWAPREG2(w);\
y = _mm256_permute2x128_si256(x2tmp,x3tmp,0x20);\
w = _mm256_permute2x128_si256(x2tmp,x3tmp, 0x31);\
}
// In place on the pairs (x, z) and (y, w): interleave 64-bit elements, then
// regroup the 128-bit halves; presumably the inverse of PACK — verify.
#define UNPACK(x,y,z,w)\
{\
__m256i x2tmp, x3tmp;\
x2tmp = _mm256_unpacklo_epi64(x, z);\
x3tmp = _mm256_unpackhi_epi64(x, z);\
x = _mm256_permute2x128_si256(x2tmp,x3tmp,0x20);\
z = _mm256_permute2x128_si256(x2tmp,x3tmp, 0x31);\
x2tmp = _mm256_unpacklo_epi64(y, w);\
x3tmp = _mm256_unpackhi_epi64(y, w);\
y = _mm256_permute2x128_si256(x2tmp,x3tmp,0x20);\
w = _mm256_permute2x128_si256(x2tmp,x3tmp, 0x31);\
}
// Defined outside this header (the Makefile builds ace.c alongside speed.c).
// NOTE(review): presumably ace320 applies the ACE permutation in place to `state`.
void ace320( u32 *state );
// speed.c calls this with `in` holding 8*PARAL_INST_BY8 messages of `inlen`
// 32-bit words each and `out` holding 8 words of digest per message.
int crypto_hash( u32 *out, u32 *in, u64 inlen );
#endif
#ifndef CLOCK_CYCLE_H
#define CLOCK_CYCLE_H
typedef unsigned long long int u64;
/*
 * Read the time-stamp counter at the start of a measured region.
 *
 * Executes CPUID followed by RDTSC, copies EDX into `high` and EAX into `low`,
 * and returns the combined 64-bit value (high << 32 | low). CPUID is issued
 * before the read, presumably as a serializing barrier so that earlier
 * instructions retire first. RAX, RBX, RCX and RDX are declared clobbered.
 *
 * NOTE(review): this is a non-static definition in a header; including the
 * header from more than one translation unit would yield duplicate symbols.
 */
u64 start_rdtsc( )
{
unsigned high, low;
__asm__ volatile("CPUID\n\t"
"RDTSC\n\t"
"mov %%edx, %0\n\t"
"mov %%eax, %1\n\t": "=r" (high),
"=r" (low):: "%rax", "%rbx", "%rcx", "%rdx");
return ( ((u64)low) | (((u64)high) << 32));
}
/*
 * Read the time-stamp counter at the end of a measured region.
 *
 * Executes RDTSCP, saves EDX/EAX into `high`/`low`, and only then issues
 * CPUID, presumably so that later instructions cannot start before the
 * counter has been read. Returns high << 32 | low. RAX, RBX, RCX and RDX are
 * declared clobbered.
 *
 * NOTE(review): this is a non-static definition in a header; including the
 * header from more than one translation unit would yield duplicate symbols.
 */
u64 end_rdtsc( )
{
unsigned high, low;
__asm__ volatile("RDTSCP\n\t"
"mov %%edx, %0\n\t"
"mov %%eax,%1\n\t"
"CPUID\n\t": "=r" (high), "=r" (low)::
"%rax", "%rbx", "%rcx", "%rdx");
return ( ((u64)low) | (((u64)high) << 32));
}
/*
 * Lightweight cycle-counter read with no CPUID barrier.
 *
 * The two .byte directives emit the raw opcode bytes 0x0F 0x31; the
 * commented-out macros below spell the same instruction as `rdtsc`. The high
 * half in RDX is then shifted left by 32 and OR-ed into RAX, which is returned
 * through the "=a" output. This form is 64-bit only (shlq/orq on rdx/rax).
 */
static inline u64 cpucycles( )
{
u64 result;
asm volatile (".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax"
: "=a" (result) :: "%rdx");
return result;
}
/*#ifdef __x86_64__
#define mycpucycles(RES) \
__asm__ volatile("rdtsc;shlq $32,%%rdx;orq %%rdx,%%rax" : "=a" (RES) :: "%rdx");
#else
#define mycpucycles(RES) \
__asm__ volatile(".byte 15;.byte 49" : "=A" (RES));
#endif
*/
#endif
\ No newline at end of file
/* Reference implementation of ACE-Hash256
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#include<math.h>
#include "ace.h"
#include "clock_cycle.h"
#define NUM_ITER 2000
#define NUM_TEST 500
/*
 * Dump the states of all parallel instances to stdout.
 *
 * `state` holds 8*PARAL_INST_BY8 consecutive states of STATEDWORD 32-bit words
 * each. Every state is printed on its own line as 8-digit uppercase hex words
 * with no separators.
 */
void print_state ( u32 *state )
{
	const u32 *row = state;
	u8 inst, word;

	for ( inst = 0; inst < 8*PARAL_INST_BY8; inst++ )
	{
		for ( word = 0; word < STATEDWORD; word++ )
			printf("%.8X", row[word]);
		printf("\n");
		row += STATEDWORD;
	}
}
/*
 * Speed test and demo driver for the 8-way parallel ACE-Hash256 code.
 *
 * Fills 8*PARAL_INST_BY8 identical messages of `plen` 32-bit words with a fixed
 * pattern, times NUM_ITER calls of crypto_hash() with start_rdtsc()/end_rdtsc()
 * and prints the cycles-per-byte figure of every call, then prints the
 * messages and their digests.
 *
 * Returns 0 on success and 1 if a buffer cannot be allocated.
 */
int main()
{
	u8 num_parallel_inst;
	u64 inst, word;
	int iter;
	u64 count_cc = 0;
	u32 *plaintext, *digest;
	u32 hlen;
	u64 plen;

	num_parallel_inst = 8*PARAL_INST_BY8;
	plen = 32; // Message length = plen*32 bits
	hlen = 8;  // Digest length: 256 = 32*8 bits

	digest = malloc(sizeof(u32)*hlen*num_parallel_inst);
	plaintext = malloc(sizeof(u32)*plen*num_parallel_inst);
	if ( digest == NULL || plaintext == NULL )
	{
		fprintf(stderr, "speed: out of memory\n");
		free(plaintext);
		free(digest);
		return(1);
	}

	// Deterministic test messages: word j of every message holds j mod 128.
	for ( inst = 0; inst < num_parallel_inst; inst++ )
	{
		for ( word = 0; word < plen; word++ )
			plaintext[inst*plen+word] = (u32)(word%128);
	}

	//===================================================================================================================
	// Hash Module//
	//===================================================================================================================
	for ( iter = 0; iter < NUM_ITER; iter++ )
	{
		count_cc = start_rdtsc();
		crypto_hash( digest, plaintext, plen );
		count_cc = end_rdtsc()-count_cc;
		// Cycles per byte: every instance hashes plen words of 4 bytes.
		printf("Hash speed = %f cpb\n", (double)(count_cc)/(double)(num_parallel_inst*plen*4));
	}
	// Repeat the figure of the last iteration as the final summary line.
	printf("Hash speed = %f cpb\n", (double)(count_cc)/(double)(num_parallel_inst*plen*4));

	crypto_hash( digest, plaintext, plen );

	printf("Original plaintext:\n");
	for ( inst = 0; inst < num_parallel_inst; inst++ )
	{
		for ( word = 0; word < plen; word++ )
			printf("%08X", plaintext[inst*plen+word]);
		printf("\n");
	}

	printf("Digest:\n");
	for ( inst = 0; inst < num_parallel_inst; inst++ )
	{
		for ( word = 0; word < hlen; word++ )
			printf("%08X", digest[inst*hlen+word]);
		printf("\n");
	}

	free(plaintext);
	free(digest);
	return(0);
}
# Build rules for the SSE2 speed benchmark; the resulting binary is `acesse2`.
# Alternative compiler settings, kept for reference:
#CC=gcc -O2 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -msse3 -mmmx -mavx -mavx2
#CC=gcc -Wall -O2 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -msse2 -mmmx -mavx -mavx2
# Active setting: -O2 with SSE2 enabled, tuned for the build host.
CC=gcc -Wall -O2 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -msse2
#CC=gcc -O1 -fomit-frame-pointer -march=haswell -mtune=native -mavx
# Presumably candidate -march targets: ivybridge, skylake, sandybridge, haswell
# `all` and `acesse2_1` are aliases that simply depend on the binary.
all: acesse2_1
acesse2_1:acesse2
# Compile and link both sources in one step ($@ = target, $^ = all prerequisites).
acesse2: speed.c ace.c
$(CC) -o $@ $^
.PHONY: clean
# Remove the built binary.
clean:
rm -fr acesse2
/* Reference implementation of ACE-Hash256
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
#ifndef ACE_H
#define ACE_H
#include<stdint.h>
#include<x86intrin.h>
#include<smmintrin.h>
// Size of one ACE state in bytes (320 bits / 8).
#define STATEBYTES 40
// Size of one ACE state in 32-bit words (320 bits / 32 = 10).
#define STATEDWORD 10
// Named after the Simeck round count; NOTE(review): usage is in ace.c, not visible here.
#define SIMECKROUND 8
// Named after the permutation step count; each constant table below has one entry per step.
#define NUMSTEPS 16
// Parallelism multiplier: speed.c processes 4*PARAL_INST_BY4 independent instances.
#define PARAL_INST_BY4 1
// Per-step constant tables, indexed by step number (0..15).
static const unsigned char SC0[16]={0x50,0x5c,0x91,0x8d,0x53,0x60,0x68,0xe1,0xf6,0x9d,0x40,0x4f,0xbe,0x5b,0xe9,0x7f}; //Step constants (SC_{2i})
static const unsigned char SC1[16]={0x28,0xae,0x48,0xc6,0xa9,0x30,0x34,0x70,0x7b,0xce,0x20,0x27,0x5f,0xad,0x74,0x3f}; //Step constants (SC_{2i+1})
static const unsigned char SC2[16]={0x14,0x57,0x24,0x63,0x54,0x18,0x9a,0x38,0xbd,0x67,0x10,0x13,0x2f,0xd6,0xba,0x1f}; //Step constants (SC_{2i+2})
static const unsigned char RC0[16]={0x07,0x0a,0x9b,0xe0,0xd1,0x1a,0x22,0xf7,0x62,0x96,0x71,0xaa,0x2b,0xe9,0xcf,0xb7};//Round constants (RC_{2i})
static const unsigned char RC1[16]={0x53,0x5d,0x49,0x7f,0xbe,0x1d,0x28,0x6c,0x82,0x47,0x6b,0x88,0xdc,0x8b,0x59,0xc6};//Round constants (RC_{2i+1})
static const unsigned char RC2[16]={0x43,0xe4,0x5e,0xcc,0x32,0x4e,0x75,0x25,0xfd,0xf9,0x76,0xa0,0xb0,0x09,0x1e,0xad};//Round constants (RC_{2i+2})
// Short integer aliases used throughout the implementation.
typedef unsigned long long int u64;
typedef unsigned int u32;
// NOTE: despite its name, u8 is an alias for unsigned int, not an 8-bit type.
typedef unsigned int u8;
// Rotate every 32-bit lane of a 128-bit vector left by 5 / by 1 bit.
#define ROT5(x) (_mm_slli_epi32(x, 5) | _mm_srli_epi32(x, 27))
#define ROT1(x) (_mm_slli_epi32(x, 1) | _mm_srli_epi32(x, 31))
// Constant vector whose even lanes hold 0xfffffffe^t1 and odd lanes hold 0xfffffffe^t2.
#define RC(t1, t2) (_mm_set_epi32(0xfffffffe^t2, 0xfffffffe^t1, 0xfffffffe^t2, 0xfffffffe^t1))
// Constant vector with lanes, lowest first: all ones, 0xffffff00^t1, all ones, 0xffffff00^t2.
#define SC(t1, t2) (_mm_set_epi32(0xffffff00^t2, 0xffffffff, 0xffffff00^t1, 0xffffffff ))
// Reorder the four 32-bit lanes to (0,2,1,3): even lanes first, then odd lanes.
#define SWAPREG1(x) (_mm_shuffle_epi32(x, _MM_SHUFFLE(3, 1, 2, 0)))
// Swap the low and high 64-bit halves (byte shifts by 8 in each direction, OR-ed).
#define SWAPBLK(x) (_mm_slli_si128(x, 8)|_mm_srli_si128(x, 8))
// Masks selecting the low / high 64 bits of a 128-bit vector.
#define masklo (_mm_set_epi32(0x0, 0x0, 0xffffffff, 0xffffffff ))
#define maskhi (_mm_set_epi32(0xffffffff, 0xffffffff, 0x0, 0x0 ))
// One round update on the pair (x, y), in place:
// x becomes (ROT5(x) & x) ^ ROT1(x) ^ RC(t1, t2) ^ y, and y becomes the old x.
#define ROAX(x, y, t1, t2)\
{\
__m128i xtmp;\
xtmp = x;\
x = (ROT5(x)&x)^ROT1(x)^RC(t1, t2)^y;\
y = xtmp;\
}
// In place: apply SWAPREG1 to x and y, then x receives the low 64-bit halves of
// both results and y receives the high halves.
#define PACK_SSb(x, y)\
{\
__m128i xtmp, ytmp;\
xtmp = SWAPREG1(x);\
ytmp = SWAPREG1(y);\
x = _mm_unpacklo_epi64(xtmp, ytmp);\
y = _mm_unpackhi_epi64(xtmp, ytmp);\
}
// In place: interleave the 32-bit lanes of x and y; presumably the inverse of
// PACK_SSb — verify.
#define UNPACK_SSb(x, y)\
{\
__m128i xtmp, ytmp;\
xtmp = _mm_unpacklo_epi32(x, y);\
ytmp = _mm_unpackhi_epi32(x, y);\
x = xtmp;\
y = ytmp;\
}
// Load four unaligned 128-bit blocks from state+i1 .. state+i4 and split them:
// x/z receive the low/high 64-bit halves of blocks i1 and i2, y/w those of i3 and i4.
#define PACK(x, y, z, w, state, i1, i2, i3, i4)\
{\
__m128i xtmp, ytmp;\
xtmp = _mm_loadu_si128((void *) (state + i1));\
ytmp = _mm_loadu_si128((void *) (state + i2));\
x = _mm_unpacklo_epi64(xtmp, ytmp);\
z = _mm_unpackhi_epi64(xtmp, ytmp);\
xtmp = _mm_loadu_si128((void *) (state + i3));\
ytmp = _mm_loadu_si128((void *) (state + i4));\
y = _mm_unpacklo_epi64(xtmp, ytmp);\
w = _mm_unpackhi_epi64(xtmp, ytmp);\
}
// In place on the pairs (x, z) and (y, w): interleave their 64-bit halves
// (register-only; unlike PACK it does not touch memory).
#define UNPACK(x, y, z, w)\
{\
__m128i xtmp, ytmp;\
xtmp = _mm_unpacklo_epi64(x, z);\
ytmp = _mm_unpackhi_epi64(x, z);\
x = xtmp;\
z = ytmp;\
xtmp = _mm_unpacklo_epi64(y, w);\
ytmp = _mm_unpackhi_epi64(y, w);\
y = xtmp;\
w = ytmp;\
}
// Defined outside this header (the Makefile builds ace.c alongside speed.c).
// NOTE(review): presumably ace320 applies the ACE permutation in place to `state`.
void ace320( u32 *state );
// speed.c calls this with `in` holding 4*PARAL_INST_BY4 messages of `inlen`
// 32-bit words each and `out` holding 8 words of digest per message.
int crypto_hash( u32 *out, u32 *in, u64 inlen );
#endif
#ifndef CLOCK_CYCLE_H
#define CLOCK_CYCLE_H
typedef unsigned long long int u64;
/*
 * Read the time-stamp counter at the start of a measured region.
 *
 * Executes CPUID followed by RDTSC, copies EDX into `high` and EAX into `low`,
 * and returns the combined 64-bit value (high << 32 | low). CPUID is issued
 * before the read, presumably as a serializing barrier so that earlier
 * instructions retire first. RAX, RBX, RCX and RDX are declared clobbered.
 *
 * NOTE(review): this is a non-static definition in a header; including the
 * header from more than one translation unit would yield duplicate symbols.
 */
u64 start_rdtsc( )
{
unsigned high, low;
__asm__ volatile("CPUID\n\t"
"RDTSC\n\t"
"mov %%edx, %0\n\t"
"mov %%eax, %1\n\t": "=r" (high),
"=r" (low):: "%rax", "%rbx", "%rcx", "%rdx");
return ( ((u64)low) | (((u64)high) << 32));
}
/*
 * Read the time-stamp counter at the end of a measured region.
 *
 * Executes RDTSCP, saves EDX/EAX into `high`/`low`, and only then issues
 * CPUID, presumably so that later instructions cannot start before the
 * counter has been read. Returns high << 32 | low. RAX, RBX, RCX and RDX are
 * declared clobbered.
 *
 * NOTE(review): this is a non-static definition in a header; including the
 * header from more than one translation unit would yield duplicate symbols.
 */
u64 end_rdtsc( )
{
unsigned high, low;
__asm__ volatile("RDTSCP\n\t"
"mov %%edx, %0\n\t"
"mov %%eax,%1\n\t"
"CPUID\n\t": "=r" (high), "=r" (low)::
"%rax", "%rbx", "%rcx", "%rdx");
return ( ((u64)low) | (((u64)high) << 32));
}
/*
 * Lightweight cycle-counter read with no CPUID barrier.
 *
 * The two .byte directives emit the raw opcode bytes 0x0F 0x31; the
 * commented-out macros below spell the same instruction as `rdtsc`. The high
 * half in RDX is then shifted left by 32 and OR-ed into RAX, which is returned
 * through the "=a" output. This form is 64-bit only (shlq/orq on rdx/rax).
 */
static inline u64 cpucycles( )
{
u64 result;
asm volatile (".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax"
: "=a" (result) :: "%rdx");
return result;
}
/*#ifdef __x86_64__
#define mycpucycles(RES) \
__asm__ volatile("rdtsc;shlq $32,%%rdx;orq %%rdx,%%rax" : "=a" (RES) :: "%rdx");
#else
#define mycpucycles(RES) \
__asm__ volatile(".byte 15;.byte 49" : "=A" (RES));
#endif
*/
#endif
\ No newline at end of file
/* Reference implementation of ACE-Hash256
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#include<math.h>
#include "ace.h"
#include "clock_cycle.h"
#define NUM_ITER 2000
#define NUM_TEST 500
/*
 * Dump the states of all parallel instances to stdout.
 *
 * `state` holds 4*PARAL_INST_BY4 consecutive states of STATEDWORD 32-bit words
 * each. Every state is printed on its own line as 8-digit uppercase hex words
 * with no separators.
 */
void print_state ( u32 *state )
{
	const u32 *row = state;
	u8 inst, word;

	for ( inst = 0; inst < 4*PARAL_INST_BY4; inst++ )
	{
		for ( word = 0; word < STATEDWORD; word++ )
			printf("%.8X", row[word]);
		printf("\n");
		row += STATEDWORD;
	}
}
/*
 * Speed test and demo driver for the 4-way parallel ACE-Hash256 code.
 *
 * Fills 4*PARAL_INST_BY4 identical messages of `plen` 32-bit words with a fixed
 * pattern, times NUM_ITER calls of crypto_hash() with start_rdtsc()/end_rdtsc()
 * and prints the cycles-per-byte figure of every call, then prints the
 * messages and their digests.
 *
 * Returns 0 on success and 1 if a buffer cannot be allocated.
 */
int main()
{
	u8 num_parallel_inst;
	u64 inst, word;
	int iter;
	u64 count_cc = 0;
	u32 *plaintext, *digest;
	u64 plen;
	u32 hlen;

	num_parallel_inst = 4*PARAL_INST_BY4;
	plen = 32; // Message length = plen*32 bits
	hlen = 8;  // Digest length: 256 = 32*8 bits

	digest = malloc(sizeof(u32)*hlen*num_parallel_inst);
	plaintext = malloc(sizeof(u32)*plen*num_parallel_inst);
	if ( digest == NULL || plaintext == NULL )
	{
		fprintf(stderr, "speed: out of memory\n");
		free(plaintext);
		free(digest);
		return(1);
	}

	// Deterministic test messages: word j of every message holds j mod 128.
	for ( inst = 0; inst < num_parallel_inst; inst++ )
	{
		for ( word = 0; word < plen; word++ )
			plaintext[inst*plen+word] = (u32)(word%128);
	}

	//===================================================================================================================
	// HASH Mode//
	//===================================================================================================================
	// Testing speed for ACE-Hash
	for ( iter = 0; iter < NUM_ITER; iter++ )
	{
		count_cc = start_rdtsc();
		crypto_hash ( digest, plaintext, plen );
		count_cc = end_rdtsc()-count_cc;
		// Cycles per byte: every instance hashes plen words of 4 bytes.
		printf("Hash speed = %f cpb\n", (double)(count_cc)/(double)(num_parallel_inst*plen*4));
	}
	// Repeat the figure of the last iteration as the final summary line.
	printf("Hash speed = %f cpb\n", (double)(count_cc)/(double)(num_parallel_inst*plen*4));

	// Computing the hash once more for display
	crypto_hash ( digest, plaintext, plen );

	printf("Original plaintext:\n");
	for ( inst = 0; inst < num_parallel_inst; inst++ )
	{
		for ( word = 0; word < plen; word++ )
			printf("%08X", plaintext[inst*plen+word]);
		printf("\n");
	}

	printf("Digest:\n");
	for ( inst = 0; inst < num_parallel_inst; inst++ )
	{
		for ( word = 0; word < hlen; word++ )
			printf("%08X", digest[inst*hlen+word]);
		printf("\n");
	}

	free(plaintext);
	free(digest);
	return(0);
}
-- This work is licensed under a Creative Commons
-- Attribution-NonCommercial-ShareAlike 4.0 International License.
-- http://creativecommons.org/licenses/by-nc-sa/4.0
-- Mark D. Aagaard
-- Riham AlTawy
-- Guang Gong
-- Kalikinkar Mandal
-- Raghvendra Rohit
-- Marat Sattarov
-- http://comsec.uwaterloo.ca
-- This is a human-readable summary of (and not a substitute for) the license.
-- You are free to:
-- Share — copy and redistribute the material in any medium or format
-- Adapt — remix, transform, and build upon the material
-- The licensor cannot revoke these freedoms as long as you follow
-- the license terms.
-- Under the following terms:
-- Attribution — You must give appropriate credit, provide a link to
-- the license, and indicate if changes were made. You may do so in
-- any reasonable manner, but not in any way that suggests the
-- licensor endorses you or your use.
-- NonCommercial — You may not use the material for commercial
-- purposes.
-- ShareAlike — If you remix, transform, or build upon the material,
-- you must distribute your contributions under the same license as
-- the original.
-- No additional restrictions — You may not apply legal terms or
-- technological measures that legally restrict others from doing
-- anything the license permits.
-- Structural top level of ACE: wires the datapath (u_dp) to the controller
-- (u_ctl). The controller produces the control word and the one-hot phase
-- vector consumed by the datapath, and drives o_valid / o_ready; the datapath
-- drives o_data.
architecture rtl of ace is
  signal ctl_control : ace_ctl_ty;  -- control word, ctl -> dp
  signal ctl_onehot  : onehot_ty;   -- one-hot phase vector, ctl -> dp
begin

  u_dp :
    entity work.dp port map
      ( clk       => clk
      , reset     => reset
      , i_mode    => i_mode
      , i_control => ctl_control
      , i_onehot  => ctl_onehot
      , i_dom_sep => i_dom_sep
      , i_valid   => i_valid
      , i_data    => i_data
      , i_padding => i_padding
      , o_data    => o_data
      );

  u_ctl :
    entity work.ctl port map
      ( clk       => clk
      , reset     => reset
      , i_mode    => i_mode
      , i_dom_sep => i_dom_sep
      , i_valid   => i_valid
      , i_padding => i_padding
      , o_valid   => o_valid
      , o_onehot  => ctl_onehot
      , o_ready   => o_ready
      , o_control => ctl_control
      );

end architecture;
-- This work is licensed under a Creative Commons
-- Attribution-NonCommercial-ShareAlike 4.0 International License.
-- http://creativecommons.org/licenses/by-nc-sa/4.0
-- Mark D. Aagaard
-- Riham AlTawy
-- Guang Gong
-- Kalikinkar Mandal
-- Raghvendra Rohit
-- Marat Sattarov
-- http://comsec.uwaterloo.ca
-- This is a human-readable summary of (and not a substitute for) the license.
-- You are free to:
-- Share — copy and redistribute the material in any medium or format
-- Adapt — remix, transform, and build upon the material
-- The licensor cannot revoke these freedoms as long as you follow
-- the license terms.
-- Under the following terms:
-- Attribution — You must give appropriate credit, provide a link to
-- the license, and indicate if changes were made. You may do so in
-- any reasonable manner, but not in any way that suggests the
-- licensor endorses you or your use.
-- NonCommercial — You may not use the material for commercial
-- purposes.
-- ShareAlike — If you remix, transform, or build upon the material,
-- you must distribute your contributions under the same license as
-- the original.
-- No additional restrictions — You may not apply legal terms or
-- technological measures that legally restrict others from doing
-- anything the license permits.
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.ace_pkg.all;
-- Top-level interface of the ACE core. All ports are forwarded unchanged to
-- the datapath (dp) and/or controller (ctl) instances in architecture rtl.
entity ace is
port
( clk : in std_logic;
reset : in std_logic;
i_mode : in mode_ty; -- operating mode (see the *_mode constants in ace_pkg)
i_dom_sep : in domsep_ty; -- 2-bit domain separator
i_valid : in std_logic; -- input handshake; NOTE(review): presumably qualifies i_data — confirm in ctl
i_data : in word; -- 64-bit input word
i_padding : in std_logic; -- forwarded to dp and ctl; meaning defined there
o_valid : out std_logic; -- driven by ctl
o_ready : out std_logic; -- driven by ctl
o_data : out word -- 64-bit output word, driven by dp
);
end entity;
-- This work is licensed under a Creative Commons
-- Attribution-NonCommercial-ShareAlike 4.0 International License.
-- http://creativecommons.org/licenses/by-nc-sa/4.0
-- Mark D. Aagaard
-- Riham AlTawy
-- Guang Gong
-- Kalikinkar Mandal
-- Raghvendra Rohit
-- Marat Sattarov
-- http://comsec.uwaterloo.ca
-- This is a human-readable summary of (and not a substitute for) the license.
-- You are free to:
-- Share — copy and redistribute the material in any medium or format
-- Adapt — remix, transform, and build upon the material
-- The licensor cannot revoke these freedoms as long as you follow
-- the license terms.
-- Under the following terms:
-- Attribution — You must give appropriate credit, provide a link to
-- the license, and indicate if changes were made. You may do so in
-- any reasonable manner, but not in any way that suggests the
-- licensor endorses you or your use.
-- NonCommercial — You may not use the material for commercial
-- purposes.
-- ShareAlike — If you remix, transform, or build upon the material,
-- you must distribute your contributions under the same license as
-- the original.
-- No additional restrictions — You may not apply legal terms or
-- technological measures that legally restrict others from doing
-- anything the license permits.
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
-- Shared types, constants and helper functions for the ACE hardware design.
package ace_pkg is
-- Constant-generating LFSR: lfsr_c_sz state bits, plus 3 extra output bits
-- (lfsr_c_output is 10 bits wide, indexed 0 to 9).
constant lfsr_c_sz : integer := 7;
subtype lfsr_c_output is std_logic_vector(0 to lfsr_c_sz+2);
------------------------------------------------------------
-- Data widths: a word is 64 bits and consists of two 32-bit half words.
constant half_word_sz : natural := 32;
constant word_sz : natural := 2*half_word_sz;
subtype half_word is std_logic_vector( 0 to half_word_sz - 1 );
subtype word is std_logic_vector( 0 to word_sz - 1 );
type word_vector is array( natural range <> ) of word;
type half_word_vector is array( natural range <> ) of half_word;
------------------------------------------------------------
-- State: five 64-bit words A, B, C, D, E (320 bits),
-- i.e. word indices 0..4 or half-word indices 0..9.
constant state_sz : natural := 320;
constant word_max_idx : natural := state_sz / word_sz - 1;
constant half_word_max_idx : natural := state_sz / half_word_sz - 1;
constant key_sz : natural := 128;
constant nonce_sz : natural := 128;
subtype word_state_ty is word_vector ( 0 to word_max_idx );
constant a_idx : natural := 0;
constant b_idx : natural := 1;
constant c_idx : natural := 2;
constant d_idx : natural := 3;
constant e_idx : natural := 4;
subtype half_word_data is half_word_vector ( 0 to 1 );
subtype half_word_state_ty is half_word_vector ( 0 to half_word_max_idx );
-- Half-word positions inside the state. For each word X, X1 sits at the even
-- index (bits 0 to 31 of the word in half_words_to_words) and X0 at the odd
-- index (bits 32 to 63).
constant a0_idx : natural := 1;
constant a1_idx : natural := 0;
constant b0_idx : natural := 3;
constant b1_idx : natural := 2;
constant c0_idx : natural := 5;
constant c1_idx : natural := 4;
constant d0_idx : natural := 7;
constant d1_idx : natural := 6;
constant e0_idx : natural := 9;
constant e1_idx : natural := 8;
-- Conversion helpers (bodies in the package body).
function b2x( b : boolean ) return std_logic;
function half_words_to_words( st : half_word_state_ty ) return word_state_ty;
function words_to_half_words( st : word_state_ty ) return half_word_state_ty;
------------------------------------------------------------
-- mode
subtype mode_ty is std_logic_vector( 1 downto 0 ); -- top-level input
constant encrypt_mode : mode_ty := ( 1 => '0', 0 => '0' );
constant decrypt_mode : mode_ty := ( 1 => '0', 0 => '1' );
constant absorb_mode : mode_ty := ( 1 => '1', 0 => '0' );
constant squeeze_mode : mode_ty := ( 1 => '1', 0 => '1' );
subtype domsep_ty is std_logic_vector( 1 downto 0 ); -- top-level input
-- derived control (from counter and more); the *_idx constants below name the
-- bit positions inside ace_ctl_ty
subtype ace_ctl_ty is std_logic_vector( 7 downto 0 );
constant absorb_idx : natural := 0;
constant replace_idx : natural := 1;
constant output_idx : natural := 2;
constant endstep_idx : natural := 3;
constant permoff_idx : natural := 4;
constant squeeze_idx : natural := 5;
constant lfsr_c_reset_idx : natural := 6;
constant lfsr_c_en_idx : natural := 7;
-- extra control for load, init, fin, tag, squeeze
subtype onehot_ty is std_logic_vector( 3 downto 0); -- extra control for load, init, fin, tag, squeeze
------------------------------------------------------------
-- round and step counters
-- use last bit for end ACE perm - for o_ready
-- -> i_valid will reset the counter!
-- counter only runs if msb = 0
constant bits_counter : natural := 8;
subtype count_ty is unsigned( bits_counter - 1 downto 0 );
----------------------------------------------------------------------
-- standard vhdl operators
-- function "sll"( a : half_word; n : natural ) return half_word;
function onehot_rotate (a : onehot_ty) return onehot_ty;
function vector_to_data ( st : half_word_data ) return word;
function data_to_vector ( st : word ) return half_word_data;
----------------------------------------------------------------------
end package;
----------------------------------------------------------------------
--
----------------------------------------------------------------------
package body ace_pkg is

  ------------------------------------------------------------
  -- control helpers
  ------------------------------------------------------------

  -- Rotate the one-hot vector by one position towards the high index:
  -- bit k moves to bit k+1 and the highest bit wraps around to bit 0.
  function onehot_rotate (a : onehot_ty)
    return onehot_ty
  is
    variable z : onehot_ty;
  begin
    z(onehot_ty'high downto 1) := a(onehot_ty'high - 1 downto 0);
    z(0) := a(onehot_ty'high);
    return z;
  end function;

  -- Convert a boolean to std_logic: true -> '1', false -> '0'.
  function b2x( b : boolean ) return std_logic is
  begin
    if b then
      return '1';
    else
      return '0';
    end if;
  end function;

  ------------------------------------------------------------
  -- state functions
  ------------------------------------------------------------

  -- Pack the ten half words of a state into five words. Half word 2*i fills
  -- bits 0 to 31 of word i and half word 2*i+1 fills bits 32 to 63.
  -- words_to_half_words below uses the same mapping in reverse, so the two
  -- functions are mutual inverses.
  function half_words_to_words( st : half_word_state_ty )
    return word_state_ty
  is
    variable z : word_state_ty;
  begin
    main_loop : for i in 0 to word_max_idx loop
      z(i)(0 to half_word_sz - 1)       := st(2*i);
      z(i)(half_word_sz to word_sz - 1) := st(2*i+1);
    end loop;
    return z;
  end function;

  -- Split the five words of a state into ten half words. Bits 0 to 31 of
  -- word i become half word 2*i and bits 32 to 63 become half word 2*i+1.
  function words_to_half_words( st : word_state_ty )
    return half_word_state_ty
  is
    variable z : half_word_state_ty;
  begin
    main_loop : for i in 0 to word_max_idx loop
      z(2*i)   := st(i)(0 to half_word_sz - 1);
      z(2*i+1) := st(i)(half_word_sz to word_sz - 1);
    end loop;
    return z;
  end function;

  -- Split one 64-bit data word into two half words:
  -- element 0 = bits 0 to 31, element 1 = bits 32 to 63.
  function data_to_vector( st : word )
    return half_word_data
  is
    variable z : half_word_data;
  begin
    z(0) := st(0 to half_word_sz - 1);
    z(1) := st(half_word_sz to word_sz - 1);
    return z;
  end function;

  -- Join two half words into one 64-bit data word (inverse of data_to_vector).
  function vector_to_data( st : half_word_data )
    return word
  is
    variable z : word;
  begin
    z(0 to half_word_sz - 1)       := st(0);
    z(half_word_sz to word_sz - 1) := st(1);
    return z;
  end function;

end package body;
# Simulation driver for the ACE testbench.
# Expects two variables set by the caller: gui_mode (boolean) and sim_mode.
# In GUI mode the wave window is populated first; in every mode the run is
# recorded to ace.vcd, and a batch run exits when the simulation finishes.
if { $gui_mode } {
    # Top-level testbench signals.
    foreach sig {
        clk reset i_mode i_dom_sep o_ready
        i_valid i_data i_padding o_valid o_data
    } {
        add wave $sig
    }

    if { $sim_mode eq "PROG_MODE" } {
        # Handshake overview.
        add wave -noupdate -divider -height 32 STUFF
        foreach sig {
            /uut/u_ctl/state
            /uut/u_ctl/o_ready
            /uut/u_ctl/i_valid
            /uut/u_dp/i_data
            /uut/u_ctl/o_valid
            /uut/u_dp/o_data
        } {
            add wave $sig
        }

        # Datapath internals.
        add wave -noupdate -divider -height 32 DP
        add wave -radix binary /uut/u_dp/ctl_const
        add wave /uut/u_dp/i_data
        add wave /uut/u_dp/o_data
        add wave -radix binary /uut/u_dp/ctl_const
        foreach sig {
            lfsr_c_en lfsr_c_reset permoff endstep
            absorb replace output dsxor
            post_input pre_round post_round post_xor
            post_step_const post_linear ace_path ace_state
        } {
            add wave /uut/u_dp/$sig
        }

        # Controller internals.
        add wave -noupdate -divider -height 32 CTL
        add wave /uut/u_ctl/state
        add wave -radix unsigned /uut/u_ctl/count
        foreach sig { i_valid o_valid o_ready } {
            add wave /uut/u_ctl/$sig
        }
        add wave -radix binary /uut/u_ctl/onehot
        add wave /uut/u_ctl/lfsr_c_reset
        add wave -radix binary /uut/u_ctl/i_mode
        add wave -radix binary /uut/u_ctl/i_dom_sep
    }
}

# Record the run to a VCD file.
vcd file ace.vcd
vcd add /ace_tb/uut/*
vcd add -r *
vcd on
run -all
vcd checkpoint
vcd off
vcd flush

if { $gui_mode } {
    wave zoom full
} else {
    exit
}
-- This work is licensed under a Creative Commons
-- Attribution-NonCommercial-ShareAlike 4.0 International License.
-- http://creativecommons.org/licenses/by-nc-sa/4.0
-- Mark D. Aagaard
-- Riham AlTawy
-- Guang Gong
-- Kalikinkar Mandal
-- Raghvendra Rohit
-- Marat Sattarov
-- http://comsec.uwaterloo.ca
-- This is a human-readable summary of (and not a substitute for) the license.
-- You are free to:
-- Share — copy and redistribute the material in any medium or format
-- Adapt — remix, transform, and build upon the material
-- The licensor cannot revoke these freedoms as long as you follow
-- the license terms.
-- Under the following terms:
-- Attribution — You must give appropriate credit, provide a link to
-- the license, and indicate if changes were made. You may do so in
-- any reasonable manner, but not in any way that suggests the
-- licensor endorses you or your use.
-- NonCommercial — You may not use the material for commercial
-- purposes.
-- ShareAlike — If you remix, transform, or build upon the material,
-- you must distribute your contributions under the same license as
-- the original.
-- No additional restrictions — You may not apply legal terms or
-- technological measures that legally restrict others from doing
-- anything the license permits.
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.ace_pkg.all;
-- Datapath of the ACE core: holds the 320-bit state and applies the
-- round / step logic under control of i_control.
entity dp is
port
( clk : in std_logic
; reset : in std_logic
; i_mode : in mode_ty -- operating mode (see the *_mode constants in ace_pkg)
; i_control : in ace_ctl_ty -- control word; bit positions are the *_idx constants in ace_pkg
; i_onehot : in onehot_ty -- one-hot phase vector from the controller
; i_dom_sep : in domsep_ty -- 2-bit domain separator, XORed into the state while i_valid = '1'
; i_valid : in std_logic
; i_data : in word -- 64-bit input word
; i_padding : in std_logic
; o_data : out word -- 64-bit output word
);
end entity;
-- Datapath architecture: one clock cycle applies the 64-bit S-box round to
-- words A, C and E; when `endstep` is asserted the step XORs, step constants
-- and the word permutation are applied as well before the state register is
-- updated.
--
-- The constant generator is the entity `lfsr_c`, and its control bits are
-- taken from i_control at lfsr_c_reset_idx / lfsr_c_en_idx, matching the
-- names declared in ace_pkg and on the lfsr_c entity.
--
-- NOTE(review): in this architecture the ports reset, i_mode, i_onehot and
-- i_padding are not read, the decoded signals absorb / replace / output /
-- permoff / squeeze are not used further, and only o_data_vector(0) is driven
-- (o_data_vector(1) has no driver) — confirm whether this is the reduced
-- datapath variant and whether the second output half is intentionally left
-- undriven.
architecture rtl of dp is

  signal permoff, endstep, squeeze,
         absorb, replace, output,
         lfsr_c_reset, lfsr_c_en      : std_logic;

  signal ace_state, post_input        : half_word_state_ty;

  signal pre_round, post_round,
         post_xor, post_step_const,
         post_linear, ace_path        : word_state_ty;

  signal dsxor                        : half_word;
  signal i_data_vector, o_data_vector : half_word_data;
  signal ctl_const                    : lfsr_c_output;

begin

  -- round / step constant generator
  u_lfsr :
    entity work.lfsr_c port map
      ( clk          => clk
      , lfsr_c_en    => lfsr_c_en
      , lfsr_c_reset => lfsr_c_reset
      , o_const      => ctl_const
      );

  i_data_vector <= data_to_vector( i_data );
  o_data        <= vector_to_data( o_data_vector );

  -- decode the control word
  absorb       <= i_control( absorb_idx );
  replace      <= i_control( replace_idx );
  output       <= i_control( output_idx );
  endstep      <= i_control( endstep_idx );
  permoff      <= i_control( permoff_idx );
  squeeze      <= i_control( squeeze_idx );
  lfsr_c_reset <= i_control( lfsr_c_reset_idx );
  lfsr_c_en    <= i_control( lfsr_c_en_idx );

  ----------------------------------------------------------------------
  -- post input: the state passes through unchanged, except that the domain
  -- separator is XORed into the two last bits of E0 while i_valid = '1'
  post_input( a1_idx ) <= ace_state( a1_idx );
  post_input( a0_idx ) <= ace_state( a0_idx );
  post_input( b0_idx ) <= ace_state( b0_idx );
  post_input( b1_idx ) <= ace_state( b1_idx );
  post_input( c0_idx ) <= ace_state( c0_idx );
  post_input( c1_idx ) <= ace_state( c1_idx );
  post_input( d0_idx ) <= ace_state( d0_idx );
  post_input( d1_idx ) <= ace_state( d1_idx );
  post_input( e1_idx ) <= ace_state( e1_idx );

  dsxor( 0 to half_word_sz - 3 ) <= ( others => '0' );
  dsxor( half_word_sz - 2)       <= i_dom_sep(1);
  dsxor( half_word_sz - 1)       <= i_dom_sep(0);

  post_input( e0_idx ) <= dsxor xor ace_state( e0_idx ) when (i_valid = '1')
                          else ace_state( e0_idx );

  ----------------------------------------------------------------------
  -- output: first half word is A1
  o_data_vector(0) <= ace_state(a1_idx);

  ----------------------------------------------------------------------
  -- sb 64 ==> post round (A, C and E go through the S-box; B and D pass)
  pre_round <= half_words_to_words( post_input );

  a_sb_64 :
    entity work.sb_64 port map
      ( i_state => pre_round( a_idx )
      , i_rc    => ctl_const( lfsr_c_sz + 2 ) -- rc0
      , o_state => post_round( a_idx )
      );

  post_round( b_idx ) <= pre_round( b_idx );

  c_sb_64 :
    entity work.sb_64 port map
      ( i_state => pre_round( c_idx )
      , i_rc    => ctl_const( lfsr_c_sz + 1 ) -- rc1
      , o_state => post_round( c_idx )
      );

  post_round( d_idx ) <= pre_round( d_idx );

  e_sb_64 :
    entity work.sb_64 port map
      ( i_state => pre_round( e_idx )
      , i_rc    => ctl_const( lfsr_c_sz ) -- rc2
      , o_state => post_round( e_idx )
      );

  ----------------------------------------------------------------------
  -- XORs to the left ==> post xor
  post_xor( a_idx ) <= post_round( a_idx );
  post_xor( c_idx ) <= post_round( c_idx );
  post_xor( b_idx ) <= post_round( b_idx ) xor post_round( c_idx );
  post_xor( d_idx ) <= post_round( d_idx ) xor post_round( e_idx );
  post_xor( e_idx ) <= post_round( e_idx ) xor post_round( a_idx );

  ----------------------------------------------------------------------
  -- XOR with step constant ==> post step const
  -- B, D and E: bits 0 to 55 are inverted, bits 56 to 63 are XORed with an
  -- 8-bit window of the constant generator output
  post_step_const( a_idx ) <= post_xor( a_idx );
  post_step_const( c_idx ) <= post_xor( c_idx );

  post_step_const( b_idx)( 0 to 55)  <= not post_xor( b_idx )( 0 to 55 );
  post_step_const( b_idx)( 56 to 63) <= post_xor( b_idx )( 56 to 63 ) xor ctl_const( 2 to lfsr_c_sz + 2 ); -- sc0

  post_step_const( d_idx)( 0 to 55)  <= not post_xor( d_idx )( 0 to 55 );
  post_step_const( d_idx)( 56 to 63) <= post_xor( d_idx )( 56 to 63 ) xor ctl_const( 1 to lfsr_c_sz + 1 ); -- sc1

  post_step_const( e_idx)( 0 to 55)  <= not post_xor( e_idx )( 0 to 55 );
  post_step_const( e_idx)( 56 to 63) <= post_xor( e_idx )( 56 to 63 ) xor ctl_const( 0 to lfsr_c_sz ); -- sc2

  ------------------------------------------------------------
  -- post linear layer pi = (3,2,0,4,1) ==> post linear
  post_linear( a_idx ) <= post_step_const( d_idx );
  post_linear( b_idx ) <= post_step_const( c_idx );
  post_linear( c_idx ) <= post_step_const( a_idx );
  post_linear( d_idx ) <= post_step_const( e_idx );
  post_linear( e_idx ) <= post_step_const( b_idx );

  ------------------------------------------------------------
  -- update state: take the full step result on the last round of a step,
  -- otherwise only the round result
  ace_path( a_idx ) <= post_linear( a_idx ) when endstep = '1' else post_round( a_idx );
  ace_path( b_idx ) <= post_linear( b_idx ) when endstep = '1' else post_round( b_idx );
  ace_path( c_idx ) <= post_linear( c_idx ) when endstep = '1' else post_round( c_idx );
  ace_path( d_idx ) <= post_linear( d_idx ) when endstep = '1' else post_round( d_idx );
  ace_path( e_idx ) <= post_linear( e_idx ) when endstep = '1' else post_round( e_idx );

  -- state register (no reset; reloaded on every rising clock edge)
  process
  begin
    wait until rising_edge( clk );
    ace_state <= words_to_half_words(ace_path);
  end process;

end architecture;
-- This work is licensed under a Creative Commons
-- Attribution-NonCommercial-ShareAlike 4.0 International License.
-- http://creativecommons.org/licenses/by-nc-sa/4.0
-- Mark D. Aagaard
-- Riham AlTawy
-- Guang Gong
-- Kalikinkar Mandal
-- Raghvendra Rohit
-- Marat Sattarov
-- http://comsec.uwaterloo.ca
-- This is a human-readable summary of (and not a substitute for) the license.
-- You are free to:
-- Share — copy and redistribute the material in any medium or format
-- Adapt — remix, transform, and build upon the material
-- The licensor cannot revoke these freedoms as long as you follow
-- the license terms.
-- Under the following terms:
-- Attribution — You must give appropriate credit, provide a link to
-- the license, and indicate if changes were made. You may do so in
-- any reasonable manner, but not in any way that suggests the
-- licensor endorses you or your use.
-- NonCommercial — You may not use the material for commercial
-- purposes.
-- ShareAlike — If you remix, transform, or build upon the material,
-- you must distribute your contributions under the same license as
-- the original.
-- No additional restrictions — You may not apply legal terms or
-- technological measures that legally restrict others from doing
-- anything the license permits.
library ieee;
use ieee.std_logic_1164.all;
use work.ace_pkg.all;
-- lfsr_c: linear feedback shift register used for constant generation
-- (the README lists lfsr.vhd as "lfsr for step / round constant generation").
entity lfsr_c is
port
( clk : in std_logic -- state is updated on the rising edge
; lfsr_c_en : in std_logic -- when '1' (and reset is '0') the register advances by three positions
; lfsr_c_reset : in std_logic -- synchronous reset: loads all state bits with '1'; takes priority over enable
; o_const : out lfsr_c_output -- current state bits together with the three feedback bits
);
end lfsr_c;
architecture rtl of lfsr_c is
  -- sa: registered LFSR state.
  -- xa: the state extended at the top with the three feedback bits that will
  --     be shifted in on the next enabled clock edge.
  signal sa : std_logic_vector(lfsr_c_sz - 1 downto 0);
  signal xa : std_logic_vector(lfsr_c_sz + 2 downto 0);
begin

  -- Lower part of xa is simply the current state.
  xa(lfsr_c_sz - 1 downto 0) <= sa;

  -- Feedback: each of the three new bits is the xor of two adjacent low bits
  -- (bit lfsr_c_sz+k = xa(k+1) xor xa(k), for k = 0..2).
  xa(lfsr_c_sz + 2 downto lfsr_c_sz) <= xa(3 downto 1) xor xa(2 downto 0);

  -- Constants are taken from the extended vector.
  -- "to" type <= "downto" type: the index flip is intended.
  o_const <= xa;

  -- Synchronous reset to all ones; otherwise, when enabled, shift the
  -- extended vector down by three positions into the state register.
  lfsr_step : process (clk)
  begin
    if rising_edge(clk) then
      if lfsr_c_reset = '1' then
        sa <= (others => '1');
      elsif lfsr_c_en = '1' then
        sa <= xa(lfsr_c_sz + 2 downto 3);
      end if;
    end if;
  end process lfsr_step;

end;
------------ ACE readme file ---------------
----- list of files for ACE synthesis: -----
ace_pkg.vhd -- main package
sb_64.vhd -- s-box with simeck
lfsr.vhd -- lfsr for step / round constant generation
ctl.vhd -- control (FSM)
dp.vhd -- datapath
ace.vhd -- top level entity declaration
ace-rtl.vhd -- top level architecture
----- additional files for simulation: -----
util_unsynth.vhd -- functions used in TB (general purpose)
ace_unsynth.vhd -- specific ACE functions and procedures used in TB
ace_tb.vhd -- ACE testbench
-------------- pure datapath ---------------
dp_pure.vhd -- datapath with most input/output multiplexers removed
----------- TB info (ace_tb.vhd): ----------
********
EDH is a 3-bit constant used to select which modes to test
"100" - encryption only
"010" - decryption only
"001" - hash only
"110" - encryption and decryption
etc.
********
stim_file_path -- stimulus file
output_file_path -- output file
********
------------ stimulus file format --------------
1 file = 1 set of Key, Nonce, AD, Plaintext and Ciphertext
K 00111122335588DD00111122335588DD <--- 128 bits of Key (all 128 bits in a single line)
N 111122335588DD00111122335588DD00 <--- 128 bits of Nonce (all 128 bits in a single line)
A 1122335588DD00111122335588DD00 <--- from 4 to 128 bits of AD
P 335588DD00111122335588DD001111 <--- from 4 to 128 bits of Plaintext
C F9362385DC213A07CEFEF38C34CEFF <--- from 4 to 128 bits of Ciphertext
--- padding is done by testbench
--- multiple lines for AD, Plaintext and Ciphertext are supported
-- This work is licensed under a Creative Commons
-- Attribution-NonCommercial-ShareAlike 4.0 International License.
-- http://creativecommons.org/licenses/by-nc-sa/4.0
-- Mark D. Aagaard
-- Riham AlTawy
-- Guang Gong
-- Kalikinkar Mandal
-- Raghvendra Rohit
-- Marat Sattarov
-- http://comsec.uwaterloo.ca
-- This is a human-readable summary of (and not a substitute for) the license.
-- You are free to:
-- Share — copy and redistribute the material in any medium or format
-- Adapt — remix, transform, and build upon the material
-- The licensor cannot revoke these freedoms as long as you follow
-- the license terms.
-- Under the following terms:
-- Attribution — You must give appropriate credit, provide a link to
-- the license, and indicate if changes were made. You may do so in
-- any reasonable manner, but not in any way that suggests the
-- licensor endorses you or your use.
-- NonCommercial — You may not use the material for commercial
-- purposes.
-- ShareAlike — If you remix, transform, or build upon the material,
-- you must distribute your contributions under the same license as
-- the original.
-- No additional restrictions — You may not apply legal terms or
-- technological measures that legally restrict others from doing
-- anything the license permits.
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.ace_pkg.all;
-- sb_64: one round of the 64-bit s-box (the README lists sb_64.vhd as
-- "s-box with simeck"). The entity has no clock port; the rtl architecture
-- consists of concurrent assignments only.
entity sb_64 is
port
( i_state : in word -- input word, split into two half words by the architecture
; i_rc : in std_logic -- single round-constant bit, placed in the last position of the constant half word
; o_state : out word -- output word: updated half followed by the unchanged first half of the input
);
end entity;
architecture rtl of sb_64 is
  -- The two halves of the input word (first half / second half).
  signal first_half, second_half : half_word;
  -- Rotations of the first half by 5 and by 1 positions.
  signal rot5, rot1 : half_word;
  -- Round constant: all ones except the last bit, which carries i_rc.
  signal rc_word : half_word;
  -- New half word produced by this round.
  signal mixed : half_word;
begin

  first_half  <= i_state( 0 to half_word_sz - 1 );
  second_half <= i_state( half_word_sz to word_sz - 1 );

  rot5 <= first_half(5 to half_word_sz - 1) & first_half(0 to 4);
  rot1 <= first_half(1 to half_word_sz - 1) & first_half(0);

  rc_word <= ( 0 to half_word_sz - 2 => '1', half_word_sz - 1 => i_rc );

  -- Feistel-style update: (rot5 and x) xor rot1 xor other half xor constant.
  mixed <= ( rot5 and first_half ) xor rot1 xor second_half xor rc_word;

  -- The updated half goes first; the old first half moves to the second slot.
  o_state <= mixed & first_half;

end architecture;
# Makefile for the ACE-Hash256 reference implementation: builds the KAT
# generator binary "acehash256" from genkat_hash.c, hash.c and ace.c.
CC=gcc
# Compiler flags: C99, extra warnings, AddressSanitizer + UBSan, -O2.
NISTGCCFLAGS =-std=c99 -Wall -Wextra -Wshadow -fsanitize=address,undefined -O2
# Libraries appended at link time (libm).
LFLAGS=-lm
# Default goal; resolves to the acehash256 binary through the alias target below.
all: acehash256_1
#all:myacetest1
acehash256_1:acehash256
#myacetest1:myacetest
# $@ expands to the target name and $^ to the full list of prerequisites.
acehash256: genkat_hash.c hash.c ace.c
$(CC) $(NISTGCCFLAGS) -o $@ $^ $(LFLAGS)
.PHONY: clean
# The leading '-' makes make ignore a failing rm (e.g. binary not present).
clean:
-rm acehash256
/* Reference implementation of the ACE permutation
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
#include<stdio.h>
#include<math.h>
#include<stdlib.h>
#include<stdint.h>
#include "ace.h"
/*
 * Per-step constant tables, one entry per step and indexed by the step counter
 * in ace_permutation().
 *   RC0/RC1/RC2: passed as the rc argument of simeck64_box() for the A, C and
 *                E blocks respectively; simeck64_box() consumes one bit per round.
 *   SC0/SC1/SC2: XORed into the last byte of the new E, A and D blocks
 *                respectively (the other bytes of those blocks are XORed with 0xff).
 * NOTE(review): the subscripts in the trailing comments below (2i, 2i+1, 2i+2)
 * differ from the ones used in the comments of ace_permutation() (3i, 3i+1,
 * 3i+2) -- confirm against the specification.
 */
static const unsigned char SC0[16]={0x50,0x5c,0x91,0x8d,0x53,0x60,0x68,0xe1,0xf6,0x9d,0x40,0x4f,0xbe,0x5b,0xe9,0x7f}; //Step constants (SC_{2i})
static const unsigned char SC1[16]={0x28,0xae,0x48,0xc6,0xa9,0x30,0x34,0x70,0x7b,0xce,0x20,0x27,0x5f,0xad,0x74,0x3f}; //Step constants (SC_{2i+1})
static const unsigned char SC2[16]={0x14,0x57,0x24,0x63,0x54,0x18,0x9a,0x38,0xbd,0x67,0x10,0x13,0x2f,0xd6,0xba,0x1f}; //Step constants (SC_{2i+2})
static const unsigned char RC0[16]={0x07,0x0a,0x9b,0xe0,0xd1,0x1a,0x22,0xf7,0x62,0x96,0x71,0xaa,0x2b,0xe9,0xcf,0xb7};//Round constants (RC_{2i})
static const unsigned char RC1[16]={0x53,0x5d,0x49,0x7f,0xbe,0x1d,0x28,0x6c,0x82,0x47,0x6b,0x88,0xdc,0x8b,0x59,0xc6};//Round constants (RC_{2i+1})
static const unsigned char RC2[16]={0x43,0xe4,0x5e,0xcc,0x32,0x4e,0x75,0x25,0xfd,0xf9,0x76,0xa0,0xb0,0x09,0x1e,0xad};//Round constants (RC_{2i+2})
/*
 * Byte-wise helper for a multi-byte left rotation: returns the byte obtained
 * by shifting x left by `shift` bits and filling the vacated low bits with the
 * top `shift` bits of y (the neighbouring byte).
 * `shift` is expected to be in the range 0..8.
 */
unsigned char rotl8 ( const unsigned char x, const unsigned char y, const unsigned char shift )
{
    const unsigned int upper = (unsigned int)x << shift;
    const unsigned int lower = (unsigned int)y >> (8 - shift);

    return (unsigned char)(upper | lower);
}
/***********************************************************
******* ACE permutation implementation********************
*********************************************************/
/*
 * Prints the STATEBYTES bytes of `state` to stdout as two-digit lowercase hex
 * values, each followed by a space, and terminates the line with '\n'.
 */
void ace_print_state( const unsigned char *state )
{
    const unsigned char *cursor = state;
    const unsigned char *const end = state + STATEBYTES;

    while ( cursor != end )
    {
        printf("%.2x ", *cursor);
        cursor++;
    }
    printf("\n");
}
/*
 * Prints `xlen` bytes starting at `x` to stdout as two-digit lowercase hex
 * values, each followed by a space, and terminates the line with '\n'.
 */
void ace_print_data(const uint8_t *x, const uint32_t xlen )
{
    const uint8_t *cursor = x;
    uint32_t remaining;

    for ( remaining = xlen; remaining != 0; remaining-- )
    {
        printf("%.2x ", *cursor);
        cursor++;
    }
    printf("\n");
}
/*
 * Applies SIMECKROUND rounds of the 64-bit Simeck-style s-box to an
 * 8-byte block.
 *
 *   output : receives the SIMECKBYTES result bytes (may alias `input`; the
 *            input is copied to a local buffer before anything is written)
 *   input  : SIMECKBYTES input bytes; bytes 0..3 form the half that is
 *            rotated, bytes 4..7 the half that is XORed in
 *   rc     : round-constant byte; bit r (LSB first) is used in round r
 *
 * The working buffers live on the stack: they are tiny and fixed-size, so the
 * function cannot fail and there is no allocation result to check.
 */
void simeck64_box( unsigned char *output, const unsigned char *input, const unsigned char rc )
{
    unsigned char block[SIMECKBYTES];
    unsigned char next[4];
    unsigned char round, k;

    for ( k = 0; k < SIMECKBYTES; k++ )
        block[k] = input[k];

    for ( round = 0; round < SIMECKROUND; round++ )
    {
        const unsigned char rc_bit = (unsigned char)((rc >> round) & 1);

        for ( k = 0; k < 4; k++ )
        {
            /* The 32-bit rotation is done bytewise: each byte borrows its
               top bits from the following byte (wrapping 3 -> 0). */
            const unsigned char neighbour = block[(k + 1) & 3];
            const unsigned char rot1 = rotl8(block[k], neighbour, 1);
            const unsigned char rot5 = rotl8(block[k], neighbour, 5);
            /* Round constant is all ones except its lowest bit, which is
               the current bit of rc (0xfe ^ rc_bit in the last byte). */
            const unsigned char konst = (unsigned char)((k == 3) ? (0xfe ^ rc_bit) : 0xff);

            next[k] = (unsigned char)(rot1 ^ (rot5 & block[k]) ^ block[4 + k] ^ konst);
        }

        /* Feistel swap: old first half becomes the second half. */
        for ( k = 0; k < 4; k++ )
        {
            block[4 + k] = block[k];
            block[k] = next[k];
        }
    }

    for ( k = 0; k < SIMECKBYTES; k++ )
        output[k] = block[k];
}
/*
 * Applies the ACE permutation in place to the STATEBYTES-byte state `input`.
 *
 * The state is treated as five SIMECKBYTES-byte blocks A, B, C, D, E (in
 * that order). Each of the NUMSTEPS steps computes F(A), F(C), F(E) with
 * simeck64_box() and the RC0/RC1/RC2 constants, then updates
 *   A <- D ^ F(E) ^ const(SC1)
 *   E <- B ^ F(C) ^ const(SC0)
 *   B <- F(C)
 *   C <- F(A)
 *   D <- F(A) ^ F(E) ^ const(SC2)
 * where const(SCx) is 0xff in every byte except the last, which is SCx[step].
 *
 * All scratch buffers are fixed-size stack arrays, so the function cannot
 * fail and there is no allocation result to check.
 */
void ace_permutation( unsigned char *input )
{
    unsigned char state[STATEBYTES];
    unsigned char f_a[SIMECKBYTES], f_c[SIMECKBYTES], f_e[SIMECKBYTES];
    unsigned char *const a = state;
    unsigned char *const b = state + SIMECKBYTES;
    unsigned char *const c = state + 2*SIMECKBYTES;
    unsigned char *const d = state + 3*SIMECKBYTES;
    unsigned char *const e = state + 4*SIMECKBYTES;
    unsigned char step, j;

    for ( j = 0; j < STATEBYTES; j++ )
        state[j] = input[j];

    for ( step = 0; step < NUMSTEPS; step++ )
    {
        simeck64_box( f_a, a, RC0[step] );
        simeck64_box( f_c, c, RC1[step] );
        simeck64_box( f_e, e, RC2[step] );

        for ( j = 0; j < SIMECKBYTES; j++ )
        {
            const int is_last = ( j == SIMECKBYTES-1 );
            const unsigned char k_a = is_last ? SC1[step] : 0xff;
            const unsigned char k_e = is_last ? SC0[step] : 0xff;
            const unsigned char k_d = is_last ? SC2[step] : 0xff;

            /* Order matters: A reads the old D and E reads the old B, so
               both must be written before B and D are overwritten. */
            a[j] = (unsigned char)(d[j] ^ f_e[j] ^ k_a);
            e[j] = (unsigned char)(b[j] ^ f_c[j] ^ k_e);
            b[j] = f_c[j];
            c[j] = f_a[j];
            d[j] = (unsigned char)(f_a[j] ^ f_e[j] ^ k_d);
        }
    }

    for ( j = 0; j < STATEBYTES; j++ )
        input[j] = state[j];
}
/*
 * Test helper: overwrites `state` with STATEBYTES zero bytes, prints that
 * all-zero state, then applies the ACE permutation to it in place.
 */
void ace_permutation_ALLZERO ( unsigned char *state )
{
    unsigned char *cursor;

    for ( cursor = state; cursor != state + STATEBYTES; cursor++ )
        *cursor = 0x0;

    ace_print_state(state);
    ace_permutation(state);
}
/*
 * Test helper: overwrites `state` with STATEBYTES bytes of 0xff and applies
 * the ACE permutation to it in place (the initial state is not printed).
 */
void ace_permutation_ALLONE ( unsigned char *state )
{
    unsigned char *cursor;

    for ( cursor = state; cursor != state + STATEBYTES; cursor++ )
        *cursor = 0xff;

    ace_permutation(state);
}
/* Reference implementation of the ACE permutation
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
/* Public interface of the ACE permutation reference implementation. */
#ifndef ACE_H
#define ACE_H
#include<math.h>
#include<stdlib.h>
#include<stdint.h>
#define STATEBYTES 40 //Number OF BYTES = 320/8 = 40
#define SIMECKBYTES 8 //Number of Simeck BYTES = 64/8 = 8
#define SIMECKROUND 8 //Number of rounds
#define NUMSTEPS 16 //Number of steps
typedef unsigned long long u64;
/* Returns (x << shift) | (y >> (8 - shift)) truncated to one byte. */
unsigned char rotl8 ( const unsigned char x, const unsigned char y, const unsigned char shift );
/* Prints xlen bytes of x to stdout as space-separated hex, followed by a newline. */
void ace_print_data(const unsigned char *x, const uint32_t xlen );
/* NOTE(review): declared here, but no definition is visible next to the other functions -- confirm it exists. */
void simeck_print_data(const unsigned char *y, const unsigned char ylen );
/* Runs SIMECKROUND s-box rounds on the SIMECKBYTES-byte input using the bits of rc; output may alias input. */
void simeck64_box( unsigned char *output, const unsigned char *input, const unsigned char rc );
/* Applies the NUMSTEPS-step ACE permutation in place to a STATEBYTES-byte state. */
void ace_permutation( unsigned char *input );
/* Prints the STATEBYTES-byte state to stdout as space-separated hex, followed by a newline. */
void ace_print_state( const unsigned char *state );
/* Sets the state to all 0x00 bytes, prints it, then permutes it. */
void ace_permutation_ALLZERO ( unsigned char *state );
/* Sets the state to all 0xff bytes, then permutes it. */
void ace_permutation_ALLONE ( unsigned char *state );
#endif
/* Zeroes the STATEBYTES-byte state, loads the hash IV and runs the permutation.
   Returns 0 on success, or a negative code when CRYPTO_BYTES is not 32. */
int acehash_init( unsigned char *state );
/* Hashes inlen bytes of `in` and writes the digest (CRYPTO_BYTES bytes) to `out`.
   Returns 0 on success, or a negative code when initialisation fails. */
int crypto_hash(
unsigned char *out,
const unsigned char *in,
unsigned long long inlen
);
//
// NIST-developed software is provided by NIST as a public service.
// You may use, copy and distribute copies of the software in any medium,
// provided that you keep intact this entire notice. You may improve,
// modify and create derivative works of the software or any portion of
// the software, and you may copy and distribute such modifications or
// works. Modified works should carry a notice stating that you changed
// the software and should note the date and nature of any such change.
// Please explicitly acknowledge the National Institute of Standards and
// Technology as the source of the software.
//
// NIST-developed software is expressly provided "AS IS." NIST MAKES NO
// WARRANTY OF ANY KIND, EXPRESS, IMPLIED, IN FACT OR ARISING BY OPERATION
// OF LAW, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT AND DATA ACCURACY. NIST
// NEITHER REPRESENTS NOR WARRANTS THAT THE OPERATION OF THE SOFTWARE WILL BE
// UNINTERRUPTED OR ERROR-FREE, OR THAT ANY DEFECTS WILL BE CORRECTED. NIST
// DOES NOT WARRANT OR MAKE ANY REPRESENTATIONS REGARDING THE USE OF THE SOFTWARE
// OR THE RESULTS THEREOF, INCLUDING BUT NOT LIMITED TO THE CORRECTNESS, ACCURACY,
// RELIABILITY, OR USEFULNESS OF THE SOFTWARE.
//
// You are solely responsible for determining the appropriateness of using and
// distributing the software and you assume all risks associated with its use,
// including but not limited to the risks and costs of program errors, compliance
// with applicable laws, damage to or loss of data, programs or equipment, and
// the unavailability or interruption of operation. This software is not intended
// to be used in any situation where a failure could cause risk of injury or
// damage to property. The software developed by NIST employees is not subject to
// copyright protection within the United States.
//
// disable deprecation for sprintf and fopen
#ifdef _MSC_VER
#define _CRT_SECURE_NO_WARNINGS
#endif
#include <stdio.h>
#include <string.h>
#include "crypto_hash.h"
#include "api.h"
#define KAT_SUCCESS 0
#define KAT_FILE_OPEN_ERROR -1
#define KAT_DATA_ERROR -3
#define KAT_CRYPTO_FAILURE -4
#define MAX_FILE_NAME 256
#define MAX_MESSAGE_LENGTH 1024
void init_buffer(unsigned char *buffer, unsigned long long numbytes);
void fprint_bstr(FILE *fp, const char *label, const unsigned char *data, unsigned long long length);
int generate_test_vectors();
/*
 * Entry point of the KAT generator: runs generate_test_vectors() and uses its
 * result as the process exit status, reporting any failure on stderr.
 */
int main()
{
    const int status = generate_test_vectors();

    if (status == KAT_SUCCESS)
        return status;

    fprintf(stderr, "test vector generation failed with code %d\n", status);
    return status;
}
/*
 * Writes the hash known-answer-test file "../LWC_HASH_KAT_<bits>.txt".
 *
 * The message buffer is filled by init_buffer(); for every length from 0 to
 * MAX_MESSAGE_LENGTH (inclusive) the corresponding prefix is hashed with
 * crypto_hash() and a "Count / Msg / MD" record is emitted.
 *
 * Returns KAT_SUCCESS, KAT_FILE_OPEN_ERROR when the file cannot be created,
 * or KAT_CRYPTO_FAILURE as soon as crypto_hash() reports an error (the raw
 * error code is written to the file before stopping).
 */
int generate_test_vectors()
{
    unsigned char message[MAX_MESSAGE_LENGTH];
    unsigned char md[CRYPTO_BYTES];
    char path[MAX_FILE_NAME];
    FILE *kat;
    int status = KAT_SUCCESS;
    int counter = 1;
    unsigned long long len = 0;

    init_buffer(message, sizeof(message));

    sprintf(path, "../LWC_HASH_KAT_%d.txt", (CRYPTO_BYTES * 8));

    kat = fopen(path, "w");
    if (kat == NULL) {
        fprintf(stderr, "Couldn't open <%s> for write\n", path);
        return KAT_FILE_OPEN_ERROR;
    }

    while (len <= MAX_MESSAGE_LENGTH) {
        fprintf(kat, "Count = %d\n", counter);
        counter++;

        fprint_bstr(kat, "Msg = ", message, len);

        status = crypto_hash(md, message, len);
        if (status != 0) {
            fprintf(kat, "crypto_hash returned <%d>\n", status);
            status = KAT_CRYPTO_FAILURE;
            break;
        }

        fprint_bstr(kat, "MD = ", md, CRYPTO_BYTES);
        fprintf(kat, "\n");

        len++;
    }

    fclose(kat);
    return status;
}
/*
 * Writes `label` followed by `length` bytes of `data` as two-digit uppercase
 * hex (no separators) and a terminating newline to the stream `fp`.
 */
void fprint_bstr(FILE *fp, const char *label, const unsigned char *data, unsigned long long length)
{
    const unsigned char *cursor = data;
    unsigned long long remaining = length;

    fprintf(fp, "%s", label);

    while (remaining != 0) {
        fprintf(fp, "%02X", *cursor);
        cursor++;
        remaining--;
    }

    fprintf(fp, "\n");
}
/*
 * Fills `buffer` with the repeating byte pattern 0x00, 0x01, ..., 0xFF:
 * element i receives the low 8 bits of its own index.
 */
void init_buffer(unsigned char *buffer, unsigned long long numbytes)
{
    unsigned long long pos = 0;

    while (pos != numbytes) {
        buffer[pos] = (unsigned char)(pos & 0xFF);
        pos++;
    }
}
/* Reference Implementation of ACE-Hash256
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
#include<stdio.h>
#include<math.h>
#include<stdlib.h>
#include<stdint.h>
#include "ace.h"
#include "crypto_hash.h"
#include "api.h"
#define KAT_SUCCESS 0
#define KAT_FILE_OPEN_ERROR -1
#define KAT_DATA_ERROR -3
#define KAT_CRYPTO_FAILURE -4
const unsigned char rate_bytes[8] = {0,1,2,3,16,17,18,19};
/*
 * Prepares `state` (STATEBYTES bytes) for hashing: clears it, loads the
 * three IV bytes 0x80, 0x40, 0x40 at offsets 8, 9, 10 and runs the ACE
 * permutation once.
 *
 * Returns KAT_SUCCESS, or KAT_CRYPTO_FAILURE when CRYPTO_BYTES is not 32
 * (the state has still been cleared in that case).
 */
int acehash_init( unsigned char *state )
{
    unsigned char *cursor;

    //Start from the all-ZERO state
    for ( cursor = state; cursor != state + STATEBYTES; cursor++ )
        *cursor = 0x0;

    //Only the 256-bit digest size is supported
    if ( CRYPTO_BYTES != 32 )
        return KAT_CRYPTO_FAILURE;

    //IV 0x804040
    //According to specification: B[7] = 0x80; B[6] = 0x40; B[5] = 0x40;
    state[8] = 0x80;
    state[9] = 0x40;
    state[10] = 0x40;
    ace_permutation(state);

    return KAT_SUCCESS;
}
/*
 * ACE-Hash256: hashes `inlen` bytes of `in` into `out`.
 *
 *   out   : receives the digest (CRYPTO_BYTES bytes; 32 is the only size
 *           accepted by acehash_init())
 *   in    : message bytes (not read when inlen is 0)
 *   inlen : message length in bytes
 *
 * Returns KAT_SUCCESS, or KAT_CRYPTO_FAILURE when the state cannot be
 * initialised (unsupported CRYPTO_BYTES).
 *
 * The message is absorbed 8 bytes at a time into the rate positions listed in
 * rate_bytes (A[0..3], C[0..3]), with a permutation call after every block.
 * The final, possibly empty, partial block is followed by the 0x80 padding
 * byte, so a padded block is always absorbed -- also for empty messages and
 * for lengths that are a multiple of 8.
 *
 * The state is kept on the stack, so there is no allocation to check and
 * nothing to release on the early error return.
 */
int crypto_hash(
    unsigned char *out,
    const unsigned char *in,
    unsigned long long inlen
    )
{
    unsigned char state[STATEBYTES];
    const u64 full_blocks = inlen/8;
    const unsigned char tail_len = (unsigned char)(inlen%8);
    unsigned char i, blk;
    u64 j;

    //Initialize state with predefined IV.
    if ( acehash_init(state) != KAT_SUCCESS )
        return(KAT_CRYPTO_FAILURE);

    //Absorbing phase: complete 64-bit blocks
    for ( j = 0; j < full_blocks; j++ )
    {
        for ( i = 0; i < 8; i++ )
            state[rate_bytes[i]] ^= in[8*j+((u64)i)];
        ace_permutation(state);
    }

    //Absorbing phase: remaining 0..7 bytes followed by padding 10*
    for ( i = 0; i < tail_len; i++ )
        state[rate_bytes[i]] ^= in[full_blocks*8+(u64)i];
    state[rate_bytes[tail_len]] ^= (0x80);
    ace_permutation(state);

    //Squeezing phase: 8 bytes per block, permuting between blocks.
    //acehash_init() has already rejected every CRYPTO_BYTES other than 32.
    for ( blk = 0; blk < CRYPTO_BYTES/8; blk++ )
    {
        if ( blk != 0 )
            ace_permutation(state);
        for ( i = 0; i < 8; i++ )
            out[8*blk+i] = state[rate_bytes[i]];
    }

    return KAT_SUCCESS;
}
This source diff could not be displayed because it is too large. You can view the blob instead.
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
// Byte-order helpers: U64BIG/U32BIG/U16BIG convert a value between host byte
// order and big-endian byte order (the conversion is its own inverse).
// NOTE: on little-endian hosts the macros evaluate their argument several
// times, so the argument must be free of side effects.
#ifndef ENDIAN_H_
#define ENDIAN_H_
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
// (each macro reverses the order of the bytes of its argument)
#define U64BIG(x) \
((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \
(((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \
(((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \
(((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56))
#define U32BIG(x) \
((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \
(((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24))
#define U16BIG(x) \
((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8))
#else
// neither a known compiler-provided byte order nor MSVC: refuse to build
#error "ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#include "api.h"
#include "endian.h"
#include "permutations.h"
#define RATE (128 / 8)
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \
(u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32)
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
u32_2 K0, K1, N0, N1;
u32_2 x0, x1, x2, x3, x4;
u32_2 t0, t1, t2, t3, t4;
u64 tmp0, tmp1;
u32 i;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
to_bit_interleaving(K0, U64BIG(*(u64*)k));
to_bit_interleaving(K1, U64BIG(*(u64*)(k + 8)));
to_bit_interleaving(N0, U64BIG(*(u64*)npub));
to_bit_interleaving(N1, U64BIG(*(u64*)(npub + 8)));
// initialization
to_bit_interleaving(x0, IV);
x1.o = K0.o;
x1.e = K0.e;
x2.e = K1.e;
x2.o = K1.o;
x3.e = N0.e;
x3.o = N0.o;
x4.e = N1.e;
x4.o = N1.o;
P12();
x3.e ^= K0.e;
x3.o ^= K0.o;
x4.e ^= K1.e;
x4.o ^= K1.o;
// process associated data
if (adlen) {
while (adlen >= RATE) {
to_bit_interleaving(t0, U64BIG(*(u64*)ad));
x0.e ^= t0.e;
x0.o ^= t0.o;
to_bit_interleaving(t1, U64BIG(*(u64*)(ad + 8)));
x1.e ^= t1.e;
x1.o ^= t1.o;
P8();
adlen -= RATE;
ad += RATE;
}
tmp0 = 0;
tmp1 = 0;
for (i = 0; i < adlen; ++i, ++ad)
if (i < 8)
tmp0 ^= INS_BYTE64(*ad, i);
else
tmp1 ^= INS_BYTE64(*ad, i % 8);
if (adlen < 8)
tmp0 ^= INS_BYTE64(0x80, adlen);
else
tmp1 ^= INS_BYTE64(0x80, adlen % 8);
to_bit_interleaving(t0, tmp0);
x0.e ^= t0.e;
x0.o ^= t0.o;
to_bit_interleaving(t1, tmp1);
x1.e ^= t1.e;
x1.o ^= t1.o;
P8();
}
x4.e ^= 1;
// process plaintext
clen -= CRYPTO_ABYTES;
while (clen >= RATE) {
from_bit_interleaving(tmp0, x0);
from_bit_interleaving(tmp1, x1);
*(u64*)m = U64BIG(tmp0) ^ *(u64*)c;
*(u64*)(m + 8) = U64BIG(tmp1) ^ *(u64*)(c + 8);
to_bit_interleaving(x0, U64BIG(*(u64*)c));
to_bit_interleaving(x1, U64BIG(*(u64*)(c + 8)));
P8();
clen -= RATE;
m += RATE;
c += RATE;
}
from_bit_interleaving(tmp0, x0);
from_bit_interleaving(tmp1, x1);
for (i = 0; i < clen; ++i, ++m, ++c) {
if (i < 8) {
*m = EXT_BYTE64(tmp0, i) ^ *c;
tmp0 &= ~INS_BYTE64(0xff, i);
tmp0 |= INS_BYTE64(*c, i);
} else {
*m = EXT_BYTE64(tmp1, i % 8) ^ *c;
tmp1 &= ~INS_BYTE64(0xff, i % 8);
tmp1 |= INS_BYTE64(*c, i % 8);
}
}
if (clen < 8)
tmp0 ^= INS_BYTE64(0x80, clen);
else
tmp1 ^= INS_BYTE64(0x80, clen % 8);
to_bit_interleaving(x0, tmp0);
to_bit_interleaving(x1, tmp1);
// finalization
x2.e ^= K0.e;
x2.o ^= K0.o;
x3.e ^= K1.e;
x3.o ^= K1.o;
P12();
x3.e ^= K0.e;
x3.o ^= K0.o;
x4.e ^= K1.e;
x4.o ^= K1.o;
// verify tag
from_bit_interleaving(tmp0, x3);
from_bit_interleaving(tmp1, x4);
if (*(u64*)c != U64BIG(tmp0) || *(u64*)(c + 8) != U64BIG(tmp1)) {
*mlen = 0;
return -1;
}
return 0;
}
#include "api.h"
#include "endian.h"
#include "permutations.h"
#define RATE (128 / 8)
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \
(u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32)
// Authenticated encryption (bit-interleaved 32-bit implementation).
//   c, clen   : output buffer; receives mlen ciphertext bytes followed by the
//               CRYPTO_ABYTES-byte tag, *clen is set to mlen + CRYPTO_ABYTES
//   m, mlen   : plaintext
//   ad, adlen : associated data
//   nsec      : unused
//   npub      : 16-byte nonce, k : 16-byte key
// Always returns 0.
// NOTE: k, npub, ad, m and c are accessed through u64 pointers.
// NOTE: the locals x0..x4 and t0..t4 are referenced by name inside the
// P12()/P8() macros and must keep these names.
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
const unsigned char* m, unsigned long long mlen,
const unsigned char* ad, unsigned long long adlen,
const unsigned char* nsec, const unsigned char* npub,
const unsigned char* k) {
u32_2 K0, K1, N0, N1;
u32_2 x0, x1, x2, x3, x4;
u32_2 t0, t1, t2, t3, t4;
u64 tmp0, tmp1;
u32 i;
(void)nsec;
// set ciphertext size
*clen = mlen + CRYPTO_ABYTES;
// load key and nonce
to_bit_interleaving(K0, U64BIG(*(u64*)k));
to_bit_interleaving(K1, U64BIG(*(u64*)(k + 8)));
to_bit_interleaving(N0, U64BIG(*(u64*)npub));
to_bit_interleaving(N1, U64BIG(*(u64*)(npub + 8)));
// initialization
to_bit_interleaving(x0, IV);
x1.o = K0.o;
x1.e = K0.e;
x2.e = K1.e;
x2.o = K1.o;
x3.e = N0.e;
x3.o = N0.o;
x4.e = N1.e;
x4.o = N1.o;
P12();
x3.e ^= K0.e;
x3.o ^= K0.o;
x4.e ^= K1.e;
x4.o ^= K1.o;
// process associated data
if (adlen) {
while (adlen >= RATE) {
to_bit_interleaving(t0, U64BIG(*(u64*)ad));
x0.e ^= t0.e;
x0.o ^= t0.o;
to_bit_interleaving(t1, U64BIG(*(u64*)(ad + 8)));
x1.e ^= t1.e;
x1.o ^= t1.o;
P8();
adlen -= RATE;
ad += RATE;
}
// last (partial) block: remaining bytes followed by the 0x80 padding byte
tmp0 = 0;
tmp1 = 0;
for (i = 0; i < adlen; ++i, ++ad)
if (i < 8)
tmp0 ^= INS_BYTE64(*ad, i);
else
tmp1 ^= INS_BYTE64(*ad, i % 8);
if (adlen < 8)
tmp0 ^= INS_BYTE64(0x80, adlen);
else
tmp1 ^= INS_BYTE64(0x80, adlen % 8);
to_bit_interleaving(t0, tmp0);
x0.e ^= t0.e;
x0.o ^= t0.o;
to_bit_interleaving(t1, tmp1);
x1.e ^= t1.e;
x1.o ^= t1.o;
P8();
}
// applied whether or not associated data was present
x4.e ^= 1;
// process plaintext
while (mlen >= RATE) {
to_bit_interleaving(t0, U64BIG(*(u64*)m));
x0.e ^= t0.e;
x0.o ^= t0.o;
to_bit_interleaving(t1, U64BIG(*(u64*)(m + 8)));
x1.e ^= t1.e;
x1.o ^= t1.o;
// the ciphertext block is the rate part of the state after absorbing m
from_bit_interleaving(tmp0, x0);
*(u64*)c = U64BIG(tmp0);
from_bit_interleaving(tmp1, x1);
*(u64*)(c + 8) = U64BIG(tmp1);
P8();
mlen -= RATE;
m += RATE;
c += RATE;
}
// last (partial) block: remaining plaintext bytes followed by 0x80 padding
tmp0 = 0;
tmp1 = 0;
for (i = 0; i < mlen; ++i, ++m)
if (i < 8)
tmp0 ^= INS_BYTE64(*m, i);
else
tmp1 ^= INS_BYTE64(*m, i % 8);
if (mlen < 8)
tmp0 ^= INS_BYTE64(0x80, mlen);
else
tmp1 ^= INS_BYTE64(0x80, mlen % 8);
to_bit_interleaving(t0, tmp0);
x0.e ^= t0.e;
x0.o ^= t0.o;
to_bit_interleaving(t1, tmp1);
x1.e ^= t1.e;
x1.o ^= t1.o;
from_bit_interleaving(tmp0, x0);
from_bit_interleaving(tmp1, x1);
// only the first mlen bytes of the last block are emitted as ciphertext
for (i = 0; i < mlen; ++i, ++c)
if (i < 8)
*c = EXT_BYTE64(tmp0, i);
else
*c = EXT_BYTE64(tmp1, i % 8);
// finalization
x2.e ^= K0.e;
x2.o ^= K0.o;
x3.e ^= K1.e;
x3.o ^= K1.o;
P12();
x3.e ^= K0.e;
x3.o ^= K0.o;
x4.e ^= K1.e;
x4.o ^= K1.o;
// set tag
// (c now points just past the ciphertext bytes)
from_bit_interleaving(tmp0, x3);
*(u64*)c = U64BIG(tmp0);
from_bit_interleaving(tmp1, x4);
*(u64*)(c + 8) = U64BIG(tmp1);
return 0;
}
// Byte-order helpers: U64BIG/U32BIG/U16BIG convert a value between host byte
// order and big-endian byte order (the conversion is its own inverse).
// NOTE: on little-endian hosts the macros evaluate their argument several
// times, so the argument must be free of side effects.
#ifndef ENDIAN_H_
#define ENDIAN_H_
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
// (each macro reverses the order of the bytes of its argument)
#define U64BIG(x) \
((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \
(((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \
(((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \
(((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56))
#define U32BIG(x) \
((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \
(((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24))
#define U16BIG(x) \
((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8))
#else
// neither a known compiler-provided byte order nor MSVC: refuse to build
#error "ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
#include "endian.h"
// Short fixed-purpose integer aliases used throughout this implementation.
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
// One 64-bit state word stored as two 32-bit halves, produced by
// to_bit_interleaving() and read back by from_bit_interleaving().
// NOTE(review): e/o presumably hold the even- and odd-indexed bits -- confirm.
typedef struct {
u32 e;
u32 o;
} u32_2;
// Extracts byte n of the 64-bit value x, counting n = 0 as the most significant byte.
#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n)))))
// Places the byte x at byte position n of a 64-bit value (n = 0 is the most significant byte).
#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n))))
// 32-bit rotate right; n must be in 1..31 (n = 0 would shift by the full width).
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Converts the 64-bit value `in` into the two-half representation `out`
// (a u32_2): both 32-bit halves of `in` are bit-permuted with four
// swap steps, then the low 16 bits of each go to out.e and the high 16 bits
// of each go to out.o.
// The macro declares its own locals hi, lo, r0 and r1, so the arguments must
// not be expressions that use variables with those names.
#define to_bit_interleaving(out, in) \
do { \
u32 hi = (in) >> 32; \
u32 lo = (u32)(in); \
u32 r0, r1; \
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \
(out).e = (lo & 0x0000FFFF) | (hi << 16); \
(out).o = (lo >> 16) | (hi & 0xFFFF0000); \
} while (0)
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Inverse of to_bit_interleaving(): rebuilds the 64-bit value `out` from the
// two-half representation `in` (a u32_2). The 16-bit pieces of in.e / in.o
// are regrouped into lo and hi, and the four swap steps are applied in the
// reverse order (8, 4, 2, 1).
// The macro declares its own locals hi, lo, r0 and r1, so the arguments must
// not be expressions that use variables with those names.
#define from_bit_interleaving(out, in) \
do { \
u32 lo = ((in).e & 0x0000FFFF) | ((in).o << 16); \
u32 hi = ((in).e >> 16) | ((in).o & 0xFFFF0000); \
u32 r0, r1; \
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \
out = (u64)hi << 32 | lo; \
} while (0)
// One permutation round on the five state words, with the round constant
// given as two parts: C_e is XORed into x2.e and C_o into x2.o.
// The macro takes no state arguments: it operates directly on variables named
// x0..x4 and uses t0..t4 as scratch, all of type u32_2, which must be declared
// in the scope where the macro is expanded.
// Every 64-bit operation is carried out separately on the .e and .o halves;
// the rotations of the linear layer therefore appear as pairs of 32-bit
// rotations, in some cases crossing between the two halves.
#define ROUND(C_e, C_o) \
do { \
/* round constant */ \
x2.e ^= C_e; x2.o ^= C_o; \
/* s-box layer */ \
x0.e ^= x4.e; x0.o ^= x4.o; \
x4.e ^= x3.e; x4.o ^= x3.o; \
x2.e ^= x1.e; x2.o ^= x1.o; \
t0.e = x0.e; t0.o = x0.o; \
t4.e = x4.e; t4.o = x4.o; \
t3.e = x3.e; t3.o = x3.o; \
t1.e = x1.e; t1.o = x1.o; \
t2.e = x2.e; t2.o = x2.o; \
x0.e = t0.e ^ (~t1.e & t2.e); x0.o = t0.o ^ (~t1.o & t2.o); \
x2.e = t2.e ^ (~t3.e & t4.e); x2.o = t2.o ^ (~t3.o & t4.o); \
x4.e = t4.e ^ (~t0.e & t1.e); x4.o = t4.o ^ (~t0.o & t1.o); \
x1.e = t1.e ^ (~t2.e & t3.e); x1.o = t1.o ^ (~t2.o & t3.o); \
x3.e = t3.e ^ (~t4.e & t0.e); x3.o = t3.o ^ (~t4.o & t0.o); \
x1.e ^= x0.e; x1.o ^= x0.o; \
x3.e ^= x2.e; x3.o ^= x2.o; \
x0.e ^= x4.e; x0.o ^= x4.o; \
/* linear layer */ \
t0.e = x0.e ^ ROTR32(x0.o, 4); t0.o = x0.o ^ ROTR32(x0.e, 5); \
t1.e = x1.e ^ ROTR32(x1.e, 11); t1.o = x1.o ^ ROTR32(x1.o, 11); \
t2.e = x2.e ^ ROTR32(x2.o, 2); t2.o = x2.o ^ ROTR32(x2.e, 3); \
t3.e = x3.e ^ ROTR32(x3.o, 3); t3.o = x3.o ^ ROTR32(x3.e, 4); \
t4.e = x4.e ^ ROTR32(x4.e, 17); t4.o = x4.o ^ ROTR32(x4.o, 17); \
x0.e ^= ROTR32(t0.o, 9); x0.o ^= ROTR32(t0.e, 10); \
x1.e ^= ROTR32(t1.o, 19); x1.o ^= ROTR32(t1.e, 20); \
x2.e ^= t2.o; x2.o ^= ROTR32(t2.e, 1); \
x3.e ^= ROTR32(t3.e, 5); x3.o ^= ROTR32(t3.o, 5); \
x4.e ^= ROTR32(t4.o, 3); x4.o ^= ROTR32(t4.e, 4); \
x2.e = ~x2.e; x2.o = ~x2.o; \
} while(0)
// 12-round permutation: twelve ROUND() invocations with the constant pairs
// listed below. Like ROUND(), it acts on the variables x0..x4 / t0..t4 of the
// enclosing scope.
#define P12() \
do { \
ROUND(0xc, 0xc); \
ROUND(0x9, 0xc); \
ROUND(0xc, 0x9); \
ROUND(0x9, 0x9); \
ROUND(0x6, 0xc); \
ROUND(0x3, 0xc); \
ROUND(0x6, 0x9); \
ROUND(0x3, 0x9); \
ROUND(0xc, 0x6); \
ROUND(0x9, 0x6); \
ROUND(0xc, 0x3); \
ROUND(0x9, 0x3); \
} while (0)
// 8-round permutation: uses the same constant pairs as the last eight rounds
// of P12(). Acts on the variables x0..x4 / t0..t4 of the enclosing scope.
#define P8() \
do { \
ROUND(0x6, 0xc); \
ROUND(0x3, 0xc); \
ROUND(0x6, 0x9); \
ROUND(0x3, 0x9); \
ROUND(0xc, 0x6); \
ROUND(0x9, 0x6); \
ROUND(0xc, 0x3); \
ROUND(0x9, 0x3); \
} while (0)
// 6-round permutation: uses the same constant pairs as the last six rounds
// of P12(). Acts on the variables x0..x4 / t0..t4 of the enclosing scope.
#define P6() \
do { \
ROUND(0x6, 0x9); \
ROUND(0x3, 0x9); \
ROUND(0xc, 0x6); \
ROUND(0x9, 0x6); \
ROUND(0xc, 0x3); \
ROUND(0x9, 0x3); \
} while (0)
#endif // PERMUTATIONS_H_
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#include "api.h"
#include "endian.h"
#include "permutations.h"
#define RATE (128 / 8)
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \
(u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32)
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
u32_2 K0, K1, N0, N1;
u32_2 x0, x1, x2, x3, x4;
u32_2 t0, t1;
u64 tmp0, tmp1;
u32 i;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
to_bit_interleaving(K0, U64BIG(*(u64*)k));
to_bit_interleaving(K1, U64BIG(*(u64*)(k + 8)));
to_bit_interleaving(N0, U64BIG(*(u64*)npub));
to_bit_interleaving(N1, U64BIG(*(u64*)(npub + 8)));
// initialization
to_bit_interleaving(x0, IV);
x1.o = K0.o;
x1.e = K0.e;
x2.e = K1.e;
x2.o = K1.o;
x3.e = N0.e;
x3.o = N0.o;
x4.e = N1.e;
x4.o = N1.o;
P12();
x3.e ^= K0.e;
x3.o ^= K0.o;
x4.e ^= K1.e;
x4.o ^= K1.o;
// process associated data
if (adlen) {
while (adlen >= RATE) {
to_bit_interleaving(t0, U64BIG(*(u64*)ad));
x0.e ^= t0.e;
x0.o ^= t0.o;
to_bit_interleaving(t1, U64BIG(*(u64*)(ad + 8)));
x1.e ^= t1.e;
x1.o ^= t1.o;
P8();
adlen -= RATE;
ad += RATE;
}
tmp0 = 0;
tmp1 = 0;
for (i = 0; i < adlen; ++i, ++ad)
if (i < 8)
tmp0 ^= INS_BYTE64(*ad, i);
else
tmp1 ^= INS_BYTE64(*ad, i % 8);
if (adlen < 8)
tmp0 ^= INS_BYTE64(0x80, adlen);
else
tmp1 ^= INS_BYTE64(0x80, adlen % 8);
to_bit_interleaving(t0, tmp0);
x0.e ^= t0.e;
x0.o ^= t0.o;
to_bit_interleaving(t1, tmp1);
x1.e ^= t1.e;
x1.o ^= t1.o;
P8();
}
x4.e ^= 1;
// process plaintext
clen -= CRYPTO_ABYTES;
while (clen >= RATE) {
from_bit_interleaving(tmp0, x0);
from_bit_interleaving(tmp1, x1);
*(u64*)m = U64BIG(tmp0) ^ *(u64*)c;
*(u64*)(m + 8) = U64BIG(tmp1) ^ *(u64*)(c + 8);
to_bit_interleaving(x0, U64BIG(*(u64*)c));
to_bit_interleaving(x1, U64BIG(*(u64*)(c + 8)));
P8();
clen -= RATE;
m += RATE;
c += RATE;
}
from_bit_interleaving(tmp0, x0);
from_bit_interleaving(tmp1, x1);
for (i = 0; i < clen; ++i, ++m, ++c) {
if (i < 8) {
*m = EXT_BYTE64(tmp0, i) ^ *c;
tmp0 &= ~INS_BYTE64(0xff, i);
tmp0 |= INS_BYTE64(*c, i);
} else {
*m = EXT_BYTE64(tmp1, i % 8) ^ *c;
tmp1 &= ~INS_BYTE64(0xff, i % 8);
tmp1 |= INS_BYTE64(*c, i % 8);
}
}
if (clen < 8)
tmp0 ^= INS_BYTE64(0x80, clen);
else
tmp1 ^= INS_BYTE64(0x80, clen % 8);
to_bit_interleaving(x0, tmp0);
to_bit_interleaving(x1, tmp1);
// finalization
x2.e ^= K0.e;
x2.o ^= K0.o;
x3.e ^= K1.e;
x3.o ^= K1.o;
P12();
x3.e ^= K0.e;
x3.o ^= K0.o;
x4.e ^= K1.e;
x4.o ^= K1.o;
// verify tag
from_bit_interleaving(tmp0, x3);
from_bit_interleaving(tmp1, x4);
if (*(u64*)c != U64BIG(tmp0) || *(u64*)(c + 8) != U64BIG(tmp1)) {
*mlen = 0;
return -1;
}
return 0;
}
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment