Commit 06f56ee8 by lwc-tester

Round 1 Candidates

parents

Too many changes to show.

To preserve performance only 1000 of 1000+ files are displayed.

This source diff could not be displayed because it is too large. You can view the blob instead.
# Build rules for the AVX2 speed-test binary `aceavx` (speed.c + ace.c).
# Flag sets tried earlier are kept below, commented out.
#CC=gcc -O2 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -msse3 -mmmx -mavx -mavx2
#CC=gcc -Wall -O2 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -msse2 -mmmx -mavx -mavx2
CC=gcc -Wall -O3 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -mavx -mavx2
#CC=gcc -O1 -fomit-frame-pointer -march=haswell -mtune=native -mavx
# Candidate -march values: ivybridge, skylake, sandybridge, haswell

all: aceavx_1

aceavx_1: aceavx

# Recipe lines must start with a TAB character.
aceavx: speed.c ace.c
	$(CC) -o $@ $^

# None of these targets creates a file of the same name.
.PHONY: all aceavx_1 clean
clean:
	rm -fr aceavx
/* Reference implementation of ACE-128, AEAD
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
#ifndef ACE_H
#define ACE_H
#include<stdint.h>
#include<x86intrin.h>
//#include<smmintrin.h>
//#include <immintrin.h>
/* Size of one permutation state: 40 bytes = 320 bits. */
#define STATEBYTES 40
#define STATEDWORD 10 // 320/32 = 10 32-bit words per state
#define SIMECKROUND 8
#define NUMSTEPS 16
/* speed.c processes 8*PARAL_INST_BY8 states per call. */
#define PARAL_INST_BY8 1
/* Constant tables, one byte per entry, NUMSTEPS (16) entries each. */
static const unsigned char SC0[16]={0x50,0x5c,0x91,0x8d,0x53,0x60,0x68,0xe1,0xf6,0x9d,0x40,0x4f,0xbe,0x5b,0xe9,0x7f}; //Step constants (SC_{2i})
static const unsigned char SC1[16]={0x28,0xae,0x48,0xc6,0xa9,0x30,0x34,0x70,0x7b,0xce,0x20,0x27,0x5f,0xad,0x74,0x3f}; //Step constants (SC_{2i+1})
static const unsigned char SC2[16]={0x14,0x57,0x24,0x63,0x54,0x18,0x9a,0x38,0xbd,0x67,0x10,0x13,0x2f,0xd6,0xba,0x1f}; //Step constants (SC_{2i+2})
static const unsigned char RC0[16]={0x07,0x0a,0x9b,0xe0,0xd1,0x1a,0x22,0xf7,0x62,0x96,0x71,0xaa,0x2b,0xe9,0xcf,0xb7};//Round constants (RC_{2i})
static const unsigned char RC1[16]={0x53,0x5d,0x49,0x7f,0xbe,0x1d,0x28,0x6c,0x82,0x47,0x6b,0x88,0xdc,0x8b,0x59,0xc6};//Round constants (RC_{2i+1})
static const unsigned char RC2[16]={0x43,0xe4,0x5e,0xcc,0x32,0x4e,0x75,0x25,0xfd,0xf9,0x76,0xa0,0xb0,0x09,0x1e,0xad};//Round constants (RC_{2i+2})
typedef unsigned long long int u64;
typedef unsigned int u32;
/* NOTE(review): despite its name, u8 is an alias for unsigned int, not an 8-bit type. */
typedef unsigned int u8;
/* Rotate every 32-bit lane left by 5 / by 1 bit. */
#define ROT5(x) (_mm256_slli_epi32(x, 5) | _mm256_srli_epi32(x, 27))
#define ROT1(x) (_mm256_slli_epi32(x, 1) | _mm256_srli_epi32(x, 31))
/* Reorder the eight 32-bit lanes to 0,2,4,6,1,3,5,7 (even lanes into the low half, odd lanes into the high half). */
#define SWAPREG1(x) (_mm256_permutevar8x32_epi32(x, _mm256_set_epi32(7, 5, 3, 1, 6, 4, 2, 0)))
/* Constant vector: even lanes hold 0xfffffffe^t1, odd lanes hold 0xfffffffe^t2. */
#define RC(t1, t2) (_mm256_set_epi32(0xfffffffe^t2, 0xfffffffe^t1, 0xfffffffe^t2, 0xfffffffe^t1, 0xfffffffe^t2, 0xfffffffe^t1, 0xfffffffe^t2,0xfffffffe^t1))
/* Constant vector: even lanes are all ones; lanes 1 and 5 hold 0xffffff00^t1, lanes 3 and 7 hold 0xffffff00^t2. */
#define SC(t1, t2) (_mm256_set_epi32(0xffffff00^t2, 0xffffffff, 0xffffff00^t1, 0xffffffff, 0xffffff00^t2, 0xffffffff, 0xffffff00^t1, 0xffffffff))
/* Swap the two 64-bit words inside each 128-bit half. */
#define SWAPBLK(x) (_mm256_permute4x64_epi64(x, _MM_SHUFFLE(2,3,0,1)))
/* Reorder the four 64-bit words to 0,2,1,3 (expressed as a 32-bit lane permutation). */
#define SWAPREG2(x) (_mm256_permutevar8x32_epi32(x, _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0)))
/* NOTE(review): the expansion permutes a variable named `xtmp`, not the macro
   parameter `x`, so it only works where `xtmp` is in scope -- confirm intent. */
#define SWAPAC(x) (_mm256_permutevar8x32_epi32(xtmp, _mm256_set_epi32(5, 4, 7, 6, 1, 0, 3, 2)))
/* Masks selecting the low / the high four 32-bit lanes. */
#define masklo (_mm256_set_epi32(0x0, 0x0, 0x0, 0x0, 0xffffffff, 0xffffffff, 0xffffffff,0xffffffff))
#define maskhi (_mm256_set_epi32(0xffffffff, 0xffffffff, 0xffffffff,0xffffffff,0x0, 0x0, 0x0, 0x0))
/* Build a 256-bit vector from two 128-bit halves: v1 is the low half, v0 the high half.
   NOTE(review): newer compiler headers may already provide this name -- verify there is no clash. */
#define _mm256_set_m128i(v0, v1) _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1)
/*
 * ROAX(x, y, t1, t2): one rotate-AND-XOR round on 32-bit lanes.
 *   x <- (rot5(x) & x) ^ rot1(x) ^ RC(t1, t2) ^ y
 *   y <- previous value of x
 * x and y must be modifiable __m256i lvalues.
 */
#define ROAX(x, y, t1, t2)\
{\
    const __m256i roax_old_x = x;\
    x = y ^ RC(t1, t2) ^ ROT1(roax_old_x) ^ (roax_old_x & ROT5(roax_old_x));\
    y = roax_old_x;\
}
/*
 * PACK_SSb(x, y): regroup 32-bit lanes in place.
 * Afterwards x holds the even-indexed lanes of the old x (low half) and of the
 * old y (high half); y holds the odd-indexed lanes in the same arrangement.
 */
#define PACK_SSb(x, y)\
{\
    const __m256i pk_sorted_x = SWAPREG1(x);\
    const __m256i pk_sorted_y = SWAPREG1(y);\
    y = _mm256_permute2x128_si256(pk_sorted_x, pk_sorted_y, 0x31); /* high halves */\
    x = _mm256_permute2x128_si256(pk_sorted_x, pk_sorted_y, 0x20); /* low halves */\
}
/*
 * UNPACK_SSb(x, y): interleave the 32-bit lanes of x and y in place.
 * Afterwards x = x0,y0,x1,y1,x2,y2,x3,y3 and y = x4,y4,x5,y5,x6,y6,x7,y7.
 */
#define UNPACK_SSb(x, y)\
{\
    const __m256i un_lo = _mm256_unpacklo_epi32(x, y);\
    const __m256i un_hi = _mm256_unpackhi_epi32(x, y);\
    x = _mm256_permute2x128_si256(un_lo, un_hi, 0x20);\
    y = _mm256_permute2x128_si256(un_lo, un_hi, 0x31);\
}
/*
 * PACK(x, y, z, w): regroup 64-bit words in place, pairing x with z and y with w.
 * Afterwards x holds the even-indexed 64-bit words of the old x (low half) and
 * of the old z (high half); z holds the odd-indexed ones. y/w likewise.
 */
#define PACK(x, y, z, w)\
{\
    const __m256i pk_x = SWAPREG2(x);\
    const __m256i pk_z = SWAPREG2(z);\
    const __m256i pk_y = SWAPREG2(y);\
    const __m256i pk_w = SWAPREG2(w);\
    x = _mm256_permute2x128_si256(pk_x, pk_z, 0x20);\
    z = _mm256_permute2x128_si256(pk_x, pk_z, 0x31);\
    y = _mm256_permute2x128_si256(pk_y, pk_w, 0x20);\
    w = _mm256_permute2x128_si256(pk_y, pk_w, 0x31);\
}
/*
 * UNPACK(x, y, z, w): interleave 64-bit words in place, pairing x with z and y with w.
 * Afterwards x = x0,z0,x1,z1 and z = x2,z2,x3,z3 (64-bit words); y/w likewise.
 */
#define UNPACK(x,y,z,w)\
{\
    const __m256i un_lo_xz = _mm256_unpacklo_epi64(x, z);\
    const __m256i un_hi_xz = _mm256_unpackhi_epi64(x, z);\
    const __m256i un_lo_yw = _mm256_unpacklo_epi64(y, w);\
    const __m256i un_hi_yw = _mm256_unpackhi_epi64(y, w);\
    x = _mm256_permute2x128_si256(un_lo_xz, un_hi_xz, 0x20);\
    z = _mm256_permute2x128_si256(un_lo_xz, un_hi_xz, 0x31);\
    y = _mm256_permute2x128_si256(un_lo_yw, un_hi_yw, 0x20);\
    w = _mm256_permute2x128_si256(un_lo_yw, un_hi_yw, 0x31);\
}
/* Entry points implemented outside this header. */

/* Applies the permutation in place to the buffer at `state`.
   speed.c passes 8*PARAL_INST_BY8 states of STATEDWORD words each. */
void ace320( u32 *state );
/* AEAD encryption. speed.c passes the lengths tlen/mlen/adlen as counts of
   32-bit words per instance and klen as the literal 16.
   NOTE(review): the meaning of the return value is not visible here -- confirm against the implementation. */
int crypto_aead_encrypt( u32 *tag, u32 tlen, u32 *c, u32 *m, u32 mlen, u32 *ad, u32 adlen, u8 *k, u8 *npub, u32 klen );
/* AEAD decryption; same parameter conventions as crypto_aead_encrypt.
   NOTE(review): presumably reports tag verification through the return value -- confirm. */
int crypto_aead_decrypt( u32 *m, u32 *c, u32 mlen, u32 *tag, u32 tlen, u32 *ad, u32 adlen, u8 *k, u8 *npub, u32 klen );
#endif
#ifndef CLOCK_CYCLE_H
#define CLOCK_CYCLE_H
/*
 * Time-stamp-counter helpers for x86-64 benchmarking.
 *
 * Both functions are `static inline` so that this header can be included from
 * more than one translation unit without multiple-definition link errors.
 */
typedef unsigned long long int u64;

/*
 * Read the time-stamp counter at the start of a measured region.
 * CPUID is executed first so that earlier instructions have completed before
 * RDTSC samples the counter. Returns the 64-bit counter value.
 */
static inline u64 start_rdtsc( void )
{
    unsigned high, low;
    /* The "memory" clobber stops the compiler from moving loads/stores of the
       measured code across this point. */
    __asm__ volatile("CPUID\n\t"
                     "RDTSC\n\t"
                     "mov %%edx, %0\n\t"
                     "mov %%eax, %1\n\t"
                     : "=r" (high), "=r" (low)
                     :
                     : "%rax", "%rbx", "%rcx", "%rdx", "memory");
    return ( ((u64)low) | (((u64)high) << 32));
}

/*
 * Read the time-stamp counter at the end of a measured region.
 * RDTSCP samples the counter, the value is copied out, and CPUID then keeps
 * later instructions from starting early. Returns the 64-bit counter value.
 */
static inline u64 end_rdtsc( void )
{
    unsigned high, low;
    __asm__ volatile("RDTSCP\n\t"
                     "mov %%edx, %0\n\t"
                     "mov %%eax,%1\n\t"
                     "CPUID\n\t"
                     : "=r" (high), "=r" (low)
                     :
                     : "%rax", "%rbx", "%rcx", "%rdx", "memory");
    return ( ((u64)low) | (((u64)high) << 32));
}
#endif
/* Reference implementation of ACE-128, AEAD
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#include<math.h>
#include "ace.h"
#include "clock_cycle.h"
#define NUM_ITER 2000
#define NUM_TEST 500
/*
 * Print the 8*PARAL_INST_BY8 parallel states stored back to back in `state`:
 * one line per state, STATEDWORD words each, 8 upper-case hex digits per word.
 */
void print_state ( u32 *state )
{
    u8 inst = 0;
    while ( inst < 8*PARAL_INST_BY8 )
    {
        u8 w;
        for ( w = 0; w < STATEDWORD; w++ )
            printf("%.8X", state[w+inst*STATEDWORD]);
        printf("\n");
        inst++;
    }
}
/* Print `rows` lines; line r shows words buf[r*cols] .. buf[r*cols+cols-1],
   each as 8 upper-case hex digits. */
static void print_hex_rows( const u32 *buf, u32 rows, u32 cols )
{
    u32 r, c;
    for ( r = 0; r < rows; r++ )
    {
        for ( c = 0; c < cols; c++ )
            printf("%08X", buf[r*cols+c]);
        printf("\n");
    }
}

/*
 * Benchmark and smoke-test driver for the 8-way parallel implementation.
 *
 * 1. Runs ace320 once on a known state and prints the state before and after.
 * 2. Times NUM_TEST back-to-back ace320 calls, NUM_ITER+1 times, printing
 *    cycles per byte.
 * 3. Times crypto_aead_encrypt NUM_ITER times, then prints plaintext,
 *    ciphertext and tag, decrypts, and prints the recovered plaintext and tag.
 *
 * Returns 0 on success, 1 if a buffer could not be allocated.
 */
int main(void)
{
    const u8 num_parallel_inst = 8*PARAL_INST_BY8;
    const u32 adlen = 4;  // Associated data length = adlen*32 bits
    const u32 plen = 32;  // Message length = plen*32 bits
    const u32 tlen = 4;   // 128 = 32*4 bits
    const u32 klen = 4;   // 128 = 32*4 bits
    u32 *state, *plaintext, *ciphertext, *tag, *key, *nonce, *ad;
    u8 *k, *pubn;
    u64 count_cc;
    u32 i, j;
    int rc = 1;

    key = malloc(sizeof *key * klen * num_parallel_inst);
    nonce = malloc(sizeof *nonce * klen * num_parallel_inst);
    tag = malloc(sizeof *tag * tlen * num_parallel_inst);
    ad = malloc(sizeof *ad * adlen * num_parallel_inst);
    plaintext = malloc(sizeof *plaintext * plen * num_parallel_inst);
    ciphertext = malloc(sizeof *ciphertext * plen * num_parallel_inst);
    k = malloc(sizeof *k * 16 * num_parallel_inst);
    pubn = malloc(sizeof *pubn * 16 * num_parallel_inst);
    state = malloc(sizeof *state * num_parallel_inst * STATEDWORD);
    if ( !key || !nonce || !tag || !ad || !plaintext || !ciphertext || !k || !pubn || !state )
    {
        fprintf(stderr, "Memory allocation failed\n");
        goto cleanup;
    }

    // Every instance starts from the state 0, 1, ..., STATEDWORD-1
    for ( i = 0; i < num_parallel_inst*STATEDWORD; i++ )
        state[i] = i%STATEDWORD;
    print_state(state);
    ace320( state );
    print_state(state);

    // key/nonce are only printed below; the AEAD calls receive k and pubn
    for ( i = 0; i < num_parallel_inst*klen; i++ )
    {
        nonce[i] = 0x40404040;
        key[i] = 0x10101010;
    }
    for ( i = 0; i < num_parallel_inst; i++ )
    {
        for ( j = 0; j < plen; j++ )
            plaintext[plen*i+j] = j%plen;
    }
    for ( i = 0; i < num_parallel_inst*adlen; i++ )
        ad[i] = 0xffffffff;
    for ( i = 0; i < num_parallel_inst*16; i++ )
    {
        k[i] = 0x0;
        pubn[i] = 0;
    }

    // Permutation throughput
    for ( i = 0; i < NUM_ITER+1; i++ )
    {
        count_cc = start_rdtsc();
        for ( j = 0; j < NUM_TEST; j++ )
            ace320( state );
        count_cc = end_rdtsc()-count_cc;
        printf("Cycles per byte = %f\n", (double)(count_cc)/(double)(num_parallel_inst*STATEBYTES*NUM_TEST));
    }
    printf("\n");

    //===================================================================================================================
    // AEAD: Encryption and Decryption Module//
    //===================================================================================================================
    printf("Nonce and Key:\n");
    for ( i = 0; i < num_parallel_inst; i++ )
    {
        printf( "%08X%08X%08X%08X", nonce[4*i+0], nonce[4*i+1], nonce[4*i+2], nonce[4*i+3]);
        printf( "%08X%08X%08X%08X\n", key[4*i+0], key[4*i+1], key[4*i+2], key[4*i+3]);
    }
    for ( i = 0; i < NUM_ITER; i++ )
    {
        count_cc = start_rdtsc();
        crypto_aead_encrypt( tag, tlen, ciphertext, plaintext, plen, ad, adlen, k, pubn, 16 );
        count_cc = end_rdtsc()-count_cc;
        printf("Encryption speed = %f cpb\n", (double)(count_cc)/(double)(num_parallel_inst*plen*4));
    }

    // Round trip: encrypt, show the results, decrypt, show them again
    crypto_aead_encrypt( tag, tlen, ciphertext, plaintext, plen, ad, adlen, k, pubn, 16 );
    printf("Original plaintext:\n");
    print_hex_rows( plaintext, num_parallel_inst, plen );
    printf("Ciphertext:\n");
    print_hex_rows( ciphertext, num_parallel_inst, plen );
    printf("Tag:\n");
    print_hex_rows( tag, num_parallel_inst, tlen );
    crypto_aead_decrypt( plaintext, ciphertext, plen, tag, tlen, ad, adlen, k, pubn, 16 );
    printf("Decrypted plaintext:\n");
    print_hex_rows( plaintext, num_parallel_inst, plen );
    printf("Tag:\n");
    print_hex_rows( tag, num_parallel_inst, tlen );
    rc = 0;

cleanup:
    // free(NULL) is a no-op, so this is safe after a partial allocation failure
    free(state);
    free(plaintext);
    free(ciphertext);
    free(tag);
    free(key);
    free(nonce);
    free(ad);
    free(k);
    free(pubn);
    return rc;
}
# Build rules for the SSE2 speed-test binary `acesse2` (speed.c + ace.c).
# Flag sets tried earlier are kept below, commented out.
#CC=gcc -O2 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -msse3 -mmmx -mavx -mavx2
#CC=gcc -Wall -O2 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -msse2 -mmmx -mavx -mavx2
CC=gcc -Wall -O3 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -msse2
#CC=gcc -O1 -fomit-frame-pointer -march=haswell -mtune=native -mavx
# Candidate -march values: ivybridge, skylake, sandybridge, haswell

all: acesse2_1

acesse2_1: acesse2

# Recipe lines must start with a TAB character.
acesse2: speed.c ace.c
	$(CC) -o $@ $^

# None of these targets creates a file of the same name.
.PHONY: all acesse2_1 clean
clean:
	rm -fr acesse2
/* Reference implementation of ACE-128, AEAD
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
#ifndef ACE_H
#define ACE_H
#include<stdint.h>
#include<x86intrin.h>
#include<smmintrin.h>
/* Size of one permutation state: 40 bytes = 320 bits. */
#define STATEBYTES 40
#define STATEDWORD 10 // 320/32 = 10 32-bit words per state
#define SIMECKROUND 8
#define NUMSTEPS 16
/* speed.c processes 4*PARAL_INST_BY4 states per call. */
#define PARAL_INST_BY4 1
/* Constant tables, one byte per entry, NUMSTEPS (16) entries each. */
static const unsigned char SC0[16]={0x50,0x5c,0x91,0x8d,0x53,0x60,0x68,0xe1,0xf6,0x9d,0x40,0x4f,0xbe,0x5b,0xe9,0x7f}; //Step constants (SC_{2i})
static const unsigned char SC1[16]={0x28,0xae,0x48,0xc6,0xa9,0x30,0x34,0x70,0x7b,0xce,0x20,0x27,0x5f,0xad,0x74,0x3f}; //Step constants (SC_{2i+1})
static const unsigned char SC2[16]={0x14,0x57,0x24,0x63,0x54,0x18,0x9a,0x38,0xbd,0x67,0x10,0x13,0x2f,0xd6,0xba,0x1f}; //Step constants (SC_{2i+2})
static const unsigned char RC0[16]={0x07,0x0a,0x9b,0xe0,0xd1,0x1a,0x22,0xf7,0x62,0x96,0x71,0xaa,0x2b,0xe9,0xcf,0xb7};//Round constants (RC_{2i})
static const unsigned char RC1[16]={0x53,0x5d,0x49,0x7f,0xbe,0x1d,0x28,0x6c,0x82,0x47,0x6b,0x88,0xdc,0x8b,0x59,0xc6};//Round constants (RC_{2i+1})
static const unsigned char RC2[16]={0x43,0xe4,0x5e,0xcc,0x32,0x4e,0x75,0x25,0xfd,0xf9,0x76,0xa0,0xb0,0x09,0x1e,0xad};//Round constants (RC_{2i+2})
typedef unsigned long long int u64;
typedef unsigned int u32;
/* NOTE(review): despite its name, u8 is an alias for unsigned int, not an 8-bit type. */
typedef unsigned int u8;
/* Rotate every 32-bit lane left by 5 / by 1 bit. */
#define ROT5(x) (_mm_slli_epi32(x, 5) | _mm_srli_epi32(x, 27))
#define ROT1(x) (_mm_slli_epi32(x, 1) | _mm_srli_epi32(x, 31))
/* Constant vector: lanes 0 and 2 hold 0xfffffffe^t1, lanes 1 and 3 hold 0xfffffffe^t2. */
#define RC(t1, t2) (_mm_set_epi32(0xfffffffe^t2, 0xfffffffe^t1, 0xfffffffe^t2, 0xfffffffe^t1))
/* Constant vector: lanes 0 and 2 are all ones, lane 1 holds 0xffffff00^t1, lane 3 holds 0xffffff00^t2. */
#define SC(t1, t2) (_mm_set_epi32(0xffffff00^t2, 0xffffffff, 0xffffff00^t1, 0xffffffff ))
/* Reorder the four 32-bit lanes to 0,2,1,3. */
#define SWAPREG1(x) (_mm_shuffle_epi32(x, _MM_SHUFFLE(3, 1, 2, 0)))
/* Swap the two 64-bit halves of the register. */
#define SWAPBLK(x) (_mm_slli_si128(x, 8)|_mm_srli_si128(x, 8))
/* Masks selecting the low / the high two 32-bit lanes. */
#define masklo (_mm_set_epi32(0x0, 0x0, 0xffffffff, 0xffffffff ))
#define maskhi (_mm_set_epi32(0xffffffff, 0xffffffff, 0x0, 0x0 ))
/*
 * ROAX(x, y, t1, t2): one rotate-AND-XOR round on 32-bit lanes.
 *   x <- (rot5(x) & x) ^ rot1(x) ^ RC(t1, t2) ^ y
 *   y <- previous value of x
 * x and y must be modifiable __m128i lvalues.
 */
#define ROAX(x, y, t1, t2)\
{\
    const __m128i roax_old_x = x;\
    x = y ^ RC(t1, t2) ^ ROT1(roax_old_x) ^ (roax_old_x & ROT5(roax_old_x));\
    y = roax_old_x;\
}
/*
 * PACK_SSb(x, y): regroup 32-bit lanes in place.
 * Afterwards x = x0,x2,y0,y2 and y = x1,x3,y1,y3.
 */
#define PACK_SSb(x, y)\
{\
    const __m128i pk_sorted_x = SWAPREG1(x);\
    const __m128i pk_sorted_y = SWAPREG1(y);\
    y = _mm_unpackhi_epi64(pk_sorted_x, pk_sorted_y);\
    x = _mm_unpacklo_epi64(pk_sorted_x, pk_sorted_y);\
}
/*
 * UNPACK_SSb(x, y): interleave the 32-bit lanes of x and y in place.
 * Afterwards x = x0,y0,x1,y1 and y = x2,y2,x3,y3.
 */
#define UNPACK_SSb(x, y)\
{\
    const __m128i un_lo = _mm_unpacklo_epi32(x, y);\
    const __m128i un_hi = _mm_unpackhi_epi32(x, y);\
    y = un_hi;\
    x = un_lo;\
}
/*
 * PACK(x, y, z, w, state, i1, i2, i3, i4): load four unaligned 128-bit rows
 * from state+i1 .. state+i4 and split them by 64-bit half.
 *   x = low halves of rows i1,i2    z = high halves of rows i1,i2
 *   y = low halves of rows i3,i4    w = high halves of rows i3,i4
 * Rows i3/i4 are loaded only after x and z have been written, as before.
 */
#define PACK(x, y, z, w, state, i1, i2, i3, i4)\
{\
    const __m128i pk_row1 = _mm_loadu_si128((void *) (state + i1));\
    const __m128i pk_row2 = _mm_loadu_si128((void *) (state + i2));\
    x = _mm_unpacklo_epi64(pk_row1, pk_row2);\
    z = _mm_unpackhi_epi64(pk_row1, pk_row2);\
    {\
        const __m128i pk_row3 = _mm_loadu_si128((void *) (state + i3));\
        const __m128i pk_row4 = _mm_loadu_si128((void *) (state + i4));\
        y = _mm_unpacklo_epi64(pk_row3, pk_row4);\
        w = _mm_unpackhi_epi64(pk_row3, pk_row4);\
    }\
}
/*
 * UNPACK(x, y, z, w): regroup 64-bit halves in place, pairing x with z and y with w.
 * Afterwards x = x.lo,z.lo and z = x.hi,z.hi; y and w likewise.
 */
#define UNPACK(x, y, z, w)\
{\
    const __m128i un_lo_xz = _mm_unpacklo_epi64(x, z);\
    const __m128i un_hi_xz = _mm_unpackhi_epi64(x, z);\
    x = un_lo_xz;\
    z = un_hi_xz;\
    {\
        const __m128i un_lo_yw = _mm_unpacklo_epi64(y, w);\
        const __m128i un_hi_yw = _mm_unpackhi_epi64(y, w);\
        y = un_lo_yw;\
        w = un_hi_yw;\
    }\
}
/* Entry points implemented outside this header. */

/* Applies the permutation in place to the buffer at `state`.
   speed.c passes 4*PARAL_INST_BY4 states of STATEDWORD words each. */
void ace320( u32 *state );
/* NOTE(review): not called from speed.c; semantics are not visible here -- confirm against the implementation. */
void ace_encrypt( u32 *tag, u32 tlen, u32 *ciphertext, u32 *plaintext, u32 plen, u32 *key, u32 *nonce, u32 klen );
/* AEAD encryption. speed.c passes the lengths tlen/mlen/adlen as counts of
   32-bit words per instance and klen as the literal 16.
   NOTE(review): the meaning of the return value is not visible here -- confirm against the implementation. */
int crypto_aead_encrypt( u32 *tag, u32 tlen, u32 *c, u32 *m, u32 mlen, u32 *ad, u32 adlen, u8 *k, u8 *npub, u32 klen );
/* AEAD decryption; same parameter conventions as crypto_aead_encrypt.
   NOTE(review): presumably reports tag verification through the return value -- confirm. */
int crypto_aead_decrypt( u32 *m, u32 *c, u32 mlen, u32 *tag, u32 tlen, u32 *ad, u32 adlen, u8 *k, u8 *npub, u32 klen );
#endif
#ifndef CLOCK_CYCLE_H
#define CLOCK_CYCLE_H
/*
 * Time-stamp-counter helpers for x86-64 benchmarking.
 *
 * Both functions are `static inline` so that this header can be included from
 * more than one translation unit without multiple-definition link errors.
 */
typedef unsigned long long int u64;

/*
 * Read the time-stamp counter at the start of a measured region.
 * CPUID is executed first so that earlier instructions have completed before
 * RDTSC samples the counter. Returns the 64-bit counter value.
 */
static inline u64 start_rdtsc( void )
{
    unsigned high, low;
    /* The "memory" clobber stops the compiler from moving loads/stores of the
       measured code across this point. */
    __asm__ volatile("CPUID\n\t"
                     "RDTSC\n\t"
                     "mov %%edx, %0\n\t"
                     "mov %%eax, %1\n\t"
                     : "=r" (high), "=r" (low)
                     :
                     : "%rax", "%rbx", "%rcx", "%rdx", "memory");
    return ( ((u64)low) | (((u64)high) << 32));
}

/*
 * Read the time-stamp counter at the end of a measured region.
 * RDTSCP samples the counter, the value is copied out, and CPUID then keeps
 * later instructions from starting early. Returns the 64-bit counter value.
 */
static inline u64 end_rdtsc( void )
{
    unsigned high, low;
    __asm__ volatile("RDTSCP\n\t"
                     "mov %%edx, %0\n\t"
                     "mov %%eax,%1\n\t"
                     "CPUID\n\t"
                     : "=r" (high), "=r" (low)
                     :
                     : "%rax", "%rbx", "%rcx", "%rdx", "memory");
    return ( ((u64)low) | (((u64)high) << 32));
}
#endif
/* Reference implementation of ACE-128, AEAD
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#include<math.h>
#include "ace.h"
#include "clock_cycle.h"
#define NUM_ITER 2000
#define NUM_TEST 500
/*
 * Print the 4*PARAL_INST_BY4 parallel states stored back to back in `state`:
 * one line per state, STATEDWORD words each, 8 upper-case hex digits per word.
 */
void print_state ( u32 *state )
{
    u8 inst = 0;
    while ( inst < 4*PARAL_INST_BY4 )
    {
        u8 w;
        for ( w = 0; w < STATEDWORD; w++ )
            printf("%.8X", state[w+inst*STATEDWORD]);
        printf("\n");
        inst++;
    }
}
/* Print `rows` lines; line r shows words buf[r*cols] .. buf[r*cols+cols-1],
   each as 8 upper-case hex digits. */
static void print_hex_rows( const u32 *buf, u32 rows, u32 cols )
{
    u32 r, c;
    for ( r = 0; r < rows; r++ )
    {
        for ( c = 0; c < cols; c++ )
            printf("%08X", buf[r*cols+c]);
        printf("\n");
    }
}

/*
 * Benchmark and smoke-test driver for the 4-way parallel implementation.
 *
 * 1. Runs ace320 once on a known state and prints the state before and after.
 * 2. Times NUM_TEST back-to-back ace320 calls, NUM_ITER+1 times, printing
 *    cycles per byte, then prints the resulting state.
 * 3. Times crypto_aead_encrypt NUM_ITER times, then prints plaintext,
 *    ciphertext and tag, decrypts, and prints the recovered plaintext and tag.
 *
 * Returns 0 on success, 1 if a buffer could not be allocated.
 */
int main(void)
{
    const u8 num_parallel_inst = 4*PARAL_INST_BY4;
    const u32 adlen = 4;  // Associated data length = adlen*32 bits
    const u32 plen = 32;  // Message length = plen*32 bits
    const u32 tlen = 4;   // 128 = 32*4 bits
    const u32 klen = 4;   // 128 = 32*4 bits
    u32 *state, *plaintext, *ciphertext, *tag, *key, *nonce, *ad;
    u8 *k, *pubn;
    u64 count_cc;
    u32 i, j;
    int rc = 1;

    key = malloc(sizeof *key * klen * num_parallel_inst);
    nonce = malloc(sizeof *nonce * klen * num_parallel_inst);
    tag = malloc(sizeof *tag * tlen * num_parallel_inst);
    ad = malloc(sizeof *ad * adlen * num_parallel_inst);
    plaintext = malloc(sizeof *plaintext * plen * num_parallel_inst);
    ciphertext = malloc(sizeof *ciphertext * plen * num_parallel_inst);
    k = malloc(sizeof *k * 16 * num_parallel_inst);
    pubn = malloc(sizeof *pubn * 16 * num_parallel_inst);
    state = malloc(sizeof *state * num_parallel_inst * STATEDWORD);
    if ( !key || !nonce || !tag || !ad || !plaintext || !ciphertext || !k || !pubn || !state )
    {
        fprintf(stderr, "Memory allocation failed\n");
        goto cleanup;
    }

    // Every instance starts from the state 0, 1, ..., STATEDWORD-1
    for ( i = 0; i < num_parallel_inst*STATEDWORD; i++ )
        state[i] = i%STATEDWORD;
    print_state(state);
    ace320( state );
    print_state(state);

    // key/nonce are only printed below; the AEAD calls receive k and pubn
    for ( i = 0; i < num_parallel_inst*klen; i++ )
    {
        nonce[i] = 0x40404040;
        key[i] = 0x10101010;
    }
    for ( i = 0; i < num_parallel_inst; i++ )
    {
        for ( j = 0; j < plen; j++ )
            plaintext[i*plen+j] = j%plen;
    }
    for ( i = 0; i < num_parallel_inst*adlen; i++ )
        ad[i] = 0xffffffff;
    for ( i = 0; i < num_parallel_inst*16; i++ )
    {
        k[i] = 0x0;
        pubn[i] = 0x0;
    }

    // Permutation throughput
    for ( i = 0; i < NUM_ITER+1; i++ )
    {
        count_cc = start_rdtsc();
        for ( j = 0; j < NUM_TEST; j++ )
            ace320( state );
        count_cc = end_rdtsc()-count_cc;
        printf("Cycles per byte = %f\n", (double)(count_cc)/(double)(num_parallel_inst*STATEBYTES*NUM_TEST));
    }
    print_state(state);
    printf("\n");

    //===================================================================================================================
    // AEAD: Encryption and Decryption Module//
    //===================================================================================================================
    printf("Nonce and Key:\n");
    for ( i = 0; i < num_parallel_inst; i++ )
    {
        printf( "%08X%08X%08X%08X", nonce[4*i+0], nonce[4*i+1], nonce[4*i+2], nonce[4*i+3]);
        printf( "%08X%08X%08X%08X\n", key[4*i+0], key[4*i+1], key[4*i+2], key[4*i+3]);
    }
    // NUM_ITER is 2000, the literal count used previously
    for ( i = 0; i < NUM_ITER; i++ )
    {
        count_cc = start_rdtsc();
        crypto_aead_encrypt( tag, tlen, ciphertext, plaintext, plen, ad, adlen, k, pubn, 16 );
        count_cc = end_rdtsc()-count_cc;
        printf("Encryption speed = %f cpb\n", (double)(count_cc)/(double)(num_parallel_inst*plen*4));
    }

    // Round trip: encrypt, show the results, decrypt, show them again
    crypto_aead_encrypt( tag, tlen, ciphertext, plaintext, plen, ad, adlen, k, pubn, 16 );
    printf("Original plaintext:\n");
    print_hex_rows( plaintext, num_parallel_inst, plen );
    printf("Ciphertext:\n");
    print_hex_rows( ciphertext, num_parallel_inst, plen );
    printf("Tag:\n");
    print_hex_rows( tag, num_parallel_inst, tlen );
    crypto_aead_decrypt( plaintext, ciphertext, plen, tag, tlen, ad, adlen, k, pubn, 16 );
    printf("Decrypted plaintext:\n");
    print_hex_rows( plaintext, num_parallel_inst, plen );
    printf("Tag:\n");
    print_hex_rows( tag, num_parallel_inst, tlen );
    rc = 0;

cleanup:
    // free(NULL) is a no-op, so this is safe after a partial allocation failure
    free(state);
    free(plaintext);
    free(ciphertext);
    free(tag);
    free(key);
    free(nonce);
    free(ad);
    free(k);
    free(pubn);
    return rc;
}
-- This work is licensed under a Creative Commons
-- Attribution-NonCommercial-ShareAlike 4.0 International License.
-- http://creativecommons.org/licenses/by-nc-sa/4.0
-- Mark D. Aagaard
-- Riham AlTawy
-- Guang Gong
-- Kalikinkar Mandal
-- Raghvendra Rohit
-- Marat Sattarov
-- http://comsec.uwaterloo.ca
-- This is a human-readable summary of (and not a substitute for) the license.
-- You are free to:
-- Share — copy and redistribute the material in any medium or format
-- Adapt — remix, transform, and build upon the material
-- The licensor cannot revoke these freedoms as long as you follow
-- the license terms.
-- Under the following terms:
-- Attribution — You must give appropriate credit, provide a link to
-- the license, and indicate if changes were made. You may do so in
-- any reasonable manner, but not in any way that suggests the
-- licensor endorses you or your use.
-- NonCommercial — You may not use the material for commercial
-- purposes.
-- ShareAlike — If you remix, transform, or build upon the material,
-- you must distribute your contributions under the same license as
-- the original.
-- No additional restrictions — You may not apply legal terms or
-- technological measures that legally restrict others from doing
-- anything the license permits.
-- Structural top level: connects the datapath (dp) to its controller (ctl).
-- The controller drives the control word and the one-hot vector consumed by
-- the datapath; every other port is passed straight through from the entity.
architecture rtl of ace is

  signal ctl_control : ace_ctl_ty;  -- ctl.o_control -> dp.i_control
  signal ctl_onehot  : onehot_ty;   -- ctl.o_onehot  -> dp.i_onehot

begin

  u_dp :
    entity work.dp port map
      ( clk       => clk
      , reset     => reset
      , i_mode    => i_mode
      , i_control => ctl_control
      , i_onehot  => ctl_onehot
      , i_dom_sep => i_dom_sep
      , i_valid   => i_valid
      , i_data    => i_data
      , i_padding => i_padding
      , o_data    => o_data
      );

  u_ctl :
    entity work.ctl port map
      ( clk       => clk
      , reset     => reset
      , i_mode    => i_mode
      , i_dom_sep => i_dom_sep
      , i_valid   => i_valid
      , i_padding => i_padding
      , o_valid   => o_valid
      , o_onehot  => ctl_onehot
      , o_ready   => o_ready
      , o_control => ctl_control
      );

end architecture;
-- This work is licensed under a Creative Commons
-- Attribution-NonCommercial-ShareAlike 4.0 International License.
-- http://creativecommons.org/licenses/by-nc-sa/4.0
-- Mark D. Aagaard
-- Riham AlTawy
-- Guang Gong
-- Kalikinkar Mandal
-- Raghvendra Rohit
-- Marat Sattarov
-- http://comsec.uwaterloo.ca
-- This is a human-readable summary of (and not a substitute for) the license.
-- You are free to:
-- Share — copy and redistribute the material in any medium or format
-- Adapt — remix, transform, and build upon the material
-- The licensor cannot revoke these freedoms as long as you follow
-- the license terms.
-- Under the following terms:
-- Attribution — You must give appropriate credit, provide a link to
-- the license, and indicate if changes were made. You may do so in
-- any reasonable manner, but not in any way that suggests the
-- licensor endorses you or your use.
-- NonCommercial — You may not use the material for commercial
-- purposes.
-- ShareAlike — If you remix, transform, or build upon the material,
-- you must distribute your contributions under the same license as
-- the original.
-- No additional restrictions — You may not apply legal terms or
-- technological measures that legally restrict others from doing
-- anything the license permits.
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.ace_pkg.all;
-- Top-level interface of the ACE core.
-- Types mode_ty, domsep_ty and word come from work.ace_pkg.
entity ace is
  port
    ( clk       : in  std_logic
    ; reset     : in  std_logic
    ; i_mode    : in  mode_ty      -- operating mode (2 bits)
    ; i_dom_sep : in  domsep_ty    -- domain separator (2 bits)
    ; i_valid   : in  std_logic    -- qualifies the input
    ; i_data    : in  word         -- input data word
    ; i_padding : in  std_logic
    ; o_valid   : out std_logic    -- qualifies o_data
    ; o_ready   : out std_logic
    ; o_data    : out word         -- output data word
    );
end entity ace;
-- This work is licensed under a Creative Commons
-- Attribution-NonCommercial-ShareAlike 4.0 International License.
-- http://creativecommons.org/licenses/by-nc-sa/4.0
-- Mark D. Aagaard
-- Riham AlTawy
-- Guang Gong
-- Kalikinkar Mandal
-- Raghvendra Rohit
-- Marat Sattarov
-- http://comsec.uwaterloo.ca
-- This is a human-readable summary of (and not a substitute for) the license.
-- You are free to:
-- Share — copy and redistribute the material in any medium or format
-- Adapt — remix, transform, and build upon the material
-- The licensor cannot revoke these freedoms as long as you follow
-- the license terms.
-- Under the following terms:
-- Attribution — You must give appropriate credit, provide a link to
-- the license, and indicate if changes were made. You may do so in
-- any reasonable manner, but not in any way that suggests the
-- licensor endorses you or your use.
-- NonCommercial — You may not use the material for commercial
-- purposes.
-- ShareAlike — If you remix, transform, or build upon the material,
-- you must distribute your contributions under the same license as
-- the original.
-- No additional restrictions — You may not apply legal terms or
-- technological measures that legally restrict others from doing
-- anything the license permits.
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
-- Shared constants, types and helper-function declarations for the ACE design.
package ace_pkg is

  ------------------------------------------------------------
  -- constant generator (LFSR) output: lfsr_c_sz+3 bits
  constant lfsr_c_sz    : integer := 7;
  subtype lfsr_c_output is std_logic_vector(0 to lfsr_c_sz+2);

  ------------------------------------------------------------
  -- data widths: a half word is 32 bits, a word is 64 bits
  constant half_word_sz : natural := 32;
  constant word_sz      : natural := 2*half_word_sz;

  subtype half_word is std_logic_vector( 0 to half_word_sz - 1 );
  subtype word      is std_logic_vector( 0 to word_sz - 1 );

  type word_vector      is array( natural range <> ) of word;
  type half_word_vector is array( natural range <> ) of half_word;

  ------------------------------------------------------------
  -- state: five words A, B, C, D, E (320 bits)
  constant state_sz          : natural := 320;
  constant word_max_idx      : natural := state_sz / word_sz - 1;
  constant half_word_max_idx : natural := state_sz / half_word_sz - 1;
  constant key_sz            : natural := 128;
  constant nonce_sz          : natural := 128;

  subtype word_state_ty is word_vector ( 0 to word_max_idx );

  -- positions of the words in word_state_ty
  constant a_idx : natural := 0;
  constant b_idx : natural := 1;
  constant c_idx : natural := 2;
  constant d_idx : natural := 3;
  constant e_idx : natural := 4;

  subtype half_word_data     is half_word_vector ( 0 to 1 );
  subtype half_word_state_ty is half_word_vector ( 0 to half_word_max_idx );

  -- positions of the half words in half_word_state_ty;
  -- within each word the "1" half has the lower index
  constant a0_idx : natural := 1;
  constant a1_idx : natural := 0;
  constant b0_idx : natural := 3;
  constant b1_idx : natural := 2;
  constant c0_idx : natural := 5;
  constant c1_idx : natural := 4;
  constant d0_idx : natural := 7;
  constant d1_idx : natural := 6;
  constant e0_idx : natural := 9;
  constant e1_idx : natural := 8;

  function b2x( b : boolean ) return std_logic;
  function half_words_to_words( st : half_word_state_ty ) return word_state_ty;
  function words_to_half_words( st : word_state_ty ) return half_word_state_ty;

  ------------------------------------------------------------
  -- mode
  subtype mode_ty is std_logic_vector( 1 downto 0 ); -- top-level input
  constant encrypt_mode : mode_ty := ( 1 => '0', 0 => '0' );
  constant decrypt_mode : mode_ty := ( 1 => '0', 0 => '1' );
  constant absorb_mode  : mode_ty := ( 1 => '1', 0 => '0' );
  constant squeeze_mode : mode_ty := ( 1 => '1', 0 => '1' );

  subtype domsep_ty is std_logic_vector( 1 downto 0 ); -- top-level input

  -- derived control (from counter and more); one bit per flag
  subtype ace_ctl_ty is std_logic_vector( 7 downto 0 );
  constant absorb_idx       : natural := 0;
  constant replace_idx      : natural := 1;
  constant output_idx       : natural := 2;
  constant endstep_idx      : natural := 3;
  constant permoff_idx      : natural := 4;
  constant squeeze_idx      : natural := 5;
  constant lfsr_c_reset_idx : natural := 6;
  constant lfsr_c_en_idx    : natural := 7;

  -- Aliases without the "_c" infix: the dp architecture indexes i_control
  -- with lfsr_reset_idx and lfsr_en_idx.
  constant lfsr_reset_idx   : natural := lfsr_c_reset_idx;
  constant lfsr_en_idx      : natural := lfsr_c_en_idx;

  -- extra control for load, init, fin, tag, squeeze
  subtype onehot_ty is std_logic_vector( 3 downto 0);

  ------------------------------------------------------------
  -- round and step counters
  -- use last bit for end ACE perm - for o_ready
  -- -> i_valid will reset the counter!
  -- counter only runs if msb = 0
  constant bits_counter : natural := 8;
  subtype count_ty is unsigned( bits_counter - 1 downto 0 );

  ----------------------------------------------------------------------
  -- helper functions
  -- function "sll"( a : half_word; n : natural ) return half_word;
  function onehot_rotate (a : onehot_ty) return onehot_ty;
  function vector_to_data ( st : half_word_data ) return word;
  function data_to_vector ( st : word ) return half_word_data;
  ----------------------------------------------------------------------

end package;
----------------------------------------------------------------------
--
----------------------------------------------------------------------
-- Implementations of the helper functions declared in ace_pkg.
package body ace_pkg is

  -- Rotate the one-hot vector by one position towards the higher index;
  -- the top bit wraps around to bit 0.
  function onehot_rotate (a : onehot_ty)
    return onehot_ty
  is
    variable z : onehot_ty;
  begin
    z(onehot_ty'high downto 1) := a(onehot_ty'high - 1 downto 0);
    z(0)                       := a(onehot_ty'high);
    return z;
  end function;

  -- Convert a boolean to std_logic: true -> '1', false -> '0'.
  function b2x( b : boolean ) return std_logic is
  begin
    if b then
      return '1';
    else
      return '0';
    end if;
  end function;

  ------------------------------------------------------------
  -- standard vhdl operators cast to state
  ------------------------------------------------------------
  -- function "sll"( a : half_word; n : natural ) return half_word is
  -- begin
  --   return half_word( std_logic_vector( a ) sll n );
  -- end function;

  ------------------------------------------------------------
  -- state functions
  ------------------------------------------------------------

  -- Pack the half-word state into words: word i takes half word 2*i in its
  -- left half (indices 0 to half_word_sz-1) and half word 2*i+1 in its right half.
  -- NOTE(review): the original author asked for this ordering to be checked.
  function half_words_to_words( st : half_word_state_ty )
    return word_state_ty
  is
    variable z : word_state_ty;
  begin
    -- the loop parameter i is declared implicitly by the for statement
    main_loop : for i in 0 to word_max_idx loop
      z(i)(0 to half_word_sz - 1)       := st(2*i);
      z(i)(half_word_sz to word_sz - 1) := st(2*i+1);
    end loop;
    return z;
  end function;

  -- Inverse of half_words_to_words: split every word into its two half words.
  function words_to_half_words( st : word_state_ty )
    return half_word_state_ty
  is
    variable z : half_word_state_ty;
  begin
    main_loop : for i in 0 to word_max_idx loop
      z(2*i)   := st(i)(0 to half_word_sz - 1);
      z(2*i+1) := st(i)(half_word_sz to word_sz - 1);
    end loop;
    return z;
  end function;

  -- Split one word into two half words: element 0 is the left half.
  function data_to_vector( st : word )
    return half_word_data
  is
    variable z : half_word_data;
  begin
    z(0) := st(0 to half_word_sz - 1);
    z(1) := st(half_word_sz to word_sz - 1);
    return z;
  end function;

  -- Join two half words into one word: element 0 becomes the left half.
  function vector_to_data( st : half_word_data )
    return word
  is
    variable z : word;
  begin
    z(0 to half_word_sz - 1)       := st(0);
    z(half_word_sz to word_sz - 1) := st(1);
    return z;
  end function;

end package body;
# Simulation run script: sets up the wave window when running in the GUI,
# records a VCD dump, runs the simulation to completion, and then either
# zooms the wave window or exits.
# Expects the variables gui_mode and sim_mode to be set by the caller.

# Top-level testbench signals, shown only in GUI mode.
if { $gui_mode } {
add wave clk
add wave reset
add wave i_mode
add wave i_dom_sep
add wave o_ready
add wave i_valid
add wave i_data
add wave i_padding
add wave o_valid
add wave o_data
# Internal signals of the unit under test, shown only when sim_mode is PROG_MODE.
if { $sim_mode eq "PROG_MODE" } then {
add wave -noupdate -divider -height 32 STUFF
add wave /uut/u_ctl/state
add wave /uut/u_ctl/o_ready
add wave /uut/u_ctl/i_valid
add wave /uut/u_dp/i_data
add wave /uut/u_ctl/o_valid
add wave /uut/u_dp/o_data
# Datapath group
add wave -noupdate -divider -height 32 DP
add wave -radix binary /uut/u_dp/ctl_const
add wave /uut/u_dp/i_data
add wave /uut/u_dp/o_data
# NOTE(review): ctl_const is added a second time here -- confirm whether that is intended.
add wave -radix binary /uut/u_dp/ctl_const
add wave /uut/u_dp/lfsr_c_en
add wave /uut/u_dp/lfsr_c_reset
add wave /uut/u_dp/permoff
add wave /uut/u_dp/endstep
add wave /uut/u_dp/absorb
add wave /uut/u_dp/replace
add wave /uut/u_dp/output
add wave /uut/u_dp/dsxor
add wave /uut/u_dp/post_input
add wave /uut/u_dp/pre_round
add wave /uut/u_dp/post_round
add wave /uut/u_dp/post_xor
add wave /uut/u_dp/post_step_const
add wave /uut/u_dp/post_linear
add wave /uut/u_dp/ace_path
add wave /uut/u_dp/ace_state
# Controller group
add wave -noupdate -divider -height 32 CTL
add wave /uut/u_ctl/state
add wave -radix unsigned /uut/u_ctl/count
add wave /uut/u_ctl/i_valid
add wave /uut/u_ctl/o_valid
add wave /uut/u_ctl/o_ready
add wave -radix binary /uut/u_ctl/onehot
add wave /uut/u_ctl/lfsr_c_reset
add wave -radix binary /uut/u_ctl/i_mode
add wave -radix binary /uut/u_ctl/i_dom_sep
}
}
# Record a VCD dump to ace.vcd for the whole run.
vcd file ace.vcd
vcd add /ace_tb/uut/*
vcd add -r *
vcd on
run -all
vcd checkpoint
vcd off
vcd flush
# In GUI mode keep the session open and fit the waveform; otherwise quit.
if { $gui_mode } {
wave zoom full
} else {
exit
}
-- This work is licensed under a Creative Commons
-- Attribution-NonCommercial-ShareAlike 4.0 International License.
-- http://creativecommons.org/licenses/by-nc-sa/4.0
-- Mark D. Aagaard
-- Riham AlTawy
-- Guang Gong
-- Kalikinkar Mandal
-- Raghvendra Rohit
-- Marat Sattarov
-- http://comsec.uwaterloo.ca
-- This is a human-readable summary of (and not a substitute for) the license.
-- You are free to:
-- Share — copy and redistribute the material in any medium or format
-- Adapt — remix, transform, and build upon the material
-- The licensor cannot revoke these freedoms as long as you follow
-- the license terms.
-- Under the following terms:
-- Attribution — You must give appropriate credit, provide a link to
-- the license, and indicate if changes were made. You may do so in
-- any reasonable manner, but not in any way that suggests the
-- licensor endorses you or your use.
-- NonCommercial — You may not use the material for commercial
-- purposes.
-- ShareAlike — If you remix, transform, or build upon the material,
-- you must distribute your contributions under the same license as
-- the original.
-- No additional restrictions — You may not apply legal terms or
-- technological measures that legally restrict others from doing
-- anything the license permits.
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.ace_pkg.all;
-- Datapath of the ACE core.
-- Holds the ACE state register and computes, combinationally, one round
-- (sb_64 on words a, c, e) followed by the optional end-of-step layer
-- (XORs, step constants, word permutation).
--
-- Ports:
--   clk       : clock; the state register updates on its rising edge
--   reset     : not referenced in the architecture below
--   i_mode    : not referenced in the architecture below
--   i_control : control vector, decoded into the individual strobes below
--   i_onehot  : not referenced in the architecture below
--   i_dom_sep : two domain-separator bits XORed into state half-word e0
--   i_valid   : enables the domain-separator XOR
--   i_data    : input word (only converted to i_data_vector here)
--   i_padding : not referenced in the architecture below
--   o_data    : output word built from o_data_vector
entity dp is
port
( clk : in std_logic
; reset : in std_logic
; i_mode : in mode_ty
; i_control : in ace_ctl_ty
; i_onehot : in onehot_ty
; i_dom_sep : in domsep_ty
; i_valid : in std_logic
; i_data : in word
; i_padding : in std_logic
; o_data : out word
);
end entity;
architecture rtl of dp is
-- Individual control strobes extracted from i_control.
-- NOTE(review): only endstep, lfsr_reset and lfsr_en are read in this
-- architecture; absorb, replace, output, permoff and squeeze are assigned
-- but never used here.
signal permoff, endstep, squeeze,
absorb, replace, output,
lfsr_reset, lfsr_en : std_logic;
-- ace_state is the registered state (half-word view); post_input is the state
-- after the domain-separator XOR.
signal ace_state, post_input : half_word_state_ty;
-- Successive combinational stages of one round/step (word view).
signal pre_round, post_round,
post_xor, post_step_const,
post_linear, ace_path : word_state_ty;
-- Half-word carrying the domain separator in its two highest-index bits.
signal dsxor : half_word;
signal i_data_vector, o_data_vector : half_word_data;
-- Round/step constant bits delivered by the LFSR.
signal ctl_const : lfsr_c_output;
begin
-- Constant generator. The constant bits feed the sb_64 instances (round
-- constants) and the step-constant XOR stage below.
u_lfsr :
entity work.lfsr port map
( clk => clk
, reset => lfsr_reset
, lfsr_en => lfsr_en
, o_const => ctl_const
);
-- Conversions between the external word format and half-word vectors.
-- NOTE(review): i_data_vector is not read anywhere below.
i_data_vector <= data_to_vector( i_data );
o_data <= vector_to_data( o_data_vector );
-- Decode the control vector into named strobes.
absorb <= i_control( absorb_idx );
replace <= i_control( replace_idx );
output <= i_control( output_idx );
endstep <= i_control( endstep_idx );
permoff <= i_control( permoff_idx );
squeeze <= i_control( squeeze_idx );
lfsr_reset <= i_control( lfsr_reset_idx );
lfsr_en <= i_control( lfsr_en_idx );
----------------------------------------------------------------------
-- post input: do input and domain separator and replace
-- In this architecture every half-word is passed through unchanged except e0,
-- which receives the domain-separator XOR when i_valid = '1'.
post_input( a1_idx ) <= ace_state( a1_idx );
post_input( a0_idx ) <= ace_state( a0_idx );
post_input( b0_idx ) <= ace_state( b0_idx );
post_input( b1_idx ) <= ace_state( b1_idx );
post_input( c0_idx ) <= ace_state( c0_idx );
post_input( c1_idx ) <= ace_state( c1_idx );
post_input( d0_idx ) <= ace_state( d0_idx );
post_input( d1_idx ) <= ace_state( d1_idx );
post_input( e1_idx ) <= ace_state( e1_idx );
-- dsxor = zeros, then i_dom_sep(1), then i_dom_sep(0) in the last two positions.
dsxor( 0 to half_word_sz - 3 ) <= ( others => '0' );
dsxor( half_word_sz - 2) <= i_dom_sep(1);
dsxor( half_word_sz - 1) <= i_dom_sep(0);
post_input( e0_idx ) <= dsxor xor ace_state( e0_idx ) when (i_valid = '1')
else ace_state( e0_idx );
----------------------------------------------------------------------
-- Output tap: element 0 of the output vector is state half-word a1.
-- NOTE(review): no other element of o_data_vector is driven in this file.
o_data_vector(0) <= ace_state(a1_idx);
---------------------------------------------------------------------
-- sb 64 ==> post round
-- Words a, c and e go through one sb_64 round each, with one constant bit
-- apiece; words b and d bypass the round unchanged.
pre_round <= half_words_to_words( post_input );
a_sb_64 :
entity work.sb_64 port map
( i_state => pre_round( a_idx )
, i_rc => ctl_const( lfsr_c_sz + 2 ) --rc0
, o_state => post_round( a_idx )
);
post_round( b_idx ) <= pre_round( b_idx );
c_sb_64 :
entity work.sb_64 port map
( i_state => pre_round( c_idx )
, i_rc => ctl_const( lfsr_c_sz + 1 ) --rc1
, o_state => post_round( c_idx )
);
post_round( d_idx ) <= pre_round( d_idx );
e_sb_64 :
entity work.sb_64 port map
( i_state => pre_round( e_idx )
, i_rc => ctl_const( lfsr_c_sz ) -- rc2
, o_state => post_round( e_idx )
);
----------------------------------------------------------------------
-- XORs to the left ==> post xor
-- a and c pass through; b ^= c, d ^= e, e ^= a.
post_xor( a_idx ) <= post_round( a_idx );
post_xor( c_idx ) <= post_round( c_idx );
post_xor( b_idx ) <= post_round( b_idx ) xor post_round( c_idx );
post_xor( d_idx ) <= post_round( d_idx ) xor post_round( e_idx );
post_xor( e_idx ) <= post_round( e_idx ) xor post_round( a_idx );
----------------------------------------------------------------------
-- XOR with step constant ==> post step const
-- For b, d and e: bits 0 to 55 are inverted, and bits 56 to 63 are XORed with
-- a slice of the LFSR output.
-- NOTE(review): each slice is lfsr_c_sz + 1 bits wide and must match the
-- 8-bit target, so lfsr_c_sz is presumably 7 -- confirm in ace_pkg.
post_step_const( a_idx ) <= post_xor( a_idx );
post_step_const( c_idx ) <= post_xor( c_idx );
post_step_const( b_idx)( 0 to 55) <= not post_xor( b_idx )( 0 to 55 );
post_step_const( b_idx)( 56 to 63) <= post_xor( b_idx )( 56 to 63 ) xor ctl_const( 2 to lfsr_c_sz + 2 ); -- sc0
post_step_const( d_idx)( 0 to 55) <= not post_xor( d_idx )( 0 to 55 );
post_step_const( d_idx)( 56 to 63) <= post_xor( d_idx )( 56 to 63 ) xor ctl_const( 1 to lfsr_c_sz + 1 ); -- sc1
post_step_const( e_idx)( 0 to 55) <= not post_xor( e_idx )( 0 to 55 );
post_step_const( e_idx)( 56 to 63) <= post_xor( e_idx )( 56 to 63 ) xor ctl_const( 0 to lfsr_c_sz ); --sc2
------------------------------------------------------------
-- post linear layer pi = (3,2,0,4,1) ==> post linear
-- Word permutation: a <- d, b <- c, c <- a, d <- e, e <- b.
post_linear( a_idx ) <= post_step_const( d_idx );
post_linear( b_idx ) <= post_step_const( c_idx );
post_linear( c_idx ) <= post_step_const( a_idx );
post_linear( d_idx ) <= post_step_const( e_idx );
post_linear( e_idx ) <= post_step_const( b_idx );
------------------------------------------------------------
-- update state
-- When endstep = '1' the full step result (round + XOR + constants +
-- permutation) is stored; otherwise only the round output is stored.
ace_path( a_idx ) <= post_linear( a_idx ) when endstep = '1' else post_round( a_idx );
ace_path( b_idx ) <= post_linear( b_idx ) when endstep = '1' else post_round( b_idx );
ace_path( c_idx ) <= post_linear( c_idx ) when endstep = '1' else post_round( c_idx );
ace_path( d_idx ) <= post_linear( d_idx ) when endstep = '1' else post_round( d_idx );
ace_path( e_idx ) <= post_linear( e_idx ) when endstep = '1' else post_round( e_idx );
-- State register: loaded on every rising clock edge (no reset, no enable).
process
begin
wait until rising_edge( clk );
ace_state <= words_to_half_words(ace_path);
end process;
end architecture;
-- This work is licensed under a Creative Commons
-- Attribution-NonCommercial-ShareAlike 4.0 International License.
-- http://creativecommons.org/licenses/by-nc-sa/4.0
-- Mark D. Aagaard
-- Riham AlTawy
-- Guang Gong
-- Kalikinkar Mandal
-- Raghvendra Rohit
-- Marat Sattarov
-- http://comsec.uwaterloo.ca
-- This is a human-readable summary of (and not a substitute for) the license.
-- You are free to:
-- Share — copy and redistribute the material in any medium or format
-- Adapt — remix, transform, and build upon the material
-- The licensor cannot revoke these freedoms as long as you follow
-- the license terms.
-- Under the following terms:
-- Attribution — You must give appropriate credit, provide a link to
-- the license, and indicate if changes were made. You may do so in
-- any reasonable manner, but not in any way that suggests the
-- licensor endorses you or your use.
-- NonCommercial — You may not use the material for commercial
-- purposes.
-- ShareAlike — If you remix, transform, or build upon the material,
-- you must distribute your contributions under the same license as
-- the original.
-- No additional restrictions — You may not apply legal terms or
-- technological measures that legally restrict others from doing
-- anything the license permits.
library ieee;
use ieee.std_logic_1164.all;
use work.ace_pkg.all;
-- LFSR that produces the constant bits.
--
-- Ports:
--   clk          : clock; the register updates on its rising edge
--   lfsr_c_en    : when '1' (and reset is '0') the register advances
--   lfsr_c_reset : synchronous reset, loads all ones into the register
--   o_const      : the lfsr_c_sz register bits plus three feedback bits
entity lfsr_c is
port
( clk : in std_logic
; lfsr_c_en : in std_logic
; lfsr_c_reset : in std_logic
; o_const : out lfsr_c_output
);
end lfsr_c;
architecture rtl of lfsr_c is
-- sa: the LFSR register itself (lfsr_c_sz bits).
signal sa: std_logic_vector(lfsr_c_sz - 1 downto 0);
-- xa: register contents extended by three combinational feedback bits.
signal xa: std_logic_vector(lfsr_c_sz + 2 downto 0);
begin
-- 10 output bits for the constants
o_const <= xa; -- "to" type <= "downto" type. Index flip intended
-- just rename signal
xa(lfsr_c_sz-1 downto 0) <= sa(lfsr_c_sz-1 downto 0);
-- for updates and outputs
-- Three feedback bits computed at once:
-- xa(lfsr_c_sz + k) = xa(k + 1) xor xa(k) for k = 0, 1, 2.
xa(lfsr_c_sz + 2 downto lfsr_c_sz) <= xa(3 downto 1) xor xa(2 downto 0);
-- One register bit per generate iteration. On an enabled clock edge, bit i
-- takes the value of xa(i+3), so the register advances three positions per
-- clock; the top three bits are loaded from the feedback bits above.
lfsr_shift: for i in lfsr_c_sz-1 downto 0 generate
lfsr_step: process(clk) begin
if rising_edge(clk) then
if lfsr_c_reset ='1' then
sa(i) <= '1';
elsif lfsr_c_en ='1' then
sa(i) <= xa(i+3);
end if;
end if;
end process;
end generate lfsr_shift;
end;
------------ ACE readme file ---------------
----- list of files for ACE synthesis: -----
ace_pkg.vhd -- main package
sb_64.vhd -- s-box with simeck
lfsr.vhd -- lfsr for step / round constant generation
ctl.vhd -- control (FSM)
dp.vhd -- datapath
ace.vhd -- top level entity declaration
ace-rtl.vhd -- top level architecture
----- additional files for simulation: -----
util_unsynth.vhd -- functions used in TB (general purpose)
ace_unsynth.vhd -- specific ACE functions and procedures used in TB
ace_tb.vhd -- ACE testbench
-------------- pure datapath ---------------
dp_pure.vhd -- datapath with most input/output multiplexers removed
----------- TB info (ace_tb.vhd): ----------
********
EDH is a 3-bit constant used to select which modes to test
"100" - encryption only
"010" - decryption only
"001" - hash only
"110" - encryption and decryption
etc.
********
stim_file_path -- stimulus file
output_file_path -- output file
********
------------ stimulus file format --------------
1 file = 1 set of Key, Nonce, AD, Plaintext and Ciphertext
K 00111122335588DD00111122335588DD <--- 128 bits of Key (all 128 bits in a single line)
N 111122335588DD00111122335588DD00 <--- 128 bits of Nonce (all 128 bits in a single line)
A 1122335588DD00111122335588DD00 <--- from 4 to 128 bits of AD
P 335588DD00111122335588DD001111 <--- from 4 to 128 bits of Plaintext
C F9362385DC213A07CEFEF38C34CEFF <--- from 4 to 128 bits of Ciphertext
--- padding is done by testbench
--- multiple lines for AD, Plaintext and Ciphertext are supported
-- This work is licensed under a Creative Commons
-- Attribution-NonCommercial-ShareAlike 4.0 International License.
-- http://creativecommons.org/licenses/by-nc-sa/4.0
-- Mark D. Aagaard
-- Riham AlTawy
-- Guang Gong
-- Kalikinkar Mandal
-- Raghvendra Rohit
-- Marat Sattarov
-- http://comsec.uwaterloo.ca
-- This is a human-readable summary of (and not a substitute for) the license.
-- You are free to:
-- Share — copy and redistribute the material in any medium or format
-- Adapt — remix, transform, and build upon the material
-- The licensor cannot revoke these freedoms as long as you follow
-- the license terms.
-- Under the following terms:
-- Attribution — You must give appropriate credit, provide a link to
-- the license, and indicate if changes were made. You may do so in
-- any reasonable manner, but not in any way that suggests the
-- licensor endorses you or your use.
-- NonCommercial — You may not use the material for commercial
-- purposes.
-- ShareAlike — If you remix, transform, or build upon the material,
-- you must distribute your contributions under the same license as
-- the original.
-- No additional restrictions — You may not apply legal terms or
-- technological measures that legally restrict others from doing
-- anything the license permits.
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.ace_pkg.all;
-- One combinational round of the 64-bit S-box.
--
-- Ports:
--   i_state : 64-bit input word, split into two half-words
--   i_rc    : single round-constant bit
--   o_state : 64-bit output word
entity sb_64 is
port
( i_state : in word
; i_rc : in std_logic
; o_state : out word
);
end entity;
architecture rtl of sb_64 is
-- x1/x0: input half-words; z1/z0: output half-words.
signal x0, x1, z0, z1 : half_word;
-- rc: round-constant half-word built from i_rc.
signal rc : half_word;
begin
-- x1 is the half at indices 0 .. half_word_sz-1, x0 the remaining half.
x1 <= i_state( 0 to half_word_sz-1 );
x0 <= i_state( half_word_sz to word_sz - 1 );
-- All ones except the highest-index bit, which carries i_rc.
rc <= ( 0 to half_word_sz - 2 => '1', half_word_sz - 1 => i_rc );
-- Feistel-style update: the old x1 becomes the new second half ...
z0 <= x1;
-- ... and the new first half is
--   (rot5(x1) and x1) xor rot1(x1) xor x0 xor rc
-- where rotN moves bit N to index 0 (rotation by N positions).
z1 <= ( ( x1(5 to half_word_sz - 1) & x1 (0 to 4) ) and x1)
xor ( x1(1 to half_word_sz - 1) & x1 (0) )
xor x0
xor rc;
o_state <= z1 & z0;
end architecture;
# Build of the ACE-128 AEAD reference code together with the KAT generator.
CC=gcc
# C99, extra warnings, AddressSanitizer + UndefinedBehaviorSanitizer, -O2.
NISTGCCFLAGS=-std=c99 -Wall -Wextra -Wshadow -fsanitize=address,undefined -O2
# Link against libm.
LFLAGS=-lm
# Default target; ace128_1 is an alias that only depends on the binary.
all: ace128_1
ace128_1: ace128
# $@ is the target (ace128), $^ the full list of source files.
ace128: genkat_aead.c encrypt.c ace.c
$(CC) $(NISTGCCFLAGS) -o $@ $^ $(LFLAGS)
.PHONY: clean
# The leading '-' makes make ignore a failing rm (e.g. binary not present).
clean:
-rm ace128
/* Reference implementation of the ace-320 permutation
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
#include<stdio.h>
#include<math.h>
#include<stdlib.h>
#include<stdint.h>
#include "ace.h"
static const unsigned char SC0[16]={0x50,0x5c,0x91,0x8d,0x53,0x60,0x68,0xe1,0xf6,0x9d,0x40,0x4f,0xbe,0x5b,0xe9,0x7f}; //Step constants (SC_{2i})
static const unsigned char SC1[16]={0x28,0xae,0x48,0xc6,0xa9,0x30,0x34,0x70,0x7b,0xce,0x20,0x27,0x5f,0xad,0x74,0x3f}; //Step constants (SC_{2i+1})
static const unsigned char SC2[16]={0x14,0x57,0x24,0x63,0x54,0x18,0x9a,0x38,0xbd,0x67,0x10,0x13,0x2f,0xd6,0xba,0x1f}; //Step constants (SC_{2i+2})
static const unsigned char RC0[16]={0x07,0x0a,0x9b,0xe0,0xd1,0x1a,0x22,0xf7,0x62,0x96,0x71,0xaa,0x2b,0xe9,0xcf,0xb7};//Round constants (RC_{2i})
static const unsigned char RC1[16]={0x53,0x5d,0x49,0x7f,0xbe,0x1d,0x28,0x6c,0x82,0x47,0x6b,0x88,0xdc,0x8b,0x59,0xc6};//Round constants (RC_{2i+1})
static const unsigned char RC2[16]={0x43,0xe4,0x5e,0xcc,0x32,0x4e,0x75,0x25,0xfd,0xf9,0x76,0xa0,0xb0,0x09,0x1e,0xad};//Round constants (RC_{2i+2})
/*
 * Byte-wise building block of a multi-byte left rotation.
 * Shifts x left by `shift` bits and fills the vacated low bits with the top
 * `shift` bits of the neighbouring byte y. The result is truncated to 8 bits.
 */
unsigned char rotl8 ( const unsigned char x, const unsigned char y, const unsigned char shift )
{
    const int high_part = x << shift;        /* bits contributed by x */
    const int low_part  = y >> (8 - shift);  /* bits carried in from y */

    return (unsigned char)(high_part | low_part);
}
/***********************************************************
******* ACE permutation implementation ********************
*********************************************************/
/*
 * Print the STATEBYTES-byte state to stdout as one line of uppercase
 * two-digit hex values without separators.
 */
void ace_print_state( const unsigned char *state )
{
    unsigned char pos = 0;

    while ( pos < STATEBYTES )
    {
        printf("%02X", state[pos]);
        pos++;
    }
    printf("\n");
}
/*
 * Print xlen bytes of x to stdout as lowercase two-digit hex values, each
 * followed by a space, and terminate the line.
 */
void ace_print_data(const uint8_t *x, const uint32_t xlen )
{
    uint32_t left = xlen;
    const uint8_t *cur = x;

    while ( left > 0 )
    {
        printf("%.2x ", *cur);
        cur++;
        left--;
    }
    printf("\n");
}
/*
 * Simeck-based 64-bit S-box: applies SIMECKROUND rounds to an 8-byte block.
 *
 *   output : receives the SIMECKBYTES result bytes (may alias input)
 *   input  : SIMECKBYTES input bytes; bytes 0..3 form the half that is
 *            rotated, bytes 4..7 the other half
 *   rc     : round constant; bit i of rc is used in round i
 *
 * Each round computes
 *   new_left  = (rotl5(left) & left) ^ rotl1(left) ^ right ^ (0xfffffffe | rc_bit)
 *   new_right = left
 * where the 32-bit rotations are assembled byte by byte with rotl8().
 *
 * All scratch storage is automatic: the previous version allocated three heap
 * buffers per call without checking for NULL.
 */
void simeck64_box( unsigned char *output, const unsigned char *input, const unsigned char rc )
{
    unsigned char round, j, rc_bit;
    unsigned char rot1[4], rot5[4];
    unsigned char blk[SIMECKBYTES];

    /* Work on a private copy so that output may alias input. */
    for ( j = 0; j < SIMECKBYTES; j++ )
        blk[j] = input[j];

    for ( round = 0; round < SIMECKROUND; round++ )
    {
        /* Byte-wise 32-bit left rotations by 1 and by 5 of blk[0..3];
           the neighbour of byte 3 wraps around to byte 0. */
        for ( j = 0; j < 4; j++ )
        {
            const unsigned char next = blk[(j + 1) & 3];
            rot1[j] = rotl8(blk[j], next, 1);
            rot5[j] = rotl8(blk[j], next, 5);
        }

        rc_bit = (unsigned char)((rc >> round) & 1);

        for ( j = 0; j < 4; j++ )
        {
            /* Round constant: 0xff for the first three bytes, and
               0xfe ^ rc_bit for the last one. */
            const unsigned char cst = (unsigned char)((j == 3) ? (0xfe ^ rc_bit) : 0xff);
            const unsigned char fresh =
                (unsigned char)(rot1[j] ^ (rot5[j] & blk[j]) ^ blk[4 + j] ^ cst);

            blk[4 + j] = blk[j];   /* old left half becomes the right half */
            blk[j] = fresh;
        }
    }

    for ( j = 0; j < SIMECKBYTES; j++ )
        output[j] = blk[j];
    return;
}
/*
 * ACE permutation, applied in place to the STATEBYTES-byte state `input`.
 *
 * The state is treated as five SIMECKBYTES-byte words A, B, C, D, E stored
 * back to back (word k starts at byte k*SIMECKBYTES). Each of the NUMSTEPS
 * steps computes F(A), F(C), F(E) with simeck64_box() and the round constants
 * RC0/RC1/RC2, then updates the words as documented inline below. The step
 * constants only affect the last byte of a word; the other bytes are XORed
 * with 0xff.
 *
 * All scratch storage is automatic: the previous version allocated four heap
 * buffers per call without checking for NULL.
 */
void ace_permutation( unsigned char *input )
{
    unsigned char i, j;
    unsigned char st[STATEBYTES];
    unsigned char f_a[SIMECKBYTES], f_c[SIMECKBYTES], f_e[SIMECKBYTES];

    for ( i = 0; i < STATEBYTES; i++ )
        st[i] = input[i];

    for ( i = 0; i < NUMSTEPS; i++ )
    {
        /* simeck64_box copies its input before writing the output, so the
           state words can be passed directly. */
        simeck64_box( f_a, st, RC0[i] );                  //A block
        simeck64_box( f_c, st + 2*SIMECKBYTES, RC1[i] );  //C block
        simeck64_box( f_e, st + 4*SIMECKBYTES, RC2[i] );  //E block

        // Update A: A <= SC1 + D + F(E)  (old D is still in place here)
        for ( j = 0; j < SIMECKBYTES-1; j++ )
            st[j] = st[3*SIMECKBYTES+j]^f_e[j]^(0xff);
        st[SIMECKBYTES-1] = st[4*SIMECKBYTES-1]^f_e[SIMECKBYTES-1]^SC1[i];

        // Update E: E <= SC0 + B + F(C)  (old B is still in place here)
        for ( j = 0; j < SIMECKBYTES-1; j++ )
            st[4*SIMECKBYTES+j] = st[SIMECKBYTES+j]^f_c[j]^(0xff);
        st[5*SIMECKBYTES-1] = st[2*SIMECKBYTES-1]^f_c[SIMECKBYTES-1]^SC0[i];

        // Update B: B <= F(C)
        for ( j = 0; j < SIMECKBYTES; j++ )
            st[SIMECKBYTES+j] = f_c[j];

        // Update C: C <= F(A)
        for ( j = 0; j < SIMECKBYTES; j++ )
            st[2*SIMECKBYTES+j] = f_a[j];

        // Update D: D <= SC2 + F(A) + F(E)
        for ( j = 0; j < SIMECKBYTES-1; j++ )
            st[3*SIMECKBYTES+j] = f_a[j]^f_e[j]^(0xff);
        st[4*SIMECKBYTES-1] = f_a[SIMECKBYTES-1]^f_e[SIMECKBYTES-1]^SC2[i];
    }

    for ( i = 0; i < STATEBYTES; i++ )
        input[i] = st[i];
    return;
}
/*
 * Test helper: overwrite the state with all-zero bytes, print that input
 * state, then run the ACE permutation on it in place.
 */
void ace_permutation_ALLZERO ( unsigned char *state )
{
    unsigned char pos = 0;

    while ( pos < STATEBYTES )
    {
        state[pos] = 0x0;
        pos++;
    }
    ace_print_state(state);
    ace_permutation(state);
}
/*
 * Test helper: overwrite the state with all-0xff bytes and run the ACE
 * permutation on it in place (the input state is not printed).
 */
void ace_permutation_ALLONE ( unsigned char *state )
{
    unsigned char pos = 0;

    while ( pos < STATEBYTES )
    {
        state[pos] = 0xff;
        pos++;
    }
    ace_permutation(state);
}
/* Reference implementation of the ACE permutation
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
/* Public interface of the ACE permutation reference code. */
#ifndef ACE_H
#define ACE_H
#include<math.h>
#include<stdlib.h>
#include<stdint.h>
#define STATEBYTES 40 //Number OF BYTES = 320/8 = 40
#define SIMECKBYTES 8 //Number of Simeck BYTES = 64/8 = 8
#define SIMECKROUND 8 //Number of rounds
#define NUMSTEPS 16 //Number of steps
typedef unsigned long long u64;
/* Shift x left by `shift` bits and fill the low bits from the top of y. */
unsigned char rotl8 ( const unsigned char x, const unsigned char y, const unsigned char shift );
/* Print xlen bytes of x as space-separated hex. */
void ace_print_data(const unsigned char *x, const uint32_t xlen );
/* NOTE(review): no definition of simeck_print_data is visible in ace.c. */
void simeck_print_data(const unsigned char *y, const unsigned char ylen );
/* Apply SIMECKROUND Simeck rounds to the SIMECKBYTES-byte input; rc supplies
   one constant bit per round. */
void simeck64_box( unsigned char *output, const unsigned char *input, const unsigned char rc );
/* Apply the ACE permutation in place to a STATEBYTES-byte state. */
void ace_permutation( unsigned char *input );
/* Print the STATEBYTES-byte state as hex. */
void ace_print_state( const unsigned char *state );
/* Fill the state with 0x00 (resp. 0xff) bytes and permute it. */
void ace_permutation_ALLZERO ( unsigned char *state );
void ace_permutation_ALLONE ( unsigned char *state );
#endif
/* Parameter sizes, in bytes, for the AEAD API. */
#define CRYPTO_KEYBYTES 16   /* key length */
#define CRYPTO_NSECBYTES 0   /* secret message number length (none) */
#define CRYPTO_NPUBBYTES 16  /* public nonce length */
#define CRYPTO_ABYTES 16     /* tag length appended to the ciphertext */
/* NOTE(review): presumably tells the caller that input and output buffers
   must not overlap -- confirm against the API specification. */
#define CRYPTO_NOOVERLAP 1
/* Reference implementation of ACE-128 AEAD
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
typedef unsigned long long u64;
/* Load key and nonce into the state and run the initialization phase.
   Returns 0 on success, a negative KAT_* code otherwise. */
int ace_init(
unsigned char *state,
const unsigned char *npub,
const unsigned char *k
);
/* Absorb adlen bytes of associated data into the state. */
int ace_ad(
unsigned char *state,
const unsigned char *ad,
const u64 adlen
);
/* Absorb the key again and extract a tlen-byte tag from the state. */
int ace_gentag(
unsigned char *tag,
const unsigned char tlen,
unsigned char *state,
const unsigned char *k
);
/* NOTE(review): crypto_encrypt / crypto_decrypt are only declared here; no
   definition is visible in encrypt.c. */
int crypto_encrypt(
unsigned char *c,unsigned long long *clen,
const unsigned char *m,unsigned long long mlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_decrypt(
unsigned char *m,unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c,unsigned long long clen,
const unsigned char *npub,
const unsigned char *k
);
/* Encrypt mlen bytes of m with associated data ad; writes ciphertext followed
   by the tag to c and the total length to *clen. Returns 0 on success. */
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
/* Decrypt and verify clen bytes of c (ciphertext followed by tag); writes the
   plaintext to m and its length to *mlen. Returns 0 on success. */
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
/* NOTE(review): duplicate of the ace_gentag prototype above. */
int ace_gentag(
unsigned char *tag,
const unsigned char tlen,
unsigned char *state,
const unsigned char *k
);
/* Reference implementation of ACE-128 AEAD
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
#include<stdio.h>
#include<math.h>
#include<stdlib.h>
#include<stdint.h>
#include "ace.h"
#include "crypto_aead.h"
#include "api.h"
#define KAT_SUCCESS 0
#define KAT_FILE_OPEN_ERROR -1
#define KAT_DATA_ERROR -3
#define KAT_CRYPTO_FAILURE -4
/* Byte offsets within the state that form the 8-byte rate: bytes 0..3 and
   bytes 16..19 (the first four bytes of words A and C). */
const unsigned char rate_bytes[8] = {0,1,2,3,16,17,18,19};
/*
 * Initialization phase.
 *
 * Clears the state, loads the 16-byte key into words A and C and the 16-byte
 * nonce into words B and E, permutes, then absorbs the two key halves into
 * the rate bytes with a permutation after each.
 *
 * Returns KAT_SUCCESS, or KAT_CRYPTO_FAILURE when the configured key/nonce
 * sizes are not both 16 bytes (the state is still cleared in that case).
 */
int ace_init(
unsigned char *state,
const unsigned char *npub,
const unsigned char *k
)
{
    unsigned char pos, half;

    //Initialize the state to all-ZERO
    for ( pos = 0; pos < STATEBYTES; pos++ )
        state[pos] = 0x0;

    if ( !(CRYPTO_KEYBYTES == 16 && CRYPTO_NPUBBYTES == 16) )
        return KAT_CRYPTO_FAILURE;

    for ( pos = 0; pos < 8; pos++ )
    {
        //Key goes to A[0..7] & C[0..7]
        state[pos] = k[pos];
        state[16+pos] = k[8+pos];
        //Nonce goes to B[0..7] & E[0..7]
        state[8+pos] = npub[pos];
        state[32+pos] = npub[8+pos];
    }
    ace_permutation(state);

    //Absorb both key halves, permuting after each one
    for ( half = 0; half < 2; half++ )
    {
        for ( pos = 0; pos < 8; pos++ )
            state[rate_bytes[pos]] ^= k[8*half+pos];
        ace_permutation(state);
    }
    return KAT_SUCCESS;
}
/*
 * Absorb the associated data into the state.
 *
 * Does nothing when adlen is 0. Otherwise every complete 8-byte block is
 * XORed into the rate bytes, followed by the domain separator 0x01 on the
 * last state byte and a permutation. A final block is always processed: it
 * holds the remaining adlen % 8 bytes (possibly none) followed by the 10*
 * padding byte 0x80.
 *
 * Always returns KAT_SUCCESS.
 */
int ace_ad(
unsigned char *state,
const unsigned char *ad,
const u64 adlen
)
{
    const u64 full_blocks = adlen/8;
    const unsigned char tail = (unsigned char)(adlen%8);
    const unsigned char *src = ad;
    unsigned char b;
    u64 blk;

    if ( adlen == 0 )
        return(KAT_SUCCESS);

    //Absorbing the complete 64-bit blocks
    for ( blk = 0; blk < full_blocks; blk++ )
    {
        for ( b = 0; b < 8; b++ )
            state[rate_bytes[b]] ^= src[b];
        src += 8;
        //Domain separator
        state[STATEBYTES-1] ^= (0x01);
        ace_permutation(state);
    }

    //Last block: leftover bytes (none when tail == 0), then padding 10*
    for ( b = 0; b < tail; b++ )
        state[rate_bytes[b]] ^= src[b];
    state[rate_bytes[tail]] ^= (0x80);
    //Domain separator
    state[STATEBYTES-1] ^= (0x01);
    ace_permutation(state);

    return (KAT_SUCCESS);
}
/*
 * Finalization phase: absorb both key halves (a permutation after each) and
 * copy the 16-byte tag out of state bytes 0..7 and 16..23.
 *
 * Only a 16-byte key with tlen == 16 is supported; any other combination
 * prints a message and returns KAT_CRYPTO_FAILURE. Returns KAT_SUCCESS
 * otherwise.
 */
int ace_gentag(
unsigned char *tag,
const unsigned char tlen,
unsigned char *state,
const unsigned char *k
)
{
    unsigned char half, b;

    if ( !(CRYPTO_KEYBYTES == 16 && tlen == 16) )
    {
        printf("Invalid key and tag length pair.\n");
        return KAT_CRYPTO_FAILURE;
    }

    for ( half = 0; half < 2; half++ )
    {
        for ( b = 0; b < 8; b++ )
            state[rate_bytes[b]] ^= k[8*half+b];
        ace_permutation(state);
    }

    //Extracting the 128-bit tag from words A and C
    for ( b = 0; b < 8; b++ )
    {
        tag[b] = state[b];
        tag[8+b] = state[16+b];
    }
    return KAT_SUCCESS;
}
/*
 * ACE-128 authenticated encryption.
 *
 *   c, clen   : output buffer (mlen + CRYPTO_ABYTES bytes) and its length
 *   m, mlen   : plaintext
 *   ad, adlen : associated data
 *   nsec      : unused
 *   npub      : CRYPTO_NPUBBYTES-byte nonce
 *   k         : CRYPTO_KEYBYTES-byte key
 *
 * Returns KAT_SUCCESS, or KAT_CRYPTO_FAILURE if initialization, AD absorption
 * or tag generation fails.
 *
 * The state and tag live on the stack: the previous version heap-allocated
 * them without a NULL check and leaked both on every early return.
 */
int crypto_aead_encrypt(
unsigned char *c,unsigned long long *clen,
const unsigned char *m,unsigned long long mlen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
)
{
    unsigned char state[STATEBYTES];
    unsigned char tag[CRYPTO_ABYTES];
    unsigned char i;
    const unsigned char lblen = (unsigned char)(mlen%8);
    const u64 m64len = mlen/8;
    u64 j;

    (void)nsec; /* not used by this scheme */

    //Initialize state with "key" and "nonce" and then absorb "key" again
    if ( ace_init(state, npub, k) != KAT_SUCCESS )
        return(KAT_CRYPTO_FAILURE);

    //Absorbing "ad"
    if ( adlen != 0 && ace_ad( state, ad, adlen) != KAT_SUCCESS )
        return(KAT_CRYPTO_FAILURE);

    //Encrypting the complete 64-bit blocks of "m" into "c"
    for ( j = 0; j < m64len; j++ )
    {
        for ( i = 0; i < 8; i++ )
        {
            c[8*j+((u64)i)] = m[8*j+((u64)i)]^state[rate_bytes[i]];
            state[rate_bytes[i]] = c[8*j+((u64)i)];
        }
        //Domain separator
        state[STATEBYTES-1]^=(0x02);
        ace_permutation(state);
    }

    //Final padded block. It is always processed: when mlen is 0 or a multiple
    //of 8 it carries no message bytes, only the padding.
    for ( i = 0; i < lblen; i++ )
    {
        c[8*m64len+((u64)i)] = m[8*m64len+((u64)i)]^state[rate_bytes[i]];
        state[rate_bytes[i]] = c[8*m64len+((u64)i)];
    }
    state[rate_bytes[lblen]]^=(0x80); //Padding: 10*
    //Domain separator
    state[STATEBYTES-1]^=(0x02);
    ace_permutation(state);

    //Appending tag to the end of ciphertext
    if ( ace_gentag( tag, CRYPTO_ABYTES, state, k ) != KAT_SUCCESS )
        return(KAT_CRYPTO_FAILURE);
    for ( i = 0; i < CRYPTO_ABYTES; i++ )
        c[mlen+(u64)i] = tag[i];

    *clen = mlen+CRYPTO_ABYTES;
    return KAT_SUCCESS;
}
/*
 * ACE-128 authenticated decryption.
 *
 *   m, mlen   : output buffer (clen - CRYPTO_ABYTES bytes) and its length;
 *               *mlen is written only on success
 *   nsec      : unused
 *   c, clen   : ciphertext followed by the CRYPTO_ABYTES-byte tag
 *   ad, adlen : associated data
 *   npub      : CRYPTO_NPUBBYTES-byte nonce
 *   k         : CRYPTO_KEYBYTES-byte key
 *
 * Returns KAT_SUCCESS when the tag verifies. Returns KAT_CRYPTO_FAILURE when
 * the input is shorter than a tag, when an internal phase fails, or when the
 * tag does not match.
 *
 * NOTE: the plaintext is written to m before the tag is checked; callers must
 * discard it when this function fails.
 *
 * Changes from the previous version:
 *  - clen < CRYPTO_ABYTES is rejected instead of underflowing clen1
 *  - state/tag are on the stack (no unchecked malloc, no leak on failure)
 *  - the tag is compared without an early exit
 */
int crypto_aead_decrypt(
unsigned char *m,unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c,unsigned long long clen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
)
{
    unsigned char state[STATEBYTES];
    unsigned char tag[CRYPTO_ABYTES];
    unsigned char i, lblen, diff;
    u64 j, clen1, c64len;

    (void)nsec; /* not used by this scheme */

    //The input must at least contain the tag
    if ( clen < CRYPTO_ABYTES )
        return(KAT_CRYPTO_FAILURE);

    clen1 = clen-CRYPTO_ABYTES;
    c64len = clen1/8;
    lblen = (unsigned char)(clen1%8);

    //Initialize state with "key" and "nonce" and then absorb "key" again
    if ( ace_init(state, npub, k) != KAT_SUCCESS )
        return(KAT_CRYPTO_FAILURE);

    //Absorbing "ad"
    if ( adlen != 0 && ace_ad( state, ad, adlen) != KAT_SUCCESS )
        return(KAT_CRYPTO_FAILURE);

    //Decrypting the complete 64-bit blocks
    for ( j = 0; j < c64len; j++ )
    {
        for ( i = 0; i < 8; i++ )
        {
            m[8*j+((u64)i)] = c[8*j+((u64)i)]^state[rate_bytes[i]];
            state[rate_bytes[i]] = c[8*j+((u64)i)];
        }
        //Domain separator
        state[STATEBYTES-1]^=(0x02);
        ace_permutation(state);
    }

    //Final padded block. It is always processed: when clen1 is 0 or a multiple
    //of 8 it carries no ciphertext bytes, only the padding.
    for ( i = 0; i < lblen; i++ )
    {
        m[8*c64len+((u64)i)] = c[8*c64len+((u64)i)]^state[rate_bytes[i]];
        state[rate_bytes[i]] = c[8*c64len+((u64)i)];
    }
    state[rate_bytes[lblen]]^=(0x80); //Padding: 10*
    //Domain separator
    state[STATEBYTES-1]^=(0x02);
    ace_permutation(state);

    //Generating and verifying the tag
    if ( ace_gentag( tag, CRYPTO_ABYTES, state, k ) != KAT_SUCCESS )
        return(KAT_CRYPTO_FAILURE);

    //Accumulate all differences so the running time does not depend on the
    //position of the first mismatching byte.
    diff = 0;
    for ( i = 0; i < CRYPTO_ABYTES; i++ )
        diff |= (unsigned char)(c[clen1+(u64)i]^tag[i]);
    if ( diff != 0 )
        return(KAT_CRYPTO_FAILURE);

    *mlen = clen1;
    return KAT_SUCCESS;
}
//
// NIST-developed software is provided by NIST as a public service.
// You may use, copy and distribute copies of the software in any medium,
// provided that you keep intact this entire notice. You may improve,
// modify and create derivative works of the software or any portion of
// the software, and you may copy and distribute such modifications or
// works. Modified works should carry a notice stating that you changed
// the software and should note the date and nature of any such change.
// Please explicitly acknowledge the National Institute of Standards and
// Technology as the source of the software.
//
// NIST-developed software is expressly provided "AS IS." NIST MAKES NO
// WARRANTY OF ANY KIND, EXPRESS, IMPLIED, IN FACT OR ARISING BY OPERATION
// OF LAW, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT AND DATA ACCURACY. NIST
// NEITHER REPRESENTS NOR WARRANTS THAT THE OPERATION OF THE SOFTWARE WILL BE
// UNINTERRUPTED OR ERROR-FREE, OR THAT ANY DEFECTS WILL BE CORRECTED. NIST
// DOES NOT WARRANT OR MAKE ANY REPRESENTATIONS REGARDING THE USE OF THE SOFTWARE
// OR THE RESULTS THEREOF, INCLUDING BUT NOT LIMITED TO THE CORRECTNESS, ACCURACY,
// RELIABILITY, OR USEFULNESS OF THE SOFTWARE.
//
// You are solely responsible for determining the appropriateness of using and
// distributing the software and you assume all risks associated with its use,
// including but not limited to the risks and costs of program errors, compliance
// with applicable laws, damage to or loss of data, programs or equipment, and
// the unavailability or interruption of operation. This software is not intended
// to be used in any situation where a failure could cause risk of injury or
// damage to property. The software developed by NIST employees is not subject to
// copyright protection within the United States.
//
// disable deprecation for sprintf and fopen
#ifdef _MSC_VER
#define _CRT_SECURE_NO_WARNINGS
#endif
#include <stdio.h>
#include <string.h>
#include "crypto_aead.h"
#include "api.h"
#define KAT_SUCCESS 0
#define KAT_FILE_OPEN_ERROR -1
#define KAT_DATA_ERROR -3
#define KAT_CRYPTO_FAILURE -4
#define MAX_FILE_NAME 256
#define MAX_MESSAGE_LENGTH 32
#define MAX_ASSOCIATED_DATA_LENGTH 32
void init_buffer(unsigned char *buffer, unsigned long long numbytes);
void fprint_bstr(FILE *fp, const char *label, const unsigned char *data, unsigned long long length);
int generate_test_vectors();
/*
 * Entry point of the KAT generator: runs generate_test_vectors() and uses its
 * result as the exit status, reporting any failure on stderr.
 */
int main()
{
	const int status = generate_test_vectors();

	if (status == KAT_SUCCESS)
		return status;

	fprintf(stderr, "test vector generation failed with code %d\n", status);
	return status;
}
/*
 * Write the known-answer-test file ../LWC_AEAD_KAT_<keybits>_<noncebits>.txt.
 *
 * For every message length 0..MAX_MESSAGE_LENGTH and every associated-data
 * length 0..MAX_ASSOCIATED_DATA_LENGTH, the key, nonce, plaintext, AD and the
 * resulting ciphertext are recorded; each ciphertext is then decrypted and
 * checked against the original plaintext.
 *
 * Returns KAT_SUCCESS, KAT_FILE_OPEN_ERROR if the file name cannot be built or
 * the file cannot be opened, or KAT_CRYPTO_FAILURE on the first encryption,
 * decryption or round-trip mismatch (generation stops at that point).
 */
int generate_test_vectors()
{
	FILE                *fp;
	char                fileName[MAX_FILE_NAME];
	unsigned char       key[CRYPTO_KEYBYTES];
	unsigned char		nonce[CRYPTO_NPUBBYTES];
	unsigned char       msg[MAX_MESSAGE_LENGTH];
	unsigned char       msg2[MAX_MESSAGE_LENGTH];
	unsigned char		ad[MAX_ASSOCIATED_DATA_LENGTH];
	unsigned char		ct[MAX_MESSAGE_LENGTH + CRYPTO_ABYTES];
	unsigned long long  clen, mlen2;
	int                 count = 1;
	int                 func_ret, ret_val = KAT_SUCCESS;
	int                 name_len;

	/* Deterministic inputs: every buffer holds 0, 1, 2, ... */
	init_buffer(key, sizeof(key));
	init_buffer(nonce, sizeof(nonce));
	init_buffer(msg, sizeof(msg));
	init_buffer(ad, sizeof(ad));

	/* Bounded formatting: never writes past fileName and detects truncation. */
	name_len = snprintf(fileName, sizeof(fileName), "../LWC_AEAD_KAT_%d_%d.txt", (CRYPTO_KEYBYTES * 8), (CRYPTO_NPUBBYTES * 8));
	if (name_len < 0 || (size_t)name_len >= sizeof(fileName)) {
		fprintf(stderr, "Couldn't build the output file name\n");
		return KAT_FILE_OPEN_ERROR;
	}

	if ((fp = fopen(fileName, "w")) == NULL) {
		fprintf(stderr, "Couldn't open <%s> for write\n", fileName);
		return KAT_FILE_OPEN_ERROR;
	}

	/* The outer loop also stops as soon as an inner iteration has failed. */
	for (unsigned long long mlen = 0; (mlen <= MAX_MESSAGE_LENGTH) && (ret_val == KAT_SUCCESS); mlen++) {

		for (unsigned long long adlen = 0; adlen <= MAX_ASSOCIATED_DATA_LENGTH; adlen++) {

			fprintf(fp, "Count = %d\n", count++);

			fprint_bstr(fp, "Key = ", key, CRYPTO_KEYBYTES);

			fprint_bstr(fp, "Nonce = ", nonce, CRYPTO_NPUBBYTES);

			fprint_bstr(fp, "PT = ", msg, mlen);

			fprint_bstr(fp, "AD = ", ad, adlen);

			if ((func_ret = crypto_aead_encrypt(ct, &clen, msg, mlen, ad, adlen, NULL, nonce, key)) != 0) {
				fprintf(fp, "crypto_aead_encrypt returned <%d>\n", func_ret);
				ret_val = KAT_CRYPTO_FAILURE;
				break;
			}

			fprint_bstr(fp, "CT = ", ct, clen);

			fprintf(fp, "\n");

			/* Round trip: the ciphertext must decrypt back to msg. */
			if ((func_ret = crypto_aead_decrypt(msg2, &mlen2, NULL, ct, clen, ad, adlen, nonce, key)) != 0) {
				fprintf(fp, "crypto_aead_decrypt returned <%d>\n", func_ret);
				ret_val = KAT_CRYPTO_FAILURE;
				break;
			}

			if (mlen != mlen2) {
				fprintf(fp, "crypto_aead_decrypt returned bad 'mlen': Got <%llu>, expected <%llu>\n", mlen2, mlen);
				ret_val = KAT_CRYPTO_FAILURE;
				break;
			}

			if (memcmp(msg, msg2, mlen)) {
				fprintf(fp, "crypto_aead_decrypt did not recover the plaintext\n");
				ret_val = KAT_CRYPTO_FAILURE;
				break;
			}
		}
	}

	fclose(fp);

	return ret_val;
}
/*
 * Write `label` followed by `length` bytes of `data` as uppercase two-digit
 * hex (no separators) and a newline to fp.
 */
void fprint_bstr(FILE *fp, const char *label, const unsigned char *data, unsigned long long length)
{
	const unsigned char *cur = data;
	unsigned long long remaining = length;

	fprintf(fp, "%s", label);

	while (remaining > 0) {
		fprintf(fp, "%02X", *cur);
		cur++;
		remaining--;
	}

	fprintf(fp, "\n");
}
/*
 * Fill buffer[0..numbytes-1] with the low 8 bits of each index, i.e. the
 * repeating pattern 0, 1, ..., 255, 0, 1, ...
 */
void init_buffer(unsigned char *buffer, unsigned long long numbytes)
{
	unsigned long long pos = 0;

	while (pos < numbytes) {
		buffer[pos] = (unsigned char)pos;
		pos++;
	}
}
This source diff could not be displayed because it is too large. You can view the blob instead.
# Build of the AVX2 speed benchmark. The commented-out CC lines are
# alternative compiler/flag combinations; exactly one CC line is active.
#CC=gcc -O2 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -msse3 -mmmx -mavx -mavx2
#CC=gcc -Wall -O2 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -msse2 -mmmx -mavx -mavx2
CC=gcc -Wall -O2 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -mavx -mavx2
#CC=gcc -O1 -fomit-frame-pointer -march=haswell -mtune=native -mavx
# Other -march values that were tried:
#ivybridgei, skylake, sandybridge, haswell
# Default target; aceavx_1 is an alias that only depends on the binary.
all: aceavx_1
aceavx_1:aceavx
# $@ is the target (aceavx), $^ the full list of source files.
aceavx: speed.c ace.c
$(CC) -o $@ $^
.PHONY: clean
clean:
rm -fr aceavx
/* Reference implementation of ACE-Hash256
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
#ifndef ACE_H
#define ACE_H
#include<stdint.h>
#include<x86intrin.h>
#include<smmintrin.h>
#include<immintrin.h>
// Size of one ACE state in bytes (40 * 8 = 320 bits).
#define STATEBYTES 40
#define STATEDWORD 10 // 320-bit state / 32-bit words = 10 words
// NOTE(review): presumably the number of rounds per Simeck box; it is consumed by ace.c, which is not shown here -- confirm.
#define SIMECKROUND 8
// NOTE(review): presumably the number of steps of the permutation (it matches the 16-entry tables below) -- confirm against ace.c.
#define NUMSTEPS 16
// Parallelism factor: speed.c processes 8*PARAL_INST_BY8 independent instances per call.
#define PARAL_INST_BY8 1
// Constant tables, 16 entries each. They are defined (not just declared) in this header,
// so every translation unit that includes it gets its own private copy.
static const unsigned char SC0[16]={0x50,0x5c,0x91,0x8d,0x53,0x60,0x68,0xe1,0xf6,0x9d,0x40,0x4f,0xbe,0x5b,0xe9,0x7f}; //Step constants (SC_{2i})
static const unsigned char SC1[16]={0x28,0xae,0x48,0xc6,0xa9,0x30,0x34,0x70,0x7b,0xce,0x20,0x27,0x5f,0xad,0x74,0x3f}; //Step constants (SC_{2i+1})
static const unsigned char SC2[16]={0x14,0x57,0x24,0x63,0x54,0x18,0x9a,0x38,0xbd,0x67,0x10,0x13,0x2f,0xd6,0xba,0x1f}; //Step constants (SC_{2i+2})
static const unsigned char RC0[16]={0x07,0x0a,0x9b,0xe0,0xd1,0x1a,0x22,0xf7,0x62,0x96,0x71,0xaa,0x2b,0xe9,0xcf,0xb7};//Round constants (RC_{2i})
static const unsigned char RC1[16]={0x53,0x5d,0x49,0x7f,0xbe,0x1d,0x28,0x6c,0x82,0x47,0x6b,0x88,0xdc,0x8b,0x59,0xc6};//Round constants (RC_{2i+1})
static const unsigned char RC2[16]={0x43,0xe4,0x5e,0xcc,0x32,0x4e,0x75,0x25,0xfd,0xf9,0x76,0xa0,0xb0,0x09,0x1e,0xad};//Round constants (RC_{2i+2})
// Short integer aliases used throughout the implementation.
typedef unsigned long long int u64;
typedef unsigned int u32;
// NOTE(review): despite its name, u8 is a plain unsigned int (the same type as u32), not an 8-bit type.
// Code that relies on wrap-around at 256 or on sizeof(u8) == 1 would be wrong; changing it needs an audit of ace.c.
typedef unsigned int u8;
// Rotate every 32-bit lane of a __m256i left by 5 (resp. 1) bits.
// The two shifted halves are combined with the `|` operator applied directly to vector values.
#define ROT5(x) (_mm256_slli_epi32(x, 5) | _mm256_srli_epi32(x, 27))
#define ROT1(x) (_mm256_slli_epi32(x, 1) | _mm256_srli_epi32(x, 31))
// Reorder the eight dwords to 0,2,4,6,1,3,5,7 (low to high): even-indexed dwords go to the low half, odd-indexed to the high half.
#define SWAPREG1(x) (_mm256_permutevar8x32_epi32(x, _mm256_set_epi32(7, 5, 3, 1, 6, 4, 2, 0)))
// Vector whose even-indexed dwords are 0xfffffffe^t1 and odd-indexed dwords are 0xfffffffe^t2.
// The arguments are not parenthesised in the expansion, so pass simple operands only.
#define RC(t1, t2) (_mm256_set_epi32(0xfffffffe^t2, 0xfffffffe^t1, 0xfffffffe^t2, 0xfffffffe^t1, 0xfffffffe^t2, 0xfffffffe^t1, 0xfffffffe^t2,0xfffffffe^t1))
// Vector whose even-indexed dwords are 0xffffffff; dwords 1 and 5 are 0xffffff00^t1, dwords 3 and 7 are 0xffffff00^t2.
// The arguments are not parenthesised in the expansion, so pass simple operands only.
#define SC(t1, t2) (_mm256_set_epi32(0xffffff00^t2, 0xffffffff, 0xffffff00^t1, 0xffffffff, 0xffffff00^t2, 0xffffffff, 0xffffff00^t1, 0xffffffff))
// Swap the two 64-bit qwords inside each 128-bit half (qword order becomes 1,0,3,2).
#define SWAPBLK(x) (_mm256_permute4x64_epi64(x, _MM_SHUFFLE(2,3,0,1)))
// Reorder the dwords to 0,1,4,5,2,3,6,7, i.e. qword order 0,2,1,3: even-indexed qwords go to the low half, odd-indexed to the high half.
#define SWAPREG2(x) (_mm256_permutevar8x32_epi32(x, _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0)))
// Reorder the dwords to 2,3,0,1,6,7,4,5 (swap the qwords inside each 128-bit half).
// NOTE(review): the expansion ignores the parameter `x` and permutes a variable literally named `xtmp`
// that must be in scope at the call site. This looks like a slip; before changing it to `x`, check every
// use in ace.c, because a caller passing anything other than `xtmp` would change behaviour.
#define SWAPAC(x) (_mm256_permutevar8x32_epi32(xtmp, _mm256_set_epi32(5, 4, 7, 6, 1, 0, 3, 2)))
// Lane masks: all-ones in the low (resp. high) four dwords, zero elsewhere.
#define masklo (_mm256_set_epi32(0x0, 0x0, 0x0, 0x0, 0xffffffff, 0xffffffff, 0xffffffff,0xffffffff))
#define maskhi (_mm256_set_epi32(0xffffffff, 0xffffffff, 0xffffffff,0xffffffff,0x0, 0x0, 0x0, 0x0))
// Build a __m256i from two __m128i values: v1 becomes the low 128 bits, v0 the high 128 bits.
// NOTE(review): this name is in the compiler's intrinsic namespace; presumably it is a fallback for
// toolchains whose headers lack it. On toolchains that do provide it, this macro shadows the header version -- confirm it is still needed.
#define _mm256_set_m128i(v0, v1) _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1)
// ROAX(x, y, t1, t2): one rotate/and/xor update of the register pair (x, y), applied to all lanes at once.
//   new x = (ROT5(x) & x) ^ ROT1(x) ^ RC(t1, t2) ^ y
//   new y = old x
// x and y must be modifiable __m256i lvalues; x is evaluated several times.
// The expansion is a bare brace block (not do/while(0)), so it cannot be used as the body of an
// unbraced `if` that is followed by `else`.
// NOTE(review): presumably this is one round of the Simeck box -- confirm against the specification.
#define ROAX(x, y, t1, t2)\
{\
__m256i x2tmp;\
x2tmp = x;\
x = (ROT5(x)&x)^ROT1(x)^RC(t1, t2)^y;\
y = x2tmp;\
}
// PACK_SSb(x, y): regroup two registers in place.
// Each register is first reordered with SWAPREG1 (even-indexed dwords low, odd-indexed dwords high).
// Afterwards x holds the low 128-bit halves of both reordered registers (x's in the low half, y's in the
// high half) and y holds their high 128-bit halves.
// The locals are named xtmp/ytmp, so arguments with those names would be captured by the expansion.
#define PACK_SSb(x, y)\
{\
__m256i xtmp, ytmp;\
xtmp = SWAPREG1(x);\
ytmp = SWAPREG1(y);\
x = _mm256_permute2x128_si256(xtmp,ytmp,0x20);\
y = _mm256_permute2x128_si256(xtmp,ytmp, 0x31);\
}
// UNPACK_SSb(x, y): interleave the dwords of x and y (unpacklo/unpackhi work per 128-bit half), then
// gather the low 128-bit halves of the two interleaved results into x and the high halves into y.
// NOTE(review): presumably the inverse of PACK_SSb -- confirm with a round-trip test.
// The locals are named xtmp/ytmp, so arguments with those names would be captured by the expansion.
#define UNPACK_SSb(x, y)\
{\
__m256i xtmp, ytmp;\
xtmp = _mm256_unpacklo_epi32(x, y);\
ytmp = _mm256_unpackhi_epi32(x, y);\
x = _mm256_permute2x128_si256(xtmp, ytmp,0x20);\
y = _mm256_permute2x128_si256(xtmp, ytmp, 0x31);\
}
// PACK(x, y, z, w): regroup two register pairs in place, (x, z) and (y, w).
// For each pair, both registers are reordered with SWAPREG2 (even-indexed qwords low, odd-indexed qwords
// high); the first register of the pair then receives the low 128-bit halves of both and the second
// register receives the high 128-bit halves.
// The locals are named x2tmp/x3tmp, so arguments with those names would be captured by the expansion.
#define PACK(x, y, z, w)\
{\
__m256i x2tmp, x3tmp;\
x2tmp = SWAPREG2(x);\
x3tmp = SWAPREG2(z);\
x = _mm256_permute2x128_si256(x2tmp,x3tmp,0x20);\
z = _mm256_permute2x128_si256(x2tmp,x3tmp, 0x31);\
x2tmp = SWAPREG2(y);\
x3tmp = SWAPREG2(w);\
y = _mm256_permute2x128_si256(x2tmp,x3tmp,0x20);\
w = _mm256_permute2x128_si256(x2tmp,x3tmp, 0x31);\
}
// UNPACK(x, y, z, w): for each of the pairs (x, z) and (y, w), interleave the 64-bit qwords of the two
// registers (per 128-bit half), then gather the low 128-bit halves of the interleaved results into the
// first register and the high halves into the second.
// NOTE(review): presumably the inverse of PACK -- confirm with a round-trip test.
// The locals are named x2tmp/x3tmp, so arguments with those names would be captured by the expansion.
#define UNPACK(x,y,z,w)\
{\
__m256i x2tmp, x3tmp;\
x2tmp = _mm256_unpacklo_epi64(x, z);\
x3tmp = _mm256_unpackhi_epi64(x, z);\
x = _mm256_permute2x128_si256(x2tmp,x3tmp,0x20);\
z = _mm256_permute2x128_si256(x2tmp,x3tmp, 0x31);\
x2tmp = _mm256_unpacklo_epi64(y, w);\
x3tmp = _mm256_unpackhi_epi64(y, w);\
y = _mm256_permute2x128_si256(x2tmp,x3tmp,0x20);\
w = _mm256_permute2x128_si256(x2tmp,x3tmp, 0x31);\
}
// Implemented in ace.c (not part of this header).
// NOTE(review): presumably `state` holds 8*PARAL_INST_BY8 states of STATEDWORD words each, laid out
// back to back as print_state() in speed.c reads them -- confirm against ace.c.
void ace320( u32 *state );
// speed.c calls this with a digest buffer of 8 words per instance, a message buffer of `inlen` words
// per instance, and inlen = 32. NOTE(review): the unit of `inlen` (words vs. bytes) and the meaning of
// the return value are defined in ace.c -- confirm there.
int crypto_hash( u32 *out, u32 *in, u64 inlen );
#endif
#ifndef CLOCK_CYCLE_H
#define CLOCK_CYCLE_H
typedef unsigned long long int u64;

/*
 * Read the x86 time-stamp counter at the START of a timed region.
 *
 * CPUID is executed first as a serialising barrier so that earlier
 * instructions cannot drift into the measured region, then RDTSC reads the
 * counter into EDX:EAX. The asm clobbers rax, rbx, rcx and rdx (CPUID writes
 * all four), which is why the two halves are copied out to separate
 * compiler-chosen registers.
 *
 * Returns the 64-bit counter value (high half from EDX, low half from EAX).
 *
 * Defined `static inline` because this lives in a header: an external
 * definition here would produce duplicate-symbol link errors as soon as two
 * translation units include the file.
 */
static inline u64 start_rdtsc(void)
{
    unsigned high, low;

    __asm__ volatile("CPUID\n\t"
                     "RDTSC\n\t"
                     "mov %%edx, %0\n\t"
                     "mov %%eax, %1\n\t"
                     : "=r" (high), "=r" (low)
                     :
                     : "%rax", "%rbx", "%rcx", "%rdx");
    return ( ((u64)low) | (((u64)high) << 32));
}
/*
 * Read the x86 time-stamp counter at the END of a timed region.
 *
 * RDTSCP reads the counter into EDX:EAX; the two halves are copied out, and
 * CPUID is executed afterwards as a serialising barrier so that later
 * instructions cannot start before the read. The asm clobbers rax, rbx, rcx
 * and rdx (RDTSCP writes ECX, CPUID writes all four).
 *
 * Returns the 64-bit counter value (high half from EDX, low half from EAX).
 *
 * Defined `static inline` because this lives in a header: an external
 * definition here would produce duplicate-symbol link errors as soon as two
 * translation units include the file.
 */
static inline u64 end_rdtsc(void)
{
    unsigned high, low;

    __asm__ volatile("RDTSCP\n\t"
                     "mov %%edx, %0\n\t"
                     "mov %%eax,%1\n\t"
                     "CPUID\n\t"
                     : "=r" (high), "=r" (low)
                     :
                     : "%rax", "%rbx", "%rcx", "%rdx");
    return ( ((u64)low) | (((u64)high) << 32));
}
/*
 * Plain, unserialised time-stamp counter read (x86-64 only).
 *
 * The bytes 0x0F 0x31 (15, 49) encode RDTSC; the high half in RDX is then
 * shifted up and merged into RAX, which is returned.
 *
 * `__asm__` is used instead of the bare `asm` keyword so the header also
 * compiles in strict ISO modes (-std=c99 / -std=c11), matching the spelling
 * used by the other timing helpers in this file.
 */
static inline u64 cpucycles(void)
{
    u64 result;

    __asm__ volatile (".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax"
                      : "=a" (result)
                      :
                      : "%rdx");
    return result;
}
/*#ifdef __x86_64__
#define mycpucycles(RES) \
__asm__ volatile("rdtsc;shlq $32,%%rdx;orq %%rdx,%%rax" : "=a" (RES) :: "%rdx");
#else
#define mycpucycles(RES) \
__asm__ volatile(".byte 15;.byte 49" : "=A" (RES));
#endif
*/
#endif
\ No newline at end of file
/* Reference implementation of ACE-Hash256
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#include<math.h>
#include "ace.h"
#include "clock_cycle.h"
/* Number of timed crypto_hash() calls performed by main()'s benchmark loop. */
#define NUM_ITER 2000
/* NOTE(review): NUM_TEST is not referenced anywhere in this file -- presumably a leftover; confirm before removing. */
#define NUM_TEST 500
/*
 * Dump 8*PARAL_INST_BY8 consecutive states to stdout, one state per line.
 * Each state is STATEDWORD 32-bit words, printed as 8 uppercase hex digits
 * per word with no separators.
 */
void print_state ( u32 *state )
{
    const u8 num_rows = 8*PARAL_INST_BY8;
    u8 row, col;
    u32 *row_words;

    row = 0;
    while ( row < num_rows )
    {
        row_words = state + row*STATEDWORD;
        for ( col = 0; col < STATEDWORD; col++ )
        {
            printf("%.8X", row_words[col]);
        }
        printf("\n");
        row++;
    }
}
/*
 * Benchmark driver for the AVX2 ACE-Hash256 implementation.
 *
 * Builds 8*PARAL_INST_BY8 identical test messages, times NUM_ITER calls of
 * crypto_hash() with the serialised TSC readers, prints the cycles-per-byte
 * figure of every call (and the last one once more), then hashes a final
 * time and prints the messages and the resulting digests.
 *
 * Returns 0 on success, EXIT_FAILURE if the buffers cannot be allocated.
 */
int main(void)
{
    const u8 num_parallel_inst = 8*PARAL_INST_BY8;
    const u64 plen = 32; /* message length per instance in 32-bit words (plen*32 bits) */
    const u32 hlen = 8;  /* digest length per instance: 8 * 32 = 256 bits */
    u64 count_cc = 0;    /* initialised so the summary line is defined even if NUM_ITER is 0 */
    u32 *plaintext, *digest;
    u64 i, j;            /* unsigned, to match the unsigned loop bounds */
    int iter;

    digest = malloc(sizeof *digest * hlen * num_parallel_inst);
    plaintext = malloc(sizeof *plaintext * plen * num_parallel_inst);
    if ( digest == NULL || plaintext == NULL )
    {
        fprintf(stderr, "speed: out of memory\n");
        free(plaintext);
        free(digest);
        return EXIT_FAILURE;
    }

    /* Fill every message with the same fixed pattern 0, 1, 2, ... (mod 128). */
    for ( i = 0; i < num_parallel_inst; i++ )
    {
        for ( j = 0; j < plen; j++ )
            plaintext[i*plen+j] = (u32)(j%128);
    }

    //===================================================================================================================
    // Hash Module//
    //===================================================================================================================
    for ( iter = 0; iter < NUM_ITER; iter++ )
    {
        count_cc = start_rdtsc();
        crypto_hash( digest, plaintext, plen );
        count_cc = end_rdtsc()-count_cc;
        /* cycles per byte: each instance hashes plen words of 4 bytes */
        printf("Hash speed = %f cpb\n", (double)(count_cc)/(double)(num_parallel_inst*plen*4));
    }
    printf("Hash speed = %f cpb\n", (double)(count_cc)/(double)(num_parallel_inst*plen*4));

    /* Untimed run whose result is displayed below. */
    crypto_hash( digest, plaintext, plen );

    printf("Original plaintext:\n");
    for ( i = 0; i < num_parallel_inst; i++ )
    {
        for ( j = 0; j < plen; j++ )
            printf("%08X", plaintext[i*plen+j]);
        printf("\n");
    }

    printf("Digest:\n");
    for ( i = 0; i < num_parallel_inst; i++ )
    {
        for ( j = 0; j < hlen; j++ )
            printf("%08X", digest[i*hlen+j]);
        printf("\n");
    }

    free(plaintext);
    free(digest);
    return(0);
}
# Build the SSE2 ACE-Hash256 speed benchmark (acesse2).
#
# Alternative compiler settings that were tried; exactly one CC must be active.
#CC=gcc -O2 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -msse3 -mmmx -mavx -mavx2
#CC=gcc -Wall -O2 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -msse2 -mmmx -mavx -mavx2
CC=gcc -Wall -O2 -fomit-frame-pointer -funroll-all-loops -march=native -mtune=native -msse2
#CC=gcc -O1 -fomit-frame-pointer -march=haswell -mtune=native -mavx
# Other -march values: ivybridge, skylake, sandybridge, haswell

# all, acesse2_1 and clean name actions, not files, so they are declared phony;
# otherwise a stray file with one of these names would silently disable them.
.PHONY: all acesse2_1 clean

all: acesse2_1

acesse2_1: acesse2

# The headers are listed as prerequisites so that editing them triggers a
# rebuild; only the .c files are handed to the compiler.
acesse2: speed.c ace.c ace.h clock_cycle.h
	$(CC) -o $@ $(filter %.c,$^)

clean:
	rm -fr acesse2
/* Reference implementation of ACE-Hash256
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
#ifndef ACE_H
#define ACE_H
#include<stdint.h>
#include<x86intrin.h>
#include<smmintrin.h>
// Size of one ACE state in bytes (40 * 8 = 320 bits).
#define STATEBYTES 40
#define STATEDWORD 10 // 320-bit state / 32-bit words = 10 words
// NOTE(review): presumably the number of rounds per Simeck box; it is consumed by ace.c, which is not shown here -- confirm.
#define SIMECKROUND 8
// NOTE(review): presumably the number of steps of the permutation (it matches the 16-entry tables below) -- confirm against ace.c.
#define NUMSTEPS 16
// Parallelism factor: speed.c processes 4*PARAL_INST_BY4 independent instances per call.
#define PARAL_INST_BY4 1
// Constant tables, 16 entries each. They are defined (not just declared) in this header,
// so every translation unit that includes it gets its own private copy.
static const unsigned char SC0[16]={0x50,0x5c,0x91,0x8d,0x53,0x60,0x68,0xe1,0xf6,0x9d,0x40,0x4f,0xbe,0x5b,0xe9,0x7f}; //Step constants (SC_{2i})
static const unsigned char SC1[16]={0x28,0xae,0x48,0xc6,0xa9,0x30,0x34,0x70,0x7b,0xce,0x20,0x27,0x5f,0xad,0x74,0x3f}; //Step constants (SC_{2i+1})
static const unsigned char SC2[16]={0x14,0x57,0x24,0x63,0x54,0x18,0x9a,0x38,0xbd,0x67,0x10,0x13,0x2f,0xd6,0xba,0x1f}; //Step constants (SC_{2i+2})
static const unsigned char RC0[16]={0x07,0x0a,0x9b,0xe0,0xd1,0x1a,0x22,0xf7,0x62,0x96,0x71,0xaa,0x2b,0xe9,0xcf,0xb7};//Round constants (RC_{2i})
static const unsigned char RC1[16]={0x53,0x5d,0x49,0x7f,0xbe,0x1d,0x28,0x6c,0x82,0x47,0x6b,0x88,0xdc,0x8b,0x59,0xc6};//Round constants (RC_{2i+1})
static const unsigned char RC2[16]={0x43,0xe4,0x5e,0xcc,0x32,0x4e,0x75,0x25,0xfd,0xf9,0x76,0xa0,0xb0,0x09,0x1e,0xad};//Round constants (RC_{2i+2})
// Short integer aliases used throughout the implementation.
typedef unsigned long long int u64;
typedef unsigned int u32;
// NOTE(review): despite its name, u8 is a plain unsigned int (the same type as u32), not an 8-bit type.
// Code that relies on wrap-around at 256 or on sizeof(u8) == 1 would be wrong; changing it needs an audit of ace.c.
typedef unsigned int u8;
// Rotate every 32-bit lane of a __m128i left by 5 (resp. 1) bits.
// The two shifted halves are combined with the `|` operator applied directly to vector values.
#define ROT5(x) (_mm_slli_epi32(x, 5) | _mm_srli_epi32(x, 27))
#define ROT1(x) (_mm_slli_epi32(x, 1) | _mm_srli_epi32(x, 31))
// Vector whose dwords 0 and 2 are 0xfffffffe^t1 and dwords 1 and 3 are 0xfffffffe^t2.
// The arguments are not parenthesised in the expansion, so pass simple operands only.
#define RC(t1, t2) (_mm_set_epi32(0xfffffffe^t2, 0xfffffffe^t1, 0xfffffffe^t2, 0xfffffffe^t1))
// Vector whose dwords 0 and 2 are 0xffffffff, dword 1 is 0xffffff00^t1 and dword 3 is 0xffffff00^t2.
// The arguments are not parenthesised in the expansion, so pass simple operands only.
#define SC(t1, t2) (_mm_set_epi32(0xffffff00^t2, 0xffffffff, 0xffffff00^t1, 0xffffffff ))
// Reorder the four dwords to 0,2,1,3 (low to high): even-indexed dwords go to the low half, odd-indexed to the high half.
#define SWAPREG1(x) (_mm_shuffle_epi32(x, _MM_SHUFFLE(3, 1, 2, 0)))
// Swap the two 64-bit halves of the register (byte-shift by 8 in each direction, then OR).
#define SWAPBLK(x) (_mm_slli_si128(x, 8)|_mm_srli_si128(x, 8))
// Lane masks: all-ones in the low (resp. high) two dwords, zero elsewhere.
#define masklo (_mm_set_epi32(0x0, 0x0, 0xffffffff, 0xffffffff ))
#define maskhi (_mm_set_epi32(0xffffffff, 0xffffffff, 0x0, 0x0 ))
// ROAX(x, y, t1, t2): one rotate/and/xor update of the register pair (x, y), applied to all lanes at once.
//   new x = (ROT5(x) & x) ^ ROT1(x) ^ RC(t1, t2) ^ y
//   new y = old x
// x and y must be modifiable __m128i lvalues; x is evaluated several times.
// The local is named xtmp, so an argument with that name would be captured by the expansion.
// The expansion is a bare brace block (not do/while(0)), so it cannot be used as the body of an
// unbraced `if` that is followed by `else`.
// NOTE(review): presumably this is one round of the Simeck box -- confirm against the specification.
#define ROAX(x, y, t1, t2)\
{\
__m128i xtmp;\
xtmp = x;\
x = (ROT5(x)&x)^ROT1(x)^RC(t1, t2)^y;\
y = xtmp;\
}
// PACK_SSb(x, y): regroup two registers in place.
// Each register is first reordered with SWAPREG1 (dword order 0,2,1,3). Afterwards x holds the low
// 64-bit halves of both reordered registers (x's first, then y's) and y holds their high 64-bit halves.
// The locals are named xtmp/ytmp, so arguments with those names would be captured by the expansion.
#define PACK_SSb(x, y)\
{\
__m128i xtmp, ytmp;\
xtmp = SWAPREG1(x);\
ytmp = SWAPREG1(y);\
x = _mm_unpacklo_epi64(xtmp, ytmp);\
y = _mm_unpackhi_epi64(xtmp, ytmp);\
}
// UNPACK_SSb(x, y): interleave the dwords of x and y. With the original dwords x0..x3 and y0..y3,
// x becomes x0,y0,x1,y1 and y becomes x2,y2,x3,y3 (low to high).
// NOTE(review): presumably the inverse of PACK_SSb -- confirm with a round-trip test.
// The locals are named xtmp/ytmp, so arguments with those names would be captured by the expansion.
#define UNPACK_SSb(x, y)\
{\
__m128i xtmp, ytmp;\
xtmp = _mm_unpacklo_epi32(x, y);\
ytmp = _mm_unpackhi_epi32(x, y);\
x = xtmp;\
y = ytmp;\
}
// PACK(x, y, z, w, state, i1, i2, i3, i4): load four 128-bit blocks from memory and regroup them.
// Unaligned 16-byte loads are made from state+i1 and state+i2; x receives the low 64-bit halves of the
// two blocks (i1's first) and z their high halves. The same is done with state+i3 and state+i4 into y and w.
// `state + iN` is expanded textually, so the offsets are in units of state's element type and the
// arguments should be simple operands.
// The locals are named xtmp/ytmp, so arguments with those names would be captured by the expansion.
#define PACK(x, y, z, w, state, i1, i2, i3, i4)\
{\
__m128i xtmp, ytmp;\
xtmp = _mm_loadu_si128((void *) (state + i1));\
ytmp = _mm_loadu_si128((void *) (state + i2));\
x = _mm_unpacklo_epi64(xtmp, ytmp);\
z = _mm_unpackhi_epi64(xtmp, ytmp);\
xtmp = _mm_loadu_si128((void *) (state + i3));\
ytmp = _mm_loadu_si128((void *) (state + i4));\
y = _mm_unpacklo_epi64(xtmp, ytmp);\
w = _mm_unpackhi_epi64(xtmp, ytmp);\
}
// UNPACK(x, y, z, w): for each of the pairs (x, z) and (y, w), the first register receives the low
// 64-bit halves of both registers of the pair and the second receives their high 64-bit halves.
// Unlike PACK, this works on registers only; nothing is written to memory.
// The locals are named xtmp/ytmp, so arguments with those names would be captured by the expansion.
#define UNPACK(x, y, z, w)\
{\
__m128i xtmp, ytmp;\
xtmp = _mm_unpacklo_epi64(x, z);\
ytmp = _mm_unpackhi_epi64(x, z);\
x = xtmp;\
z = ytmp;\
xtmp = _mm_unpacklo_epi64(y, w);\
ytmp = _mm_unpackhi_epi64(y, w);\
y = xtmp;\
w = ytmp;\
}
// Implemented in ace.c (not part of this header).
// NOTE(review): presumably `state` holds 4*PARAL_INST_BY4 states of STATEDWORD words each, laid out
// back to back as print_state() in speed.c reads them -- confirm against ace.c.
void ace320( u32 *state );
// speed.c calls this with a digest buffer of 8 words per instance, a message buffer of `inlen` words
// per instance, and inlen = 32. NOTE(review): the unit of `inlen` (words vs. bytes) and the meaning of
// the return value are defined in ace.c -- confirm there.
int crypto_hash( u32 *out, u32 *in, u64 inlen );
#endif
#ifndef CLOCK_CYCLE_H
#define CLOCK_CYCLE_H
typedef unsigned long long int u64;

/*
 * Read the x86 time-stamp counter at the START of a timed region.
 *
 * CPUID is executed first as a serialising barrier so that earlier
 * instructions cannot drift into the measured region, then RDTSC reads the
 * counter into EDX:EAX. The asm clobbers rax, rbx, rcx and rdx (CPUID writes
 * all four), which is why the two halves are copied out to separate
 * compiler-chosen registers.
 *
 * Returns the 64-bit counter value (high half from EDX, low half from EAX).
 *
 * Defined `static inline` because this lives in a header: an external
 * definition here would produce duplicate-symbol link errors as soon as two
 * translation units include the file.
 */
static inline u64 start_rdtsc(void)
{
    unsigned high, low;

    __asm__ volatile("CPUID\n\t"
                     "RDTSC\n\t"
                     "mov %%edx, %0\n\t"
                     "mov %%eax, %1\n\t"
                     : "=r" (high), "=r" (low)
                     :
                     : "%rax", "%rbx", "%rcx", "%rdx");
    return ( ((u64)low) | (((u64)high) << 32));
}
/*
 * Read the x86 time-stamp counter at the END of a timed region.
 *
 * RDTSCP reads the counter into EDX:EAX; the two halves are copied out, and
 * CPUID is executed afterwards as a serialising barrier so that later
 * instructions cannot start before the read. The asm clobbers rax, rbx, rcx
 * and rdx (RDTSCP writes ECX, CPUID writes all four).
 *
 * Returns the 64-bit counter value (high half from EDX, low half from EAX).
 *
 * Defined `static inline` because this lives in a header: an external
 * definition here would produce duplicate-symbol link errors as soon as two
 * translation units include the file.
 */
static inline u64 end_rdtsc(void)
{
    unsigned high, low;

    __asm__ volatile("RDTSCP\n\t"
                     "mov %%edx, %0\n\t"
                     "mov %%eax,%1\n\t"
                     "CPUID\n\t"
                     : "=r" (high), "=r" (low)
                     :
                     : "%rax", "%rbx", "%rcx", "%rdx");
    return ( ((u64)low) | (((u64)high) << 32));
}
/*
 * Plain, unserialised time-stamp counter read (x86-64 only).
 *
 * The bytes 0x0F 0x31 (15, 49) encode RDTSC; the high half in RDX is then
 * shifted up and merged into RAX, which is returned.
 *
 * `__asm__` is used instead of the bare `asm` keyword so the header also
 * compiles in strict ISO modes (-std=c99 / -std=c11), matching the spelling
 * used by the other timing helpers in this file.
 */
static inline u64 cpucycles(void)
{
    u64 result;

    __asm__ volatile (".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax"
                      : "=a" (result)
                      :
                      : "%rdx");
    return result;
}
/*#ifdef __x86_64__
#define mycpucycles(RES) \
__asm__ volatile("rdtsc;shlq $32,%%rdx;orq %%rdx,%%rax" : "=a" (RES) :: "%rdx");
#else
#define mycpucycles(RES) \
__asm__ volatile(".byte 15;.byte 49" : "=A" (RES));
#endif
*/
#endif
\ No newline at end of file
/* Reference implementation of ACE-Hash256
Written by:
Kalikinkar Mandal <kmandal@uwaterloo.ca>
*/
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#include<math.h>
#include "ace.h"
#include "clock_cycle.h"
/* Number of timed crypto_hash() calls performed by main()'s benchmark loop. */
#define NUM_ITER 2000
/* NOTE(review): NUM_TEST is not referenced anywhere in this file -- presumably a leftover; confirm before removing. */
#define NUM_TEST 500
/*
 * Dump 4*PARAL_INST_BY4 consecutive states to stdout, one state per line.
 * Each state is STATEDWORD 32-bit words, printed as 8 uppercase hex digits
 * per word with no separators.
 */
void print_state ( u32 *state )
{
    const u8 num_rows = 4*PARAL_INST_BY4;
    u8 row, col;
    u32 *row_words;

    row = 0;
    while ( row < num_rows )
    {
        row_words = state + row*STATEDWORD;
        for ( col = 0; col < STATEDWORD; col++ )
        {
            printf("%.8X", row_words[col]);
        }
        printf("\n");
        row++;
    }
}
/*
 * Benchmark driver for the SSE2 ACE-Hash256 implementation.
 *
 * Builds 4*PARAL_INST_BY4 identical test messages, times NUM_ITER calls of
 * crypto_hash() with the serialised TSC readers, prints the cycles-per-byte
 * figure of every call (and the last one once more), then hashes a final
 * time and prints the messages and the resulting digests.
 *
 * Returns 0 on success, EXIT_FAILURE if the buffers cannot be allocated.
 */
int main(void)
{
    const u8 num_parallel_inst = 4*PARAL_INST_BY4;
    const u64 plen = 32; /* message length per instance in 32-bit words (plen*32 bits) */
    const u32 hlen = 8;  /* digest length per instance: 8 * 32 = 256 bits */
    u64 count_cc = 0;    /* initialised so the summary line is defined even if NUM_ITER is 0 */
    u32 *plaintext, *digest;
    u64 i, j;            /* unsigned, to match the unsigned loop bounds */
    int iter;

    digest = malloc(sizeof *digest * hlen * num_parallel_inst);
    plaintext = malloc(sizeof *plaintext * plen * num_parallel_inst);
    if ( digest == NULL || plaintext == NULL )
    {
        fprintf(stderr, "speed: out of memory\n");
        free(plaintext);
        free(digest);
        return EXIT_FAILURE;
    }

    /* Fill every message with the same fixed pattern 0, 1, 2, ... (mod 128). */
    for ( i = 0; i < num_parallel_inst; i++ )
    {
        for ( j = 0; j < plen; j++ )
            plaintext[i*plen+j] = (u32)(j%128);
    }

    //===================================================================================================================
    // HASH Mode//
    //===================================================================================================================
    /* Speed test for ACE-Hash. */
    for ( iter = 0; iter < NUM_ITER; iter++ )
    {
        count_cc = start_rdtsc();
        crypto_hash ( digest, plaintext, plen );
        count_cc = end_rdtsc()-count_cc;
        /* cycles per byte: each instance hashes plen words of 4 bytes */
        printf("Hash speed = %f cpb\n", (double)(count_cc)/(double)(num_parallel_inst*plen*4));
    }
    printf("Hash speed = %f cpb\n", (double)(count_cc)/(double)(num_parallel_inst*plen*4));

    /* Untimed run whose result is displayed below. */
    crypto_hash ( digest, plaintext, plen );

    printf("Original plaintext:\n");
    for ( i = 0; i < num_parallel_inst; i++ )
    {
        for ( j = 0; j < plen; j++ )
            printf("%08X", plaintext[i*plen+j]);
        printf("\n");
    }

    printf("Digest:\n");
    for ( i = 0; i < num_parallel_inst; i++ )
    {
        for ( j = 0; j < hlen; j++ )
            printf("%08X", digest[i*hlen+j]);
        printf("\n");
    }

    free(plaintext);
    free(digest);
    return(0);
}
-- This work is licensed under a Creative Commons
-- Attribution-NonCommercial-ShareAlike 4.0 International License.
-- http://creativecommons.org/licenses/by-nc-sa/4.0
-- Mark D. Aagaard
-- Riham AlTawy
-- Guang Gong
-- Kalikinkar Mandal
-- Raghvendra Rohit
-- Marat Sattarov
-- http://comsec.uwaterloo.ca
-- This is a human-readable summary of (and not a substitute for) the license.
-- You are free to:
-- Share — copy and redistribute the material in any medium or format
-- Adapt — remix, transform, and build upon the material
-- The licensor cannot revoke these freedoms as long as you follow
-- the license terms.
-- Under the following terms:
-- Attribution — You must give appropriate credit, provide a link to
-- the license, and indicate if changes were made. You may do so in
-- any reasonable manner, but not in any way that suggests the
-- licensor endorses you or your use.
-- NonCommercial — You may not use the material for commercial
-- purposes.
-- ShareAlike — If you remix, transform, or build upon the material,
-- you must distribute your contributions under the same license as
-- the original.
-- No additional restrictions — You may not apply legal terms or
-- technological measures that legally restrict others from doing
-- anything the license permits.
-- Structural top level of the ACE core: instantiates the datapath (u_dp) and
-- the controller (u_ctl) and wires them together. There is no logic here.
--
-- The top-level inputs clk, reset, i_mode, i_dom_sep, i_valid and i_padding
-- are fed to both blocks; i_data enters and o_data leaves through the
-- datapath; o_valid and o_ready are driven by the controller.
architecture rtl of ace is

  -- Controller -> datapath connections: the derived control word and the
  -- one-hot vector.
  signal ctl_control : ace_ctl_ty;
  signal ctl_onehot  : onehot_ty;

begin

  -- Datapath
  u_dp :
    entity work.dp port map
      ( clk       => clk
      , reset     => reset
      , i_mode    => i_mode
      , i_control => ctl_control
      , i_onehot  => ctl_onehot
      , i_dom_sep => i_dom_sep
      , i_valid   => i_valid
      , i_data    => i_data
      , i_padding => i_padding
      , o_data    => o_data
      );

  -- Controller
  u_ctl :
    entity work.ctl port map
      ( clk       => clk
      , reset     => reset
      , i_mode    => i_mode
      , i_dom_sep => i_dom_sep
      , i_valid   => i_valid
      , i_padding => i_padding
      , o_valid   => o_valid
      , o_onehot  => ctl_onehot
      , o_ready   => o_ready
      , o_control => ctl_control
      );

end architecture;
-- This work is licensed under a Creative Commons
-- Attribution-NonCommercial-ShareAlike 4.0 International License.
-- http://creativecommons.org/licenses/by-nc-sa/4.0
-- Mark D. Aagaard
-- Riham AlTawy
-- Guang Gong
-- Kalikinkar Mandal
-- Raghvendra Rohit
-- Marat Sattarov
-- http://comsec.uwaterloo.ca
-- This is a human-readable summary of (and not a substitute for) the license.
-- You are free to:
-- Share — copy and redistribute the material in any medium or format
-- Adapt — remix, transform, and build upon the material
-- The licensor cannot revoke these freedoms as long as you follow
-- the license terms.
-- Under the following terms:
-- Attribution — You must give appropriate credit, provide a link to
-- the license, and indicate if changes were made. You may do so in
-- any reasonable manner, but not in any way that suggests the
-- licensor endorses you or your use.
-- NonCommercial — You may not use the material for commercial
-- purposes.
-- ShareAlike — If you remix, transform, or build upon the material,
-- you must distribute your contributions under the same license as
-- the original.
-- No additional restrictions — You may not apply legal terms or
-- technological measures that legally restrict others from doing
-- anything the license permits.
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.ace_pkg.all;
-- Top-level interface of the ACE core. The rtl architecture passes these
-- ports straight to the datapath (dp) and controller (ctl) sub-blocks.
entity ace is
port
-- Clock and reset, fed to both sub-blocks.
-- NOTE(review): reset polarity and whether it is synchronous are decided inside dp/ctl -- confirm there.
( clk : in std_logic;
reset : in std_logic;
-- Operation selector, 2 bits; ace_pkg defines encrypt_mode, decrypt_mode, absorb_mode and squeeze_mode.
i_mode : in mode_ty;
-- 2-bit domain-separator input (domsep_ty).
i_dom_sep : in domsep_ty;
-- NOTE(review): presumably marks i_data as valid in the current cycle -- confirm against the controller.
i_valid : in std_logic;
-- One 64-bit word of input data (word = std_logic_vector(0 to 63)); consumed by the datapath only.
i_data : in word;
-- NOTE(review): presumably flags that the current input word carries padding -- confirm against dp/ctl.
i_padding : in std_logic;
-- Handshake outputs, both driven by the controller.
-- NOTE(review): exact protocol (when o_ready/o_valid assert) is defined in ctl -- confirm there.
o_valid : out std_logic;
o_ready : out std_logic;
-- One 64-bit word of output data, driven by the datapath.
o_data : out word
);
end entity;
-- This work is licensed under a Creative Commons
-- Attribution-NonCommercial-ShareAlike 4.0 International License.
-- http://creativecommons.org/licenses/by-nc-sa/4.0
-- Mark D. Aagaard
-- Riham AlTawy
-- Guang Gong
-- Kalikinkar Mandal
-- Raghvendra Rohit
-- Marat Sattarov
-- http://comsec.uwaterloo.ca
-- This is a human-readable summary of (and not a substitute for) the license.
-- You are free to:
-- Share — copy and redistribute the material in any medium or format
-- Adapt — remix, transform, and build upon the material
-- The licensor cannot revoke these freedoms as long as you follow
-- the license terms.
-- Under the following terms:
-- Attribution — You must give appropriate credit, provide a link to
-- the license, and indicate if changes were made. You may do so in
-- any reasonable manner, but not in any way that suggests the
-- licensor endorses you or your use.
-- NonCommercial — You may not use the material for commercial
-- purposes.
-- ShareAlike — If you remix, transform, or build upon the material,
-- you must distribute your contributions under the same license as
-- the original.
-- No additional restrictions — You may not apply legal terms or
-- technological measures that legally restrict others from doing
-- anything the license permits.
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
-- Shared types, constants and conversion helpers for the ACE hardware core.
package ace_pkg is
-- Constant-generating LFSR: lfsr_c_sz is 7 and the output vector is
-- lfsr_c_sz+3 = 10 bits wide (indices 0 to 9).
constant lfsr_c_sz : integer := 7;
subtype lfsr_c_output is std_logic_vector(0 to lfsr_c_sz+2);
------------------------------------------------------------
-- Data widths: a half-word is 32 bits, a word is 64 bits.
-- All vectors are ascending ("0 to N-1"), so index 0 is the leftmost bit.
constant half_word_sz : natural := 32;
constant word_sz : natural := 2*half_word_sz;
subtype half_word is std_logic_vector( 0 to half_word_sz - 1 );
subtype word is std_logic_vector( 0 to word_sz - 1 );
type word_vector is array( natural range <> ) of word;
type half_word_vector is array( natural range <> ) of half_word;
------------------------------------------------------------
-- The 320-bit state is viewed either as 5 words (A, B, C, D, E; indices 0 to 4)
-- or as 10 half-words (indices 0 to 9).
constant state_sz : natural := 320;
constant word_max_idx : natural := state_sz / word_sz - 1;
constant half_word_max_idx : natural := state_sz / half_word_sz - 1;
constant key_sz : natural := 128;
constant nonce_sz : natural := 128;
subtype word_state_ty is word_vector ( 0 to word_max_idx );
-- Word indices of A..E in word_state_ty.
constant a_idx : natural := 0;
constant b_idx : natural := 1;
constant c_idx : natural := 2;
constant d_idx : natural := 3;
constant e_idx : natural := 4;
subtype half_word_data is half_word_vector ( 0 to 1 );
subtype half_word_state_ty is half_word_vector ( 0 to half_word_max_idx );
-- Half-word indices in half_word_state_ty. For each word X, X1 is the
-- even index 2*k and X0 the odd index 2*k+1; with the conversion functions
-- below, X1 is therefore bits 0 to 31 of the word and X0 bits 32 to 63.
constant a0_idx : natural := 1;
constant a1_idx : natural := 0;
constant b0_idx : natural := 3;
constant b1_idx : natural := 2;
constant c0_idx : natural := 5;
constant c1_idx : natural := 4;
constant d0_idx : natural := 7;
constant d1_idx : natural := 6;
constant e0_idx : natural := 9;
constant e1_idx : natural := 8;
-- Boolean to std_logic: true -> '1', false -> '0'.
function b2x( b : boolean ) return std_logic;
-- Conversions between the two views of the state; they are inverses of each other.
function half_words_to_words( st : half_word_state_ty ) return word_state_ty;
function words_to_half_words( st : word_state_ty ) return half_word_state_ty;
------------------------------------------------------------
-- mode
subtype mode_ty is std_logic_vector( 1 downto 0 ); -- top-level input
constant encrypt_mode : mode_ty := ( 1 => '0', 0 => '0' );
constant decrypt_mode : mode_ty := ( 1 => '0', 0 => '1' );
constant absorb_mode : mode_ty := ( 1 => '1', 0 => '0' );
constant squeeze_mode : mode_ty := ( 1 => '1', 0 => '1' );
subtype domsep_ty is std_logic_vector( 1 downto 0 ); -- top-level input
-- Derived control word (from the counter and more); the *_idx constants
-- below name the bit position of each flag inside ace_ctl_ty.
subtype ace_ctl_ty is std_logic_vector( 7 downto 0 );
constant absorb_idx : natural := 0;
constant replace_idx : natural := 1;
constant output_idx : natural := 2;
constant endstep_idx : natural := 3;
constant permoff_idx : natural := 4;
constant squeeze_idx : natural := 5;
constant lfsr_c_reset_idx : natural := 6;
constant lfsr_c_en_idx : natural := 7;
-- Extra control for load, init, fin, tag, squeeze (4-bit vector).
subtype onehot_ty is std_logic_vector( 3 downto 0); -- extra control for load, init, fin, tag, squeeze
------------------------------------------------------------
-- Round and step counters
-- The last bit is used to signal the end of the ACE permutation (for o_ready)
-- -> i_valid will reset the counter!
-- The counter only runs while its msb is 0.
constant bits_counter : natural := 8;
subtype count_ty is unsigned( bits_counter - 1 downto 0 );
----------------------------------------------------------------------
-- standard vhdl operators
-- function "sll"( a : half_word; n : natural ) return half_word;
-- Rotate a onehot_ty left by one position (top bit wraps to bit 0).
function onehot_rotate (a : onehot_ty) return onehot_ty;
-- Conversions between one 64-bit word and its two 32-bit half-words
-- (element 0 = bits 0 to 31, element 1 = bits 32 to 63).
function vector_to_data ( st : half_word_data ) return word;
function data_to_vector ( st : word ) return half_word_data;
----------------------------------------------------------------------
end package;
----------------------------------------------------------------------
--
----------------------------------------------------------------------
package body ace_pkg is

  ------------------------------------------------------------
  -- control helpers
  ------------------------------------------------------------

  -- Rotate a onehot_ty vector left by one position: every bit moves to the
  -- next higher index and the top bit wraps around to index 0.
  function onehot_rotate (a : onehot_ty)
    return onehot_ty
  is
    variable z : onehot_ty;
  begin
    z(onehot_ty'high downto 1) := a(onehot_ty'high - 1 downto 0);
    z(0)                       := a(onehot_ty'high);
    return z;
  end function;

  -- Convert a boolean to std_logic: true -> '1', false -> '0'.
  function b2x( b : boolean ) return std_logic is
  begin
    if b then
      return '1';
    else
      return '0';
    end if;
  end function;

  ------------------------------------------------------------
  -- standard vhdl operators cast to state
  ------------------------------------------------------------

  -- function "sll"( a : half_word; n : natural ) return half_word is
  -- begin
  --   return half_word( std_logic_vector( a ) sll n );
  -- end function;

  ------------------------------------------------------------
  -- state functions
  ------------------------------------------------------------

  -- Pack the 10 half-words of the state into 5 words. Word i takes
  -- half-word 2*i as its bits 0 to half_word_sz-1 and half-word 2*i+1 as its
  -- bits half_word_sz to word_sz-1. This is the exact inverse of
  -- words_to_half_words and uses the same split as vector_to_data.
  -- The loop parameter i is declared implicitly by the for statement, so no
  -- separate variable is needed.
  function half_words_to_words( st : half_word_state_ty )
    return word_state_ty
  is
    variable z : word_state_ty;
  begin
    main_loop : for i in 0 to word_max_idx loop
      z(i)(0 to half_word_sz - 1)       := st(2*i);
      z(i)(half_word_sz to word_sz - 1) := st(2*i+1);
    end loop;
    return z;
  end function;

  -- Split the 5 words of the state into 10 half-words. Half-word 2*i is bits
  -- 0 to half_word_sz-1 of word i and half-word 2*i+1 is bits half_word_sz to
  -- word_sz-1. Inverse of half_words_to_words.
  function words_to_half_words( st : word_state_ty )
    return half_word_state_ty
  is
    variable z : half_word_state_ty;
  begin
    main_loop : for i in 0 to word_max_idx loop
      z(2*i)   := st(i)(0 to half_word_sz - 1);
      z(2*i+1) := st(i)(half_word_sz to word_sz - 1);
    end loop;
    return z;
  end function;

  -- Split one word into two half-words: element 0 is bits 0 to
  -- half_word_sz-1, element 1 is bits half_word_sz to word_sz-1.
  function data_to_vector( st : word )
    return half_word_data
  is
    variable z : half_word_data;
  begin
    z(0) := st(0 to half_word_sz - 1);
    z(1) := st(half_word_sz to word_sz - 1);
    return z;
  end function;

  -- Join two half-words into one word; inverse of data_to_vector.
  function vector_to_data( st : half_word_data )
    return word
  is
    variable z : word;
  begin
    z(0 to half_word_sz - 1)       := st(0);
    z(half_word_sz to word_sz - 1) := st(1);
    return z;
  end function;

end package body;
# Simulation run script for the ACE core test bench.
# It reads two variables that must be set before this script is sourced:
#   gui_mode - true when running with the waveform GUI, false for batch runs
#   sim_mode - compared against "PROG_MODE" to enable the internal-signal waves
# NOTE(review): the add wave / vcd / run commands look like ModelSim or Questa commands - confirm the target simulator.

# In GUI mode, populate the wave window before the run starts.
if { $gui_mode } {
# Top-level ports of the test bench.
add wave clk
add wave reset
add wave i_mode
add wave i_dom_sep
add wave o_ready
add wave i_valid
add wave i_data
add wave i_padding
add wave o_valid
add wave o_data
# Internal signals of the unit under test, only for PROG_MODE simulations.
if { $sim_mode eq "PROG_MODE" } then {
# Summary group: controller state plus the data handshake.
add wave -noupdate -divider -height 32 STUFF
add wave /uut/u_ctl/state
add wave /uut/u_ctl/o_ready
add wave /uut/u_ctl/i_valid
add wave /uut/u_dp/i_data
add wave /uut/u_ctl/o_valid
add wave /uut/u_dp/o_data
# Datapath group.
# NOTE(review): ctl_const is added twice in this group, and i_data and o_data repeat the summary group - presumably harmless duplicates; confirm before removing.
add wave -noupdate -divider -height 32 DP
add wave -radix binary /uut/u_dp/ctl_const
add wave /uut/u_dp/i_data
add wave /uut/u_dp/o_data
add wave -radix binary /uut/u_dp/ctl_const
add wave /uut/u_dp/lfsr_c_en
add wave /uut/u_dp/lfsr_c_reset
add wave /uut/u_dp/permoff
add wave /uut/u_dp/endstep
add wave /uut/u_dp/absorb
add wave /uut/u_dp/replace
add wave /uut/u_dp/output
add wave /uut/u_dp/dsxor
add wave /uut/u_dp/post_input
add wave /uut/u_dp/pre_round
add wave /uut/u_dp/post_round
add wave /uut/u_dp/post_xor
add wave /uut/u_dp/post_step_const
add wave /uut/u_dp/post_linear
add wave /uut/u_dp/ace_path
add wave /uut/u_dp/ace_state
# Controller group.
add wave -noupdate -divider -height 32 CTL
add wave /uut/u_ctl/state
add wave -radix unsigned /uut/u_ctl/count
add wave /uut/u_ctl/i_valid
add wave /uut/u_ctl/o_valid
add wave /uut/u_ctl/o_ready
add wave -radix binary /uut/u_ctl/onehot
add wave /uut/u_ctl/lfsr_c_reset
add wave -radix binary /uut/u_ctl/i_mode
add wave -radix binary /uut/u_ctl/i_dom_sep
}
}
# Record a value change dump to ace.vcd: first the signals directly under
# /ace_tb/uut, then every signal recursively from the current scope.
vcd file ace.vcd
vcd add /ace_tb/uut/*
vcd add -r *
vcd on
# Run until the test bench stops generating events.
run -all
# Finalise the dump file.
vcd checkpoint
vcd off
vcd flush
# GUI runs stay open with the whole run in view; batch runs quit the simulator.
if { $gui_mode } {
wave zoom full
} else {
exit
}
-- This work is licensed under a Creative Commons
-- Attribution-NonCommercial-ShareAlike 4.0 International License.
-- http://creativecommons.org/licenses/by-nc-sa/4.0
-- Mark D. Aagaard
-- Riham AlTawy
-- Guang Gong
-- Kalikinkar Mandal
-- Raghvendra Rohit
-- Marat Sattarov
-- http://comsec.uwaterloo.ca
-- This is a human-readable summary of (and not a substitute for) the license.
-- You are free to: