Commit 9cd651e3 by Wentao Zhang Committed by Sebastian Renner

knot

parent b95155c4
/* Size constants (in bytes) used by the AEAD entry points. */
#define CRYPTO_KEYBYTES 16 // key length: 16 bytes are read from k
#define CRYPTO_NSECBYTES 0 // the nsec argument is never read or written
#define CRYPTO_NPUBBYTES 16 // nonce length: 16 bytes are read from npub
#define CRYPTO_ABYTES 16 // length of the tag appended to the ciphertext
#define CRYPTO_NOOVERLAP 1 // NOTE(review): presumably declares that input and output buffers must not overlap -- confirm against the API spec
#include"auxFormat.h"
/*
 * Building blocks for the P256 inline-assembly template.  Each macro expands
 * to adjacent string literals, so the assembled instruction stream is the
 * same as the fully unrolled listing it replaces.
 */

/* Operand reference: P256_OP(S_0) -> "%[S_0]". */
#define P256_OP(x) "%[" #x "]"

/*
 * Add round constant: load the byte at *rc, XOR its high nibble (byte >> 4)
 * into S_0 and its low nibble (byte & 0xf) into S_1.
 */
#define P256_ADD_RC \
	"ldrb %[reg1], [%[rc]] \n\t" \
	"and %[reg2], %[reg1], 0xf \n\t" \
	"eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" \
	"eors %[S_1], %[S_1], %[reg2] \n\t"

/* Advance rc to the next round-constant byte. */
#define P256_NEXT_RC \
	"adds %[rc], %[rc], #1 \n\t"

/* S-box on one column (a, b, c, d) with no rotations pending (first round). */
#define P256_SBOX_PLAIN(a, b, c, d) \
	"mvns " P256_OP(a) ", " P256_OP(a) " \n\t" \
	"ands %[reg1], " P256_OP(b) ", " P256_OP(a) " \n\t" \
	"eors %[reg1], " P256_OP(c) ", %[reg1] \n\t" \
	"orrs " P256_OP(c) ", " P256_OP(b) ", " P256_OP(c) " \n\t" \
	"eors " P256_OP(a) ", " P256_OP(d) ", " P256_OP(a) " \n\t" \
	"eors " P256_OP(c) ", " P256_OP(c) ", " P256_OP(a) " \n\t" \
	"eors %[reg2], " P256_OP(b) ", " P256_OP(d) " \n\t" \
	"eors " P256_OP(d) ", " P256_OP(d) ", %[reg1] \n\t" \
	"ands " P256_OP(a) ", %[reg1], " P256_OP(a) " \n\t" \
	"eors " P256_OP(a) ", %[reg2], " P256_OP(a) " \n\t" \
	"ands " P256_OP(b) ", " P256_OP(c) ", %[reg2] \n\t" \
	"eors " P256_OP(b) ", %[reg1], " P256_OP(b) " \n\t"

/* S-box on (a, b, ROR(c, 28), ROR(d, 20)): the column that contains S_0. */
#define P256_SBOX_ROT0(a, b, c, d) \
	"mvns " P256_OP(a) ", " P256_OP(a) " \n\t" \
	"ands %[reg1], " P256_OP(b) ", " P256_OP(a) " \n\t" \
	"eors %[reg1], %[reg1], " P256_OP(c) ", ROR #28 \n\t" \
	"orrs " P256_OP(c) ", " P256_OP(b) ", " P256_OP(c) ", ROR #28 \n\t" \
	"eors " P256_OP(a) ", " P256_OP(a) ", " P256_OP(d) ", ROR #20 \n\t" \
	"eors " P256_OP(c) ", " P256_OP(c) ", " P256_OP(a) " \n\t" \
	"eors %[reg2], " P256_OP(b) ", " P256_OP(d) ", ROR #20 \n\t" \
	"eors " P256_OP(d) ", %[reg1], " P256_OP(d) ", ROR #20 \n\t" \
	"ands " P256_OP(a) ", %[reg1], " P256_OP(a) " \n\t" \
	"eors " P256_OP(a) ", %[reg2], " P256_OP(a) " \n\t" \
	"ands " P256_OP(b) ", " P256_OP(c) ", %[reg2] \n\t" \
	"eors " P256_OP(b) ", %[reg1], " P256_OP(b) " \n\t"

/*
 * S-box on (a, ROR(b, 31), ROR(c, 28), ROR(d, 19)): the column that contains
 * S_1.  Intermediate values are kept rotated relative to each other, hence
 * the difference rotations 3 (= 31 - 28) and 12 (= 31 - 19).
 */
#define P256_SBOX_ROT1(a, b, c, d) \
	"mvns " P256_OP(a) ", " P256_OP(a) " \n\t" \
	"ands %[reg1], " P256_OP(a) ", " P256_OP(b) ", ROR #31 \n\t" \
	"eors %[reg1], %[reg1], " P256_OP(c) ", ROR #28 \n\t" \
	"orrs " P256_OP(c) ", " P256_OP(c) ", " P256_OP(b) ", ROR #3 \n\t" \
	"eors " P256_OP(a) ", " P256_OP(a) ", " P256_OP(d) ", ROR #19 \n\t" \
	"eors " P256_OP(c) ", " P256_OP(a) ", " P256_OP(c) ", ROR #28 \n\t" \
	"eors %[reg2], " P256_OP(d) ", " P256_OP(b) ", ROR #12 \n\t" \
	"eors " P256_OP(d) ", %[reg1], " P256_OP(d) ", ROR #19 \n\t" \
	"ands " P256_OP(a) ", %[reg1], " P256_OP(a) " \n\t" \
	"eors " P256_OP(a) ", " P256_OP(a) ", %[reg2], ROR #19 \n\t" \
	"ands " P256_OP(b) ", " P256_OP(c) ", %[reg2], ROR #19 \n\t" \
	"eors " P256_OP(b) ", %[reg1], " P256_OP(b) " \n\t"

/* The two S-box layers that alternate from the second round onwards. */
#define P256_SBOXES_0347_1256 \
	P256_SBOX_ROT0(S_0, S_3, S_4, S_7) \
	P256_SBOX_ROT1(S_1, S_2, S_5, S_6)
#define P256_SBOXES_0246_1357 \
	P256_SBOX_ROT0(S_0, S_2, S_4, S_6) \
	P256_SBOX_ROT1(S_1, S_3, S_5, S_7)

/*
 * P256 - in-place permutation of an eight-word state, written in ARM
 * assembly.
 *
 * s      : eight 32-bit state words, updated in place.
 * rc     : round-constant bytes; one byte is consumed per round.
 * rounds : number of iterations of the unrolled four-round loop, NOT the
 *          number of rounds.  One round runs before the loop, four per
 *          iteration and three after it, so 4 + 4 * rounds rounds are
 *          executed and that many bytes are read from rc.  Must be >= 1:
 *          the counter is decremented before it is tested, so 0 would wrap
 *          around and read far beyond the constant table.
 *
 * Between rounds, S_2..S_7 are left un-rotated; the pending rotations are
 * folded into the ROR operands of the following round and are applied for
 * real by the five ROR instructions at the end.
 */
void P256(unsigned int *s, unsigned char *rc, unsigned char rounds)
{
	unsigned int reg1, reg2;

	asm volatile (
		/* first round: nothing is pending yet */
		P256_ADD_RC P256_NEXT_RC
		P256_SBOX_PLAIN(S_0, S_2, S_4, S_6)
		P256_SBOX_PLAIN(S_1, S_3, S_5, S_7)

		/* %= makes the label unique for every emitted copy of this asm */
		"enc_loop2_%=: \n\t"
		P256_ADD_RC P256_NEXT_RC P256_SBOXES_0347_1256
		P256_ADD_RC P256_NEXT_RC P256_SBOXES_0246_1357
		P256_ADD_RC P256_NEXT_RC P256_SBOXES_0347_1256
		P256_ADD_RC P256_NEXT_RC P256_SBOXES_0246_1357
		"subs %[ro], %[ro], #1 \n\t"
		"bne enc_loop2_%= \n\t"

		/* three trailing rounds; rc is not advanced after the last one */
		P256_ADD_RC P256_NEXT_RC P256_SBOXES_0347_1256
		P256_ADD_RC P256_NEXT_RC P256_SBOXES_0246_1357
		P256_ADD_RC              P256_SBOXES_0347_1256

		/* apply the rotations that are still pending */
		"ROR %[S_3], #31 \n\t"
		"ROR %[S_4], #28 \n\t"
		"ROR %[S_5], #28 \n\t"
		"ROR %[S_6], #20 \n\t"
		"ROR %[S_7], #19 \n\t"
		: /* read-write operands and scratch registers */
		  [ro] "+r" (rounds), [reg1] "=r" (reg1), [reg2] "=r" (reg2), [rc] "+r" (rc),
		  [S_0] "+r" (s[0]), [S_2] "+r" (s[2]), [S_4] "+r" (s[4]), [S_6] "+r" (s[6]),
		  [S_1] "+r" (s[1]), [S_3] "+r" (s[3]), [S_5] "+r" (s[5]), [S_7] "+r" (s[7])
		: /* no input-only operands */
		: /* the flag-setting instructions clobber the condition codes, and
		   * ldrb reads memory that is not listed as an operand */
		  "cc", "memory"
	);
}

#undef P256_SBOXES_0246_1357
#undef P256_SBOXES_0347_1256
#undef P256_SBOX_ROT1
#undef P256_SBOX_ROT0
#undef P256_SBOX_PLAIN
#undef P256_NEXT_RC
#undef P256_ADD_RC
#undef P256_OP
/*
 * packFormat - convert 8 input bytes into the two-word interleaved layout.
 *
 * in  : 8 bytes, read as two native-order 32-bit words t0 (bytes 0..3) and
 *       t1 (bytes 4..7).  No alignment is required.
 * out : out[0] receives the odd-position bits  (t1's in the high half-word,
 *       t0's in the low half-word); out[1] receives the even-position bits
 *       in the same arrangement.
 */
void packFormat(u32 * out, const u8 * in) {
	u32 t0, t1;
	u32 r0, r1;

	/*
	 * Copy the bytes instead of dereferencing (u32 *)in: a byte pointer
	 * carries no alignment guarantee and the cast breaks strict aliasing.
	 */
	memcpy(&t0, in, sizeof(t0));
	memcpy(&t1, in + sizeof(t0), sizeof(t1));
	t0 = U32BIG(t0);
	t1 = U32BIG(t1);

	/* Four delta swaps move t0's even bits to bits 0..15, odd bits to 16..31. */
	r0 = (t0 ^ (t0 >> 1)) & 0x22222222;
	t0 ^= r0 ^ (r0 << 1);
	r0 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C;
	t0 ^= r0 ^ (r0 << 2);
	r0 = (t0 ^ (t0 >> 4)) & 0x00F000F0;
	t0 ^= r0 ^ (r0 << 4);
	r0 = (t0 ^ (t0 >> 8)) & 0x0000FF00;
	t0 ^= r0 ^ (r0 << 8);

	/* Same separation for t1. */
	r1 = (t1 ^ (t1 >> 1)) & 0x22222222;
	t1 ^= r1 ^ (r1 << 1);
	r1 = (t1 ^ (t1 >> 2)) & 0x0C0C0C0C;
	t1 ^= r1 ^ (r1 << 2);
	r1 = (t1 ^ (t1 >> 4)) & 0x00F000F0;
	t1 ^= r1 ^ (r1 << 4);
	r1 = (t1 ^ (t1 >> 8)) & 0x0000FF00;
	t1 ^= r1 ^ (r1 << 8);

	out[0] = (t1 & 0xFFFF0000) | (t0 >> 16); /* t1.odd | t0.odd */
	out[1] = (t1 << 16) | (t0 & 0x0000FFFF); /* t1.even | t0.even */
}
/*
 * unpackFormat - inverse of packFormat.
 *
 * in  : two words in the interleaved layout (in[0] = odd bits, in[1] = even
 *       bits, each holding the second word's share in its high half-word).
 * out : receives the 8 bytes of the two reassembled words.
 */
void unpackFormat(u8 * out, u32 * in) {
	u32 word[2] = { 0 };
	u32 delta;
	int idx;

	/* Regroup the half-words so that each word holds its own odd|even bits. */
	word[0] = (in[1] & 0x0000FFFF) | (in[0] << 16);
	word[1] = (in[0] & 0xFFFF0000) | (in[1] >> 16);

	/* Run the four delta swaps of packFormat in reverse order on each word. */
	for (idx = 0; idx < 2; idx++) {
		u32 w = word[idx];

		delta = (w ^ (w >> 8)) & 0x0000FF00;
		w ^= delta ^ (delta << 8);
		delta = (w ^ (w >> 4)) & 0x00F000F0;
		w ^= delta ^ (delta << 4);
		delta = (w ^ (w >> 2)) & 0x0C0C0C0C;
		w ^= delta ^ (delta << 2);
		delta = (w ^ (w >> 1)) & 0x22222222;
		w ^= delta ^ (delta << 1);

		word[idx] = w;
	}

	memcpy(out, word, 8 * sizeof(unsigned char));
}
#include"api.h"
#include <string.h>
#define U32BIG(x) (x)
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
void P256(unsigned int *s, unsigned char *rc, unsigned char rounds);
void packFormat(u32 * out, const u8 * in);
void unpackFormat(u8 * out, u32 * in);
#include"auxFormat.h"
/* Block size in bytes: each P256 call absorbs or produces 8 bytes (64 bits). */
//#define RATE (64 / 8)
#define RATE 8
/*
 * Values previously used for the round macros, kept for reference:
#define PR0_ROUNDS 25
#define PR_ROUNDS 13
#define PRF_ROUNDS 15
* */
/*
 * Values passed as the `rounds` argument of P256.  P256 executes
 * 4 + 4 * rounds rounds, so these correspond to 52 rounds for
 * initialisation (PR0), 28 per data block (PR) and 32 for finalisation (PRF).
 */
#define PR0_ROUNDS 12
#define PR_ROUNDS 6
#define PRF_ROUNDS 7
/*
 * Round-constant bytes handed to P256 as `rc`; every call starts at entry 0.
 * P256 XORs the high nibble of each byte into s[0] and the low nibble into
 * s[1].  The longest call (PR0_ROUNDS) consumes all 52 entries.
 */
unsigned char constant6Format[52] = { 0x01, 0x10, 0x02, 0x20, 0x04, 0x41, 0x11,
0x12, 0x22, 0x24, 0x45, 0x50, 0x03, 0x30, 0x06, 0x61, 0x15, 0x53, 0x33,
0x36, 0x67, 0x74, 0x46, 0x60, 0x05, 0x51, 0x13, 0x32, 0x26, 0x65, 0x54,
0x42, 0x21, 0x14, 0x43, 0x31, 0x16, 0x63, 0x35, 0x57, 0x72, 0x27, 0x75,
0x56, 0x62, 0x25, 0x55, 0x52, 0x23, 0x34, 0x47, 0x70, };
/*
 * crypto_aead_encrypt - authenticated encryption on the bit-sliced state.
 *
 * c, clen : ciphertext output; *clen is set to mlen + CRYPTO_ABYTES and the
 *           tag occupies the last CRYPTO_ABYTES bytes.
 * m, mlen : plaintext.
 * ad, adlen : associated data (authenticated only).
 * nsec    : not used.
 * npub    : 16-byte nonce.
 * k       : 16-byte key.
 * Always returns 0.
 */
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
		const unsigned char *m, unsigned long long mlen,
		const unsigned char *ad, unsigned long long adlen,
		const unsigned char *nsec, const unsigned char *npub,
		const unsigned char *k) {
	u32 state[8] = { 0 };
	u32 packed[2] = { 0 };
	u8 scratch[16];

	*clen = mlen + CRYPTO_ABYTES;

	/* Initialisation: nonce fills words 0..3, key fills words 4..7. */
	packFormat(state, npub);
	packFormat(state + 2, npub + 8);
	packFormat(state + 4, k);
	packFormat(state + 6, k + 8);
	P256(state, constant6Format, PR0_ROUNDS);

	/* Associated data: full blocks, then one padded block. */
	if (adlen != 0) {
		for (; adlen >= RATE; adlen -= RATE, ad += RATE) {
			packFormat(packed, ad);
			state[0] ^= packed[0];
			state[1] ^= packed[1];
			P256(state, constant6Format, PR_ROUNDS);
		}
		/* Pad the remainder (possibly empty) with a single 0x01 byte. */
		memset(scratch, 0, sizeof(scratch));
		memcpy(scratch, ad, adlen);
		scratch[adlen] = 0x01;
		packFormat(packed, scratch);
		state[0] ^= packed[0];
		state[1] ^= packed[1];
		P256(state, constant6Format, PR_ROUNDS);
	}

	/* Separate the associated data from the message. */
	state[6] ^= 0x80000000;

	/* Plaintext: the ciphertext block is the state after absorbing the block. */
	if (mlen != 0) {
		for (; mlen >= RATE; mlen -= RATE, m += RATE, c += RATE) {
			packFormat(packed, m);
			state[0] ^= packed[0];
			state[1] ^= packed[1];
			unpackFormat(c, state);
			P256(state, constant6Format, PR_ROUNDS);
		}
		/* Final padded block: only the first mlen bytes are emitted. */
		memset(scratch, 0, sizeof(scratch));
		memcpy(scratch, m, mlen);
		scratch[mlen] = 0x01;
		packFormat(packed, scratch);
		state[0] ^= packed[0];
		state[1] ^= packed[1];
		unpackFormat(scratch, state);
		memcpy(c, scratch, mlen);
		c += mlen;
	}

	/* Finalisation: the tag is the first four state words, unpacked. */
	P256(state, constant6Format, PRF_ROUNDS);
	unpackFormat(scratch, state);
	unpackFormat(scratch + 8, state + 2);
	memcpy(c, scratch, CRYPTO_ABYTES);
	return 0;
}
/*
 * crypto_aead_decrypt - authenticated decryption on the bit-sliced state.
 *
 * m, mlen : plaintext output; *mlen is set to clen - CRYPTO_ABYTES.
 * nsec    : not used.
 * c, clen : ciphertext followed by a CRYPTO_ABYTES-byte tag.
 * ad, adlen : associated data.
 * npub    : 16-byte nonce.
 * k       : 16-byte key.
 *
 * Returns 0 when the tag verifies.  Returns -1 when clen is shorter than a
 * tag (nothing is written) or when the tag does not match (the plaintext
 * buffer is zeroed and *mlen is set to 0).
 */
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
		unsigned char *nsec, const unsigned char *c, unsigned long long clen,
		const unsigned char *ad, unsigned long long adlen,
		const unsigned char *npub, const unsigned char *k) {
	u32 s[8] = { 0 };
	u32 dataFormat[2] = { 0 };
	u32 dataFormat_1[2] = { 0 };
	u8 tempU8[RATE] = { 0 };
	u8 tempData[16];
	/* m is advanced while decrypting; keep the start for the failure path. */
	unsigned char *const m_start = m;
	unsigned char diff = 0;
	unsigned int i;

	/* Validate the length before reporting anything through *mlen. */
	if (clen < CRYPTO_ABYTES)
		return -1;
	*mlen = clen - CRYPTO_ABYTES;

	/* Initialisation: nonce fills words 0..3, key fills words 4..7. */
	packFormat(s, npub);
	packFormat((s + 2), (npub + 8));
	packFormat((s + 4), k);
	packFormat((s + 6), (k + 8));
	P256(s, constant6Format, PR0_ROUNDS);

	/* Associated data: full blocks, then one block padded with 0x01. */
	if (adlen) {
		while (adlen >= RATE) {
			packFormat(dataFormat, ad);
			s[0] ^= dataFormat[0];
			s[1] ^= dataFormat[1];
			P256(s, constant6Format, PR_ROUNDS);
			adlen -= RATE;
			ad += RATE;
		}
		memset(tempData, 0, sizeof(tempData));
		memcpy(tempData, ad, adlen);
		tempData[adlen] = 0x01;
		packFormat(dataFormat, tempData);
		s[0] ^= dataFormat[0];
		s[1] ^= dataFormat[1];
		P256(s, constant6Format, PR_ROUNDS);
	}
	s[6] ^= 0x80000000;

	/* Ciphertext body (the trailing tag is excluded). */
	clen -= CRYPTO_ABYTES;
	if (clen) {
		while (clen >= RATE) {
			/* plaintext = state ^ ciphertext; the ciphertext becomes the new rate */
			packFormat(dataFormat, c);
			dataFormat_1[0] = s[0] ^ dataFormat[0];
			dataFormat_1[1] = s[1] ^ dataFormat[1];
			unpackFormat(m, dataFormat_1);
			s[0] = dataFormat[0];
			s[1] = dataFormat[1];
			P256(s, constant6Format, PR_ROUNDS);
			clen -= RATE;
			m += RATE;
			c += RATE;
		}
		/* Final partial block, handled in byte order. */
		unpackFormat(tempU8, s);
		memset(tempData, 0, sizeof(tempData));
		memcpy(tempData, c, clen);
		tempData[clen] = 0x01;
		for (i = 0; i < RATE; i++)
			tempU8[i] ^= tempData[i];
		memcpy(m, tempU8, clen);
		/*
		 * New rate: the ciphertext bytes, followed by the old state bytes
		 * XORed with the padding.  packFormat overwrites s[0] and s[1].
		 */
		memcpy(tempU8, tempData, clen);
		c += clen;
		packFormat(s, tempU8);
	}

	/* Finalisation: recompute the tag and compare it in constant time. */
	P256(s, constant6Format, PRF_ROUNDS);
	unpackFormat(tempData, s);
	unpackFormat((tempData + 8), (s + 2));
	for (i = 0; i < CRYPTO_ABYTES; i++)
		diff |= (unsigned char) (tempData[i] ^ c[i]);
	if (diff) {
		/* Wipe the whole plaintext buffer, starting at its first byte. */
		memset(m_start, 0, sizeof(unsigned char) * (*mlen));
		*mlen = 0;
		return -1;
	}
	return 0;
}
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#include <stdio.h>
#include "api.h"
#include <string.h>
typedef unsigned long long u64;
typedef unsigned char u8;
typedef long long i64;
#define RATE 8
#define PR0_ROUNDS 52
#define PR_ROUNDS 28
#define PRF_ROUNDS 32
#define LOTR64(x,n) (((x)<<(n))|((x)>>(64-(n))))
#define U64BIG(x) (x)
static const u8 constant6[52] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03,
0x06, 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29, 0x13, 0x27, 0x0f,
0x1e, 0x3d, 0x3a, 0x34, 0x28, 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32,
0x24, 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37, 0x2e, 0x1d, 0x3b,
0x36, 0x2c, 0x19, 0x33, 0x26, 0x0d, 0x1a, 0x35, 0x2a };
/*
 * sbox - bit-sliced S-box on four words.
 * Reads a, b, c and d; overwrites a with its new value and writes the other
 * three results to f, g and h.  Relies on temporaries t1, t2, t3, t5, t6,
 * t8, t9 and t11 being declared in the enclosing scope.
 */
#define sbox(a, b, c, d, f, g, h) \
{ \
t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
}
/*
 * ROUND256 - one round on the state s[0..3]:
 * XOR constant6[i] into s[0], apply sbox to the four words, then store the
 * three extra outputs into s[1], s[2] and s[3] rotated left by 1, 8 and 25
 * bits.  Expects s, x5, x6, x7 and the sbox temporaries in the enclosing scope.
 */
#define ROUND256(i) {\
s[0]^=constant6[i];\
sbox(s[0], s[1], s[2], s[3], x5, x6, x7);\
s[1]=LOTR64(x5,1);\
s[2]=LOTR64(x6,8);\
s[3]=LOTR64(x7,25);\
}
/*
 * crypto_aead_encrypt - authenticated encryption, 64-bit word implementation.
 *
 * c, clen : ciphertext output; *clen is set to mlen + CRYPTO_ABYTES and the
 *           tag occupies the last CRYPTO_ABYTES bytes.
 * m, mlen : plaintext.
 * ad, adlen : associated data (authenticated only).
 * nsec    : not used.
 * npub    : 16-byte nonce, copied into s[0..1].
 * k       : 16-byte key, copied into s[2..3].
 * Always returns 0.
 */
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
		const unsigned char *m, unsigned long long mlen,
		const unsigned char *ad, unsigned long long adlen,
		const unsigned char *nsec, const unsigned char *npub,
		const unsigned char *k) {
	/* s, x5..x7 and t1..t11 are referenced by name from ROUND256 / sbox. */
	u64 x7, x6, x5, i;
	u64 t1, t2, t3, t5, t6, t8, t9, t11;
	u64 lane; /* one block loaded with memcpy: no alignment or aliasing assumptions */
	u8 tempData[8] = { 0 };
	u64 s[4] = { 0 };

	*clen = mlen + CRYPTO_ABYTES;

	// initialization
	memcpy(s, npub, CRYPTO_NPUBBYTES);
	memcpy(s + 2, k, CRYPTO_KEYBYTES);
	for (i = 0; i < PR0_ROUNDS; i++) {
		ROUND256(i);
	}

	// process associated data: full blocks, then one block padded with 0x01
	if (adlen) {
		while (adlen >= RATE) {
			memcpy(&lane, ad, sizeof(lane));
			s[0] ^= U64BIG(lane);
			for (i = 0; i < PR_ROUNDS; i++) {
				ROUND256(i);
			}
			adlen -= RATE;
			ad += RATE;
		}
		memset(tempData, 0, RATE);
		memcpy(tempData, ad, adlen);
		tempData[adlen] = 0x01;
		memcpy(&lane, tempData, sizeof(lane));
		s[0] ^= U64BIG(lane);
		for (i = 0; i < PR_ROUNDS; i++) {
			ROUND256(i);
		}
	}
	s[3] ^= 0x8000000000000000;

	// process plaintext: the ciphertext block is s[0] after absorbing the block
	if (mlen) {
		while (mlen >= RATE) {
			memcpy(&lane, m, sizeof(lane));
			s[0] ^= U64BIG(lane);
			memcpy(c, s, RATE);
			for (i = 0; i < PR_ROUNDS; i++) {
				ROUND256(i);
			}
			mlen -= RATE;
			m += RATE;
			c += RATE;
		}
		memset(tempData, 0, RATE);
		memcpy(tempData, m, mlen);
		tempData[mlen] = 0x01;
		memcpy(&lane, tempData, sizeof(lane));
		s[0] ^= U64BIG(lane);
		/* only the first mlen bytes of the last block are emitted */
		memcpy(c, s, mlen);
		c += mlen;
	}

	// finalization
	for (i = 0; i < PRF_ROUNDS; i++) {
		ROUND256(i);
	}
	// return tag: the first CRYPTO_ABYTES bytes of the state
	memcpy(c, s, CRYPTO_ABYTES);
	return 0;
}
/*
 * crypto_aead_decrypt - authenticated decryption, 64-bit word implementation.
 *
 * m, mlen : plaintext output; *mlen is set to clen - CRYPTO_ABYTES.
 * nsec    : not used.
 * c, clen : ciphertext followed by a CRYPTO_ABYTES-byte tag.
 * ad, adlen : associated data.
 * npub    : 16-byte nonce.
 * k       : 16-byte key.
 *
 * Returns 0 when the tag verifies.  Returns -1 when clen is shorter than a
 * tag (nothing is written) or when the tag does not match (the plaintext
 * buffer is zeroed and *mlen is set to 0).
 */
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
		unsigned char *nsec, const unsigned char *c, unsigned long long clen,
		const unsigned char *ad, unsigned long long adlen,
		const unsigned char *npub, const unsigned char *k) {
	/* s, x5..x7 and t1..t11 are referenced by name from ROUND256 / sbox. */
	u64 x7, x6, x5, i;
	u64 t1, t2, t3, t5, t6, t8, t9, t11;
	u64 lane; /* one block moved with memcpy: no alignment or aliasing assumptions */
	u8 tempData[8] = { 0 };
	u64 s[4] = { 0 };
	/* m is advanced while decrypting; keep the start for the failure path. */
	unsigned char *const m_start = m;
	const unsigned char *tag;
	unsigned char diff = 0;

	if (clen < CRYPTO_ABYTES)
		return -1;
	*mlen = clen - CRYPTO_ABYTES;

	// initialization
	memcpy(s, npub, CRYPTO_NPUBBYTES);
	memcpy(s + 2, k, CRYPTO_KEYBYTES);
	for (i = 0; i < PR0_ROUNDS; i++) {
		ROUND256(i);
	}

	// process associated data: full blocks, then one block padded with 0x01
	if (adlen) {
		while (adlen >= RATE) {
			memcpy(&lane, ad, sizeof(lane));
			s[0] ^= U64BIG(lane);
			for (i = 0; i < PR_ROUNDS; i++) {
				ROUND256(i);
			}
			adlen -= RATE;
			ad += RATE;
		}
		memset(tempData, 0, RATE);
		memcpy(tempData, ad, adlen);
		tempData[adlen] = 0x01;
		memcpy(&lane, tempData, sizeof(lane));
		s[0] ^= U64BIG(lane);
		for (i = 0; i < PR_ROUNDS; i++) {
			ROUND256(i);
		}
	}
	s[3] ^= 0x8000000000000000;

	// process ciphertext (the trailing tag is excluded)
	clen -= CRYPTO_ABYTES;
	if (clen) {
		while (clen >= RATE) {
			/* plaintext = s[0] ^ ciphertext; the ciphertext becomes the new s[0] */
			memcpy(&lane, c, sizeof(lane));
			lane = U64BIG(s[0] ^ U64BIG(lane));
			memcpy(m, &lane, sizeof(lane));
			memcpy(s, c, RATE);
			for (i = 0; i < PR_ROUNDS; i++) {
				ROUND256(i);
			}
			clen -= RATE;
			m += RATE;
			c += RATE;
		}
		memset(tempData, 0, RATE);
		memcpy(tempData, c, clen);
		tempData[clen] = 0x01;
		memcpy(&lane, tempData, sizeof(lane));
		s[0] ^= U64BIG(lane);
		memcpy(m, s, clen);
		/* the first clen state bytes are replaced by the ciphertext bytes */
		memcpy(s, c, clen);
		c += clen;
	}

	// finalization
	for (i = 0; i < PRF_ROUNDS; i++) {
		ROUND256(i);
	}

	/* Compare the tag without an early exit so timing does not leak the mismatch position. */
	tag = (const unsigned char *)s;
	for (i = 0; i < CRYPTO_ABYTES; i++)
		diff |= (unsigned char)(tag[i] ^ c[i]);
	if (diff) {
		/* Wipe the whole plaintext buffer, starting at its first byte. */
		memset(m_start, 0, (*mlen));
		*mlen = 0;
		return -1;
	}
	return 0;
}
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#include"api.h"
typedef unsigned char u8;
typedef unsigned long long u64;
typedef unsigned int u32;
#define PR0_ROUNDS 76
#define PR_ROUNDS 28
#define PRF_ROUNDS 32
#define RATE 24
#define sbox(a, b, c, d, f, g, h) \
{ \
t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
}
/*
 * Left rotation of a 96-bit value held as a (upper 32 bits, a 32-bit
 * variable) and b (lower 64 bits, a 64-bit variable).
 *
 * ROTR961 / ROTR962             upper 32 / lower 64 bits of the value rotated
 *                               left by n, for 0 < n < 32.  ROTR961 relies on
 *                               the result being stored in a 32-bit variable.
 * ROTR96MORE321 / ROTR96MORE322 the same for 32 < n < 64.
 *
 * Every parameter is parenthesised so that expression arguments expand
 * with the intended precedence.
 */
#define ROTR961(a,b,n) (((a)<<(n))|((b)>>(64-(n))))
#define ROTR962(a,b,n) (((b)<<(n))|((a)>>(32-(n))))
#define ROTR96MORE321(a,b,n) (((b)<<((n)-32))>>32)
#define ROTR96MORE322(a,b,n) (((b)<<(n))|((u64)(a)<<((n)-32))|((b)>>(96-(n))))
#define U32BIG(x) (x)
#define U64BIG(x) (x)
u8 constant7[76] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03, 0x06, 0x0c,
0x18, 0x30, 0x61, 0x42, 0x05, 0x0a, 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f,
0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b, 0x16, 0x2c,
0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27,
0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43, 0x07, 0x0e,
0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d,
0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b };
/*
 * ROUND384 - one round on a state of four rows, each held as a low part
 * xN0 and a high part xN1 (N = 0..3):
 *  - XOR constant7[i] into x00;
 *  - apply sbox to the low parts and to the high parts; row 0 is updated in
 *    place, the other three results land in x5*, x6*, x7*;
 *  - write those results back to rows 1, 2 and 3 through the rotate macros
 *    with rotation amounts 1, 8 and 55.
 * Expects all x** variables and the sbox temporaries in the enclosing scope.
 */
#define ROUND384(i){\
x00 ^= constant7[i];\
sbox(x00, x10, x20, x30, x50, x60, x70);\
sbox(x01, x11, x21, x31, x51, x61, x71);\
x11 = ROTR961(x51, x50, 1);\
x10 = ROTR962(x51, x50, 1);\
x21 = ROTR961(x61, x60, 8);\
x20 = ROTR962(x61, x60, 8);\
x31 = ROTR96MORE321(x71, x70, 55);\
x30 = ROTR96MORE322(x71, x70, 55);\
}
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k) {
u64 i;
u64 t1, t2, t3, t5, t6, t8, t9, t11;
u64 x30 = 0, x20 = 0, x10 = 0, x00 = 0;
u32 x31 = 0, x21 = 0, x11 = 0, x01 = 0;
u8 tempData[24] = { 0 };
u8 tempData1[24] = { 0 };
u64 x50, x60, x70;
u32 x51, x61, x71;
*clen = mlen + CRYPTO_KEYBYTES;
// initialization
x00 = U64BIG(*(u64*)(npub));
x01 = U32BIG(*(u32*)(npub + 8));
x10 = ((u64)U32BIG(*(u32*)(k)) << 32)
| ((u64)U32BIG(*(u32*)(npub + 12)));
x11 = U32BIG(*(u32*)(k + 4));
x20 = U64BIG(*(u64*)(k + 8));
x30 = 0;
x31 = 0x80000000;
for (i = 0; i < PR0_ROUNDS; i++) {
ROUND384(i);
}
// process associated data
if (adlen) {
while (adlen >= RATE) {
x00 ^= U64BIG(*(u64*)(ad));
x01 ^= U32BIG(*(u32*)(ad + 8));
x10 ^= ((u64)U32BIG(*(u32*)(ad + 16)) << 32) | ((u64)U32BIG(*(u32*)(ad + 12)));
x11 ^= U32BIG(*(u32*)(ad + 20));
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
adlen -= RATE;
ad += RATE;
}
memset(tempData, 0, RATE);
memcpy(tempData, ad, adlen);
tempData[adlen] = 0x01;
x00 ^= U64BIG(*(u64*)(tempData));
x01 ^= U32BIG(*(u32*)(tempData + 8));
x10 ^= ((u64)U32BIG(*(u32*)(tempData + 16)) << 32) | ((u64)U32BIG(*(u32*)(tempData + 12)));
x11 ^= U32BIG(*(u32*)(tempData + 20));
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
}
x31 ^= 0x80000000;
// process plaintext
if (mlen) {
while (mlen >= RATE) {
x00 ^= U64BIG(*(u64*)(m));
x01 ^= U32BIG(*(u32*)(m + 8));
x10 ^= ((u64)U32BIG(*(u32*)(m + 16)) << 32) | ((u64)U32BIG(*(u32*)(m + 12)));
x11 ^= U32BIG(*(u32*)(m + 20));
*(u64*)c = U64BIG(x00);
*(u32*)(c + 8) = U32BIG(x01);
*(u32*)(c + 12) = U32BIG(x10);
*(u32*)(c + 16) = U32BIG(x10 >> 32);
*(u32*)(c + 20) = U32BIG(x11);
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
mlen -= RATE;
m += RATE;
c += RATE;
}
memset(tempData, 0, RATE);
memcpy(tempData, m, mlen);
tempData[mlen] = 0x01;
x00 ^= U64BIG(*(u64*)(tempData));
x01 ^= U32BIG(*(u32*)(tempData + 8));
x10 ^= ((u64)U32BIG(*(u32*)(tempData + 16)) << 32) | ((u64)U32BIG(*(u32*)(tempData + 12)));
x11 ^= U32BIG(*(u32*)(tempData + 20));
*(u64*)tempData1 = U64BIG(x00);
*(u32*)(tempData1 + 8) = U32BIG(x01);
*(u32*)(tempData1 + 12) = U32BIG(x10);
*(u32*)(tempData1 + 16) = U32BIG(x10 >> 32);
*(u32*)(tempData1 + 20) = U32BIG(x11);
memcpy(c, tempData1, mlen * sizeof(unsigned char));
c += mlen;
}
// finalization
for (i = 0; i < PRF_ROUNDS; i++) {
ROUND384(i);
}
// return tag
*(u64*)tempData = U64BIG(x00);
*(u32*)(tempData + 8) = U32BIG(x01);
*(u32*)(tempData + 12) = U32BIG(x10);
memcpy(c, tempData, CRYPTO_ABYTES);
return 0;
}
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
unsigned char *nsec, const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k) {
*mlen = clen - CRYPTO_ABYTES;
if (clen < CRYPTO_ABYTES)
return -1;
u64 i;
u64 t1, t2, t3, t5, t6, t8, t9, t11;
u64 x30 = 0, x20 = 0, x10 = 0, x00 = 0;
u32 x31 = 0, x21 = 0, x11 = 0, x01 = 0;
u8 tempData[24] = { 0 };
u8 tempData1[24] = { 0 };
u64 x50, x60, x70;
u32 x51, x61, x71;
// initialization
x00 = U64BIG(*(u64*)(npub));
x01 = U32BIG(*(u32*)(npub + 8));
x10 = ((u64)U32BIG(*(u32*)(k)) << 32)
| ((u64)U32BIG(*(u32*)(npub + 12)));
x11 = U32BIG(*(u32*)(k + 4));
x20 = U64BIG(*(u64*)(k + 8));
x30 = 0;
x31 = 0x80000000;
for (i = 0; i < PR0_ROUNDS; i++) {
ROUND384(i);
}
// process associated data
if (adlen) {
while (adlen >= RATE) {
x00 ^= U64BIG(*(u64*)(ad));
x01 ^= U32BIG(*(u32*)(ad + 8));
x10 ^= ((u64)U32BIG(*(u32*)(ad + 16)) << 32) | ((u64)U32BIG(*(u32*)(ad + 12)));
x11 ^= U32BIG(*(u32*)(ad + 20));
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
adlen -= RATE;
ad += RATE;
}
memset(tempData, 0, sizeof(tempData));
memcpy(tempData, ad, adlen * sizeof(unsigned char));
tempData[adlen] = 0x01;
x00 ^= U64BIG(*(u64*)(tempData));
x01 ^= U32BIG(*(u32*)(tempData + 8));
x10 ^= ((u64)U32BIG(*(u32*)(tempData + 16)) << 32) | ((u64)U32BIG(*(u32*)(tempData + 12)));
x11 ^= U32BIG(*(u32*)(tempData + 20));
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
}
x31 ^= 0x80000000;
// process plaintext
clen -= CRYPTO_KEYBYTES;
if (clen) {
while (clen >= RATE) {
*(u64*)(m) = U64BIG(x00) ^ (*(u64*)(c));
*(u32*)(m + 8) = U32BIG(x01) ^ (*(u32*)(c + 8));
*(u32*)(m + 12) = U32BIG(x10) ^ (*(u32*)(c + 12));
*(u32*)(m + 16) = U32BIG(x10 >> 32) ^ (*(u32*)(c + 16));
*(u32*)(m + 20) = U32BIG(x11) ^ (*(u32*)(c + 20));
x00 = U64BIG(*(u64*)(c));
x01 = U32BIG(*(u32*)(c + 8));
x10 = U64BIG(*(u64*)(c + 12));
x11 = U32BIG(*(u32*)(c + 20));
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
clen -= RATE;
m += RATE;
c += RATE;
}
memset(tempData, 0, sizeof(tempData));
memcpy(tempData, c, clen * sizeof(unsigned char));
tempData[clen] = 0x01;
*(u64*)(tempData1) = U64BIG(x00) ^ (*(u64*)(tempData));
*(u32*)(tempData1 + 8) = U32BIG(x01) ^ (*(u32*)(tempData + 8));
//*(u64*)(tempData1 + 12) = U64BIG(x10) ^ (*(u64*)(tempData + 12));
*(u32*)(tempData1 + 12) = U32BIG(x10) ^ (*(u32*)(tempData + 12));
*(u32*)(tempData1 + 16) = U32BIG(x10 >> 32) ^ (*(u32*)(tempData + 16));
*(u32*)(tempData1 + 20) = U32BIG(x11) ^ (*(u32*)(tempData + 20));
memcpy(m, tempData1, clen * sizeof(unsigned char));
memcpy(tempData1, c, clen * sizeof(unsigned char));
x00 = U64BIG(*(u64*)(tempData1));
x01 = U32BIG(*(u32*)(tempData1 + 8));
x10 = ((u64)U32BIG(*(u32*)(tempData1 + 16)) << 32) | ((u64)U32BIG(*(u32*)(tempData1 + 12)));
x11 = U32BIG(*(u32*)(tempData1 + 20));
c += clen;
}
// finalization
for (i = 0; i < PRF_ROUNDS; i++) {
ROUND384(i);
}
// return -1 if verification fails
*(u64*)(tempData1) = U64BIG(x00);
*(u32*)(tempData1 + 8) = U32BIG(x01);
*(u32*)(tempData1 + 12) = U32BIG(x10);
if (memcmp((void*)tempData1, (void*)c, CRYPTO_ABYTES)) {
memset(m, 0, sizeof(unsigned char) * (*mlen));
*mlen = 0;
return -1;
}
return 0;
}
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#include"api.h"
#include <string.h>
typedef unsigned char u8;
typedef unsigned long long u64;
typedef unsigned int u32;
#define PR0_ROUNDS 76
#define PR_ROUNDS 28
#define PRF_ROUNDS 32
#define RATE 24
#define sbox(a, b, c, d, f, g, h) \
{ \
t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
}
/*
 * Left rotation of a 96-bit value held as a (upper 32 bits, a 32-bit
 * variable) and b (lower 64 bits, a 64-bit variable).
 *
 * ROTR961 / ROTR962             upper 32 / lower 64 bits of the value rotated
 *                               left by n, for 0 < n < 32.  ROTR961 relies on
 *                               the result being stored in a 32-bit variable.
 * ROTR96MORE321 / ROTR96MORE322 the same for 32 < n < 64.
 *
 * Every parameter is parenthesised so that expression arguments expand
 * with the intended precedence.
 */
#define ROTR961(a,b,n) (((a)<<(n))|((b)>>(64-(n))))
#define ROTR962(a,b,n) (((b)<<(n))|((a)>>(32-(n))))
#define ROTR96MORE321(a,b,n) (((b)<<((n)-32))>>32)
#define ROTR96MORE322(a,b,n) (((b)<<(n))|((u64)(a)<<((n)-32))|((b)>>(96-(n))))
#define U32BIG(x) (x)
#define U64BIG(x) (x)
u8 constant7[76] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03, 0x06, 0x0c,
0x18, 0x30, 0x61, 0x42, 0x05, 0x0a, 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f,
0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b, 0x16, 0x2c,
0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27,
0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43, 0x07, 0x0e,
0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d,
0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b };
#define ROUND384(i){\
x00 ^= constant7[i];\
sbox(x00, x10, x20, x30, x50, x60, x70);\
sbox(x01, x11, x21, x31, x51, x61, x71);\
x11 = ROTR961(x51, x50, 1);\
x10 = ROTR962(x51, x50, 1);\
x21 = ROTR961(x61, x60, 8);\
x20 = ROTR962(x61, x60, 8);\
x31 = ROTR96MORE321(x71, x70, 55);\
x30 = ROTR96MORE322(x71, x70, 55);\
}
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k) {
u64 i;
u64 t1, t2, t3, t5, t6, t8, t9, t11;
u64 x30 = 0, x20 = 0, x10 = 0, x00 = 0;
u32 x31 = 0, x21 = 0, x11 = 0, x01 = 0;
u8 tempData[24] = { 0 };
u8 tempData1[24] = { 0 };
u64 x50, x60, x70;
u32 x51, x61, x71;
*clen = mlen + CRYPTO_KEYBYTES;
// initialization
x00 = U64BIG(*(u64*)(npub));
x01 = U32BIG(*(u32*)(npub + 8));
x10 = ((u64)U32BIG(*(u32*)(k)) << 32)
| ((u64)U32BIG(*(u32*)(npub + 12)));
x11 = U32BIG(*(u32*)(k + 4));
x20 = U64BIG(*(u64*)(k + 8));
x30 = 0;
x31 = 0x80000000;
for (i = 0; i < PR0_ROUNDS; i++) {
ROUND384(i);
}
// process associated data
if (adlen) {
while (adlen >= RATE) {
x00 ^= U64BIG(*(u64*)(ad));
x01 ^= U32BIG(*(u32*)(ad + 8));
x10 ^= U64BIG(*(u64*)(ad + 12));
x11 ^= U32BIG(*(u32*)(ad + 20));
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
adlen -= RATE;
ad += RATE;
}
memset(tempData, 0, RATE);
memcpy(tempData, ad, adlen);
tempData[adlen] = 0x01;
x00 ^= U64BIG(*(u64*)(tempData));
x01 ^= U32BIG(*(u32*)(tempData + 8));
x10 ^= U64BIG(*(u64*)(tempData + 12));
x11 ^= U32BIG(*(u32*)(tempData + 20));
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
}
x31 ^= 0x80000000;
// process plaintext
if (mlen) {
while (mlen >= RATE) {
x00 ^= U64BIG(*(u64*)(m));
x01 ^= U32BIG(*(u32*)(m + 8));
x10 ^= U64BIG(*(u64*)(m + 12));
x11 ^= U32BIG(*(u32*)(m + 20));
*(u64*)c = U64BIG(x00);
*(u32*)(c + 8) = U32BIG(x01);
*(u64*)(c + 12) = U64BIG(x10);
*(u32*)(c + 20) = U32BIG(x11);
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
mlen -= RATE;
m += RATE;
c += RATE;
}
memset(tempData, 0, RATE);
memcpy(tempData, m, mlen);
tempData[mlen] = 0x01;
x00 ^= U64BIG(*(u64*)(tempData));
x01 ^= U32BIG(*(u32*)(tempData + 8));
x10 ^= U64BIG(*(u64*)(tempData + 12));
x11 ^= U32BIG(*(u32*)(tempData + 20));
*(u64*)tempData1 = U64BIG(x00);
*(u32*)(tempData1 + 8) = U32BIG(x01);
*(u64*)(tempData1 + 12) = U64BIG(x10);
*(u32*)(tempData1 + 20) = U32BIG(x11);
memcpy(c, tempData1, mlen * sizeof(unsigned char));
c += mlen;
}
// finalization
for (i = 0; i < PRF_ROUNDS; i++) {
ROUND384(i);
}
// return tag
*(u64*)c = U64BIG(x00);
*(u32*)(c + 8) = U32BIG(x01);
*(u32*)(c + 12) = U32BIG(x10);
return 0;
}
/*
 * AEAD decryption with a 24-byte rate.
 *
 *   m, mlen   output buffer and length; on success *mlen = clen - CRYPTO_ABYTES
 *   nsec      unused
 *   c, clen   ciphertext followed by a CRYPTO_ABYTES tag
 *   ad, adlen associated data
 *   npub, k   16-byte nonce and key
 * Returns 0 when the tag matches.  Returns -1 when clen is shorter than a
 * tag or the tag does not match; in both cases *mlen is set to 0, and on a
 * tag mismatch the plaintext already written to the output buffer is zeroed.
 *
 * NOTE(review): buffers are accessed through u32/u64 pointer casts, which
 * assumes the target tolerates unaligned, type-punned accesses.
 */
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
		unsigned char *nsec, const unsigned char *c, unsigned long long clen,
		const unsigned char *ad, unsigned long long adlen,
		const unsigned char *npub, const unsigned char *k) {
	/* `m` is advanced while decrypting; remember where the output starts so
	 * a failed verification wipes the right bytes. */
	unsigned char *const m_start = m;
	u64 i;
	u64 t1, t2, t3, t5, t6, t8, t9, t11;
	u64 x30 = 0, x20 = 0, x10 = 0, x00 = 0;
	u32 x31 = 0, x21 = 0, x11 = 0, x01 = 0;
	u8 tempData[24] = { 0 };
	u8 tempData1[24] = { 0 };
	u64 x50, x60, x70;
	u32 x51, x61, x71;

	/* Check the length before computing clen - CRYPTO_ABYTES, which would
	 * otherwise wrap around. */
	if (clen < CRYPTO_ABYTES) {
		*mlen = 0;
		return -1;
	}
	*mlen = clen - CRYPTO_ABYTES;

	// initialization
	x00 = U64BIG(*(u64*)(npub));
	x01 = U32BIG(*(u32*)(npub + 8));
	x10 = ((u64)U32BIG(*(u32*)(k)) << 32)
			| ((u64)U32BIG(*(u32*)(npub + 12)));
	x11 = U32BIG(*(u32*)(k + 4));
	x20 = U64BIG(*(u64*)(k + 8));
	x30 = 0;
	x31 = 0x80000000;
	for (i = 0; i < PR0_ROUNDS; i++) {
		ROUND384(i);
	}

	// process associated data
	if (adlen) {
		while (adlen >= RATE) {
			x00 ^= U64BIG(*(u64*)(ad));
			x01 ^= U32BIG(*(u32*)(ad + 8));
			x10 ^= U64BIG(*(u64*)(ad + 12));
			x11 ^= U32BIG(*(u32*)(ad + 20));
			for (i = 0; i < PR_ROUNDS; i++) {
				ROUND384(i);
			}
			adlen -= RATE;
			ad += RATE;
		}
		/* last block: remaining bytes, then 0x01, then zeros */
		memset(tempData, 0, sizeof(tempData));
		memcpy(tempData, ad, adlen * sizeof(unsigned char));
		tempData[adlen] = 0x01;
		x00 ^= U64BIG(*(u64*)(tempData));
		x01 ^= U32BIG(*(u32*)(tempData + 8));
		x10 ^= U64BIG(*(u64*)(tempData + 12));
		x11 ^= U32BIG(*(u32*)(tempData + 20));
		for (i = 0; i < PR_ROUNDS; i++) {
			ROUND384(i);
		}
	}
	x31 ^= 0x80000000;

	// process ciphertext (everything before the tag)
	clen -= CRYPTO_ABYTES;
	if (clen) {
		while (clen >= RATE) {
			/* plaintext = state ^ ciphertext; ciphertext becomes the new rate */
			*(u64*)(m) = U64BIG(x00) ^ (*(u64*)(c));
			*(u32*)(m + 8) = U32BIG(x01) ^ (*(u32*)(c + 8));
			*(u64*)(m + 12) = U64BIG(x10) ^ (*(u64*)(c + 12));
			*(u32*)(m + 20) = U32BIG(x11) ^ (*(u32*)(c + 20));
			x00 = U64BIG(*(u64*)(c));
			x01 = U32BIG(*(u32*)(c + 8));
			x10 = U64BIG(*(u64*)(c + 12));
			x11 = U32BIG(*(u32*)(c + 20));
			for (i = 0; i < PR_ROUNDS; i++) {
				ROUND384(i);
			}
			clen -= RATE;
			m += RATE;
			c += RATE;
		}
		/* last block: tempData1 = state ^ padded ciphertext; its first clen
		 * bytes are plaintext, the rest is the state with the padding
		 * applied.  Overwriting the first clen bytes with the ciphertext
		 * yields the new rate. */
		memset(tempData, 0, sizeof(tempData));
		memcpy(tempData, c, clen * sizeof(unsigned char));
		tempData[clen] = 0x01;
		*(u64*)(tempData1) = U64BIG(x00) ^ (*(u64*)(tempData));
		*(u32*)(tempData1 + 8) = U32BIG(x01) ^ (*(u32*)(tempData + 8));
		*(u64*)(tempData1 + 12) = U64BIG(x10) ^ (*(u64*)(tempData + 12));
		*(u32*)(tempData1 + 20) = U32BIG(x11) ^ (*(u32*)(tempData + 20));
		memcpy(m, tempData1, clen * sizeof(unsigned char));
		memcpy(tempData1, c, clen * sizeof(unsigned char));
		x00 = U64BIG(*(u64*)(tempData1));
		x01 = U32BIG(*(u32*)(tempData1 + 8));
		x10 = U64BIG(*(u64*)(tempData1 + 12));
		x11 = U32BIG(*(u32*)(tempData1 + 20));
		c += clen;
	}

	// finalization
	for (i = 0; i < PRF_ROUNDS; i++) {
		ROUND384(i);
	}

	// return -1 if verification fails
	if ((*(u64*)(c) != U64BIG(x00)) || (*(u32*)(c + 8) != U32BIG(x01))
			|| (*(u32*)(c + 12) != (u32)U64BIG(x10))) {
		/* wipe from the start of the output, not from the advanced cursor */
		memset(m_start, 0, sizeof(unsigned char) * (*mlen));
		*mlen = 0;
		return -1;
	}
	return 0;
}
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#include"api.h"
/* Short aliases for the integer widths used by the permutation. */
typedef unsigned char u8;
typedef unsigned long long u64;
typedef unsigned int u32;

/* ROUND384 iteration counts used by the initialisation, per-block and
 * finalisation loops, and the number of bytes absorbed per block. */
#define PR0_ROUNDS 76
#define PR_ROUNDS 28
#define PRF_ROUNDS 32
#define RATE 24

/* Plain 64-bit / 32-bit rotations (right, left, right). */
#define ROTR64(x,n) (((x)>>(n))|((x)<<(64-(n))))
#define LOTR64(x,n) (((x)<<(n))|((x)>>(64-(n))))
#define ROTR32(x,n) (((x)>>(n))|((x)<<(32-(n))))

/*
 * Bit-sliced S-box on 64-bit operands: inputs a, b, c, d; results in a
 * (in place) and f, g, h.  Needs u64 scratch variables t1..t8 in the
 * caller's scope.  Arguments are evaluated more than once.
 */
#define sbox64(a, b, c, d, f, g, h) \
{ \
	t1 = ~(a); \
	t2 = (b) & t1; \
	t3 = (c) ^ t2; \
	(h) = (d) ^ t3; \
	t4 = (b) | (c); \
	t5 = (d) ^ t1; \
	(g) = t4 ^ t5; \
	t6 = (b) ^ (d); \
	t7 = t3 & t5; \
	(a) = t6 ^ t7; \
	t8 = (g) & t6; \
	(f) = t3 ^ t8; \
}

/* Same S-box on 32-bit operands; needs u32 scratch variables t_1..t_8. */
#define sbox32(a, b, c, d, f, g, h) \
{ \
	t_1 = ~(a); \
	t_2 = (b) & t_1; \
	t_3 = (c) ^ t_2; \
	(h) = (d) ^ t_3; \
	t_4 = (b) | (c); \
	t_5 = (d) ^ t_1; \
	(g) = t_4 ^ t_5; \
	t_6 = (b) ^ (d); \
	t_7 = t_3 & t_5; \
	(a) = t_6 ^ t_7; \
	t_8 = (g) & t_6; \
	(f) = t_3 ^ t_8; \
}

/* Element count of a true array (not a pointer). */
#define ARR_SIZE(a) (sizeof(a)/sizeof((a)[0]))

/*
 * Rotation of a 96-bit row kept as a 32-bit high word `a` and a 64-bit low
 * word `b`, to the left by n bits.
 *   ROTR961 / ROTR962             : new high / low word, for 0 < n < 32
 *   ROTR96MORE321 / ROTR96MORE322 : new high / low word, for 32 < n < 64
 * The "high word" results rely on being assigned to a u32 to drop the excess
 * bits.  Every parameter is parenthesised so expressions can be passed for n.
 */
#define ROTR961(a,b,n) (((a)<<(n))|((b)>>(64-(n))))
#define ROTR962(a,b,n) (((b)<<(n))|((a)>>(32-(n))))
#define ROTR96MORE321(a,b,n) (((b)<<((n)-32))>>32)
#define ROTR96MORE322(a,b,n) (((b)<<(n))|((u64)(a)<<((n)-32))|((b)>>(96-(n))))

/* Byte-order hooks; identity here. */
#define U32BIG(x) (x)
#define U64BIG(x) (x)

/* Round constants, one per round, indexed from 0 for every permutation call. */
u8 constant7[76] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03, 0x06,
		0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a, 0x14, 0x28, 0x51, 0x23, 0x47,
		0x0f, 0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b, 0x16,
		0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54, 0x29, 0x53,
		0x27, 0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43, 0x07,
		0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49, 0x13, 0x26,
		0x4d, 0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b };

/*
 * One round on the caller's `u32 s[12]` state, where row r occupies
 * s[3r], s[3r+1] (accessed together as a u64) and s[3r+2] (the high word).
 * Adds constant7[i] to s[0], applies the S-box to the 64-bit and 32-bit
 * parts, then rotates rows 1, 2, 3 left by 1, 8 and 55 bits.
 * x50..x71 hold the S-box outputs in between.
 * NOTE(review): the u64 accesses at s+3 and s+9 are only 4-byte aligned and
 * type-pun the u32 array; this assumes the target/compiler tolerates that.
 */
#define ROUND384(i) { \
	s[0] ^= constant7[(i)]; \
	sbox64(U64BIG(*(u64*)(s)), U64BIG(*(u64*)(s+3)), U64BIG(*(u64*)(s+6)), U64BIG(*(u64*)(s+9)), x50, x60, x70); \
	sbox32(s[2], s[5], s[8], s[11], x51, x61, x71); \
	s[5] = ROTR961(x51, x50, 1); \
	U64BIG(*(u64*)(s + 3)) = ROTR962(x51, x50, 1); \
	s[8] = ROTR961(x61, x60, 8); \
	U64BIG(*(u64*)(s + 6)) = ROTR962(x61, x60, 8); \
	s[11] = ROTR96MORE321(x71, x70, 55); \
	U64BIG(*(u64*)(s + 9)) = ROTR96MORE322(x71, x70, 55); \
}
/*
 * AEAD encryption on a u32[12] state with a 24-byte rate.
 * Writes mlen ciphertext bytes followed by a CRYPTO_ABYTES tag to c, stores
 * mlen + CRYPTO_ABYTES in *clen and returns 0.  nsec is unused.
 * The t*, t_* and x5*..x7* locals are scratch variables that ROUND384 and
 * the sbox macros reference by name.
 */
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
		const unsigned char *m, unsigned long long mlen,
		const unsigned char *ad, unsigned long long adlen,
		const unsigned char *nsec, const unsigned char *npub,
		const unsigned char *k) {
	*clen = mlen + CRYPTO_ABYTES;
	u32 s[12] = { 0 }, round;
	u64 t1, t2, t3, t5, t6, t8, t4, t7;
	u32 t_1, t_2, t_3, t_5, t_6, t_8, t_4, t_7;
	u64 x50, x60, x70;
	u32 x51, x61, x71;
	u8 padded[24] = { 0 };

	/* state = nonce || key || zeros, with the top bit of the last word set */
	memcpy(s, npub, CRYPTO_NPUBBYTES);
	memcpy(s + CRYPTO_NPUBBYTES / 4, k, CRYPTO_KEYBYTES);
	s[11] = 0x80000000;
	for (round = 0; round < PR0_ROUNDS; round++) {
		ROUND384(round);
	}

	/* associated data: full blocks, then one padded block */
	if (adlen) {
		for (; adlen >= RATE; adlen -= RATE, ad += RATE) {
			U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(ad));
			U64BIG(*(u64*)(s + 2)) ^= U64BIG(*(u64*)(ad + 8));
			U64BIG(*(u64*)(s + 4)) ^= U64BIG(*(u64*)(ad + 16));
			for (round = 0; round < PR_ROUNDS; round++) {
				ROUND384(round);
			}
		}
		memset(padded, 0, RATE);
		memcpy(padded, ad, adlen );
		padded[adlen] = 0x01;
		U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(padded));
		U64BIG(*(u64*)(s + 2)) ^= U64BIG(*(u64*)(padded + 8));
		U64BIG(*(u64*)(s + 4)) ^= U64BIG(*(u64*)(padded + 16));
		for (round = 0; round < PR_ROUNDS; round++) {
			ROUND384(round);
		}
	}

	/* flip the top state bit between the AD and message phases */
	s[11] ^= 0x80000000;

	/* message: the rate part of the state after absorbing is the ciphertext */
	if (mlen) {
		for (; mlen >= RATE; mlen -= RATE, m += RATE, c += RATE) {
			U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(m));
			U64BIG(*(u64*)(s + 2)) ^= U64BIG(*(u64*)(m + 8));
			U64BIG(*(u64*)(s + 4)) ^= U64BIG(*(u64*)(m + 16));
			memcpy(c, s, RATE );
			for (round = 0; round < PR_ROUNDS; round++) {
				ROUND384(round);
			}
		}
		memset(padded, 0, RATE);
		memcpy(padded, m, mlen );
		padded[mlen] = 0x01;
		U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(padded));
		U64BIG(*(u64*)(s + 2)) ^= U64BIG(*(u64*)(padded + 8));
		U64BIG(*(u64*)(s + 4)) ^= U64BIG(*(u64*)(padded + 16));
		memcpy(c, s, mlen );
		c += mlen;
	}

	/* finalization, then the first CRYPTO_ABYTES of the state are the tag */
	for (round = 0; round < PRF_ROUNDS; round++) {
		ROUND384(round);
	}
	memcpy(c, s, CRYPTO_ABYTES);
	return 0;
}
/*
 * AEAD decryption on a u32[12] state with a 24-byte rate.
 *
 *   m, mlen   output buffer and length; on success *mlen = clen - CRYPTO_ABYTES
 *   nsec      unused
 *   c, clen   ciphertext followed by a CRYPTO_ABYTES tag
 *   ad, adlen associated data
 *   npub, k   nonce and key
 * Returns 0 when the tag matches.  Returns -1 when clen is shorter than a
 * tag or the tag does not match; in both cases *mlen is set to 0, and on a
 * tag mismatch the plaintext already written to the output buffer is zeroed.
 */
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
		unsigned char *nsec, const unsigned char *c, unsigned long long clen,
		const unsigned char *ad, unsigned long long adlen,
		const unsigned char *npub, const unsigned char *k) {
	/* `m` is advanced while decrypting; remember where the output starts so
	 * a failed verification wipes the right bytes. */
	unsigned char *const m_start = m;
	u32 s[12] = { 0 }, i;
	u64 t1, t2, t3, t5, t6, t8, t4, t7;
	u32 t_1, t_2, t_3, t_5, t_6, t_8, t_4, t_7;
	u64 x50, x60, x70;
	u32 x51, x61, x71;
	u8 tempData[24] = { 0 };

	/* Check the length before computing clen - CRYPTO_ABYTES, which would
	 * otherwise wrap around. */
	if (clen < CRYPTO_ABYTES) {
		*mlen = 0;
		return -1;
	}
	*mlen = clen - CRYPTO_ABYTES;

	/* state = nonce || key || zeros, with the top bit of the last word set */
	memcpy(s, npub, CRYPTO_NPUBBYTES);
	memcpy(s + CRYPTO_NPUBBYTES / 4, k, CRYPTO_KEYBYTES);
	s[11] = 0x80000000;
	for (i = 0; i < PR0_ROUNDS; i++) {
		ROUND384(i);
	}

	// process associated data
	if (adlen) {
		while (adlen >= RATE) {
			U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(ad));
			U64BIG(*(u64*)(s + 2)) ^= U64BIG(*(u64*)(ad + 8));
			U64BIG(*(u64*)(s + 4)) ^= U64BIG(*(u64*)(ad + 16));
			for (i = 0; i < PR_ROUNDS; i++) {
				ROUND384(i);
			}
			adlen -= RATE;
			ad += RATE;
		}
		memset(tempData, 0, RATE);
		memcpy(tempData, ad, adlen );
		tempData[adlen] = 0x01;
		U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(tempData));
		U64BIG(*(u64*)(s + 2)) ^= U64BIG(*(u64*)(tempData + 8));
		U64BIG(*(u64*)(s + 4)) ^= U64BIG(*(u64*)(tempData + 16));
		for (i = 0; i < PR_ROUNDS; i++) {
			ROUND384(i);
		}
	}
	s[11] ^= 0x80000000;

	// process ciphertext (everything before the tag)
	clen -= CRYPTO_ABYTES;
	if (clen) {
		while (clen >= RATE) {
			/* plaintext = state ^ ciphertext; ciphertext becomes the new rate */
			U64BIG(*(u64*)(m)) =
					U64BIG(*(u64*)(s)) ^ U64BIG(*(u64*)(c));
			U64BIG(*(u64*)(m + 8)) = U64BIG(
					*(u64*)(s + 2)) ^ U64BIG(*(u64*)(c + 8));
			U64BIG(*(u64*)(m + 16)) = U64BIG(
					*(u64*)(s + 4)) ^ U64BIG(*(u64*)(c + 16));
			memcpy(s, c, RATE );
			for (i = 0; i < PR_ROUNDS; i++) {
				ROUND384(i);
			}
			clen -= RATE;
			m += RATE;
			c += RATE;
		}
		/* last block: after the XOR the first clen state bytes are
		 * plaintext; they are copied out and then replaced by the
		 * ciphertext bytes. */
		memset(tempData, 0, RATE);
		memcpy(tempData, c, clen );
		tempData[clen] = 0x01;
		U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(tempData));
		U64BIG(*(u64*)(s + 2)) ^= U64BIG(*(u64*)(tempData + 8));
		U64BIG(*(u64*)(s + 4)) ^= U64BIG(*(u64*)(tempData + 16));
		memcpy(m, s, clen );
		memcpy(s, c, clen );
		c += clen;
	}

	// finalization
	for (i = 0; i < PRF_ROUNDS; i++) {
		ROUND384(i);
	}

	/* the first CRYPTO_ABYTES of the state must equal the received tag */
	if (memcmp((void*)s, (void*)c, CRYPTO_ABYTES)) {
		/* wipe from the start of the output, not from the advanced cursor */
		memset(m_start, 0, (*mlen));
		*mlen = 0;
		return -1;
	}
	return 0;
}
#define CRYPTO_KEYBYTES 24
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 24
#define CRYPTO_ABYTES 24
#define CRYPTO_NOOVERLAP 1
#include"auxFormat.h"
/*
 * Runs 1 + 3 * lunnum rounds on the state `s`.
 * c[0] is the round constant of the first round (ROUND384_1); after that
 * each loop iteration consumes the next three constants for ROUND384_2,
 * ROUND384_3 and ROUND384_4 in that order.
 * `s`, `t1` and `t2` must keep these names: the ROUND384_x macros refer to
 * them directly.
 */
void ROUND384_Three(unsigned int *s, unsigned char *c, int lunnum) {
	unsigned int t1, t2;
	unsigned int next = 0; /* index of the next round constant in c */
	u32 rci;

	rci = c[next++];
	ROUND384_1(rci);

	while (lunnum-- != 0) {
		rci = c[next++];
		ROUND384_2(rci);

		rci = c[next++];
		ROUND384_3(rci);

		rci = c[next++];
		ROUND384_4(rci);
	}
}
/*
 * Converts 12 bytes at `in` into the three-word interleaved form at `out`.
 *
 * Each of the three 32-bit input words is split into three lanes: lane r
 * collects the bits at positions r, r+3, r+6, ... (compressed by
 * puckU32ToThree_1).  The nine lanes are then packed into out[0..2] with
 * the shifts below.
 *
 * The input words are loaded with memcpy rather than through a (u32 *) cast
 * so that `in` does not have to be 4-byte aligned.
 */
void packU96FormatToThreePacket(u32 * out, u8 * in) {
	u32 lane[3][3]; /* lane[w][r]: bits of input word w at positions = r (mod 3) */
	u32 word;
	int w, r;

	for (w = 0; w < 3; w++) {
		memcpy(&word, in + 4 * w, sizeof word);
		word = U32BIG(word);
		for (r = 0; r < 3; r++) {
			lane[w][r] = word >> r;
			puckU32ToThree_1(lane[w][r]);
		}
	}

	out[0] = (lane[2][1] << 21) | (lane[1][0] << 10) | lane[0][2];
	out[1] = (lane[2][0] << 21) | (lane[1][2] << 11) | lane[0][1];
	out[2] = (lane[2][2] << 22) | (lane[1][1] << 11) | lane[0][0];
}
/*
 * Inverse of packU96FormatToThreePacket: turns the three interleaved words
 * in[0..2] back into 12 bytes at `out`.
 *
 * The nine lanes are cut out of the packed words, each lane is spread back
 * to every third bit position by unpuckU32ToThree_1, and the three lanes of
 * a word are merged with offsets 0, 1 and 2.
 */
void unpackU96FormatToThreePacket(u8 * out, u32 * in) {
	u32 lane[3][3]; /* lane[w][r]: bits of output word w at positions = r (mod 3) */
	u32 word[3] = { 0 };
	int w, r;

	lane[0][0] = in[2] & 0x7ff;
	lane[0][1] = in[1] & 0x7ff;
	lane[0][2] = in[0] & 0x3ff;
	lane[1][0] = (in[0] >> 10) & 0x7ff;
	lane[1][1] = (in[2] >> 11) & 0x7ff;
	lane[1][2] = (in[1] >> 11) & 0x3ff;
	lane[2][0] = in[1] >> 21;
	lane[2][1] = in[0] >> 21;
	lane[2][2] = in[2] >> 22;

	for (w = 0; w < 3; w++) {
		for (r = 0; r < 3; r++) {
			unpuckU32ToThree_1(lane[w][r]);
		}
		word[w] = lane[w][0] | (lane[w][1] << 1) | (lane[w][2] << 2);
	}

	memcpy(out, word, 12 * sizeof(unsigned char));
}
/*
 * Converts 6 bytes at `in` (one 32-bit word followed by one 16-bit word)
 * into the three-word interleaved form at `out`, using the same lane
 * splitting as packU96FormatToThreePacket.
 *
 * The input is loaded with memcpy rather than through (u32 *) / (u16 *)
 * casts so that `in` does not have to be aligned.
 */
void packU48FormatToThreePacket(u32 * out, u8 * in) {
	u32 lo32;
	u16 hi16;
	u32 t1;
	u32 temp0[3] = { 0 };
	u32 temp1[3] = { 0 };
	int r;

	memcpy(&lo32, in, sizeof lo32);
	memcpy(&hi16, in + 4, sizeof hi16);
	lo32 = U32BIG(lo32);
	t1 = (u32)U16BIG(hi16);

	for (r = 0; r < 3; r++) {
		temp0[r] = lo32 >> r;
		puckU32ToThree_1(temp0[r]);
		temp1[r] = t1 >> r;
		puckU32ToThree_1(temp1[r]);
	}

	out[0] = (temp1[0] << 10) | temp0[2];
	out[1] = (temp1[2] << 11) | temp0[1];
	out[2] = (temp1[1] << 11) | temp0[0];
}
#include"crypto_aead.h"
#include"api.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
/* Byte-order hooks; identity here. */
#define U32BIG(x) (x)
#define U16BIG(x) (x)

/* Short aliases for the integer widths used by this implementation. */
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long long u64;

/*
 * In place: gathers bits 0, 3, 6, ..., 30 of the 32-bit lvalue x into its
 * low 11 bits (bit 3k moves to bit k); every other bit is discarded.
 */
#define puckU32ToThree_1(x) { \
	(x) &= 0x49249249; \
	(x) = ((x) | ((x) >> 2)) & 0xc30c30c3; \
	(x) = ((x) | ((x) >> 4)) & 0x0f00f00f; \
	(x) = ((x) | ((x) >> 8)) & 0xff0000ff; \
	(x) = ((x) | ((x) >> 16)) & 0xfff; \
}

/*
 * In place: inverse of puckU32ToThree_1.  Spreads the low bits of the
 * 32-bit lvalue x so that bit k moves to bit 3k.
 */
#define unpuckU32ToThree_1(x) { \
	(x) &= 0xfff; \
	(x) = ((x) | ((x) << 16)) & 0xff0000ff; \
	(x) = ((x) | ((x) << 8)) & 0x0f00f00f; \
	(x) = ((x) | ((x) << 4)) & 0xc30c30c3; \
	(x) = ((x) | ((x) << 2)) & 0x49249249; \
}

/* Runs 1 + 3 * lunnum rounds on s, taking round constants from c. */
void ROUND384_Three(unsigned int *s, unsigned char *c,int lunnum);
/*
 * ARC(rci): add a packed round constant to the first three state words.
 *   s[0] ^= (rci >> 6) & 0x3;
 *   s[1] ^= (rci >> 3) & 0x7;
 *   s[2] ^= rci & 0x7;
 * Requires `s` (state array) and a scratch variable `t1` in the caller's
 * scope; rci must be an lvalue.
 * The instructions are the flag-setting forms, so the condition codes are
 * declared as clobbered.
 * NOTE(review): rci is only read, but it is kept as a "+r" operand;
 * presumably that also keeps it out of the register chosen for t1, which is
 * written before rci is last read - confirm before turning it into an input.
 */
#define ARC(rci) \
do { \
__asm__ __volatile__ ( \
"/*add round const s0 s1 s2 */ \n\t"\
"ands %[t1], %[rci], #0xc0\n\t" \
"eors %[S_0], %[S_0], %[t1], LSR #6 \n\t" /*s[0] ^= (rci >> 6) & 0x3;*/\
"ands %[t1], %[rci], #0x38\n\t" \
"eors %[S_1], %[S_1], %[t1], LSR #3 \n\t" /*s[1] ^= (rci >> 3) & 0x7;*/\
"ands %[t1], %[rci], #0x7\n\t" \
"eors %[S_3], %[S_3], %[t1] \n\t" /*s[2] ^= rci & 0x7; (operand is named S_3 but bound to s[2])*/\
: /* output variables - including inputs that are changed */\
[t1] "=r" (t1), [rci] "+r" (rci), \
[S_0] "+r" (s[0]), [S_1] "+r" (s[1]), [S_3] "+r" (s[2])\
: /* no input-only operands */\
: "cc" /* ands/eors update the condition flags */ );\
}while (0)
/*
 * SBOX(S1,S2,S3,S4): in-place bit-sliced S-box on four state words.
 * All four arguments must be lvalues; they are both read and written.
 * Requires scratch variables `t1` and `t2` in the caller's scope.
 * The instructions are the flag-setting forms, so the condition codes are
 * declared as clobbered.
 */
#define SBOX(S1,S2,S3,S4) \
do { \
__asm__ __volatile__ ( \
"/*sbox column*/ \n\t"\
"mvns %[S_0], %[S_0] \n\t"\
"ands %[t1], %[S_2], %[S_0] \n\t"\
"eors %[t1], %[S_4], %[t1] \n\t"\
"orrs %[S_4], %[S_2], %[S_4] \n\t"\
"eors %[S_0], %[S_6], %[S_0] \n\t"\
"eors %[S_4], %[S_4], %[S_0] \n\t"\
"eors %[t2], %[S_2], %[S_6] \n\t"\
"eors %[S_6], %[S_6], %[t1] \n\t"\
"ands %[S_0], %[t1] , %[S_0] \n\t"\
"eors %[S_0], %[t2] , %[S_0] \n\t"\
"ands %[S_2], %[S_4], %[t2] \n\t"\
"eors %[S_2], %[t1] , %[S_2] \n\t"\
: /* output variables - including inputs that are changed */\
[t1] "=r" (t1), [t2] "=r" (t2),\
[S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \
: /* no input-only operands */\
: "cc" /* the *s instruction forms update the condition flags */ );\
}while (0)
/*
 * SBOX1(S1,S2,S3,S4): rotates S3 right by 30 and S4 right by 14, then
 * applies the same in-place S-box sequence as SBOX.
 * All four arguments must be lvalues; requires scratch variables `t1` and
 * `t2` in the caller's scope.
 * The S-box instructions are the flag-setting forms, so the condition codes
 * are declared as clobbered.
 */
#define SBOX1(S1,S2,S3,S4) \
do { \
__asm__ __volatile__ ( \
"/*sbox column*/ \n\t"\
"ROR %[S_4] , #30 \n\t"\
"ROR %[S_6] , #14 \n\t"\
"mvns %[S_0], %[S_0] \n\t"\
"ands %[t1], %[S_2], %[S_0] \n\t"\
"eors %[t1], %[S_4], %[t1] \n\t"\
"orrs %[S_4], %[S_2], %[S_4] \n\t"\
"eors %[S_0], %[S_6], %[S_0] \n\t"\
"eors %[S_4], %[S_4], %[S_0] \n\t"\
"eors %[t2], %[S_2], %[S_6] \n\t"\
"eors %[S_6], %[S_6], %[t1] \n\t"\
"ands %[S_0], %[t1] , %[S_0] \n\t"\
"eors %[S_0], %[t2] , %[S_0] \n\t"\
"ands %[S_2], %[S_4], %[t2] \n\t"\
"eors %[S_2], %[t1] , %[S_2] \n\t"\
: /* output variables - including inputs that are changed */\
[t1] "=r" (t1), [t2] "=r" (t2),\
[S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \
: /* no input-only operands */\
: "cc" /* the *s instruction forms update the condition flags */ );\
}while (0)
/*
 * SBOX2(S1,S2,S3,S4): rotates S3 right by 29 and S4 right by 14, then
 * applies the same in-place S-box sequence as SBOX.
 * All four arguments must be lvalues; requires scratch variables `t1` and
 * `t2` in the caller's scope.
 * The S-box instructions are the flag-setting forms, so the condition codes
 * are declared as clobbered.
 */
#define SBOX2(S1,S2,S3,S4) \
do { \
__asm__ __volatile__ ( \
"/*sbox column*/ \n\t"\
"ROR %[S_4] , #29 \n\t"\
"ROR %[S_6] , #14 \n\t"\
"mvns %[S_0], %[S_0] \n\t"\
"ands %[t1], %[S_2], %[S_0] \n\t"\
"eors %[t1], %[S_4], %[t1] \n\t"\
"orrs %[S_4], %[S_2], %[S_4] \n\t"\
"eors %[S_0], %[S_6], %[S_0] \n\t"\
"eors %[S_4], %[S_4], %[S_0] \n\t"\
"eors %[t2], %[S_2], %[S_6] \n\t"\
"eors %[S_6], %[S_6], %[t1] \n\t"\
"ands %[S_0], %[t1] , %[S_0] \n\t"\
"eors %[S_0], %[t2] , %[S_0] \n\t"\
"ands %[S_2], %[S_4], %[t2] \n\t"\
"eors %[S_2], %[t1] , %[S_2] \n\t"\
: /* output variables - including inputs that are changed */\
[t1] "=r" (t1), [t2] "=r" (t2),\
[S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \
: /* no input-only operands */\
: "cc" /* the *s instruction forms update the condition flags */ );\
}while (0)
/*
 * SBOX3(S1,S2,S3,S4): rotates S2 right by 31, S3 right by 29 and S4 right
 * by 13, then applies the same in-place S-box sequence as SBOX.
 * All four arguments must be lvalues; requires scratch variables `t1` and
 * `t2` in the caller's scope.
 * The S-box instructions are the flag-setting forms, so the condition codes
 * are declared as clobbered.
 */
#define SBOX3(S1,S2,S3,S4) \
do { \
__asm__ __volatile__ ( \
"/*sbox column*/ \n\t"\
"ROR %[S_2] , #31 \n\t"\
"ROR %[S_4] , #29 \n\t"\
"ROR %[S_6] , #13 \n\t"\
"mvns %[S_0], %[S_0] \n\t"\
"ands %[t1], %[S_2], %[S_0] \n\t"\
"eors %[t1], %[S_4], %[t1] \n\t"\
"orrs %[S_4], %[S_2], %[S_4] \n\t"\
"eors %[S_0], %[S_6], %[S_0] \n\t"\
"eors %[S_4], %[S_4], %[S_0] \n\t"\
"eors %[t2], %[S_2], %[S_6] \n\t"\
"eors %[S_6], %[S_6], %[t1] \n\t"\
"ands %[S_0], %[t1] , %[S_0] \n\t"\
"eors %[S_0], %[t2] , %[S_0] \n\t"\
"ands %[S_2], %[S_4], %[t2] \n\t"\
"eors %[S_2], %[t1] , %[S_2] \n\t"\
: /* output variables - including inputs that are changed */\
[t1] "=r" (t1), [t2] "=r" (t2),\
[S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \
: /* no input-only operands */\
: "cc" /* the *s instruction forms update the condition flags */ );\
}while (0)
/*
 * The four round variants below all add the round constant with ARC(rci)
 * and then run three S-box invocations, one per word of row 0 (s[0], s[1],
 * s[2]).  They differ in which words of rows 1..3 (s[3..5], s[6..8],
 * s[9..11]) are paired with each row-0 word and in whether the rotating
 * SBOX1/2/3 variants are used.  They expect `s`, `t1` and `t2` to exist in
 * the caller's scope under exactly these names.
 */
/* First round: plain SBOX on the columns (s[i], s[i+3], s[i+6], s[i+9]). */
#define ROUND384_1(rci) {\
ARC(rci);\
SBOX(s[0], s[3], s[6], s[9] );\
SBOX(s[1], s[4], s[7], s[10]);\
SBOX(s[2], s[5], s[8], s[11]);\
}
/* Rotating S-boxes with the row 1..3 words taken at permuted positions.
 * NOTE(review): presumably the permuted indices stand in for the word moves
 * a row rotation would otherwise need - confirm against the spec. */
#define ROUND384_2(rci) {\
ARC(rci);\
SBOX1(s[0], s[4], s[8], s[10] );\
SBOX2(s[1], s[5], s[6], s[11]);\
SBOX3(s[2], s[3], s[7], s[9]);\
}
/* Same structure as ROUND384_2 with a further permutation of the indices. */
#define ROUND384_3(rci) {\
ARC(rci);\
SBOX1(s[0], s[5], s[7], s[11]);\
SBOX2(s[1], s[3], s[8], s[9]);\
SBOX3(s[2], s[4], s[6], s[10]);\
}
/* Rotating S-boxes on the same word positions as ROUND384_1. */
#define ROUND384_4(rci) {\
ARC(rci);\
SBOX1(s[0], s[3], s[6], s[9]);\
SBOX2(s[1], s[4], s[7], s[10]);\
SBOX3(s[2], s[5], s[8], s[11]);\
}
/*
 * P384_1(s, round, lunNum): permutation of 1 + 3 * lunNum rounds.
 * Calls ROUND384_Three(s, round, lunNum) and then runs one asm block that
 * moves and rotates the words s[3]..s[11]; s[0]..s[2] are not touched by
 * that block.  Declares its own scratch `t1`.
 * NOTE(review): the trailing moves/rotations presumably apply the row
 * rotation left pending by the last ROUND384_4 - confirm against the spec.
 */
#define P384_1( s, round, lunNum) {\
u32 t1;\
ROUND384_Three(s,round,lunNum);\
__asm__ __volatile__ ( \
"/*rotate shift left 1 bit [w9 w5 w1-> (w1,1) w9 w5] */ \n\t"\
"mov %[t1], %[S_3] \n\t"\
"mov %[S_3], %[S_4] \n\t"\
"mov %[S_4], %[S_5] \n\t"\
"ROR %[S_5], %[t1] , #31 \n\t"\
"/*rotate shift left 8 bits [w10 w6 w2-> (w6,3) (w2,3) ( w10,2)]*/ \n\t"\
"mov %[t1], %[S_8] \n\t"\
"ROR %[S_8], %[S_7] , #29 \n\t"\
"ROR %[S_7], %[S_6] , #29 \n\t"\
"ROR %[S_6], %[t1] , #30 \n\t"\
"/*rotate shift left 55 bit [w11 w7 w3-> (w3,13) (w11,14) ( w7,14)] */ \n\t"\
"mov %[t1], %[S_9] \n\t"\
"ROR %[S_9], %[S_10] , #14 \n\t"\
"ROR %[S_10], %[S_11] , #14 \n\t"\
"ROR %[S_11], %[t1] , #13 \n\t"\
: /* output variables - including inputs that are changed */\
[t1] "=r" (t1),\
[S_3] "+r" (s[3]), [S_6] "+r" (s[6]), [S_9] "+r" (s[9]) ,\
[S_4] "+r" (s[4]), [S_7] "+r" (s[7]), [S_10] "+r" (s[10]),\
[S_5] "+r" (s[5]), [S_8] "+r" (s[8]), [S_11] "+r" (s[11])\
: : );\
}
/*
 * P384_2(s, round, lunNum): permutation of 1 + 3 * lunNum + 1 rounds.
 * Calls ROUND384_Three(s, round, lunNum), runs one extra ROUND384_2 with
 * the constant round[lunNum*3+1], and then an asm block that moves and
 * rotates the words s[3]..s[11].  Declares its own `t1` and `rci`, but the
 * expanded ROUND384_2 also needs a variable named `t2` and the state under
 * the literal name `s` in the caller's scope.
 * NOTE(review): the comments inside the asm text are the same as in P384_1
 * although the register assignments differ; treat them as approximate.
 */
#define P384_2( s, round, lunNum) {\
u32 t1,rci;\
ROUND384_Three(s,round,lunNum);\
rci=round[lunNum*3+1];\
ROUND384_2(rci);\
__asm__ __volatile__ ( \
"/*rotate shift left 1 bit [w9 w5 w1-> (w1,1) w9 w5] */ \n\t"\
"mov %[t1], %[S_4] \n\t"\
"mov %[S_4], %[S_3] \n\t"\
"mov %[S_3], %[S_5] \n\t"\
"ROR %[S_5], %[t1] , #31 \n\t"\
"/*rotate shift left 8 bits [w10 w6 w2-> (w6,3) (w2,3) ( w10,2)]*/ \n\t"\
"mov %[t1], %[S_8] \n\t"\
"ROR %[S_8], %[S_6] , #29 \n\t"\
"ROR %[S_6], %[S_7] , #30 \n\t"\
"ROR %[S_7], %[t1] , #29 \n\t"\
"/*rotate shift left 55 bit [w11 w7 w3-> (w3,13) (w11,14) ( w7,14)] */ \n\t"\
"mov %[t1], %[S_10] \n\t"\
"ROR %[S_10], %[S_9] , #14 \n\t"\
"ROR %[S_9], %[S_11] , #14 \n\t"\
"ROR %[S_11], %[t1] , #13 \n\t"\
: /* output variables - including inputs that are changed */\
[t1] "=r" (t1),\
[S_3] "+r" (s[3]), [S_6] "+r" (s[6]), [S_9] "+r" (s[9]) ,\
[S_4] "+r" (s[4]), [S_7] "+r" (s[7]), [S_10] "+r" (s[10]),\
[S_5] "+r" (s[5]), [S_8] "+r" (s[8]), [S_11] "+r" (s[11])\
: : );\
}
/*
 * Encrypts m (mlen bytes) with associated data ad (adlen bytes), nonce npub
 * and key k.  Writes the ciphertext followed by the tag to c and stores the
 * total length in *clen.  nsec is not used by the implementation.
 */
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
/*
 * Decrypts c (clen bytes, tag included) with associated data ad, nonce npub
 * and key k.  Writes the plaintext to m and its length to *mlen.
 * Returns 0 on success and -1 when the input is shorter than a tag or the
 * tag does not verify.  nsec is not used by the implementation.
 */
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#include"auxFormat.h"
/* Bytes absorbed per block. */
#define aead_RATE 12
/*
 * Loop counts passed to P384_1 / P384_2, not round counts.
 * P384_1 runs 1 + 3 * n rounds and P384_2 runs 1 + 3 * n + 1 rounds, so:
 *   PR0_ROUNDS 25 -> 76 rounds (P384_1)
 *   PR_ROUNDS  13 -> 40 rounds (P384_1)
 *   PRF_ROUNDS 14 -> 44 rounds (P384_2)
 */
#define PR0_ROUNDS 25
#define PR_ROUNDS 13
#define PRF_ROUNDS 14
/*
 * Round constants in the packed form consumed by ARC: bits 7..6 go to s[0],
 * bits 5..3 to s[1] and bits 2..0 to s[2].  Indexed from 0 for every
 * permutation call.
 */
unsigned char constant7Format[76] = {
0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90,
0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2,
0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99,
0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b,
0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92,
0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8,
0xc7,0x1e,0xd1,0x8f};
/*
 * AEAD encryption on the bit-interleaved u32[12] state (12-byte rate,
 * 24-byte key, nonce and tag).
 * Writes mlen ciphertext bytes followed by a CRYPTO_ABYTES tag to c, stores
 * mlen + CRYPTO_ABYTES in *clen and returns 0.  nsec is unused.
 * `s` and `t2` must keep these names: the P384_2 expansion refers to them.
 */
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
		const unsigned char *m, unsigned long long mlen,
		const unsigned char *ad, unsigned long long adlen,
		const unsigned char *nsec, const unsigned char *npub,
		const unsigned char *k) {
	u32 s[12] = { 0 };
	u32 packed[3] = { 0 }; /* one rate block in interleaved form */
	u8 block[24] = { 0 };  /* padded last block, later the tag bytes */
	u32 t2;

	*clen = mlen + CRYPTO_ABYTES;

	/* initialization: rows 0-1 = nonce, rows 2-3 = key */
	packU96FormatToThreePacket(s, npub);
	packU96FormatToThreePacket((s + 3), (npub + 12));
	packU96FormatToThreePacket((s + 6), k);
	packU96FormatToThreePacket((s + 9), (k + 12));
	P384_1(s, constant7Format,PR0_ROUNDS);

	/* associated data: full blocks, then one padded block */
	if (adlen) {
		for (; adlen >= aead_RATE; adlen -= aead_RATE, ad += aead_RATE) {
			packU96FormatToThreePacket(packed, ad);
			s[0] ^= packed[0];
			s[1] ^= packed[1];
			s[2] ^= packed[2];
			P384_1(s, constant7Format,PR_ROUNDS);
		}
		memset(block, 0, aead_RATE);
		memcpy(block, ad, adlen);
		block[adlen] = 0x01;
		packU96FormatToThreePacket(packed, block);
		s[0] ^= packed[0];
		s[1] ^= packed[1];
		s[2] ^= packed[2];
		P384_1(s, constant7Format,PR_ROUNDS);
	}

	/* flip one state bit between the AD and message phases */
	s[9] ^= 0x80000000;

	/* message: row 0 after absorbing a block is the ciphertext block */
	if (mlen) {
		for (; mlen >= aead_RATE; mlen -= aead_RATE, m += aead_RATE, c += aead_RATE) {
			packU96FormatToThreePacket(packed, m);
			s[0] ^= packed[0];
			s[1] ^= packed[1];
			s[2] ^= packed[2];
			unpackU96FormatToThreePacket(c, s);
			P384_1(s, constant7Format,PR_ROUNDS);
		}
		memset(block, 0, aead_RATE);
		memcpy(block, m, mlen);
		block[mlen] = 0x01;
		packU96FormatToThreePacket(packed, block);
		s[0] ^= packed[0];
		s[1] ^= packed[1];
		s[2] ^= packed[2];
		unpackU96FormatToThreePacket(block, s);
		memcpy(c, block, mlen);
		c += mlen;
	}

	/* finalization */
	P384_2(s, constant7Format,PRF_ROUNDS);

	/* tag = rows 0 and 1 converted back to bytes */
	unpackU96FormatToThreePacket(block, s);
	unpackU96FormatToThreePacket((block + 12), (s + 3));
	memcpy(c, block, CRYPTO_ABYTES );
	return 0;
}
/*
 * AEAD decryption on the bit-interleaved u32[12] state (12-byte rate,
 * 24-byte key, nonce and tag).
 *
 *   m, mlen   output buffer and length; on success *mlen = clen - CRYPTO_ABYTES
 *   nsec      unused
 *   c, clen   ciphertext followed by a CRYPTO_ABYTES tag
 *   ad, adlen associated data
 *   npub, k   nonce and key
 * Returns 0 when the tag matches.  Returns -1 when clen is shorter than a
 * tag or the tag does not match; in both cases *mlen is set to 0, and on a
 * tag mismatch the plaintext already written to the output buffer is zeroed.
 * `s` and `t2` must keep these names: the P384_2 expansion refers to them.
 */
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
		unsigned char *nsec, const unsigned char *c, unsigned long long clen,
		const unsigned char *ad, unsigned long long adlen,
		const unsigned char *npub, const unsigned char *k) {
	/* `m` is advanced while decrypting; remember where the output starts so
	 * a failed verification wipes the right bytes. */
	unsigned char *const m_start = m;
	u32 s[12] = { 0 };
	u32 dataFormat[6] = { 0 };
	u32 dataFormat_1[3] = { 0 };
	u8 tempData[12] = { 0 };
	u8 tempU8[48] = { 0 };
	u32 t2;
	unsigned int j;

	/* Check the length before computing clen - CRYPTO_ABYTES, which would
	 * otherwise wrap around. */
	if (clen < CRYPTO_ABYTES) {
		*mlen = 0;
		return -1;
	}
	*mlen = clen - CRYPTO_ABYTES;

	// initialization: rows 0-1 = nonce, rows 2-3 = key
	packU96FormatToThreePacket(s, npub);
	packU96FormatToThreePacket((s + 3), (npub + 12));
	packU96FormatToThreePacket((s + 6), k);
	packU96FormatToThreePacket((s + 9), (k + 12));
	P384_1(s, constant7Format,PR0_ROUNDS);

	// process associated data
	if (adlen) {
		while (adlen >= aead_RATE) {
			packU96FormatToThreePacket(dataFormat, ad);
			s[0] ^= dataFormat[0];
			s[1] ^= dataFormat[1];
			s[2] ^= dataFormat[2];
			P384_1(s, constant7Format,PR_ROUNDS);
			adlen -= aead_RATE;
			ad += aead_RATE;
		}
		memset(tempData, 0, aead_RATE);
		memcpy(tempData, ad, adlen);
		tempData[adlen] = 0x01;
		packU96FormatToThreePacket(dataFormat, tempData);
		s[0] ^= dataFormat[0];
		s[1] ^= dataFormat[1];
		s[2] ^= dataFormat[2];
		P384_1(s, constant7Format,PR_ROUNDS);
	}
	s[9] ^= 0x80000000;

	// process ciphertext (everything before the tag)
	clen -= CRYPTO_ABYTES;
	if (clen) {
		while (clen >= aead_RATE) {
			/* plaintext = row 0 ^ ciphertext; ciphertext becomes row 0 */
			packU96FormatToThreePacket(dataFormat, c);
			dataFormat_1[0] = s[0] ^ dataFormat[0];
			dataFormat_1[1] = s[1] ^ dataFormat[1];
			dataFormat_1[2] = s[2] ^ dataFormat[2];
			unpackU96FormatToThreePacket(m, dataFormat_1);
			s[0] = dataFormat[0];
			s[1] = dataFormat[1];
			s[2] = dataFormat[2];
			P384_1(s, constant7Format,PR_ROUNDS);
			clen -= aead_RATE;
			m += aead_RATE;
			c += aead_RATE;
		}
		/* last block, handled in byte form: tempU8 = row 0 ^ padded
		 * ciphertext.  Its first clen bytes are plaintext; they are copied
		 * out and then replaced by the ciphertext bytes before packing the
		 * result back into row 0. */
		unpackU96FormatToThreePacket(tempU8, s);
		memset(tempData, 0, aead_RATE);
		memcpy(tempData, c, clen );
		tempData[clen] = 0x01;
		/* byte-wise XOR: no alignment or byte-order assumptions */
		for (j = 0; j < aead_RATE; j++) {
			tempU8[j] ^= tempData[j];
		}
		memcpy(m, tempU8, clen );
		memcpy(tempU8, tempData, clen );
		packU96FormatToThreePacket(s, tempU8);
		c+=clen;
	}

	// finalization
	P384_2(s, constant7Format,PRF_ROUNDS);

	// recompute the tag from rows 0 and 1 and compare with the received one
	unpackU96FormatToThreePacket(tempU8, s);
	unpackU96FormatToThreePacket(tempU8 + 12, s + 3);
	if (memcmp((void*)tempU8, (void*)(c), CRYPTO_ABYTES)) {
		/* wipe from the start of the output, not from the advanced cursor */
		memset(m_start, 0, (*mlen));
		*mlen = 0;
		return -1;
	}
	return 0;
}
#define CRYPTO_KEYBYTES 24
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 24
#define CRYPTO_ABYTES 24
#define CRYPTO_NOOVERLAP 1
#include"api.h"
/* Short aliases for the integer widths used by the permutation. */
typedef unsigned char u8;
typedef unsigned long long u64;
typedef unsigned int u32;

/* Bytes absorbed per block and ROUND384 iteration counts used by the
 * initialisation, per-block and finalisation loops. */
#define RATE (96 / 8)
#define PR0_ROUNDS 76
#define PR_ROUNDS 40
#define PRF_ROUNDS 44

/*
 * Bit-sliced S-box on 32-bit operands: inputs a, b, c, d; results in a
 * (in place) and f, g, h.  Needs u32 scratch variables t_1, t_2, t_3, t_5,
 * t_6, t_8, t_9, t_11 in the caller's scope.  Arguments are evaluated more
 * than once.
 */
#define sbox32(a, b, c, d, f, g, h) \
{ \
	t_1 = ~(a); \
	t_2 = (b) & t_1; \
	t_3 = (c) ^ t_2; \
	(h) = (d) ^ t_3; \
	t_5 = (b) | (c); \
	t_6 = (d) ^ t_1; \
	(g) = t_5 ^ t_6; \
	t_8 = (b) ^ (d); \
	t_9 = t_3 & t_6; \
	(a) = t_8 ^ t_9; \
	t_11 = (g) & t_8; \
	(f) = t_3 ^ t_11; \
}

/* Same S-box on 64-bit operands; needs u64 scratch variables t1, t2, t3,
 * t5, t6, t8, t9, t11. */
#define sbox64(a, b, c, d, f, g, h) \
{ \
	t1 = ~(a); \
	t2 = (b) & t1; \
	t3 = (c) ^ t2; \
	(h) = (d) ^ t3; \
	t5 = (b) | (c); \
	t6 = (d) ^ t1; \
	(g) = t5 ^ t6; \
	t8 = (b) ^ (d); \
	t9 = t3 & t6; \
	(a) = t8 ^ t9; \
	t11 = (g) & t8; \
	(f) = t3 ^ t11; \
}

/* Plain 64-bit / 32-bit right rotations. */
#define ROTR64(x,n) (((x)>>(n))|((x)<<(64-(n))))
#define ROTR32(x,n) (((x)>>(n))|((x)<<(32-(n))))

/* Element count of a true array (not a pointer). */
#define ARR_SIZE(a) (sizeof(a)/sizeof((a)[0]))

/*
 * Rotation of a 96-bit row kept as a 32-bit high word `a` and a 64-bit low
 * word `b`, to the left by n bits.
 *   ROTR961 / ROTR962             : new high / low word, for 0 < n < 32
 *   ROTR96MORE321 / ROTR96MORE322 : new high / low word, for 32 < n < 64
 * The "high word" results rely on being assigned to a u32 to drop the excess
 * bits.  Every parameter is parenthesised so expressions can be passed for n.
 */
#define ROTR961(a,b,n) (((a)<<(n))|((b)>>(64-(n))))
#define ROTR962(a,b,n) (((b)<<(n))|((a)>>(32-(n))))
#define ROTR96MORE321(a,b,n) (((b)<<((n)-32))>>32)
#define ROTR96MORE322(a,b,n) (((b)<<(n))|((u64)(a)<<((n)-32))|((b)>>(96-(n))))

/* Byte extraction / insertion helpers and identity byte-order hooks. */
#define EXT_BYTE32(x,n) ((u8)((u32)(x)>>(8*(n))))
#define INS_BYTE32(x,n) ((u32)(x)<<(8*(n)))
#define U32BIG(x) (x)
#define EXT_BYTE64(x,n) ((u8)((u64)(x)>>(8*(n))))
#define INS_BYTE64(x,n) ((u64)(x)<<(8*(n)))
#define U64BIG(x) (x)

/* Round constants, one per round, indexed from 0 for every permutation call. */
u8 constant7[127] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03, 0x06,
		0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a, 0x14, 0x28, 0x51, 0x23, 0x47,
		0x0f, 0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b, 0x16,
		0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54, 0x29, 0x53,
		0x27, 0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43, 0x07,
		0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49, 0x13, 0x26,
		0x4d, 0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37, 0x6f,
		0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31, 0x63, 0x46, 0x0d, 0x1a, 0x34,
		0x69, 0x52, 0x25, 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c, 0x39,
		0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a, 0x15, 0x2a, 0x55, 0x2b, 0x57,
		0x2f, 0x5f, 0x3f, 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40 };

/*
 * One round on the state held in the caller's locals:
 * row r = (x<r>1 : x<r>0) with x<r>0 a u64 and x<r>1 a u32, r = 0..3.
 * Adds constant7[i] to row 0, applies the S-box to the 64-bit and 32-bit
 * halves, then rotates rows 1, 2, 3 left by 1, 8 and 55 bits.
 * Uses x50..x71 as temporaries for the S-box outputs.
 */
#define ROUND384(i) { \
	x00 ^= constant7[(i)]; \
	sbox64(x00, x10, x20, x30, x50, x60, x70); \
	sbox32(x01, x11, x21, x31, x51, x61, x71); \
	x11 = ROTR961(x51, x50, 1); \
	x10 = ROTR962(x51, x50, 1); \
	x21 = ROTR961(x61, x60, 8); \
	x20 = ROTR962(x61, x60, 8); \
	x31 = ROTR96MORE321(x71, x70, 55); \
	x30 = ROTR96MORE322(x71, x70, 55); \
}
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k) {
*clen = mlen + CRYPTO_KEYBYTES;
u64 i; //RATE=96/8=12
u64 t1, t2, t3, t5, t6, t8, t9, t11;
u32 t_1, t_2, t_3, t_5, t_6, t_8, t_9, t_11;
u64 x30 = 0, x20 = 0, x10 = 0, x00 = 0;
u32 x31 = 0, x21 = 0, x11 = 0, x01 = 0;
u8 tempData[12] = { 0 };
u8 tempData1[24] = { 0 };
u8 tempData2[12] = { 0 };
u64 x50, x60, x70;
u32 x51, x61, x71;
// initialization
x00 = U64BIG(*(u64*)(npub));
x01 = U32BIG(*(u32*)(npub + 8));
x10 = U64BIG(*(u64*)(npub + 12));
x11 = U32BIG(*(u32*)(npub + 20));
x20 = U64BIG(*(u64*)(k));
x21 = U32BIG(*(u32*)(k + 8));
memcpy(&x30, k + 12, 8);
memcpy(&x31, k + 20, 4);
//x30 = U64BIG(*(u64*)(k + 12));
//x31 = U32BIG(*(u32*)(k + 20));
for (i = 0; i < PR0_ROUNDS; i++) {
ROUND384(i);
}
// process associated data
if (adlen) {
while (adlen >= RATE) {
x00 ^= ((u64)U32BIG(*(u32*)(ad + 4)) << 32) | ((u64)U32BIG(*(u32*)(ad)));
//x00 ^= U64BIG(*(u64*)(ad));
x01 ^= U32BIG(*(u32*)(ad + 8));
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
adlen -= RATE;
ad += RATE;
}
memset(tempData, 0, RATE);
memcpy(tempData, ad, adlen);
tempData[adlen] = 0x01;
x00 ^= U64BIG(*(u64*)(tempData));
x01 ^= U32BIG(*(u32*)(tempData + 8));
//dataXOR32(&x01, ad + 8, 4);
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
}
x31 ^= 0x80000000;
// process plaintext
if (mlen) {
while (mlen >= RATE) {
x00 ^= ((u64)U32BIG(*(u32*)(m + 4)) << 32) | ((u64)U32BIG(*(u32*)(m)));
//x00 ^= U64BIG(*(u64*)(m));
x01 ^= U32BIG(*(u32*)(m + 8));
*(u64*)c = U64BIG(x00);
*(u32*)(c + 8) = U32BIG(x01);
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
mlen -= RATE;
m += RATE;
c += RATE;
}
memset(tempData, 0, RATE);
memcpy(tempData, m, mlen);
tempData[mlen] = 0x01;
x00 ^= U64BIG(*(u64*)(tempData));
x01 ^= U32BIG(*(u32*)(tempData + 8));
*(u64*)tempData1 = U64BIG(x00);
*(u32*)(tempData1 + 8) = U32BIG(x01);
memcpy(c, tempData1, mlen * sizeof(unsigned char));
c += mlen;
}
// finalization
for (i = 0; i < PRF_ROUNDS; i++) {
ROUND384(i);
}
// return tag
*(u64*)tempData1 = U64BIG(x00);
*(u32*)(tempData1 + 8) = U32BIG(x01);
*(u32*)(tempData1 + 12) = U32BIG(x10);
*(u32*)(tempData1 + 16) = U32BIG(x10 >> 32);
*(u32*)(tempData1 + 20) = U32BIG(x11);
memcpy(c, tempData1, CRYPTO_KEYBYTES);
return 0;
}
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
unsigned char *nsec, const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k) {
if (clen < CRYPTO_KEYBYTES)
return -1;
*mlen = clen - CRYPTO_KEYBYTES;
u8 tempData[12] = { 0 };
u8 tempData1[24] = { 0 };
u64 i; //RATE=96/8=12
u64 t1, t2, t3, t5, t6, t8, t9, t11;
u32 t_1, t_2, t_3, t_5, t_6, t_8, t_9, t_11;
u64 x30 = 0, x20 = 0, x10 = 0, x00 = 0;
u32 x31 = 0, x21 = 0, x11 = 0, x01 = 0;
u64 x50, x60, x70;
u32 x51, x61, x71;
// initialization
x00 = U64BIG(*(u64*)(npub));
x01 = U32BIG(*(u32*)(npub + 8));
x10 = U64BIG(*(u64*)(npub + 12));
x11 = U32BIG(*(u32*)(npub + 20));
x20 = U64BIG(*(u64*)(k));
x21 = U32BIG(*(u32*)(k + 8));
x30 = U64BIG(*(u64*)(k + 12));
x31 = U32BIG(*(u32*)(k + 20));
for (i = 0; i < PR0_ROUNDS; i++) {
ROUND384(i);
}
// process associated data
if (adlen) {
while (adlen >= RATE) {
x00 ^= ((u64)U32BIG(*(u32*)(ad + 4)) << 32) | ((u64)U32BIG(*(u32*)(ad)));
//x00 ^= U64BIG(*(u64*)(ad));
x01 ^= U32BIG(*(u32*)(ad + 8));
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
adlen -= RATE;
ad += RATE;
}
memset(tempData, 0, RATE);
memcpy(tempData, ad, adlen);
tempData[adlen] = 0x01;
x00 ^= U64BIG(*(u64*)(tempData));
x01 ^= U32BIG(*(u32*)(tempData + 8));
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
}
x31 ^= 0x80000000;
// process c
clen -= CRYPTO_ABYTES;
if (clen > 0) {
while (clen >= RATE) {
*(u32*)(m) = U32BIG(x00) ^ (*(u32*)(c));
*(u32*)(m + 4) = U32BIG(x00>>32) ^ (*(u32*)(c + 4));
*(u32*)(m + 8) = U32BIG(x01) ^ (*(u32*)(c + 8));
x00 = ((u64)U32BIG(*(u32*)(c + 4)) << 32) | ((u64)U32BIG(*(u32*)(c)));
x01 = U32BIG(*(u32*)(c + 8));
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
clen -= RATE;
m += RATE;
c += RATE;
}
memset(tempData, 0, RATE);
memcpy(tempData, c, clen * sizeof(unsigned char));
tempData[clen] = 0x01;
*(u64*)(tempData1) = U64BIG(x00) ^ (*(u64*)(tempData));
*(u32*)(tempData1 + 8) = U32BIG(x01) ^ (*(u32*)(tempData + 8));
memcpy(m, tempData1, clen * sizeof(unsigned char));
memcpy(tempData1, c, clen * sizeof(unsigned char));
x00 = U64BIG(*(u64*)(tempData1));
x01 = U32BIG(*(u32*)(tempData1 + 8));
c += clen;
}
// finalization
for (i = 0; i < PRF_ROUNDS; i++) {
ROUND384(i);
}
*(u64*)tempData1 = U64BIG(x00);
*(u32*)(tempData1 + 8) = U32BIG(x01);
*(u32*)(tempData1 + 12) = U32BIG(x10);
*(u32*)(tempData1 + 16) = U32BIG(x10 >> 32);
*(u32*)(tempData1 + 20) = U32BIG(x11);
// return -1 if verification fails
if (memcmp((void*)tempData1, (void*)c, CRYPTO_ABYTES)) {
memset(m, 0, sizeof(unsigned char) * (*mlen));
*mlen = 0;
return -1;
}
return 0;
}
/* Sizes, in bytes, of key, secret nonce, public nonce and tag. */
#define CRYPTO_KEYBYTES 24
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 24
#define CRYPTO_ABYTES 24
#define CRYPTO_NOOVERLAP 1
#include"api.h"
typedef unsigned char u8;
typedef unsigned long long u64;
typedef unsigned int u32;
/* Bytes absorbed/squeezed per permutation call. */
#define RATE (96 / 8)
/* Number of rounds for initialization, data blocks and finalization. */
#define PR0_ROUNDS 76
#define PR_ROUNDS 40
#define PRF_ROUNDS 44
/*
 * 4-input S-box on the 32-bit column parts. Updates a in place and writes
 * the three other outputs to f, g, h. Uses the caller-declared u32 scratch
 * variables t_1, t_2, t_3, t_5, t_6, t_8, t_9, t_11.
 */
#define sbox32(a, b, c, d, f, g, h) \
{ \
t_1 = ~a; t_2 = b & t_1;t_3 = c ^ t_2; h = d ^ t_3; t_5 = b | c; t_6 = d ^ t_1; g = t_5 ^ t_6; t_8 = b ^ d; t_9 = t_3 & t_6; a = t_8 ^ t_9; t_11 = g & t_8; f = t_3 ^ t_11; \
}
/*
 * Same S-box on the 64-bit column parts. Uses the caller-declared u64
 * scratch variables t1, t2, t3, t5, t6, t8, t9, t11.
 */
#define sbox64(a, b, c, d, f, g, h) \
{ \
t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
}
#define ROTR64(x,n) (((x)>>(n))|((x)<<(64-(n))))
#define ROTR32(x,n) (((x)>>(n))|((x)<<(32-(n))))
#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
/*
 * Helpers for a 96-bit row stored as (a: upper 32 bits, b: lower 64 bits).
 * As used by ROUND384 they produce the row shifted LEFT cyclically by n,
 * despite the ROTR prefix:
 *   ROTR961 / ROTR96MORE321 -> new 32-bit part (result is truncated on
 *                              assignment to a u32),
 *   ROTR962 / ROTR96MORE322 -> new 64-bit part.
 * The first pair is written for n < 32, the MORE32 pair for n > 32.
 * n is not parenthesized in the expansions, so pass plain constants only.
 */
#define ROTR961(a,b,n) (((a)<<(n))|((b)>>(64-n)))
#define ROTR962(a,b,n) (((b)<<(n))|((a)>>(32-n)))
#define ROTR96MORE321(a,b,n) ((b<<(n-32))>>32)
#define ROTR96MORE322(a,b,n) (b<<n|(u64)a<<(n-32)|b>>(96-n))
/* Extract / insert byte n of a 32-bit or 64-bit value. */
#define EXT_BYTE32(x,n) ((u8)((u32)(x)>>(8*(n))))
#define INS_BYTE32(x,n) ((u32)(x)<<(8*(n)))
/* Identity: no byte swapping is performed on loads and stores. */
#define U32BIG(x) (x)
#define EXT_BYTE64(x,n) ((u8)((u64)(x)>>(8*(n))))
#define INS_BYTE64(x,n) ((u64)(x)<<(8*(n)))
#define U64BIG(x) (x)
/* Round constants; ROUND384(i) XORs entry i into x00. */
u8 constant7[127] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03, 0x06,
0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a, 0x14, 0x28, 0x51, 0x23, 0x47,
0x0f, 0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b, 0x16,
0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54, 0x29, 0x53,
0x27, 0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43, 0x07,
0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49, 0x13, 0x26,
0x4d, 0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37, 0x6f,
0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31, 0x63, 0x46, 0x0d, 0x1a, 0x34,
0x69, 0x52, 0x25, 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c, 0x39,
0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a, 0x15, 0x2a, 0x55, 0x2b, 0x57,
0x2f, 0x5f, 0x3f, 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40 };
/*
 * One permutation round on the caller's state locals
 * (x00,x01), (x10,x11), (x20,x21), (x30,x31):
 *   1. XOR round constant i into x00;
 *   2. apply the S-box to the 64-bit parts and to the 32-bit parts, with
 *      outputs for rows 1..3 landing in x50/x51, x60/x61, x70/x71;
 *   3. write rows 1, 2, 3 back shifted cyclically by 1, 8 and 55 bits.
 * Requires x50..x71 and the sbox scratch variables to be declared by the
 * caller.
 */
#define ROUND384(i) {\
x00 ^= constant7[i];\
sbox64(x00, x10, x20, x30, x50, x60, x70);\
sbox32(x01, x11, x21, x31, x51, x61, x71);\
x11 = ROTR961(x51, x50, 1);\
x10 = ROTR962(x51, x50, 1);\
x21 = ROTR961(x61, x60, 8);\
x20 = ROTR962(x61, x60, 8);\
x31 = ROTR96MORE321(x71, x70, 55);\
x30 = ROTR96MORE322(x71, x70, 55);\
}
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k) {
*clen = mlen + CRYPTO_KEYBYTES;
u64 i; //RATE=96/8=12
u64 t1, t2, t3, t5, t6, t8, t9, t11;
u32 t_1, t_2, t_3, t_5, t_6, t_8, t_9, t_11;
u64 x30 = 0, x20 = 0, x10 = 0, x00 = 0;
u32 x31 = 0, x21 = 0, x11 = 0, x01 = 0;
u8 tempData[12] = { 0 };
u8 tempData1[12] = { 0 };
u8 tempData2[12] = { 0 };
u64 x40, x50, x60, x70;
u32 x41, x51, x61, x71;
// initialization
x00 = U64BIG(*(u64*)(npub));
x01 = U32BIG(*(u32*)(npub + 8));
x10 = U64BIG(*(u64*)(npub + 12));
x11 = U32BIG(*(u32*)(npub + 20));
x20 = U64BIG(*(u64*)(k));
x21 = U32BIG(*(u32*)(k + 8));
memcpy(&x30, k + 12, 8);
memcpy(&x31, k + 20, 4);
//x30 = U64BIG(*(u64*)(k + 12));
//x31 = U32BIG(*(u32*)(k + 20));
for (i = 0; i < PR0_ROUNDS; i++) {
ROUND384(i);
}
// process associated data
if (adlen) {
while (adlen >= RATE) {
x00 ^= U64BIG(*(u64*)(ad));
x01 ^= U32BIG(*(u32*)(ad + 8));
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
adlen -= RATE;
ad += RATE;
}
memset(tempData, 0, RATE);
memcpy(tempData, ad, adlen);
tempData[adlen] = 0x01;
x00 ^= U64BIG(*(u64*)(tempData));
x01 ^= U32BIG(*(u32*)(tempData + 8));
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
}
x31 ^= 0x80000000;
// process plaintext
if (mlen) {
while (mlen >= RATE) {
x00 ^= U64BIG(*(u64*)(m));
x01 ^= U32BIG(*(u32*)(m + 8));
*(u64*)c = U64BIG(x00);
*(u32*)(c + 8) = U32BIG(x01);
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
mlen -= RATE;
m += RATE;
c += RATE;
}
memset(tempData, 0, RATE);
memcpy(tempData, m, mlen);
tempData[mlen] = 0x01;
x00 ^= U64BIG(*(u64*)(tempData));
x01 ^= U32BIG(*(u32*)(tempData + 8));
*(u64*)tempData1 = U64BIG(x00);
*(u32*)(tempData1 + 8) = U32BIG(x01);
memcpy(c, tempData1, mlen * sizeof(unsigned char));
c += mlen;
}
// finalization
for (i = 0; i < PRF_ROUNDS; i++) {
ROUND384(i);
}
// return tag
*(u64*)c = U64BIG(x00);
*(u32*)(c + 8) = U32BIG(x01);
*(u64*)(c + 12) = U64BIG(x10);
*(u32*)(c + 20) = U32BIG(x11);
return 0;
}
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
unsigned char *nsec, const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k) {
*mlen = clen - CRYPTO_KEYBYTES;
if (clen < CRYPTO_KEYBYTES)
return -1;
u8 tempData[12] = { 0 };
u8 tempData1[12] = { 0 };
u64 i; //RATE=96/8=12
u64 t1, t2, t3, t5, t6, t8, t9, t11;
u32 t_1, t_2, t_3, t_5, t_6, t_8, t_9, t_11;
u64 x30 = 0, x20 = 0, x10 = 0, x00 = 0;
u32 x31 = 0, x21 = 0, x11 = 0, x01 = 0;
u64 x40, x50, x60, x70;
u32 x41, x51, x61, x71;
// initialization
x00 = U64BIG(*(u64*)(npub));
x01 = U32BIG(*(u32*)(npub + 8));
x10 = U64BIG(*(u64*)(npub + 12));
x11 = U32BIG(*(u32*)(npub + 20));
x20 = U64BIG(*(u64*)(k));
x21 = U32BIG(*(u32*)(k + 8));
x30 = U64BIG(*(u64*)(k + 12));
x31 = U32BIG(*(u32*)(k + 20));
for (i = 0; i < PR0_ROUNDS; i++) {
ROUND384(i);
}
// process associated data
if (adlen) {
while (adlen >= RATE) {
x00 ^= U64BIG(*(u64*)(ad));
x01 ^= U32BIG(*(u32*)(ad + 8));
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
adlen -= RATE;
ad += RATE;
}
memset(tempData, 0, RATE);
memcpy(tempData, ad, adlen);
tempData[adlen] = 0x01;
x00 ^= U64BIG(*(u64*)(tempData));
x01 ^= U32BIG(*(u32*)(tempData + 8));
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
}
x31 ^= 0x80000000;
// process c
clen -= CRYPTO_KEYBYTES;
if (clen > 0) {
while (clen >= RATE) {
*(u64*)(m) = U64BIG(x00) ^ (*(u64*)(c));
*(u32*)(m + 8) = U32BIG(x01) ^ (*(u32*)(c + 8));
x00 = U64BIG(*(u64*)(c));
x01 = U32BIG(*(u32*)(c + 8));
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
clen -= RATE;
m += RATE;
c += RATE;
}
memset(tempData, 0, sizeof(tempData));
memcpy(tempData, c, clen * sizeof(unsigned char));
tempData[clen] = 0x01;
*(u64*)(tempData1) = U64BIG(x00) ^ (*(u64*)(tempData));
*(u32*)(tempData1 + 8) = U32BIG(x01) ^ (*(u32*)(tempData + 8));
memcpy(m, tempData1, clen * sizeof(unsigned char));
memcpy(tempData1, c, clen * sizeof(unsigned char));
x00 = U64BIG(*(u64*)(tempData1));
x01 = U32BIG(*(u32*)(tempData1 + 8));
c += clen;
}
// finalization
for (i = 0; i < PRF_ROUNDS; i++) {
ROUND384(i);
}
memcpy(c+CRYPTO_KEYBYTES/2, tempData1, CRYPTO_KEYBYTES/2);
// return -1 if verification fails
if (*(u32*)(c + 8) != U32BIG(x01) || *(u64*)(c) != U64BIG(x00)
|| *(u32*)(c + 20) != U32BIG(x11)
|| *(u64*)(c + 12) != U64BIG(x10)){
memset(m, 0, sizeof(unsigned char) * (*mlen));
*mlen = 0;
return -1;
}
return 0;
}
/* Sizes, in bytes, of key, secret nonce, public nonce and tag. */
#define CRYPTO_KEYBYTES 24
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 24
#define CRYPTO_ABYTES 24
#define CRYPTO_NOOVERLAP 1
#include"api.h"
#include <string.h>
/* Number of rounds for initialization, data blocks and finalization. */
#define PR0_ROUNDS 76
#define PR_ROUNDS 40
#define PRF_ROUNDS 44
typedef unsigned char u8;
typedef unsigned long long u64;
typedef unsigned int u32;
/* Bytes absorbed/squeezed per permutation call. */
#define RATE 12
/*
 * 4-input S-box. Updates a in place and writes the three other outputs to
 * f, g, h. Uses the caller-declared scratch variables t1..t8.
 */
#define sbox(a, b, c, d, f, g, h) \
{ \
t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t4 = b | c; t5 = d ^ t1; g = t4 ^ t5; t6 = b ^ d; t7 = t3 & t5; a = t6 ^ t7; t8 = g & t6; f = t3 ^ t8; \
}
/*
 * Helpers for a 96-bit row given as (a: 32-bit part, b: 64-bit part).
 * ROTR961 / ROTR96MORE321 yield the new 32-bit part (truncated when stored
 * into a u32), ROTR962 / ROTR96MORE322 the new 64-bit part. The first pair
 * is written for n < 32, the MORE32 pair for n > 32. n is not parenthesized
 * in the expansions, so pass plain constants only.
 */
#define ROTR961(a,b,n) (((a)<<(n))|((b)>>(64-n)))
#define ROTR962(a,b,n) (((b)<<(n))|((a)>>(32-n)))
#define ROTR96MORE321(a,b,n) ((b<<(n-32))>>32)
#define ROTR96MORE322(a,b,n) (b<<n|(u64)a<<(n-32)|b>>(96-n))
/* Identity: no byte swapping is performed on loads and stores. */
#define U32BIG(x) (x)
#define U64BIG(x) (x)
/* Round constants; ROUND384(i) XORs entry i into s[0]. */
u8 constant7[76] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03, 0x06,
0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a, 0x14, 0x28, 0x51, 0x23, 0x47,
0x0f, 0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b, 0x16,
0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54, 0x29, 0x53,
0x27, 0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43, 0x07,
0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49, 0x13, 0x26,
0x4d, 0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b};
/*
 * One permutation round on the caller's `u32 s[12]` state. Each row takes
 * three words: words 0-1 of a row are accessed together through a u64*
 * cast (s, s+3, s+6, s+9) and the third word (s[2], s[5], s[8], s[11]) as
 * a u32. The round XORs constant i into s[0], runs the S-box on the 64-bit
 * and 32-bit parts (row 1..3 outputs go to x50/x51, x60/x61, x70/x71), and
 * writes rows 1, 2, 3 back shifted by 1, 8 and 55 bits.
 * NOTE(review): the u64 accesses at s+3 and s+9 are only 4-byte aligned.
 */
#define ROUND384(i){\
s[0] ^= constant7[i]; \
sbox(U64BIG(((u64*)s)[0]), U64BIG(((u64*)(s+3))[0]), U64BIG(((u64*)(s+6))[0]), U64BIG(((u64*)(s+9))[0]), x50, x60, x70); \
sbox(s[2], s[5], s[8], s[11], x51, x61, x71); \
s[5] = ROTR961(x51, x50, 1); \
U64BIG(((u64*)(s+3))[0]) = ROTR962(x51, x50, 1); \
s[8] = ROTR961(x61, x60, 8); \
U64BIG(((u64*)(s+6))[0]) = ROTR962(x61, x60, 8); \
s[11] = ROTR96MORE321(x71, x70, 55); \
U64BIG(((u64*)(s+9))[0]) = ROTR96MORE322(x71, x70, 55); \
}
/*
 * AEAD encryption on a 384-bit state held in `u32 s[12]`.
 *
 * c     - output: ciphertext (mlen bytes) followed by the CRYPTO_ABYTES tag
 * clen  - receives mlen + CRYPTO_ABYTES
 * m     - plaintext, mlen bytes
 * ad    - associated data, adlen bytes
 * nsec  - unused
 * npub  - nonce, CRYPTO_NPUBBYTES bytes
 * k     - key, CRYPTO_KEYBYTES bytes
 *
 * Always returns 0.
 */
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
		const unsigned char *m, unsigned long long mlen,
		const unsigned char *ad, unsigned long long adlen,
		const unsigned char *nsec, const unsigned char *npub,
		const unsigned char *k) {
	*clen = mlen + CRYPTO_ABYTES;
	u32 s[12] = { 0 }, i;
	/* Scratch variables referenced by name from the sbox/ROUND384 macros. */
	u64 t1, t2, t3, t5, t6, t8, t4, t7;
	u64 x50, x60, x70;
	u32 x51, x61, x71;
	u8 tempData[24] = { 0 };
	(void) nsec;
	// initialization: nonce in the first half of the state, key in the second
	memcpy(s, npub, sizeof(unsigned char) * CRYPTO_NPUBBYTES);
	memcpy(s + CRYPTO_NPUBBYTES / 4, k, sizeof(unsigned char) * CRYPTO_KEYBYTES);
	for (i = 0; i < PR0_ROUNDS; i++) {
		ROUND384(i);
	}
	// process associated data
	if (adlen) {
		while (adlen >= RATE) {
			U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(ad));
			/* Only 4 bytes remain in the 12-byte block: a 64-bit load here
			 * would read past the end of the associated data. */
			s[2] ^= U32BIG(*(u32*)(ad + 8));
			for (i = 0; i < PR_ROUNDS; i++) {
				ROUND384(i);
			}
			adlen -= RATE;
			ad += RATE;
		}
		/* Last (possibly empty) block: zero-fill and append a 0x01 byte. */
		memset(tempData, 0, sizeof(tempData));
		memcpy(tempData, ad, adlen * sizeof(unsigned char));
		tempData[adlen] = 0x01;
		U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(tempData));
		s[2] ^= U32BIG(*(u32*)(tempData + 8));
		for (i = 0; i < PR_ROUNDS; i++) {
			ROUND384(i);
		}
	}
	/* Flip the top bit of the last state word between AD and message phases. */
	s[11] ^= 0x80000000;
	// process plaintext
	if (mlen) {
		while (mlen >= RATE) {
			U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(m));
			s[2] ^= U32BIG(*(u32*)(m + 8));
			/* The updated rate is the ciphertext block. */
			memcpy(c, s, RATE * sizeof(unsigned char));
			for (i = 0; i < PR_ROUNDS; i++) {
				ROUND384(i);
			}
			mlen -= RATE;
			m += RATE;
			c += RATE;
		}
		/* Partial block: pad, absorb, emit only the first mlen bytes. */
		memset(tempData, 0, sizeof(tempData));
		memcpy(tempData, m, mlen * sizeof(unsigned char));
		tempData[mlen] = 0x01;
		U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(tempData));
		s[2] ^= U32BIG(*(u32*)(tempData + 8));
		memcpy(c, s, mlen * sizeof(unsigned char));
		c += mlen;
	}
	// finalization
	for (i = 0; i < PRF_ROUNDS; i++) {
		ROUND384(i);
	}
	// return tag: the first CRYPTO_ABYTES bytes of the state
	memcpy(c, s, sizeof(unsigned char) * CRYPTO_ABYTES);
	return 0;
}
/*
 * AEAD decryption and tag verification on a 384-bit state held in
 * `u32 s[12]`.
 *
 * m     - output buffer for the plaintext (clen - CRYPTO_ABYTES bytes)
 * mlen  - receives the plaintext length; set to 0 on authentication failure
 * nsec  - unused
 * c     - ciphertext followed by the CRYPTO_ABYTES tag
 * clen  - length of c in bytes
 * ad    - associated data, adlen bytes
 * npub  - nonce, CRYPTO_NPUBBYTES bytes
 * k     - key, CRYPTO_KEYBYTES bytes
 *
 * Returns 0 on success, -1 if clen is shorter than a tag or the tag does
 * not match. On a tag mismatch the whole plaintext buffer is zeroed.
 */
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
		unsigned char *nsec, const unsigned char *c, unsigned long long clen,
		const unsigned char *ad, unsigned long long adlen,
		const unsigned char *npub, const unsigned char *k) {
	/* Validate before touching *mlen so it never holds a wrapped length. */
	if (clen < CRYPTO_ABYTES)
		return -1;
	*mlen = clen - CRYPTO_ABYTES;
	/* m is advanced while decrypting; keep the start for the failure wipe. */
	unsigned char *mStart = m;
	u32 s[12] = { 0 }, i;
	/* Scratch variables referenced by name from the sbox/ROUND384 macros. */
	u64 t1, t2, t3, t5, t6, t8, t4, t7;
	u64 x50, x60, x70;
	u32 x51, x61, x71;
	u8 tempData[24] = { 0 };
	(void) nsec;
	// initialization: nonce in the first half of the state, key in the second
	memcpy(s, npub, sizeof(unsigned char) * CRYPTO_NPUBBYTES);
	memcpy(s + CRYPTO_NPUBBYTES / 4, k, sizeof(unsigned char) * CRYPTO_KEYBYTES);
	for (i = 0; i < PR0_ROUNDS; i++) {
		ROUND384(i);
	}
	// process associated data
	if (adlen) {
		while (adlen >= RATE) {
			U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(ad));
			s[2] ^= U32BIG(*(u32*)(ad + 8));
			for (i = 0; i < PR_ROUNDS; i++) {
				ROUND384(i);
			}
			adlen -= RATE;
			ad += RATE;
		}
		/* Last (possibly empty) block: zero-fill and append a 0x01 byte. */
		memset(tempData, 0, sizeof(tempData));
		memcpy(tempData, ad, adlen * sizeof(unsigned char));
		tempData[adlen] = 0x01;
		U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(tempData));
		s[2] ^= U32BIG(*(u32*)(tempData + 8));
		for (i = 0; i < PR_ROUNDS; i++) {
			ROUND384(i);
		}
	}
	/* Flip the top bit of the last state word between AD and message phases. */
	s[11] ^= 0x80000000;
	// process c
	clen -= CRYPTO_ABYTES;
	if (clen) {
		while (clen >= RATE) {
			/* plaintext = state ^ ciphertext; ciphertext replaces the rate. */
			U64BIG(*(u64*)(m)) = U64BIG(*(u64*)(s)) ^ U64BIG(*(u64*)(c));
			*(u32*)(m + 8) = s[2] ^ (*(u32*)(c + 8));
			memcpy(s, c, RATE * sizeof(unsigned char));
			for (i = 0; i < PR_ROUNDS; i++) {
				ROUND384(i);
			}
			clen -= RATE;
			m += RATE;
			c += RATE;
		}
		/* Partial block: XOR in the padded ciphertext, emit the first clen
		 * bytes as plaintext, then overwrite those bytes with ciphertext. */
		memset(tempData, 0, sizeof(tempData));
		memcpy(tempData, c, clen * sizeof(unsigned char));
		tempData[clen] = 0x01;
		U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(tempData));
		s[2] ^= U32BIG(*(u32*)(tempData + 8));
		memcpy(m, s, clen * sizeof(unsigned char));
		memcpy(s, c, clen * sizeof(unsigned char));
		c += clen;
	}
	// finalization
	for (i = 0; i < PRF_ROUNDS; i++) {
		ROUND384(i);
	}
	/* The expected tag is the first CRYPTO_ABYTES bytes of the state.
	 * NOTE(review): memcmp is not guaranteed to run in constant time. */
	if (memcmp((void*)s, (void*)c, CRYPTO_ABYTES)) {
		/* Wipe from the start of the buffer, not from the advanced cursor. */
		memset(mStart, 0, sizeof(unsigned char) * (*mlen));
		*mlen = 0;
		return -1;
	}
	return 0;
}
/* Sizes, in bytes, of key, secret nonce, public nonce and tag. */
#define CRYPTO_KEYBYTES 32 //256/8=32
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 32
#define CRYPTO_ABYTES 32
#define CRYPTO_NOOVERLAP 1
#include"api.h"
#include <string.h>
/* Identity: no byte swapping is performed on loads and stores. */
#define U32BIG(x) (x)
#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
/* Rotate the 32-bit value x left by n bits (0 < n < 32). */
#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n))))
/*
 * 4-input S-box. Updates a in place and writes the three other outputs to
 * f, g, h. Uses the caller-declared scratch variables
 * t1, t2, t3, t5, t6, t8, t9, t11.
 */
#define sbox(a, b, c, d, f, g, h) \
{ \
t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
}
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
void printU8(char name[], u8 var[], long len, int offset);
/*
 * In-place bit permutation of a 32-bit word built from four masked
 * exchange steps (shift distances 1, 2, 4, 8). Uses the caller-declared
 * scratch variable t9.
 */
#define puck32(in)\
{\
t9 = (in ^ (in >> 1)) & 0x22222222; in ^= t9 ^ (t9 << 1);\
t9 = (in ^ (in >> 2)) & 0x0C0C0C0C; in ^= t9 ^ (t9 << 2);\
t9 = (in ^ (in >> 4)) & 0x00F000F0; in ^= t9 ^ (t9 << 4);\
t9 = (in ^ (in >> 8)) & 0x0000FF00; in ^= t9 ^ (t9 << 8);\
}
/*
 * The same four exchange steps as puck32, applied in the opposite order
 * (shift distances 8, 4, 2, 1). Uses the caller-declared scratch
 * variable t9.
 */
#define unpuck32(t0){\
t9 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= t9 ^ (t9 << 8); \
t9 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= t9 ^ (t9 << 4); \
t9 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= t9 ^ (t9 << 2); \
t9 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= t9 ^ (t9 << 1); \
}
/*
 * Convert 16 bytes at `in` into four state words out[0..3]: load four u32
 * words, apply puck32 twice to each, then gather byte j of every word into
 * out[j]. Uses the caller-declared u32 scratch variables t1, t2, t3, t8
 * (and t9 through puck32).
 */
#define packU128FormatToFourPacket(out,in) {\
t8 = U32BIG(((u32*)in)[0]); \
t1 = U32BIG(((u32*)in)[1]); \
t2 = U32BIG(((u32*)in)[2]); \
t3 = U32BIG(((u32*)in)[3]); \
puck32(t8); puck32(t8); \
puck32(t1); puck32(t1); \
puck32(t2); puck32(t2); \
puck32(t3); puck32(t3); \
out[3] = (t3 & 0xff000000) | ((t2 >> 8) & 0x00ff0000) | ((t1 >> 16) & 0x0000ff00) | (t8 >> 24); \
out[2] = ((t3 << 8) & 0xff000000) | (t2 & 0x00ff0000) | ((t1 >> 8) & 0x0000ff00) | ((t8 >> 16) & 0x000000ff); \
out[1] = ((t3 << 16) & 0xff000000) | ((t2 << 8) & 0x00ff0000) | (t1 & 0x0000ff00) | ((t8 >> 8) & 0x000000ff); \
out[0] = ((t3 << 24) & 0xff000000) | ((t2 << 16) & 0x00ff0000) | ((t1 << 8) & 0x0000ff00) | (t8 & 0x000000ff); \
}
/*
 * Convert four state words in[0..3] back into 16 bytes at `out`: gather
 * byte j of every input word into t[j], apply unpuck32 twice to each t[j],
 * then copy the 16 bytes of t to out. Uses the caller-declared `u32 t[4]`
 * (and t9 through unpuck32).
 */
#define unpackU128FormatToFourPacket( out, in) {\
t[3] = (in[3] & 0xff000000 )| ((in[2] >> 8) & 0x00ff0000) | ((in[1] >> 16) & 0x0000ff00) | (in[0] >> 24); \
t[2] = ((in[3] << 8) & 0xff000000) | (in[2] & 0x00ff0000) | ((in[1] >> 8) & 0x0000ff00) | ((in[0] >> 16) & 0x000000ff); \
t[1] = ((in[3] << 16) & 0xff000000) | ((in[2] << 8) & 0x00ff0000) | (in[1] & 0x0000ff00) | ((in[0] >> 8) & 0x000000ff); \
t[0] = ((in[3] << 24) & 0xff000000) | ((in[2] << 16) & 0x00ff0000) | ((in[1] << 8) & 0x0000ff00) | (in[0] & 0x000000ff); \
unpuck32(t[0]); \
unpuck32(t[0]); \
unpuck32(t[1]); \
unpuck32(t[1]); \
unpuck32(t[2]); \
unpuck32(t[2]); \
unpuck32(t[3]);\
unpuck32(t[3]); \
memcpy(out, t, 16 * sizeof(unsigned char));\
}
/* t4..t7 = t0..t3, each rotated left by 4 bits. */
#define BIT_LOTR32_16(t0,t1,t2,t3,t4,t5,t6,t7){\
t4= LOTR32(t0, 4);\
t5 = LOTR32(t1, 4);\
t6 = LOTR32(t2, 4); \
t7 = LOTR32(t3, 4); \
}
/* t4 = t3 rotated left by 7; t5, t6, t7 = t0, t1, t2 rotated left by 6. */
#define BIT_LOTR32_25(t0,t1,t2,t3,t4,t5,t6,t7){\
t4= LOTR32(t3, 7);\
t5 = LOTR32(t0, 6);\
t6 = LOTR32(t1, 6); \
t7 = LOTR32(t2, 6); \
}
/*
 * One permutation round on the caller's `u32 s[16]` state (four words per
 * row), using `u32 s_temp[16]` as scratch:
 *   1. XOR the four 2-bit fields of constant7Format_aead[lunNum] into
 *      s[0..3];
 *   2. apply the S-box to the four word columns; the row-1 outputs are
 *      written to s_temp[7], s[7], s[6], s[5];
 *   3. s[4] = s_temp[7] rotated left by 1, and rows 2 and 3 are rebuilt
 *      from s_temp through BIT_LOTR32_16 and BIT_LOTR32_25.
 */
#define ROUND512( lunNum) {\
s[3] ^= (constant7Format_aead[lunNum] >> 6) & 0x3;\
s[2] ^= (constant7Format_aead[lunNum] >> 4) & 0x3;\
s[1] ^= (constant7Format_aead[lunNum] >> 2) & 0x3;\
s[0] ^= constant7Format_aead[lunNum] & 0x3;\
sbox(s[3], s[7], s[11], s[15], s_temp[7], s_temp[11], s_temp[15]);\
sbox(s[2], s[6], s[10], s[14], s[7] , s_temp[10], s_temp[14]);\
sbox(s[1], s[5], s[9], s[13], s[6] , s_temp[9], s_temp[13]);\
sbox(s[0], s[4], s[8], s[12], s[5] , s_temp[8], s_temp[12]);\
s[4]= LOTR32(s_temp[7], 1);\
BIT_LOTR32_16(s_temp[8], s_temp[9], s_temp[10], s_temp[11], s[8], s[9], s[10], s[11]);\
BIT_LOTR32_25(s_temp[12], s_temp[13], s_temp[14], s_temp[15], s[12], s[13], s[14], s[15]);\
}
/* Prototypes of the two AEAD entry points. */
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#include"auxFormat.h"
/* Bytes absorbed/squeezed per permutation call. */
#define aead_RATE 16
/* Number of rounds for initialization, data blocks and finalization. */
#define PR0_ROUNDS 100
#define PR_ROUNDS 52
#define PRF_ROUNDS 56
/*
#define PR0_ROUNDS 100
#define PR_ROUNDS 52
#define PRF_ROUNDS 56
* */
/* Round constants consumed by ROUND512, two bits per state word s[0..3]. */
unsigned char constant7Format_aead[100] = { 0x01, 0x04, 0x10, 0x40, 0x02, 0x08,
0x21, 0x05, 0x14, 0x50, 0x42, 0x0a, 0x29, 0x24, 0x11, 0x44, 0x12, 0x48,
0x23, 0x0d, 0x35, 0x55, 0x56, 0x5a, 0x6b, 0x2e, 0x38, 0x60, 0x03, 0x0c,
0x31, 0x45, 0x16, 0x58, 0x63, 0x0f, 0x3d, 0x74, 0x53, 0x4e, 0x3b, 0x6c,
0x32, 0x49, 0x27, 0x1d, 0x75, 0x57, 0x5e, 0x7b, 0x6e, 0x3a, 0x68, 0x22,
0x09, 0x25, 0x15, 0x54, 0x52, 0x4a, 0x2b, 0x2c, 0x30, 0x41, 0x06, 0x18,
0x61, 0x07, 0x1c, 0x71, 0x47, 0x1e, 0x79, 0x66, 0x1b, 0x6d, 0x36, 0x59,
0x67, 0x1f, 0x7d, 0x76, 0x5b, 0x6f, 0x3e, 0x78, 0x62, 0x0b, 0x2d, 0x34,
0x51, 0x46, 0x1a, 0x69, 0x26, 0x19, 0x65, 0x17, 0x5c, 0x73, };
/*
 * Absorb one 16-byte block: pack `data` into the caller-declared
 * `u32 dataFormat[4]` and XOR it into s[0..3].
 */
#define Processing_Data(data) \
do { \
packU128FormatToFourPacket(dataFormat, data); \
s[0] ^= dataFormat[0]; \
s[1] ^= dataFormat[1]; \
s[2] ^= dataFormat[2]; \
s[3] ^= dataFormat[3]; \
} while (0)
/*
 * AEAD encryption on a 512-bit state held in packed form in `u32 s[16]`.
 *
 * c     - output: ciphertext (mlen bytes) followed by the CRYPTO_ABYTES tag
 * clen  - receives mlen + CRYPTO_ABYTES
 * m     - plaintext, mlen bytes
 * ad    - associated data, adlen bytes
 * nsec  - unused
 * npub  - nonce, 32 bytes are read
 * k     - key, 32 bytes are read
 *
 * Always returns 0.
 */
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k) {
u32 i;
// The locals below are referenced by name from the ROUND512, sbox,
// pack/unpack and Processing_Data macros and must keep these names.
u32 s_temp[16] = { 0 };
u32 t1, t2, t3, t5, t6, t8, t9, t11;
u32 s[16] = { 0 };
u32 t[4] = { 0 };
u32 dataFormat[4] = { 0 };
u8 tempData[16] = { 0 };
u8 tempU8[32] = { 0 };
*clen = mlen + CRYPTO_ABYTES;
//initialization: nonce packed into s[0..7], key into s[8..15]
packU128FormatToFourPacket(s, npub);
packU128FormatToFourPacket((s + 4), (npub + 16));
packU128FormatToFourPacket((s + 8), k);
packU128FormatToFourPacket((s + 12), (k + 16));
for (i = 0; i < PR0_ROUNDS; i++) {
ROUND512(i);
}
// process associated data
//PAD(adlen, ad);
if (adlen) {
while (adlen >= aead_RATE) {
Processing_Data(ad);
for (i = 0; i < PR_ROUNDS; i++) {
ROUND512(i);
}
adlen -= aead_RATE;
ad += aead_RATE;
}
// last (possibly empty) block: zero-fill and append a 0x01 byte
memset(tempData, 0, aead_RATE);
memcpy(tempData, ad, adlen );
tempData[adlen] = 0x01;
Processing_Data(tempData);
for (i = 0; i < PR_ROUNDS; i++) {
ROUND512(i);
}
}
// flip the top bit of the last state word between AD and message phases
s[15] ^= 0x80000000;
// process p data
if (mlen) {
while (mlen >= aead_RATE) {
Processing_Data(m);
// the updated rate, unpacked back to bytes, is the ciphertext block
unpackU128FormatToFourPacket(c, s);
for (i = 0; i < PR_ROUNDS; i++) {
ROUND512(i);
}
mlen -= aead_RATE;
m += aead_RATE;
c += aead_RATE;
}
// partial block: pad, absorb, emit only the first mlen bytes
memset(tempData, 0, aead_RATE);
memcpy(tempData, m, mlen );
tempData[mlen] = 0x01;
Processing_Data(tempData);
unpackU128FormatToFourPacket(tempData, s);
memcpy(c, tempData, mlen );
c += mlen;
}
// finalization
for (i = 0; i < PRF_ROUNDS; i++) {
ROUND512(i);
}
// tag: s[0..7] unpacked to 32 bytes
unpackU128FormatToFourPacket(tempU8, s);
unpackU128FormatToFourPacket((tempU8 + 16), (s + 4));
memcpy(c, tempU8, CRYPTO_ABYTES );
return 0;
}
/*
 * AEAD decryption and tag verification on a 512-bit state held in packed
 * form in `u32 s[16]`.
 *
 * m     - output buffer for the plaintext (clen - CRYPTO_ABYTES bytes)
 * mlen  - receives the plaintext length; set to 0 on authentication failure
 * nsec  - unused
 * c     - ciphertext followed by the CRYPTO_ABYTES tag
 * clen  - length of c in bytes
 * ad    - associated data, adlen bytes
 * npub  - nonce, 32 bytes are read
 * k     - key, 32 bytes are read
 *
 * Returns 0 on success, -1 if clen is shorter than a tag or the tag does
 * not match. On a tag mismatch the whole plaintext buffer is zeroed.
 */
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
		unsigned char *nsec, const unsigned char *c, unsigned long long clen,
		const unsigned char *ad, unsigned long long adlen,
		const unsigned char *npub, const unsigned char *k) {
	/* The locals below are referenced by name from the ROUND512, sbox,
	 * pack/unpack and Processing_Data macros and must keep these names. */
	u32 s_temp[16] = { 0 };
	u32 t1, t2, t3, t5, t6, t8, t9, t11;
	u8 i; /* every round count used here is <= 100, so it fits in a u8 */
	u32 s[16] = { 0 };
	u32 dataFormat_1[4] = { 0 };
	u32 dataFormat[4] = { 0 };
	u8 tempData[16] = { 0 };
	u8 tempU8[32] = { 0 };
	u32 t[4] = { 0 };
	/* m is advanced while decrypting; keep the start for the failure wipe. */
	unsigned char *mStart = m;
	(void) nsec;
	if (clen < CRYPTO_ABYTES)
		return -1;
	*mlen = clen - CRYPTO_ABYTES;
	//initialization: nonce packed into s[0..7], key into s[8..15]
	packU128FormatToFourPacket(s, npub);
	packU128FormatToFourPacket((s + 4), (npub + 16));
	packU128FormatToFourPacket((s + 8), k);
	packU128FormatToFourPacket((s + 12), (k + 16));
	for (i = 0; i < PR0_ROUNDS; i++) {
		ROUND512(i);
	}
	// process associated data
	if (adlen) {
		while (adlen >= aead_RATE) {
			Processing_Data(ad);
			for (i = 0; i < PR_ROUNDS; i++) {
				ROUND512(i);
			}
			adlen -= aead_RATE;
			ad += aead_RATE;
		}
		/* Last (possibly empty) block: zero-fill and append a 0x01 byte. */
		memset(tempData, 0, aead_RATE);
		memcpy(tempData, ad, adlen );
		tempData[adlen] = 0x01;
		Processing_Data(tempData);
		for (i = 0; i < PR_ROUNDS; i++) {
			ROUND512(i);
		}
	}
	/* Flip the top bit of the last state word between AD and message phases. */
	s[15] ^= 0x80000000;
	// process c data
	clen = clen - CRYPTO_ABYTES;
	if (clen) {
		while (clen >= aead_RATE) {
			/* plaintext = state ^ ciphertext (in packed form); the packed
			 * ciphertext then replaces the rate words. */
			packU128FormatToFourPacket(dataFormat, c);
			dataFormat_1[0] = s[0] ^ dataFormat[0];
			dataFormat_1[1] = s[1] ^ dataFormat[1];
			dataFormat_1[2] = s[2] ^ dataFormat[2];
			dataFormat_1[3] = s[3] ^ dataFormat[3];
			unpackU128FormatToFourPacket(m, dataFormat_1);
			s[0] = dataFormat[0];
			s[1] = dataFormat[1];
			s[2] = dataFormat[2];
			s[3] = dataFormat[3];
			for (i = 0; i < PR_ROUNDS; i++) {
				ROUND512(i);
			}
			clen -= aead_RATE;
			m += aead_RATE;
			c += aead_RATE;
		}
		/* Partial block, handled on the unpacked rate bytes. */
		unpackU128FormatToFourPacket(tempU8, s);
		memset(tempData, 0, aead_RATE);
		memcpy(tempData, c, clen );
		tempData[clen] = 0x01;
		/* Byte-wise XOR avoids u32 accesses through casted u8 arrays. After
		 * it, bytes [0, clen) hold plaintext and bytes [clen, 16) hold
		 * (state ^ padding). */
		for (i = 0; i < aead_RATE; i++) {
			tempU8[i] ^= tempData[i];
		}
		memcpy(m, tempU8, clen );
		/* New rate = ciphertext bytes followed by the (state ^ padding)
		 * tail; the 0x01 padding byte is already included by the XOR above,
		 * so no extra flip is needed. */
		memcpy(tempU8, tempData, clen );
		c += clen;
		packU128FormatToFourPacket(s, tempU8);
	}
	// finalization
	for (i = 0; i < PRF_ROUNDS; i++) {
		ROUND512(i);
	}
	/* Expected tag: s[0..7] unpacked to 32 bytes. */
	unpackU128FormatToFourPacket(tempU8, s);
	unpackU128FormatToFourPacket((tempU8 + 16), (s + 4));
	/* NOTE(review): memcmp is not guaranteed to run in constant time. */
	if (memcmp((void*) tempU8, (void*) c, CRYPTO_ABYTES)) {
		/* Wipe from the start of the buffer, not from the advanced cursor. */
		memset(mStart, 0, (*mlen));
		*mlen = 0;
		return -1;
	}
	return 0;
}
/* Sizes, in bytes, of key, secret nonce, public nonce and tag. */
#define CRYPTO_KEYBYTES 32
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 32
#define CRYPTO_ABYTES 32
#define CRYPTO_NOOVERLAP 1
#include <stdio.h>
#include "api.h"
typedef unsigned char u8;
typedef unsigned long long u64;
typedef long long i64;
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Round constants; ROUND512(i) XORs entry i into s[0]. */
static const u8 constant7[100] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41,
0x03, 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a, 0x14, 0x28, 0x51,
0x23, 0x47, 0x0f, 0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45,
0x0b, 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54,
0x29, 0x53, 0x27, 0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21,
0x43, 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49,
0x13, 0x26, 0x4d, 0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b,
0x37, 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31, 0x63, 0x46, 0x0d,
0x1a, 0x34, 0x69, 0x52, 0x25, 0x4b, 0x17, 0x2e, 0x5d };
/*
 * 4-input S-box. Updates a in place and writes the three other outputs to
 * f, g, h. Uses the caller-declared scratch variables
 * t1, t2, t3, t5, t6, t8, t9, t11.
 */
#define sbox(a, b, c, d, f, g, h) \
{ \
t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
}
/*
 * Halves of a 128-bit left rotation by n (0 < n < 64) of the value whose
 * lower word is a and upper word is b: LOTR1281 gives the new lower word,
 * LOTR1282 the new upper word. n is not parenthesized inside (64-n), so
 * pass plain constants only.
 */
#define LOTR1281(a,b,n) (((a)<<(n))|((b)>>(64-n)))
#define LOTR1282(a,b,n) (((b)<<(n))|((a)>>(64-n)))
/* Identity: no byte swapping is performed on loads and stores. */
#define U64BIG(x) (x)
/* Bytes absorbed/squeezed per permutation call. */
#define RATE 16
/* Number of rounds for initialization, data blocks and finalization. */
#define PR0_ROUNDS 100
#define PR_ROUNDS 52
#define PRF_ROUNDS 56
/*
 * One permutation round on the caller's `u64 s[8]` state (two words per
 * row): XOR round constant i into s[0], apply the S-box to the even and to
 * the odd words (row 1..3 outputs go to b10/b11, b20/b21, b30/b31), then
 * store rows 1, 2, 3 rotated left by 1, 16 and 25 bits.
 * Requires b10..b31 and the sbox scratch variables to be declared by the
 * caller.
 */
#define ROUND512(i) {\
s[0]^=constant7[i];\
sbox(s[0], s[2], s[4], s[6], b10, b20, b30);\
sbox(s[1], s[3], s[5], s[7], b11, b21, b31);\
s[2]=LOTR1281(b10,b11,1);\
s[4]=LOTR1281(b20,b21,16);\
s[6]=LOTR1281(b30,b31,25);\
s[3]=LOTR1282(b10,b11,1);\
s[5]=LOTR1282(b20,b21,16);\
s[7]=LOTR1282(b30,b31,25);\
}
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k) {
*clen = mlen + CRYPTO_ABYTES;
u64 b11, b21, b31, b10, b20, b30;
u64 t1, t2, t3, t5, t6, t8, t9, t11;
u64 s[8] = { 0 };
u64 i;
u8 tempData[32] = { 0 };
// initialization
memcpy(s, npub, CRYPTO_NPUBBYTES);
memcpy(s + 4, k, CRYPTO_KEYBYTES);
for (i = 0; i < PR0_ROUNDS; i++) {
ROUND512(i);
}
// process associated data
if (adlen) {
while (adlen >= RATE) {
s[0] ^= U64BIG(((u64*)ad)[0]);
s[1] ^= U64BIG(((u64*)ad)[1]);
for (i = 0; i < PR_ROUNDS; i++) {
ROUND512(i);
}
adlen -= RATE;
ad += RATE;
}
memset(tempData, 0, RATE);
memcpy(tempData, ad, adlen);
tempData[adlen] = 0x01;
s[0] ^= U64BIG(((u64*)tempData)[0]);
s[1] ^= U64BIG(((u64*)tempData)[1]);
for (i = 0; i < PR_ROUNDS; i++) {
ROUND512(i);
}
}
s[7] ^= 0x8000000000000000;
// process plaintext
if (mlen) {
while (mlen >= RATE) {
s[0] ^= U64BIG(((u64*)m)[0]);
s[1] ^= U64BIG(((u64*)m)[1]);
memcpy(c, s, RATE);
for (i = 0; i < PR_ROUNDS; i++) {
ROUND512(i);
}
mlen -= RATE;
m += RATE;
c += RATE;
}
memset(tempData, 0, RATE);
memcpy(tempData, m, mlen);
tempData[mlen] = 0x01;
s[0] ^= U64BIG(((u64*)tempData)[0]);
s[1] ^= U64BIG(((u64*)tempData)[1]);
memcpy(c, s, mlen);
c += mlen;
}
// finalization
for (i = 0; i < PRF_ROUNDS; i++) {
ROUND512(i);
}
// return tag
memcpy(c, s, CRYPTO_ABYTES);
return 0;
}
/*
 * AEAD decryption and tag verification on a 512-bit state held as eight
 * 64-bit words.
 *
 * m     - output buffer for the plaintext (clen - CRYPTO_ABYTES bytes)
 * mlen  - receives the plaintext length; set to 0 on authentication failure
 * nsec  - unused
 * c     - ciphertext followed by the CRYPTO_ABYTES tag
 * clen  - length of c in bytes
 * ad    - associated data, adlen bytes
 * npub  - nonce, CRYPTO_NPUBBYTES bytes
 * k     - key, CRYPTO_KEYBYTES bytes
 *
 * Returns 0 on success, -1 if clen is shorter than a tag or the tag does
 * not match. On a tag mismatch the whole plaintext buffer is zeroed.
 */
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
		unsigned char *nsec, const unsigned char *c, unsigned long long clen,
		const unsigned char *ad, unsigned long long adlen,
		const unsigned char *npub, const unsigned char *k) {
	/* Validate before touching *mlen so it never holds a wrapped length. */
	if (clen < CRYPTO_ABYTES)
		return -1;
	*mlen = clen - CRYPTO_ABYTES;
	/* m is advanced while decrypting; keep the start for the failure wipe. */
	unsigned char *mStart = m;
	/* s, b10..b31 and t1..t11 are referenced by name from ROUND512/sbox. */
	u64 b11, b21, b31, b10, b20, b30;
	u64 t1, t2, t3, t5, t6, t8, t9, t11;
	u64 s[8] = { 0 };
	u64 i;
	u8 tempData[32] = { 0 };
	(void) nsec;
	// initialization: nonce in words 0..3, key in words 4..7
	memcpy(s, npub, CRYPTO_NPUBBYTES);
	memcpy(s + 4, k, CRYPTO_KEYBYTES);
	for (i = 0; i < PR0_ROUNDS; i++) {
		ROUND512(i);
	}
	// process associated data
	if (adlen) {
		while (adlen >= RATE) {
			s[0] ^= U64BIG(((u64*)ad)[0]);
			s[1] ^= U64BIG(((u64*)ad)[1]);
			for (i = 0; i < PR_ROUNDS; i++) {
				ROUND512(i);
			}
			adlen -= RATE;
			ad += RATE;
		}
		/* Last (possibly empty) block: zero-fill and append a 0x01 byte. */
		memset(tempData, 0, RATE);
		memcpy(tempData, ad, adlen);
		tempData[adlen] = 0x01;
		s[0] ^= U64BIG(((u64*)tempData)[0]);
		s[1] ^= U64BIG(((u64*)tempData)[1]);
		for (i = 0; i < PR_ROUNDS; i++) {
			ROUND512(i);
		}
	}
	/* Flip the top bit of the last state word between AD and message phases. */
	s[7] ^= 0x8000000000000000;
	clen -= CRYPTO_ABYTES;
	if (clen) {
		while (clen >= RATE) {
			/* plaintext = state ^ ciphertext; ciphertext replaces the rate. */
			U64BIG(((u64*)m)[0]) = s[0] ^ U64BIG(((u64*)c)[0]);
			U64BIG(((u64*)m)[1]) = s[1] ^ U64BIG(((u64*)c)[1]);
			memcpy(s, c, RATE);
			for (i = 0; i < PR_ROUNDS; i++) {
				ROUND512(i);
			}
			clen -= RATE;
			m += RATE;
			c += RATE;
		}
		/* Partial block: XOR in the padded ciphertext, emit the first clen
		 * bytes as plaintext, then overwrite those bytes with ciphertext. */
		memset(tempData, 0, RATE);
		memcpy(tempData, c, clen);
		tempData[clen] = 0x01;
		s[0] ^= U64BIG(((u64*)tempData)[0]);
		s[1] ^= U64BIG(((u64*)tempData)[1]);
		memcpy(m, s, clen);
		memcpy(s, c, clen);
		c += clen;
	}
	// finalization
	for (i = 0; i < PRF_ROUNDS; i++) {
		ROUND512(i);
	}
	/* The expected tag is the first CRYPTO_ABYTES bytes of the state.
	 * NOTE(review): memcmp is not guaranteed to run in constant time. */
	if (memcmp((void*)s, (void*)c, CRYPTO_ABYTES)) {
		/* Wipe from the start of the buffer, not from the advanced cursor. */
		memset(mStart, 0, (*mlen));
		*mlen = 0;
		return -1;
	}
	return 0;
}
/* Sizes, in bytes, of key, secret nonce, public nonce and tag. */
#define CRYPTO_KEYBYTES 32 //256/8=32
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 32
#define CRYPTO_ABYTES 32
#define CRYPTO_NOOVERLAP 1
#include <stdio.h>
#include "api.h"
#include <string.h>
#include <stdlib.h>
#include <emmintrin.h>//sse2 header file(include sse header file)
/* Identity: no byte swapping is performed on loads and stores. */
#define U64BIG(x) (x)
#define U32BIG(x) (x)
/* Number of rounds for initialization, data blocks and finalization. */
#define PR0_ROUNDS 100
#define PR_ROUNDS 52
#define PRF_ROUNDS 56
typedef unsigned char u8;
typedef unsigned long long u64;
typedef long long i64;
/*
 * S-box on whole 128-bit rows. Updates a in place and writes the three
 * other outputs to f, g, h. The complement of a is formed by XOR with the
 * caller-declared all-ones vector `all1`. Uses the caller-declared
 * __m128i scratch variables tmm1, tmm2, tmm3, tmm5, tmm6, tmm8, tmm9, tmm11.
 */
#define forward_sbox_SSE(a, b, c, d, f, g, h) \
{ \
tmm1 =_mm_xor_si128( a , all1 ); \
tmm2 =_mm_and_si128( b, tmm1 ); \
tmm3 =_mm_xor_si128( c , tmm2 ); \
h=_mm_xor_si128( d , tmm3 ); \
tmm5 =_mm_or_si128 ( b, c); \
tmm6=_mm_xor_si128( d, tmm1 ); \
g=_mm_xor_si128( tmm5, tmm6 ); \
tmm8=_mm_xor_si128( b, d ); \
tmm9=_mm_and_si128( tmm3, tmm6 ); \
a=_mm_xor_si128( tmm8, tmm9 ); \
tmm11=_mm_and_si128( g, tmm8 ); \
f=_mm_xor_si128( tmm3, tmm11 ); \
}
/* Round constants; ROUND512(i) XORs entry i into the low 64 bits of state[0]. */
u8 constant7[100] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41,
0x03, 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a, 0x14, 0x28, 0x51,
0x23, 0x47, 0x0f, 0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45,
0x0b, 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54,
0x29, 0x53, 0x27, 0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21,
0x43, 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49,
0x13, 0x26, 0x4d, 0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b,
0x37, 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31, 0x63, 0x46, 0x0d,
0x1a, 0x34, 0x69, 0x52, 0x25, 0x4b, 0x17, 0x2e, 0x5d };
/*
 * Scalar S-box on integer words; ROUND512 below uses forward_sbox_SSE
 * instead. Uses caller-declared scratch t1, t2, t3, t5, t6, t8, t9, t11.
 */
#define sbox(a, b, c, d, f, g, h) \
{ \
t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
}
/* Scalar halves of a 128-bit left rotation; ROUND512 below does not use them. */
#define LOTR1281(a,b,n) (((a)<<(n))|((b)>>(64-n)))
#define LOTR1282(a,b,n) (((b)<<(n))|((a)>>(64-n)))
/* The definitions below repeat earlier ones with identical values. */
#define U64BIG(x) (x)
/* Bytes absorbed/squeezed per permutation call. */
#define RATE 16
#define PR0_ROUNDS 100
#define PR_ROUNDS 52
#define PRF_ROUNDS 56
/*
 * One permutation round on the caller's `__m128i state[4]` (one vector per
 * row): XOR round constant i into the low 64-bit lane of state[0], run the
 * S-box on the four rows (row 1..3 outputs go to out1, out2, out3), then
 * store rows 1, 2, 3 rotated left by 1, 16 and 25 bits. Each 128-bit
 * rotation is built from a per-lane left shift OR-ed with the lane-swapped
 * vector shifted right by (64 - n).
 */
#define ROUND512(i) {\
state[0] = _mm_xor_si128(state[0], _mm_set_epi64x(0, (u64)constant7[i])); \
forward_sbox_SSE(state[0], state[1], state[2], state[3], out1, out2, out3); \
state[1] = _mm_or_si128(_mm_slli_epi64(out1, 1), _mm_srli_epi64(_mm_shuffle_epi32(out1, _MM_SHUFFLE(1, 0, 3, 2)), 63)); \
state[2] = _mm_or_si128(_mm_slli_epi64(out2, 16), _mm_srli_epi64(_mm_shuffle_epi32(out2, _MM_SHUFFLE(1, 0, 3, 2)), 48)); \
state[3] = _mm_or_si128(_mm_slli_epi64(out3, 25), _mm_srli_epi64(_mm_shuffle_epi32(out3, _MM_SHUFFLE(1, 0, 3, 2)), 39)); \
}
/*
 * AEAD encryption on a 512-bit state held as four SSE2 vectors.
 *
 * c     - output: ciphertext (mlen bytes) followed by the CRYPTO_ABYTES tag
 * clen  - receives mlen + CRYPTO_ABYTES
 * m     - plaintext, mlen bytes
 * ad    - associated data, adlen bytes
 * nsec  - unused
 * npub  - nonce, 32 bytes are read
 * k     - key, 32 bytes are read
 *
 * Always returns 0.
 */
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
		const unsigned char *m, unsigned long long mlen,
		const unsigned char *ad, unsigned long long adlen,
		const unsigned char *nsec, const unsigned char *npub,
		const unsigned char *k) {
	/* state, all1, tmm*, out* are referenced by name from the round macros. */
	__m128i state[4];
	__m128i all1 = _mm_set1_epi32(0xffffffff);
	__m128i tmm1, tmm2, tmm3, tmm5, tmm6, tmm8, tmm9, tmm11;
	__m128i out1, out2, out3;
	__m128i block;
	u8 padded[16] = { 0 };
	u64 r;

	*clen = mlen + CRYPTO_ABYTES;

	/* Initialization: nonce in rows 0-1, key in rows 2-3. */
	state[0] = _mm_loadu_si128((__m128i*)(npub));
	state[1] = _mm_loadu_si128((__m128i*)(npub + 16));
	state[2] = _mm_loadu_si128((__m128i*)(k));
	state[3] = _mm_loadu_si128((__m128i*)(k + 16));
	for (r = 0; r < PR0_ROUNDS; r++) {
		ROUND512(r);
	}

	/* Associated data (skipped entirely when empty). */
	if (adlen != 0) {
		for (; adlen >= RATE; adlen -= RATE, ad += RATE) {
			block = _mm_loadu_si128((__m128i*)(ad));
			state[0] = _mm_xor_si128(state[0], block);
			for (r = 0; r < PR_ROUNDS; r++) {
				ROUND512(r);
			}
		}
		/* Last (possibly empty) block: zero-fill and append a 0x01 byte. */
		memset(padded, 0, RATE);
		memcpy(padded, ad, adlen);
		padded[adlen] = 0x01;
		block = _mm_loadu_si128((__m128i*)(padded));
		state[0] = _mm_xor_si128(state[0], block);
		for (r = 0; r < PR_ROUNDS; r++) {
			ROUND512(r);
		}
	}

	/* Flip the top bit of the last row between the two phases. */
	state[3] = _mm_xor_si128(state[3],
			_mm_set_epi64x((u64)0x8000000000000000, 0));

	/* Plaintext (skipped entirely when empty). */
	if (mlen != 0) {
		for (; mlen >= RATE; mlen -= RATE, m += RATE, c += RATE) {
			block = _mm_loadu_si128((__m128i*)(m));
			state[0] = _mm_xor_si128(state[0], block);
			/* The updated rate row is the ciphertext block. */
			memcpy(c, state, RATE);
			for (r = 0; r < PR_ROUNDS; r++) {
				ROUND512(r);
			}
		}
		/* Partial block: pad, absorb, emit only the first mlen bytes. */
		memset(padded, 0, RATE);
		memcpy(padded, m, mlen);
		padded[mlen] = 0x01;
		block = _mm_loadu_si128((__m128i*)(padded));
		state[0] = _mm_xor_si128(state[0], block);
		memcpy(c, state, mlen);
		c += mlen;
	}

	/* Finalization, then emit the first CRYPTO_ABYTES state bytes as tag. */
	for (r = 0; r < PRF_ROUNDS; r++) {
		ROUND512(r);
	}
	memcpy(c, state, sizeof(unsigned char) * CRYPTO_ABYTES);
	return 0;
}
/*
 * Sponge-based authenticated decryption (inverse of crypto_aead_encrypt).
 *
 * m      output buffer for clen - CRYPTO_ABYTES plaintext bytes
 * mlen   receives the plaintext length, or 0 on failure
 * nsec   unused
 * c/clen ciphertext followed by a CRYPTO_ABYTES-byte tag
 * ad/adlen associated data
 * npub   nonce, 32 bytes are read
 * k      key, 32 bytes are read
 *
 * Returns 0 when the tag verifies, -1 when the input is shorter than a tag
 * or the tag does not match. On a tag mismatch the whole plaintext buffer
 * is zeroed before returning.
 */
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
		unsigned char *nsec, const unsigned char *c, unsigned long long clen,
		const unsigned char *ad, unsigned long long adlen,
		const unsigned char *npub, const unsigned char *k) {
	/* all1, tmm*, out* and state are referenced by name from ROUND512. */
	__m128i all1 = _mm_set1_epi32(0xffffffff);
	__m128i tmm1, tmm2, tmm3, tmm5, tmm6, tmm8, tmm9, tmm11, out1, out2, out3;
	__m128i state[4];
	u64 i;
	u8 tempData[16] = { 0 };
	/* m is advanced while decrypting; keep the start for the failure wipe. */
	unsigned char *const m_start = m;
	const unsigned char *computed_tag;
	unsigned char tag_diff = 0;

	(void)nsec;

	/* Validate the length before deriving the plaintext length from it. */
	if (clen < CRYPTO_ABYTES) {
		*mlen = 0;
		return -1;
	}
	*mlen = clen - CRYPTO_ABYTES;

	// initialization
	state[0] = _mm_loadu_si128((const __m128i *)npub);
	state[1] = _mm_loadu_si128((const __m128i *)(npub + 16));
	state[2] = _mm_loadu_si128((const __m128i *)k);
	state[3] = _mm_loadu_si128((const __m128i *)(k + 16));
	for (i = 0; i < PR0_ROUNDS; i++) {
		ROUND512(i);
	}

	// process associated data
	if (adlen) {
		while (adlen >= RATE) {
			state[0] = _mm_xor_si128(state[0],
					_mm_loadu_si128((const __m128i *)ad));
			for (i = 0; i < PR_ROUNDS; i++) {
				ROUND512(i);
			}
			adlen -= RATE;
			ad += RATE;
		}
		memset(tempData, 0, RATE);
		memcpy(tempData, ad, adlen);
		tempData[adlen] = 0x01;
		state[0] = _mm_xor_si128(state[0],
				_mm_loadu_si128((const __m128i *)tempData));
		for (i = 0; i < PR_ROUNDS; i++) {
			ROUND512(i);
		}
	}

	/* Domain separation: flip the top bit of the state. */
	state[3] = _mm_xor_si128(state[3],
			_mm_set_epi64x((u64)0x8000000000000000, 0));

	// process ciphertext
	clen -= CRYPTO_ABYTES;
	if (clen) {
		while (clen >= RATE) {
			/* plaintext = rate ^ ciphertext; the rate then becomes the ciphertext */
			state[0] = _mm_xor_si128(state[0],
					_mm_loadu_si128((const __m128i *)c));
			memcpy(m, state, RATE);
			memcpy(state, c, RATE);
			for (i = 0; i < PR_ROUNDS; i++) {
				ROUND512(i);
			}
			clen -= RATE;
			m += RATE;
			c += RATE;
		}
		/* Final (possibly empty) block with 0x01 padding after the data. */
		memset(tempData, 0, RATE);
		memcpy(tempData, c, clen);
		tempData[clen] = 0x01;
		state[0] = _mm_xor_si128(state[0],
				_mm_loadu_si128((const __m128i *)tempData));
		memcpy(m, state, clen);
		memcpy(state, c, clen);
		c += clen;
	}

	// finalization
	for (i = 0; i < PRF_ROUNDS; i++) {
		ROUND512(i);
	}

	/* Compare the tag without an early exit so timing does not depend on
	 * the position of the first mismatching byte. */
	computed_tag = (const unsigned char *)state;
	for (i = 0; i < CRYPTO_ABYTES; i++) {
		tag_diff |= (unsigned char)(computed_tag[i] ^ c[i]);
	}
	if (tag_diff != 0) {
		/* Wipe from the start of the buffer, not from the advanced cursor. */
		memset(m_start, 0, *mlen);
		*mlen = 0;
		return -1;
	}
	return 0;
}
#include"auxFormat.h"
void P256(unsigned int *s, unsigned char *rc, unsigned char rounds)
{
unsigned int reg1, reg2;
asm volatile (
"/*add round const*/ \n\t"
"ldrb %[reg1], [%[rc]] \n\t"
"and %[reg2], %[reg1], 0xf \n\t"
"eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/\
"eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/\
"adds %[rc], %[rc], #1 \n\t"
"/*sbox first column 0,2,4,6 sbox1(s[0], s[2], s[4], s[6]); */ \n\t"
"mvns %[S_0], %[S_0] \n\t"
"ands %[reg1], %[S_2], %[S_0] \n\t"
"eors %[reg1], %[S_4], %[reg1] \n\t"
"orrs %[S_4], %[S_2], %[S_4] \n\t"
"eors %[S_0], %[S_6], %[S_0] \n\t"
"eors %[S_4], %[S_4], %[S_0] \n\t"
"eors %[reg2], %[S_2], %[S_6] \n\t"
"eors %[S_6], %[S_6], %[reg1] \n\t"
"ands %[S_0], %[reg1],%[S_0] \n\t"
"eors %[S_0], %[reg2],%[S_0] \n\t"
"ands %[S_2], %[S_4], %[reg2] \n\t"
"eors %[S_2], %[reg1], %[S_2] \n\t"
"/*sbox first column 1,3,5,7 sbox1(s[1], s[3], s[5], s[7]) */ \n\t"
"mvns %[S_1], %[S_1] \n\t"
"ands %[reg1], %[S_3], %[S_1] \n\t"
"eors %[reg1], %[S_5], %[reg1] \n\t"
"orrs %[S_5], %[S_3], %[S_5] \n\t"
"eors %[S_1], %[S_7], %[S_1] \n\t"
"eors %[S_5], %[S_5], %[S_1] \n\t"
"eors %[reg2], %[S_3], %[S_7] \n\t"
"eors %[S_7], %[S_7], %[reg1] \n\t"
"ands %[S_1], %[reg1],%[S_1] \n\t"
"eors %[S_1], %[reg2],%[S_1] \n\t"
"ands %[S_3], %[S_5], %[reg2] \n\t"
"eors %[S_3], %[reg1], %[S_3] \n\t"
"enc_loop2: \n\t"
"/*add round const*/ \n\t"
"ldrb %[reg1], [%[rc]] \n\t"
"and %[reg2], %[reg1], 0xf \n\t"
"eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/
"eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/
"adds %[rc], %[rc], #1 \n\t"
"/*sbox first column 0,3,4,7 sbox1(s[0], s[3],ROR(s[4], 28), ROR(s[7], 20)); */ \n\t"
"mvns %[S_0], %[S_0] \n\t"
"ands %[reg1], %[S_3], %[S_0] \n\t"
"eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t"
"orrs %[S_4], %[S_3], %[S_4] , ROR #28 \n\t"
"eors %[S_0], %[S_0], %[S_7] , ROR #20 \n\t"
"eors %[S_4], %[S_4], %[S_0] \n\t"
"eors %[reg2], %[S_3], %[S_7] , ROR #20 \n\t"
"eors %[S_7], %[reg1],%[S_7] , ROR #20 \n\t"
"ands %[S_0], %[reg1],%[S_0] \n\t"
"eors %[S_0], %[reg2],%[S_0] \n\t"
"ands %[S_3], %[S_4], %[reg2] \n\t"
"eors %[S_3], %[reg1], %[S_3] \n\t"
"/*sbox first column 1,2,5,6 sbox1(s[1], ROR(s[2], 31), ROR(s[5], 28), ROR(s[6], 19)); */ \n\t"
"mvns %[S_1], %[S_1] \n\t"
"ands %[reg1], %[S_1], %[S_2] , ROR #31 \n\t"
"eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t"
"/*orrs %[S_5], %[S_5], ROR #28 %[S_2], ROR #31 31-28=3*/ \n\t"
"orrs %[S_5], %[S_5], %[S_2] , ROR #3 \n\t"
"eors %[S_1], %[S_1], %[S_6] , ROR #19 \n\t"
"eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t"
"/*eors %[reg2], %[S_6] , ROR #19, %[S_2] , ROR #31 31-19=12*/ \n\t"
"eors %[reg2], %[S_6], %[S_2] , ROR #12 \n\t"
"eors %[S_6], %[reg1],%[S_6] , ROR #19 \n\t"
"ands %[S_1], %[reg1],%[S_1] \n\t"
"eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
"ands %[S_2], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
"eors %[S_2], %[reg1],%[S_2] \n\t"
"/*add round const*/ \n\t"
"ldrb %[reg1], [%[rc]] \n\t"
"and %[reg2], %[reg1], 0xf \n\t"
"eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/\
"eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/\
"adds %[rc], %[rc], #1 \n\t"
"/*sbox first column 0,2,4,6 sbox1(s[0], s[2], ROR(s[4], 28), ROR(s[6], 20)); */ \n\t"
"mvns %[S_0], %[S_0] \n\t"
"ands %[reg1], %[S_2], %[S_0] \n\t"
"eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t"
"orrs %[S_4], %[S_2], %[S_4] , ROR #28 \n\t"
"eors %[S_0], %[S_0], %[S_6] , ROR #20 \n\t"
"eors %[S_4], %[S_4], %[S_0] \n\t"
"eors %[reg2], %[S_2], %[S_6] , ROR #20 \n\t"
"eors %[S_6], %[reg1],%[S_6] , ROR #20 \n\t"
"ands %[S_0], %[reg1],%[S_0] \n\t"
"eors %[S_0], %[reg2],%[S_0] \n\t"
"ands %[S_2], %[S_4], %[reg2] \n\t"
"eors %[S_2], %[reg1], %[S_2] \n\t"
"/*sbox first column 1,3,5,7 sbox1(s[1], ROR(s[3], 31), ROR(s[5], 28), ROR(s[7], 19)); */ \n\t"
"mvns %[S_1], %[S_1] \n\t"
"ands %[reg1], %[S_1], %[S_3] , ROR #31 \n\t"
"eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t"
"/*orrs %[S_5], %[S_5], ROR #28 %[S_3], ROR #31 31-28=3*/ \n\t"
"orrs %[S_5], %[S_5], %[S_3] , ROR #3 \n\t"
"eors %[S_1], %[S_1], %[S_7] , ROR #19 \n\t"
"eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t"
"/*eors %[reg2], %[S_7] , ROR #19, %[S_3] , ROR #31 31-19=12*/ \n\t"
"eors %[reg2], %[S_7], %[S_3] , ROR #12 \n\t"
"eors %[S_7], %[reg1],%[S_7] , ROR #19 \n\t"
"ands %[S_1], %[reg1],%[S_1] \n\t"
"eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
"ands %[S_3], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
"eors %[S_3], %[reg1],%[S_3] \n\t"
"/*add round const*/ \n\t"
"ldrb %[reg1], [%[rc]] \n\t"
"and %[reg2], %[reg1], 0xf \n\t"
"eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/
"eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/
"adds %[rc], %[rc], #1 \n\t"
"/*sbox first column 0,3,4,7 sbox1(s[0], s[3],ROR(s[4], 28), ROR(s[7], 20)); */ \n\t"
"mvns %[S_0], %[S_0] \n\t"
"ands %[reg1], %[S_3], %[S_0] \n\t"
"eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t"
"orrs %[S_4], %[S_3], %[S_4] , ROR #28 \n\t"
"eors %[S_0], %[S_0], %[S_7] , ROR #20 \n\t"
"eors %[S_4], %[S_4], %[S_0] \n\t"
"eors %[reg2], %[S_3], %[S_7] , ROR #20 \n\t"
"eors %[S_7], %[reg1],%[S_7] , ROR #20 \n\t"
"ands %[S_0], %[reg1],%[S_0] \n\t"
"eors %[S_0], %[reg2],%[S_0] \n\t"
"ands %[S_3], %[S_4], %[reg2] \n\t"
"eors %[S_3], %[reg1], %[S_3] \n\t"
"/*sbox first column 1,2,5,6 sbox1(s[1], ROR(s[2], 31), ROR(s[5], 28), ROR(s[6], 19)); */ \n\t"
"mvns %[S_1], %[S_1] \n\t"
"ands %[reg1], %[S_1], %[S_2] , ROR #31 \n\t"
"eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t"
"/*orrs %[S_5], %[S_5], ROR #28 %[S_2], ROR #31 31-28=3*/ \n\t"
"orrs %[S_5], %[S_5], %[S_2] , ROR #3 \n\t"
"eors %[S_1], %[S_1], %[S_6] , ROR #19 \n\t"
"eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t"
"/*eors %[reg2], %[S_6] , ROR #19, %[S_2] , ROR #31 31-19=12*/ \n\t"
"eors %[reg2], %[S_6], %[S_2] , ROR #12 \n\t"
"eors %[S_6], %[reg1],%[S_6] , ROR #19 \n\t"
"ands %[S_1], %[reg1],%[S_1] \n\t"
"eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
"ands %[S_2], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
"eors %[S_2], %[reg1],%[S_2] \n\t"
"/*add round const*/ \n\t"
"ldrb %[reg1], [%[rc]] \n\t"
"and %[reg2], %[reg1], 0xf \n\t"
"eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/\
"eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/\
"adds %[rc], %[rc], #1 \n\t"
"/*sbox first column 0,2,4,6 sbox1(s[0], s[2], ROR(s[4], 28), ROR(s[6], 20)); */ \n\t"
"mvns %[S_0], %[S_0] \n\t"
"ands %[reg1], %[S_2], %[S_0] \n\t"
"eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t"
"orrs %[S_4], %[S_2], %[S_4] , ROR #28 \n\t"
"eors %[S_0], %[S_0], %[S_6] , ROR #20 \n\t"
"eors %[S_4], %[S_4], %[S_0] \n\t"
"eors %[reg2], %[S_2], %[S_6] , ROR #20 \n\t"
"eors %[S_6], %[reg1],%[S_6] , ROR #20 \n\t"
"ands %[S_0], %[reg1],%[S_0] \n\t"
"eors %[S_0], %[reg2],%[S_0] \n\t"
"ands %[S_2], %[S_4], %[reg2] \n\t"
"eors %[S_2], %[reg1], %[S_2] \n\t"
"/*sbox first column 1,3,5,7 sbox1(s[1], ROR(s[3], 31), ROR(s[5], 28), ROR(s[7], 19)); */ \n\t"
"mvns %[S_1], %[S_1] \n\t"
"ands %[reg1], %[S_1], %[S_3] , ROR #31 \n\t"
"eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t"
"/*orrs %[S_5], %[S_5], ROR #28 %[S_3], ROR #31 31-28=3*/ \n\t"
"orrs %[S_5], %[S_5], %[S_3] , ROR #3 \n\t"
"eors %[S_1], %[S_1], %[S_7] , ROR #19 \n\t"
"eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t"
"/*eors %[reg2], %[S_7] , ROR #19, %[S_3] , ROR #31 31-19=12*/ \n\t"
"eors %[reg2], %[S_7], %[S_3] , ROR #12 \n\t"
"eors %[S_7], %[reg1],%[S_7] , ROR #19 \n\t"
"ands %[S_1], %[reg1],%[S_1] \n\t"
"eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
"ands %[S_3], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
"eors %[S_3], %[reg1],%[S_3] \n\t"
"/*loop control*/ \n\t"
"subs %[ro], %[ro], #1 \n\t"
"bne enc_loop2 \n\t"
"/*add round const*/ \n\t"
"ldrb %[reg1], [%[rc]] \n\t"
"and %[reg2], %[reg1], 0xf \n\t"
"eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/
"eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/
"adds %[rc], %[rc], #1 \n\t"
"/*sbox first column 0,3,4,7 sbox1(s[0], s[3],ROR(s[4], 28), ROR(s[7], 20)); */ \n\t"
"mvns %[S_0], %[S_0] \n\t"
"ands %[reg1], %[S_3], %[S_0] \n\t"
"eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t"
"orrs %[S_4], %[S_3], %[S_4] , ROR #28 \n\t"
"eors %[S_0], %[S_0], %[S_7] , ROR #20 \n\t"
"eors %[S_4], %[S_4], %[S_0] \n\t"
"eors %[reg2], %[S_3], %[S_7] , ROR #20 \n\t"
"eors %[S_7], %[reg1],%[S_7] , ROR #20 \n\t"
"ands %[S_0], %[reg1],%[S_0] \n\t"
"eors %[S_0], %[reg2],%[S_0] \n\t"
"ands %[S_3], %[S_4], %[reg2] \n\t"
"eors %[S_3], %[reg1], %[S_3] \n\t"
"/*sbox first column 1,2,5,6 sbox1(s[1], ROR(s[2], 31), ROR(s[5], 28), ROR(s[6], 19)); */ \n\t"
"mvns %[S_1], %[S_1] \n\t"
"ands %[reg1], %[S_1], %[S_2] , ROR #31 \n\t"
"eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t"
"/*orrs %[S_5], %[S_5], ROR #28 %[S_2], ROR #31 31-28=3*/ \n\t"
"orrs %[S_5], %[S_5], %[S_2] , ROR #3 \n\t"
"eors %[S_1], %[S_1], %[S_6] , ROR #19 \n\t"
"eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t"
"/*eors %[reg2], %[S_6] , ROR #19, %[S_2] , ROR #31 31-19=12*/ \n\t"
"eors %[reg2], %[S_6], %[S_2] , ROR #12 \n\t"
"eors %[S_6], %[reg1],%[S_6] , ROR #19 \n\t"
"ands %[S_1], %[reg1],%[S_1] \n\t"
"eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
"ands %[S_2], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
"eors %[S_2], %[reg1],%[S_2] \n\t"
"/*add round const*/ \n\t"
"ldrb %[reg1], [%[rc]] \n\t"
"and %[reg2], %[reg1], 0xf \n\t"
"eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/\
"eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/\
"adds %[rc], %[rc], #1 \n\t"
"/*sbox first column 0,2,4,6 sbox1(s[0], s[2], ROR(s[4], 28), ROR(s[6], 20)); */ \n\t"
"mvns %[S_0], %[S_0] \n\t"
"ands %[reg1], %[S_2], %[S_0] \n\t"
"eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t"
"orrs %[S_4], %[S_2], %[S_4] , ROR #28 \n\t"
"eors %[S_0], %[S_0], %[S_6] , ROR #20 \n\t"
"eors %[S_4], %[S_4], %[S_0] \n\t"
"eors %[reg2], %[S_2], %[S_6] , ROR #20 \n\t"
"eors %[S_6], %[reg1],%[S_6] , ROR #20 \n\t"
"ands %[S_0], %[reg1],%[S_0] \n\t"
"eors %[S_0], %[reg2],%[S_0] \n\t"
"ands %[S_2], %[S_4], %[reg2] \n\t"
"eors %[S_2], %[reg1], %[S_2] \n\t"
"/*sbox first column 1,3,5,7 sbox1(s[1], ROR(s[3], 31), ROR(s[5], 28), ROR(s[7], 19)); */ \n\t"
"mvns %[S_1], %[S_1] \n\t"
"ands %[reg1], %[S_1], %[S_3] , ROR #31 \n\t"
"eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t"
"/*orrs %[S_5], %[S_5], ROR #28 %[S_3], ROR #31 31-28=3*/ \n\t"
"orrs %[S_5], %[S_5], %[S_3] , ROR #3 \n\t"
"eors %[S_1], %[S_1], %[S_7] , ROR #19 \n\t"
"eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t"
"/*eors %[reg2], %[S_7] , ROR #19, %[S_3] , ROR #31 31-19=12*/ \n\t"
"eors %[reg2], %[S_7], %[S_3] , ROR #12 \n\t"
"eors %[S_7], %[reg1],%[S_7] , ROR #19 \n\t"
"ands %[S_1], %[reg1],%[S_1] \n\t"
"eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
"ands %[S_3], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
"eors %[S_3], %[reg1],%[S_3] \n\t"
"/*add round const*/ \n\t"
"ldrb %[reg1], [%[rc]] \n\t"
"and %[reg2], %[reg1], 0xf \n\t"
"eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/\
"eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/\
"/*sbox first column 0,3,4,7 sbox1(s[0], s[3],ROR(s[4], 28), ROR(s[7], 20)); */ \n\t"
"mvns %[S_0], %[S_0] \n\t"
"ands %[reg1], %[S_3], %[S_0] \n\t"
"eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t"
"orrs %[S_4], %[S_3], %[S_4] , ROR #28 \n\t"
"eors %[S_0], %[S_0], %[S_7] , ROR #20 \n\t"
"eors %[S_4], %[S_4], %[S_0] \n\t"
"eors %[reg2], %[S_3], %[S_7] , ROR #20 \n\t"
"eors %[S_7], %[reg1],%[S_7] , ROR #20 \n\t"
"ands %[S_0], %[reg1],%[S_0] \n\t"
"eors %[S_0], %[reg2],%[S_0] \n\t"
"ands %[S_3], %[S_4], %[reg2] \n\t"
"eors %[S_3], %[reg1], %[S_3] \n\t"
"/*sbox first column 1,2,5,6 sbox1(s[1], ROR(s[2], 31), ROR(s[5], 28), ROR(s[6], 19)); */ \n\t"
"mvns %[S_1], %[S_1] \n\t"
"ands %[reg1], %[S_1], %[S_2] , ROR #31 \n\t"
"eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t"
"/*orrs %[S_5], %[S_5], ROR #28 %[S_2], ROR #31 31-28=3*/ \n\t"
"orrs %[S_5], %[S_5], %[S_2] , ROR #3 \n\t"
"eors %[S_1], %[S_1], %[S_6] , ROR #19 \n\t"
"eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t"
"/*eors %[reg2], %[S_6] , ROR #19, %[S_2] , ROR #31 31-19=12*/ \n\t"
"eors %[reg2], %[S_6], %[S_2] , ROR #12 \n\t"
"eors %[S_6], %[reg1],%[S_6] , ROR #19 \n\t"
"ands %[S_1], %[reg1],%[S_1] \n\t"
"eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
"ands %[S_2], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
"eors %[S_2], %[reg1],%[S_2] \n\t"
"ROR %[S_3], #31 \n\t"
"ROR %[S_4], #28 \n\t"
"ROR %[S_5], #28 \n\t"
"ROR %[S_6], #20 \n\t"
"ROR %[S_7], #19 \n\t"
: /* output variables - including inputs that are changed */
[ro] "+r" (rounds),[reg1] "=r" (reg1), [reg2] "=r" (reg2), [rc] "+r" (rc),
[S_0] "+r" (s[0]), [S_2] "+r" (s[2]), [S_4] "+r" (s[4]), [S_6] "+r" (s[6]) ,
[S_1] "+r" (s[1]), [S_3] "+r" (s[3]), [S_5] "+r" (s[5]), [S_7] "+r" (s[7])
: /* input variables */
: /* clobber registers for temporary values */
);
}
/*
 * Converts two packed state words back into 8 output bytes.
 *
 * The upper halves of in[0]/in[1] form one 32-bit word and the lower halves
 * the other; each word then goes through four masked swap steps (distances
 * 8, 4, 2, 1 - the reverse of the sequence in getU32Format) before both
 * words are copied to `out` in host byte order.
 *
 * out  destination, 8 bytes are written
 * in   source, in[0] and in[1] are read
 */
void unpackFormat(u8 * out, u32 * in) {
	static const u32 swap_mask[4] = {
		0x0000FF00, 0x00F000F0, 0x0C0C0C0C, 0x22222222
	};
	static const unsigned int swap_dist[4] = { 8, 4, 2, 1 };
	u32 words[2];
	unsigned int w, step;

	words[0] = (in[1] & 0x0000FFFF) | (in[0] << 16);
	words[1] = (in[0] & 0xFFFF0000) | (in[1] >> 16);

	for (w = 0; w < 2; w++) {
		u32 x = words[w];
		for (step = 0; step < 4; step++) {
			/* exchange the bit groups selected by the mask with the
			 * groups swap_dist[step] positions above them */
			u32 delta = (x ^ (x >> swap_dist[step])) & swap_mask[step];
			x ^= delta ^ (delta << swap_dist[step]);
		}
		words[w] = x;
	}
	memcpy(out, words, 8 * sizeof(unsigned char));
}
/*
 * Loads 4 bytes from `in` and rearranges their bits with four masked swap
 * steps (distances 1, 2, 4, 8), storing the result in *out.
 *
 * out  receives the rearranged 32-bit word
 * in   source bytes; exactly 4 are read, no alignment is required
 *
 * The bytes are fetched with memcpy instead of a u32 pointer cast so that
 * unaligned input buffers and strict-aliasing rules are respected.
 */
void getU32Format(u32 *out, const u8* in) {
	u32 r0, lo;

	memcpy(&lo, in, sizeof lo);
	lo = U32BIG(lo);
	r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
	r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
	r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
	r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
	*out = lo;
}
#include"api.h"
#define U32BIG(x) (x)
#include<string.h>
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
/* Reads 4 bytes from `in`, rearranges their bits and stores one word in *out. */
void getU32Format(u32 *out, const u8* in);
/* Inverse direction: turns in[0], in[1] into 8 bytes written to `out`. */
void unpackFormat(u8 * out, u32 * in) ;
/* In-place permutation of s[0..7]; `rc` points at the round-constant bytes and
 * `rounds` is the iteration count of the permutation's internal loop. */
void P256(unsigned int *s, unsigned char *rc, unsigned char rounds);
#include"auxFormat.h"
//#define hash_RATE (32 / 8)
#define hash_RATE 4
#define PRH_ROUNDS 16
//#define PRH_ROUNDS 33
//#define PRH_ROUNDS 68
/*
 * Round constants handed to P256 as its `rc` table by crypto_hash.
 * P256 consumes one byte per round: the high nibble (byte >> 4) is XOR-ed
 * into s[0] and the low nibble (byte & 0x0f) into s[1].
 * With PRH_ROUNDS = 16 loop iterations P256 executes 4 * 16 + 4 = 68 rounds,
 * which is exactly the length of this table.
 */
unsigned char constant7Format[68] = {
/*constant7_hash_256v1:*/
0x1,
0x10,
0x2,
0x20,
0x4,
0x40,
0x9,
0x11,
0x12,
0x22,
0x24,
0x44,
0x49,
0x18,
0x3,
0x30,
0x6,
0x60,
0xd,
0x51,
0x1b,
0x33,
0x36,
0x66,
0x6d,
0x5c,
0x4a,
0x28,
0x5,
0x50,
0xb,
0x31,
0x16,
0x62,
0x2d,
0x55,
0x5b,
0x3a,
0x27,
0x74,
0x4f,
0x78,
0xe,
0x61,
0x1d,
0x53,
0x3b,
0x37,
0x76,
0x6f,
0x7c,
0x4e,
0x68,
0xc,
0x41,
0x19,
0x13,
0x32,
0x26,
0x64,
0x4d,
0x58,
0xa,
0x21,
0x14,
0x42,
0x29,
0x15,
};
/*
 * Sponge hash built on P256 with a 4-byte rate.
 *
 * out    receives CRYPTO_BYTES bytes, produced in two halves of 16 bytes
 * in     message
 * inlen  message length in bytes
 *
 * Each 4-byte block is converted with getU32Format; its upper 16 bits are
 * XOR-ed into s[0] and its lower 16 bits into s[1] before P256 is applied.
 * The last block is always padded with a single 0x01 byte.
 * Always returns 0.
 */
int crypto_hash(unsigned char *out, const unsigned char *in,
		unsigned long long inlen) {
	u32 s[8] = { 0 };
	u32 packed = 0;
	u8 padded[hash_RATE];
	unsigned char *second_half;

	/* absorb all complete blocks */
	for (; inlen >= hash_RATE; inlen -= hash_RATE, in += hash_RATE) {
		getU32Format(&packed, in);
		s[0] ^= packed >> 16;
		s[1] ^= packed & 0xffff;
		P256(s, constant7Format, PRH_ROUNDS);
	}

	/* absorb the padded remainder (0..3 message bytes, then 0x01) */
	memset(padded, 0, hash_RATE);
	memcpy(padded, in, inlen * sizeof(unsigned char));
	padded[inlen] = 0x01;
	getU32Format(&packed, padded);
	s[0] ^= packed >> 16;
	s[1] ^= packed & 0xffff;
	P256(s, constant7Format, PRH_ROUNDS);

	/* squeeze: s[0..3] give 16 bytes, permute, s[0..3] give 16 more */
	unpackFormat(out, s);
	unpackFormat(out + 8, s + 2);

	P256(s, constant7Format, PRH_ROUNDS);

	second_half = out + CRYPTO_BYTES / 2;
	unpackFormat(second_half, s);
	unpackFormat(second_half + 8, s + 2);
	return 0;
}
#include"api.h"
#include<string.h>
typedef unsigned char u8;
typedef unsigned long long u64;
typedef unsigned int u32;
#define PRH_ROUNDS 68
#define RATE 4
/*
 * Bitwise 4-input S-box applied to every bit position of the operands at once.
 *
 * Inputs : a, b, c, d
 * Outputs: a (overwritten in place), f, g, h
 *
 * The expansion uses the temporaries t1, t2, t3, t5, t6, t8, t9 and t11,
 * which must be declared in the calling scope. Statement order matters:
 * `a` is read at the top and only overwritten after its last use.
 * Arguments are parenthesized so that compound expressions expand safely.
 */
#define sbox(a, b, c, d, f, g, h) \
{ \
	t1 = ~(a); \
	t2 = (b) & t1; \
	t3 = (c) ^ t2; \
	(h) = (d) ^ t3; \
	t5 = (b) | (c); \
	t6 = (d) ^ t1; \
	(g) = t5 ^ t6; \
	t8 = (b) ^ (d); \
	t9 = t3 & t6; \
	(a) = t8 ^ t9; \
	t11 = (g) & t8; \
	(f) = t3 ^ t11; \
}
/* Rotate the 64-bit value x left by n bits (0 < n < 64). */
#define LOTR64(x,n) (((x)<<(n))|((x)>>(64-(n))))
/*
 * One permutation round on the four 64-bit rows x0..x3.
 * XORs constant7[i] into x0, runs the S-box (x0 updated in place, the other
 * outputs go to x5, x6, x7) and rotates those outputs left by 1, 8 and 25
 * bits to form the new x1, x2, x3.
 * Requires x0..x3, x5..x7 and the sbox temporaries in the calling scope.
 */
#define ROUND256(i) {\
x0^=constant7[i];\
sbox(x0, x1, x2, x3, x5, x6, x7);\
x1=LOTR64(x5,1);\
x2=LOTR64(x6,8);\
x3=LOTR64(x7,25);\
}
#define U32BIG(x) (x)
#define U64BIG(x) (x)
/*
 * Round constants indexed by round number in ROUND256.
 * This file runs PRH_ROUNDS = 68 rounds per permutation call, so only the
 * first 68 of the 127 entries are read here.
 */
u8 constant7[127] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03, 0x06,
0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a, 0x14, 0x28, 0x51, 0x23, 0x47,
0x0f, 0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b, 0x16,
0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54, 0x29, 0x53,
0x27, 0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43, 0x07,
0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49, 0x13, 0x26,
0x4d, 0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37, 0x6f,
0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31, 0x63, 0x46, 0x0d, 0x1a, 0x34,
0x69, 0x52, 0x25, 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c, 0x39,
0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a, 0x15, 0x2a, 0x55, 0x2b, 0x57,
0x2f, 0x5f, 0x3f, 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40 };
int crypto_hash(unsigned char *out, const unsigned char *in,
unsigned long long inlen) {
u64 t1, t2, t3, t5, t6, t8, t9, t11;
u64 x3 = 0, x2 = 0, x1 = 0, x0 = 0, x7, x6, x5;
u64 i;
u8 tempData[32];
// initialization
//absorb
//RATE=4
while (inlen >= RATE) {
x0 ^= (u64)U32BIG(((u32*)in)[0]);
for (i = 0; i < PRH_ROUNDS; i++) {
ROUND256(i);
}
inlen -= RATE;
in += RATE;
}
memset(tempData, 0, RATE);
memcpy(tempData, in, inlen * sizeof(unsigned char));
tempData[inlen] = 0x01;
x0 ^= (u64)U32BIG(((u32*)tempData)[0]);
for (i = 0; i < PRH_ROUNDS; i++) {
ROUND256(i);
}
//sequeez
((u64*)out)[0] = U64BIG(x0);
((u64*)out)[1] = U64BIG(x1);
out += CRYPTO_BYTES / 2;
for (i = 0; i < PRH_ROUNDS; i++) {
ROUND256(i);
}
((u64*)out)[0] = U64BIG(x0);
((u64*)out)[1] = U64BIG(x1);
return 0;
}
#include "api.h"
#define PRH_ROUNDS 80
typedef unsigned char u8;
typedef unsigned long long u64;
typedef long long i64;
typedef unsigned int u32;
#define RATE 16
/*
 * Bitwise 4-input S-box applied to every bit position of the operands at once.
 *
 * Inputs : a, b, c, d
 * Outputs: a (overwritten in place), f, g, h
 *
 * The expansion uses the temporaries t1, t2, t3, t5, t6, t8, t9 and t11,
 * which must be declared in the calling scope. Statement order matters:
 * `a` is read at the top and only overwritten after its last use.
 * Arguments are parenthesized so that compound expressions expand safely.
 */
#define sbox(a, b, c, d, f, g, h) \
{ \
	t1 = ~(a); \
	t2 = (b) & t1; \
	t3 = (c) ^ t2; \
	(h) = (d) ^ t3; \
	t5 = (b) | (c); \
	t6 = (d) ^ t1; \
	(g) = t5 ^ t6; \
	t8 = (b) ^ (d); \
	t9 = t3 & t6; \
	(a) = t8 ^ t9; \
	t11 = (g) & t8; \
	(f) = t3 ^ t11; \
}
/*
 * Left rotation of a 96-bit row stored as a 32-bit upper part `a` and a
 * 64-bit lower part `b`.
 *
 * ROTR961 / ROTR962           new upper / lower part for 0 < n < 32
 * ROTR96MORE321 / ...MORE322  new upper / lower part for 32 < n < 64
 *
 * ROTR96MORE321 yields a 64-bit value whose low 32 bits are the new upper
 * part (`a` is not needed for it). All arguments are parenthesized so that
 * compound expressions expand safely.
 */
#define ROTR96MORE321(a,b,n) (((b)<<((n)-32))>>32)
#define ROTR96MORE322(a,b,n) (((b)<<(n))|((u64)(a)<<((n)-32))|((b)>>(96-(n))))
#define ROTR961(a,b,n) (((a)<<(n))|((b)>>(64-(n))))
#define ROTR962(a,b,n) (((b)<<(n))|((a)>>(32-(n))))
#define U32BIG(x) (x)
#define U64BIG(x) (x)
/*
 * Round constants indexed by round number in ROUND384; the 80 entries match
 * PRH_ROUNDS = 80 in this file, so every entry is used.
 */
u8 constant7[80] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03, 0x06,
0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a, 0x14, 0x28, 0x51, 0x23, 0x47,
0x0f, 0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b, 0x16,
0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54, 0x29, 0x53,
0x27, 0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43, 0x07,
0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49, 0x13, 0x26,
0x4d, 0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37};
/*
 * One permutation round on four 96-bit rows. Row r is split into a 64-bit
 * lower part xr0 and a 32-bit upper part xr1.
 * XORs constant7[i] into x00, applies the S-box separately to the lower and
 * the upper parts (row 0 updated in place, other outputs in x5*, x6*, x7*),
 * then rotates those outputs left by 1, 8 and 55 bits to form rows 1, 2, 3.
 * Requires all x** variables and the sbox temporaries in the calling scope.
 */
#define ROUND384(i) {\
x00 ^= constant7[i];\
sbox(x00, x10, x20, x30, x50, x60, x70);\
sbox(x01, x11, x21, x31, x51, x61, x71);\
x11 = ROTR961(x51, x50, 1);\
x10 = ROTR962(x51, x50, 1);\
x21 = ROTR961(x61, x60, 8);\
x20 = ROTR962(x61, x60, 8);\
x31 = ROTR96MORE321(x71, x70, 55);\
x30 = ROTR96MORE322(x71, x70, 55);\
}
int crypto_hash(unsigned char *out, const unsigned char *in,
unsigned long long inlen) {
u64 i;
u64 t1, t2, t3, t5, t6, t8, t9, t11;
u64 x30 = 0, x20 = 0, x10 = 0, x00 = 0;
u32 x31 = 0x80000000, x21 = 0, x11 = 0, x01 = 0;
u64 x50, x60, x70;
u32 x51, x61, x71;
u8 tempData1[16] = { 0 };
// initialization
//absorb
while (inlen >= RATE) {
x00 ^= U64BIG(*(u64*)in);
x01 ^= U32BIG(*(u32*)(in + 8));
x10 ^= U32BIG(*(u32*)(in + 12));
for (i = 0; i < PRH_ROUNDS; i++) {
ROUND384(i);
}
inlen -= RATE;
in += RATE;
}
memset(tempData1, 0, RATE);
memcpy(tempData1, in, inlen );
tempData1[inlen] = 0x01;
x00 ^= U64BIG(*(u64*)tempData1);
x01 ^= U32BIG(*(u32*)(tempData1 + 8));
x10 ^= U32BIG(*(u32*)(tempData1 + 12));
for (i = 0; i < PRH_ROUNDS; i++) {
ROUND384(i);
}
//sequeez
*(u64*)(out) = U64BIG(x00);
*(u32*)(out + 8) = U32BIG(x01);
*(u32*)(out + 12) = U64BIG(x10);
out += CRYPTO_BYTES / 2;
for (i = 0; i < PRH_ROUNDS; i++) {
ROUND384(i);
}
*(u64*)(out) = U64BIG(x00);
*(u32*)(out + 8) = U32BIG(x01);
*(u32*)(out + 12) = U64BIG(x10);
return 0;
}
#include "api.h"
#include <string.h>
typedef unsigned char u8;
typedef unsigned long long u64;
typedef unsigned int u32;
#define RATE 6
#define PRH_ROUNDS 104
/*
 * Bitwise 4-input S-box applied to every bit position of the operands at once.
 *
 * Inputs : a, b, c, d
 * Outputs: a (overwritten in place), f, g, h
 *
 * The expansion uses the temporaries t1, t2, t3, t5, t6, t8, t9 and t11,
 * which must be declared in the calling scope. Statement order matters:
 * `a` is read at the top and only overwritten after its last use.
 * Arguments are parenthesized so that compound expressions expand safely.
 */
#define sbox(a, b, c, d, f, g, h) \
{ \
	t1 = ~(a); \
	t2 = (b) & t1; \
	t3 = (c) ^ t2; \
	(h) = (d) ^ t3; \
	t5 = (b) | (c); \
	t6 = (d) ^ t1; \
	(g) = t5 ^ t6; \
	t8 = (b) ^ (d); \
	t9 = t3 & t6; \
	(a) = t8 ^ t9; \
	t11 = (g) & t8; \
	(f) = t3 ^ t11; \
}
/*
 * Left rotation of a 96-bit row stored as a 32-bit upper part `a` and a
 * 64-bit lower part `b`.
 *
 * ROTR961 / ROTR962           new upper / lower part for 0 < n < 32
 * ROTR96MORE321 / ...MORE322  new upper / lower part for 32 < n < 64
 *
 * ROTR96MORE321 yields a 64-bit value whose low 32 bits are the new upper
 * part (`a` is not needed for it). All arguments are parenthesized so that
 * compound expressions expand safely.
 */
#define ROTR961(a,b,n) (((a)<<(n))|((b)>>(64-(n))))
#define ROTR962(a,b,n) (((b)<<(n))|((a)>>(32-(n))))
#define ROTR96MORE321(a,b,n) (((b)<<((n)-32))>>32)
#define ROTR96MORE322(a,b,n) (((b)<<(n))|((u64)(a)<<((n)-32))|((b)>>(96-(n))))
#define U32BIG(x) (x)
#define U64BIG(x) (x)
/*
 * Round constants indexed by round number in ROUND384; the 104 entries match
 * PRH_ROUNDS = 104 in this file, so every entry is used.
 */
u8 constant7[104] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03, 0x06,
0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a, 0x14, 0x28, 0x51, 0x23, 0x47,
0x0f, 0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b, 0x16,
0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54, 0x29, 0x53,
0x27, 0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43, 0x07,
0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49, 0x13, 0x26,
0x4d, 0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37, 0x6f,
0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31, 0x63, 0x46, 0x0d, 0x1a, 0x34,
0x69, 0x52, 0x25, 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c };
/*
 * One permutation round on four 96-bit rows. Row r is split into a 64-bit
 * lower part xr0 and a 32-bit upper part xr1.
 * XORs constant7[i] into x00, applies the S-box separately to the lower and
 * the upper parts (row 0 updated in place, other outputs in x5*, x6*, x7*),
 * then rotates those outputs left by 1, 8 and 55 bits to form rows 1, 2, 3.
 * Requires all x** variables and the sbox temporaries in the calling scope.
 */
#define ROUND384(i) {\
x00 ^= constant7[i];\
sbox(x00, x10, x20, x30, x50, x60, x70);\
sbox(x01, x11, x21, x31, x51, x61, x71);\
x11 = ROTR961(x51, x50, 1);\
x10 = ROTR962(x51, x50, 1);\
x21 = ROTR961(x61, x60, 8);\
x20 = ROTR962(x61, x60, 8);\
x31 = ROTR96MORE321(x71, x70, 55);\
x30 = ROTR96MORE322(x71, x70, 55);\
}
/*
 * Sponge hash on a 4 x 96-bit state with a 6-byte rate.
 *
 * out    receives CRYPTO_BYTES bytes in two halves; each half is
 *        x00 (8 bytes) || x01 (4 bytes) || the first CRYPTO_BYTES / 4
 *        bytes of x10 || x11
 * in     message
 * inlen  message length in bytes
 *
 * Each 6-byte block is XOR-ed into the low 48 bits of x00; bytes 6 and 7 of
 * the staging buffer stay zero during absorption.
 *
 * Words are moved between byte buffers and integers with memcpy rather than
 * through u32/u64 pointer casts, so the byte arrays need no particular
 * alignment and no aliasing rule is violated. Byte order is the host's,
 * as before.
 * Always returns 0.
 */
int crypto_hash(unsigned char *out, const unsigned char *in,
		unsigned long long inlen) {
	/* t*, x** are referenced by name from sbox / ROUND384. */
	u64 i;
	u64 t1, t2, t3, t5, t6, t8, t9, t11;
	u64 x30 = 0, x20 = 0, x10 = 0, x00 = 0;
	u32 x31 = 0, x21 = 0, x11 = 0, x01 = 0;
	u64 x50, x60, x70;
	u32 x51, x61, x71;
	u64 w64;
	u32 w32;
	u8 tempData1[24] = { 0 };

	// absorb complete RATE-byte blocks
	while (inlen >= RATE) {
		memcpy(tempData1, in, RATE);
		memcpy(&w64, tempData1, sizeof w64);
		x00 ^= U64BIG(w64);
		for (i = 0; i < PRH_ROUNDS; i++) {
			ROUND384(i);
		}
		inlen -= RATE;
		in += RATE;
	}

	// absorb the remainder, padded with a single 0x01 byte
	memset(tempData1, 0, RATE);
	memcpy(tempData1, in, inlen * sizeof(unsigned char));
	tempData1[inlen] = 0x01;
	memcpy(&w64, tempData1, sizeof w64);
	x00 ^= U64BIG(w64);
	for (i = 0; i < PRH_ROUNDS; i++) {
		ROUND384(i);
	}

	// squeeze first half
	w64 = U64BIG(x00);
	memcpy(out, &w64, sizeof w64);
	w32 = U32BIG(x01);
	memcpy(out + 8, &w32, sizeof w32);
	w64 = U64BIG(x10);
	memcpy(tempData1, &w64, sizeof w64);
	w32 = U32BIG(x11);
	memcpy(tempData1 + 8, &w32, sizeof w32);
	memcpy(out + 12, tempData1, CRYPTO_BYTES / 4);
	out += CRYPTO_BYTES / 2;

	for (i = 0; i < PRH_ROUNDS; i++) {
		ROUND384(i);
	}

	// squeeze second half
	w64 = U64BIG(x00);
	memcpy(out, &w64, sizeof w64);
	w32 = U32BIG(x01);
	memcpy(out + 8, &w32, sizeof w32);
	w64 = U64BIG(x10);
	memcpy(tempData1, &w64, sizeof w64);
	w32 = U32BIG(x11);
	memcpy(tempData1 + 8, &w32, sizeof w32);
	memcpy(out + 12, tempData1, CRYPTO_BYTES / 4);
	return 0;
}
#include "api.h"
typedef unsigned char u8;
typedef unsigned long long u64;
#define PRH_ROUNDS 140
#define RATE 8
#define ROTR(x,n) (((x)>>(n))|((x)<<(64-(n))))
#define U64BIG(x) (x)
/*
 * Round constants indexed by round number in ROUND512.
 * This file runs PRH_ROUNDS = 140 rounds per permutation call, so only the
 * first 140 of the 255 entries are read here.
 */
static const u8 constant8[255] = { 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47,
0x8e, 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25, 0x4b, 0x97, 0x2e,
0x5c, 0xb8, 0x70, 0xe0, 0xc0, 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64,
0xc9, 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37, 0x6e, 0xdc, 0xb9,
0x72, 0xe4, 0xc8, 0x90, 0x20, 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56,
0xad, 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac, 0x59, 0xb2, 0x65,
0xcb, 0x96, 0x2c, 0x58, 0xb0, 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d,
0xfb, 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a, 0xf5, 0xeb, 0xd7,
0xae, 0x5d, 0xba, 0x74, 0xe8, 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43,
0x86, 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7, 0x8f, 0x1e, 0x3c,
0x79, 0xf3, 0xe7, 0xce, 0x9c, 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62,
0xc5, 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4, 0x48, 0x91, 0x22,
0x45, 0x8a, 0x14, 0x29, 0x52, 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53,
0xa7, 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76, 0xec, 0xd9, 0xb3,
0x67, 0xcf, 0x9e, 0x3d, 0x7b, 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa,
0xf4, 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd, 0x9a, 0x35, 0x6a,
0xd4, 0xa8, 0x51, 0xa3, 0x46, 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07,
0x0e, 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab, 0x57, 0xaf, 0x5f,
0xbe, 0x7c, 0xf9, 0xf2, 0xe5, 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84,
0x09, 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe, 0xfc, 0xf8, 0xf0,
0xe1, 0xc2, 0x85, 0x0b, 0x17, 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6,
0x8d, 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80 };
/*
 * Bitwise 4-input S-box applied to every bit position of the operands at once.
 *
 * Inputs : a, b, c, d
 * Outputs: a (overwritten in place), f, g, h
 *
 * The expansion uses the temporaries t1, t2, t3, t5, t6, t8, t9 and t11,
 * which must be declared in the calling scope. Statement order matters:
 * `a` is read at the top and only overwritten after its last use.
 * Arguments are parenthesized so that compound expressions expand safely.
 */
#define sbox(a, b, c, d, f, g, h) \
{ \
	t1 = ~(a); \
	t2 = (b) & t1; \
	t3 = (c) ^ t2; \
	(h) = (d) ^ t3; \
	t5 = (b) | (c); \
	t6 = (d) ^ t1; \
	(g) = t5 ^ t6; \
	t8 = (b) ^ (d); \
	t9 = t3 & t6; \
	(a) = t8 ^ t9; \
	t11 = (g) & t8; \
	(f) = t3 ^ t11; \
}
/* Number of elements of a true array (not valid for pointers). */
#define ARR_SIZE(a) (sizeof(a)/sizeof((a)[0]))
/*
 * Halves of a 128-bit left rotation built from two 64-bit words.
 * LOTR1281 yields (a << n) with the top n bits of b shifted in at the bottom;
 * LOTR1282 yields (b << n) with the top n bits of a shifted in at the bottom.
 * n must satisfy 0 < n < 64. The shift count is fully parenthesized so that
 * compound expressions expand safely.
 */
#define LOTR1281(a,b,n) (((a)<<(n))|((b)>>(64-(n))))
#define LOTR1282(a,b,n) (((b)<<(n))|((a)>>(64-(n))))
/*
 * One permutation round on four 128-bit rows. Row r is held in two 64-bit
 * words xr0 and xr1.
 * XORs constant8[i] into x00, applies the S-box to the xr0 words and to the
 * xr1 words (row 0 updated in place, other outputs in b1*, b2*, b3*), then
 * rotates those outputs left as 128-bit values by 1, 16 and 25 bits to form
 * rows 1, 2, 3.
 * Requires all x** / b** variables and the sbox temporaries in the calling
 * scope.
 */
#define ROUND512(i) {\
x00^=constant8[i];\
sbox(x00, x10, x20, x30, b10, b20, b30);\
sbox(x01, x11, x21, x31, b11, b21, b31);\
x10=LOTR1281(b10,b11,1);\
x20=LOTR1281(b20,b21,16);\
x30=LOTR1281(b30,b31,25);\
x11=LOTR1282(b10,b11,1);\
x21=LOTR1282(b20,b21,16);\
x31=LOTR1282(b30,b31,25);\
}
/*
 * Sponge hash on a 4 x 128-bit state with an 8-byte rate.
 *
 * out    receives CRYPTO_BYTES bytes in two halves; each half is the 32
 *        bytes x00 || x01 || x10 || x11
 * in     message
 * inlen  message length in bytes
 *
 * Message and digest bytes are moved with memcpy rather than through u64
 * pointer casts, so no alignment is required of `in`/`out` and no aliasing
 * rule is violated. Byte order is the host's, as before. The unused locals
 * b00 and b01 have been removed.
 * Always returns 0.
 */
int crypto_hash(unsigned char *out, const unsigned char *in,
		unsigned long long inlen) {
	/* b**, t*, x** are referenced by name from sbox / ROUND512. */
	u64 b11, b21, b31, b10, b20, b30;
	u64 t1, t2, t3, t5, t6, t8, t9, t11;
	u64 x30 = 0, x20 = 0, x10 = 0, x00 = 0, x31 = 0, x21 = 0, x11 = 0, x01 = 0;
	u64 i;
	u64 lane;
	u64 words[4];
	u8 tempData[RATE];

	// absorb complete RATE-byte blocks
	while (inlen >= RATE) {
		memcpy(&lane, in, sizeof lane);
		x00 ^= U64BIG(lane);
		for (i = 0; i < PRH_ROUNDS; i++) {
			ROUND512(i);
		}
		inlen -= RATE;
		in += RATE;
	}

	// absorb the remainder, padded with a single 0x01 byte
	memset(tempData, 0, RATE);
	memcpy(tempData, in, inlen * sizeof(unsigned char));
	tempData[inlen] = 0x01;
	memcpy(&lane, tempData, sizeof lane);
	x00 ^= U64BIG(lane);
	for (i = 0; i < PRH_ROUNDS; i++) {
		ROUND512(i);
	}

	// squeeze first half
	words[0] = U64BIG(x00);
	words[1] = U64BIG(x01);
	words[2] = U64BIG(x10);
	words[3] = U64BIG(x11);
	memcpy(out, words, sizeof words);
	out += CRYPTO_BYTES / 2;

	for (i = 0; i < PRH_ROUNDS; i++) {
		ROUND512(i);
	}

	// squeeze second half
	words[0] = U64BIG(x00);
	words[1] = U64BIG(x01);
	words[2] = U64BIG(x10);
	words[3] = U64BIG(x11);
	memcpy(out, words, sizeof words);
	return 0;
}
#include "api.h"
typedef unsigned char u8;
typedef unsigned long long u64;
#define PRH_ROUNDS 140
#define RATE 8
#define U64BIG(x) (x)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string.h>
#include <stdlib.h>
#include <emmintrin.h>//sse2 header file(include sse header file)
#include <pmmintrin.h> //SSE3(include emmintrin.h)
#include <tmmintrin.h>//SSSE3(include pmmintrin.h)
#include <smmintrin.h>//SSE4.1(include tmmintrin.h)
#include <nmmintrin.h>//SSE4.2(include smmintrin.h)
#include <immintrin.h>
#include <xmmintrin.h>
#include <wmmintrin.h>
#define U64BIG(x) (x)
#define U32BIG(x) (x)
/*
 * Round constants indexed by round number in ROUND512.
 * This file runs PRH_ROUNDS = 140 rounds per permutation call, so only the
 * first 140 of the 255 entries are read here.
 */
static const u8 constant8[255] = { 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47,
0x8e, 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25, 0x4b, 0x97, 0x2e,
0x5c, 0xb8, 0x70, 0xe0, 0xc0, 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64,
0xc9, 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37, 0x6e, 0xdc, 0xb9,
0x72, 0xe4, 0xc8, 0x90, 0x20, 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56,
0xad, 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac, 0x59, 0xb2, 0x65,
0xcb, 0x96, 0x2c, 0x58, 0xb0, 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d,
0xfb, 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a, 0xf5, 0xeb, 0xd7,
0xae, 0x5d, 0xba, 0x74, 0xe8, 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43,
0x86, 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7, 0x8f, 0x1e, 0x3c,
0x79, 0xf3, 0xe7, 0xce, 0x9c, 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62,
0xc5, 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4, 0x48, 0x91, 0x22,
0x45, 0x8a, 0x14, 0x29, 0x52, 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53,
0xa7, 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76, 0xec, 0xd9, 0xb3,
0x67, 0xcf, 0x9e, 0x3d, 0x7b, 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa,
0xf4, 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd, 0x9a, 0x35, 0x6a,
0xd4, 0xa8, 0x51, 0xa3, 0x46, 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07,
0x0e, 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab, 0x57, 0xaf, 0x5f,
0xbe, 0x7c, 0xf9, 0xf2, 0xe5, 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84,
0x09, 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe, 0xfc, 0xf8, 0xf0,
0xe1, 0xc2, 0x85, 0x0b, 0x17, 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6,
0x8d, 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80 };
/*
 * forward_sbox_SSE(a, b, c, d, f, g, h)
 *
 * Substitution step built only from NOT/AND/OR/XOR, evaluated on every bit
 * position of four __m128i rows in parallel.
 *   a          : read as an input row, then overwritten with a result row.
 *   b, c, d    : input rows, only read.
 *   f, g, h    : lvalues that receive the other three result rows.
 * Requires an all-ones __m128i named `all1` in the expansion scope.
 * Results are assigned in the order h, g, a, f while inputs are still being
 * read, so the outputs should not alias the inputs.
 */
#define forward_sbox_SSE(a, b, c, d, f, g, h) \
{ \
	/* ~a, formed by XOR with the all-ones vector */ \
	const __m128i sbx_not_a = _mm_xor_si128(a, all1); \
	/* c ^ (b & ~a): shared by h, a and f */ \
	const __m128i sbx_mix = _mm_xor_si128(c, _mm_and_si128(b, sbx_not_a)); \
	h = _mm_xor_si128(d, sbx_mix); \
	const __m128i sbx_b_or_c = _mm_or_si128(b, c); \
	const __m128i sbx_d_xor_not_a = _mm_xor_si128(d, sbx_not_a); \
	g = _mm_xor_si128(sbx_b_or_c, sbx_d_xor_not_a); \
	const __m128i sbx_b_xor_d = _mm_xor_si128(b, d); \
	a = _mm_xor_si128(sbx_b_xor_d, _mm_and_si128(sbx_mix, sbx_d_xor_not_a)); \
	f = _mm_xor_si128(sbx_mix, _mm_and_si128(g, sbx_b_xor_d)); \
}
/*
 * KNOT512_ROTL128(v, n): rotate the 128-bit value v left by n bits, for
 * 0 < n < 64. Each 64-bit lane is shifted left by n, and the n bits that fall
 * off the other lane are brought in by swapping the two lanes and shifting
 * right by 64 - n.
 */
#define KNOT512_ROTL128(v, n) \
	_mm_or_si128( \
		_mm_slli_epi64((v), (n)), \
		_mm_srli_epi64(_mm_shuffle_epi32((v), _MM_SHUFFLE(1, 0, 3, 2)), 64 - (n)))

/*
 * ROUND512(i): one round on the 4 x 128-bit `state` array.
 *   1. XOR constant8[i] into the low 64-bit lane of state[0].
 *   2. Run forward_sbox_SSE over the four rows; state[0] is updated in place
 *      and the other three result rows land in out1, out2, out3.
 *   3. Store out1, out2, out3 rotated left by 1, 16 and 25 bits into
 *      state[1], state[2], state[3].
 * Requires `state`, `out1`, `out2`, `out3` and whatever forward_sbox_SSE
 * needs to be declared in the expansion scope.
 */
#define ROUND512(i) { \
	const __m128i knot512_rc = _mm_set_epi64x(0, (u64)constant8[i]); \
	state[0] = _mm_xor_si128(state[0], knot512_rc); \
	forward_sbox_SSE(state[0], state[1], state[2], state[3], out1, out2, out3); \
	state[1] = KNOT512_ROTL128(out1, 1); \
	state[2] = KNOT512_ROTL128(out2, 16); \
	state[3] = KNOT512_ROTL128(out3, 25); \
}
/*
 * crypto_hash - hash a message with a sponge over four 128-bit state rows.
 *
 * The message is XORed into the low 64 bits of state[0] eight bytes at a
 * time, with PRH_ROUNDS applications of ROUND512 after each block. The final
 * (possibly empty) partial block is followed by a single 0x01 pad byte.
 * The digest is then produced in two halves of CRYPTO_BYTES / 2 bytes, each
 * copied from the start of the state array, with one more PRH_ROUNDS-round
 * pass between the two copies.
 *
 * out:   destination for CRYPTO_BYTES bytes of digest.
 * in:    message bytes; may be NULL when inlen is 0.
 * inlen: message length in bytes.
 *
 * Returns 0 unconditionally.
 *
 * The 8-byte loads and the 8-byte pad buffer assume RATE is 8.
 */
int crypto_hash(unsigned char *out, const unsigned char *in,
		unsigned long long inlen) {
	/*
	 * Constants and scratch registers that the ROUND512 expansion may
	 * reference by name; they must stay declared in this scope.
	 */
	__m128i all1 = _mm_set1_epi32(-1);
	__m128i tmm1, tmm2, tmm3, tmm5, tmm6, tmm8, tmm9, tmm11, out1, out2, out3;
	__m128i state[4] = { 0 };
	u8 i = 0;
	u8 tempData[8] = { 0 };

	/* Absorb every complete RATE-byte block. */
	while (inlen >= RATE) {
		state[0] = _mm_xor_si128(state[0],
				_mm_loadl_epi64((const __m128i *)in));
		for (i = 0; i < PRH_ROUNDS; i++) {
			ROUND512(i);
		}
		inlen -= RATE;
		in += RATE;
	}

	/*
	 * Final block: the remaining inlen (< RATE) bytes followed by the 0x01
	 * pad byte; the rest of tempData is already zero. The copy is skipped
	 * for an empty tail so memcpy never receives a possibly-NULL pointer.
	 */
	if (inlen > 0) {
		memcpy(tempData, in, (size_t)inlen);
	}
	tempData[inlen] = 0x01;
	state[0] = _mm_xor_si128(state[0],
			_mm_loadl_epi64((const __m128i *)tempData));
	for (i = 0; i < PRH_ROUNDS; i++) {
		ROUND512(i);
	}

	/* Squeeze: first half of the digest, permute, second half. */
	memcpy(out, state, CRYPTO_BYTES / 2);
	for (i = 0; i < PRH_ROUNDS; i++) {
		ROUND512(i);
	}
	memcpy(out + CRYPTO_BYTES / 2, state, CRYPTO_BYTES / 2);
	return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment