Commit eaa31add by Enrico Pozzobon

Merge branch 'email-submissions' into master

parents 41412efd 109ff80f
//k=n=tag=128 b=256 r=64 c=192
#define CRYPTO_KEYBYTES 16 // #define CRYPTO_KEYBYTES 16 //
#define CRYPTO_NSECBYTES 0 #define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16 #define CRYPTO_NPUBBYTES 16
......
#include"auxFormat.h"
//#define PRINTFormatToU8
#define PRINTU8
/*
 * Round-constant table, one byte per round (52 entries).
 * When passed to P256 as the round-constant pointer, the high nibble of the
 * current entry is XORed into s[0] and the low nibble into s[1].
 */
unsigned char constant6Format[52] = {
/*constant6_aead_128v1:*/
0x01, 0x10, 0x02, 0x20, 0x04, 0x41, 0x11, 0x12, 0x22, 0x24, 0x45, 0x50, 0x03,
0x30, 0x06, 0x61, 0x15, 0x53, 0x33, 0x36, 0x67, 0x74, 0x46, 0x60, 0x05,
0x51, 0x13, 0x32, 0x26, 0x65, 0x54, 0x42, 0x21, 0x14, 0x43, 0x31, 0x16,
0x63, 0x35, 0x57, 0x72, 0x27, 0x75, 0x56, 0x62, 0x25, 0x55, 0x52, 0x23,
0x34, 0x47, 0x70, };
/*
 * P256 - run `rounds` rounds of the 256-bit permutation on the state, in place
 * (ARM inline-assembly version).
 *
 * s      : eight 32-bit state words. s[0..1], s[2..3], s[4..5] and s[6..7]
 *          hold rows 0..3; each row is kept as two 32-bit halves, so the
 *          S-box is applied once to the even-indexed words and once to the
 *          odd-indexed words.
 * rc     : round-constant bytes, one per round; `rounds` bytes are read.
 * rounds : number of rounds to run; 0 leaves the state untouched.
 */
void P256(unsigned int *s, unsigned char *rc, unsigned char rounds) {
    unsigned int reg1, reg2;

    /* The asm loop tests its counter at the bottom, so it must never be
     * entered with a zero count (it would wrap and run 256 rounds). */
    if (rounds == 0) {
        return;
    }

    asm volatile (
        /* Numeric local label: stays unique even if the compiler emits this
         * asm statement more than once (inlining, cloning). */
        "1: \n\t"
        "/*add round const*/ \n\t"
        "ldrb %[reg1], [%[rc]] \n\t"
        "and %[reg2], %[reg1], 0xf \n\t"
        "eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /* s[0] ^= rc[i] >> 4;   */
        "eors %[S_1], %[S_1], %[reg2] \n\t"        /* s[1] ^= rc[i] & 0x0f; */
        "/*sbox first column*/ \n\t"
        "mvns %[S_0], %[S_0] \n\t"
        "ands %[reg1], %[S_2], %[S_0] \n\t"
        "eors %[reg1], %[S_4], %[reg1] \n\t"
        "orrs %[S_4], %[S_2], %[S_4] \n\t"
        "eors %[S_0], %[S_6], %[S_0] \n\t"
        "eors %[S_4], %[S_4], %[S_0] \n\t"
        "eors %[reg2], %[S_2], %[S_6] \n\t"
        "eors %[S_6], %[S_6], %[reg1] \n\t"
        "ands %[S_0], %[reg1],%[S_0] \n\t"
        "eors %[S_0], %[reg2],%[S_0] \n\t"
        "ands %[S_2], %[S_4], %[reg2] \n\t"
        "eors %[S_2], %[reg1], %[S_2] \n\t"
        "/*sbox second column*/ \n\t"
        "mvns %[S_1], %[S_1] \n\t"
        "ands %[reg1], %[S_3], %[S_1] \n\t"
        "eors %[reg1], %[S_5], %[reg1] \n\t"
        "orrs %[S_5], %[S_3], %[S_5] \n\t"
        "eors %[S_1], %[S_7], %[S_1] \n\t"
        "eors %[S_5], %[S_5], %[S_1] \n\t"
        "eors %[reg2], %[S_3], %[S_7] \n\t"
        "eors %[S_7], %[S_7], %[reg1] \n\t"
        "ands %[S_1], %[reg1],%[S_1] \n\t"
        "eors %[S_1], %[reg2],%[S_1] \n\t"
        "ands %[S_3], %[S_5], %[reg2] \n\t"
        "eors %[S_3], %[reg1], %[S_3] \n\t"
        /* Row 1: swap the two halves and rotate one of them left by 1. */
        "/*rotate shift left 1 bit*/ \n\t"
        "mov %[reg1], %[S_3] \n\t"
        "mov %[S_3], %[S_2] , ROR #31 \n\t"
        "mov %[S_2], %[reg1] \n\t"
        /* Row 2: both halves rotated left by 4 (ROR #28). */
        "/*rotate shift left 8 bits*/ \n\t"
        "mov %[S_4], %[S_4] , ROR #28 \n\t"
        "mov %[S_5], %[S_5] , ROR #28 \n\t"
        /* Row 3: swap the halves, rotating them left by 12 and 13. */
        "/*rotate shift left 25 bits*/ \n\t"
        "mov %[reg1], %[S_6] \n\t"
        "mov %[S_6], %[S_7] , ROR #20 \n\t"
        "mov %[S_7], %[reg1] , ROR #19 \n\t"
        "/*loop control*/ \n\t"
        "adds %[rc], %[rc], #1 \n\t"
        "subs %[ro], %[ro], #1 \n\t"
        "bne 1b \n\t"
        /* ----------------------------- */
        : /* output variables - including inputs that are changed */
        [ro] "+r" (rounds), [reg1] "=&r" (reg1), [reg2] "=&r" (reg2), [rc] "+r" (rc),
        [S_0] "+r" (s[0]), [S_2] "+r" (s[2]), [S_4] "+r" (s[4]), [S_6] "+r" (s[6]),
        [S_1] "+r" (s[1]), [S_3] "+r" (s[3]), [S_5] "+r" (s[5]), [S_7] "+r" (s[7])
        : /* input variables */
        : /* the flag-setting instructions change the condition codes, and
           * ldrb reads memory that is not listed as an operand */
        "cc", "memory"
    );
}
/*
 * packFormat - read 8 bytes from `in` and store them in the bit-interleaved
 * state format.
 *
 * Each 32-bit input word is de-interleaved so that its odd-position bits end
 * up in the upper 16 bits and its even-position bits in the lower 16 bits.
 * The halves of the two words are then combined:
 *   out[0] = odd bits of word 1  (high half) | odd bits of word 0  (low half)
 *   out[1] = even bits of word 1 (high half) | even bits of word 0 (low half)
 *
 * in  : 8 readable bytes, any alignment.
 * out : two 32-bit words, overwritten.
 */
void packFormat(u32 * out, const u8 * in) {
    u32 t0, t1;
    u32 r0, r1;

    /* Load through memcpy: `in` is a byte pointer with no alignment
     * guarantee, and reading it through a cast u32 pointer is undefined
     * behaviour (alignment / effective-type rules). */
    memcpy(&t0, in, sizeof t0);
    memcpy(&t1, in + sizeof t0, sizeof t1);
    t0 = U32BIG(t0);
    t1 = U32BIG(t1);

    /* Four swap steps gather odd bits high / even bits low in t0. */
    r0 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= r0 ^ (r0 << 1);
    r0 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= r0 ^ (r0 << 2);
    r0 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= r0 ^ (r0 << 4);
    r0 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= r0 ^ (r0 << 8);

    /* Same for t1. */
    r1 = (t1 ^ (t1 >> 1)) & 0x22222222, t1 ^= r1 ^ (r1 << 1);
    r1 = (t1 ^ (t1 >> 2)) & 0x0C0C0C0C, t1 ^= r1 ^ (r1 << 2);
    r1 = (t1 ^ (t1 >> 4)) & 0x00F000F0, t1 ^= r1 ^ (r1 << 4);
    r1 = (t1 ^ (t1 >> 8)) & 0x0000FF00, t1 ^= r1 ^ (r1 << 8);

    out[0] = (t1 & 0xFFFF0000) | (t0 >> 16);      /* t1.odd  | t0.odd  */
    out[1] = (t1 << 16) | (t0 & 0x0000FFFF);      /* t1.even | t0.even */
}
/*
 * unpackFormat - inverse of packFormat: turn two bit-interleaved state words
 * back into 8 plain bytes.
 *
 * in  : two 32-bit words in the interleaved format (read only).
 * out : receives 8 bytes.
 */
void unpackFormat(u8 * out, u32 * in) {
    /* Regroup the 16-bit halves so each word again holds its own
     * odd bits (upper half) and even bits (lower half). */
    u32 hi = (in[0] & 0xFFFF0000) | (in[1] >> 16);
    u32 lo = (in[1] & 0x0000FFFF) | (in[0] << 16);
    u32 swap;

    /* Re-interleave the low word: the pack steps applied in reverse order. */
    swap = (lo ^ (lo >> 8)) & 0x0000FF00;
    lo ^= swap ^ (swap << 8);
    swap = (lo ^ (lo >> 4)) & 0x00F000F0;
    lo ^= swap ^ (swap << 4);
    swap = (lo ^ (lo >> 2)) & 0x0C0C0C0C;
    lo ^= swap ^ (swap << 2);
    swap = (lo ^ (lo >> 1)) & 0x22222222;
    lo ^= swap ^ (swap << 1);

    /* Same for the high word. */
    swap = (hi ^ (hi >> 8)) & 0x0000FF00;
    hi ^= swap ^ (swap << 8);
    swap = (hi ^ (hi >> 4)) & 0x00F000F0;
    hi ^= swap ^ (swap << 4);
    swap = (hi ^ (hi >> 2)) & 0x0C0C0C0C;
    hi ^= swap ^ (swap << 2);
    swap = (hi ^ (hi >> 1)) & 0x22222222;
    hi ^= swap ^ (swap << 1);

    u32 words[2] = { lo, hi };
    memcpy(out, words, 8 * sizeof(unsigned char));
}
/*
 * getU32Format - read 4 bytes from `in` and de-interleave the resulting
 * 32-bit word: odd-position bits are gathered in the upper 16 bits, even-
 * position bits in the lower 16 bits.
 *
 * in  : 4 readable bytes, any alignment.
 * out : receives the converted word.
 */
void getU32Format(u32 *out, const u8* in) {
    u32 r0, lo;

    /* memcpy load instead of dereferencing a cast pointer: avoids the
     * unaligned / incompatible-type access on a plain byte buffer. */
    memcpy(&lo, in, sizeof lo);
    lo = U32BIG(lo);

    r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
    r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
    r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
    r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
    *out = lo;
}
#include <stdio.h> #include<malloc.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include"crypto_aead.h" #include"crypto_aead.h"
#include"api.h" #include"api.h"
#include"stdio.h"
#include <string.h> #include <string.h>
#define U32BIG(x) (x) #define U32BIG(x) (x)
#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) #define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) #define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n))))
#define sbox(a, b, c, d, e, f, g, h) \
{ \
t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; e = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
}
typedef unsigned char u8; typedef unsigned char u8;
typedef unsigned int u32; typedef unsigned int u32;
typedef unsigned long long u64; typedef unsigned long long u64;
unsigned char constant6Format[52];
unsigned char constant7Format[68];
#define RATE (64 / 8)
#define PR0_ROUNDS 52
#define PR_ROUNDS 28
#define PRF_ROUNDS 32
#define packFormat(out,in) {\ void packFormat(u32 * out, const u8 * in);
t1 = U32BIG(((u32*)in)[0]); \ void unpackFormat(u8 * out, u32 * in);
t2 = U32BIG(((u32*)in)[1]); \
t3 = (t1 ^ (t1 >> 1)) & 0x22222222, t1 ^= t3 ^ (t3 << 1); \
t3 = (t1 ^ (t1 >> 2)) & 0x0C0C0C0C, t1 ^= t3 ^ (t3 << 2); \
t3 = (t1 ^ (t1 >> 4)) & 0x00F000F0, t1 ^= t3 ^ (t3 << 4); \
t3 = (t1 ^ (t1 >> 8)) & 0x0000FF00, t1 ^= t3 ^ (t3 << 8); \
t5 = (t2 ^ (t2 >> 1)) & 0x22222222, t2 ^= t5 ^ (t5 << 1); \
t5 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t5 ^ (t5 << 2); \
t5 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t5 ^ (t5 << 4); \
t5 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t5 ^ (t5 << 8); \
out[0] = (t2 & 0xFFFF0000) | (t1 >> 16); \
out[1] = (t2 << 16) | (t1 & 0x0000FFFF); \
}
#define unpackFormat(out, in) {\
t2 = (in[0] & 0xFFFF0000) | (in[1] >> 16); \
t1 = (in[1] & 0x0000FFFF) | (in[0] << 16); \
t3 = (t1 ^ (t1 >> 8)) & 0x0000FF00, t1 ^= t3 ^ (t3 << 8); \
t3 = (t1 ^ (t1 >> 4)) & 0x00F000F0, t1 ^= t3 ^ (t3 << 4); \
t3 = (t1 ^ (t1 >> 2)) & 0x0C0C0C0C, t1 ^= t3 ^ (t3 << 2); \
t3 = (t1 ^ (t1 >> 1)) & 0x22222222, t1 ^= t3 ^ (t3 << 1); \
t5 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t5 ^ (t5 << 8); \
t5 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t5 ^ (t5 << 4); \
t5 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t5 ^ (t5 << 2); \
t5 = (t2 ^ (t2 >> 1)) & 0x22222222, t2 ^= t5 ^ (t5 << 1); \
*((u64*)out) = ((u64)t2 << 32 | t1); \
}
#define getU32Format(out, in) {\
t1, t2 = U32BIG(((u32*)in)[0]); \
t1 = (t2 ^ (t2 >> 1)) & 0x22222222, t2 ^= t1 ^ (t1 << 1); \
t1 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t1 ^ (t1 << 2); \
t1 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t1 ^ (t1 << 4); \
t1 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t1 ^ (t1 << 8); \
*out = t2; \
}
#define ROUND256( constant6Format,lunNum) {\
s[0] ^= constant6Format[lunNum]>> 4;\
s[1] ^= constant6Format[lunNum]& 0x0f;\
sbox(s[0], s[2], s[4], s[6], s_temp[0], s_temp[2], s_temp[4], s_temp[6]);\
sbox(s[1], s[3], s[5], s[7], s_temp[1], s_temp[3], s_temp[5], s_temp[7]);\
s[0] = s_temp[0];\
s[1] = s_temp[1];\
s[2] = s_temp[3];\
s[3] = LOTR32(s_temp[2], 1);\
s[4] = LOTR32(s_temp[4], 4);\
s[5] = LOTR32(s_temp[5], 4);\
s[6] = LOTR32(s_temp[7], 12);\
s[7] = LOTR32(s_temp[6], 13);\
}
void printfFormat(char name[], u32 * in);
void printU8(char name[], u8 var[], long len, int offset); void printU8(char name[], u8 var[], long len, int offset);
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen, const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen, const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *nsec, const unsigned char *npub,
const unsigned char *npub, const unsigned char *k);
const unsigned char *k
);
int crypto_aead_decrypt( int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
unsigned char *m, unsigned long long *mlen, unsigned char *nsec, const unsigned char *c, unsigned long long clen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen, const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *npub, const unsigned char *k);
const unsigned char *k
);
#define CRYPTO_KEYBYTES 16 //
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#include"auxFormat.h"
//#define PRINTFormatToU8
#define PRINTU8
/*
 * Round-constant table, one byte per round (52 entries).
 * P256 XORs the high nibble of entry i into s[0] and the low nibble into
 * s[1] at the start of round i.
 */
unsigned char constant6Format[52] = {
/*constant6_aead_128v1:*/
0x01, 0x10, 0x02, 0x20, 0x04, 0x41, 0x11, 0x12, 0x22, 0x24, 0x45, 0x50, 0x03,
0x30, 0x06, 0x61, 0x15, 0x53, 0x33, 0x36, 0x67, 0x74, 0x46, 0x60, 0x05,
0x51, 0x13, 0x32, 0x26, 0x65, 0x54, 0x42, 0x21, 0x14, 0x43, 0x31, 0x16,
0x63, 0x35, 0x57, 0x72, 0x27, 0x75, 0x56, 0x62, 0x25, 0x55, 0x52, 0x23,
0x34, 0x47, 0x70, };
/*
 * P256 - run lunNum rounds of the 256-bit permutation on the state, in place
 * (portable C version).
 *
 * s      : eight 32-bit state words; (s[0],s[2],s[4],s[6]) and
 *          (s[1],s[3],s[5],s[7]) are each passed through the S-box as rows 0..3.
 * round  : round-constant bytes; round[i] is used in round i.
 * lunNum : number of rounds.
 */
void P256(unsigned int *s, unsigned char *round, unsigned char lunNum) {
u32 s_temp[8] = { 0 };
/* Scratch variables referenced by name inside the sbox macro. */
u32 t1, t2, t3, t5, t6, t8, t9, t11;
unsigned char i;
for (i = 0; i < lunNum; i++) {
/* Add round constant: high nibble into s[0], low nibble into s[1]. */
s[0] ^= round[i] >> 4;
s[1] ^= round[i] & 0x0f;
/* sbox overwrites its first argument with the row-0 output and writes the
 * row 1..3 outputs to its last three arguments. */
sbox(s[0], s[2], s[4], s[6], s_temp[2], s_temp[4], s_temp[6]);
/* The row-1 output of the second half goes straight into s[2]; this is safe
 * because the first sbox call has already consumed the old s[2]. */
sbox(s[1], s[3], s[5], s[7], s[2], s_temp[5], s_temp[7]);
/* Row 1: halves swapped (s[2] set above), the other half rotated left by 1. */
s[3] = LOTR32(s_temp[2], 1);
/* Row 2: both halves rotated left by 4. */
s[4] = LOTR32(s_temp[4], 4);
s[5] = LOTR32(s_temp[5], 4);
/* Row 3: halves swapped and rotated left by 12 and 13. */
s[6] = LOTR32(s_temp[7], 12);
s[7] = LOTR32(s_temp[6], 13);
}
}
/*
 * packFormat - read 8 bytes from `in` and store them in the bit-interleaved
 * state format.
 *
 * Each 32-bit input word is de-interleaved so that its odd-position bits end
 * up in the upper 16 bits and its even-position bits in the lower 16 bits.
 * The halves of the two words are then combined:
 *   out[0] = odd bits of word 1  (high half) | odd bits of word 0  (low half)
 *   out[1] = even bits of word 1 (high half) | even bits of word 0 (low half)
 *
 * in  : 8 readable bytes, any alignment.
 * out : two 32-bit words, overwritten.
 */
void packFormat(u32 * out, const u8 * in) {
    u32 t0, t1;
    u32 r0, r1;

    /* Load through memcpy: `in` is a byte pointer with no alignment
     * guarantee, and reading it through a cast u32 pointer is undefined
     * behaviour (alignment / effective-type rules). */
    memcpy(&t0, in, sizeof t0);
    memcpy(&t1, in + sizeof t0, sizeof t1);
    t0 = U32BIG(t0);
    t1 = U32BIG(t1);

    /* Four swap steps gather odd bits high / even bits low in t0. */
    r0 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= r0 ^ (r0 << 1);
    r0 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= r0 ^ (r0 << 2);
    r0 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= r0 ^ (r0 << 4);
    r0 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= r0 ^ (r0 << 8);

    /* Same for t1. */
    r1 = (t1 ^ (t1 >> 1)) & 0x22222222, t1 ^= r1 ^ (r1 << 1);
    r1 = (t1 ^ (t1 >> 2)) & 0x0C0C0C0C, t1 ^= r1 ^ (r1 << 2);
    r1 = (t1 ^ (t1 >> 4)) & 0x00F000F0, t1 ^= r1 ^ (r1 << 4);
    r1 = (t1 ^ (t1 >> 8)) & 0x0000FF00, t1 ^= r1 ^ (r1 << 8);

    out[0] = (t1 & 0xFFFF0000) | (t0 >> 16);      /* t1.odd  | t0.odd  */
    out[1] = (t1 << 16) | (t0 & 0x0000FFFF);      /* t1.even | t0.even */
}
/*
 * unpackFormat - inverse of packFormat: turn two bit-interleaved state words
 * back into 8 plain bytes.
 *
 * in  : two 32-bit words in the interleaved format (read only).
 * out : receives 8 bytes.
 */
void unpackFormat(u8 * out, u32 * in) {
    /* Regroup the 16-bit halves so each word again holds its own
     * odd bits (upper half) and even bits (lower half). */
    u32 hi = (in[0] & 0xFFFF0000) | (in[1] >> 16);
    u32 lo = (in[1] & 0x0000FFFF) | (in[0] << 16);
    u32 swap;

    /* Re-interleave the low word: the pack steps applied in reverse order. */
    swap = (lo ^ (lo >> 8)) & 0x0000FF00;
    lo ^= swap ^ (swap << 8);
    swap = (lo ^ (lo >> 4)) & 0x00F000F0;
    lo ^= swap ^ (swap << 4);
    swap = (lo ^ (lo >> 2)) & 0x0C0C0C0C;
    lo ^= swap ^ (swap << 2);
    swap = (lo ^ (lo >> 1)) & 0x22222222;
    lo ^= swap ^ (swap << 1);

    /* Same for the high word. */
    swap = (hi ^ (hi >> 8)) & 0x0000FF00;
    hi ^= swap ^ (swap << 8);
    swap = (hi ^ (hi >> 4)) & 0x00F000F0;
    hi ^= swap ^ (swap << 4);
    swap = (hi ^ (hi >> 2)) & 0x0C0C0C0C;
    hi ^= swap ^ (swap << 2);
    swap = (hi ^ (hi >> 1)) & 0x22222222;
    hi ^= swap ^ (swap << 1);

    u32 words[2] = { lo, hi };
    memcpy(out, words, 8 * sizeof(unsigned char));
}
/*
 * getU32Format - read 4 bytes from `in` and de-interleave the resulting
 * 32-bit word: odd-position bits are gathered in the upper 16 bits, even-
 * position bits in the lower 16 bits.
 *
 * in  : 4 readable bytes, any alignment.
 * out : receives the converted word.
 */
void getU32Format(u32 *out, const u8* in) {
    u32 r0, lo;

    /* memcpy load instead of dereferencing a cast pointer: avoids the
     * unaligned / incompatible-type access on a plain byte buffer. */
    memcpy(&lo, in, sizeof lo);
    lo = U32BIG(lo);

    r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
    r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
    r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
    r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
    *out = lo;
}
#include<malloc.h>
#include"crypto_aead.h"
#include"api.h"
#include"stdio.h"
#include <string.h>
#define U32BIG(x) (x)
#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n))))
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
unsigned char constant6Format[52];
unsigned char constant7Format[68];
#define RATE (64 / 8)
#define PR0_ROUNDS 52
#define PR_ROUNDS 28
#define PRF_ROUNDS 32
/*
 * sbox(a, b, c, d, f, g, h) - bit-sliced 4-bit S-box.
 *
 * a, b, c, d : input rows 0..3. `a` is also overwritten with the row-0
 *              output; b, c and d are only read.
 * f, g, h    : receive the row-1, row-2 and row-3 outputs.
 *
 * The expansion assigns to t1, t2, t3, t5, t6, t8, t9 and t11, which must be
 * declared in the calling scope. Arguments are not parenthesised and are used
 * more than once, so pass plain lvalues only.
 */
#define sbox(a, b, c, d, f, g, h) \
{ \
t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
}
void packFormat(u32 * out, const u8 * in);
void unpackFormat(u8 * out, u32 * in);
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k);
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
unsigned char *nsec, const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k);
#include"auxFormat.h"
/*
 * ProcessAssocData - absorb the associated data into the state.
 *
 * Full RATE-byte blocks are XORed into s[0..1], each followed by PR_ROUNDS
 * rounds of P256. If adlen is non-zero, a final block made of the remaining
 * bytes, a 0x01 byte and zero fill is absorbed the same way. Bit 31 of s[6]
 * is flipped at the end in every case.
 */
void ProcessAssocData(u32 *s, const u8* ad, unsigned long long adlen) {
    u32 packed[2] = { 0 };

    if (adlen != 0) {
        u8 lastBlock[8] = { 0 };

        for (; adlen >= RATE; adlen -= RATE, ad += RATE) {
            packFormat(packed, ad);
            s[0] ^= packed[0];
            s[1] ^= packed[1];
            P256(s, constant6Format, PR_ROUNDS);
        }

        /* Here adlen < RATE: leftover bytes, then 0x01, rest stays zero. */
        memcpy(lastBlock, ad, adlen * sizeof(unsigned char));
        lastBlock[adlen] = 0x01;
        packFormat(packed, lastBlock);
        s[0] ^= packed[0];
        s[1] ^= packed[1];
        P256(s, constant6Format, PR_ROUNDS);
    }

    s[6] ^= 0x80000000;
}
/*
 * ProcessPlaintext - encrypt mlen bytes of m into c.
 *
 * Each full RATE-byte block is XORed into s[0..1]; the unpacked s[0..1] is
 * the ciphertext block, and PR_ROUNDS rounds of P256 follow. The remaining
 * bytes are padded with 0x01 and zeros, XORed in, and only the first mlen
 * bytes of the result are written out; no permutation follows that last
 * block. Nothing happens when mlen is 0.
 */
void ProcessPlaintext(u32 *s, const u8* m, unsigned long long mlen,
        unsigned char *c) {
    u32 packed[2] = { 0 };
    u8 block[8] = { 0 };

    if (mlen == 0) {
        return;
    }

    for (; mlen >= RATE; mlen -= RATE, m += RATE, c += RATE) {
        packFormat(packed, m);
        s[0] ^= packed[0];
        s[1] ^= packed[1];
        unpackFormat(c, s);
        P256(s, constant6Format, PR_ROUNDS);
    }

    /* block is still all zero here; mlen < RATE. */
    memcpy(block, m, mlen * sizeof(unsigned char));
    block[mlen] = 0x01;
    packFormat(packed, block);
    s[0] ^= packed[0];
    s[1] ^= packed[1];

    /* Unpack the whole rate but emit only the bytes that carry message data. */
    unpackFormat(block, s);
    memcpy(c, block, mlen * sizeof(unsigned char));
}
/*
 * Finalize_GenerateTag - run PRF_ROUNDS rounds of P256, then write the 16-byte
 * tag to c: bytes 0..7 from s[0..1], bytes 8..15 from s[2..3].
 */
void Finalize_GenerateTag(u32 *s, unsigned char *c) {
    unsigned char *tagLow = c;
    unsigned char *tagHigh = c + 8;

    P256(s, constant6Format, PRF_ROUNDS);

    unpackFormat(tagLow, &s[0]);
    unpackFormat(tagHigh, &s[2]);
}
/*
 * Initialize - load the 16-byte nonce into s[0..3] and the 16-byte key into
 * s[4..7] (both in packed format), then run PR0_ROUNDS rounds of P256.
 */
void Initialize(u32 *s, const unsigned char *npub, const unsigned char *k) {
    /* nonce -> s[0..3] */
    packFormat(&s[0], &npub[0]);
    packFormat(&s[2], &npub[8]);

    /* key -> s[4..7] */
    packFormat(&s[4], &k[0]);
    packFormat(&s[6], &k[8]);

    P256(s, constant6Format, PR0_ROUNDS);
}
/*
 * ProcessCiphertext - decrypt clen bytes of c into m.
 *
 * For each full RATE-byte block the plaintext is (s[0..1] XOR ciphertext),
 * the ciphertext then replaces s[0..1], and PR_ROUNDS rounds of P256 follow.
 * For the remaining bytes the rate part of the state is unpacked, the
 * plaintext bytes are produced, the ciphertext bytes replace the matching
 * rate bytes, the byte after them is XORed with 0x01, and the result is
 * packed back into s[0..1]. Nothing happens when clen is 0.
 */
void ProcessCiphertext(u32 *s, unsigned char *m, const unsigned char *c,
        unsigned long long clen) {
    u8 rateBytes[32] = { 0 };
    u32 cipherWords[2] = { 0 };
    u32 plainWords[2] = { 0 };
    u8 idx;

    if (clen == 0) {
        return;
    }

    for (; clen >= RATE; clen -= RATE, m += RATE, c += RATE) {
        packFormat(cipherWords, c);
        plainWords[0] = s[0] ^ cipherWords[0];
        plainWords[1] = s[1] ^ cipherWords[1];
        unpackFormat(m, plainWords);
        s[0] = cipherWords[0];
        s[1] = cipherWords[1];
        P256(s, constant6Format, PR_ROUNDS);
    }

    /* Partial block: clen < RATE from here on. */
    unpackFormat(rateBytes, s);
    for (idx = 0; idx < clen; idx++) {
        m[idx] = rateBytes[idx] ^ c[idx];
        rateBytes[idx] = c[idx];
    }
    rateBytes[idx] ^= 0x01;     /* padding byte right after the data */
    packFormat(s, rateBytes);
}
/*
 * Finalize_VerifyTag - run PRF_ROUNDS rounds of P256, derive the expected tag
 * from s[0..3] and compare it with the CRYPTO_ABYTES-byte tag at c.
 *
 * s    : state after all data has been processed; modified.
 * c    : received tag.
 * m    : plaintext buffer; zeroed over *mlen bytes when the tag is wrong.
 * mlen : plaintext length; set to 0 when the tag is wrong.
 *
 * Returns 0 when the tags match, -1 otherwise.
 */
int Finalize_VerifyTag(u32 *s, const unsigned char *c, unsigned char *m,
        unsigned long long *mlen) {
    u8 tempU8[16] = { 0 };
    unsigned char diff = 0;
    unsigned int i;

    P256(s, constant6Format, PRF_ROUNDS);
    /* expected tag */
    unpackFormat(tempU8, s);
    unpackFormat((tempU8 + 8), (s + 2));

    /* Compare every byte unconditionally. memcmp may stop at the first
     * difference, which leaks through timing how many leading tag bytes of a
     * forgery were correct. */
    for (i = 0; i < CRYPTO_ABYTES; i++) {
        diff |= (unsigned char)(tempU8[i] ^ c[i]);
    }

    if (diff != 0) {
        /* Do not release unauthenticated plaintext. */
        memset(m, 0, sizeof(unsigned char) * (*mlen));
        *mlen = 0;
        return -1;
    }
    return 0;
}
/*
 * crypto_aead_encrypt - encrypt m (mlen bytes) under key k and nonce npub,
 * authenticating ad (adlen bytes) as well.
 *
 * c receives mlen ciphertext bytes followed by the CRYPTO_ABYTES-byte tag;
 * *clen is set to mlen + CRYPTO_ABYTES. nsec is not used. Always returns 0.
 */
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
        const unsigned char *m, unsigned long long mlen,
        const unsigned char *ad, unsigned long long adlen,
        const unsigned char *nsec, const unsigned char *npub,
        const unsigned char *k) {
    u32 state[8] = { 0 };
    unsigned char *tag = c + mlen;

    (void) nsec;
    *clen = mlen + CRYPTO_ABYTES;

    Initialize(state, npub, k);
    ProcessAssocData(state, ad, adlen);
    ProcessPlaintext(state, m, mlen, c);
    Finalize_GenerateTag(state, tag);

    return 0;
}
/*
 * crypto_aead_decrypt - verify and decrypt c (clen bytes: ciphertext followed
 * by a CRYPTO_ABYTES-byte tag) under key k and nonce npub, with associated
 * data ad (adlen bytes).
 *
 * On success m holds clen - CRYPTO_ABYTES plaintext bytes, *mlen is set to
 * that length and 0 is returned. If clen is too short to contain a tag, or
 * the tag does not verify, *mlen is set to 0 and -1 is returned (in the
 * latter case the plaintext buffer is zeroed by Finalize_VerifyTag).
 * nsec is not used.
 */
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
        unsigned char *nsec, const unsigned char *c, unsigned long long clen,
        const unsigned char *ad, unsigned long long adlen,
        const unsigned char *npub, const unsigned char *k) {
    u32 s[8] = { 0 };
    unsigned long long ctlen;

    (void) nsec;

    /* Validate before doing any arithmetic on clen: the subtraction below
     * would wrap around and hand the caller a huge bogus length. */
    if (clen < CRYPTO_ABYTES) {
        *mlen = 0;
        return -1;
    }
    ctlen = clen - CRYPTO_ABYTES;
    *mlen = ctlen;

    //initialization
    Initialize(s, npub, k);
    // process associated data
    ProcessAssocData(s, ad, adlen);
    // process cipher
    ProcessCiphertext(s, m, c, ctlen);
    // finalization: the tag sits right after the ciphertext bytes
    return Finalize_VerifyTag(s, c + ctlen, m, mlen);
}
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.0 2020 by KNOT Team *
; **********************************************
;
; LFSR6_MACRO: advance the 6-bit round-constant LFSR held in rc by one step.
; Feedback bit = rc[5] xor rc[4]; rc is shifted left by one, the feedback bit
; enters at bit 0, and the result is masked to 6 bits.
; Scratch: tmp0, tmp1 (only their bit 0 is meaningful), T flag, carry.
.macro LFSR6_MACRO
bst rc, 5
bld tmp0, 0
bst rc, 4
bld tmp1, 0
eor tmp0, tmp1
ror tmp0
rol rc
andi rc, 0x3F
.endm
; LFSR7_MACRO: advance the 7-bit round-constant LFSR held in rc by one step.
; Feedback bit = rc[6] xor rc[5]; rc is shifted left by one, the feedback bit
; enters at bit 0, and the result is masked to 7 bits.
; Scratch: tmp0, tmp1 (only their bit 0 is meaningful), T flag, carry.
.macro LFSR7_MACRO
bst rc, 6
bld tmp0, 0
bst rc, 5
bld tmp1, 0
eor tmp0, tmp1
ror tmp0
rol rc
andi rc, 0x7F
.endm
; LFSR8_MACRO: advance the 8-bit round-constant LFSR held in rc by one step.
; Feedback bit = rc[7] xor rc[5] xor rc[4] xor rc[3]; rc is shifted left by
; one and the feedback bit enters at bit 0. No mask is applied: rc is used as
; a full 8-bit register.
; Scratch: tmp0, tmp1 (only their bit 0 is meaningful), T flag, carry.
.macro LFSR8_MACRO
bst rc, 7
bld tmp0, 0
bst rc, 5
bld tmp1, 0
eor tmp0, tmp1
bst rc, 4
bld tmp1, 0
eor tmp0, tmp1
bst rc, 3
bld tmp1, 0
eor tmp0, tmp1
ror tmp0
rol rc
.endm
; Sbox i0, i1, i2, i3: bit-sliced S-box on four registers, in place.
; Each argument register holds one byte of a state row (the routine in this
; code base invokes it with the row-0..row-3 bytes of one column); every bit
; position is substituted independently. All four registers are overwritten
; with the corresponding output rows. tmp0 is clobbered.
.macro Sbox i0, i1, i2, i3
mov tmp0, \i1
com \i0
and \i1, \i0
eor \i1, \i2
or \i2, tmp0
eor \i0, \i3
eor \i2, \i0
eor tmp0, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, tmp0
and tmp0, \i2
eor \i1, tmp0
.endm
; PUSH_CONFLICT: save r16-r19, r23, r24 and r26-r31 on the stack.
; Must be paired with POP_CONFLICT, which pops them in the reverse order.
; NOTE(review): presumably these are the registers whose aliases are reused
; inside Permutation - confirm against the alias definitions.
.macro PUSH_CONFLICT
push r16
push r17
push r18
push r19
push r23
push r24
push r26
push r27
push r28
push r29
push r30
push r31
.endm
; POP_CONFLICT: restore the registers saved by PUSH_CONFLICT
; (r31..r26, r24, r23, r19..r16 - exact reverse of the push order).
.macro POP_CONFLICT
pop r31
pop r30
pop r29
pop r28
pop r27
pop r26
pop r24
pop r23
pop r19
pop r18
pop r17
pop r16
.endm
; PUSH_ALL: save r2-r17, r28 and r29 on the stack.
; Must be paired with POP_ALL, which pops them in the reverse order.
; NOTE(review): this matches the avr-gcc call-saved register set - confirm
; that this is the intended use (entry of C-callable routines).
.macro PUSH_ALL
push r2
push r3
push r4
push r5
push r6
push r7
push r8
push r9
push r10
push r11
push r12
push r13
push r14
push r15
push r16
push r17
push r28
push r29
.endm
; POP_ALL: restore the registers saved by PUSH_ALL (exact reverse order),
; then clear r1.
; NOTE(review): clearing r1 presumably re-establishes the avr-gcc convention
; that r1 always reads as zero before returning to C code - confirm.
.macro POP_ALL
pop r29
pop r28
pop r17
pop r16
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
pop r9
pop r8
pop r7
pop r6
pop r5
pop r4
pop r3
pop r2
clr r1
.endm
\ No newline at end of file
#ifndef __CONFIG_H__
#define __CONFIG_H__
#define CRYPTO_AEAD
//#define CRYPTO_HASH
#define MAX_MESSAGE_LENGTH 128
#define STATE_INBITS 256
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBITS 128
/* For CRYPTO_HASH */
#define CRYPTO_BITS 256
#define STATE_INBYTES ((STATE_INBITS + 7) / 8)
#define ROW_INBITS ((STATE_INBITS + 3) / 4)
#define ROW_INBYTES ((ROW_INBITS + 7) / 8)
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8)
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES
#define CRYPTO_ABYTES CRYPTO_KEYBYTES
#define CRYPTO_NOOVERLAP 1
#define MAX_ASSOCIATED_DATA_LENGTH 32
#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES)
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
/* For CRYPTO_HASH */
#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8)
#define DOMAIN_BITS 0x80
#define PAD_BITS 0x01
#define S384_R192_BITS 0x80
/*
 * C1, C2, C3: constants selected by the state size; any other STATE_INBITS
 * value is a compile-time error.
 * NOTE(review): presumably the left-rotation amounts, in bits, applied to
 * rows 1, 2 and 3 of the state by the permutation - confirm against the
 * permutation code.
 */
#if (STATE_INBITS==256)
#define C1 1
#define C2 8
#define C3 25
#elif (STATE_INBITS==384)
#define C1 1
#define C2 8
#define C3 55
#elif (STATE_INBITS==512)
#define C1 1
#define C2 16
#define C3 25
#else
#error "Not specified state size"
#endif
/*
 * AEAD parameters, derived from the CRYPTO_* sizes above.
 * The (key size, state size) pair selects the rate in bits and three round
 * counts NR_0, NR_i and NR_f; unsupported pairs are a compile-time error.
 * NOTE(review): NR_0 / NR_i / NR_f are presumably the round counts for
 * initialization, data processing and finalization - confirm against the
 * assembly that consumes them.
 */
#ifdef CRYPTO_AEAD
/* For CRYPTO_AEAD */
#define KEY_INBITS (CRYPTO_KEYBYTES * 8)
#define KEY_INBYTES (CRYPTO_KEYBYTES)
#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8)
#define NONCE_INBYTES (CRYPTO_NPUBBYTES)
#define TAG_INBITS (CRYPTO_ABYTES * 8)
#define TAG_INBYTES (CRYPTO_ABYTES)
#if (KEY_INBITS==128) && (STATE_INBITS==256)
#define RATE_INBITS 64
#define NR_0 52
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==128) && (STATE_INBITS==384)
#define RATE_INBITS 192
#define NR_0 76
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==192) && (STATE_INBITS==384)
#define RATE_INBITS 96
#define NR_0 76
#define NR_i 40
#define NR_f 44
#elif (KEY_INBITS==256) && (STATE_INBITS==512)
#define RATE_INBITS 128
#define NR_0 100
#define NR_i 52
#define NR_f 56
#else
#error "Not specified key size and state size"
#endif
/* Rate rounded up to whole bytes. */
#define RATE_INBYTES ((RATE_INBITS + 7) / 8)
#define SQUEEZE_RATE_INBYTES TAG_INBYTES
#endif
/*
 * Hash parameters, derived from CRYPTO_BYTES.
 * The (digest size, state size) pair selects the absorb rate, the squeeze
 * rate (both in bits) and the round count NR_h; unsupported pairs are a
 * compile-time error.
 */
#ifdef CRYPTO_HASH
/* For CRYPTO_HASH */
#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8)
#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256)
#define HASH_RATE_INBITS 32
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 68
#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 128
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 80
#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 48
#define HASH_SQUEEZE_RATE_INBITS 192
#define NR_h 104
#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512)
#define HASH_RATE_INBITS 64
#define HASH_SQUEEZE_RATE_INBITS 256
#define NR_h 140
#else
#error "Not specified hash digest size and state size"
#endif
/* Rates rounded up to whole bytes. */
#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8)
#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8)
#endif
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
#endif
\ No newline at end of file
#ifdef __cplusplus
extern "C" {
#endif
int crypto_aead_encrypt(
unsigned char *c,unsigned long long *clen,
const unsigned char *m,unsigned long long mlen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m,unsigned long long *outputmlen,
unsigned char *nsec,
const unsigned char *c,unsigned long long clen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#ifdef __cplusplus
}
#endif
#include <avr/io.h>
#include <avr/sfr_defs.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
extern void crypto_aead_encrypt_asm(
unsigned char *c,
const unsigned char *m,
unsigned char mlen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern int crypto_aead_decrypt_asm(
unsigned char *m,
const unsigned char *c,
unsigned char clen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern void crypto_hash_asm(
unsigned char *out,
const unsigned char *in,
unsigned char inlen
);
/*
 * crypto_aead_encrypt - C entry point wrapping the assembly implementation.
 *
 * c     : output buffer; receives the ciphertext followed by the tag.
 * clen  : set to mlen + TAG_INBYTES on success.
 * m     : plaintext, mlen bytes.
 * ad    : associated data, adlen bytes.
 * nsec  : unused.
 * npub  : nonce.
 * k     : key.
 *
 * Returns 0 on success. Returns -1, without touching c or *clen, when mlen
 * or adlen does not fit the 8-bit length parameters of the assembly routine.
 */
int crypto_aead_encrypt(
    unsigned char *c, unsigned long long *clen,
    const unsigned char *m, unsigned long long mlen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *nsec,
    const unsigned char *npub,
    const unsigned char *k
    )
{
    (void)nsec;

    /* crypto_aead_encrypt_asm takes `unsigned char` lengths. Passing a larger
     * value would silently truncate it and encrypt only part of the input
     * while still reporting the full length in *clen. */
    if (mlen != (unsigned char)mlen || adlen != (unsigned char)adlen) {
        return -1;
    }

    crypto_aead_encrypt_asm(c, m, (unsigned char)mlen, ad, (unsigned char)adlen, npub, k);

    *clen = mlen + TAG_INBYTES;
    return 0;
}
/*
 * crypto_aead_decrypt - C entry point wrapping the assembly implementation.
 *
 * m     : output buffer for the plaintext (clen - CRYPTO_ABYTES bytes).
 * mlen  : set to the plaintext length on success; left untouched on failure.
 * nsec  : unused.
 * c     : ciphertext followed by the tag, clen bytes in total.
 * ad    : associated data, adlen bytes.
 * npub  : nonce.
 * k     : key.
 *
 * Returns 0 when the tag verifies. Returns -1 when clen is shorter than a
 * tag, when a length does not fit the 8-bit length parameters of the assembly
 * routine, or when the tag does not verify (m is zeroed in that last case).
 */
int crypto_aead_decrypt(
    unsigned char *m, unsigned long long *mlen,
    unsigned char *nsec,
    const unsigned char *c, unsigned long long clen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *npub,
    const unsigned char *k
    )
{
    unsigned long long mlen_;
    unsigned char tag_is_match;

    (void)nsec;
    if (clen < CRYPTO_ABYTES) {
        return -1;
    }
    mlen_ = clen - CRYPTO_ABYTES;

    /* crypto_aead_decrypt_asm takes `unsigned char` lengths; refuse inputs
     * that would be silently truncated (and then authenticated wrongly). */
    if (mlen_ != (unsigned char)mlen_ || adlen != (unsigned char)adlen) {
        return -1;
    }

    /* Only the low byte of the status is examined, as in the original code.
     * NOTE(review): confirm which result registers the assembly routine
     * actually sets before widening this to int. */
    tag_is_match = crypto_aead_decrypt_asm(m, c, (unsigned char)mlen_, ad, (unsigned char)adlen, npub, k);

    if (tag_is_match != 0)
    {
        /* Do not release unauthenticated plaintext. */
        memset(m, 0, (size_t)mlen_);
        return -1;
    }

    *mlen = mlen_;
    return 0;
}
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.0 2020 by KNOT Team *
; **********************************************
;
#include "assist.h"
; Permutation: run the round function on the state stored at SRAM_STATE
; (four rows of ROW_INBYTES bytes each, 8-byte-row variant).
; The round count is taken from rn and copied to rcnt; the round constant rc
; starts at 0x01. Row 3 is kept in registers x30..x37 for the whole call and
; written back at round_loop_end; rows 0..2 are updated in SRAM.
; NOTE(review): rn, rcnt, rc and the x* names are register aliases defined
; outside this view - confirm the calling convention there.
Permutation:
PUSH_CONFLICT
mov rcnt, rn
ldi rc, 0x01
; Load row 3 into x30..x37.
ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
round_loop_start:
rjmp AddRC_SubColumns_Start
; Jump table indexed by column (byte) number. Z is pointed at the first entry
; at the start of a round and advanced by one entry after every column; the
; entry after the last column leads to amend_shiftRow.
load_columns_table:
rjmp load_column0
rjmp load_column1
rjmp load_column2
rjmp load_column3
rjmp load_column4
rjmp load_column5
rjmp load_column6
rjmp load_column7
rjmp amend_shiftRow
; load_columnN: store the row-3 result of the previous column (left in x3j by
; the S-box) into its register, then fetch the row-3 byte of column N into x3j.
; Column 0 has no previous result to store.
load_column0:
mov x3j, x30
rjmp Sbox_one_column
load_column1:
mov x30, x3j
mov x3j, x31
rjmp Sbox_one_column
load_column2:
mov x31, x3j
mov x3j, x32
rjmp Sbox_one_column
load_column3:
mov x32, x3j
mov x3j, x33
rjmp Sbox_one_column
load_column4:
mov x33, x3j
mov x3j, x34
rjmp Sbox_one_column
load_column5:
mov x34, x3j
mov x3j, x35
rjmp Sbox_one_column
load_column6:
mov x35, x3j
mov x3j, x36
rjmp Sbox_one_column
load_column7:
mov x36, x3j
mov x3j, x37
rjmp Sbox_one_column
; When both AEAD and HASH are built, the LFSR step is chosen at run time
; through this two-entry jump table (entry 0: 6-bit LFSR, entry 1: 7-bit LFSR).
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
LFSR_table:
rjmp LFSR6
rjmp LFSR7
LFSR6:
LFSR6_MACRO
rjmp LFSR_DONE
LFSR7:
LFSR7_MACRO
rjmp LFSR_DONE
#endif
;;;;;;;;;;;;;;;;;;;;;;;; Real Start
; Start of one round: point Y at row 0, reset the column counter, XOR the
; round constant into byte 0 of row 0, then step the round-constant LFSR.
AddRC_SubColumns_Start:
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
clr ccnt
ld x0j, Y
eor x0j, rc
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
ldi ZL, pm_lo8(LFSR_table)
ldi ZH, pm_hi8(LFSR_table)
; Bit 2 of AEDH clear -> table entry 0 (LFSR6); set -> entry 1 (LFSR7).
; NOTE(review): the comment on the next line names bit 1 for HASH while the
; instruction tests bit 2 - confirm which is intended.
sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[1] = 1 for HASH
adiw ZL, 1
ijmp
LFSR_DONE:
#elif defined(CRYPTO_AEAD)
LFSR6_MACRO ; only AEAD
#else
LFSR7_MACRO ; only HASH
#endif
; Fetch the row-1 and row-2 bytes of column 0, then dispatch to load_column0
; through the column jump table.
ldd x1j, Y + ROW_INBYTES
ldd x2j, Y + 2 * ROW_INBYTES
ldi ZL, pm_lo8(load_columns_table)
ldi ZH, pm_hi8(load_columns_table)
ijmp
; Sbox_one_column: apply the S-box to the current column (x0j..x3j) and store
; rows 0..2 back to SRAM with their row shifts folded in. Row 3 stays in x3j
; and is filed into its register by the next load_columnN stub.
Sbox_one_column:
Sbox x0j, x1j, x2j, x3j
; 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- x- -- 1
; -- -- -- -- x' -- -- -- 3
; 4 3 2 1 0 7 6 5
; Store a byte to Row 0
st Y, x0j
; Store a byte combined with ShiftRow1
; lsl moves bit 7 of the previous column's row-1 byte (saved in t1j) into the
; carry; rol then shifts it into bit 0 of the current byte. For column 0 the
; carry comes from a stale t1j; bit 0 of that byte is corrected in
; amend_shiftRow.
lsl t1j
mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left)
rol x1j
std Y + ROW_INBYTES, x1j
; Store a byte combined with ShiftRow2
; Row 2 is rotated by one byte: the new byte is written one position further
; along, after the old content of that position has been fetched as the
; row-2 input of the next column. The last column wraps to byte 0 of row 2.
inc ccnt
cpi ccnt, ROW_INBYTES
breq ROW2_WRAP
ldd t2j, Y + 2 * ROW_INBYTES + 1 ; load next byte, the last updated byte needed to be shifted to the address of the next bytes
std Y + 2 * ROW_INBYTES + 1, x2j
mov x2j, t2j
jmp NO_ROW2_WRAP
ROW2_WRAP:
std Y + ROW_INBYTES + 1, x2j
; remain ShiftRow3 to be done at 'amend_shiftRow'
NO_ROW2_WRAP:
; Advance to the next column: load its row-0 and row-1 bytes and jump through
; the next entry of the column table.
adiw YL, 1
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
adiw ZL, 1
ijmp
; amend_shiftRow: finish the row shifts that could not be completed column by
; column, then loop or leave.
amend_shiftRow:
; Row 1: copy bit 7 of the last column's original byte (still in t1j) into
; bit 0 of byte 0, completing the 1-bit left rotation.
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
ld x1j, Y
bst t1j, 7
bld x1j, 0
st Y, x1j
; <<< 1
; Row 3: file the last column's result, then rotate x30..x37 left by one bit.
; The leading rol on x3j (a copy of x37) only seeds the carry with bit 7 of
; the top byte.
mov x37, x3j
rol x3j
rol x30
rol x31
rol x32
rol x33
rol x34
rol x35
rol x36
rol x37
; <<< 24
; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5
; Byte rotation done as a chain of register moves; x3j is the temporary.
mov x3j, x30
mov x30, x35
mov x35, x32
mov x32, x37
mov x37, x34
mov x34, x31
mov x31, x36
mov x36, x33
mov x33, x3j
dec rcnt
breq round_loop_end
rjmp round_loop_start
; All rounds done: write row 3 back to SRAM and restore the saved registers.
round_loop_end:
ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
POP_CONFLICT
ret
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.0 2020 by KNOT Team *
; **********************************************
;
#include "assist.h"
; Permutation: run the round function on the state stored at SRAM_STATE
; (four rows of ROW_INBYTES bytes each, 12-byte-row variant).
; The round count is taken from rn and copied to rcnt; the round constant rc
; starts at 0x01. Row 3 is kept in registers x30..x3b for the whole call and
; written back at round_loop_end; rows 0..2 are updated in SRAM.
; NOTE(review): rn, rcnt, rc and the x* names are register aliases defined
; outside this view - confirm the calling convention there.
Permutation:
PUSH_CONFLICT
mov rcnt, rn
ldi rc, 0x01
; Load row 3 into x30..x3b.
ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
ld x38, Y+
ld x39, Y+
ld x3a, Y+
ld x3b, Y+
round_loop_start:
rjmp AddRC_SubColumns_Start
; Jump table indexed by column (byte) number. Z is pointed at the first entry
; at the start of a round and advanced by one entry after every column; the
; entry after the last column leads to amend_shiftRow.
load_columns_table:
rjmp load_column0
rjmp load_column1
rjmp load_column2
rjmp load_column3
rjmp load_column4
rjmp load_column5
rjmp load_column6
rjmp load_column7
rjmp load_column8
rjmp load_column9
rjmp load_columna
rjmp load_columnb
rjmp amend_shiftRow
; load_columnN: store the row-3 result of the previous column (left in x3j by
; the S-box) into its register, then fetch the row-3 byte of column N into x3j.
; Column 0 has no previous result to store.
load_column0:
mov x3j, x30
rjmp Sbox_one_column
load_column1:
mov x30, x3j
mov x3j, x31
rjmp Sbox_one_column
load_column2:
mov x31, x3j
mov x3j, x32
rjmp Sbox_one_column
load_column3:
mov x32, x3j
mov x3j, x33
rjmp Sbox_one_column
load_column4:
mov x33, x3j
mov x3j, x34
rjmp Sbox_one_column
load_column5:
mov x34, x3j
mov x3j, x35
rjmp Sbox_one_column
load_column6:
mov x35, x3j
mov x3j, x36
rjmp Sbox_one_column
load_column7:
mov x36, x3j
mov x3j, x37
rjmp Sbox_one_column
load_column8:
mov x37, x3j
mov x3j, x38
rjmp Sbox_one_column
load_column9:
mov x38, x3j
mov x3j, x39
rjmp Sbox_one_column
load_columna:
mov x39, x3j
mov x3j, x3a
rjmp Sbox_one_column
load_columnb:
mov x3a, x3j
mov x3j, x3b
rjmp Sbox_one_column
;;;;;;;;;;;;;;;;;;;;;;;; Real Start
; Start of one round: point Y at row 0 and Z at the column jump table, reset
; the column counter, XOR the round constant into byte 0 of row 0, step the
; 7-bit round-constant LFSR, fetch the row-1 and row-2 bytes of column 0 and
; dispatch to load_column0 through the table.
AddRC_SubColumns_Start:
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
ldi ZL, pm_lo8(load_columns_table)
ldi ZH, pm_hi8(load_columns_table)
clr ccnt
ld x0j, Y
eor x0j, rc
LFSR7_MACRO
ldd x1j, Y + ROW_INBYTES
ldd x2j, Y + 2 * ROW_INBYTES
ijmp
; Sbox_one_column: apply the S-box to the current column (x0j..x3j) and store
; rows 0..2 back to SRAM with their row shifts folded in. Row 3 stays in x3j
; and is filed into its register by the next load_columnN stub.
Sbox_one_column:
Sbox x0j, x1j, x2j, x3j
; b a 9 8 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- -- -- -- -- x- -- 1
; -- -- -- -- x' -- -- -- -- -- -- -- 7
; 4 3 2 1 0 b a 9 8 7 6 5
; Store a byte to Row 0
st Y, x0j
; Store a byte combined with ShiftRow 1
; lsl moves bit 7 of the previous column's row-1 byte (saved in t1j) into the
; carry; rol then shifts it into bit 0 of the current byte. For column 0 the
; carry comes from a stale t1j; bit 0 of that byte is corrected in
; amend_shiftRow.
lsl t1j
mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left)
rol x1j
std Y + ROW_INBYTES, x1j
; Store a byte combined with ShiftRow 2
; Row 2 is rotated by one byte: the new byte is written one position further
; along, after the old content of that position has been fetched as the
; row-2 input of the next column. The last column wraps to byte 0 of row 2.
inc ccnt
cpi ccnt, ROW_INBYTES
breq ROW2_WRAP
ldd t2j, Y + 2 * ROW_INBYTES + 1 ; load next byte, the last updated byte needed to be shifted to the address of the next bytes
std Y + 2 * ROW_INBYTES + 1, x2j
mov x2j, t2j
jmp NO_ROW2_WRAP
ROW2_WRAP:
std Y + ROW_INBYTES + 1, x2j
; remain ShiftRow3 to be done at 'amend_shiftRow'
NO_ROW2_WRAP:
; Advance to the next column: load its row-0 and row-1 bytes and jump through
; the next entry of the column table.
adiw YL, 1
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
adiw ZL, 1
ijmp
; amend_shiftRow: finish the row shifts that could not be completed column by
; column, then loop or leave.
amend_shiftRow:
; Row 1: copy bit 7 of the last column's original byte (still in t1j) into
; bit 0 of byte 0, completing the 1-bit left rotation.
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
ld x1j, Y
bst t1j, 7
bld x1j, 0
st Y, x1j
; >>> 1
; Row 3: file the last column's result, then rotate x3b..x30 right by one bit.
; The leading ror on x3j (a copy of x3b) only seeds the carry with bit 0 of
; the top byte. Combined with the 7-byte left rotation below this gives a net
; left rotation of 55 bits.
mov x3b, x3j
ror x3j
ror x3a
ror x39
ror x38
ror x37
ror x36
ror x35
ror x34
ror x33
ror x32
ror x31
ror x30
ror x3b
; <<< 56
; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5
; The commented-out move chain below is the 8-register sequence and is not
; assembled; the active 12-register chain follows it.
;mov x3j, x30
;mov x30, x35
;mov x35, x32
;mov x32, x37
;mov x37, x34
;mov x34, x31
;mov x31, x36
;mov x36, x33
;mov x33, x3j
; Byte rotation done as a chain of register moves; x3j is the temporary.
mov x3j, x30
mov x30, x35
mov x35, x3a
mov x3a, x33
mov x33, x38
mov x38, x31
mov x31, x36
mov x36, x3b
mov x3b, x34
mov x34, x39
mov x39, x32
mov x32, x37
mov x37, x3j
dec rcnt
breq round_loop_end
rjmp round_loop_start
; All rounds done: write row 3 back to SRAM and restore the saved registers.
round_loop_end:
ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
st Y+, x38
st Y+, x39
st Y+, x3a
st Y+, x3b
POP_CONFLICT
ret
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.0 2020 by KNOT Team *
; **********************************************
;
#include "assist.h"
; Permutation, variant with 16-byte rows (x30..x3f), i.e. the 512-bit state.
; Runs rn rounds over the state stored at SRAM_STATE. Rows 0-2 stay in SRAM
; and are processed one byte column at a time; row 3 is held in x30..x3f for
; the whole call and is written back at round_loop_end.
Permutation:
PUSH_CONFLICT
mov rcnt, rn ; rcnt counts the rounds that are still to run
ldi rc, 0x01 ; round constant used by the first round
ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
; Y -> first byte of row 3; copy the whole row into registers
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
ld x38, Y+
ld x39, Y+
ld x3a, Y+
ld x3b, Y+
ld x3c, Y+
ld x3d, Y+
ld x3e, Y+
ld x3f, Y+
; Top of one round. Control goes straight to AddRC_SubColumns_Start; the code
; between here and that label is only reached through ijmp or rjmp.
round_loop_start:
rjmp AddRC_SubColumns_Start
; Column dispatch table, one rjmp (one program word) per entry.
; AddRC_SubColumns_Start points Z at the table start and Sbox_one_column adds
; one to Z after every column, so entry N runs right before the S-box of
; column N. The 17th entry is reached after the last column and leaves the
; column loop.
load_columns_table:
rjmp load_column0
rjmp load_column1
rjmp load_column2
rjmp load_column3
rjmp load_column4
rjmp load_column5
rjmp load_column6
rjmp load_column7
rjmp load_column8
rjmp load_column9
rjmp load_columna
rjmp load_columnb
rjmp load_columnc
rjmp load_columnd
rjmp load_columne
rjmp load_columnf
rjmp amend_shiftRow
; load_columnN: copy the row-3 result of the previous column from x3j back to
; its register, then fetch row-3 byte N into x3j as S-box input.
; load_column0 has no previous column, so it only fetches.
load_column0:
mov x3j, x30
rjmp Sbox_one_column
load_column1:
mov x30, x3j
mov x3j, x31
rjmp Sbox_one_column
load_column2:
mov x31, x3j
mov x3j, x32
rjmp Sbox_one_column
load_column3:
mov x32, x3j
mov x3j, x33
rjmp Sbox_one_column
load_column4:
mov x33, x3j
mov x3j, x34
rjmp Sbox_one_column
load_column5:
mov x34, x3j
mov x3j, x35
rjmp Sbox_one_column
load_column6:
mov x35, x3j
mov x3j, x36
rjmp Sbox_one_column
load_column7:
mov x36, x3j
mov x3j, x37
rjmp Sbox_one_column
load_column8:
mov x37, x3j
mov x3j, x38
rjmp Sbox_one_column
load_column9:
mov x38, x3j
mov x3j, x39
rjmp Sbox_one_column
load_columna:
mov x39, x3j
mov x3j, x3a
rjmp Sbox_one_column
load_columnb:
mov x3a, x3j
mov x3j, x3b
rjmp Sbox_one_column
load_columnc:
mov x3b, x3j
mov x3j, x3c
rjmp Sbox_one_column
load_columnd:
mov x3c, x3j
mov x3j, x3d
rjmp Sbox_one_column
load_columne:
mov x3d, x3j
mov x3j, x3e
rjmp Sbox_one_column
load_columnf:
mov x3e, x3j
mov x3j, x3f
rjmp Sbox_one_column
; Two-entry dispatch table, assembled only when both AEAD and HASH are enabled.
; AddRC_SubColumns_Start enters it through ijmp with Z at entry 0 (AEDH bit 2
; clear) or entry 1 (AEDH bit 2 set). Both paths rejoin at LFSR_DONE.
; LFSR7_MACRO and LFSR8_MACRO are defined elsewhere; presumably they step rc
; to the next round constant (TODO confirm against the macro definitions).
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
LFSR_table:
rjmp LFSR7
rjmp LFSR8
LFSR7:
LFSR7_MACRO
rjmp LFSR_DONE
LFSR8:
LFSR8_MACRO
rjmp LFSR_DONE
#endif
;;;;;;;;;;;;;;;;;;;;;;;; Real Start
; Start of a round: xor rc into the first byte of row 0, run the LFSR macro,
; preload the inputs of column 0 and enter the column loop through
; load_columns_table.
AddRC_SubColumns_Start:
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
clr ccnt ; column counter, used for the row-2 wrap test
ld x0j, Y
eor x0j, rc ; AddRC on row 0, byte 0
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
ldi ZL, pm_lo8(LFSR_table)
ldi ZH, pm_hi8(LFSR_table)
sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
adiw ZL, 1 ; HASH: step to the second table entry (rjmp LFSR8)
ijmp
LFSR_DONE:
#elif defined(CRYPTO_AEAD)
LFSR7_MACRO ; only AEAD
#else
LFSR8_MACRO ; only HASH
#endif
ldd x1j, Y + ROW_INBYTES ; row 1, byte 0
ldd x2j, Y + 2 * ROW_INBYTES ; row 2, byte 0
ldd t2j, Y + 2 * ROW_INBYTES + 1 ; row 2, byte 1, kept as input of the next column
ldi ZL, pm_lo8(load_columns_table)
ldi ZH, pm_hi8(load_columns_table)
ijmp ; first table entry -> load_column0
; Process one byte column: apply the Sbox macro to (x0j, x1j, x2j, x3j), then
; write rows 0-2 back to SRAM with the row-1 and row-2 rotations folded into
; the stores. The row-3 result stays in x3j until the next load_column stub
; (or amend_shiftRow after the last column) copies it to its register.
Sbox_one_column:
Sbox x0j, x1j, x2j, x3j
; f e d c b a 9 8 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2
; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3
; c b a 9 8 7 6 5 4 3 2 1 0 f e d
; Store a byte to Row 0
st Y, x0j
; Store a byte combined with ShiftRow1
lsl t1j ; carry = bit 7 of the row-1 byte saved by the previous column
mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left)
rol x1j ; shift left by one, the carry enters at bit 0
std Y + ROW_INBYTES, x1j
; For column 0 the carry does not come from this round; bit 0 of that byte is
; rewritten in amend_shiftRow.
; Store a byte combined with ShiftRow2
inc ccnt ; ccnt = index of the next column
cpi ccnt, ROW_INBYTES - 1
brsh ROW2_WRAP ; last two columns: the target slot wraps to the start of row 2
ldd tmp0, Y + 2 * ROW_INBYTES + 2 ; load next byte, the last updated byte needed to be shifted to the address of the next bytes
std Y + 2 * ROW_INBYTES + 2, x2j ; the result lands two bytes further on (rotation by 16 bits)
mov x2j, t2j ; row-2 input of the next column, read before its slot was overwritten
mov t2j, tmp0 ; and the input of the column after that
jmp NO_ROW2_WRAP
ROW2_WRAP:
std Y + ROW_INBYTES + 2, x2j ; same target minus ROW_INBYTES, i.e. wrapped around inside row 2
mov x2j, t2j
; remain ShiftRow3 to be done at 'amend_shiftRow'
NO_ROW2_WRAP:
adiw YL, 1 ; advance to the next column
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
adiw ZL, 1 ; next entry of load_columns_table
ijmp
; Reached from the last entry of load_columns_table once all 16 columns are
; done. Completes the row-1 rotation, rotates row 3 (kept in registers) by
; 1 + 24 = 25 bits and decides whether another round follows.
amend_shiftRow:
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
ld x1j, Y
bst t1j, 7 ; t1j still holds the last row-1 byte before its shift
bld x1j, 0 ; its bit 7 wraps around into bit 0 of row 1, byte 0
st Y, x1j
; <<< 1
mov x3f, x3j ; write back the row-3 result of the last column
rol x3j ; carry = bit 7 of x3f, so it wraps into bit 0 of x30
rol x30
rol x31
rol x32
rol x33
rol x34
rol x35
rol x36
rol x37
rol x38
rol x39
rol x3a
rol x3b
rol x3c
rol x3d
rol x3e
rol x3f
; <<< 24
; f e d c b a 9 8 7 6 5 4 3 2 1 0 =>
; c b a 9 8 7 6 5 4 3 2 1 0 f e d
; A single 16-step cycle through all row-3 registers, with x3j as the spare.
mov x3j, x30
mov x30, x3d
mov x3d, x3a
mov x3a, x37
mov x37, x34
mov x34, x31
mov x31, x3e
mov x3e, x3b
mov x3b, x38
mov x38, x35
mov x35, x32
mov x32, x3f
mov x3f, x3c
mov x3c, x39
mov x39, x36
mov x36, x33
mov x33, x3j
dec rcnt
breq round_loop_end ; all rounds done
rjmp round_loop_start
; All rounds done: write row 3 back from x30..x3f to SRAM, restore the
; registers saved by PUSH_CONFLICT and return.
round_loop_end:
ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
st Y+, x38
st Y+, x39
st Y+, x3a
st Y+, x3b
st Y+, x3c
st Y+, x3d
st Y+, x3e
st Y+, x3f
POP_CONFLICT
ret
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.0 2020 by KNOT Team *
; **********************************************
;
;
; ============================================
; R E G I S T E R D E F I N I T I O N S
; ============================================
;
#define mclen r16
#define radlen r17
#define tcnt r17
#define tmp0 r20
#define tmp1 r21
#define cnt0 r22
#define rn r23
#define rate r24
;
; ; AEDH = 0b000: for authenticate AD
; ; AEDH = 0b001: for encryption
; ; AEDH = 0b011: for decryption
; ; AEDH = 0b100: for hash
; #define AEDH r25 ; Register used globally within this program
;
; #define x30 r0 ; Register used without overlapping
; #define x31 r1 ; Register used without overlapping
; #define x32 r2 ; Register used without overlapping
; #define x33 r3 ; Register used without overlapping
; #define x34 r4 ; Register used without overlapping
; #define x35 r5 ; Register used without overlapping
; #define x36 r6 ; Register used without overlapping
; #define x37 r7 ; Register used without overlapping
; #define x38 r8 ; Register used without overlapping
; #define x39 r9 ; Register used without overlapping
; #define x3a r10 ; Register used without overlapping
; #define x3b r11 ; Register used without overlapping
; #define x3c r12 ; Register used without overlapping
; #define x3d r13 ; Register used without overlapping
; #define x3e r14 ; Register used without overlapping
; #define x3f r15 ; Register used without overlapping
;
; #define x0j r16 ; Register used overlapped, should be backed up before using
; #define x1j r17 ; Register used overlapped, should be backed up before using
; #define x2j r18 ; Register used overlapped, should be backed up before using
; #define x3j r19 ; Register used overlapped, should be backed up before using
;
; ; t2j used in knot512 to keep one byte in Row2 (because of rotating 16-bit),
; ; will not be interupt with LFSR which uses the overlapped register tmp1
; #define t2j r21 ; Temporary register, used freely
; #define t1j r22 ; Temporary register, used freely
; #define t3j r23 ; Temporary register, used freely
;
; #define rc r24 ; Register used overlapped, should be backed up before using
; #define rcnt r26 ; Register used overlapped, should be backed up before using
; #define ccnt r27 ; Register used overlapped, should be backed up before using
#define AEDH r25
#define x30 r0
#define x31 r1
#define x32 r2
#define x33 r3
#define x34 r4
#define x35 r5
#define x36 r6
#define x37 r7
#define x38 r8
#define x39 r9
#define x3a r10
#define x3b r11
#define x3c r12
#define x3d r13
#define x3e r14
#define x3f r15
#define x0j r16
#define x1j r17
#define x2j r18
#define x3j r19
; t2j is used in knot512 to hold one byte of Row 2 (needed because Row 2 rotates by 16 bits);
; it does not interfere with the LFSR, which uses the overlapping register tmp1
#define t2j r21
#define t1j r22
#define t3j r23
#define rc r24
#define rcnt r26
#define ccnt r27
#if (STATE_INBITS==256)
#include "knot256.h"
#elif (STATE_INBITS==384)
#include "knot384.h"
#elif (STATE_INBITS==512)
#include "knot512.h"
#else
#error "Not specified key size and state size"
#endif
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
; Save r16-r19 and r23-r31 on the stack. Used at the entry of Permutation and
; must be paired with POP_CONFLICT, which pops in the reverse order.
; r20-r22 are not saved by this macro.
.macro PUSH_CONFLICT
push r16
push r17
push r18
push r19
push r23
push r24
push r25
push r26
push r27
push r28
push r29
push r30
push r31
.endm
; Restore the registers saved by PUSH_CONFLICT (r31 down to r23, then r19 down
; to r16), i.e. in the exact reverse of the push order.
.macro POP_CONFLICT
pop r31
pop r30
pop r29
pop r28
pop r27
pop r26
pop r25
pop r24
pop r23
pop r19
pop r18
pop r17
pop r16
.endm
; Save r2-r17 and r28-r29 on the stack; must be paired with POP_ALL.
; NOTE(review): this is the set of call-saved registers in the avr-gcc calling
; convention, so the macro is presumably meant for the entry of routines that
; are called from C - confirm at the call sites.
.macro PUSH_ALL
push r2
push r3
push r4
push r5
push r6
push r7
push r8
push r9
push r10
push r11
push r12
push r13
push r14
push r15
push r16
push r17
push r28
push r29
.endm
; Restore the registers saved by PUSH_ALL in reverse order, then zero r1.
; r1 is not saved by PUSH_ALL; clearing it presumably re-establishes the
; avr-gcc zero-register convention before returning to C (TODO confirm).
.macro POP_ALL
pop r29
pop r28
pop r17
pop r16
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
pop r9
pop r8
pop r7
pop r6
pop r5
pop r4
pop r3
pop r2
clr r1
.endm
\ No newline at end of file
#ifndef __CONFIG_H__
#define __CONFIG_H__
#define CRYPTO_AEAD
//#define CRYPTO_HASH
#define MAX_MESSAGE_LENGTH 128
#define STATE_INBITS 256
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBITS 128
/* For CRYPTO_HASH */
#define CRYPTO_BITS 256
#define STATE_INBYTES ((STATE_INBITS + 7) / 8)
#define ROW_INBITS ((STATE_INBITS + 3) / 4)
#define ROW_INBYTES ((ROW_INBITS + 7) / 8)
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8)
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES
#define CRYPTO_ABYTES CRYPTO_KEYBYTES
#define CRYPTO_NOOVERLAP 1
#define MAX_ASSOCIATED_DATA_LENGTH 32
#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES)
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
/* For CRYPTO_HASH */
#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8)
#define DOMAIN_BITS 0x80
#define PAD_BITS 0x01
#define S384_R192_BITS 0x80
#if (STATE_INBITS==256)
#define C1 1
#define C2 8
#define C3 25
#elif (STATE_INBITS==384)
#define C1 1
#define C2 8
#define C3 55
#elif (STATE_INBITS==512)
#define C1 1
#define C2 16
#define C3 25
#else
#error "Not specified state size"
#endif
#ifdef CRYPTO_AEAD
/* For CRYPTO_AEAD */
#define KEY_INBITS (CRYPTO_KEYBYTES * 8)
#define KEY_INBYTES (CRYPTO_KEYBYTES)
#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8)
#define NONCE_INBYTES (CRYPTO_NPUBBYTES)
#define TAG_INBITS (CRYPTO_ABYTES * 8)
#define TAG_INBYTES (CRYPTO_ABYTES)
#if (KEY_INBITS==128) && (STATE_INBITS==256)
#define RATE_INBITS 64
#define NR_0 52
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==128) && (STATE_INBITS==384)
#define RATE_INBITS 192
#define NR_0 76
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==192) && (STATE_INBITS==384)
#define RATE_INBITS 96
#define NR_0 76
#define NR_i 40
#define NR_f 44
#elif (KEY_INBITS==256) && (STATE_INBITS==512)
#define RATE_INBITS 128
#define NR_0 100
#define NR_i 52
#define NR_f 56
#else
#error "Not specified key size and state size"
#endif
#define RATE_INBYTES ((RATE_INBITS + 7) / 8)
#define SQUEEZE_RATE_INBYTES TAG_INBYTES
#endif
#ifdef CRYPTO_HASH
/* For CRYPTO_HASH */
#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8)
#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256)
#define HASH_RATE_INBITS 32
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 68
#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 128
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 80
#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 48
#define HASH_SQUEEZE_RATE_INBITS 192
#define NR_h 104
#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512)
#define HASH_RATE_INBITS 64
#define HASH_SQUEEZE_RATE_INBITS 256
#define NR_h 140
#else
#error "Not specified hash digest size and state size"
#endif
#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8)
#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8)
#endif
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
#endif
\ No newline at end of file
#ifdef __cplusplus
extern "C" {
#endif
int crypto_aead_encrypt(
unsigned char *c,unsigned long long *clen,
const unsigned char *m,unsigned long long mlen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m,unsigned long long *outputmlen,
unsigned char *nsec,
const unsigned char *c,unsigned long long clen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#ifdef __cplusplus
}
#endif
#include <avr/io.h>
#include <avr/sfr_defs.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
extern void crypto_aead_encrypt_asm(
unsigned char *c,
const unsigned char *m,
unsigned char mlen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern int crypto_aead_decrypt_asm(
unsigned char *m,
const unsigned char *c,
unsigned char clen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern void crypto_hash_asm(
unsigned char *out,
const unsigned char *in,
unsigned char inlen
);
/*
 * AEAD encryption entry point (C wrapper around crypto_aead_encrypt_asm).
 *
 * c      output buffer; receives mlen ciphertext bytes followed by the tag
 * clen   receives mlen + TAG_INBYTES on success
 * m      plaintext, mlen bytes
 * ad     associated data, adlen bytes
 * nsec   unused
 * npub   public nonce
 * k      secret key
 *
 * Returns 0 on success, -1 if mlen or adlen cannot be represented in the
 * unsigned char length parameters of the assembly routine. Nothing is
 * written to c or *clen in that case.
 */
int crypto_aead_encrypt(
    unsigned char *c, unsigned long long *clen,
    const unsigned char *m, unsigned long long mlen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *nsec,
    const unsigned char *npub,
    const unsigned char *k
)
{
    (void)nsec;

    /*
     * The assembly routine takes both lengths as unsigned char. Passing a
     * larger value would silently drop the high bits and encrypt only part
     * of the input while *clen still reported the full length.
     * NOTE(review): config.h also defines MAX_MESSAGE_LENGTH and
     * MAX_ASSOCIATED_DATA_LENGTH; confirm whether the assembly imposes a
     * tighter limit than 255.
     */
    if (mlen > 0xFFu || adlen > 0xFFu) {
        return -1;
    }

    crypto_aead_encrypt_asm(c, m, (unsigned char)mlen,
                            ad, (unsigned char)adlen, npub, k);
    *clen = mlen + TAG_INBYTES;
    return 0;
}
/*
 * AEAD decryption entry point (C wrapper around crypto_aead_decrypt_asm).
 *
 * m      output buffer; receives clen - CRYPTO_ABYTES plaintext bytes
 * mlen   receives the plaintext length on success
 * nsec   unused
 * c      ciphertext followed by the tag, clen bytes in total
 * ad     associated data, adlen bytes
 * npub   public nonce
 * k      secret key
 *
 * Returns 0 on success. Returns -1 if the input is shorter than a tag, if a
 * length cannot be represented in the unsigned char length parameters of the
 * assembly routine, or if the assembly routine reports a tag mismatch; in the
 * mismatch case the plaintext buffer is zeroed before returning.
 */
int crypto_aead_decrypt(
    unsigned char *m, unsigned long long *mlen,
    unsigned char *nsec,
    const unsigned char *c, unsigned long long clen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *npub,
    const unsigned char *k
)
{
    unsigned long long mlen_;
    unsigned char tag_is_match;

    (void)nsec;

    if (clen < CRYPTO_ABYTES) {
        return -1;
    }
    mlen_ = clen - CRYPTO_ABYTES;

    /*
     * The assembly routine takes both lengths as unsigned char. Larger
     * values would be silently truncated and only part of the message would
     * be processed and authenticated.
     */
    if (mlen_ > 0xFFu || adlen > 0xFFu) {
        return -1;
    }

    /* Only the low byte of the returned status is examined; 0 means match. */
    tag_is_match = crypto_aead_decrypt_asm(m, c, (unsigned char)mlen_,
                                           ad, (unsigned char)adlen, npub, k);
    if (tag_is_match != 0)
    {
        /* Do not hand unauthenticated plaintext back to the caller. */
        memset(m, 0, (size_t)mlen_);
        return -1;
    }

    *mlen = mlen_;
    return 0;
}
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
#define x10 r0
#define x11 r1
#define x12 r2
#define x13 r3
#define x14 r4
#define x15 r5
#define x16 r6
#define x17 r7
; an intentionally arrangement of registers to facilitate movw
#define x20 r8
#define x21 r10
#define x22 r12
#define x23 r14
#define x24 r9
#define x25 r11
#define x26 r13
#define x27 r15
; an intentionally arrangement of registers to facilitate movw
#define x30 r16
#define x35 r18
#define x32 r20
#define x37 r22
#define x34 r17
#define x31 r19
#define x36 r21
#define x33 r23
#define t0j r24
#define t1j r25
#define x0j r27
#include "assist.h"
; Sbox i0, i1, i2, i3: bit-sliced S-box on one byte column.
; The four arguments are the bytes of rows 0-3 of that column; all four are
; updated in place and t0j is used as scratch.
; The sequence uses only mov/com/and/or/eor. Note that com also sets the carry
; flag on AVR, so the carry is not preserved across this macro.
.macro Sbox i0, i1, i2, i3
mov t0j, \i1
com \i0
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
; Permutation, variant with 8-byte rows (256-bit state).
; Runs rn rounds over the state stored at SRAM_STATE. Row 0 stays in SRAM;
; rows 1-3 are loaded into x10..x17, x20..x27 and x30..x37 and stay in
; registers until round_loop_end. Z is set to the round-constant table that
; matches the operating mode.
Permutation:
PUSH_CONFLICT
mov rcnt, rn ; rcnt counts the rounds that are still to run
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
; Y -> first byte of row 1; rows 1, 2 and 3 follow each other in SRAM
ld x10, Y+
ld x11, Y+
ld x12, Y+
ld x13, Y+
ld x14, Y+
ld x15, Y+
ld x16, Y+
ld x17, Y+
ld x20, Y+
ld x21, Y+
ld x22, Y+
ld x23, Y+
ld x24, Y+
ld x25, Y+
ld x26, Y+
ld x27, Y+
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
; Select the round-constant table: RC_LFSR6 for AEAD, RC_LFSR7 for HASH.
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
rjmp For_Hash
For_AEAD:
ldi ZL, lo8(RC_LFSR6)
ldi ZH, hi8(RC_LFSR6)
rjmp round_loop_start
For_Hash:
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#elif defined(CRYPTO_AEAD)
ldi ZL, lo8(RC_LFSR6)
ldi ZH, hi8(RC_LFSR6)
#else
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#endif
; One round.
; AddRC xors the next table byte into row 0, byte 0.
; SubColumns runs Sbox on the eight byte columns; row 0 is streamed through
; x0j from SRAM, rows 1-3 are in registers.
; ShiftRows rotates rows 1, 2 and 3 in registers by 1, 8 and 1 + 24 = 25 bits.
round_loop_start:
; AddRC
lpm t0j, Z+ ; t0j = round constant, Z steps to the next table byte
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
ld x0j, Y
eor x0j, t0j
; SubColumns
Sbox x0j, x10, x20, x30
st Y+, x0j
ld x0j, Y
Sbox x0j, x11, x21, x31
st Y+, x0j
ld x0j, Y
Sbox x0j, x12, x22, x32
st Y+, x0j
ld x0j, Y
Sbox x0j, x13, x23, x33
st Y+, x0j
ld x0j, Y
Sbox x0j, x14, x24, x34
st Y+, x0j
ld x0j, Y
Sbox x0j, x15, x25, x35
st Y+, x0j
ld x0j, Y
Sbox x0j, x16, x26, x36
st Y+, x0j
ld x0j, Y
Sbox x0j, x17, x27, x37
st Y, x0j
; ShiftRows
; Row 1 <<< 1
mov t0j, x17
rol t0j ; carry = bit 7 of x17, so it wraps into bit 0 of x10
rol x10
rol x11
rol x12
rol x13
rol x14
rol x15
rol x16
rol x17
; Row 2 <<< 8
; 7 6 5 4 3 2 1 0 => 6 5 4 3 2 1 0 7
; The register map puts x2(n) and x2(n+4) into one even/odd register pair, so
; every movw moves two bytes of the byte rotation at once.
movw t0j, x23 ; t1j:t0j <= x27:x23
movw x23, x22 ; x27:x23 <= x26:x22
movw x22, x21 ; x26:x22 <= x25:x21
movw x21, x20 ; x25:x21 <= x24:x20
mov x20, t1j ; x20 <= t1j
mov x24, t0j ; x24 <= t0j
; Row 3 <<< 1
mov t0j, x37
rol t0j ; carry = bit 7 of x37, so it wraps into bit 0 of x30
rol x30
rol x31
rol x32
rol x33
rol x34
rol x35
rol x36
rol x37
; Row 3 <<< 24
; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5
; Register pairs used by the movw chain (high:low):
; x34:x30 = r17:r16, x31:x35 = r19:r18, x36:x32 = r21:r20, x33:x37 = r23:r22
movw t0j, x30 ; t1j:t0j <= x34:x30
movw x30, x35 ; x34:x30 <= x31:x35
movw x35, x32 ; x31:x35 <= x36:x32
movw x32, x37 ; x36:x32 <= x33:x37
mov x37, t1j ; x37 <= x34
mov x33, t0j ; x33 <= x30
dec rcnt
breq round_loop_end ; all rounds done
jmp round_loop_start
; All rounds done: write rows 1-3 back from registers to SRAM, restore the
; registers saved by PUSH_CONFLICT and return.
round_loop_end:
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
st Y+, x10
st Y+, x11
st Y+, x12
st Y+, x13
st Y+, x14
st Y+, x15
st Y+, x16
st Y+, x17
st Y+, x20
st Y+, x21
st Y+, x22
st Y+, x23
st Y+, x24
st Y+, x25
st Y+, x26
st Y+, x27
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
POP_CONFLICT
ret
; Round-constant tables in program memory. round_loop_start reads one byte per
; round through lpm t0j, Z+. The setup code in Permutation points Z at
; RC_LFSR6 (64 bytes) for AEAD and at RC_LFSR7 (128 bytes) for HASH; only the
; tables needed by the enabled modes are assembled.
.section .text
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
RC_LFSR6:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06
.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29
.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28
.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24
.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37
.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26
.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f
.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#elif defined(CRYPTO_AEAD)
RC_LFSR6:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06
.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29
.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28
.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24
.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37
.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26
.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f
.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00
#else
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#endif
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
; an intentionally arrangement of registers to facilitate movw
#define x20 r0
#define x21 r2
#define x22 r4
#define x23 r6
#define x24 r8
#define x25 r10
#define x26 r1
#define x27 r3
#define x28 r5
#define x29 r7
#define x2a r9
#define x2b r11
; an intentionally arrangement of registers to facilitate movw
#define x30 r22
#define x35 r20
#define x3a r18
#define x33 r16
#define x38 r14
#define x31 r12
#define x36 r23
#define x3b r21
#define x34 r19
#define x39 r17
#define x32 r15
#define x37 r13
#define t0j r24
#define t1j r25
#define x0j r25
#define x1j r27
#include "assist.h"
; Sbox i0, i1, i2, i3: bit-sliced S-box on one byte column.
; The four arguments are the bytes of rows 0-3 of that column; all four are
; updated in place and t0j is used as scratch.
; The first row is complemented with ldi/eor instead of com: com would set the
; carry flag, while ldi, mov, and, or and eor leave it untouched. The carry
; has to survive this macro because the row-1 shift in the round loop chains
; lsl/rol/adc across consecutive columns.
.macro Sbox i0, i1, i2, i3
ldi t0j, 0xFF
eor \i0, t0j
mov t0j, \i1
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
; OneColumn i0, i1, i2, i3: process the byte column that Y points at.
; Rows 0 and 1 are loaded from SRAM into i0 and i1, rows 2 and 3 are passed in
; registers (i2, i3). After the S-box, row 0 is stored back and Y advances;
; row 1 is rotated left through carry and stored back to its own slot.
; The carry entering rol is the bit shifted out of the previous column, and
; the carry leaving it is consumed by the next column.
.macro OneColumn i0, i1, i2, i3
ld \i0, Y
ldd \i1, Y + ROW_INBYTES
Sbox \i0, \i1, \i2, \i3
st Y+, \i0
rol \i1 ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES -1, \i1
.endm
; Permutation, variant with 12-byte rows (384-bit state).
; Runs rn rounds over the state stored at SRAM_STATE. Rows 0 and 1 stay in
; SRAM; rows 2 and 3 are loaded into x20..x2b and x30..x3b and stay in
; registers until round_loop_end. Z walks the RC_LFSR7 round-constant table.
Permutation:
PUSH_CONFLICT
mov rcnt, rn ; rcnt counts the rounds that are still to run
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
; Y -> first byte of row 2; row 3 follows directly in SRAM
ld x20, Y+
ld x21, Y+
ld x22, Y+
ld x23, Y+
ld x24, Y+
ld x25, Y+
ld x26, Y+
ld x27, Y+
ld x28, Y+
ld x29, Y+
ld x2a, Y+
ld x2b, Y+
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
ld x38, Y+
ld x39, Y+
ld x3a, Y+
ld x3b, Y+
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
; One round.
; Column 0 is written out by hand because it also takes the round constant
; and starts the row-1 carry chain with lsl; columns 1-11 use OneColumn.
; Rows 2 and 3 are then rotated in registers by 8 and by 56 - 1 = 55 bits.
round_loop_start:
; AddRC
lpm t0j, Z+ ; t0j = round constant, Z steps to the next table byte
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
ld x0j, Y
eor x0j, t0j
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x20, x30
st Y+, x0j
lsl x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES -1, x1j
OneColumn x0j, x1j, x21, x31
OneColumn x0j, x1j, x22, x32
OneColumn x0j, x1j, x23, x33
OneColumn x0j, x1j, x24, x34
OneColumn x0j, x1j, x25, x35
OneColumn x0j, x1j, x26, x36
OneColumn x0j, x1j, x27, x37
OneColumn x0j, x1j, x28, x38
OneColumn x0j, x1j, x29, x39
OneColumn x0j, x1j, x2a, x3a
OneColumn x0j, x1j, x2b, x3b
; Close the row-1 rotation: Y has advanced by 12 bytes and now points at
; row 1, byte 0, whose bit 0 was cleared by the lsl above.
ld x1j, Y
eor t0j, t0j ; t0j = 0; eor leaves the carry untouched
adc x1j, t0j ; add the bit shifted out of the last row-1 byte into bit 0
st Y, x1j
; b a 9 8 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- -- -- -- -- x- -- 1
; -- -- -- -- x' -- -- -- -- -- -- -- 7
; 4 3 2 1 0 b a 9 8 7 6 5
; ShiftRows -- the last two rows
; <<< 8
; b a 9 8 7 6 5 4 3 2 1 0 => a 9 8 7 6 5 4 3 2 1 0 b
; The register map puts x2(n) and x2(n+6) into one even/odd register pair, so
; every movw moves two bytes of the byte rotation at once.
movw t0j, x25 ; t1j:t0j <= x2b:x25
movw x25, x24 ; x2b:x25 <= x2a:x24
movw x24, x23 ; x2a:x24 <= x29:x23
movw x23, x22 ; x29:x23 <= x28:x22
movw x22, x21 ; x28:x22 <= x27:x21
movw x21, x20 ; x27:x21 <= x26:x20
mov x26, t0j ; x26 <= x25
mov x20, t1j ; x20 <= x2b
; >>> 1
mov t0j, x3b
ror t0j ; carry = bit 0 of x3b, which moves into bit 7 of x3a
ror x3a
ror x39
ror x38
ror x37
ror x36
ror x35
ror x34
ror x33
ror x32
ror x31
ror x30
ror x3b ; bit 0 of x30 wraps into bit 7 of x3b
; <<< 56
; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5
; Register pairs used by the movw chain (high:low):
; x36:x30 = r23:r22, x3b:x35 = r21:r20, x34:x3a = r19:r18,
; x39:x33 = r17:r16, x32:x38 = r15:r14, x37:x31 = r13:r12
movw t0j, x30 ; t1j:t0j <= x36:x30
movw x30, x35 ; x36:x30 <= x3b:x35
movw x35, x3a ; x3b:x35 <= x34:x3a
movw x3a, x33 ; x34:x3a <= x39:x33
movw x33, x38 ; x39:x33 <= x32:x38
movw x38, x31 ; x32:x38 <= x37:x31
mov x31, t1j ; x31 <= x36
mov x37, t0j ; x37 <= x30
dec rcnt
breq round_loop_end ; all rounds done
jmp round_loop_start
; All rounds done: write rows 2 and 3 back from registers to SRAM, restore the
; registers saved by PUSH_CONFLICT and return.
round_loop_end:
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
st Y+, x20
st Y+, x21
st Y+, x22
st Y+, x23
st Y+, x24
st Y+, x25
st Y+, x26
st Y+, x27
st Y+, x28
st Y+, x29
st Y+, x2a
st Y+, x2b
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
st Y+, x38
st Y+, x39
st Y+, x3a
st Y+, x3b
POP_CONFLICT
ret
; Round-constant table (128 bytes) in program memory. Permutation points Z at
; it and round_loop_start reads one byte per round through lpm t0j, Z+.
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
;
; ============================================
; R E G I S T E R D E F I N I T I O N S
; ============================================
;
#define mclen r16
#define radlen r17
#define tcnt r17
#define tmp0 r20
#define tmp1 r21
#define cnt0 r22
#define rn r23
#define rate r24
; AEDH = 0b000: for authenticate AD
; AEDH = 0b001: for encryption
; AEDH = 0b011: for decryption
; AEDH = 0b100: for hash
#define AEDH r25
#define rcnt r26
#if (STATE_INBITS==256)
#include "knot256.h"
#elif (STATE_INBITS==384)
#include "knot384.h"
#elif (STATE_INBITS==512)
#include "knot512.h"
#else
#error "Not specified key size and state size"
#endif
#include"auxFormat.h"
//puck begin//
/*
 * Converts three packed 32-bit words (in[0..2]) back into 12 output bytes.
 *
 * Each packed word is split into three bit fields (top, middle, bottom) that
 * are moved to the top of a 32-bit value and expanded with unpuckU32ToThree
 * (defined in auxFormat.h). The expanded fields of the three packed words are
 * then merged at offsets 0, 1 and 2 into one output word per field. Three
 * single bits that have their own positions in the packed form are masked
 * out separately and OR-ed back in at bit 31 or bit 30.
 *
 * out  receives 12 bytes (three 32-bit words in native byte order)
 * in   three packed 32-bit words
 */
void unpackU96FormatToThreePacket(u8 * out, u32 * in) {
    u32 top[3], mid[3], low[3];
    u32 word[3];
    u32 mid_bit31, low_bit31, low_bit30;
    unsigned k;

    /* top field of every packed word, already in place */
    top[0] = in[0] & 0xffe00000;
    top[1] = in[1] & 0xffe00000;
    top[2] = in[2] & 0xffc00000;

    /* middle field, moved up to the top of the word */
    mid[0] = (in[0] & 0x001ffc00) << 11;
    mid[1] = (in[1] & 0x001ff800) << 11;
    mid[2] = (in[2] & 0x001ff800) << 11;

    /* bottom field, moved up to the top of the word */
    for (k = 0; k < 3; k++) {
        low[k] = (in[k] & 0x000003ff) << 22;
    }

    /* single bits that are carried outside the three fields */
    mid_bit31 = (in[2] & 0x00200000) << 10;
    low_bit31 = (in[1] & 0x00000400) << 21;
    low_bit30 = (in[2] & 0x00000400) << 20;

    for (k = 0; k < 3; k++) {
        unpuckU32ToThree(top[k]);
    }
    for (k = 0; k < 3; k++) {
        unpuckU32ToThree(mid[k]);
    }
    for (k = 0; k < 3; k++) {
        unpuckU32ToThree(low[k]);
    }

    word[2] = top[0] | (top[1] >> 1) | (top[2] >> 2);
    word[1] = mid_bit31 | ((mid[0] | (mid[1] >> 1) | (mid[2] >> 2)) >> 1);
    word[0] = low_bit30 | low_bit31 | ((low[0] | (low[1] >> 1) | (low[2] >> 2)) >> 2);

    memcpy(out, word, 12);
}
/*
 * Packs 12 input bytes (three 32-bit words in native byte order) into three
 * packed 32-bit words.
 *
 * For every input word three copies shifted left by 0, 1 and 2 bits are
 * passed through puckU32ToThree (defined in auxFormat.h). out[k] combines the
 * k-th copies: the one from in[8..11] in the top bits, the one from in[4..7]
 * shifted right by 11 and the one from in[0..3] shifted right by 22. The bits
 * that would be lost by the initial "t1 << 1" and "t2 << 2" are captured
 * beforehand and inserted at bit 21 and bit 10.
 *
 * NOTE(review): in[7] and in[3] are treated as the most significant bytes of
 * the second and first word, which appears to assume a little-endian host.
 *
 * out  receives three packed 32-bit words
 * in   12 input bytes; no alignment requirement
 */
void packU96FormatToThreePacket(u32 * out, u8 * in) {
    u32 t0, t1, t2;
    u32 temp0[3] = { 0 };
    u32 temp1[3] = { 0 };
    u32 temp2[3] = { 0 };
    u8 t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6;

    /*
     * Load the words with memcpy instead of dereferencing (u32 *)in: the
     * byte buffer is not guaranteed to be aligned for u32, and reading it
     * through a u32 lvalue violates the aliasing rules.
     */
    memcpy(&t0, in + 2 * sizeof t0, sizeof t0);
    memcpy(&t1, in + 1 * sizeof t1, sizeof t1);
    memcpy(&t2, in, sizeof t2);
    t0 = U32BIG(t0);
    t1 = U32BIG(t1);
    t2 = U32BIG(t2);

    t1 = t1 << 1;
    t2 = t2 << 2;
    temp0[0] = t0; temp0[1] = t0 << 1; temp0[2] = t0 << 2;
    puckU32ToThree(temp0[0]);
    puckU32ToThree(temp0[1]);
    puckU32ToThree(temp0[2]);
    temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2;
    puckU32ToThree(temp1[0]);
    puckU32ToThree(temp1[1]);
    puckU32ToThree(temp1[2]);
    temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2;
    puckU32ToThree(temp2[0]);
    puckU32ToThree(temp2[1]);
    puckU32ToThree(temp2[2]);
    out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22);
    out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22);
    out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22);
}
/*
 * Packs 4 input bytes (one 32-bit word in native byte order) into the bottom
 * field of three packed 32-bit words.
 *
 * Three copies of the word, shifted left by 2, 3 and 4 bits in total, are
 * passed through puckU32ToThree (defined in auxFormat.h) and shifted right by
 * 22. The two bits lost by the initial "t2 << 2" are captured beforehand and
 * placed at bit 10 of out[1] and out[2]. All other bits of out[] are zero.
 *
 * NOTE(review): in[3] is treated as the most significant byte of the word,
 * which appears to assume a little-endian host.
 *
 * out  receives three packed 32-bit words
 * in   4 input bytes; no alignment requirement
 */
void packU32FormatToThreePacket(u32 * out, u8 * in) {
    u32 t2;
    u32 temp2[3] = { 0 };
    u8 t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6;

    /*
     * Load the word with memcpy instead of dereferencing (u32 *)in: the byte
     * buffer is not guaranteed to be aligned for u32, and reading it through
     * a u32 lvalue violates the aliasing rules.
     */
    memcpy(&t2, in, sizeof t2);
    t2 = U32BIG(t2);

    t2 = t2 << 2;
    temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2;
    puckU32ToThree(temp2[0]);
    puckU32ToThree(temp2[1]);
    puckU32ToThree(temp2[2]);
    out[0] = (temp2[0] >> 22);
    out[1] = (((u32)t2_64) << 10) | (temp2[1] >> 22);
    out[2] = (((u32)t2_65) << 10) | (temp2[2] >> 22);
}
/*
 * Extracts the bottom field of three packed 32-bit words and writes it out
 * as 4 bytes (one 32-bit word in native byte order).
 *
 * The low 10 bits of each packed word are moved to the top of a 32-bit value
 * and expanded with unpuckU32ToThree (defined in auxFormat.h); the results
 * are merged at offsets 0, 1 and 2 and shifted right by 2. Bit 10 of in[1]
 * and in[2] is carried separately and ends up at bit 31 and bit 30.
 *
 * out  receives 4 bytes; no alignment requirement
 * in   three packed 32-bit words
 */
void unpackU32FormatToThreePacket(u8 * out, u32 * in) {
    u32 temp2[3] = { 0 };
    u32 t2_64, t2_65;
    u32 t2;

    temp2[0] = (in[0] & 0x000003ff) << 22;
    t2_64 = ((in[1] & 0x00000400) << 21);
    temp2[1] = (in[1] & 0x000003ff) << 22;
    t2_65 = ((in[2] & 0x00000400) << 20);
    temp2[2] = (in[2] & 0x000003ff) << 22;
    unpuckU32ToThree(temp2[0]);
    unpuckU32ToThree(temp2[1]);
    unpuckU32ToThree(temp2[2]);
    t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2);

    /*
     * Store with memcpy instead of writing through (u32 *)out: the byte
     * buffer is not guaranteed to be aligned for u32, and writing it through
     * a u32 lvalue violates the aliasing rules.
     */
    t2 = U32BIG(t2);
    memcpy(out, &t2, sizeof t2);
}
/*
 * Runs lunNum rounds of the 384-bit permutation on the state s.
 *
 * Round i takes its constant from constant7Format[i]; that table has 80
 * entries, so lunNum must not exceed 80.
 *
 * The round steps are the macros P384_ARC_SC1, P384_2SC and P384_SR, which
 * are defined elsewhere. P384_SR() takes no arguments, so it must pick up
 * locals of this function by name; t1 and t2 are not referenced directly
 * here and are presumably scratch variables for those macros. Do not rename
 * or remove locals without checking the macro definitions.
 *
 * s       state words; the macro calls below name s[1]..s[11] explicitly
 * round   not referenced in this body
 * lunNum  number of rounds
 */
void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) {
u32 rci,t1,t2;
unsigned char i;
for (i = 0; i < lunNum; i++) {
/* The trailing backslashes below only splice the lines together; they have no other effect. */
rci=constant7Format[i];\
P384_ARC_SC1(rci,s[3],s[6],s[9]); \
P384_2SC(s[1],s[4],s[7],s[10],s[2],s[5],s[8],s[11]);\
P384_SR();\
}
}
/*
 * Round constants for P384, indexed by round number: 80 entries
 * (six rows of 12 plus one row of 8).
 * NOTE(review): presumably these are the first 80 values of the 7-bit LFSR
 * sequence (period 127), stored in the bit order that P384_ARC_SC1 expects;
 * confirm against the specification.
 */
unsigned char constant7Format[80] = {
0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90,
0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2,
0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99,
0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b,
0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92,
0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8,
0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,};
//#include<malloc.h>
#include"crypto_aead.h" #include"crypto_aead.h"
#include"api.h" #include"api.h"
#include <string.h>
#include <stdio.h> #include <stdio.h>
#include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h>
#define U32BIG(x) (x) #define U32BIG(x) (x)
typedef unsigned char u8; typedef unsigned char u8;
typedef unsigned int u32; typedef unsigned int u32;
typedef unsigned long long u64; typedef unsigned long long u64;
#define aead_RATE (192 / 8)
#define PR0_ROUNDS 76
#define PR_ROUNDS 28
#define PRF_ROUNDS 32
#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) #define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) #define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n))))
//////////////////puck begin //////////////////puck begin
//&:5 <<:4 |:4 //&:5 <<:4 |:4
#define puckU32ToThree(x){\ #define puckU32ToThree(x){\
...@@ -31,119 +35,94 @@ x = (x | (x >> 8)) & 0xf00f00f0;\ ...@@ -31,119 +35,94 @@ x = (x | (x >> 8)) & 0xf00f00f0;\
x = (x | (x >> 4)) & 0xc30c30c3;\ x = (x | (x >> 4)) & 0xc30c30c3;\
x = (x | (x >> 2)) & 0x92492492;\ x = (x | (x >> 2)) & 0x92492492;\
} }
//ʹ u8 t2_64, t2_65;u32 temp2[3];t2; unsigned char constant7Format[80];
/*
 * packU32FormatToThreePacket(out, in): statement macro that converts the
 * 32-bit word read from byte pointer `in` into three words out[0..2].
 * The two bits taken from in[3] (masks 0x80 and 0x40) are carried separately
 * in t2_64/t2_65 and re-inserted at bit 10 of out[1] and out[2].
 * Requires these variables in the expanding scope: t2, t2_64, t2_65 and
 * u32 temp2[3]; all of them are overwritten.
 * NOTE(review): `in` is read through a (u32*) cast, so it presumably has to
 * be 4-byte aligned on targets without unaligned loads - confirm at call sites.
 */
#define packU32FormatToThreePacket( out, in) {\
t2 = U32BIG(((u32*)in)[0]); \
t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \
t2 = t2 << 2; \
temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \
puckU32ToThree(temp2[0]); \
puckU32ToThree(temp2[1]); \
puckU32ToThree(temp2[2]); \
out[0] = (temp2[0] >> 22); \
out[1] = (((u32)t2_64) << 10) | (temp2[1] >> 22); \
out[2] =(((u32)t2_65) << 10) | (temp2[2] >> 22); \
}
/*
 * packU96FormatToThreePacket(out, in): statement macro that converts the
 * 12 bytes at byte pointer `in` (read as three 32-bit words) into the three
 * packed words out[0..2]. Each input word is shifted, passed three times
 * through puckU32ToThree, and the results are merged into the top, middle
 * and bottom fields of out[0..2]; the bits shifted out beforehand
 * (t1_32, t2_64, t2_65) are re-inserted at bits 21 and 10.
 * Variables that must exist in the expanding scope (all overwritten):
 * t9 t1 t2 t1_32 t2_64 t2_65 temp0[3] temp1[3] temp2[3]
 * NOTE(review): `in` is read through a (u32*) cast - presumably requires
 * 4-byte alignment; confirm at call sites.
 */
#define packU96FormatToThreePacket(out, in) {\
t9 = U32BIG(((u32*)in)[2]); \
t1 = U32BIG(((u32*)in)[1]); \
t2 = U32BIG(((u32*)in)[0]); \
t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \
t1 = t1 << 1; \
t2 = t2 << 2; \
temp0[0] = t9; temp0[1] = t9 << 1; temp0[2] = t9 << 2; \
puckU32ToThree(temp0[0]); \
puckU32ToThree(temp0[1]); \
puckU32ToThree(temp0[2]); \
temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; \
puckU32ToThree(temp1[0]); \
puckU32ToThree(temp1[1]); \
puckU32ToThree(temp1[2]); \
temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \
puckU32ToThree(temp2[0]); \
puckU32ToThree(temp2[1]); \
puckU32ToThree(temp2[2]); \
out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); \
out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); \
out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); \
}
/*
 * unpackU32FormatToThreePacket(out, in): statement macro that rebuilds one
 * 32-bit word from the low fields of the packed words in[0..2] and stores it
 * at `out`.
 * Uses (from the expanding scope, all overwritten): t2_64, t2_65,
 * u32 temp2[3] and t2.
 * NOTE(review): the original note listed t2_64/t2_65 as u8, but here they
 * receive values shifted up to bits 31 and 30, so they need to be 32 bits wide.
 * NOTE(review): the result is stored through a (u32*) cast of `out` -
 * presumably requires 4-byte alignment; confirm at call sites.
 */
#define unpackU32FormatToThreePacket(out, in) {\
temp2[0] = (in[0] & 0x000003ff) << 22; \
t2_64 = ((in[1] & 0x00000400) << 21); \
temp2[1] = (in[1] & 0x000003ff) << 22; \
t2_65 = ((in[2] & 0x00000400) << 20); \
temp2[2] = (in[2] & 0x000003ff) << 22; \
unpuckU32ToThree(temp2[0]); \
unpuckU32ToThree(temp2[1]); \
unpuckU32ToThree(temp2[2]); \
t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \
*(u32*)(out) = U32BIG(t2); \
}
/*
 * unpackU96FormatToThreePacket(out, in): statement macro that converts the
 * three packed words in[0..2] back into three 32-bit words and stores them
 * at out, out + 4 and out + 8.
 * Variables that must exist in the expanding scope (all overwritten):
 * u32 temp0[3], temp1[3], temp2[3]; u32 t1_32, t2_64, t2_65; t9, t1, t2.
 * NOTE(review): the results are stored through (u32*) casts of `out` -
 * presumably requires 4-byte alignment; confirm at call sites.
 */
#define unpackU96FormatToThreePacket( out, in) {\
temp0[0] = in[0] & 0xffe00000; \
temp1[0] = (in[0] & 0x001ffc00) << 11; \
temp2[0] = (in[0] & 0x000003ff) << 22; \
temp0[1] = in[1] & 0xffe00000; \
temp1[1] = (in[1] & 0x001ff800) << 11; \
t2_64 = ((in[1] & 0x00000400) << 21); \
temp2[1] = (in[1] & 0x000003ff) << 22; \
temp0[2] = in[2] & 0xffc00000; \
t1_32 = ((in[2] & 0x00200000) << 10); \
temp1[2] = (in[2] & 0x001ff800) << 11; \
t2_65 = ((in[2] & 0x00000400) << 20); \
temp2[2] = (in[2] & 0x000003ff) << 22; \
unpuckU32ToThree(temp0[0]); \
unpuckU32ToThree(temp0[1]); \
unpuckU32ToThree(temp0[2]); \
t9 = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; \
unpuckU32ToThree(temp1[0]); \
unpuckU32ToThree(temp1[1]); \
unpuckU32ToThree(temp1[2]); \
t1 = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); \
unpuckU32ToThree(temp2[0]); \
unpuckU32ToThree(temp2[1]); \
unpuckU32ToThree(temp2[2]); \
t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \
*(u32*)(out) = U32BIG(t2); \
*(u32*)(out + 4) = U32BIG(t1); \
*(u32*)(out + 8) = U32BIG(t9); \
}
#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
/*
 * sbox(a, b, c, d, e, f, g, h): bit-sliced S-box as a statement macro.
 * Reads the four input words a..d and writes the four output words e..h;
 * the inputs themselves are not assigned.
 * Requires scratch variables t1, t2, t3, t5, t6, t8, t9 and t11 in the
 * expanding scope. Arguments are used unparenthesized, so pass plain lvalues.
 */
#define sbox(a, b, c, d, e, f, g, h) \
{ \
t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; e = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
}
/*
 * Row-rotation helpers for a 96-bit row held as three packed words.
 * Inputs are (t0, t1, t2), outputs are (t3, t4, t5); each output is one of
 * the inputs, optionally rotated left with LOTR32. Outputs must not alias
 * inputs that are read later in the same macro.
 * Presumably these implement 96-bit left rotations by 1, 8 and 55 bits in
 * the packed representation (see the "55=3*18+1" note) - TODO confirm.
 */
#define U96_BIT_LOTR32_1(t0,t1,t2,t3,t4,t5){\
t3= t1;\
t4 = t2;\
t5 = LOTR32(t0, 1); \
}
#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\
t3= LOTR32(t2, 2);\
t4 =LOTR32(t0, 3);\
t5 = LOTR32(t1, 3); \
}
//55=3*18+1
#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\
t3= LOTR32(t1, 18); \
t4 = LOTR32(t2, 18);\
t5 = LOTR32(t0, 19); \
}
/*
s0 s1 s2
s3 s4 s5
s6 s7 s8
s9 s10 s11
*/
void printU32State(char name[], u32* var, long len); #define P384_ARC_SC1(rci,S2,S3,S4) \
void printfU96Format(char name[], u32 * s); do { \
//////////////////puck end __asm__ __volatile__ ( \
void printU8(char name[], u8 var[], int len, int offset); "/*add round const s0 s1 s2 */ \n\t"\
void printfU96Format(char name[], u32 * s); "ands %[t1], %[rci], #0xc0\n\t" \
"eors %[S_0], %[S_0], %[t1], LSR #6 \n\t" /*s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;*/\
"ands %[t1], %[rci], #0x38\n\t" \
"eors %[S_1], %[S_1], %[t1], LSR #3 \n\t" /*s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;*/\
"ands %[t1], %[rci], #0x7\n\t" \
"eors %[S_3], %[S_3], %[t1] \n\t" /*s[2] ^= constant7Format[lunNum] & 0x7;*/\
"/*sbox column*/ \n\t"\
"mvns %[S_0], %[S_0] \n\t"\
"ands %[t1], %[S_2], %[S_0] \n\t"\
"eors %[t1], %[S_4], %[t1] \n\t"\
"orrs %[S_4], %[S_2], %[S_4] \n\t"\
"eors %[S_0], %[S_6], %[S_0] \n\t"\
"eors %[S_4], %[S_4], %[S_0] \n\t"\
"eors %[t2], %[S_2], %[S_6] \n\t"\
"eors %[S_6], %[S_6], %[t1] \n\t"\
"ands %[S_0], %[t1],%[S_0] \n\t"\
"eors %[S_0], %[t2],%[S_0] \n\t"\
"ands %[S_2], %[S_4], %[t2] \n\t"\
"eors %[S_2], %[t1], %[S_2] \n\t"\
: /* output variables - including inputs that are changed */\
[t1] "=r" (t1), [t2] "=r" (t2), [rci] "+r" (rci), \
[S_0] "+r" (s[0]), [S_1] "+r" (s[1]), [S_3] "+r" (s[2]),\
[S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \
: : );\
}while (0)
/*
 * P384_2SC(S1..S8): apply the bit-sliced S-box to two state columns in one
 * inline-asm statement. (S1,S2,S3,S4) are the four words of the first
 * column and (S5,S6,S7,S8) the four words of the second; all eight are
 * updated in place.
 * Requires u32 scratch variables t1 and t2 in the expanding scope.
 * Every instruction used here is a flag-setting form (mvns/ands/eors/orrs),
 * so the condition flags are declared as clobbered with "cc"; without it the
 * compiler may keep a live comparison result across the statement.
 */
#define P384_2SC(S1,S2,S3,S4,S5,S6,S7,S8) \
do { \
__asm__ __volatile__ ( \
"/*sbox column*/ \n\t"\
"mvns %[S_0], %[S_0] \n\t"\
"ands %[t1], %[S_2], %[S_0] \n\t"\
"eors %[t1], %[S_4], %[t1] \n\t"\
"orrs %[S_4], %[S_2], %[S_4] \n\t"\
"eors %[S_0], %[S_6], %[S_0] \n\t"\
"eors %[S_4], %[S_4], %[S_0] \n\t"\
"eors %[t2], %[S_2], %[S_6] \n\t"\
"eors %[S_6], %[S_6], %[t1] \n\t"\
"ands %[S_0], %[t1],%[S_0] \n\t"\
"eors %[S_0], %[t2],%[S_0] \n\t"\
"ands %[S_2], %[S_4], %[t2] \n\t"\
"eors %[S_2], %[t1], %[S_2] \n\t"\
"/*sbox column*/ \n\t"\
"mvns %[S_1], %[S_1] \n\t"\
"ands %[t1], %[S_3], %[S_1] \n\t"\
"eors %[t1], %[S_5], %[t1] \n\t"\
"orrs %[S_5], %[S_3], %[S_5] \n\t"\
"eors %[S_1], %[S_7], %[S_1] \n\t"\
"eors %[S_5], %[S_5], %[S_1] \n\t"\
"eors %[t2], %[S_3], %[S_7] \n\t"\
"eors %[S_7], %[S_7], %[t1] \n\t"\
"ands %[S_1], %[t1],%[S_1] \n\t"\
"eors %[S_1], %[t2],%[S_1] \n\t"\
"ands %[S_3], %[S_5], %[t2] \n\t"\
"eors %[S_3], %[t1], %[S_3] \n\t"\
: /* output variables - including inputs that are changed */\
[t1] "=r" (t1), [t2] "=r" (t2),\
[S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) ,\
[S_1] "+r" (S5), [S_3] "+r" (S6), [S_5] "+r" (S7), [S_7] "+r" (S8)\
: /* no input-only operands */ \
: /* clobbers: condition flags */ "cc" );\
}while (0)
/*
 * P384_SR(): row-rotation step as one inline-asm statement operating
 * directly on s[3]..s[11] (the variable `s` and a u32 scratch `t1` must
 * exist in the expanding scope).
 *   s[3..5]  <- s[4], s[5], ror(s[3], 31)
 *   s[6..8]  <- ror(s[8], 30), ror(s[6], 29), ror(s[7], 29)
 *   s[9..11] <- ror(s[10], 14), ror(s[11], 14), ror(s[9], 13)
 * A right rotation by 32-n equals a left rotation by n, so these are left
 * rotations by 1, by 2/3/3 and by 18/18/19 respectively.
 * s[0..2] are not touched.
 */
#define P384_SR() \
do { \
__asm__ __volatile__ ( \
"/*rotate shift left 1 bit [w9 w5 w1-> (w1,1) w9 w5] */ \n\t"\
"mov %[t1], %[S_3] \n\t"\
"mov %[S_3], %[S_4] \n\t"\
"mov %[S_4], %[S_5] \n\t"\
"ROR %[S_5], %[t1] , #31 \n\t"\
"/*rotate shift left 8 bits [w10 w6 w2-> (w6,3) (w2,3) ( w10,2)]*/ \n\t"\
"mov %[t1], %[S_8] \n\t"\
"ROR %[S_8], %[S_7] , #29 \n\t"\
"ROR %[S_7], %[S_6] , #29 \n\t"\
"ROR %[S_6], %[t1] , #30 \n\t"\
"/*rotate shift left 55 bit [w11 w7 w3-> (w3,13) (w11,14) ( w7,14)] */ \n\t"\
"mov %[t1], %[S_9] \n\t"\
"ROR %[S_9], %[S_10] , #14 \n\t"\
"ROR %[S_10], %[S_11] , #14 \n\t"\
"ROR %[S_11], %[t1] , #13 \n\t"\
: /* output variables - including inputs that are changed */\
[t1] "=r" (t1),\
[S_3] "+r" (s[3]), [S_6] "+r" (s[6]), [S_9] "+r" (s[9]) ,\
[S_4] "+r" (s[4]), [S_7] "+r" (s[7]), [S_10] "+r" (s[10]),\
[S_5] "+r" (s[5]), [S_8] "+r" (s[8]), [S_11] "+r" (s[11])\
: : );\
}while (0)
int crypto_aead_encrypt( int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen, unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen, const unsigned char *m, unsigned long long mlen,
......
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#include"auxFormat.h"
//puck begin//
/*
 * Convert three packed words in[0..2] back into 12 bytes at `out`.
 * Each packed word is split into three bit-fields, every field is spread out
 * again with unpuckU32ToThree, and the three fields of equal rank are merged
 * into one 32-bit word. The single bits that are stored out of band
 * (in[1] bit 10, in[2] bits 21 and 10) are put back at bits 31/30.
 * The result is copied to `out` with memcpy in host byte order, so `out`
 * needs no particular alignment.
 */
void unpackU96FormatToThreePacket(u8 * out, u32 * in) {
	u32 upper[3], middle[3], lower[3];
	u32 word[3];
	u32 carry32, carry64, carry65;
	int k;

	/* top field of every packed word (in[2] keeps one bit fewer) */
	upper[0] = in[0] & 0xffe00000;
	upper[1] = in[1] & 0xffe00000;
	upper[2] = in[2] & 0xffc00000;
	/* middle field, moved up to the top of the word */
	middle[0] = (in[0] & 0x001ffc00) << 11;
	middle[1] = (in[1] & 0x001ff800) << 11;
	middle[2] = (in[2] & 0x001ff800) << 11;
	/* bottom ten bits, moved up to the top of the word */
	for (k = 0; k < 3; k++) {
		lower[k] = (in[k] & 0x000003ff) << 22;
	}
	/* out-of-band single bits */
	carry64 = (in[1] & 0x00000400) << 21;
	carry32 = (in[2] & 0x00200000) << 10;
	carry65 = (in[2] & 0x00000400) << 20;

	for (k = 0; k < 3; k++) {
		unpuckU32ToThree(upper[k]);
		unpuckU32ToThree(middle[k]);
		unpuckU32ToThree(lower[k]);
	}

	word[2] = upper[0] | (upper[1] >> 1) | (upper[2] >> 2);
	word[1] = carry32 | ((middle[0] | (middle[1] >> 1) | (middle[2] >> 2)) >> 1);
	word[0] = carry65 | carry64 | ((lower[0] | (lower[1] >> 1) | (lower[2] >> 2)) >> 2);
	memcpy(out, word, sizeof word);
}
/*
 * Convert the 12 bytes at `in` into the three packed words out[0..2].
 * The bytes are read as three 32-bit words in host byte order (U32BIG is
 * applied to each). Every word is passed three times through
 * puckU32ToThree at shifts 0/1/2 and the results are merged into the top,
 * middle and bottom fields of out[0..2]; the bits lost to the initial
 * left shifts (t1_32, t2_64, t2_65) are re-inserted at bits 21 and 10.
 *
 * The words are loaded with memcpy rather than through a (u32*) cast:
 * callers pass arbitrary byte pointers (for example `ad + 12`), and a cast
 * load from such a pointer is undefined behaviour for misaligned addresses
 * and violates the effective-type rules.
 */
void packU96FormatToThreePacket(u32 * out, u8 * in) {
	u32 t0, t1, t2;
	u32 temp0[3] = { 0 };
	u32 temp1[3] = { 0 };
	u32 temp2[3] = { 0 };
	u8 t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6;

	memcpy(&t0, in + 8, sizeof t0);
	memcpy(&t1, in + 4, sizeof t1);
	memcpy(&t2, in, sizeof t2);
	t0 = U32BIG(t0);
	t1 = U32BIG(t1);
	t2 = U32BIG(t2);

	t1 = t1 << 1;
	t2 = t2 << 2;
	temp0[0] = t0; temp0[1] = t0 << 1; temp0[2] = t0 << 2;
	puckU32ToThree(temp0[0]);
	puckU32ToThree(temp0[1]);
	puckU32ToThree(temp0[2]);
	temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2;
	puckU32ToThree(temp1[0]);
	puckU32ToThree(temp1[1]);
	puckU32ToThree(temp1[2]);
	temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2;
	puckU32ToThree(temp2[0]);
	puckU32ToThree(temp2[1]);
	puckU32ToThree(temp2[2]);
	out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22);
	out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22);
	out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22);
}
/*
 * Convert the 4 bytes at `in` into three packed words out[0..2], filling
 * only the low fields. The word is read in host byte order (U32BIG is
 * applied); its two top bits (from in[3], masks 0x80 and 0x40) are kept in
 * t2_64/t2_65 and placed at bit 10 of out[1] and out[2].
 *
 * The word is loaded with memcpy rather than through a (u32*) cast so that
 * `in` may be any byte address (no alignment or aliasing assumptions).
 */
void packU32FormatToThreePacket(u32 * out, u8 * in) {
	u32 t2;
	u32 temp2[3] = { 0 };
	u8 t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6;

	memcpy(&t2, in, sizeof t2);
	t2 = U32BIG(t2);

	t2 = t2 << 2;
	temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2;
	puckU32ToThree(temp2[0]);
	puckU32ToThree(temp2[1]);
	puckU32ToThree(temp2[2]);
	out[0] = (temp2[0] >> 22);
	out[1] = (((u32)t2_64) << 10) | (temp2[1] >> 22);
	out[2] = (((u32)t2_65) << 10) | (temp2[2] >> 22);
}
/*
 * Rebuild one 32-bit word from the low fields of the packed words in[0..2]
 * and write its 4 bytes to `out` in host byte order (U32BIG is applied).
 * Bit 10 of in[1] and of in[2] are restored to bits 31 and 30 of the result.
 *
 * The result is stored with memcpy rather than through a (u32*) cast so
 * that `out` may be any byte address (no alignment or aliasing assumptions).
 */
void unpackU32FormatToThreePacket(u8 * out, u32 * in) {
	u32 temp2[3] = { 0 };
	u32 t2_64, t2_65;
	u32 t2;

	temp2[0] = (in[0] & 0x000003ff) << 22;
	t2_64 = ((in[1] & 0x00000400) << 21);
	temp2[1] = (in[1] & 0x000003ff) << 22;
	t2_65 = ((in[2] & 0x00000400) << 20);
	temp2[2] = (in[2] & 0x000003ff) << 22;
	unpuckU32ToThree(temp2[0]);
	unpuckU32ToThree(temp2[1]);
	unpuckU32ToThree(temp2[2]);
	t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2);
	t2 = U32BIG(t2);
	memcpy(out, &t2, sizeof t2);
}
/*
 * Run lunNum rounds of the 384-bit permutation on the 12-word state s,
 * taking the constant for round r from round[r].
 * Per round:
 *   - the constant byte is split into bits 7..6, 5..3 and 2..0 and XORed
 *     into s[0], s[1] and s[2];
 *   - the sbox macro is applied to the three columns (s[c], s[c+3], s[c+6],
 *     s[c+9]); row 0 is updated in place, the other outputs go to `next`
 *     or straight into their rotated position;
 *   - rows 1..3 are rotated (row 1 by moving words and one LOTR32 by 1,
 *     rows 2 and 3 via U96_BIT_LOTR32_8 / U96_BIT_LOTR32_55).
 * t1..t11 are scratch variables referenced by name inside the sbox macro.
 */
void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) {
	u32 next[12] = { 0 };
	u32 t1, t2, t3, t5, t6, t8, t9, t11;
	unsigned char r = 0;

	while (r < lunNum) {
		const unsigned char rc = round[r];

		/* add round constant to the first row */
		s[0] ^= (rc >> 6) & 0x3;
		s[1] ^= (rc >> 3) & 0x7;
		s[2] ^= rc & 0x7;

		/* column S-boxes; columns 1 and 2 write row 1 directly into s[3], s[4] */
		sbox(s[0], s[3], s[6], s[9], next[3], next[6], next[9]);
		sbox(s[1], s[4], s[7], s[10], s[3], next[7], next[10]);
		sbox(s[2], s[5], s[8], s[11], s[4], next[8], next[11]);

		/* finish the row rotations */
		s[5] = LOTR32(next[3], 1);
		U96_BIT_LOTR32_8(next[6], next[7], next[8], s[6], s[7], s[8]);
		U96_BIT_LOTR32_55(next[9], next[10], next[11], s[9], s[10], s[11]);

		r++;
	}
}
//12*7=84
/*
 * Round-constant table with 80 one-byte entries, passed to P384() as its
 * `round` argument. P384() splits each byte into bits 7..6, bits 5..3 and
 * bits 2..0 and XORs them into s[0], s[1] and s[2].
 * The largest round count defined in this code is PR0_ROUNDS (76).
 */
unsigned char constant7Format[80] = {
/*constant7Format[127]:*/
0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90,
0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2,
0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99,
0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b,
0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92,
0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8,
0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,};
#include"crypto_aead.h"
#include"api.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#define U32BIG(x) (x)
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
#define aead_RATE (192 / 8)
#define PR0_ROUNDS 76
#define PR_ROUNDS 28
#define PRF_ROUNDS 32
#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n))))
/*
 * sbox(a, b, c, d, f, g, h): bit-sliced S-box as a statement macro.
 * Reads the four input words a..d; the first output overwrites `a` in place
 * and the other three outputs are written to f, g and h.
 * Requires scratch variables t1, t2, t3, t5, t6, t8, t9 and t11 in the
 * expanding scope. Arguments are used unparenthesized, so pass plain lvalues.
 */
#define sbox(a, b, c, d, f, g, h) \
{ \
t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
}
/*
 * Row-rotation helpers for a 96-bit row held as three packed words.
 * Inputs are (t0, t1, t2), outputs are (t3, t4, t5); each output is one
 * input word rotated left with LOTR32. Outputs must not alias inputs that
 * are read later in the same macro.
 * Presumably these implement 96-bit left rotations by 8 and 55 bits in the
 * packed representation (see the "55=3*18+1" note) - TODO confirm.
 */
#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\
t3= LOTR32(t2, 2);\
t4 =LOTR32(t0, 3);\
t5 = LOTR32(t1, 3); \
}
//55=3*18+1
#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\
t3= LOTR32(t1, 18); \
t4 = LOTR32(t2, 18);\
t5 = LOTR32(t0, 19); \
}
//////////////////puck begin
// Operation count of each macro below: 5 ANDs, 4 shifts, 4 ORs.
/*
 * puckU32ToThree(x): in-place bit gather on the lvalue x. Keeps every third
 * bit of x (mask 0x92492492, i.e. bits 31, 28, ..., 1) and moves the kept
 * bits together towards the top of the word through a shift-or-mask
 * cascade; the last mask is 0xfff00000.
 */
#define puckU32ToThree(x){\
x &= 0x92492492;\
x = (x | (x << 2)) & 0xc30c30c3;\
x = (x | (x << 4)) & 0xf00f00f0;\
x = (x | (x << 8)) & 0xff0000ff;\
x = (x | (x << 16)) & 0xfff00000;\
}
/*
 * unpuckU32ToThree(x): the same cascade run backwards. Starts from the top
 * bits of x (mask 0xfff00000) and spreads them out so that they end up on
 * every third bit position (final mask 0x92492492).
 */
#define unpuckU32ToThree(x){\
x &= 0xfff00000;\
x = (x | (x >> 16)) & 0xff0000ff;\
x = (x | (x >> 8)) & 0xf00f00f0;\
x = (x | (x >> 4)) & 0xc30c30c3;\
x = (x | (x >> 2)) & 0x92492492;\
}
void packU96FormatToThreePacket(u32 * out, u8 * in);
void unpackU96FormatToThreePacket(u8 * out, u32 * in);
unsigned char constant7Format[80];
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#include"auxFormat.h"
/*
 * Load nonce and key into the 12-word state and run the initial permutation.
 *   s[0..2]  <- packed npub[0..11]
 *   s[3..5]  <- packed (npub[12..15] || k[0..7])
 *   s[6..8]  <- packed (k[8..15] || four zero bytes)
 *   s[9]     <- 0x80000000
 * followed by PR0_ROUNDS rounds of P384. s[10] and s[11] are left as the
 * caller provided them.
 */
void Initialize(u32 *s, const unsigned char *npub, const unsigned char *k) {
	u8 nonceTailAndKey[24] = { 0 };

	/* bytes 0..3: end of the nonce, bytes 4..19: key, bytes 20..23 stay zero */
	memcpy(&nonceTailAndKey[0], npub + 12, sizeof(unsigned char) * 4);
	memcpy(&nonceTailAndKey[4], k, sizeof(unsigned char) * 16);

	packU96FormatToThreePacket(s, npub);
	packU96FormatToThreePacket(s + 3, nonceTailAndKey);
	packU96FormatToThreePacket(s + 6, nonceTailAndKey + 12);
	s[9] = 0x80000000;

	P384(s, constant7Format, PR0_ROUNDS);
}
/*
 * Absorb the associated data into the state.
 * When adlen is non-zero, every full aead_RATE-byte block is packed, XORed
 * into s[0..5] and followed by PR_ROUNDS rounds of P384; the remaining
 * bytes (possibly none) are then absorbed the same way as one last block,
 * zero-filled and with a 0x01 byte placed right after the data.
 * In all cases, including adlen == 0, the top bit of s[9] is flipped at
 * the end.
 */
void ProcessAssocData(u32 *s, const u8* ad, unsigned long long adlen) {
	u32 lanes[6] = { 0 };
	u8 lastBlock[24] = { 0 };
	unsigned int w;

	if (adlen != 0) {
		/* full blocks */
		for (; adlen >= aead_RATE; adlen -= aead_RATE, ad += aead_RATE) {
			packU96FormatToThreePacket(lanes, ad);
			packU96FormatToThreePacket(lanes + 3, ad + 12);
			for (w = 0; w < 6; w++) {
				s[w] ^= lanes[w];
			}
			P384(s, constant7Format, PR_ROUNDS);
		}

		/* padded final block: data, then 0x01, then zeros */
		memcpy(lastBlock, ad, adlen * sizeof(unsigned char));
		lastBlock[adlen] = 0x01;
		packU96FormatToThreePacket(lanes, lastBlock);
		packU96FormatToThreePacket(lanes + 3, lastBlock + 12);
		for (w = 0; w < 6; w++) {
			s[w] ^= lanes[w];
		}
		P384(s, constant7Format, PR_ROUNDS);
	}

	s[9] ^= 0x80000000;
}
/*
 * Encrypt mlen bytes from m into c while absorbing them into the state.
 * Nothing happens when mlen is zero. For each full aead_RATE-byte block the
 * packed plaintext is XORed into s[0..5], s[0..5] is unpacked as the
 * ciphertext block, and PR_ROUNDS rounds of P384 follow. The remaining
 * bytes (possibly none) are handled as a zero-filled block with a 0x01 byte
 * after the data; only the first mlen bytes of the unpacked state are
 * written to c and no permutation is run afterwards.
 */
void ProcessPlaintext(u32 *s, const u8* m, unsigned long long mlen, unsigned char *c) {
	u32 lanes[6] = { 0 };
	u8 block[24] = { 0 };
	unsigned int w;

	if (mlen == 0) {
		return;
	}

	/* full blocks */
	for (; mlen >= aead_RATE; mlen -= aead_RATE, m += aead_RATE, c += aead_RATE) {
		packU96FormatToThreePacket(lanes, m);
		packU96FormatToThreePacket(lanes + 3, m + 12);
		for (w = 0; w < 6; w++) {
			s[w] ^= lanes[w];
		}
		unpackU96FormatToThreePacket(c, s);
		unpackU96FormatToThreePacket(c + 12, s + 3);
		P384(s, constant7Format, PR_ROUNDS);
	}

	/* padded final block: data, then 0x01, then zeros */
	memcpy(block, m, mlen * sizeof(unsigned char));
	block[mlen] = 0x01;
	packU96FormatToThreePacket(lanes, block);
	packU96FormatToThreePacket(lanes + 3, block + 12);
	for (w = 0; w < 6; w++) {
		s[w] ^= lanes[w];
	}

	/* emit only as many ciphertext bytes as there were plaintext bytes */
	unpackU96FormatToThreePacket(block, s);
	unpackU96FormatToThreePacket(block + 12, s + 3);
	memcpy(c, block, mlen * sizeof(unsigned char));
}
/*
 * Run PRF_ROUNDS rounds of P384 and write the 16-byte tag to c:
 * the 12 bytes unpacked from s[0..2] followed by the first 4 bytes
 * unpacked from s[3..5].
 */
void Finalize_GenerateTag(u32 *s, unsigned char *c) {
	u8 secondLane[12] = { 0 };

	P384(s, constant7Format, PRF_ROUNDS);

	/* tag bytes 0..11 go straight to the output */
	unpackU96FormatToThreePacket(c, s);
	/* tag bytes 12..15 are the head of the next unpacked lane */
	unpackU96FormatToThreePacket(secondLane, s + 3);
	memcpy(&c[12], secondLane, sizeof(unsigned char) * 4);
}
/*
 * Decrypt clen bytes from c into m while absorbing the ciphertext.
 * Nothing happens when clen is zero. For each full aead_RATE-byte block the
 * plaintext is the unpacked XOR of s[0..5] with the packed ciphertext, after
 * which s[0..5] is replaced by the packed ciphertext and PR_ROUNDS rounds of
 * P384 follow. For the remaining bytes (possibly none) s[0..5] is unpacked
 * to bytes, each ciphertext byte yields one plaintext byte and replaces the
 * corresponding state byte, the byte after the data is XORed with 0x01, and
 * the bytes are packed back into s[0..5]; no permutation is run afterwards.
 */
void ProcessCiphertext(u32 *s, unsigned char *m, const unsigned char *c, unsigned long long clen)
{
	u32 ctLanes[6] = { 0 };
	u32 ptLanes[6] = { 0 };
	u8 rateBytes[24] = { 0 }, i;
	unsigned int w;

	if (clen == 0) {
		return;
	}

	/* full blocks */
	while (clen >= aead_RATE) {
		packU96FormatToThreePacket(ctLanes, c);
		packU96FormatToThreePacket(ctLanes + 3, c + 12);
		for (w = 0; w < 6; w++) {
			ptLanes[w] = s[w] ^ ctLanes[w];
		}
		unpackU96FormatToThreePacket(m, ptLanes);
		unpackU96FormatToThreePacket(m + 12, ptLanes + 3);
		for (w = 0; w < 6; w++) {
			s[w] = ctLanes[w];
		}
		P384(s, constant7Format, PR_ROUNDS);
		clen -= aead_RATE;
		m += aead_RATE;
		c += aead_RATE;
	}

	/* partial block, handled on the byte view of the rate part */
	unpackU96FormatToThreePacket(rateBytes, s);
	unpackU96FormatToThreePacket(rateBytes + 12, s + 3);
	for (i = 0; i < clen; ++i) {
		m[i] = rateBytes[i] ^ c[i];
		rateBytes[i] = c[i];
	}
	rateBytes[i] ^= 0x01;	/* padding byte right after the data */
	packU96FormatToThreePacket(s, rateBytes);
	packU96FormatToThreePacket(s + 3, rateBytes + 12);
}
/*
 * Run PRF_ROUNDS rounds of P384, recompute the tag from s[0..5] and compare
 * it with the CRYPTO_ABYTES bytes at c.
 *
 * s     state after the ciphertext has been processed (modified)
 * c     pointer to the received tag
 * m     plaintext buffer to wipe on failure
 * mlen  in: number of plaintext bytes in m; set to 0 on failure
 *
 * Returns 0 when the tags match. On mismatch the first *mlen bytes of m are
 * zeroed, *mlen is set to 0 and -1 is returned.
 *
 * The comparison accumulates the XOR of all tag bytes instead of calling
 * memcmp, so that its running time does not depend on the position of the
 * first differing byte (memcmp may return early, which leaks how much of a
 * forged tag was correct).
 */
int Finalize_VerifyTag(u32 *s, const unsigned char *c, unsigned char *m, unsigned long long *mlen) {
	u8 tempU8[24] = { 0 };
	u8 diff = 0;
	unsigned int i;

	P384(s, constant7Format, PRF_ROUNDS);
	/* recompute the tag bytes from the state */
	unpackU96FormatToThreePacket(tempU8, s);
	unpackU96FormatToThreePacket(tempU8 + 12, s + 3);

	/* constant-time compare: always visits every tag byte */
	for (i = 0; i < CRYPTO_ABYTES; i++) {
		diff |= (u8)(tempU8[i] ^ c[i]);
	}

	if (diff != 0) {
		/* never release unauthenticated plaintext */
		memset(m, 0, sizeof(unsigned char) * (*mlen));
		*mlen = 0;
		return -1;
	}
	return 0;
}
/*
 * AEAD encryption entry point.
 * Writes mlen ciphertext bytes followed by a CRYPTO_ABYTES-byte tag to c
 * and sets *clen to mlen + CRYPTO_ABYTES. nsec is not used.
 * Always returns 0.
 */
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
	const unsigned char *m, unsigned long long mlen,
	const unsigned char *ad, unsigned long long adlen,
	const unsigned char *nsec, const unsigned char *npub,
	const unsigned char *k) {
	u32 state[12] = { 0 };

	(void)nsec;
	*clen = mlen + CRYPTO_ABYTES;

	Initialize(state, npub, k);              /* nonce + key */
	ProcessAssocData(state, ad, adlen);      /* associated data */
	ProcessPlaintext(state, m, mlen, c);     /* message -> ciphertext */
	Finalize_GenerateTag(state, c + mlen);   /* tag goes right after it */

	return 0;
}
/*
 * AEAD decryption entry point.
 * c holds clen bytes: the ciphertext followed by a CRYPTO_ABYTES-byte tag.
 * On success the plaintext is written to m, *mlen is set to
 * clen - CRYPTO_ABYTES and 0 is returned. nsec is not used.
 *
 * Returns -1 when clen is too short to contain a tag or when the tag does
 * not verify; in both cases *mlen is 0 on return (and on a tag mismatch the
 * plaintext buffer is wiped by Finalize_VerifyTag).
 *
 * The length check is done before *mlen is written, so a short input no
 * longer stores a wrapped-around length, and the tag position is derived
 * from CRYPTO_ABYTES (the tag size) rather than from the key size.
 */
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
	unsigned char *nsec, const unsigned char *c, unsigned long long clen,
	const unsigned char *ad, unsigned long long adlen,
	const unsigned char *npub, const unsigned char *k) {
	u32 s[12] = { 0 };

	(void)nsec;
	if (clen < CRYPTO_ABYTES) {
		*mlen = 0;
		return -1;
	}
	*mlen = clen - CRYPTO_ABYTES;

	// initialization
	Initialize(s, npub, k);
	// process associated data
	ProcessAssocData(s, ad, adlen);
	ProcessCiphertext(s, m, c, *mlen);
	// finalization: the tag starts right after the ciphertext bytes
	return Finalize_VerifyTag(s, c + *mlen, m, mlen);
}
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.0 2020 by KNOT Team *
; **********************************************
;
; LFSR6_MACRO: advance the round constant in rc as a 6-bit LFSR.
; The feedback bit is rc[5] XOR rc[4]; rc is shifted left by one with the
; feedback entering at bit 0, then masked to 6 bits.
; Uses tmp0 and tmp1 as scratch and changes the T flag and the carry.
.macro LFSR6_MACRO
bst rc, 5
bld tmp0, 0
bst rc, 4
bld tmp1, 0
eor tmp0, tmp1
ror tmp0
rol rc
andi rc, 0x3F
.endm
; LFSR7_MACRO: advance the round constant in rc as a 7-bit LFSR.
; The feedback bit is rc[6] XOR rc[5]; rc is shifted left by one with the
; feedback entering at bit 0, then masked to 7 bits.
; Uses tmp0 and tmp1 as scratch and changes the T flag and the carry.
.macro LFSR7_MACRO
bst rc, 6
bld tmp0, 0
bst rc, 5
bld tmp1, 0
eor tmp0, tmp1
ror tmp0
rol rc
andi rc, 0x7F
.endm
; LFSR8_MACRO: advance the round constant in rc as an 8-bit LFSR.
; The feedback bit is rc[7] XOR rc[5] XOR rc[4] XOR rc[3]; rc is shifted
; left by one with the feedback entering at bit 0 (no mask is needed for
; the full 8-bit width).
; Uses tmp0 and tmp1 as scratch and changes the T flag and the carry.
.macro LFSR8_MACRO
bst rc, 7
bld tmp0, 0
bst rc, 5
bld tmp1, 0
eor tmp0, tmp1
bst rc, 4
bld tmp1, 0
eor tmp0, tmp1
bst rc, 3
bld tmp1, 0
eor tmp0, tmp1
ror tmp0
rol rc
.endm
; Sbox i0, i1, i2, i3: bit-sliced S-box applied in place to four registers.
; Each operand holds one byte of one state row; all four are overwritten
; with the corresponding output bytes. tmp0 is used as scratch (it first
; keeps a copy of the original i1).
.macro Sbox i0, i1, i2, i3
mov tmp0, \i1
com \i0
and \i1, \i0
eor \i1, \i2
or \i2, tmp0
eor \i0, \i3
eor \i2, \i0
eor tmp0, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, tmp0
and tmp0, \i2
eor \i1, tmp0
.endm
; PUSH_CONFLICT: save r16-r19, r23, r24 and r26-r31 on the stack.
; Must be paired with POP_CONFLICT, which pops the same registers in
; reverse order. Presumably these are the registers whose values the
; caller still needs while the permutation reuses them - confirm against
; the register aliases.
.macro PUSH_CONFLICT
push r16
push r17
push r18
push r19
push r23
push r24
push r26
push r27
push r28
push r29
push r30
push r31
.endm
; POP_CONFLICT: restore the registers saved by PUSH_CONFLICT
; (r31 down to r16, i.e. the exact reverse of the push order).
.macro POP_CONFLICT
pop r31
pop r30
pop r29
pop r28
pop r27
pop r26
pop r24
pop r23
pop r19
pop r18
pop r17
pop r16
.endm
; PUSH_ALL: save r2-r17, r28 and r29 on the stack.
; Must be paired with POP_ALL. Presumably this is the callee-saved register
; set of the C calling convention in use, saved on entry to routines called
; from C - confirm against the toolchain ABI.
.macro PUSH_ALL
push r2
push r3
push r4
push r5
push r6
push r7
push r8
push r9
push r10
push r11
push r12
push r13
push r14
push r15
push r16
push r17
push r28
push r29
.endm
; POP_ALL: restore the registers saved by PUSH_ALL (reverse order) and then
; clear r1. Presumably the final clr re-establishes the compiler convention
; that r1 always holds zero before returning to C code - confirm against
; the toolchain ABI.
.macro POP_ALL
pop r29
pop r28
pop r17
pop r16
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
pop r9
pop r8
pop r7
pop r6
pop r5
pop r4
pop r3
pop r2
clr r1
.endm
\ No newline at end of file
#ifndef __CONFIG_H__
#define __CONFIG_H__
#define CRYPTO_AEAD
//#define CRYPTO_HASH
#define MAX_MESSAGE_LENGTH 128
#define STATE_INBITS 384
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBITS 128
/* For CRYPTO_HASH */
#define CRYPTO_BITS 256
#define STATE_INBYTES ((STATE_INBITS + 7) / 8)
#define ROW_INBITS ((STATE_INBITS + 3) / 4)
#define ROW_INBYTES ((ROW_INBITS + 7) / 8)
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8)
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES
#define CRYPTO_ABYTES CRYPTO_KEYBYTES
#define CRYPTO_NOOVERLAP 1
#define MAX_ASSOCIATED_DATA_LENGTH 32
#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES)
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
/* For CRYPTO_HASH */
#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8)
#define DOMAIN_BITS 0x80
#define PAD_BITS 0x01
#define S384_R192_BITS 0x80
#if (STATE_INBITS==256)
#define C1 1
#define C2 8
#define C3 25
#elif (STATE_INBITS==384)
#define C1 1
#define C2 8
#define C3 55
#elif (STATE_INBITS==512)
#define C1 1
#define C2 16
#define C3 25
#else
#error "Not specified state size"
#endif
#ifdef CRYPTO_AEAD
/* For CRYPTO_AEAD */
#define KEY_INBITS (CRYPTO_KEYBYTES * 8)
#define KEY_INBYTES (CRYPTO_KEYBYTES)
#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8)
#define NONCE_INBYTES (CRYPTO_NPUBBYTES)
#define TAG_INBITS (CRYPTO_ABYTES * 8)
#define TAG_INBYTES (CRYPTO_ABYTES)
#if (KEY_INBITS==128) && (STATE_INBITS==256)
#define RATE_INBITS 64
#define NR_0 52
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==128) && (STATE_INBITS==384)
#define RATE_INBITS 192
#define NR_0 76
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==192) && (STATE_INBITS==384)
#define RATE_INBITS 96
#define NR_0 76
#define NR_i 40
#define NR_f 44
#elif (KEY_INBITS==256) && (STATE_INBITS==512)
#define RATE_INBITS 128
#define NR_0 100
#define NR_i 52
#define NR_f 56
#else
#error "Not specified key size and state size"
#endif
#define RATE_INBYTES ((RATE_INBITS + 7) / 8)
#define SQUEEZE_RATE_INBYTES TAG_INBYTES
#endif
#ifdef CRYPTO_HASH
/* For CRYPTO_HASH */
#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8)
#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256)
#define HASH_RATE_INBITS 32
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 68
#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 128
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 80
#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 48
#define HASH_SQUEEZE_RATE_INBITS 192
#define NR_h 104
#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512)
#define HASH_RATE_INBITS 64
#define HASH_SQUEEZE_RATE_INBITS 256
#define NR_h 140
#else
#error "Not specified hash digest size and state size"
#endif
#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8)
#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8)
#endif
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
#endif
\ No newline at end of file
#ifdef __cplusplus
extern "C" {
#endif
int crypto_aead_encrypt(
unsigned char *c,unsigned long long *clen,
const unsigned char *m,unsigned long long mlen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m,unsigned long long *outputmlen,
unsigned char *nsec,
const unsigned char *c,unsigned long long clen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#ifdef __cplusplus
}
#endif
#include <avr/io.h>
#include <avr/sfr_defs.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
extern void crypto_aead_encrypt_asm(
unsigned char *c,
const unsigned char *m,
unsigned char mlen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern int crypto_aead_decrypt_asm(
unsigned char *m,
const unsigned char *c,
unsigned char clen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern void crypto_hash_asm(
unsigned char *out,
const unsigned char *in,
unsigned char inlen
);
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the cipher implementation goes here,
... generating a ciphertext c[0],c[1],...,c[*clen-1]
... from a plaintext m[0],m[1],...,m[mlen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce npub[0],npub[1],..
... and secret key k[0],k[1],...
... the implementation shall not use nsec
...
... return 0;
*/
(void)nsec;
crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k);
*clen = mlen + TAG_INBYTES;
return 0;
}
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the AEAD implementation goes here,
... generating a plaintext m[0],m[1],...,m[*mlen-1]
... and secret message number nsec[0],nsec[1],...
... from a ciphertext c[0],c[1],...,c[clen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce number npub[0],npub[1],...
... and secret key k[0],k[1],...
...
... return 0;
*/
unsigned long long mlen_;
unsigned char tag_is_match;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
return -1;
}
mlen_ = clen - CRYPTO_ABYTES;
tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k);
if (tag_is_match != 0)
{
memset(m, 0, (size_t)mlen_);
return -1;
}
*mlen = mlen_;
return 0;
}
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.0 2020 by KNOT Team *
; **********************************************
;
#include "assist.h"
; Permutation: run rn rounds on the state stored at SRAM_STATE, which is
; addressed as four rows of ROW_INBYTES bytes each. This variant keeps the
; eight bytes of row 3 in registers x30..x37 for the whole call.
; Per round: the round constant rc is XORed into byte 0 of row 0 and the
; LFSR is advanced; the S-box is applied column by column while row 1 is
; rotated left by one bit and row 2 is moved up by one byte position on the
; fly; finally row 3 is rotated in registers (amend_shiftRow).
; Registers are saved and restored with PUSH_CONFLICT / POP_CONFLICT.
Permutation:
PUSH_CONFLICT
mov rcnt, rn
ldi rc, 0x01
; load row 3 into x30..x37
ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
round_loop_start:
rjmp AddRC_SubColumns_Start
; Jump table walked through Z (one entry per column, advanced with adiw).
; Entry j stores the previous S-box result from x3j into its row-3 register
; and loads row-3 byte j into x3j; the last entry leaves the column loop.
load_columns_table:
rjmp load_column0
rjmp load_column1
rjmp load_column2
rjmp load_column3
rjmp load_column4
rjmp load_column5
rjmp load_column6
rjmp load_column7
rjmp amend_shiftRow
load_column0:
mov x3j, x30
rjmp Sbox_one_column
load_column1:
mov x30, x3j
mov x3j, x31
rjmp Sbox_one_column
load_column2:
mov x31, x3j
mov x3j, x32
rjmp Sbox_one_column
load_column3:
mov x32, x3j
mov x3j, x33
rjmp Sbox_one_column
load_column4:
mov x33, x3j
mov x3j, x34
rjmp Sbox_one_column
load_column5:
mov x34, x3j
mov x3j, x35
rjmp Sbox_one_column
load_column6:
mov x35, x3j
mov x3j, x36
rjmp Sbox_one_column
load_column7:
mov x36, x3j
mov x3j, x37
rjmp Sbox_one_column
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
; Both modes built in: a two-entry table selects the LFSR at run time.
LFSR_table:
rjmp LFSR6
rjmp LFSR7
LFSR6:
LFSR6_MACRO
rjmp LFSR_DONE
LFSR7:
LFSR7_MACRO
rjmp LFSR_DONE
#endif
;;;;;;;;;;;;;;;;;;;;;;;; Real Start
; Start of one round: point Y at row 0, add the round constant to its
; first byte, advance the LFSR, preload the first bytes of rows 1 and 2.
AddRC_SubColumns_Start:
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
clr ccnt
ld x0j, Y
eor x0j, rc
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
ldi ZL, pm_lo8(LFSR_table)
ldi ZH, pm_hi8(LFSR_table)
; bit 2 of AEDH selects the table entry: clear -> LFSR6, set -> LFSR7
; (original note: 0 for AEAD, 1 for HASH)
sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[1] = 1 for HASH
adiw ZL, 1
ijmp
LFSR_DONE:
#elif defined(CRYPTO_AEAD)
LFSR6_MACRO ; only AEAD
#else
LFSR7_MACRO ; only HASH
#endif
ldd x1j, Y + ROW_INBYTES
ldd x2j, Y + 2 * ROW_INBYTES
ldi ZL, pm_lo8(load_columns_table)
ldi ZH, pm_hi8(load_columns_table)
ijmp
; Process one column: S-box on (x0j, x1j, x2j, x3j), then store rows 0..2.
Sbox_one_column:
Sbox x0j, x1j, x2j, x3j
; 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- x- -- 1
; -- -- -- -- x' -- -- -- 3
; 4 3 2 1 0 7 6 5
; Store a byte to Row 0
st Y, x0j
; Store a byte combined with ShiftRow1
; lsl moves bit 7 of the previous column byte into the carry; rol below
; shifts it into bit 0 of the current byte
lsl t1j
mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left)
rol x1j
std Y + ROW_INBYTES, x1j
; Store a byte combined with ShiftRow2
inc ccnt
cpi ccnt, ROW_INBYTES
breq ROW2_WRAP
ldd t2j, Y + 2 * ROW_INBYTES + 1 ; load next byte, the last updated byte needed to be shifted to the address of the next bytes
std Y + 2 * ROW_INBYTES + 1, x2j
mov x2j, t2j
jmp NO_ROW2_WRAP
ROW2_WRAP:
; last column: the byte wraps around to the first byte of row 2
std Y + ROW_INBYTES + 1, x2j
; remain ShiftRow3 to be done at 'amend_shiftRow'
NO_ROW2_WRAP:
; advance to the next column and take the next table entry
adiw YL, 1
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
adiw ZL, 1
ijmp
; After the last column: patch bit 0 of the first row-1 byte with bit 7 of
; the last row-1 byte (kept in t1j), then rotate row 3 in registers.
amend_shiftRow:
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
ld x1j, Y
bst t1j, 7
bld x1j, 0
st Y, x1j
; <<< 1
; x3j still holds the last column result; rotating it first puts the top
; bit of byte 7 into the carry for the rol chain
mov x37, x3j
rol x3j
rol x30
rol x31
rol x32
rol x33
rol x34
rol x35
rol x36
rol x37
; <<< 24
; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5
mov x3j, x30
mov x30, x35
mov x35, x32
mov x32, x37
mov x37, x34
mov x34, x31
mov x31, x36
mov x36, x33
mov x33, x3j
dec rcnt
breq round_loop_end
rjmp round_loop_start
round_loop_end:
; write row 3 back to SRAM
ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
POP_CONFLICT
ret
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.0 2020 by KNOT Team *
; **********************************************
;
#include "assist.h"
; Permutation: run rn rounds on the state stored at SRAM_STATE, which is
; addressed as four rows of ROW_INBYTES bytes each. This variant keeps the
; twelve bytes of row 3 in registers x30..x3b for the whole call and always
; advances the round constant with LFSR7_MACRO.
; Per round: the round constant rc is XORed into byte 0 of row 0 and the
; LFSR is advanced; the S-box is applied column by column while row 1 is
; rotated left by one bit and row 2 is moved up by one byte position on the
; fly; finally row 3 is rotated in registers (amend_shiftRow).
; Registers are saved and restored with PUSH_CONFLICT / POP_CONFLICT.
Permutation:
PUSH_CONFLICT
mov rcnt, rn
ldi rc, 0x01
; load row 3 into x30..x3b
ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
ld x38, Y+
ld x39, Y+
ld x3a, Y+
ld x3b, Y+
round_loop_start:
rjmp AddRC_SubColumns_Start
; Jump table walked through Z (one entry per column, advanced with adiw).
; Entry j stores the previous S-box result from x3j into its row-3 register
; and loads row-3 byte j into x3j; the last entry leaves the column loop.
load_columns_table:
rjmp load_column0
rjmp load_column1
rjmp load_column2
rjmp load_column3
rjmp load_column4
rjmp load_column5
rjmp load_column6
rjmp load_column7
rjmp load_column8
rjmp load_column9
rjmp load_columna
rjmp load_columnb
rjmp amend_shiftRow
load_column0:
mov x3j, x30
rjmp Sbox_one_column
load_column1:
mov x30, x3j
mov x3j, x31
rjmp Sbox_one_column
load_column2:
mov x31, x3j
mov x3j, x32
rjmp Sbox_one_column
load_column3:
mov x32, x3j
mov x3j, x33
rjmp Sbox_one_column
load_column4:
mov x33, x3j
mov x3j, x34
rjmp Sbox_one_column
load_column5:
mov x34, x3j
mov x3j, x35
rjmp Sbox_one_column
load_column6:
mov x35, x3j
mov x3j, x36
rjmp Sbox_one_column
load_column7:
mov x36, x3j
mov x3j, x37
rjmp Sbox_one_column
load_column8:
mov x37, x3j
mov x3j, x38
rjmp Sbox_one_column
load_column9:
mov x38, x3j
mov x3j, x39
rjmp Sbox_one_column
load_columna:
mov x39, x3j
mov x3j, x3a
rjmp Sbox_one_column
load_columnb:
mov x3a, x3j
mov x3j, x3b
rjmp Sbox_one_column
;;;;;;;;;;;;;;;;;;;;;;;; Real Start
; Start of one round: point Y at row 0 and Z at the column table, add the
; round constant to the first byte of row 0, advance the LFSR, and preload
; the first bytes of rows 1 and 2.
AddRC_SubColumns_Start:
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
ldi ZL, pm_lo8(load_columns_table)
ldi ZH, pm_hi8(load_columns_table)
clr ccnt
ld x0j, Y
eor x0j, rc
LFSR7_MACRO
ldd x1j, Y + ROW_INBYTES
ldd x2j, Y + 2 * ROW_INBYTES
ijmp
; Process one column: S-box on (x0j, x1j, x2j, x3j), then store rows 0..2.
Sbox_one_column:
Sbox x0j, x1j, x2j, x3j
; b a 9 8 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- -- -- -- -- x- -- 1
; -- -- -- -- x' -- -- -- -- -- -- -- 7
; 4 3 2 1 0 b a 9 8 7 6 5
; Store a byte to Row 0
st Y, x0j
; Store a byte combined with ShiftRow 1
; lsl moves bit 7 of the previous column byte into the carry; rol below
; shifts it into bit 0 of the current byte
lsl t1j
mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left)
rol x1j
std Y + ROW_INBYTES, x1j
; Store a byte combined with ShiftRow 2
inc ccnt
cpi ccnt, ROW_INBYTES
breq ROW2_WRAP
ldd t2j, Y + 2 * ROW_INBYTES + 1 ; load next byte, the last updated byte needed to be shifted to the address of the next bytes
std Y + 2 * ROW_INBYTES + 1, x2j
mov x2j, t2j
jmp NO_ROW2_WRAP
ROW2_WRAP:
; last column: the byte wraps around to the first byte of row 2
std Y + ROW_INBYTES + 1, x2j
; remain ShiftRow3 to be done at 'amend_shiftRow'
NO_ROW2_WRAP:
; advance to the next column and take the next table entry
adiw YL, 1
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
adiw ZL, 1
ijmp
; After the last column: patch bit 0 of the first row-1 byte with bit 7 of
; the last row-1 byte (kept in t1j), then rotate row 3 in registers.
amend_shiftRow:
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
ld x1j, Y
bst t1j, 7
bld x1j, 0
st Y, x1j
; >>> 1
; x3j still holds the last column result; rotating it first puts the low
; bit of byte b into the carry for the ror chain
mov x3b, x3j
ror x3j
ror x3a
ror x39
ror x38
ror x37
ror x36
ror x35
ror x34
ror x33
ror x32
ror x31
ror x30
ror x3b
; <<< 56
; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5
; (the commented-out sequence below is the byte permutation of the
; 8-byte-row variant, kept for reference)
;mov x3j, x30
;mov x30, x35
;mov x35, x32
;mov x32, x37
;mov x37, x34
;mov x34, x31
;mov x31, x36
;mov x36, x33
;mov x33, x3j
mov x3j, x30
mov x30, x35
mov x35, x3a
mov x3a, x33
mov x33, x38
mov x38, x31
mov x31, x36
mov x36, x3b
mov x3b, x34
mov x34, x39
mov x39, x32
mov x32, x37
mov x37, x3j
dec rcnt
breq round_loop_end
rjmp round_loop_start
round_loop_end:
; write row 3 back to SRAM
ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
st Y+, x38
st Y+, x39
st Y+, x3a
st Y+, x3b
POP_CONFLICT
ret
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.0 2020 by KNOT Team *
; **********************************************
;
#include "assist.h"
; Permutation: entry point of the round loop.
; Inputs read here: rn (number of rounds, copied into rcnt) and the 16 bytes
; at SRAM_STATE + 3 * ROW_INBYTES, which are loaded into x30..x3f and stay in
; registers until round_loop_end writes them back. Rows 0..2 are accessed in
; SRAM through Y by the code below.
Permutation:
; PUSH_CONFLICT is a macro defined outside this file; presumably it saves the
; registers this routine clobbers - confirm in assist.h.
PUSH_CONFLICT
mov rcnt, rn ; rcnt is decremented once per round in amend_shiftRow
ldi rc, 0x01 ; starting value of rc, which is XORed into row 0 byte 0 each round
ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
ld x38, Y+
ld x39, Y+
ld x3a, Y+
ld x3b, Y+
ld x3c, Y+
ld x3d, Y+
ld x3e, Y+
ld x3f, Y+
; Top of one round; amend_shiftRow jumps back here while rcnt is not zero.
; The jump skips over the tables and column handlers placed in between.
round_loop_start:
rjmp AddRC_SubColumns_Start
; load_columns_table: jump table driven by ijmp. Z is pointed at the first
; entry in AddRC_SubColumns_Start and advanced by one word after every column
; in Sbox_one_column, so entry k is executed right before column k is
; processed. Entries must stay single-word rjmp instructions in this order.
; The 17th entry terminates the column loop.
load_columns_table:
rjmp load_column0
rjmp load_column1
rjmp load_column2
rjmp load_column3
rjmp load_column4
rjmp load_column5
rjmp load_column6
rjmp load_column7
rjmp load_column8
rjmp load_column9
rjmp load_columna
rjmp load_columnb
rjmp load_columnc
rjmp load_columnd
rjmp load_columne
rjmp load_columnf
rjmp amend_shiftRow
; load_column0 .. load_columnf: per-column glue around Sbox_one_column.
; x3j is the working register for the row-3 byte of the current column.
; Every handler except the first writes the x3j result of the previous column
; back to its home register, then copies the row-3 byte of its own column into
; x3j and jumps to the shared S-box code. The result of column f is left in
; x3j and is committed by amend_shiftRow.
load_column0:
mov x3j, x30
rjmp Sbox_one_column
load_column1:
mov x30, x3j
mov x3j, x31
rjmp Sbox_one_column
load_column2:
mov x31, x3j
mov x3j, x32
rjmp Sbox_one_column
load_column3:
mov x32, x3j
mov x3j, x33
rjmp Sbox_one_column
load_column4:
mov x33, x3j
mov x3j, x34
rjmp Sbox_one_column
load_column5:
mov x34, x3j
mov x3j, x35
rjmp Sbox_one_column
load_column6:
mov x35, x3j
mov x3j, x36
rjmp Sbox_one_column
load_column7:
mov x36, x3j
mov x3j, x37
rjmp Sbox_one_column
load_column8:
mov x37, x3j
mov x3j, x38
rjmp Sbox_one_column
load_column9:
mov x38, x3j
mov x3j, x39
rjmp Sbox_one_column
load_columna:
mov x39, x3j
mov x3j, x3a
rjmp Sbox_one_column
load_columnb:
mov x3a, x3j
mov x3j, x3b
rjmp Sbox_one_column
load_columnc:
mov x3b, x3j
mov x3j, x3c
rjmp Sbox_one_column
load_columnd:
mov x3c, x3j
mov x3j, x3d
rjmp Sbox_one_column
load_columne:
mov x3d, x3j
mov x3j, x3e
rjmp Sbox_one_column
load_columnf:
mov x3e, x3j
mov x3j, x3f
rjmp Sbox_one_column
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
; Assembled only when both AEAD and HASH are enabled: a two-entry ijmp table
; used by AddRC_SubColumns_Start to pick one of two macros at run time.
; Entry 0 expands LFSR7_MACRO, entry 1 expands LFSR8_MACRO; both paths rejoin
; at LFSR_DONE. The macros are defined outside this file (presumably they
; advance the round constant rc - confirm in assist.h).
LFSR_table:
rjmp LFSR7
rjmp LFSR8
LFSR7:
LFSR7_MACRO
rjmp LFSR_DONE
LFSR8:
LFSR8_MACRO
rjmp LFSR_DONE
#endif
;;;;;;;;;;;;;;;;;;;;;;;; Real Start
; AddRC_SubColumns_Start: beginning of one round. Points Y at row 0, XORs rc
; into row 0 byte 0 (kept in x0j, not stored yet), runs the LFSR macro selected
; for the build, preloads the row-1 and row-2 bytes needed for column 0 and
; enters the column loop through load_columns_table.
AddRC_SubColumns_Start:
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
clr ccnt ; column counter, incremented once per column in Sbox_one_column
ld x0j, Y
eor x0j, rc
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
; Both modes built in: choose the macro at run time through LFSR_table.
ldi ZL, pm_lo8(LFSR_table)
ldi ZH, pm_hi8(LFSR_table)
sbrc AEDH, 2 ; AEDH[2] = 0 selects LFSR7 (AEAD), AEDH[2] = 1 selects LFSR8 (HASH)
adiw ZL, 1 ; skipped when AEDH[2] is clear, so Z stays on the LFSR7 entry
ijmp
LFSR_DONE:
#elif defined(CRYPTO_AEAD)
LFSR7_MACRO ; only AEAD
#else
LFSR8_MACRO ; only HASH
#endif
ldd x1j, Y + ROW_INBYTES ; row 1, byte 0
ldd x2j, Y + 2 * ROW_INBYTES ; row 2, byte 0
ldd t2j, Y + 2 * ROW_INBYTES + 1 ; row 2, byte 1: read ahead because the column loop writes row 2 two bytes in front of the column it is processing
ldi ZL, pm_lo8(load_columns_table)
ldi ZH, pm_hi8(load_columns_table)
ijmp ; first entry of the table: load_column0
; Sbox_one_column: shared body of the column loop, entered from the
; load_columnN handlers with the four row bytes of the current column in
; x0j, x1j, x2j and x3j and Y pointing at the row-0 byte of that column.
; It applies the Sbox macro (defined outside this file), stores rows 0..2 with
; as much of their rotation as can be done per column, advances Y and Z and
; dispatches to the next load_columns_table entry. The row-3 result stays in
; x3j for the next handler.
Sbox_one_column:
Sbox x0j, x1j, x2j, x3j
; f e d c b a 9 8 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2
; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3
; c b a 9 8 7 6 5 4 3 2 1 0 f e d
; Store a byte to Row 0
st Y, x0j
; Store a byte combined with ShiftRow1
; lsl moves bit 7 of the previous column's backed-up byte into carry; on
; column 0 that carry is meaningless and is corrected in amend_shiftRow.
lsl t1j
mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left)
rol x1j
std Y + ROW_INBYTES, x1j
; Store a byte combined with ShiftRow2
; Row 2 is written two bytes ahead of the current column. x2j and t2j hold
; the not-yet-processed row-2 bytes of the next two columns so that the
; stores below do not destroy them.
inc ccnt
cpi ccnt, ROW_INBYTES - 1
brsh ROW2_WRAP ; last two columns: the destination wraps to the start of the row
ldd tmp0, Y + 2 * ROW_INBYTES + 2 ; load next byte, the last updated byte needed to be shifted to the address of the next bytes
std Y + 2 * ROW_INBYTES + 2, x2j
mov x2j, t2j
mov t2j, tmp0
; rjmp instead of the two-word absolute jmp: the target is three instructions
; away, rjmp is what the rest of this file uses, and jmp is not implemented
; on AVR devices with small program memory.
rjmp NO_ROW2_WRAP
ROW2_WRAP:
; Same destination as above minus one row length (wrap-around).
std Y + ROW_INBYTES + 2, x2j
mov x2j, t2j
; remain ShiftRow3 to be done at 'amend_shiftRow'
NO_ROW2_WRAP:
; Advance to the next column and fetch its row-0 and row-1 bytes.
adiw YL, 1
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
adiw ZL, 1 ; next load_columns_table entry
ijmp
; amend_shiftRow: reached through the last entry of load_columns_table once
; all 16 columns are done. It completes the rotations of row 1 and row 3 that
; cannot be finished one column at a time, then either starts the next round
; or falls through to round_loop_end.
amend_shiftRow:
; Row 1: byte 0 was rotated left before the last byte of the row had been
; processed, so its bit 0 is patched here with bit 7 of the last column's
; row-1 byte, which Sbox_one_column backed up in t1j.
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
ld x1j, Y
bst t1j, 7 ; T flag = bit 7 of the backed-up row-1 byte
bld x1j, 0 ; insert it as bit 0 of row 1, byte 0
st Y, x1j
; <<< 1
; Row 3 lives in x30..x3f (x3f is the most significant byte of the chain).
mov x3f, x3j ; commit the column-f result that load_columnf left in x3j
rol x3j ; only used to move bit 7 of byte f into carry; x3j is overwritten below
rol x30
rol x31
rol x32
rol x33
rol x34
rol x35
rol x36
rol x37
rol x38
rol x39
rol x3a
rol x3b
rol x3c
rol x3d
rol x3e
rol x3f
; <<< 24
; f e d c b a 9 8 7 6 5 4 3 2 1 0 =>
; c b a 9 8 7 6 5 4 3 2 1 0 f e d
; Byte rotation done as one 16-element move cycle: every byte i moves to
; position (i + 3) mod 16, with x3j as the temporary that carries old byte 0.
mov x3j, x30
mov x30, x3d
mov x3d, x3a
mov x3a, x37
mov x37, x34
mov x34, x31
mov x31, x3e
mov x3e, x3b
mov x3b, x38
mov x38, x35
mov x35, x32
mov x32, x3f
mov x3f, x3c
mov x3c, x39
mov x39, x36
mov x36, x33
mov x33, x3j
; Round bookkeeping: leave the loop when the round counter reaches zero.
dec rcnt
breq round_loop_end
rjmp round_loop_start
; round_loop_end: all rounds are done. Row 3 was kept in registers x30..x3f
; since the entry of Permutation, so it is written back to the 16 bytes
; starting at SRAM_STATE + 3 * ROW_INBYTES before returning.
round_loop_end:
ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
st Y+, x38
st Y+, x39
st Y+, x3a
st Y+, x3b
st Y+, x3c
st Y+, x3d
st Y+, x3e
st Y+, x3f
; POP_CONFLICT is a macro defined outside this file; presumably the
; counterpart of PUSH_CONFLICT that restores saved registers - confirm.
POP_CONFLICT
ret
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment