From 8e94fb3ceecc5d0297b9a1d0995d30609c250101 Mon Sep 17 00:00:00 2001 From: Zhao Xuefeng Date: Sun, 23 Aug 2020 13:49:17 +0000 Subject: [PATCH] knot --- knot/Implementations/crypto_aead/knot128v1/armcortexm_2/api.h | 1 - knot/Implementations/crypto_aead/knot128v1/armcortexm_2/auxFormat.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot128v1/armcortexm_2/auxFormat.h | 73 ++++++++++++------------------------------------------------------------- knot/Implementations/crypto_aead/knot128v1/armcortexm_2/crypto_aead.h | 26 +++++++++----------------- knot/Implementations/crypto_aead/knot128v1/armcortexm_2/encrypt.c | 318 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ knot/Implementations/crypto_aead/knot128v1/armcortexm_4/api.h | 6 ++++++ knot/Implementations/crypto_aead/knot128v1/armcortexm_4/auxFormat.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot128v1/armcortexm_4/auxFormat.h | 30 ++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot128v1/armcortexm_4/crypto_aead.h | 10 ++++++++++ knot/Implementations/crypto_aead/knot128v1/armcortexm_4/encrypt.c | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.h | 217 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------------------------------------------------------------------------- knot/Implementations/crypto_aead/knot128v2/armcortexm_2/crypto_aead.h | 1 + knot/Implementations/crypto_aead/knot128v2/armcortexm_2/encrypt.c | 326 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- knot/Implementations/crypto_aead/knot128v2/armcortexm_4/api.h | 7 +++++++ knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot128v2/armcortexm_4/crypto_aead.h | 18 ++++++++++++++++++ knot/Implementations/crypto_aead/knot128v2/armcortexm_4/encrypt.c | 181 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.h | 315 
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- knot/Implementations/crypto_aead/knot192/armcortexm_2/encrypt.c | 282 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- knot/Implementations/crypto_aead/knot192/armcortexm_4/api.h | 6 ++++++ knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.h | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot192/armcortexm_4/crypto_aead.h | 18 ++++++++++++++++++ knot/Implementations/crypto_aead/knot192/armcortexm_4/encrypt.c | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.h | 224 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------ knot/Implementations/crypto_aead/knot256/armcortexm_2/crypto_aead.h | 25 +++++++++---------------- knot/Implementations/crypto_aead/knot256/armcortexm_2/encrypt.c | 465 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- knot/Implementations/crypto_aead/knot256/armcortexm_4/api.h | 6 ++++++ knot/Implementations/crypto_aead/knot256/armcortexm_4/auxFormat.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot256/armcortexm_4/auxFormat.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot256/armcortexm_4/crypto_aead.h | 10 ++++++++++ knot/Implementations/crypto_aead/knot256/armcortexm_4/encrypt.c | 153 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 36 files changed, 2365 insertions(+), 1597 deletions(-) create mode 100644 knot/Implementations/crypto_aead/knot128v1/armcortexm_2/auxFormat.c create mode 100644 knot/Implementations/crypto_aead/knot128v1/armcortexm_4/api.h create mode 100644 knot/Implementations/crypto_aead/knot128v1/armcortexm_4/auxFormat.c create mode 100644 knot/Implementations/crypto_aead/knot128v1/armcortexm_4/auxFormat.h create mode 100644 knot/Implementations/crypto_aead/knot128v1/armcortexm_4/crypto_aead.h create mode 100644 knot/Implementations/crypto_aead/knot128v1/armcortexm_4/encrypt.c create mode 100644 knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.c create mode 100644 knot/Implementations/crypto_aead/knot128v2/armcortexm_4/api.h create mode 100644 knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.c create mode 100644 
knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.h create mode 100644 knot/Implementations/crypto_aead/knot128v2/armcortexm_4/crypto_aead.h create mode 100644 knot/Implementations/crypto_aead/knot128v2/armcortexm_4/encrypt.c create mode 100644 knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.c create mode 100644 knot/Implementations/crypto_aead/knot192/armcortexm_4/api.h create mode 100644 knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.c create mode 100644 knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.h create mode 100644 knot/Implementations/crypto_aead/knot192/armcortexm_4/crypto_aead.h create mode 100644 knot/Implementations/crypto_aead/knot192/armcortexm_4/encrypt.c create mode 100644 knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.c create mode 100644 knot/Implementations/crypto_aead/knot256/armcortexm_4/api.h create mode 100644 knot/Implementations/crypto_aead/knot256/armcortexm_4/auxFormat.c create mode 100644 knot/Implementations/crypto_aead/knot256/armcortexm_4/auxFormat.h create mode 100644 knot/Implementations/crypto_aead/knot256/armcortexm_4/crypto_aead.h create mode 100644 knot/Implementations/crypto_aead/knot256/armcortexm_4/encrypt.c diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/api.h b/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/api.h index 2c52a6d..934904a 100644 --- a/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/api.h +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/api.h @@ -1,4 +1,3 @@ -//k=n=tag=128 b=256 r=64 c=192 #define CRYPTO_KEYBYTES 16 // #define CRYPTO_NSECBYTES 0 #define CRYPTO_NPUBBYTES 16 diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/auxFormat.c b/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/auxFormat.c new file mode 100644 index 0000000..33caf71 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/auxFormat.c @@ -0,0 +1,112 @@ 
+#include"auxFormat.h" + +//#define PRINTFormatToU8 +#define PRINTU8 +unsigned char constant6Format[52] = { +/*constant6_aead_128v1:*/ +0x01, 0x10, 0x02, 0x20, 0x04, 0x41, 0x11, 0x12, 0x22, 0x24, 0x45, 0x50, 0x03, + 0x30, 0x06, 0x61, 0x15, 0x53, 0x33, 0x36, 0x67, 0x74, 0x46, 0x60, 0x05, + 0x51, 0x13, 0x32, 0x26, 0x65, 0x54, 0x42, 0x21, 0x14, 0x43, 0x31, 0x16, + 0x63, 0x35, 0x57, 0x72, 0x27, 0x75, 0x56, 0x62, 0x25, 0x55, 0x52, 0x23, + 0x34, 0x47, 0x70, }; +void P256(unsigned int *s, unsigned char *rc, unsigned char rounds) { + unsigned int reg1, reg2; + asm volatile ( + "enc_loop: \n\t" + "/*add round const*/ \n\t" + "ldrb %[reg1], [%[rc]] \n\t" + "and %[reg2], %[reg1], 0xf \n\t" + "eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/\ + "eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/\ + "/*sbox first column*/ \n\t" + "mvns %[S_0], %[S_0] \n\t" + "ands %[reg1], %[S_2], %[S_0] \n\t" + "eors %[reg1], %[S_4], %[reg1] \n\t" + "orrs %[S_4], %[S_2], %[S_4] \n\t" + "eors %[S_0], %[S_6], %[S_0] \n\t" + "eors %[S_4], %[S_4], %[S_0] \n\t" + "eors %[reg2], %[S_2], %[S_6] \n\t" + "eors %[S_6], %[S_6], %[reg1] \n\t" + "ands %[S_0], %[reg1],%[S_0] \n\t" + "eors %[S_0], %[reg2],%[S_0] \n\t" + "ands %[S_2], %[S_4], %[reg2] \n\t" + "eors %[S_2], %[reg1], %[S_2] \n\t" + "/*sbox first column*/ \n\t" + "mvns %[S_1], %[S_1] \n\t" + "ands %[reg1], %[S_3], %[S_1] \n\t" + "eors %[reg1], %[S_5], %[reg1] \n\t" + "orrs %[S_5], %[S_3], %[S_5] \n\t" + "eors %[S_1], %[S_7], %[S_1] \n\t" + "eors %[S_5], %[S_5], %[S_1] \n\t" + "eors %[reg2], %[S_3], %[S_7] \n\t" + "eors %[S_7], %[S_7], %[reg1] \n\t" + "ands %[S_1], %[reg1],%[S_1] \n\t" + "eors %[S_1], %[reg2],%[S_1] \n\t" + "ands %[S_3], %[S_5], %[reg2] \n\t" + "eors %[S_3], %[reg1], %[S_3] \n\t" + "/*rotate shift left 1 bit*/ \n\t" + "mov %[reg1], %[S_3] \n\t" + "mov %[S_3], %[S_2] , ROR #31 \n\t" + "mov %[S_2], %[reg1] \n\t" + "/*rotate shift left 8 bits*/ \n\t" + "mov %[S_4], %[S_4] , 
ROR #28 \n\t" + "mov %[S_5], %[S_5] , ROR #28 \n\t" + "/*rotate shift left 25 bits*/ \n\t" + "mov %[reg1], %[S_6] \n\t" + "mov %[S_6], %[S_7] , ROR #20 \n\t" + "mov %[S_7], %[reg1] , ROR #19 \n\t" + "/*loop control*/ \n\t" + "adds %[rc], %[rc], #1 \n\t" + "subs %[ro], %[ro], #1 \n\t" + "bne enc_loop \n\t" + /* ----------------------------- */ + : /* output variables - including inputs that are changed */ + [ro] "+r" (rounds),[reg1] "=r" (reg1), [reg2] "=r" (reg2), [rc] "+r" (rc), + [S_0] "+r" (s[0]), [S_2] "+r" (s[2]), [S_4] "+r" (s[4]), [S_6] "+r" (s[6]) , + [S_1] "+r" (s[1]), [S_3] "+r" (s[3]), [S_5] "+r" (s[5]), [S_7] "+r" (s[7]) + : /* input variables */ + : /* clobber registers for temporary values */ + ); + +} + +void packFormat(u32 * out, const u8 * in) { + u32 t0 = U32BIG(((u32* )in)[0]); + u32 t1 = U32BIG(((u32* )in)[1]); + u32 r0, r1; + r0 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= r0 ^ (r0 << 1); + r0 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= r0 ^ (r0 << 2); + r0 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= r0 ^ (r0 << 4); + r0 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= r0 ^ (r0 << 8); //t0 odd even + r1 = (t1 ^ (t1 >> 1)) & 0x22222222, t1 ^= r1 ^ (r1 << 1); + r1 = (t1 ^ (t1 >> 2)) & 0x0C0C0C0C, t1 ^= r1 ^ (r1 << 2); + r1 = (t1 ^ (t1 >> 4)) & 0x00F000F0, t1 ^= r1 ^ (r1 << 4); + r1 = (t1 ^ (t1 >> 8)) & 0x0000FF00, t1 ^= r1 ^ (r1 << 8); //t1 odd even + out[0] = (t1 & 0xFFFF0000) | (t0 >> 16); // t1.odd|t0.odd + out[1] = (t1 << 16) | (t0 & 0x0000FFFF); // t1.even|t0.even +} +void unpackFormat(u8 * out, u32 * in) { + u32 t[2] = { 0 }; + t[1] = (in[0] & 0xFFFF0000) | (in[1] >> 16); + t[0] = (in[1] & 0x0000FFFF) | (in[0] << 16); + u32 r0, r1; + r0 = (t[0] ^ (t[0] >> 8)) & 0x0000FF00, t[0] ^= r0 ^ (r0 << 8); + r0 = (t[0] ^ (t[0] >> 4)) & 0x00F000F0, t[0] ^= r0 ^ (r0 << 4); + r0 = (t[0] ^ (t[0] >> 2)) & 0x0C0C0C0C, t[0] ^= r0 ^ (r0 << 2); + r0 = (t[0] ^ (t[0] >> 1)) & 0x22222222, t[0] ^= r0 ^ (r0 << 1); + r1 = (t[1] ^ (t[1] >> 8)) & 0x0000FF00, t[1] ^= r1 ^ (r1 << 8); + r1 = 
(t[1] ^ (t[1] >> 4)) & 0x00F000F0, t[1] ^= r1 ^ (r1 << 4); + r1 = (t[1] ^ (t[1] >> 2)) & 0x0C0C0C0C, t[1] ^= r1 ^ (r1 << 2); + r1 = (t[1] ^ (t[1] >> 1)) & 0x22222222, t[1] ^= r1 ^ (r1 << 1); + memcpy(out, t, 8 * sizeof(unsigned char)); +} + + +void getU32Format(u32 *out, const u8* in) { + u32 r0, lo = U32BIG(((u32* )in)[0]); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + *out = lo; +} diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/auxFormat.h b/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/auxFormat.h index a5c1b7e..6e1068f 100644 --- a/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/auxFormat.h @@ -1,75 +1,26 @@ -#include -#include -#include -#include +#include #include"crypto_aead.h" #include"api.h" +#include"stdio.h" #include #define U32BIG(x) (x) - #define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) #define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) - -#define sbox(a, b, c, d, e, f, g, h) \ -{ \ - t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; e = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ -} - typedef unsigned char u8; typedef unsigned int u32; typedef unsigned long long u64; +unsigned char constant6Format[52]; +unsigned char constant7Format[68]; + +#define RATE (64 / 8) + +#define PR0_ROUNDS 52 +#define PR_ROUNDS 28 +#define PRF_ROUNDS 32 -#define packFormat(out,in) {\ -t1 = U32BIG(((u32*)in)[0]); \ -t2 = U32BIG(((u32*)in)[1]); \ -t3 = (t1 ^ (t1 >> 1)) & 0x22222222, t1 ^= t3 ^ (t3 << 1); \ -t3 = (t1 ^ (t1 >> 2)) & 0x0C0C0C0C, t1 ^= t3 ^ (t3 << 2); \ -t3 = (t1 ^ (t1 >> 4)) & 0x00F000F0, t1 ^= t3 ^ (t3 << 4); \ -t3 = (t1 ^ (t1 >> 8)) & 0x0000FF00, t1 ^= t3 ^ (t3 << 8); \ -t5 = (t2 ^ (t2 >> 1)) & 
0x22222222, t2 ^= t5 ^ (t5 << 1); \ -t5 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t5 ^ (t5 << 2); \ -t5 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t5 ^ (t5 << 4); \ -t5 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t5 ^ (t5 << 8); \ -out[0] = (t2 & 0xFFFF0000) | (t1 >> 16); \ -out[1] = (t2 << 16) | (t1 & 0x0000FFFF); \ -} -#define unpackFormat(out, in) {\ - t2 = (in[0] & 0xFFFF0000) | (in[1] >> 16); \ - t1 = (in[1] & 0x0000FFFF) | (in[0] << 16); \ - t3 = (t1 ^ (t1 >> 8)) & 0x0000FF00, t1 ^= t3 ^ (t3 << 8); \ - t3 = (t1 ^ (t1 >> 4)) & 0x00F000F0, t1 ^= t3 ^ (t3 << 4); \ - t3 = (t1 ^ (t1 >> 2)) & 0x0C0C0C0C, t1 ^= t3 ^ (t3 << 2); \ - t3 = (t1 ^ (t1 >> 1)) & 0x22222222, t1 ^= t3 ^ (t3 << 1); \ - t5 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t5 ^ (t5 << 8); \ - t5 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t5 ^ (t5 << 4); \ - t5 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t5 ^ (t5 << 2); \ - t5 = (t2 ^ (t2 >> 1)) & 0x22222222, t2 ^= t5 ^ (t5 << 1); \ - *((u64*)out) = ((u64)t2 << 32 | t1); \ -} -#define getU32Format(out, in) {\ - t1, t2 = U32BIG(((u32*)in)[0]); \ - t1 = (t2 ^ (t2 >> 1)) & 0x22222222, t2 ^= t1 ^ (t1 << 1); \ - t1 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t1 ^ (t1 << 2); \ - t1 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t1 ^ (t1 << 4); \ - t1 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t1 ^ (t1 << 8); \ - *out = t2; \ -} -#define ROUND256( constant6Format,lunNum) {\ - s[0] ^= constant6Format[lunNum]>> 4;\ - s[1] ^= constant6Format[lunNum]& 0x0f;\ - sbox(s[0], s[2], s[4], s[6], s_temp[0], s_temp[2], s_temp[4], s_temp[6]);\ - sbox(s[1], s[3], s[5], s[7], s_temp[1], s_temp[3], s_temp[5], s_temp[7]);\ - s[0] = s_temp[0];\ - s[1] = s_temp[1];\ - s[2] = s_temp[3];\ - s[3] = LOTR32(s_temp[2], 1);\ - s[4] = LOTR32(s_temp[4], 4);\ - s[5] = LOTR32(s_temp[5], 4);\ - s[6] = LOTR32(s_temp[7], 12);\ - s[7] = LOTR32(s_temp[6], 13);\ -} -void printfFormat(char name[], u32 * in); +void packFormat(u32 * out, const u8 * in); +void unpackFormat(u8 * out, u32 * in); void printU8(char name[], u8 var[], long len, 
int offset); diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/crypto_aead.h b/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/crypto_aead.h index 862d176..10ecefb 100644 --- a/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/crypto_aead.h +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/crypto_aead.h @@ -1,18 +1,10 @@ +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k); -int crypto_aead_encrypt( - unsigned char *c, unsigned long long *clen, - const unsigned char *m, unsigned long long mlen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *nsec, - const unsigned char *npub, - const unsigned char *k -); - -int crypto_aead_decrypt( - unsigned char *m, unsigned long long *mlen, - unsigned char *nsec, - const unsigned char *c, unsigned long long clen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *npub, - const unsigned char *k -); +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k); diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/encrypt.c b/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/encrypt.c index 4b84924..cee3160 100644 --- a/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/encrypt.c +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/encrypt.c @@ -1,271 +1,71 @@ - #include"auxFormat.h" -#define RATE (64 / 8) - -#define PR0_ROUNDS 52 -#define PR_ROUNDS 28 -#define PRF_ROUNDS 32 -unsigned char constant6Format[63] = { - /*constant6_aead_128v1:*/ -0x1, -0x10, -0x2, -0x20, -0x4, -0x41, -0x11, -0x12, -0x22, -0x24, -0x45, -0x50, 
-0x3, -0x30, -0x6, -0x61, -0x15, -0x53, -0x33, -0x36, -0x67, -0x74, -0x46, -0x60, -0x5, -0x51, -0x13, -0x32, -0x26, -0x65, -0x54, -0x42, -0x21, -0x14, -0x43, -0x31, -0x16, -0x63, -0x35, -0x57, -0x72, -0x27, -0x75, -0x56, -0x62, -0x25, -0x55, -0x52, -0x23, -0x34, -0x47, -0x70, -0x7, -0x71, -0x17, -0x73, -0x37, -0x77, -0x76, -0x66, -0x64, -0x44, -0x40, - -}; - - - - -static void permutation256(unsigned int *in, int rounds, unsigned char *rc) { - uint32_t w0, w1, w2, w3, w4, w5, w6, w7; - uint32_t s0, s1, s2; - uint32_t one = 0x1; - uint32_t i=0; - uint32_t ff = 0xff; - __asm volatile( - "ldr w0, [in] \n\t" - "ldr w4, [in, #4] \n\t" - "ldr w1, [in, #8] \n\t" - "ldr w5, [in, #12] \n\t" - "ldr w2, [in, #16] \n\t" - "ldr w6, [in, #20] \n\t" - "ldr w3, [in, #24] \n\t" - "ldr w7, [in, #28] \n\t" - "enc_loop: \n\t" - "/*add round const s0 s1*/ \n\t" - "ldrb s0, [rc] \n\t" - "LSR s1, s0, #4 \n\t" - "and s0, s0, 0xf \n\t" - "eors w4, w4, s0 \n\t" - "eors w0, w0, s1 \n\t" - "/*sbox first column*/ \n\t" - "mvns w0, w0 \n\t" - "ands s0, w1, w0 \n\t" - "eors s0, w2, s0 \n\t" - "orrs w2, w1, w2 \n\t" - "eors w0, w3, w0 \n\t" - "eors w2, w2, w0 \n\t" - "eors s1, w1, w3 \n\t" - "eors w3, w3, s0 \n\t" - "ands w0, s0, w0 \n\t" - "eors w0, s1, w0 \n\t" - "ands w1, w2, s1 \n\t" - "eors w1, s0, w1 \n\t" - "/*sbox second column*/ \n\t" - "mvns w4, w4 \n\t" - "ands s0, w5, w4 \n\t" - "eors s0, w6, s0 \n\t" - "orrs w6, w5, w6 \n\t" - "eors w4, w7, w4 \n\t" - "eors w6, w6, w4 \n\t" - "eors s1, w5, w7 \n\t" - "eors w7, w7, s0 \n\t" - "ands w4, s0, w4 \n\t" - "eors w4, s1, w4 \n\t" - "ands w5, w6, s1 \n\t" - "eors w5, s0, w5 \n\t" - "/*rotate shift left 1 bit*/ \n\t" - "mov s0, w5 \n\t" - "ROR w5, w1, #31 \n\t" - "mov w1, s0 \n\t" - "/*rotate shift left 8 bits*/ \n\t" - "ROR w2, w2, #28 \n\t" - "ROR w6, w6, #28 \n\t" - "/*rotate shift left 25 bits*/ \n\t" - "mov s0, w3 \n\t" - "ROR w3, w7, #20 \n\t" - "ROR w7, s0, #19 \n\t" - "/*loop control*/ \n\t" - "adds rc, rc, #1 \n\t" - "subs rounds, 
rounds, #1 \n\t" - "bne enc_loop \n\t" - "str w0, [in] \n\t" - "str w4, [in, #4] \n\t" - "str w1, [in, #8] \n\t" - "str w5, [in, #12] \n\t" - "str w2, [in, #16] \n\t" - "str w6, [in, #20] \n\t" - "str w3, [in, #24] \n\t" - "str w7, [in, #28] \n\t" - ); -} - - -int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, - const unsigned char *m, unsigned long long mlen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *nsec, const unsigned char *npub, - const unsigned char *k) { - unsigned int i, j; - u32 s[8] = { 0 }; +void ProcessAssocData(unsigned int *s, const u8* ad, unsigned long long adlen) { u32 dataFormat[2] = { 0 }; u8 tempData[8]; - u32 s_temp[8] = { 0 }; - u32 t1, t2, t3, t5, t6, t8, t9, t11; - *clen = mlen + CRYPTO_ABYTES; - //initialization - packFormat(s, npub); - packFormat((s + 2), (npub + 8)); - packFormat((s + 4), k); - packFormat((s + 6), (k + 8)); - permutation256(s,PR0_ROUNDS,constant6Format); - // process associated data if (adlen) { while (adlen >= RATE) { packFormat(dataFormat, ad); s[0] ^= dataFormat[0]; s[1] ^= dataFormat[1]; - permutation256(s,PR_ROUNDS,constant6Format); + P256(s, constant6Format, PR_ROUNDS); adlen -= RATE; ad += RATE; } memset(tempData, 0, sizeof(tempData)); -memcpy(tempData, ad, adlen * sizeof(unsigned char)); -tempData[adlen] = 0x01; + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; packFormat(dataFormat, tempData); s[0] ^= dataFormat[0]; s[1] ^= dataFormat[1]; - permutation256(s,PR_ROUNDS,constant6Format); + P256(s, constant6Format, PR_ROUNDS); } s[6] ^= 0x80000000; +} +void ProcessPlaintext(unsigned int *s, const u8* m, unsigned long long mlen, + unsigned char *c) { + u32 dataFormat[2] = { 0 }; + u8 tempData[8] = { 0 }; if (mlen) { while (mlen >= RATE) { packFormat(dataFormat, m); s[0] ^= dataFormat[0]; s[1] ^= dataFormat[1]; unpackFormat(c, s); - permutation256(s,PR_ROUNDS,constant6Format); + P256(s, constant6Format, PR_ROUNDS); mlen -= RATE; m += 
RATE; c += RATE; } memset(tempData, 0, sizeof(tempData)); -memcpy(tempData, m, mlen * sizeof(unsigned char)); - -tempData[mlen]= 0x01; + memcpy(tempData, m, mlen * sizeof(unsigned char)); + tempData[mlen] = 0x01; packFormat(dataFormat, tempData); s[0] ^= dataFormat[0]; s[1] ^= dataFormat[1]; unpackFormat(tempData, s); memcpy(c, tempData, mlen * sizeof(unsigned char)); - c +=mlen; + //c+=mlen; } - // finalization - permutation256(s,PRF_ROUNDS,constant6Format); +} +void Finalize_GenerateTag(unsigned int *s, unsigned char *c) { + P256(s, constant6Format, PRF_ROUNDS); // return tag - unpackFormat(tempData, s); - memcpy(c, tempData, sizeof(tempData)); - unpackFormat(tempData,(s + 2)); - memcpy(c+8, tempData, sizeof(tempData)); -// unpackFormat((c), s); -// unpackFormat((c+8),(s + 2)); - return 0; + unpackFormat(c, s); + unpackFormat((c + 8), (s + 2)); } - -int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, - unsigned char *nsec, const unsigned char *c, unsigned long long clen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *npub, const unsigned char *k) { - u8 i, j; - // initialization - u32 s[8] = { 0 }; - u32 dataFormat[4] = { 0 }; - u32 dataFormat_1[2] = { 0 }; - u8 tempU8[32] = { 0 }; - u8 tempData[8]; - u32 s_temp[8] = { 0 }; - u32 t1, t2, t3, t5, t6, t8, t9, t11; - *mlen = clen - CRYPTO_ABYTES; - if (clen < CRYPTO_ABYTES) - return -1; - //initialization +void Initialize(unsigned int *s, const unsigned char *npub, const unsigned char *k) { packFormat(s, npub); - packFormat((s + 2), (npub + 8)); - packFormat((s + 4), k); - packFormat((s + 6), (k + 8)); - permutation256(s,PR0_ROUNDS,constant6Format); - // process associated data - if (adlen) { - while (adlen >= RATE) { - packFormat(dataFormat, ad); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - permutation256(s,PR_ROUNDS,constant6Format); - adlen -= RATE; - ad += RATE; - } - memset(tempData, 0, sizeof(tempData)); - memcpy(tempData, ad, adlen * sizeof(unsigned char)); 
- tempData[adlen] = 0x01; - packFormat(dataFormat, tempData); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - permutation256(s,PR_ROUNDS,constant6Format); - } - s[6] ^= 0x80000000; - // process c - clen = clen - CRYPTO_KEYBYTES; + packFormat(s + 2, npub + 8); + packFormat(s + 4, k); + packFormat(s + 6, k + 8); + P256(s, constant6Format, PR0_ROUNDS); +} +void ProcessCiphertext(unsigned int *s, unsigned char *m, const unsigned char *c, + unsigned long long clen) { + u8 tempU8[32] = { 0 }, i; + u32 dataFormat[2] = { 0 }; + u32 dataFormat_1[2] = { 0 }; if (clen) { while (clen >= RATE) { packFormat(dataFormat, c); @@ -274,27 +74,65 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, unpackFormat(m, dataFormat_1); s[0] = dataFormat[0]; s[1] = dataFormat[1]; - permutation256(s,PR_ROUNDS,constant6Format); + P256(s, constant6Format, PR_ROUNDS); clen -= RATE; m += RATE; c += RATE; } unpackFormat(tempU8, s); - for (i = 0; i < clen; ++i, ++m, ++c) - { - *m = tempU8[i]^ *c; + for (i = 0; i < clen; ++i, ++m, ++c) { + *m = tempU8[i] ^ *c; tempU8[i] = *c; } tempU8[i] ^= 0x01; - packFormat(s, tempU8); + packFormat(s, tempU8); } - // finalization - permutation256(s,PRF_ROUNDS,constant6Format); +} +int Finalize_VerifyTag(unsigned int *s, const unsigned char *c, unsigned char *m, + unsigned long long *mlen) { + u8 tempU8[16] = { 0 }; + P256(s, constant6Format, PRF_ROUNDS); // return tag - packFormat(dataFormat, c); - packFormat((dataFormat + 2), (c +8)); - if (dataFormat[0] != s[0] || dataFormat[1] != s[1] || dataFormat[2] != s[2] || dataFormat[3] != s[3]) { + unpackFormat(tempU8, s); + unpackFormat((tempU8 + 8), (s + 2)); + if (memcmp((void*) tempU8, (void*) (c), CRYPTO_ABYTES)) { + memset(m, 0, sizeof(unsigned char) * (*mlen)); + *mlen = 0; return -1; } return 0; } + +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char 
*nsec, const unsigned char *npub, + const unsigned char *k) { + unsigned int s[8] = { 0 }; + *clen = mlen + CRYPTO_ABYTES; + //initialization + Initialize(s, npub, k); + // process associated data + ProcessAssocData(s, ad, adlen); + ProcessPlaintext(s, m, mlen, c); + // finalization + Finalize_GenerateTag(s, c + mlen); + return 0; +} +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + unsigned int s[8] = { 0 }; + *mlen = clen - CRYPTO_ABYTES; + if (clen < CRYPTO_ABYTES) + return -1; + //initialization + Initialize(s, npub, k); + // process associated data + ProcessAssocData(s, ad, adlen); + // process cipher + ProcessCiphertext(s, m, c, clen - CRYPTO_KEYBYTES); + // finalization + return Finalize_VerifyTag(s, c + clen - CRYPTO_KEYBYTES, m, mlen); +} diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_4/api.h b/knot/Implementations/crypto_aead/knot128v1/armcortexm_4/api.h new file mode 100644 index 0000000..c9df2a8 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_4/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 16 // +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 + diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_4/auxFormat.c b/knot/Implementations/crypto_aead/knot128v1/armcortexm_4/auxFormat.c new file mode 100644 index 0000000..d2d2e03 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_4/auxFormat.c @@ -0,0 +1,70 @@ +#include"auxFormat.h" + +//#define PRINTFormatToU8 +#define PRINTU8 +unsigned char constant6Format[52] = { +/*constant6_aead_128v1:*/ +0x01, 0x10, 0x02, 0x20, 0x04, 0x41, 0x11, 0x12, 0x22, 0x24, 0x45, 0x50, 0x03, + 0x30, 0x06, 0x61, 0x15, 0x53, 0x33, 0x36, 0x67, 0x74, 0x46, 0x60, 0x05, + 0x51, 0x13, 0x32, 0x26, 
0x65, 0x54, 0x42, 0x21, 0x14, 0x43, 0x31, 0x16, + 0x63, 0x35, 0x57, 0x72, 0x27, 0x75, 0x56, 0x62, 0x25, 0x55, 0x52, 0x23, + 0x34, 0x47, 0x70, }; + +void P256(unsigned int *s, unsigned char *round, unsigned char lunNum) { + + u32 s_temp[8] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + unsigned char i; + for (i = 0; i < lunNum; i++) { + s[0] ^= round[i] >> 4; + s[1] ^= round[i] & 0x0f; + sbox(s[0], s[2], s[4], s[6], s_temp[2], s_temp[4], s_temp[6]); + sbox(s[1], s[3], s[5], s[7], s[2], s_temp[5], s_temp[7]); + s[3] = LOTR32(s_temp[2], 1); + s[4] = LOTR32(s_temp[4], 4); + s[5] = LOTR32(s_temp[5], 4); + s[6] = LOTR32(s_temp[7], 12); + s[7] = LOTR32(s_temp[6], 13); + } +} +void packFormat(u32 * out, const u8 * in) { + u32 t0 = U32BIG(((u32* )in)[0]); + u32 t1 = U32BIG(((u32* )in)[1]); + u32 r0, r1; + r0 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= r0 ^ (r0 << 1); + r0 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= r0 ^ (r0 << 2); + r0 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= r0 ^ (r0 << 4); + r0 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= r0 ^ (r0 << 8); //t0 odd even + r1 = (t1 ^ (t1 >> 1)) & 0x22222222, t1 ^= r1 ^ (r1 << 1); + r1 = (t1 ^ (t1 >> 2)) & 0x0C0C0C0C, t1 ^= r1 ^ (r1 << 2); + r1 = (t1 ^ (t1 >> 4)) & 0x00F000F0, t1 ^= r1 ^ (r1 << 4); + r1 = (t1 ^ (t1 >> 8)) & 0x0000FF00, t1 ^= r1 ^ (r1 << 8); //t1 odd even + out[0] = (t1 & 0xFFFF0000) | (t0 >> 16); // t1.odd|t0.odd + out[1] = (t1 << 16) | (t0 & 0x0000FFFF); // t1.even|t0.even +} +void unpackFormat(u8 * out, u32 * in) { + u32 t[2] = { 0 }; + t[1] = (in[0] & 0xFFFF0000) | (in[1] >> 16); + t[0] = (in[1] & 0x0000FFFF) | (in[0] << 16); + u32 r0, r1; + r0 = (t[0] ^ (t[0] >> 8)) & 0x0000FF00, t[0] ^= r0 ^ (r0 << 8); + r0 = (t[0] ^ (t[0] >> 4)) & 0x00F000F0, t[0] ^= r0 ^ (r0 << 4); + r0 = (t[0] ^ (t[0] >> 2)) & 0x0C0C0C0C, t[0] ^= r0 ^ (r0 << 2); + r0 = (t[0] ^ (t[0] >> 1)) & 0x22222222, t[0] ^= r0 ^ (r0 << 1); + r1 = (t[1] ^ (t[1] >> 8)) & 0x0000FF00, t[1] ^= r1 ^ (r1 << 8); + r1 = (t[1] ^ (t[1] >> 4)) & 0x00F000F0, t[1] ^= r1 ^ (r1 
<< 4); + r1 = (t[1] ^ (t[1] >> 2)) & 0x0C0C0C0C, t[1] ^= r1 ^ (r1 << 2); + r1 = (t[1] ^ (t[1] >> 1)) & 0x22222222, t[1] ^= r1 ^ (r1 << 1); + memcpy(out, t, 8 * sizeof(unsigned char)); +} + + + +void getU32Format(u32 *out, const u8* in) { + u32 r0, lo = U32BIG(((u32* )in)[0]); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + *out = lo; +} diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_4/auxFormat.h b/knot/Implementations/crypto_aead/knot128v1/armcortexm_4/auxFormat.h new file mode 100644 index 0000000..153ab01 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_4/auxFormat.h @@ -0,0 +1,30 @@ +#include +#include"crypto_aead.h" +#include"api.h" +#include"stdio.h" +#include +#define U32BIG(x) (x) + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) + +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; +unsigned char constant6Format[52]; +unsigned char constant7Format[68]; + +#define RATE (64 / 8) + +#define PR0_ROUNDS 52 +#define PR_ROUNDS 28 +#define PRF_ROUNDS 32 + +#define sbox(a, b, c, d, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} + +void packFormat(u32 * out, const u8 * in); +void unpackFormat(u8 * out, u32 * in); + diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_4/crypto_aead.h b/knot/Implementations/crypto_aead/knot128v1/armcortexm_4/crypto_aead.h new file mode 100644 index 0000000..10ecefb --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_4/crypto_aead.h @@ -0,0 +1,10 @@ +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long 
long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k); + +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k); diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_4/encrypt.c b/knot/Implementations/crypto_aead/knot128v1/armcortexm_4/encrypt.c new file mode 100644 index 0000000..f937f31 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_4/encrypt.c @@ -0,0 +1,138 @@ +#include"auxFormat.h" + +void ProcessAssocData(u32 *s, const u8* ad, unsigned long long adlen) { + u32 dataFormat[2] = { 0 }; + u8 tempData[8]; + if (adlen) { + while (adlen >= RATE) { + packFormat(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + P256(s, constant6Format, PR_ROUNDS); + adlen -= RATE; + ad += RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; + packFormat(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + P256(s, constant6Format, PR_ROUNDS); + } + s[6] ^= 0x80000000; +} +void ProcessPlaintext(u32 *s, const u8* m, unsigned long long mlen, + unsigned char *c) { + u32 dataFormat[2] = { 0 }; + u8 tempData[8] = { 0 }; + if (mlen) { + while (mlen >= RATE) { + packFormat(dataFormat, m); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + unpackFormat(c, s); + P256(s, constant6Format, PR_ROUNDS); + mlen -= RATE; + m += RATE; + c += RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, m, mlen * sizeof(unsigned char)); + tempData[mlen] = 0x01; + packFormat(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + unpackFormat(tempData, s); + memcpy(c, tempData, mlen * sizeof(unsigned char)); + //c+=mlen; + } +} +void 
Finalize_GenerateTag(u32 *s, unsigned char *c) { + P256(s, constant6Format, PRF_ROUNDS); + // return tag + unpackFormat(c, s); + unpackFormat((c + 8), (s + 2)); +} +void Initialize(u32 *s, const unsigned char *npub, const unsigned char *k) { + packFormat(s, npub); + packFormat(s + 2, npub + 8); + packFormat(s + 4, k); + packFormat(s + 6, k + 8); + P256(s, constant6Format, PR0_ROUNDS); +} +void ProcessCiphertext(u32 *s, unsigned char *m, const unsigned char *c, + unsigned long long clen) { + u8 tempU8[32] = { 0 }, i; + u32 dataFormat[2] = { 0 }; + u32 dataFormat_1[2] = { 0 }; + if (clen) { + while (clen >= RATE) { + packFormat(dataFormat, c); + dataFormat_1[0] = s[0] ^ dataFormat[0]; + dataFormat_1[1] = s[1] ^ dataFormat[1]; + unpackFormat(m, dataFormat_1); + s[0] = dataFormat[0]; + s[1] = dataFormat[1]; + P256(s, constant6Format, PR_ROUNDS); + clen -= RATE; + m += RATE; + c += RATE; + } + unpackFormat(tempU8, s); + for (i = 0; i < clen; ++i, ++m, ++c) { + *m = tempU8[i] ^ *c; + tempU8[i] = *c; + } + tempU8[i] ^= 0x01; + packFormat(s, tempU8); + } +} +int Finalize_VerifyTag(u32 *s, const unsigned char *c, unsigned char *m, + unsigned long long *mlen) { + u8 tempU8[16] = { 0 }; + P256(s, constant6Format, PRF_ROUNDS); + // return tag + unpackFormat(tempU8, s); + unpackFormat((tempU8 + 8), (s + 2)); + if (memcmp((void*) tempU8, (void*) (c), CRYPTO_ABYTES)) { + memset(m, 0, sizeof(unsigned char) * (*mlen)); + *mlen = 0; + return -1; + } + return 0; +} + +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + u32 s[8] = { 0 }; + *clen = mlen + CRYPTO_ABYTES; + //initialization + Initialize(s, npub, k); + // process associated data + ProcessAssocData(s, ad, adlen); + ProcessPlaintext(s, m, mlen, c); + // finalization + Finalize_GenerateTag(s, c + mlen); + return 0; +} +int 
crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + u32 s[8] = { 0 }; + *mlen = clen - CRYPTO_ABYTES; + if (clen < CRYPTO_ABYTES) + return -1; + //initialization + Initialize(s, npub, k); + // process associated data + ProcessAssocData(s, ad, adlen); + // process cipher: everything in c except the trailing CRYPTO_ABYTES-byte tag + ProcessCiphertext(s, m, c, clen - CRYPTO_ABYTES); + // finalization: verify the tag stored in the last CRYPTO_ABYTES bytes of c + return Finalize_VerifyTag(s, c + clen - CRYPTO_ABYTES, m, mlen); +} diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.c b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.c new file mode 100644 index 0000000..6d00d13 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.c @@ -0,0 +1,114 @@ +#include"auxFormat.h" + + +//puck begin// +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t1_32, t2_64, t2_65; + u32 t[3] = { 0 }; + temp0[0] = in[0] & 0xffe00000; + temp1[0] = (in[0] & 0x001ffc00) << 11; + temp2[0] = (in[0] & 0x000003ff) << 22; + temp0[1] = in[1] & 0xffe00000; + temp1[1] = (in[1] & 0x001ff800) << 11; + t2_64 = ((in[1] & 0x00000400) << 21); + temp2[1] = (in[1] & 0x000003ff) << 22; + temp0[2] = in[2] & 0xffc00000; + t1_32 = ((in[2] & 0x00200000) << 10); + temp1[2] = (in[2] & 0x001ff800) << 11; + t2_65 = ((in[2] & 0x00000400) << 20); + temp2[2] = (in[2] & 0x000003ff) << 22; + unpuckU32ToThree(temp0[0]); + unpuckU32ToThree(temp0[1]); + unpuckU32ToThree(temp0[2]); + t[2] = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; + unpuckU32ToThree(temp1[0]); + unpuckU32ToThree(temp1[1]); + unpuckU32ToThree(temp1[2]); + t[1] = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); + unpuckU32ToThree(temp2[0]); + unpuckU32ToThree(temp2[1]); + unpuckU32ToThree(temp2[2]); + t[0] = t2_65 |
t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); + memcpy(out, t, 12 * sizeof(unsigned char)); +} +void packU96FormatToThreePacket(u32 * out, u8 * in) { + u32 t0 = U32BIG(((u32*)in)[2]); + u32 t1 = U32BIG(((u32*)in)[1]); + u32 t2 = U32BIG(((u32*)in)[0]); + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u8 t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; + t1 = t1 << 1; + t2 = t2 << 2; + temp0[0] = t0; temp0[1] = t0 << 1; temp0[2] = t0 << 2; + puckU32ToThree(temp0[0]); + puckU32ToThree(temp0[1]); + puckU32ToThree(temp0[2]); + temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; + puckU32ToThree(temp1[0]); + puckU32ToThree(temp1[1]); + puckU32ToThree(temp1[2]); + temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; + puckU32ToThree(temp2[0]); + puckU32ToThree(temp2[1]); + puckU32ToThree(temp2[2]); + out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); + out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); + out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); +} + +void packU32FormatToThreePacket(u32 * out, u8 * in) { + u32 t2 = U32BIG(((u32*)in)[0]); + u32 temp2[3] = { 0 }; + u8 t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; + t2 = t2 << 2; + temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; + puckU32ToThree(temp2[0]); + puckU32ToThree(temp2[1]); + puckU32ToThree(temp2[2]); + out[0] = (temp2[0] >> 22); + out[1] = (((u32)t2_64) << 10) | (temp2[1] >> 22); + out[2] =(((u32)t2_65) << 10) | (temp2[2] >> 22); +} +void unpackU32FormatToThreePacket(u8 * out, u32 * in) { + u32 temp2[3] = { 0 }; + u32 t2_64, t2_65; + u32 t2; + temp2[0] = (in[0] & 0x000003ff) << 22; + + t2_64 = ((in[1] & 0x00000400) << 21); + temp2[1] = (in[1] & 0x000003ff) << 22; + + t2_65 = ((in[2] & 0x00000400) << 20); + temp2[2] = (in[2] & 0x000003ff) << 22; + + unpuckU32ToThree(temp2[0]); + unpuckU32ToThree(temp2[1]); + 
unpuckU32ToThree(temp2[2]); + t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); + *(u32*)(out) = U32BIG(t2); +} +void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) { + u32 rci,t1,t2; + unsigned char i; + for (i = 0; i < lunNum; i++) { + rci=constant7Format[i];\ + P384_ARC_SC1(rci,s[3],s[6],s[9]); \ + P384_2SC(s[1],s[4],s[7],s[10],s[2],s[5],s[8],s[11]);\ + P384_SR();\ + } +} +//12*7=84 +unsigned char constant7Format[80] = { + /*constant7Format[127]:*/ + 0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, + 0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, + 0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, + 0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, + 0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, + 0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, + 0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,}; diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.h index df30da6..301566c 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.h @@ -1,20 +1,24 @@ -//#include + #include"crypto_aead.h" #include"api.h" - +#include #include -#include #include -#include #define U32BIG(x) (x) typedef unsigned char u8; typedef unsigned int u32; typedef unsigned long long u64; +#define aead_RATE (192 / 8) +#define PR0_ROUNDS 76 +#define PR_ROUNDS 28 +#define PRF_ROUNDS 32 + #define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) #define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) + //////////////////puck begin //&:5 <<:4 |:4 #define puckU32ToThree(x){\ @@ -31,119 +35,94 @@ x = (x | (x >> 8)) & 0xf00f00f0;\ x = (x | (x >> 4)) & 0xc30c30c3;\ x = (x | (x >> 2)) & 0x92492492;\ } -//使用 u8 t2_64, t2_65;u32 temp2[3];t2; -#define packU32FormatToThreePacket( out, in) {\ -t2 = U32BIG(((u32*)in)[0]); \ -t2_64 
= (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \ -t2 = t2 << 2; \ -temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \ -puckU32ToThree(temp2[0]); \ -puckU32ToThree(temp2[1]); \ -puckU32ToThree(temp2[2]); \ -out[0] = (temp2[0] >> 22); \ -out[1] = (((u32)t2_64) << 10) | (temp2[1] >> 22); \ -out[2] =(((u32)t2_65) << 10) | (temp2[2] >> 22); \ -} -//t9 t1 t2 t1_32 t2_64 t2_65 temp0[3] temp1[3] temp2[3] -#define packU96FormatToThreePacket(out, in) {\ -t9 = U32BIG(((u32*)in)[2]); \ -t1 = U32BIG(((u32*)in)[1]); \ -t2 = U32BIG(((u32*)in)[0]); \ -t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \ -t1 = t1 << 1; \ -t2 = t2 << 2; \ -temp0[0] = t9; temp0[1] = t9 << 1; temp0[2] = t9 << 2; \ -puckU32ToThree(temp0[0]); \ -puckU32ToThree(temp0[1]); \ -puckU32ToThree(temp0[2]); \ -temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; \ -puckU32ToThree(temp1[0]); \ -puckU32ToThree(temp1[1]); \ -puckU32ToThree(temp1[2]); \ -temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \ -puckU32ToThree(temp2[0]); \ -puckU32ToThree(temp2[1]); \ -puckU32ToThree(temp2[2]); \ -out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); \ -out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); \ -out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); \ -} - //使用 u8 t2_64, t2_65;u32 temp2[3];t2; -#define unpackU32FormatToThreePacket(out, in) {\ -temp2[0] = (in[0] & 0x000003ff) << 22; \ -t2_64 = ((in[1] & 0x00000400) << 21); \ -temp2[1] = (in[1] & 0x000003ff) << 22; \ -t2_65 = ((in[2] & 0x00000400) << 20); \ -temp2[2] = (in[2] & 0x000003ff) << 22; \ -unpuckU32ToThree(temp2[0]); \ -unpuckU32ToThree(temp2[1]); \ -unpuckU32ToThree(temp2[2]); \ -t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \ -*(u32*)(out) = U32BIG(t2); \ -} -//u32 temp0[3] = { 0 };u32 temp1[3] = { 0 };u32 temp2[3] = { 0 };u32 t1_32, t2_64, t2_65;t9,t1,t2, -#define 
unpackU96FormatToThreePacket( out, in) {\ -temp0[0] = in[0] & 0xffe00000; \ -temp1[0] = (in[0] & 0x001ffc00) << 11; \ -temp2[0] = (in[0] & 0x000003ff) << 22; \ -temp0[1] = in[1] & 0xffe00000; \ -temp1[1] = (in[1] & 0x001ff800) << 11; \ -t2_64 = ((in[1] & 0x00000400) << 21); \ -temp2[1] = (in[1] & 0x000003ff) << 22; \ -temp0[2] = in[2] & 0xffc00000; \ -t1_32 = ((in[2] & 0x00200000) << 10); \ -temp1[2] = (in[2] & 0x001ff800) << 11; \ -t2_65 = ((in[2] & 0x00000400) << 20); \ -temp2[2] = (in[2] & 0x000003ff) << 22; \ -unpuckU32ToThree(temp0[0]); \ -unpuckU32ToThree(temp0[1]); \ -unpuckU32ToThree(temp0[2]); \ -t9 = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; \ -unpuckU32ToThree(temp1[0]); \ -unpuckU32ToThree(temp1[1]); \ -unpuckU32ToThree(temp1[2]); \ -t1 = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); \ -unpuckU32ToThree(temp2[0]); \ -unpuckU32ToThree(temp2[1]); \ -unpuckU32ToThree(temp2[2]); \ -t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \ -*(u32*)(out) = U32BIG(t2); \ -*(u32*)(out + 4) = U32BIG(t1); \ -*(u32*)(out + 8) = U32BIG(t9); \ -} - -#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) -#define sbox(a, b, c, d, e, f, g, h) \ -{ \ - t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; e = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ -} - - -#define U96_BIT_LOTR32_1(t0,t1,t2,t3,t4,t5){\ -t3= t1;\ -t4 = t2;\ -t5 = LOTR32(t0, 1); \ -} -#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\ -t3= LOTR32(t2, 2);\ -t4 =LOTR32(t0, 3);\ -t5 = LOTR32(t1, 3); \ -} -//55=3*18+1 -#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\ -t3= LOTR32(t1, 18); \ -t4 = LOTR32(t2, 18);\ -t5 = LOTR32(t0, 19); \ -} -/* -s0 s1 s2 -s3 s4 s5 -s6 s7 s8 -s9 s10 s11 -*/ +extern unsigned char constant7Format[80]; /* declaration only: the table is defined once, with its initializer, in auxFormat.c */ -void printU32State(char name[], u32* var, long len); -void printfU96Format(char name[], u32 * s); -//////////////////puck end -void printU8(char name[], u8 var[], int len, int offset); -void printfU96Format(char name[],
u32 * s); +#define P384_ARC_SC1(rci,S2,S3,S4) \ + do { \ + __asm__ __volatile__ ( \ + "/*add round const s0 s1 s2 */ \n\t"\ + "ands %[t1], %[rci], #0xc0\n\t" \ + "eors %[S_0], %[S_0], %[t1], LSR #6 \n\t" /*s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;*/\ + "ands %[t1], %[rci], #0x38\n\t" \ + "eors %[S_1], %[S_1], %[t1], LSR #3 \n\t" /*s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;*/\ + "ands %[t1], %[rci], #0x7\n\t" \ + "eors %[S_3], %[S_3], %[t1] \n\t" /*s[2] ^= constant7Format[lunNum] & 0x7;*/\ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[S_4], %[t1] \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] \n\t"\ + "eors %[S_0], %[S_6], %[S_0] \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] \n\t"\ + "eors %[S_6], %[S_6], %[t1] \n\t"\ + "ands %[S_0], %[t1],%[S_0] \n\t"\ + "eors %[S_0], %[t2],%[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1], %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2), [rci] "+r" (rci), \ + [S_0] "+r" (s[0]), [S_1] "+r" (s[1]), [S_3] "+r" (s[2]),\ + [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : /* no input-only operands */ : "cc" /* the flag-setting ands/eors/mvns/orrs above modify the condition flags */ );\ +}while (0) +#define P384_2SC(S1,S2,S3,S4,S5,S6,S7,S8) \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[S_4], %[t1] \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] \n\t"\ + "eors %[S_0], %[S_6], %[S_0] \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] \n\t"\ + "eors %[S_6], %[S_6], %[t1] \n\t"\ + "ands %[S_0], %[t1],%[S_0] \n\t"\ + "eors %[S_0], %[t2],%[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1], %[S_2] \n\t"\ + "/*sbox column*/ \n\t"\ + "mvns %[S_1], %[S_1] \n\t"\ + "ands %[t1], %[S_3], %[S_1] \n\t"\ + "eors %[t1], %[S_5], %[t1] \n\t"\ + "orrs %[S_5], %[S_3], %[S_5] \n\t"\ + "eors %[S_1], %[S_7], %[S_1] \n\t"\ + "eors %[S_5], %[S_5], %[S_1]
\n\t"\ + "eors %[t2], %[S_3], %[S_7] \n\t"\ + "eors %[S_7], %[S_7], %[t1] \n\t"\ + "ands %[S_1], %[t1],%[S_1] \n\t"\ + "eors %[S_1], %[t2],%[S_1] \n\t"\ + "ands %[S_3], %[S_5], %[t2] \n\t"\ + "eors %[S_3], %[t1], %[S_3] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) ,\ + [S_1] "+r" (S5), [S_3] "+r" (S6), [S_5] "+r" (S7), [S_7] "+r" (S8)\ + : /* no input-only operands */ : "cc" /* both s-box columns use flag-setting instructions, so the condition flags are clobbered */ );\ +}while (0) +#define P384_SR() \ + do { \ + __asm__ __volatile__ ( \ + "/*rotate shift left 1 bit [w9 w5 w1-> (w1,1) w9 w5] */ \n\t"\ + "mov %[t1], %[S_3] \n\t"\ + "mov %[S_3], %[S_4] \n\t"\ + "mov %[S_4], %[S_5] \n\t"\ + "ROR %[S_5], %[t1] , #31 \n\t"\ + "/*rotate shift left 8 bits [w10 w6 w2-> (w6,3) (w2,3) ( w10,2)]*/ \n\t"\ + "mov %[t1], %[S_8] \n\t"\ + "ROR %[S_8], %[S_7] , #29 \n\t"\ + "ROR %[S_7], %[S_6] , #29 \n\t"\ + "ROR %[S_6], %[t1] , #30 \n\t"\ + "/*rotate shift left 55 bit [w11 w7 w3-> (w3,13) (w11,14) ( w7,14)] */ \n\t"\ + "mov %[t1], %[S_9] \n\t"\ + "ROR %[S_9], %[S_10] , #14 \n\t"\ + "ROR %[S_10], %[S_11] , #14 \n\t"\ + "ROR %[S_11], %[t1] , #13 \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1),\ + [S_3] "+r" (s[3]), [S_6] "+r" (s[6]), [S_9] "+r" (s[9]) ,\ + [S_4] "+r" (s[4]), [S_7] "+r" (s[7]), [S_10] "+r" (s[10]),\ + [S_5] "+r" (s[5]), [S_8] "+r" (s[8]), [S_11] "+r" (s[11])\ + : : );\ +}while (0) diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/crypto_aead.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/crypto_aead.h index cdfdf19..862d176 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/crypto_aead.h +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/crypto_aead.h @@ -1,3 +1,4 @@ + int crypto_aead_encrypt( unsigned char *c, unsigned long long *clen, const unsigned char *m, unsigned long long mlen, diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/encrypt.c
b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/encrypt.c index bea9f64..6c2bb34 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/encrypt.c +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/encrypt.c @@ -1,171 +1,30 @@ #include"auxFormat.h" -#define aead_RATE (192 / 8) -#define PR0_ROUNDS 76 -#define PR_ROUNDS 28 -#define PRF_ROUNDS 32 - -unsigned char constant7Format[127] = { - /*constant7Format[127]:*/ - 0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, - 0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, - 0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, - 0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, - 0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, - 0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, - 0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,0xcf,0x5e,0xd3,0x9f, - 0xdc,0xc6,0x16,0x91,0x8d,0x4c,0x43,0x1a,0xd0,0x87,0x1c,0xc1, - 0x0f,0x59,0xca,0x57,0x9b,0xdd,0xce,0x56,0x93,0x9d,0xcc,0x46, - 0x13,0x98,0xc5,0x0e,0x51,0x8a,0x55,0x8b,0x5d,0xcb,0x5f,0xdb, - 0xdf,0xde,0xd6,0x96,0x94,0x84,0x04, }; -/* State - * w8 w4 w0 - * w9 w5 w1 - * w10 w6 w2 - * w11 w7 w3 - */ - static void permutation384(unsigned int *in, int rounds, unsigned char *rc) { - - uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11; - uint32_t s0, s1, s2; - uint32_t i=0; - __asm volatile( - "ldr w0, [in] \n\t" - "ldr w4, [in, #4] \n\t" - "ldr w8, [in, #8] \n\t" - "ldr w1, [in, #12] \n\t" - "ldr w5, [in, #16] \n\t" - "ldr w9, [in, #20] \n\t" - "ldr w2, [in, #24] \n\t" - "ldr w6, [in, #28] \n\t" - "ldr w10, [in, #32] \n\t" - "ldr w3, [in, #36] \n\t" - "ldr w7, [in, #40] \n\t" - "ldr w11, [in, #44] \n\t" - "enc_loop: \n\t" - "/*add round const s0 s1*/ \n\t" - "ldrb s0, [rc] \n\t" - "LSR s1, s0, #6 \n\t" - "and s1, s1, 0x3 \n\t" - "LSR s2, s0, #3 \n\t" - "and s2, s2, 0x7 \n\t" - "and s0, s0, 0x7 \n\t" - "eors w8, w8, s0 \n\t" - "eors w4, w4, s2 \n\t" - "eors w0, w0, s1 \n\t" - "/*sbox first 
column*/ \n\t" - "mvns w0, w0 \n\t" - "ands s0, w1, w0 \n\t" - "eors s0, w2, s0 \n\t" - "orrs w2, w1, w2 \n\t" - "eors w0, w3, w0 \n\t" - "eors w2, w2, w0 \n\t" - "eors s1, w1, w3 \n\t" - "eors w3, w3, s0 \n\t" - "ands w0, s0, w0 \n\t" - "eors w0, s1, w0 \n\t" - "ands w1, w2, s1 \n\t" - "eors w1, s0, w1 \n\t" - "/*sbox second column*/ \n\t" - "mvns w4, w4 \n\t" - "ands s0, w5, w4 \n\t" - "eors s0, w6, s0 \n\t" - "orrs w6, w5, w6 \n\t" - "eors w4, w7, w4 \n\t" - "eors w6, w6, w4 \n\t" - "eors s1, w5, w7 \n\t" - "eors w7, w7, s0 \n\t" - "ands w4, s0, w4 \n\t" - "eors w4, s1, w4 \n\t" - "ands w5, w6, s1 \n\t" - "eors w5, s0, w5 \n\t" - "/*sbox third column*/ \n\t" - "mvns w8, w8 \n\t" - "ands s0, w9, w8 \n\t" - "eors s0, w10, s0 \n\t" - "orrs w10, w9, w10 \n\t" - "eors w8, w11, w8 \n\t" - "eors w10, w10, w8 \n\t" - "eors s1, w9, w11 \n\t" - "eors w11, w11, s0 \n\t" - "ands w8, s0, w8 \n\t" - "eors w8, s1, w8 \n\t" - "ands w9, w10, s1 \n\t" - "eors w9, s0, w9 \n\t" - "/*rotate shift left 1 bit [w9 w5 w1-> (w1,1) w9 w5] */ \n\t" - "mov s0, w1 \n\t" - "mov w1, w5 \n\t" - "mov w5, w9 \n\t" - "ROR w9, s0, #31 \n\t" - "/*rotate shift left 8 bits [w10 w6 w2-> (w6,3) (w2,3) ( w10,2)]*/ \n\t" - "mov s0, w10 \n\t" - "ROR w10, w6 , #29 \n\t" - "ROR w6, w2 , #29 \n\t" - "ROR w2, s0, #30 \n\t" - "/*rotate shift left 55 bit [w11 w7 w3-> (w3,13) (w11,14) ( w7,14)] */ \n\t" - "mov s0, w3 \n\t" - "ROR w3, w7 , #14 \n\t" - "ROR w7, w11 , #14 \n\t" - "ROR w11, s0, #13 \n\t" - "/*loop control*/ \n\t" - "adds rc, rc, #1 \n\t" - "subs rounds, rounds, #1 \n\t" - "bne enc_loop \n\t" - "str w0, [in] \n\t" - "str w4, [in, #4] \n\t" - "str w8, [in, #8] \n\t" - "str w1, [in, #12] \n\t" - "str w5, [in, #16] \n\t" - "str w9, [in, #20] \n\t" - "str w2, [in, #24] \n\t" - "str w6, [in, #28] \n\t" - "str w10, [in, #32] \n\t" - "str w3, [in, #36] \n\t" - "str w7, [in, #40] \n\t" - "str w11, [in, #44] \n\t" - ); -} - -int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, - const unsigned 
char *m, unsigned long long mlen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *nsec, const unsigned char *npub, - const unsigned char *k) { - u8 i; - u32 s[12] = { 0 }; +void Initialize(u32 *s, const unsigned char *npub, const unsigned char *k) { u8 tempData[24] = { 0 }; - u32 dataFormat[6] = { 0 }; - u32 s_temp[12] = { 0 }; - u32 t1, t2, t3, t5, t6, t8, t9, t11; - u32 t1_32, t2_64, t2_65; - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - - *clen = mlen + CRYPTO_ABYTES; - // initialization packU96FormatToThreePacket(s, npub); - memcpy(tempData, npub+12, sizeof(unsigned char)*4); - memcpy(tempData+4, k, sizeof(unsigned char) * 16); - packU96FormatToThreePacket((s + 3), tempData); - packU96FormatToThreePacket((s + 6), (tempData+12)); - + memcpy(tempData, npub + 12, sizeof(unsigned char) * 4); + memcpy(tempData + 4, k, sizeof(unsigned char) * 16); + packU96FormatToThreePacket(s + 3, tempData); + packU96FormatToThreePacket(s + 6, tempData + 12); s[9] = 0x80000000; - permutation384(s,PR0_ROUNDS,constant7Format); - // process associated data + P384(s, constant7Format, PR0_ROUNDS); +} +void ProcessAssocData(u32 *s, const u8* ad, unsigned long long adlen) { + u32 dataFormat[6] = { 0 }; + u8 tempData[24] = { 0 }; if (adlen) { - // rlen = adlen; while (adlen >= aead_RATE) { packU96FormatToThreePacket(dataFormat, ad); s[0] ^= dataFormat[0]; s[1] ^= dataFormat[1]; s[2] ^= dataFormat[2]; - packU96FormatToThreePacket((dataFormat+3), (ad+12)); + packU96FormatToThreePacket(dataFormat + 3, ad + 12); s[3] ^= dataFormat[3]; s[4] ^= dataFormat[4]; s[5] ^= dataFormat[5]; - permutation384(s,PR_ROUNDS,constant7Format); + P384(s, constant7Format, PR_ROUNDS); adlen -= aead_RATE; ad += aead_RATE; } @@ -176,138 +35,94 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, s[0] ^= dataFormat[0]; s[1] ^= dataFormat[1]; s[2] ^= dataFormat[2]; - packU96FormatToThreePacket((dataFormat + 3), (tempData + 12)); + 
packU96FormatToThreePacket(dataFormat + 3, tempData + 12); s[3] ^= dataFormat[3]; s[4] ^= dataFormat[4]; s[5] ^= dataFormat[5]; - permutation384(s,PR_ROUNDS,constant7Format); + + P384(s, constant7Format, PR_ROUNDS); } s[9] ^= 0x80000000; +} + +void ProcessPlaintext(u32 *s, const u8* m, unsigned long long mlen, unsigned char *c) { + u32 dataFormat[6] = { 0 }; + u8 tempData[24] = { 0 }; if (mlen) { while (mlen >= aead_RATE) { packU96FormatToThreePacket(dataFormat, m); s[0] ^= dataFormat[0]; s[1] ^= dataFormat[1]; s[2] ^= dataFormat[2]; - packU96FormatToThreePacket((dataFormat + 3), (m + 12)); + packU96FormatToThreePacket(dataFormat + 3, m + 12); s[3] ^= dataFormat[3]; s[4] ^= dataFormat[4]; s[5] ^= dataFormat[5]; unpackU96FormatToThreePacket(c, s); - unpackU96FormatToThreePacket((c+12), (s+3)); - permutation384(s,PR_ROUNDS,constant7Format); + unpackU96FormatToThreePacket(c + 12, s + 3); + + P384(s, constant7Format, PR_ROUNDS); mlen -= aead_RATE; m += aead_RATE; c += aead_RATE; } memset(tempData, 0, sizeof(tempData)); memcpy(tempData, m, mlen * sizeof(unsigned char)); - tempData[mlen]= 0x01; + tempData[mlen] = 0x01; packU96FormatToThreePacket(dataFormat, tempData); s[0] ^= dataFormat[0]; s[1] ^= dataFormat[1]; s[2] ^= dataFormat[2]; - packU96FormatToThreePacket((dataFormat + 3), (tempData + 12)); + packU96FormatToThreePacket(dataFormat + 3, tempData + 12); s[3] ^= dataFormat[3]; s[4] ^= dataFormat[4]; s[5] ^= dataFormat[5]; + //*c = EXT_BYTE(x0, i); unpackU96FormatToThreePacket(tempData, s); - unpackU96FormatToThreePacket((tempData+12), (s+3)); - memcpy(c, tempData, mlen * sizeof(unsigned char)); - c += mlen; + unpackU96FormatToThreePacket(tempData + 12, s + 3); + memcpy(c, tempData, mlen * sizeof(unsigned char)); + //c += mlen; } - // finalization - permutation384(s,PRF_ROUNDS,constant7Format); +} +void Finalize_GenerateTag(u32 *s, unsigned char *c) { + u8 tempData[12] = { 0 }; + P384(s, constant7Format, PRF_ROUNDS); // return tag - unpackU96FormatToThreePacket(c, 
s); - unpackU96FormatToThreePacket(tempData, (s + 3)); - memcpy(c+12, tempData, sizeof(unsigned char) * 4); - return 0; + unpackU96FormatToThreePacket(c , s); + unpackU96FormatToThreePacket(tempData, s + 3); + memcpy(c + 12 , tempData, sizeof(unsigned char) * 4); } - -int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, - unsigned char *nsec, const unsigned char *c, unsigned long long clen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *npub, const unsigned char *k) { - - u8 i, j; - u32 s[12] = { 0 }; - u32 s_temp[12] = { 0 }; +void ProcessCiphertext(u32 *s, unsigned char *m, const unsigned char *c, unsigned long long clen) +{ u32 dataFormat[12] = { 0 }; u32 dataFormat_1[12] = { 0 }; - u8 tempData[24] = { 0 }; - u8 tempU8[24] = { 0 }; - u32 t1, t2, t3, t5, t6, t8, t9, t11; - u32 t1_32, t2_64, t2_65; - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - *mlen = clen - CRYPTO_ABYTES; + /* *mlen is computed by crypto_aead_decrypt; this helper has no mlen parameter */ - if (clen < CRYPTO_ABYTES) - return -1; - // initialization - packU96FormatToThreePacket(s, npub); - memcpy(tempData, npub + 12, sizeof(unsigned char) * 4); - memcpy(tempData + 4, k, sizeof(unsigned char) * 16); - packU96FormatToThreePacket((s + 3), tempData); - packU96FormatToThreePacket((s + 6), (tempData + 12)); - - s[9] = 0x80000000; - permutation384(s,PR0_ROUNDS,constant7Format); - // process associated data - if (adlen) { - while (adlen >= aead_RATE) { - packU96FormatToThreePacket(dataFormat, ad); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - s[2] ^= dataFormat[2]; - packU96FormatToThreePacket((dataFormat + 3), (ad + 12)); - s[3] ^= dataFormat[3]; - s[4] ^= dataFormat[4]; - s[5] ^= dataFormat[5]; - permutation384(s,PR_ROUNDS,constant7Format); - adlen -= aead_RATE; - ad += aead_RATE; - } - memset(tempData, 0, sizeof(tempData)); - memcpy(tempData, ad, adlen * sizeof(unsigned char)); - tempData[adlen] = 0x01; - packU96FormatToThreePacket(dataFormat, tempData); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - s[2]
^= dataFormat[2]; - packU96FormatToThreePacket((dataFormat + 3), (tempData + 12)); - s[3] ^= dataFormat[3]; - s[4] ^= dataFormat[4]; - s[5] ^= dataFormat[5]; - permutation384(s,PR_ROUNDS,constant7Format); - } - s[9] ^= 0x80000000; - clen -= CRYPTO_ABYTES; + u8 tempU8[24] = { 0 },i; if (clen) { while (clen >= aead_RATE) { packU96FormatToThreePacket(dataFormat, c); dataFormat_1[0] = s[0] ^ dataFormat[0]; dataFormat_1[1] = s[1] ^ dataFormat[1]; dataFormat_1[2] = s[2] ^ dataFormat[2]; - packU96FormatToThreePacket((dataFormat+3), (c+12)); + packU96FormatToThreePacket(dataFormat + 3, c + 12); dataFormat_1[3] = s[3] ^ dataFormat[3]; dataFormat_1[4] = s[4] ^ dataFormat[4]; dataFormat_1[5] = s[5] ^ dataFormat[5]; unpackU96FormatToThreePacket(m, dataFormat_1); - unpackU96FormatToThreePacket((m + 12), (dataFormat_1 + 3)); + unpackU96FormatToThreePacket(m + 12, dataFormat_1 + 3); s[0] = dataFormat[0]; s[1] = dataFormat[1]; s[2] = dataFormat[2]; s[3] = dataFormat[3]; s[4] = dataFormat[4]; s[5] = dataFormat[5]; - permutation384(s,PR_ROUNDS,constant7Format); + + P384(s, constant7Format, PR_ROUNDS); clen -= aead_RATE; m += aead_RATE; c += aead_RATE; } unpackU96FormatToThreePacket(tempU8, s); - unpackU96FormatToThreePacket((tempU8+12), (s+3)); + unpackU96FormatToThreePacket(tempU8 + 12, s + 3); for (i = 0; i < clen; ++i, ++m, ++c) { *m = tempU8[i] ^ *c; @@ -315,19 +130,53 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, } tempU8[i] ^= 0x01; packU96FormatToThreePacket(s, tempU8); - packU96FormatToThreePacket((s + 3), (tempU8 + 12)); + packU96FormatToThreePacket(s + 3, tempU8 + 12); } - // finalization - permutation384(s,PRF_ROUNDS,constant7Format); +} +int Finalize_VerifyTag(u32 *s, const unsigned char *c, unsigned char *m, unsigned long long *mlen) { + u8 tempU8[24] = { 0 }; /* recomputed tag: bytes 0..11 come from s[0..2], bytes 12.. from s[3..5]; both unpack calls must run before the compare */ + P384(s, constant7Format, PRF_ROUNDS); // return tag unpackU96FormatToThreePacket(tempU8, s); - unpackU96FormatToThreePacket((tempU8+12), (s+3)); - if (U32BIG(((u32*)tempU8)[0]) !=
U32BIG(((u32*)c)[0]) || - U32BIG(((u32*)tempU8)[1]) != U32BIG(((u32*)c)[1]) || - U32BIG(((u32*)tempU8)[2]) != U32BIG(((u32*)c)[2]) || - U32BIG(((u32*)tempU8)[3]) != U32BIG(((u32*)c)[3]) ){ + unpackU96FormatToThreePacket(tempU8 + 12, s + 3); + if (memcmp((void*)tempU8, (void*)(c), CRYPTO_ABYTES)) { + memset(m, 0, sizeof(unsigned char) * (*mlen)); + *mlen = 0; return -1; } return 0; } +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + u32 s[12] = { 0 }; + *clen = mlen + CRYPTO_ABYTES; + // initialization + Initialize(s,npub,k); + // process associated data + ProcessAssocData(s, ad, adlen); + ProcessPlaintext(s, m, mlen,c); + // finalization + Finalize_GenerateTag(s, c + mlen); + return 0; +} + +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + u32 s[12] = { 0 }; + *mlen = clen - CRYPTO_ABYTES; + if (clen < CRYPTO_ABYTES) + return -1; + // initialization + Initialize(s, npub, k); + // process associated data + ProcessAssocData(s, ad, adlen); + ProcessCiphertext(s,m, c, clen - CRYPTO_ABYTES); + // finalization: the tag is the last CRYPTO_ABYTES bytes of c + return Finalize_VerifyTag(s, c + clen - CRYPTO_ABYTES, m, mlen); +} diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/api.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/api.h new file mode 100644 index 0000000..d8257f4 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/api.h @@ -0,0 +1,7 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 + + diff --git
a/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.c b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.c new file mode 100644 index 0000000..2181f8f --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.c @@ -0,0 +1,120 @@ +#include"auxFormat.h" + + +//puck begin// +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t1_32, t2_64, t2_65; + u32 t[3] = { 0 }; + temp0[0] = in[0] & 0xffe00000; + temp1[0] = (in[0] & 0x001ffc00) << 11; + temp2[0] = (in[0] & 0x000003ff) << 22; + temp0[1] = in[1] & 0xffe00000; + temp1[1] = (in[1] & 0x001ff800) << 11; + t2_64 = ((in[1] & 0x00000400) << 21); + temp2[1] = (in[1] & 0x000003ff) << 22; + temp0[2] = in[2] & 0xffc00000; + t1_32 = ((in[2] & 0x00200000) << 10); + temp1[2] = (in[2] & 0x001ff800) << 11; + t2_65 = ((in[2] & 0x00000400) << 20); + temp2[2] = (in[2] & 0x000003ff) << 22; + unpuckU32ToThree(temp0[0]); + unpuckU32ToThree(temp0[1]); + unpuckU32ToThree(temp0[2]); + t[2] = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; + unpuckU32ToThree(temp1[0]); + unpuckU32ToThree(temp1[1]); + unpuckU32ToThree(temp1[2]); + t[1] = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); + unpuckU32ToThree(temp2[0]); + unpuckU32ToThree(temp2[1]); + unpuckU32ToThree(temp2[2]); + t[0] = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); + memcpy(out, t, 12 * sizeof(unsigned char)); +} +void packU96FormatToThreePacket(u32 * out, u8 * in) { + u32 t0 = U32BIG(((u32*)in)[2]); + u32 t1 = U32BIG(((u32*)in)[1]); + u32 t2 = U32BIG(((u32*)in)[0]); + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u8 t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; + t1 = t1 << 1; + t2 = t2 << 2; + temp0[0] = t0; temp0[1] = t0 << 1; temp0[2] = t0 << 2; + puckU32ToThree(temp0[0]); + puckU32ToThree(temp0[1]); + puckU32ToThree(temp0[2]); + temp1[0] = t1; 
temp1[1] = t1 << 1; temp1[2] = t1 << 2; + puckU32ToThree(temp1[0]); + puckU32ToThree(temp1[1]); + puckU32ToThree(temp1[2]); + temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; + puckU32ToThree(temp2[0]); + puckU32ToThree(temp2[1]); + puckU32ToThree(temp2[2]); + out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); + out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); + out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); +} + +void packU32FormatToThreePacket(u32 * out, u8 * in) { + u32 t2 = U32BIG(((u32*)in)[0]); + u32 temp2[3] = { 0 }; + u8 t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; + t2 = t2 << 2; + temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; + puckU32ToThree(temp2[0]); + puckU32ToThree(temp2[1]); + puckU32ToThree(temp2[2]); + out[0] = (temp2[0] >> 22); + out[1] = (((u32)t2_64) << 10) | (temp2[1] >> 22); + out[2] =(((u32)t2_65) << 10) | (temp2[2] >> 22); +} +void unpackU32FormatToThreePacket(u8 * out, u32 * in) { + u32 temp2[3] = { 0 }; + u32 t2_64, t2_65; + u32 t2; + temp2[0] = (in[0] & 0x000003ff) << 22; + + t2_64 = ((in[1] & 0x00000400) << 21); + temp2[1] = (in[1] & 0x000003ff) << 22; + + t2_65 = ((in[2] & 0x00000400) << 20); + temp2[2] = (in[2] & 0x000003ff) << 22; + + unpuckU32ToThree(temp2[0]); + unpuckU32ToThree(temp2[1]); + unpuckU32ToThree(temp2[2]); + t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); + *(u32*)(out) = U32BIG(t2); +} +void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) { + u32 s_temp[12] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + unsigned char i; + for (i = 0; i < lunNum; i++) { +s[0] ^= (round[i] >> 6) & 0x3;\ +s[1] ^= (round[i] >> 3) & 0x7;\ +s[2] ^= round[i] & 0x7;\ +sbox(s[0], s[3], s[6], s[9] , s_temp[3], s_temp[6], s_temp[9]);\ +sbox(s[1], s[4], s[7], s[10], s[3] , s_temp[7], s_temp[10]);\ +sbox(s[2], s[5], s[8], s[11], s[4] , s_temp[8], s_temp[11]);\ +s[5] = 
LOTR32(s_temp[3], 1); \ +U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\ +U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\ + } +} +//12*7=84 +unsigned char constant7Format[80] = { + /*constant7Format[127]:*/ + 0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, + 0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, + 0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, + 0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, + 0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, + 0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, + 0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,}; diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.h new file mode 100644 index 0000000..65d914f --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.h @@ -0,0 +1,58 @@ + +#include"crypto_aead.h" +#include"api.h" +#include +#include +#include +#define U32BIG(x) (x) + +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; + +#define aead_RATE (192 / 8) +#define PR0_ROUNDS 76 +#define PR_ROUNDS 28 +#define PRF_ROUNDS 32 + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) + +#define sbox(a, b, c, d, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} + +#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t2, 2);\ +t4 =LOTR32(t0, 3);\ +t5 = LOTR32(t1, 3); \ +} +//55=3*18+1 +#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t1, 18); \ +t4 = LOTR32(t2, 18);\ +t5 = LOTR32(t0, 19); \ +} + +//////////////////puck begin +//&:5 <<:4 |:4 +#define puckU32ToThree(x){\ +x &= 0x92492492;\ +x = (x | (x << 2)) & 0xc30c30c3;\ +x = (x | (x << 4)) & 0xf00f00f0;\ +x = (x | (x 
<< 8)) & 0xff0000ff;\ +x = (x | (x << 16)) & 0xfff00000;\ +} +#define unpuckU32ToThree(x){ /* reverses puckU32ToThree's shift/mask sequence: moves the bits held in the top of x back out to every third bit position (mask 0x92492492) */\ +x &= 0xfff00000;\ +x = (x | (x >> 16)) & 0xff0000ff;\ +x = (x | (x >> 8)) & 0xf00f00f0;\ +x = (x | (x >> 4)) & 0xc30c30c3;\ +x = (x | (x >> 2)) & 0x92492492;\ +} + +void packU96FormatToThreePacket(u32 * out, u8 * in); +void unpackU96FormatToThreePacket(u8 * out, u32 * in); +void P384(unsigned int *s, unsigned char *round, unsigned char lunNum); /* defined in auxFormat.c; declared here so encrypt.c no longer calls it through an implicit declaration */ +extern unsigned char constant7Format[80]; /* declaration only: the initialised table is defined once in auxFormat.c */ diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/crypto_aead.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/crypto_aead.h new file mode 100644 index 0000000..862d176 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/crypto_aead.h @@ -0,0 +1,18 @@ + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k +); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k +); diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/encrypt.c b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/encrypt.c new file mode 100644 index 0000000..6c2bb34 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/encrypt.c @@ -0,0 +1,181 @@ + +#include"auxFormat.h" + +void Initialize(u32 *s, const unsigned char *npub, const unsigned char *k) { /* loads the state: s[0..2] from npub[0..11]; s[3..8] from a zeroed 24-byte buffer holding npub[12..15] followed by the 16 key bytes; s[9] = 0x80000000; then runs PR0_ROUNDS rounds of P384. s[10] and s[11] are not written here. */ + u8 tempData[24] = { 0 }; + packU96FormatToThreePacket(s, npub); + memcpy(tempData, npub + 12, sizeof(unsigned char) * 4); + memcpy(tempData + 4, k, sizeof(unsigned char) * 16); + packU96FormatToThreePacket(s + 3, tempData); + packU96FormatToThreePacket(s + 6, tempData + 12); + s[9] = 0x80000000; + P384(s, constant7Format, PR0_ROUNDS); +} +void 
ProcessAssocData(u32 *s, const u8* ad, unsigned long long adlen) { /* Absorbs the associated data: every full aead_RATE-byte block is packed and XORed into s[0..5], followed by PR_ROUNDS rounds of P384; the leftover bytes are copied into a zeroed buffer, a 0x01 byte is appended, and that block is absorbed the same way. Nothing is absorbed when adlen == 0. s[9] is XORed with 0x80000000 at the end in every case. */ + u32 dataFormat[6] = { 0 }; + u8 tempData[24] = { 0 }; + if (adlen) { + while (adlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket(dataFormat + 3, ad + 12); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + P384(s, constant7Format, PR_ROUNDS); + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; /* padding byte right after the last data byte; adlen < aead_RATE once the loop above has finished */ + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket(dataFormat + 3, tempData + 12); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + + P384(s, constant7Format, PR_ROUNDS); + } + s[9] ^= 0x80000000; +} + +void ProcessPlaintext(u32 *s, const u8* m, unsigned long long mlen, unsigned char *c) { /* Encrypts m into c: each full aead_RATE-byte block is XORed into s[0..5], s[0..5] is unpacked to c as the ciphertext block, then PR_ROUNDS rounds of P384 run. The trailing partial block is padded with 0x01, XORed in, and only its first mlen bytes are copied to c; P384 is not called after that last block in this function. Does nothing when mlen == 0. */ + u32 dataFormat[6] = { 0 }; + u8 tempData[24] = { 0 }; + if (mlen) { + while (mlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, m); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket(dataFormat + 3, m + 12); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + unpackU96FormatToThreePacket(c, s); + unpackU96FormatToThreePacket(c + 12, s + 3); + + P384(s, constant7Format, PR_ROUNDS); + mlen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, m, mlen * sizeof(unsigned char)); + tempData[mlen] = 0x01; /* padding byte; mlen < aead_RATE here */ + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket(dataFormat + 3, tempData + 12); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + //*c = EXT_BYTE(x0, i); + 
unpackU96FormatToThreePacket(tempData, s); + unpackU96FormatToThreePacket(tempData + 12, s + 3); + memcpy(c, tempData, mlen * sizeof(unsigned char)); /* tempData is reused as the output staging buffer; only the mlen real bytes reach c */ + //c += mlen; + } +} +void Finalize_GenerateTag(u32 *s, unsigned char *c) { /* Runs PRF_ROUNDS rounds of P384, then writes 16 bytes to c: 12 unpacked from s[0..2] and the first 4 of the 12 bytes unpacked from s[3..5]. */ + u8 tempData[12] = { 0 }; + P384(s, constant7Format, PRF_ROUNDS); + // return tag + unpackU96FormatToThreePacket(c , s); + unpackU96FormatToThreePacket(tempData, s + 3); + memcpy(c + 12 , tempData, sizeof(unsigned char) * 4); +} +void ProcessCiphertext(u32 *s, unsigned char *m, const unsigned char *c, unsigned long long clen) +{ /* Decrypts clen bytes of c into m and updates the state. Does nothing when clen == 0. Only indices 0..5 of dataFormat and dataFormat_1 are used. */ + u32 dataFormat[12] = { 0 }; + u32 dataFormat_1[12] = { 0 }; + u8 tempU8[24] = { 0 },i; + if (clen) { + while (clen >= aead_RATE) { /* full block: plaintext = s[0..5] XOR packed ciphertext, then s[0..5] is replaced by the packed ciphertext */ + packU96FormatToThreePacket(dataFormat, c); + dataFormat_1[0] = s[0] ^ dataFormat[0]; + dataFormat_1[1] = s[1] ^ dataFormat[1]; + dataFormat_1[2] = s[2] ^ dataFormat[2]; + packU96FormatToThreePacket(dataFormat + 3, c + 12); + dataFormat_1[3] = s[3] ^ dataFormat[3]; + dataFormat_1[4] = s[4] ^ dataFormat[4]; + dataFormat_1[5] = s[5] ^ dataFormat[5]; + unpackU96FormatToThreePacket(m, dataFormat_1); + unpackU96FormatToThreePacket(m + 12, dataFormat_1 + 3); + s[0] = dataFormat[0]; + s[1] = dataFormat[1]; + s[2] = dataFormat[2]; + s[3] = dataFormat[3]; + s[4] = dataFormat[4]; + s[5] = dataFormat[5]; + + P384(s, constant7Format, PR_ROUNDS); + clen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + unpackU96FormatToThreePacket(tempU8, s); /* partial block is handled on the byte form of s[0..5] */ + unpackU96FormatToThreePacket(tempU8 + 12, s + 3); + for (i = 0; i < clen; ++i, ++m, ++c) + { + *m = tempU8[i] ^ *c; + tempU8[i] = *c; + } + tempU8[i] ^= 0x01; /* i == clen (< aead_RATE) here: XOR the padding byte in just past the last ciphertext byte */ + packU96FormatToThreePacket(s, tempU8); + packU96FormatToThreePacket(s + 3, tempU8 + 12); + } +} +int Finalize_VerifyTag(u32 *s, const unsigned char *c, unsigned char *m, unsigned long long *mlen) { /* Runs PRF_ROUNDS rounds of P384 and unpacks s[0..5] into tempU8 so it can be compared with the tag bytes at c. */ + u8 tempU8[24] = { 0 }; + P384(s, constant7Format, PRF_ROUNDS); + // return tag + unpackU96FormatToThreePacket(tempU8, s); + unpackU96FormatToThreePacket(tempU8 + 12, s + 3); + if 
(memcmp((void*)tempU8, (void*)(c), CRYPTO_ABYTES)) { + memset(m, 0, sizeof(unsigned char) * (*mlen)); + *mlen = 0; + return -1; + } + return 0; +} +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { /* Writes mlen ciphertext bytes followed by the tag to c and sets *clen = mlen + CRYPTO_ABYTES. nsec is not used. Always returns 0. */ + u32 s[12] = { 0 }; + *clen = mlen + CRYPTO_ABYTES; + // initialization + Initialize(s,npub,k); + // process associated data + ProcessAssocData(s, ad, adlen); + ProcessPlaintext(s, m, mlen,c); + // finalization + Finalize_GenerateTag(s, c + mlen); + return 0; +} +/* Decrypts the first clen - CRYPTO_ABYTES bytes of c into m and checks the trailing CRYPTO_ABYTES-byte tag. Returns -1 without touching *mlen or m when clen is shorter than a tag; otherwise returns the result of Finalize_VerifyTag (0 on match, -1 on mismatch). nsec is not used. */ +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + u32 s[12] = { 0 }; + if (clen < CRYPTO_ABYTES) + return -1; + *mlen = clen - CRYPTO_ABYTES; /* computed only after the length check so it can never wrap */ + // initialization + Initialize(s, npub, k); + // process associated data + ProcessAssocData(s, ad, adlen); + ProcessCiphertext(s,m, c, clen - CRYPTO_ABYTES); + // finalization + return Finalize_VerifyTag(s, c + clen - CRYPTO_ABYTES, m, mlen); /* the tag occupies the last CRYPTO_ABYTES bytes of c (offset is the tag length, not the key length) */ +} diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.c b/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.c new file mode 100644 index 0000000..896381a --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.c @@ -0,0 +1,103 @@ +#include"auxFormat.h" + +void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) { /* applies lunNum rounds to the 12-word state s; round[i] supplies the constant for round i */ + u32 rci,t1,t2; + unsigned char i; + for (i = 0; i < lunNum; i++) { + rci=round[i]; /* read the caller's table instead of the global, so the round argument is honoured */\ + P384_ARC_SC1(rci,s[3],s[6],s[9]); \ + P384_2SC(s[1],s[4],s[7],s[10],s[2],s[5],s[8],s[11]);\ + P384_SR();\ + } +} + + +//puck begin// +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 
temp2[3] = { 0 }; + u32 t1_32, t2_64, t2_65; + u32 t[3] = { 0 }; + temp0[0] = in[0] & 0xffe00000; + temp1[0] = (in[0] & 0x001ffc00) << 11; + temp2[0] = (in[0] & 0x000003ff) << 22; + temp0[1] = in[1] & 0xffe00000; + temp1[1] = (in[1] & 0x001ff800) << 11; + t2_64 = ((in[1] & 0x00000400) << 21); + temp2[1] = (in[1] & 0x000003ff) << 22; + temp0[2] = in[2] & 0xffc00000; + t1_32 = ((in[2] & 0x00200000) << 10); + temp1[2] = (in[2] & 0x001ff800) << 11; + t2_65 = ((in[2] & 0x00000400) << 20); + temp2[2] = (in[2] & 0x000003ff) << 22; + unpuckU32ToThree(temp0[0]); + unpuckU32ToThree(temp0[1]); + unpuckU32ToThree(temp0[2]); + t[2] = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; + unpuckU32ToThree(temp1[0]); + unpuckU32ToThree(temp1[1]); + unpuckU32ToThree(temp1[2]); + t[1] = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); + unpuckU32ToThree(temp2[0]); + unpuckU32ToThree(temp2[1]); + unpuckU32ToThree(temp2[2]); + t[0] = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); + memcpy(out, t, 12 * sizeof(unsigned char)); +} +void packU96FormatToThreePacket(u32 * out, u8 * in) { + u32 t0 = U32BIG(((u32*)in)[2]); + u32 t1 = U32BIG(((u32*)in)[1]); + u32 t2 = U32BIG(((u32*)in)[0]); + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 temp0[3] = { 0 }; + u8 t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; + t1 = t1 << 1; + t2 = t2 << 2; + temp0[0] = t0; temp0[1] = t0 << 1; temp0[2] = t0 << 2; + puckU32ToThree(temp0[0]); + puckU32ToThree(temp0[1]); + puckU32ToThree(temp0[2]); + temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; + puckU32ToThree(temp1[0]); + puckU32ToThree(temp1[1]); + puckU32ToThree(temp1[2]); + temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; + puckU32ToThree(temp2[0]); + puckU32ToThree(temp2[1]); + puckU32ToThree(temp2[2]); + out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); + out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); + out[2] = (temp0[2]) | 
(((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); +} + +void packU48FormatToThreePacket(u32 * out, u8 * in) { + u32 t1 = (u32)U16BIG(*(u16*)(in + 4)); + u32 t2 = U32BIG(*(u32*)(in)); + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u8 t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; + t1 = t1 << 1; + t2 = t2 << 2; + temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; + puckU32ToThree(temp1[0]); + puckU32ToThree(temp1[1]); + puckU32ToThree(temp1[2]); + temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; + puckU32ToThree(temp2[0]); + puckU32ToThree(temp2[1]); + puckU32ToThree(temp2[2]); + out[0] = (temp1[0] >> 11) | (temp2[0] >> 22); + out[1] = (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); + out[2] = (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); +} +unsigned char constant7Format[76] = { + /*constant7Format[127]:*/ +0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, +0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, +0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, +0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, +0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, +0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, +0xc7,0x1e,0xd1,0x8f}; diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.h b/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.h index d9dd414..49f00e6 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.h @@ -1,10 +1,8 @@ -//#include #include"crypto_aead.h" #include"api.h" +#include #include -#include #include -#include #define U32BIG(x) (x) #define U16BIG(x) (x) @@ -18,13 +16,13 @@ typedef unsigned long long u64; #define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) -#define sbox(a, b, c, d, e, f, g, h) \ -{ \ - t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g 
= t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; e = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ -} -//////////////////puck begin -//&:5 <<:4 |:4 +/* +s0 s1 s2 +s3 s4 s5 +s6 s7 s8 +s9 s10 s11 +*/ #define puckU32ToThree(x){\ x &= 0x92492492;\ x = (x | (x << 2)) & 0xc30c30c3;\ @@ -39,213 +37,94 @@ x = (x | (x >> 8)) & 0xf00f00f0;\ x = (x | (x >> 4)) & 0xc30c30c3;\ x = (x | (x >> 2)) & 0x92492492;\ } -//u32 t1 、u32 t2 、u8 t2_64 , t2_65 ;u32 temp1[3] = { 0 };u32 temp2[3] = { 0 }; -#define packU48FormatToThreePacket( out, in) {\ -t1 = (u32)U16BIG(*(u16*)(in + 4)); \ -t2 = U32BIG(*(u32*)(in)); \ -t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \ -t1 = t1 << 1; \ -t2 = t2 << 2; \ -temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; \ -puckU32ToThree(temp1[0]); \ -puckU32ToThree(temp1[1]); \ -puckU32ToThree(temp1[2]); \ -temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \ -puckU32ToThree(temp2[0]); \ -puckU32ToThree(temp2[1]); \ -puckU32ToThree(temp2[2]); \ -out[0] = (temp1[0] >> 11) | (temp2[0] >> 22); \ -out[1] = (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); \ -out[2] = (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); \ -} - - -/* -void packU96FormatToThreePacket(u32 * out, u8 * in) { -u32 t0 = U32BIG(((u32*)in)[2]); -u32 t1 = U32BIG(((u32*)in)[1]); -u32 t2 = U32BIG(((u32*)in)[0]); -u8 t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; -t1 = t1 << 1; -t2 = t2 << 2; -u32 temp0[3] = { 0 }; -temp0[0] = t0; temp0[1] = t0 << 1; temp0[2] = t0 << 2; -puckU32ToThree(temp0[0]); -puckU32ToThree(temp0[1]); -puckU32ToThree(temp0[2]); -u32 temp1[3] = { 0 }; -temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; -puckU32ToThree(temp1[0]); -puckU32ToThree(temp1[1]); -puckU32ToThree(temp1[2]); -u32 temp2[3] = { 0 }; -temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; -puckU32ToThree(temp2[0]); -puckU32ToThree(temp2[1]); -puckU32ToThree(temp2[2]); -out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); -out[1] = 
(temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); -out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); -} -*/ -//t9 t1 t2 t1_32 t2_64 t2_65 temp0[3] temp1[3] temp2[3] -#define packU96FormatToThreePacket(out, in) {\ -t9 = U32BIG(((u32*)in)[2]); \ -t1 = U32BIG(((u32*)in)[1]); \ -t2 = U32BIG(((u32*)in)[0]); \ -t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \ -t1 = t1 << 1; \ -t2 = t2 << 2; \ -temp0[0] = t9; temp0[1] = t9 << 1; temp0[2] = t9 << 2; \ -puckU32ToThree(temp0[0]); \ -puckU32ToThree(temp0[1]); \ -puckU32ToThree(temp0[2]); \ -temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; \ -puckU32ToThree(temp1[0]); \ -puckU32ToThree(temp1[1]); \ -puckU32ToThree(temp1[2]); \ -temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \ -puckU32ToThree(temp2[0]); \ -puckU32ToThree(temp2[1]); \ -puckU32ToThree(temp2[2]); \ -out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); \ -out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); \ -out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); \ -} -/* -void unpackU96FormatToThreePacket(u8 * out, u32 * in) { -u32 temp0[3] = { 0 }; -u32 temp1[3] = { 0 }; -u32 temp2[3] = { 0 }; -u32 t1_32, t2_64, t2_65; -u32 t0, t1, t2; -temp0[0] = in[0] & 0xffe00000; -temp1[0] = (in[0] & 0x001ffc00) << 11; -temp2[0] = (in[0] & 0x000003ff) << 22; -temp0[1] = in[1] & 0xffe00000; -temp1[1] = (in[1] & 0x001ff800) << 11; -t2_64 = ((in[1] & 0x00000400) << 21); -temp2[1] = (in[1] & 0x000003ff) << 22; -temp0[2] = in[2] & 0xffc00000; -t1_32 = ((in[2] & 0x00200000) << 10); -temp1[2] = (in[2] & 0x001ff800) << 11; -t2_65 = ((in[2] & 0x00000400) << 20); -temp2[2] = (in[2] & 0x000003ff) << 22; -unpuckU32ToThree(temp0[0]); -unpuckU32ToThree(temp0[1]); -unpuckU32ToThree(temp0[2]); -t0 = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; -unpuckU32ToThree(temp1[0]); 
-unpuckU32ToThree(temp1[1]); -unpuckU32ToThree(temp1[2]); -t1 = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); -unpuckU32ToThree(temp2[0]); -unpuckU32ToThree(temp2[1]); -unpuckU32ToThree(temp2[2]); -t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); -*(u32*)(out) = U32BIG(t2); -*(u32*)(out + 4) = U32BIG(t1); -*(u32*)(out + 8) = U32BIG(t0); -} -*/ -//u32 temp0[3] = { 0 };u32 temp1[3] = { 0 };u32 temp2[3] = { 0 };u32 t1_32, t2_64, t2_65;t9,t1,t2, -#define unpackU96FormatToThreePacket( out, in) {\ -temp0[0] = in[0] & 0xffe00000; \ -temp1[0] = (in[0] & 0x001ffc00) << 11; \ -temp2[0] = (in[0] & 0x000003ff) << 22; \ -temp0[1] = in[1] & 0xffe00000; \ -temp1[1] = (in[1] & 0x001ff800) << 11; \ -t2_64 = ((in[1] & 0x00000400) << 21); \ -temp2[1] = (in[1] & 0x000003ff) << 22; \ -temp0[2] = in[2] & 0xffc00000; \ -t1_32 = ((in[2] & 0x00200000) << 10); \ -temp1[2] = (in[2] & 0x001ff800) << 11; \ -t2_65 = ((in[2] & 0x00000400) << 20); \ -temp2[2] = (in[2] & 0x000003ff) << 22; \ -unpuckU32ToThree(temp0[0]); \ -unpuckU32ToThree(temp0[1]); \ -unpuckU32ToThree(temp0[2]); \ -t9 = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; \ -unpuckU32ToThree(temp1[0]); \ -unpuckU32ToThree(temp1[1]); \ -unpuckU32ToThree(temp1[2]); \ -t1 = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); \ -unpuckU32ToThree(temp2[0]); \ -unpuckU32ToThree(temp2[1]); \ -unpuckU32ToThree(temp2[2]); \ -t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \ -*(u32*)(out) = U32BIG(t2); \ -*(u32*)(out + 4) = U32BIG(t1); \ -*(u32*)(out + 8) = U32BIG(t9); \ -} - -#define U96_BIT_LOTR32_1(t0,t1,t2,t3,t4,t5){\ -t3= t1;\ -t4 = t2;\ -t5 = LOTR32(t0, 1); \ -} -#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\ -t3= LOTR32(t2, 2);\ -t4 =LOTR32(t0, 3);\ -t5 = LOTR32(t1, 3); \ -} -//55=3*18+1 -#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\ -t3= LOTR32(t1, 18); \ -t4 = LOTR32(t2, 18);\ -t5 = LOTR32(t0, 19); \ -} -/* -s0 s1 s2 -s3 s4 s5 -s6 s7 s8 -s9 s10 s11 -*/ -#define 
ROUND384(lunNum) {\ -s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;\ -s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;\ -s[2] ^= constant7Format[lunNum] & 0x7;\ -sbox(s[0], s[3], s[6], s[9] , s_temp[0], s_temp[3], s_temp[6], s_temp[9]);\ -sbox(s[1], s[4], s[7], s[10], s_temp[1], s_temp[4], s_temp[7], s_temp[10]);\ -sbox(s[2], s[5], s[8], s[11], s_temp[2], s_temp[5], s_temp[8], s_temp[11]);\ -s[0] = s_temp[0], s[1] = s_temp[1], s[2] = s_temp[2];\ -U96_BIT_LOTR32_1(s_temp[3], s_temp [4], s_temp[ 5], s[3], s[4], s[5]);\ -U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\ -U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\ -} -#define ROUND384Full(lunNum) {\ -printf(" constant7Format[%d]=%08x\n", lunNum, constant7Format[lunNum]);\ -s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;\ -s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;\ -s[2] ^= constant7Format[lunNum] & 0x7;\ - printfU96Format("addition of round constant output",s);\ -sbox(s[0], s[3], s[6], s[9] , s_temp[0], s_temp[3], s_temp[6], s_temp[9]);\ -sbox(s[1], s[4], s[7], s[10], s_temp[1], s_temp[4], s_temp[7], s_temp[10]);\ -sbox(s[2], s[5], s[8], s[11], s_temp[2], s_temp[5], s_temp[8], s_temp[11]);\ - printfU96Format("substitution layer output",s_temp);\ -s[0] = s_temp[0], s[1] = s_temp[1], s[2] = s_temp[2];\ -U96_BIT_LOTR32_1(s_temp[3], s_temp [4], s_temp[ 5], s[3], s[4], s[5]);\ -U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\ -U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\ -printfU96Format("linear diffusion layer output", s);\ -} - -void printBinarySimp(unsigned char * str, u8 *a, int len); -//void packU96FormatToThreePacketFull(unsigned int * out, u8 * in); -//void unpackU96FormatToThreePacketFull(u8 * out, unsigned int * in); -//void packU96FormatToThreePacket(u32 * out, u8 * in); -//void unpackU96FormatToThreePacket(u8 * out, u32 * in); -void printU32State(char name[], unsigned int* var, long len); -void 
printfU96Format(char name[], unsigned int * s); -//////////////////puck end -void printU8(char name[], u8 var[], int len, int offset); -void printfU96Format(char name[], u32 * s); - -////////////constant begin// -//unsigned char constant7Format[127]; -void puckU8FormatToThreePacket(u8 in, u8 *out); -//void test_puckU8FormatToThreePacket(); -////////////constant end// - -static void permutation384(unsigned int *in, int rounds, unsigned char *rc); +extern unsigned char constant7Format[76]; /* declaration only: the initialised table is defined once in auxFormat.c */ +#define P384_ARC_SC1(rci,S2,S3,S4) /* XORs the three round-constant fields of rci into s[0], s[1], s[2], then runs the S-box sequence on the column (s[0], S2, S3, S4). Expects the state pointer s and u32 scratch variables t1, t2 in the expanding scope. */ \ + do { \ + __asm__ __volatile__ ( \ + "/*add round const s0 s1 s2 */ \n\t"\ + "ands %[t1], %[rci], #0xc0\n\t" \ + "eors %[S_0], %[S_0], %[t1], LSR #6 \n\t" /*s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;*/\ + "ands %[t1], %[rci], #0x38\n\t" \ + "eors %[S_1], %[S_1], %[t1], LSR #3 \n\t" /*s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;*/\ + "ands %[t1], %[rci], #0x7\n\t" \ + "eors %[S_3], %[S_3], %[t1] \n\t" /*s[2] ^= constant7Format[lunNum] & 0x7;*/\ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[S_4], %[t1] \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] \n\t"\ + "eors %[S_0], %[S_6], %[S_0] \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] \n\t"\ + "eors %[S_6], %[S_6], %[t1] \n\t"\ + "ands %[S_0], %[t1],%[S_0] \n\t"\ + "eors %[S_0], %[t2],%[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1], %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2), [rci] "+r" (rci), \ + [S_0] "+r" (s[0]), [S_1] "+r" (s[1]), [S_3] "+r" (s[2]),\ + [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : /* no inputs */ : "cc" /* the s-suffixed instructions above update the condition flags */ );\ +}while (0) +#define P384_2SC(S1,S2,S3,S4,S5,S6,S7,S8) /* runs the same S-box sequence on two columns, (S1,S2,S3,S4) and (S5,S6,S7,S8). Expects u32 scratch variables t1, t2 in the expanding scope. */ \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[S_4], %[t1] \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] \n\t"\ + "eors %[S_0], %[S_6], %[S_0] \n\t"\ + "eors %[S_4], 
%[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] \n\t"\ + "eors %[S_6], %[S_6], %[t1] \n\t"\ + "ands %[S_0], %[t1],%[S_0] \n\t"\ + "eors %[S_0], %[t2],%[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1], %[S_2] \n\t"\ + "/*sbox column*/ \n\t"\ + "mvns %[S_1], %[S_1] \n\t"\ + "ands %[t1], %[S_3], %[S_1] \n\t"\ + "eors %[t1], %[S_5], %[t1] \n\t"\ + "orrs %[S_5], %[S_3], %[S_5] \n\t"\ + "eors %[S_1], %[S_7], %[S_1] \n\t"\ + "eors %[S_5], %[S_5], %[S_1] \n\t"\ + "eors %[t2], %[S_3], %[S_7] \n\t"\ + "eors %[S_7], %[S_7], %[t1] \n\t"\ + "ands %[S_1], %[t1],%[S_1] \n\t"\ + "eors %[S_1], %[t2],%[S_1] \n\t"\ + "ands %[S_3], %[S_5], %[t2] \n\t"\ + "eors %[S_3], %[t1], %[S_3] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) ,\ + [S_1] "+r" (S5), [S_3] "+r" (S6), [S_5] "+r" (S7), [S_7] "+r" (S8)\ + : /* no inputs */ : "cc" /* the s-suffixed instructions above update the condition flags */ );\ +}while (0) +#define P384_SR() /* word moves and rotations over s[3..5], s[6..8] and s[9..11]; per the asm comments these are the row rotations by 1, 8 and 55 bits. Expects the state pointer s and a u32 scratch variable t1 in the expanding scope. */ \ + do { \ + __asm__ __volatile__ ( \ + "/*rotate shift left 1 bit [w9 w5 w1-> (w1,1) w9 w5] */ \n\t"\ + "mov %[t1], %[S_3] \n\t"\ + "mov %[S_3], %[S_4] \n\t"\ + "mov %[S_4], %[S_5] \n\t"\ + "ROR %[S_5], %[t1] , #31 \n\t"\ + "/*rotate shift left 8 bits [w10 w6 w2-> 锛坵6,3) (w2,3) ( w10,2)]*/ \n\t"\ + "mov %[t1], %[S_8] \n\t"\ + "ROR %[S_8], %[S_7] , #29 \n\t"\ + "ROR %[S_7], %[S_6] , #29 \n\t"\ + "ROR %[S_6], %[t1] , #30 \n\t"\ + "/*rotate shift left 55 bit [w11 w7 w3-> 锛坵3,13) (w11,14) ( w7,14)] */ \n\t"\ + "mov %[t1], %[S_9] \n\t"\ + "ROR %[S_9], %[S_10] , #14 \n\t"\ + "ROR %[S_10], %[S_11] , #14 \n\t"\ + "ROR %[S_11], %[t1] , #13 \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1),\ + [S_3] "+r" (s[3]), [S_6] "+r" (s[6]), [S_9] "+r" (s[9]) ,\ + [S_4] "+r" (s[4]), [S_7] "+r" (s[7]), [S_10] "+r" (s[10]),\ + [S_5] "+r" (s[5]), [S_8] "+r" (s[8]), [S_11] "+r" (s[11])\ + : : );\ +}while (0) diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_2/encrypt.c 
b/knot/Implementations/crypto_aead/knot192/armcortexm_2/encrypt.c index ab4ac31..f4fc2ce 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_2/encrypt.c +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_2/encrypt.c @@ -1,176 +1,47 @@ - #include"auxFormat.h" #define aead_RATE (96 / 8) #define PR0_ROUNDS 76 #define PR_ROUNDS 40 #define PRF_ROUNDS 44 -unsigned char constant7Format[127] = { - /*constant7Format[127]:*/ -0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, -0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, -0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, -0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, -0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, -0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, -0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,0xcf,0x5e,0xd3,0x9f, -0xdc,0xc6,0x16,0x91,0x8d,0x4c,0x43,0x1a,0xd0,0x87,0x1c,0xc1, -0x0f,0x59,0xca,0x57,0x9b,0xdd,0xce,0x56,0x93,0x9d,0xcc,0x46, -0x13,0x98,0xc5,0x0e,0x51,0x8a,0x55,0x8b,0x5d,0xcb,0x5f,0xdb, -0xdf,0xde,0xd6,0x96,0x94,0x84,0x04, }; - -/* State - * w8 w4 w0 - * w9 w5 w1 - * w10 w6 w2 - * w11 w7 w3 - */ - static void permutation384(unsigned int *in, int rounds, unsigned char *rc) { - - uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11; - uint32_t s0, s1, s2; - uint32_t i=0; - __asm volatile( - "ldr w0, [in] \n\t" - "ldr w4, [in, #4] \n\t" - "ldr w8, [in, #8] \n\t" - "ldr w1, [in, #12] \n\t" - "ldr w5, [in, #16] \n\t" - "ldr w9, [in, #20] \n\t" - "ldr w2, [in, #24] \n\t" - "ldr w6, [in, #28] \n\t" - "ldr w10, [in, #32] \n\t" - "ldr w3, [in, #36] \n\t" - "ldr w7, [in, #40] \n\t" - "ldr w11, [in, #44] \n\t" - "enc_loop: \n\t" - "/*add round const s0 s1*/ \n\t" - "ldrb s0, [rc] \n\t" - "LSR s1, s0, #6 \n\t" - "and s1, s1, 0x3 \n\t" - "LSR s2, s0, #3 \n\t" - "and s2, s2, 0x7 \n\t" - "and s0, s0, 0x7 \n\t" - "eors w8, w8, s0 \n\t" - "eors w4, w4, s2 \n\t" - "eors w0, w0, s1 \n\t" - "/*sbox first column*/ \n\t" - 
"mvns w0, w0 \n\t" - "ands s0, w1, w0 \n\t" - "eors s0, w2, s0 \n\t" - "orrs w2, w1, w2 \n\t" - "eors w0, w3, w0 \n\t" - "eors w2, w2, w0 \n\t" - "eors s1, w1, w3 \n\t" - "eors w3, w3, s0 \n\t" - "ands w0, s0, w0 \n\t" - "eors w0, s1, w0 \n\t" - "ands w1, w2, s1 \n\t" - "eors w1, s0, w1 \n\t" - "/*sbox second column*/ \n\t" - "mvns w4, w4 \n\t" - "ands s0, w5, w4 \n\t" - "eors s0, w6, s0 \n\t" - "orrs w6, w5, w6 \n\t" - "eors w4, w7, w4 \n\t" - "eors w6, w6, w4 \n\t" - "eors s1, w5, w7 \n\t" - "eors w7, w7, s0 \n\t" - "ands w4, s0, w4 \n\t" - "eors w4, s1, w4 \n\t" - "ands w5, w6, s1 \n\t" - "eors w5, s0, w5 \n\t" - "/*sbox third column*/ \n\t" - "mvns w8, w8 \n\t" - "ands s0, w9, w8 \n\t" - "eors s0, w10, s0 \n\t" - "orrs w10, w9, w10 \n\t" - "eors w8, w11, w8 \n\t" - "eors w10, w10, w8 \n\t" - "eors s1, w9, w11 \n\t" - "eors w11, w11, s0 \n\t" - "ands w8, s0, w8 \n\t" - "eors w8, s1, w8 \n\t" - "ands w9, w10, s1 \n\t" - "eors w9, s0, w9 \n\t" - "/*rotate shift left 1 bit [w9 w5 w1-> (w1,1) w9 w5] */ \n\t" - "mov s0, w1 \n\t" - "mov w1, w5 \n\t" - "mov w5, w9 \n\t" - "ROR w9, s0, #31 \n\t" - "/*rotate shift left 8 bits [w10 w6 w2-> (w6,3) (w2,3) ( w10,2)]*/ \n\t" - "mov s0, w10 \n\t" - "ROR w10, w6 , #29 \n\t" - "ROR w6, w2 , #29 \n\t" - "ROR w2, s0, #30 \n\t" - "/*rotate shift left 55 bit [w11 w7 w3-> (w3,13) (w11,14) ( w7,14)] */ \n\t" - "mov s0, w3 \n\t" - "ROR w3, w7 , #14 \n\t" - "ROR w7, w11 , #14 \n\t" - "ROR w11, s0, #13 \n\t" - "/*loop control*/ \n\t" - "adds rc, rc, #1 \n\t" - "subs rounds, rounds, #1 \n\t" - "bne enc_loop \n\t" - "str w0, [in] \n\t" - "str w4, [in, #4] \n\t" - "str w8, [in, #8] \n\t" - "str w1, [in, #12] \n\t" - "str w5, [in, #16] \n\t" - "str w9, [in, #20] \n\t" - "str w2, [in, #24] \n\t" - "str w6, [in, #28] \n\t" - "str w10, [in, #32] \n\t" - "str w3, [in, #36] \n\t" - "str w7, [in, #40] \n\t" - "str w11, [in, #44] \n\t" - ); +void Initialize(u32 *s, const unsigned char *npub, const unsigned char *k) { + packU96FormatToThreePacket(s, 
npub); + packU96FormatToThreePacket(s + 3, npub + 12); + packU96FormatToThreePacket(s + 6, k); + packU96FormatToThreePacket(s + 9, k + 12); + P384(s, constant7Format, PR0_ROUNDS); } -int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, - const unsigned char *m, unsigned long long mlen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *nsec, const unsigned char *npub, - const unsigned char *k) { +void ProcessAssocData(u32 *s, const u8* ad, unsigned long long adlen) { - u8 i; - u32 s[12] = { 0 }; u32 dataFormat[3] = { 0 }; u8 tempData[12] = { 0 }; - u32 s_temp[12] = { 0 }; - u32 t1, t2, t3, t5, t6, t8, t9, t11; - u32 t1_32, t2_64, t2_65; - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - *clen = mlen + CRYPTO_ABYTES; - // initialization - packU96FormatToThreePacket(s, npub); - packU96FormatToThreePacket((s + 3), (npub + 12)); - packU96FormatToThreePacket((s + 6), k); - packU96FormatToThreePacket((s + 9), (k + 12)); -permutation384(s,PR0_ROUNDS,constant7Format); - // process associated data if (adlen) { while (adlen >= aead_RATE) { packU96FormatToThreePacket(dataFormat, ad); s[0] ^= dataFormat[0]; s[1] ^= dataFormat[1]; s[2] ^= dataFormat[2]; -permutation384(s,PR_ROUNDS,constant7Format); + P384(s, constant7Format, PR_ROUNDS); adlen -= aead_RATE; ad += aead_RATE; } - memset(tempData, 0, sizeof(tempData)); - memcpy(tempData, ad, adlen); + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen * sizeof(unsigned char)); tempData[adlen] = 0x01; packU96FormatToThreePacket(dataFormat, tempData); s[0] ^= dataFormat[0]; s[1] ^= dataFormat[1]; s[2] ^= dataFormat[2]; - permutation384(s,PR_ROUNDS,constant7Format); + P384(s, constant7Format, PR_ROUNDS); } s[9] ^= 0x80000000; + +} +void ProcessPlaintext(u32 *s, const u8* m, unsigned long long mlen, unsigned char *c) { + + u32 dataFormat[3] = { 0 }; + u8 tempData[12] = { 0 }; if (mlen) { while (mlen >= aead_RATE) { packU96FormatToThreePacket(dataFormat, m); 
@@ -178,78 +49,49 @@ permutation384(s,PR_ROUNDS,constant7Format); s[1] ^= dataFormat[1]; s[2] ^= dataFormat[2]; unpackU96FormatToThreePacket(c, s); - permutation384(s,PR_ROUNDS,constant7Format); + P384(s, constant7Format, PR_ROUNDS); mlen -= aead_RATE; m += aead_RATE; c += aead_RATE; } memset(tempData, 0, sizeof(tempData)); - memcpy(tempData, m, mlen); + memcpy(tempData, m, mlen * sizeof(unsigned char)); tempData[mlen] = 0x01; packU96FormatToThreePacket(dataFormat, tempData); s[0] ^= dataFormat[0]; s[1] ^= dataFormat[1]; s[2] ^= dataFormat[2]; unpackU96FormatToThreePacket(tempData, s); - memcpy(c, tempData, mlen); + memcpy(c, tempData, mlen * sizeof(unsigned char)); c += mlen; } - // finalization - permutation384(s,PRF_ROUNDS,constant7Format); +} + +void Finalize_GenerateTag(u32 *s, unsigned char *c) { + P384(s, constant7Format, PRF_ROUNDS); // return tag unpackU96FormatToThreePacket(c, s); - unpackU96FormatToThreePacket((c + 12), (s + 3)); + unpackU96FormatToThreePacket(c + 12, s + 3); + +} +int Finalize_VerifyTag(u32 *s, const unsigned char *c, unsigned char *m, unsigned long long *mlen) { + u8 tempU8[32] = { 0 }; + P384(s, constant7Format, PRF_ROUNDS); + // return tag + unpackU96FormatToThreePacket(tempU8, s); + unpackU96FormatToThreePacket(tempU8 + 12, s + 3); + if (memcmp((void*)tempU8, (void*)(c), CRYPTO_ABYTES)) { + memset(m, 0, sizeof(unsigned char) * (*mlen)); + *mlen = 0; + return -1; + } return 0; } - -int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, - unsigned char *nsec, const unsigned char *c, unsigned long long clen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *npub, const unsigned char *k) { - u8 i, j; - u32 s[12] = { 0 }; +void ProcessCiphertext(u32 *s, unsigned char *m, const unsigned char *c, unsigned long long clen) +{ u32 dataFormat[6] = { 0 }; u32 dataFormat_1[3] = { 0 }; - u8 tempData[12] = { 0 }; - u8 tempU8[48] = { 0 }; - u32 s_temp[12] = { 0 }; - u32 t1, t2, t3, t5, t6, t8, t9, t11; - u32 
t1_32, t2_64, t2_65; - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - *mlen = clen - CRYPTO_ABYTES; - if (clen < CRYPTO_ABYTES) - return -1; - // initialization - packU96FormatToThreePacket(s, npub); - packU96FormatToThreePacket((s + 3), (npub + 12)); - packU96FormatToThreePacket((s + 6), k); - packU96FormatToThreePacket((s + 9), (k + 12)); -permutation384(s,PR0_ROUNDS,constant7Format); - // process associated data - if (adlen) { - while (adlen >= aead_RATE) { - packU96FormatToThreePacket(dataFormat, ad); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - s[2] ^= dataFormat[2]; -permutation384(s,PR_ROUNDS,constant7Format); - adlen -= aead_RATE; - ad += aead_RATE; - } - memset(tempData, 0, sizeof(tempData)); - - memcpy(tempData, ad, adlen); - tempData[adlen] = 0x01; - packU96FormatToThreePacket(dataFormat, tempData); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - s[2] ^= dataFormat[2]; - permutation384(s,PR_ROUNDS,constant7Format); - } - s[9] ^= 0x80000000; - clen -= CRYPTO_ABYTES; + u8 i,tempU8[48] = { 0 }; if (clen) { while (clen >= aead_RATE) { packU96FormatToThreePacket(dataFormat, c); @@ -260,7 +102,7 @@ permutation384(s,PR_ROUNDS,constant7Format); s[0] = dataFormat[0]; s[1] = dataFormat[1]; s[2] = dataFormat[2]; - permutation384(s,PR_ROUNDS,constant7Format); + P384(s, constant7Format, PR_ROUNDS); clen -= aead_RATE; m += aead_RATE; c += aead_RATE; @@ -274,14 +116,36 @@ permutation384(s,PR_ROUNDS,constant7Format); tempU8[i] ^= 0x01; packU96FormatToThreePacket(s, tempU8); } - // finalization - permutation384(s,PRF_ROUNDS,constant7Format); - // return tag - packU96FormatToThreePacket(dataFormat, c); - packU96FormatToThreePacket((dataFormat + 3), (c + 12)); - if (dataFormat[0] != s[0] || dataFormat[1] != s[1] || dataFormat[2] != s[2] || dataFormat[3] != s[3] - || dataFormat[4] != s[4] || dataFormat[5] != s[5]) { - return -1; - } + +} +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, 
unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + u32 s[12] = { 0 }; + *clen = mlen + CRYPTO_ABYTES; + // initialization + Initialize(s, npub, k); + // process associated data + ProcessAssocData(s, ad, adlen); + ProcessPlaintext(s, m, mlen, c); + Finalize_GenerateTag(s, c + mlen); return 0; } + +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + u32 s[12] = { 0 }; + *mlen = clen - CRYPTO_ABYTES; + if (clen < CRYPTO_ABYTES) + return -1; + Initialize(s, npub, k); + // process associated data + ProcessAssocData(s, ad, adlen); + ProcessCiphertext(s, m, c, clen - CRYPTO_ABYTES); + // finalization + return Finalize_VerifyTag(s, c + clen - CRYPTO_KEYBYTES, m, mlen); +} diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_4/api.h b/knot/Implementations/crypto_aead/knot192/armcortexm_4/api.h new file mode 100644 index 0000000..c3cb1d9 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_4/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 24 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 24 +#define CRYPTO_ABYTES 24 +#define CRYPTO_NOOVERLAP 1 + diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.c b/knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.c new file mode 100644 index 0000000..7c1f7cf --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.c @@ -0,0 +1,109 @@ +#include"auxFormat.h" + +void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) { + u32 s_temp[12] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + unsigned char i; + for (i = 0; i < lunNum; i++) { + s[0] ^= (round[i] >> 6) & 0x3; \ + s[1] ^= (round[i] >> 3) & 0x7; \ + s[2] ^= round[i] 
& 0x7; \ + sbox(s[0], s[3], s[6], s[9], s_temp[3], s_temp[6], s_temp[9]); \ + sbox(s[1], s[4], s[7], s[10], s[3], s_temp[7], s_temp[10]); \ + sbox(s[2], s[5], s[8], s[11], s[4], s_temp[8], s_temp[11]); \ + s[5] = LOTR32(s_temp[3], 1); \ + U96_BIT_LOTR32_8(s_temp[6], s_temp[7], s_temp[8], s[6], s[7], s[8]); \ + U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]); \ + } +} + + +//puck begin// +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t1_32, t2_64, t2_65; + u32 t[3] = { 0 }; + temp0[0] = in[0] & 0xffe00000; + temp1[0] = (in[0] & 0x001ffc00) << 11; + temp2[0] = (in[0] & 0x000003ff) << 22; + temp0[1] = in[1] & 0xffe00000; + temp1[1] = (in[1] & 0x001ff800) << 11; + t2_64 = ((in[1] & 0x00000400) << 21); + temp2[1] = (in[1] & 0x000003ff) << 22; + temp0[2] = in[2] & 0xffc00000; + t1_32 = ((in[2] & 0x00200000) << 10); + temp1[2] = (in[2] & 0x001ff800) << 11; + t2_65 = ((in[2] & 0x00000400) << 20); + temp2[2] = (in[2] & 0x000003ff) << 22; + unpuckU32ToThree(temp0[0]); + unpuckU32ToThree(temp0[1]); + unpuckU32ToThree(temp0[2]); + t[2] = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; + unpuckU32ToThree(temp1[0]); + unpuckU32ToThree(temp1[1]); + unpuckU32ToThree(temp1[2]); + t[1] = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); + unpuckU32ToThree(temp2[0]); + unpuckU32ToThree(temp2[1]); + unpuckU32ToThree(temp2[2]); + t[0] = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); + memcpy(out, t, 12 * sizeof(unsigned char)); +} +void packU96FormatToThreePacket(u32 * out, u8 * in) { + u32 t0 = U32BIG(((u32*)in)[2]); + u32 t1 = U32BIG(((u32*)in)[1]); + u32 t2 = U32BIG(((u32*)in)[0]); + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 temp0[3] = { 0 }; + u8 t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; + t1 = t1 << 1; + t2 = t2 << 2; + temp0[0] = t0; temp0[1] = t0 << 1; temp0[2] = t0 << 2; + 
puckU32ToThree(temp0[0]); + puckU32ToThree(temp0[1]); + puckU32ToThree(temp0[2]); + temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; + puckU32ToThree(temp1[0]); + puckU32ToThree(temp1[1]); + puckU32ToThree(temp1[2]); + temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; + puckU32ToThree(temp2[0]); + puckU32ToThree(temp2[1]); + puckU32ToThree(temp2[2]); + out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); + out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); + out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); +} + +void packU48FormatToThreePacket(u32 * out, u8 * in) { + u32 t1 = (u32)U16BIG(*(u16*)(in + 4)); + u32 t2 = U32BIG(*(u32*)(in)); + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u8 t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; + t1 = t1 << 1; + t2 = t2 << 2; + temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; + puckU32ToThree(temp1[0]); + puckU32ToThree(temp1[1]); + puckU32ToThree(temp1[2]); + temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; + puckU32ToThree(temp2[0]); + puckU32ToThree(temp2[1]); + puckU32ToThree(temp2[2]); + out[0] = (temp1[0] >> 11) | (temp2[0] >> 22); + out[1] = (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); + out[2] = (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); +} +unsigned char constant7Format[76] = { + /*constant7Format[127]:*/ +0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, +0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, +0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, +0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, +0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, +0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, +0xc7,0x1e,0xd1,0x8f}; diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.h b/knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.h new file mode 100644 
index 0000000..5870426 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.h @@ -0,0 +1,57 @@ +#include"crypto_aead.h" +#include"api.h" +#include +#include +#include +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) + + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define sbox(a, b, c, d, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} + +#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t2, 2);\ +t4 =LOTR32(t0, 3);\ +t5 = LOTR32(t1, 3); \ +} +//55=3*18+1 +#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t1, 18); \ +t4 = LOTR32(t2, 18);\ +t5 = LOTR32(t0, 19); \ +} +/* +s0 s1 s2 +s3 s4 s5 +s6 s7 s8 +s9 s10 s11 +*/ +#define puckU32ToThree(x){\ +x &= 0x92492492;\ +x = (x | (x << 2)) & 0xc30c30c3;\ +x = (x | (x << 4)) & 0xf00f00f0;\ +x = (x | (x << 8)) & 0xff0000ff;\ +x = (x | (x << 16)) & 0xfff00000;\ +} +#define unpuckU32ToThree(x){\ +x &= 0xfff00000;\ +x = (x | (x >> 16)) & 0xff0000ff;\ +x = (x | (x >> 8)) & 0xf00f00f0;\ +x = (x | (x >> 4)) & 0xc30c30c3;\ +x = (x | (x >> 2)) & 0x92492492;\ +} + +unsigned char constant7Format[76]; + diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_4/crypto_aead.h b/knot/Implementations/crypto_aead/knot192/armcortexm_4/crypto_aead.h new file mode 100644 index 0000000..862d176 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_4/crypto_aead.h @@ -0,0 +1,18 @@ + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned 
char *k +); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k +); diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_4/encrypt.c b/knot/Implementations/crypto_aead/knot192/armcortexm_4/encrypt.c new file mode 100644 index 0000000..f4fc2ce --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_4/encrypt.c @@ -0,0 +1,151 @@ +#include"auxFormat.h" + +#define aead_RATE (96 / 8) +#define PR0_ROUNDS 76 +#define PR_ROUNDS 40 +#define PRF_ROUNDS 44 +void Initialize(u32 *s, const unsigned char *npub, const unsigned char *k) { + packU96FormatToThreePacket(s, npub); + packU96FormatToThreePacket(s + 3, npub + 12); + packU96FormatToThreePacket(s + 6, k); + packU96FormatToThreePacket(s + 9, k + 12); + P384(s, constant7Format, PR0_ROUNDS); +} + +void ProcessAssocData(u32 *s, const u8* ad, unsigned long long adlen) { + + u32 dataFormat[3] = { 0 }; + u8 tempData[12] = { 0 }; + if (adlen) { + while (adlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + P384(s, constant7Format, PR_ROUNDS); + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + P384(s, constant7Format, PR_ROUNDS); + } + s[9] ^= 0x80000000; + +} +void ProcessPlaintext(u32 *s, const u8* m, unsigned long long mlen, unsigned char *c) { + + u32 dataFormat[3] = { 0 }; + u8 tempData[12] = { 0 }; + if (mlen) { + while (mlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, m); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + 
unpackU96FormatToThreePacket(c, s); + P384(s, constant7Format, PR_ROUNDS); + mlen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, m, mlen * sizeof(unsigned char)); + tempData[mlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + unpackU96FormatToThreePacket(tempData, s); + memcpy(c, tempData, mlen * sizeof(unsigned char)); + c += mlen; + } +} + +void Finalize_GenerateTag(u32 *s, unsigned char *c) { + P384(s, constant7Format, PRF_ROUNDS); + // return tag + unpackU96FormatToThreePacket(c, s); + unpackU96FormatToThreePacket(c + 12, s + 3); + +} +int Finalize_VerifyTag(u32 *s, const unsigned char *c, unsigned char *m, unsigned long long *mlen) { + u8 tempU8[32] = { 0 }; + P384(s, constant7Format, PRF_ROUNDS); + // return tag + unpackU96FormatToThreePacket(tempU8, s); + unpackU96FormatToThreePacket(tempU8 + 12, s + 3); + if (memcmp((void*)tempU8, (void*)(c), CRYPTO_ABYTES)) { + memset(m, 0, sizeof(unsigned char) * (*mlen)); + *mlen = 0; + return -1; + } + return 0; +} +void ProcessCiphertext(u32 *s, unsigned char *m, const unsigned char *c, unsigned long long clen) +{ + u32 dataFormat[6] = { 0 }; + u32 dataFormat_1[3] = { 0 }; + u8 i,tempU8[48] = { 0 }; + if (clen) { + while (clen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, c); + dataFormat_1[0] = s[0] ^ dataFormat[0]; + dataFormat_1[1] = s[1] ^ dataFormat[1]; + dataFormat_1[2] = s[2] ^ dataFormat[2]; + unpackU96FormatToThreePacket(m, dataFormat_1); + s[0] = dataFormat[0]; + s[1] = dataFormat[1]; + s[2] = dataFormat[2]; + P384(s, constant7Format, PR_ROUNDS); + clen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + unpackU96FormatToThreePacket(tempU8, s); + for (i = 0; i < clen; ++i, ++m, ++c) + { + *m = tempU8[i] ^ *c; + tempU8[i] = *c; + } + tempU8[i] ^= 0x01; + packU96FormatToThreePacket(s, tempU8); + } + +} +int crypto_aead_encrypt(unsigned char 
*c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + u32 s[12] = { 0 }; + *clen = mlen + CRYPTO_ABYTES; + // initialization + Initialize(s, npub, k); + // process associated data + ProcessAssocData(s, ad, adlen); + ProcessPlaintext(s, m, mlen, c); + Finalize_GenerateTag(s, c + mlen); + return 0; +} + +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + u32 s[12] = { 0 }; + *mlen = clen - CRYPTO_ABYTES; + if (clen < CRYPTO_ABYTES) + return -1; + Initialize(s, npub, k); + // process associated data + ProcessAssocData(s, ad, adlen); + ProcessCiphertext(s, m, c, clen - CRYPTO_ABYTES); + // finalization + return Finalize_VerifyTag(s, c + clen - CRYPTO_KEYBYTES, m, mlen); +} diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.c b/knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.c new file mode 100644 index 0000000..93f0bb8 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.c @@ -0,0 +1,81 @@ +#include"auxFormat.h" + +void P512(unsigned int *s, unsigned char *round, unsigned char rounds) { + u32 rci,t1,t2,t3,t9; + unsigned char i; + for (i = 0; i < rounds; i++) { + rci=round[0]; \ + P512_ARC_1(rci);\ + for (i = 1;i < rounds;i++) {\ + P512_2SC(s[0],s[4],s[8] ,s[12],s[1],s[5],s[9] ,s[13]);\ + P512_2SC(s[2],s[6],s[10],s[14],s[3],s[7],s[11],s[15]);\ + P512_SR_1();\ + rci=round[i];\ + P512_SR_ARC_2(rci);\ + }\ + P512_2SC(s[0],s[4],s[8] ,s[12],s[1],s[5],s[9] ,s[13]);\ + P512_2SC(s[2],s[6],s[10],s[14],s[3],s[7],s[11],s[15]);\ + P512_SR_1();\ + P512_SR_2();\ + + } +} +void packU128FormatToFourPacket(u32 * out, u8 * in) { + u32 t0 = U32BIG(((u32* 
)in)[0]); + u32 t1 = U32BIG(((u32* )in)[1]); + u32 t2 = U32BIG(((u32* )in)[2]); + u32 t3 = U32BIG(((u32* )in)[3]); + u32 temp1; + puck32(t0); + puck32(t0); + puck32(t1); + puck32(t1); + puck32(t2); + puck32(t2); + puck32(t3); + puck32(t3); + out[3] = (t3 & 0xff000000) | ((t2 >> 8) & 0x00ff0000) + | ((t1 >> 16) & 0x0000ff00) | (t0 >> 24); + out[2] = ((t3 << 8) & 0xff000000) | (t2 & 0x00ff0000) + | ((t1 >> 8) & 0x0000ff00) | ((t0 >> 16) & 0x000000ff); + out[1] = ((t3 << 16) & 0xff000000) | ((t2 << 8) & 0x00ff0000) + | (t1 & 0x0000ff00) | ((t0 >> 8) & 0x000000ff); + out[0] = ((t3 << 24) & 0xff000000) | ((t2 << 16) & 0x00ff0000) + | ((t1 << 8) & 0x0000ff00) | (t0 & 0x000000ff); +} +void unpackU128FormatToFourPacket(u8 * out, u32 * in) { + u32 t[4] = { 0 }; + u32 r0; + t[3] = (in[3] & 0xff000000 )| ((in[2] >> 8) & 0x00ff0000) + | ((in[1] >> 16) & 0x0000ff00) | (in[0] >> 24); + t[2] = ((in[3] << 8) & 0xff000000) | (in[2] & 0x00ff0000) + | ((in[1] >> 8) & 0x0000ff00) | ((in[0] >> 16) & 0x000000ff); + t[1] = ((in[3] << 16) & 0xff000000) | ((in[2] << 8) & 0x00ff0000) + | (in[1] & 0x0000ff00) | ((in[0] >> 8) & 0x000000ff); + t[0] = ((in[3] << 24) & 0xff000000) | ((in[2] << 16) & 0x00ff0000) + | ((in[1] << 8) & 0x0000ff00) | (in[0] & 0x000000ff); + unpuck32(t[0]); + unpuck32(t[0]); + unpuck32(t[1]); + unpuck32(t[1]); + unpuck32(t[2]); + unpuck32(t[2]); + unpuck32(t[3]); + unpuck32(t[3]); + memcpy(out, t, 16 * sizeof(unsigned char)); +} + + +unsigned char constant7Format_aead[100] = { +/*constant7_aead_256*/ +0x1, 0x4, 0x10, 0x40, 0x2, 0x8, 0x21, 0x5, 0x14, 0x50, 0x42, 0xa, 0x29, 0x24, + 0x11, 0x44, 0x12, 0x48, 0x23, 0xd, 0x35, 0x55, 0x56, 0x5a, 0x6b, 0x2e, + 0x38, 0x60, 0x3, 0xc, 0x31, 0x45, 0x16, 0x58, 0x63, 0xf, 0x3d, 0x74, + 0x53, 0x4e, 0x3b, 0x6c, 0x32, 0x49, 0x27, 0x1d, 0x75, 0x57, 0x5e, 0x7b, + 0x6e, 0x3a, 0x68, 0x22, 0x9, 0x25, 0x15, 0x54, 0x52, 0x4a, 0x2b, 0x2c, + 0x30, 0x41, 0x6, 0x18, 0x61, 0x7, 0x1c, 0x71, 0x47, 0x1e, 0x79, 0x66, + 0x1b, 0x6d, 0x36, 0x59, 0x67, 
0x1f, 0x7d, 0x76, 0x5b, 0x6f, 0x3e, 0x78, + 0x62, 0xb, 0x2d, 0x34, 0x51, 0x46, 0x1a, 0x69, 0x26, 0x19, 0x65, 0x17, + 0x5c, 0x73, + +}; diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.h b/knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.h index d7d877f..b8a7864 100644 --- a/knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.h @@ -1,115 +1,147 @@ -//#include #include"crypto_aead.h" #include"api.h" #include -#include -#include #define U32BIG(x) (x) + #define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) #define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) -#define sbox(a, b, c, d, e, f, g, h) \ -{ \ - t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; e = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ -} - typedef unsigned char u8; typedef unsigned int u32; typedef unsigned long long u64; -void printfU128Format(char name[], u32 * in); -void printU8(char name[], u8 var[], long len, int offset); +//new +void puckU8FormatToFourPacket(u8 in, u8 *out); -//使用t9 #define puck32(in)\ {\ -t9 = (in ^ (in >> 1)) & 0x22222222; in ^= t9 ^ (t9 << 1);\ -t9 = (in ^ (in >> 2)) & 0x0C0C0C0C; in ^= t9 ^ (t9 << 2);\ -t9 = (in ^ (in >> 4)) & 0x00F000F0; in ^= t9 ^ (t9 << 4);\ -t9 = (in ^ (in >> 8)) & 0x0000FF00; in ^= t9 ^ (t9 << 8);\ +temp1 = (in ^ (in >> 1)) & 0x22222222; in ^= temp1 ^ (temp1 << 1);\ +temp1 = (in ^ (in >> 2)) & 0x0C0C0C0C; in ^= temp1 ^ (temp1 << 2);\ +temp1 = (in ^ (in >> 4)) & 0x00F000F0; in ^= temp1 ^ (temp1 << 4);\ +temp1 = (in ^ (in >> 8)) & 0x0000FF00; in ^= temp1 ^ (temp1 << 8);\ } -//使用t9 #define unpuck32(t0){\ - t9 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= t9 ^ (t9 << 8); \ - t9 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= t9 ^ (t9 << 4); \ - t9 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= t9 ^ (t9 << 2); \ - t9 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= t9 ^ (t9 << 1); \ -} -//u32 t1, t2, t3,t8, -#define 
packU128FormatToFourPacket(out,in) {\ - t8 = U32BIG(((u32*)in)[0]); \ - t1 = U32BIG(((u32*)in)[1]); \ - t2 = U32BIG(((u32*)in)[2]); \ - t3 = U32BIG(((u32*)in)[3]); \ - puck32(t8); puck32(t8); \ - puck32(t1); puck32(t1); \ - puck32(t2); puck32(t2); \ - puck32(t3); puck32(t3); \ - out[3] = t3 & 0xff000000 | ((t2 >> 8) & 0x00ff0000) | ((t1 >> 16) & 0x0000ff00) | (t8 >> 24); \ - out[2] = ((t3 << 8) & 0xff000000) | (t2 & 0x00ff0000) | ((t1 >> 8) & 0x0000ff00) | ((t8 >> 16) & 0x000000ff); \ - out[1] = ((t3 << 16) & 0xff000000) | ((t2 << 8) & 0x00ff0000) | (t1 & 0x0000ff00) | ((t8 >> 8) & 0x000000ff); \ - out[0] = ((t3 << 24) & 0xff000000) | ((t2 << 16) & 0x00ff0000) | ((t1 << 8) & 0x0000ff00) | (t8 & 0x000000ff); \ -} -//u32 dataFormat[4],u32 t1, t2, t3,t8, -#define unpackU128FormatToFourPacket( out, in) {\ -memcpy(dataFormat, in, sizeof(unsigned int) * 4); \ -t3 = dataFormat[3] & 0xff000000 | ((dataFormat[2] >> 8) & 0x00ff0000) | ((dataFormat[1] >> 16) & 0x0000ff00) | (dataFormat[0] >> 24); \ -t2 = ((dataFormat[3] << 8) & 0xff000000) | (dataFormat[2] & 0x00ff0000) | ((dataFormat[1] >> 8) & 0x0000ff00) | ((dataFormat[0] >> 16) & 0x000000ff); \ -t1 = ((dataFormat[3] << 16) & 0xff000000) | ((dataFormat[2] << 8) & 0x00ff0000) | (dataFormat[1] & 0x0000ff00) | ((dataFormat[0] >> 8) & 0x000000ff); \ -t8 = ((dataFormat[3] << 24) & 0xff000000) | ((dataFormat[2] << 16) & 0x00ff0000) | ((dataFormat[1] << 8) & 0x0000ff00) | (dataFormat[0] & 0x000000ff); \ -unpuck32(t8); unpuck32(t8); \ -unpuck32(t1); unpuck32(t1); \ -unpuck32(t2); unpuck32(t2); \ -unpuck32(t3); unpuck32(t3); \ -((u32*)out)[0] = U32BIG(t8); \ -((u32*)out)[1] = U32BIG(t1); \ -((u32*)out)[2] = U32BIG(t2); \ -((u32*)out)[3] = U32BIG(t3); \ -} -//u32 t1 ;u32 t2 = -#define packU64FormatToFourPacket( out, in) {\ -t1 = U32BIG(((u32*)in)[0]); \ -t2 = U32BIG(((u32*)in)[1]); \ -puck32(t1); \ -puck32(t1); \ -puck32(t2); \ -puck32(t2); \ -out[3] = ((t2 >> 16) & 0x0000ff00) | ((t1 >> 24)); \ -out[2] = ((t2 >> 8) & 0x0000ff00) | 
((t1 >> 16) & 0x000000ff); \ -out[1] = (t2 & 0x0000ff00) | ((t1 >> 8) & 0x000000ff); \ -out[0] = ((t2 << 8) & 0x0000ff00) | (t1 & 0x000000ff); \ -} -#define BIT_LOTR32_1(t0,t1,t2,t3,t4,t5,t6,t7){\ -t4= LOTR32(t3, 1);\ -t5 = t0;\ -t6 = t1; \ -t7 = t2; \ -} -#define BIT_LOTR32_16(t0,t1,t2,t3,t4,t5,t6,t7){\ -t4= LOTR32(t0, 4);\ -t5 = LOTR32(t1, 4);\ -t6 = LOTR32(t2, 4); \ -t7 = LOTR32(t3, 4); \ -} -#define BIT_LOTR32_25(t0,t1,t2,t3,t4,t5,t6,t7){\ -t4= LOTR32(t3, 7);\ -t5 = LOTR32(t0, 6);\ -t6 = LOTR32(t1, 6); \ -t7 = LOTR32(t2, 6); \ + r0 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= r0 ^ (r0 << 8); \ + r0 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= r0 ^ (r0 << 4); \ + r0 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= r0 ^ (r0 << 2); \ + r0 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= r0 ^ (r0 << 1); \ } -#define ROUND512( arr,lunNum) {\ -s[3] ^= (arr[lunNum] >> 6) & 0x3;\ -s[2] ^= (arr[lunNum] >> 4) & 0x3;\ -s[1] ^= (arr[lunNum] >> 2) & 0x3;\ -s[0] ^= arr[lunNum] & 0x3;\ -sbox(s[0], s[4], s[8], s[12], s_temp[0], s_temp[4], s_temp[8], s_temp[12]);\ -sbox(s[1], s[5], s[9], s[13], s_temp[1], s_temp[5], s_temp[9], s_temp[13]);\ -sbox(s[2], s[6], s[10], s[14], s_temp[2], s_temp[6], s_temp[10], s_temp[14]);\ -sbox(s[3], s[7], s[11], s[15], s_temp[3], s_temp[7], s_temp[11], s_temp[15]);\ -s[0] = s_temp[0], s[1] = s_temp[1], s[2] = s_temp[2], s[3] = s_temp[3];\ -BIT_LOTR32_1(s_temp[4], s_temp[5], s_temp[6], s_temp[7], s[4], s[5], s[6], s[7]);\ -BIT_LOTR32_16(s_temp[8], s_temp[9], s_temp[10], s_temp[11], s[8], s[9], s[10], s[11]);\ -BIT_LOTR32_25(s_temp[12], s_temp[13], s_temp[14], s_temp[15], s[12], s[13], s[14], s[15]);\ -} +unsigned char constant7Format_aead[100]; + +//t1 +#define P512_ARC_1(rci) \ + do { \ + __asm__ __volatile__ ( \ + "/*add round const s0 s1 s2 s3*/ \n\t"\ + "ands %[t1] , %[rci], #0xc0\n\t" \ + "eors %[S_3], %[S_3], %[t1], LSR #6 \n\t" /*s[3] ^= (constant7Format_aead[lunNum] >> 6) & 0x3;*/\ + "ands %[t2] , %[rci], #0x30\n\t" \ + "eors %[S_2], %[S_2], %[t2], LSR #4 \n\t" /*s[2] ^= 
(constant7Format_aead[lunNum] >> 4) & 0x3;*/\ + "ands %[t3] , %[rci], #0xc\n\t" \ + "eors %[S_1], %[S_1], %[t3], LSR #2 \n\t" /*s[1] ^= (constant7Format_aead[lunNum] >> 2) & 0x3;*/\ + "ands %[t4] , %[rci], #0x3\n\t" \ + "eors %[S_0], %[S_0], %[t4] \n\t" /*s[0] ^= constant7Format_aead[lunNum] & 0x3;*/\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2), [t3] "=r" (t3), [t4] "=r" (t9), [rci] "+r" (rci),\ + [S_0] "+r" (s[0]), [S_1] "+r" (s[1]), [S_2] "+r" (s[2]),[S_3] "+r" (s[3])\ + : : );\ +}while (0) +//t1 t2 +#define P512_2SC(S1,S2,S3,S4,S5,S6,S7,S8) \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[S_4], %[t1] \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] \n\t"\ + "eors %[S_0], %[S_6], %[S_0] \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] \n\t"\ + "eors %[S_6], %[S_6], %[t1] \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], %[t2] , %[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + "/*sbox column*/ \n\t"\ + "mvns %[S_1], %[S_1] \n\t"\ + "ands %[t3], %[S_3], %[S_1] \n\t"\ + "eors %[t3], %[S_5], %[t3] \n\t"\ + "orrs %[S_5], %[S_3], %[S_5] \n\t"\ + "eors %[S_1], %[S_7], %[S_1] \n\t"\ + "eors %[S_5], %[S_5], %[S_1] \n\t"\ + "eors %[t4], %[S_3], %[S_7] \n\t"\ + "eors %[S_7], %[S_7], %[t3] \n\t"\ + "ands %[S_1], %[t3] , %[S_1] \n\t"\ + "eors %[S_1], %[t4] , %[S_1] \n\t"\ + "ands %[S_3], %[S_5], %[t4] \n\t"\ + "eors %[S_3], %[t3] , %[S_3] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2), [t3] "=r" (t3), [t4] "=r" (t9),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) ,\ + [S_1] "+r" (S5), [S_3] "+r" (S6), [S_5] "+r" (S7), [S_7] "+r" (S8)\ + : : );\ +}while (0) +#define P512_SR_1() \ + do { \ + __asm__ __volatile__ ( \ + "/*rotate shift left 1 bit [w9 w5 w1-> (w1,1) w9 w5] */ \n\t"\ + 
"mov %[t1], %[S_7] \n\t"\ + "mov %[S_7], %[S_6] \n\t"\ + "mov %[S_6], %[S_5] \n\t"\ + "mov %[S_5], %[S_4] \n\t"\ + "ROR %[S_4], %[t1] , #31 \n\t"\ + "/*rotate shift left 8 bits [w10 w6 w2-> 锛坵6,3) (w2,3) ( w10,2)]*/ \n\t"\ + "ROR %[S_11], %[S_11] , #28 \n\t"\ + "ROR %[S_10], %[S_10] , #28 \n\t"\ + "ROR %[S_9], %[S_9] , #28 \n\t"\ + "ROR %[S_8], %[S_8] , #28 \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1),\ + [S_4] "+r" (s[4]), [S_8] "+r" (s[8]) ,\ + [S_5] "+r" (s[5]), [S_9] "+r" (s[9]) ,\ + [S_6] "+r" (s[6]), [S_10] "+r" (s[10]),\ + [S_7] "+r" (s[7]), [S_11] "+r" (s[11])\ + : : );\ +}while (0) + //t1 t2 +#define P512_SR_ARC_2(rci) \ + do { \ + __asm__ __volatile__ ( \ + "/*rotate shift left 25 bit [w11 w7 w3-> 锛坵3,13) (w11,14) ( w7,14)] */ \n\t"\ + "mov %[t3], %[S_15] \n\t"\ + "ROR %[S_15], %[S_14] , #26 \n\t"\ + "ROR %[S_14], %[S_13] , #26 \n\t"\ + "ROR %[S_13], %[S_12] , #26 \n\t"\ + "ROR %[S_12], %[t3] , #25 \n\t"\ + "/*add round const s0 s1 s2 s3*/ \n\t"\ + "ands %[t1] , %[rci] , #0xc0\n\t" \ + "eors %[S_3] , %[S_3] , %[t1], LSR #6 \n\t" /*s[3] ^= (constant7Format_aead[lunNum] >> 6) & 0x3;*/\ + "ands %[t2] , %[rci] , #0x30\n\t" \ + "eors %[S_2] , %[S_2] , %[t2], LSR #4 \n\t" /*s[2] ^= (constant7Format_aead[lunNum] >> 4) & 0x3;*/\ + "ands %[t3] , %[rci] , #0xc\n\t" \ + "eors %[S_1] , %[S_1] , %[t3], LSR #2 \n\t" /*s[1] ^= (constant7Format_aead[lunNum] >> 2) & 0x3;*/\ + "ands %[t1] , %[rci] , #0x3\n\t" \ + "eors %[S_0] , %[S_0] , %[t1] \n\t" /*s[0] ^= constant7Format_aead[lunNum] & 0x3;*/\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2), [t3] "=r" (t3), [rci] "+r" (rci),\ + [S_0] "+r" (s[0]), [S_1] "+r" (s[1]), [S_2] "+r" (s[2]), [S_3] "+r" (s[3]),\ + [S_12] "+r" (s[12]),[S_13] "+r" (s[13]),[S_14] "+r" (s[14]),[S_15] "+r" (s[15])\ + : : );\ +}while (0) + //t1 +#define P512_SR_2() \ + do { \ + __asm__ __volatile__ ( \ + "/*rotate shift left 25 bit [w11 w7 w3-> 锛坵3,13) (w11,14) 
( w7,14)] */ \n\t"\ + "mov %[t1], %[S_15] \n\t"\ + "ROR %[S_15], %[S_14] , #26 \n\t"\ + "ROR %[S_14], %[S_13] , #26 \n\t"\ + "ROR %[S_13], %[S_12] , #26 \n\t"\ + "ROR %[S_12], %[t1] , #25 \n\t"\ + : /* output variables - including inputs that are changed */\ + [S_12] "+r" (s[12]),[S_13] "+r" (s[13]),[S_14] "+r" (s[14]),[S_15] "+r" (s[15]),\ + [t1] "=r" (t1): : );\ +}while (0) diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_2/crypto_aead.h b/knot/Implementations/crypto_aead/knot256/armcortexm_2/crypto_aead.h index cdfdf19..10ecefb 100644 --- a/knot/Implementations/crypto_aead/knot256/armcortexm_2/crypto_aead.h +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_2/crypto_aead.h @@ -1,17 +1,10 @@ -int crypto_aead_encrypt( - unsigned char *c, unsigned long long *clen, - const unsigned char *m, unsigned long long mlen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *nsec, - const unsigned char *npub, - const unsigned char *k -); +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k); -int crypto_aead_decrypt( - unsigned char *m, unsigned long long *mlen, - unsigned char *nsec, - const unsigned char *c, unsigned long long clen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *npub, - const unsigned char *k -); +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k); diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_2/encrypt.c b/knot/Implementations/crypto_aead/knot256/armcortexm_2/encrypt.c index 17041b6..d30e162 100644 --- a/knot/Implementations/crypto_aead/knot256/armcortexm_2/encrypt.c +++ 
b/knot/Implementations/crypto_aead/knot256/armcortexm_2/encrypt.c @@ -1,308 +1,19 @@ - #include"auxFormat.h" #define aead_RATE (128 / 8) #define PR0_ROUNDS 100 #define PR_ROUNDS 52 #define PRF_ROUNDS 56 -unsigned char constant7Format_aead[127] = { - /*constant7_aead_256*/ -0x1, -0x4, -0x10, -0x40, -0x2, -0x8, -0x21, -0x5, -0x14, -0x50, -0x42, -0xa, -0x29, -0x24, -0x11, -0x44, -0x12, -0x48, -0x23, -0xd, -0x35, -0x55, -0x56, -0x5a, -0x6b, -0x2e, -0x38, -0x60, -0x3, -0xc, -0x31, -0x45, -0x16, -0x58, -0x63, -0xf, -0x3d, -0x74, -0x53, -0x4e, -0x3b, -0x6c, -0x32, -0x49, -0x27, -0x1d, -0x75, -0x57, -0x5e, -0x7b, -0x6e, -0x3a, -0x68, -0x22, -0x9, -0x25, -0x15, -0x54, -0x52, -0x4a, -0x2b, -0x2c, -0x30, -0x41, -0x6, -0x18, -0x61, -0x7, -0x1c, -0x71, -0x47, -0x1e, -0x79, -0x66, -0x1b, -0x6d, -0x36, -0x59, -0x67, -0x1f, -0x7d, -0x76, -0x5b, -0x6f, -0x3e, -0x78, -0x62, -0xb, -0x2d, -0x34, -0x51, -0x46, -0x1a, -0x69, -0x26, -0x19, -0x65, -0x17, -0x5c, -0x73, -0x4f, -0x3f, -0x7c, -0x72, -0x4b, -0x2f, -0x3c, -0x70, -0x43, -0xe, -0x39, -0x64, -0x13, -0x4c, -0x33, -0x4d, -0x37, -0x5d, -0x77, -0x5f, -0x7f, -0x7e, -0x7a, -0x6a, -0x2a, -0x28, -0x20, -}; - - - - -/* State - * w12 w8 w4 w0 - * w13 w9 w5 w1 - * w14 w10 w6 w2 - * w15 w11 w7 w3 - * - */ - static void permutation512(unsigned int *in, int rounds, unsigned char *rc) { - uint32_t w0, w1, w2, w3, w4, w5, w6, w7; - uint32_t w8, w9, w10, w11, w12, w13, w14, w15; - uint32_t s0, s1, s2,s3; - uint32_t i=0; - __asm volatile( - "ldr w0, [in] \n\t" - "ldr w4, [in, #4] \n\t" - "ldr w8, [in, #8] \n\t" - "ldr w12, [in, #12] \n\t" - "ldr w1, [in, #16] \n\t" - "ldr w5, [in, #20] \n\t" - "ldr w9, [in, #24] \n\t" - "ldr w13, [in, #28] \n\t" - "ldr w2, [in, #32] \n\t" - "ldr w6, [in, #36] \n\t" - "ldr w10, [in, #40] \n\t" - "ldr w14, [in, #44] \n\t" - "ldr w3, [in, #48] \n\t" - "ldr w7, [in, #52] \n\t" - "ldr w11, [in, #56] \n\t" - "ldr w15, [in, #60] \n\t" - "enc_loop: \n\t" - "/*add round const s0 s1 s2 s3*/ \n\t" - "ldrb s3, [rc] \n\t" - - 
"LSR s0, s3, #6 \n\t" - "and s0, s0, 0x3 \n\t" - - "LSR s1, s3, #4 \n\t" - "and s1, s1, 0x3 \n\t" - - "LSR s2, s3, #2 \n\t" - "and s2, s2, 0x3 \n\t" - - "and s3, s3, 0x3 \n\t" - - "eors w12, w12, s0 \n\t" - "eors w8, w8, s1 \n\t" - "eors w4, w4, s2 \n\t" - "eors w0, w0, s3 \n\t" - "/*sbox first column*/ \n\t" - "mvns w0, w0 \n\t" - "ands s0, w1, w0 \n\t" - "eors s0, w2, s0 \n\t" - "orrs w2, w1, w2 \n\t" - "eors w0, w3, w0 \n\t" - "eors w2, w2, w0 \n\t" - "eors s1, w1, w3 \n\t" - "eors w3, w3, s0 \n\t" - "ands w0, s0, w0 \n\t" - "eors w0, s1, w0 \n\t" - "ands w1, w2, s1 \n\t" - "eors w1, s0, w1 \n\t" - "/*sbox second column*/ \n\t" - "mvns w4, w4 \n\t" - "ands s0, w5, w4 \n\t" - "eors s0, w6, s0 \n\t" - "orrs w6, w5, w6 \n\t" - "eors w4, w7, w4 \n\t" - "eors w6, w6, w4 \n\t" - "eors s1, w5, w7 \n\t" - "eors w7, w7, s0 \n\t" - "ands w4, s0, w4 \n\t" - "eors w4, s1, w4 \n\t" - "ands w5, w6, s1 \n\t" - "eors w5, s0, w5 \n\t" - "/*sbox third column*/ \n\t" - "mvns w8, w8 \n\t" - "ands s0, w9, w8 \n\t" - "eors s0, w10, s0 \n\t" - "orrs w10, w9, w10 \n\t" - "eors w8, w11, w8 \n\t" - "eors w10, w10, w8 \n\t" - "eors s1, w9, w11 \n\t" - "eors w11, w11, s0 \n\t" - "ands w8, s0, w8 \n\t" - "eors w8, s1, w8 \n\t" - "ands w9, w10, s1 \n\t" - "eors w9, s0, w9 \n\t" - "/*sbox forth column*/ \n\t" - "mvns w12, w12 \n\t" - "ands s0, w13, w12 \n\t" - "eors s0, w14, s0 \n\t" - "orrs w14, w13, w14 \n\t" - "eors w12, w15, w12 \n\t" - "eors w14, w14, w12 \n\t" - "eors s1, w13, w15 \n\t" - "eors w15, w15, s0 \n\t" - "ands w12, s0, w12 \n\t" - "eors w12, s1, w12 \n\t" - "ands w13, w14, s1 \n\t" - "eors w13, s0, w13 \n\t" - "/*rotate shift left 1 bit [ w13 w9 w5 w1-> w9 w5 w1 (w13,1)] */ \n\t" - "mov s0, w13 \n\t" - "mov w13, w9 \n\t" - "mov w9, w5 \n\t" - "mov w5, w1 \n\t" - "ROR w1, s0 , #31 \n\t" - "/*rotate shift left 8 bits [w14 w10 w6 w2->(w14,4) (w10,4) (w6,4) ( w2,4)]*/ \n\t" - "ROR w14, w14 , #28 \n\t" - "ROR w10, w10 , #28 \n\t" - "ROR w6, w6 , #28 \n\t" - "ROR w2, w2 , #28 \n\t" 
- "/*rotate shift left 25 bit [w15 w11 w7 w3-> (w11,13) (w7,14) ( w3,14) ( w15,14)] */ \n\t" - "mov s0, w15 \n\t" - "ROR w15, w11 , #26 \n\t" - "ROR w11, w7 , #26 \n\t" - "ROR w7 , w3 , #26 \n\t" - "ROR w3 , s0 , #25 \n\t" - "/*loop control*/ \n\t" - "adds rc, rc, #1 \n\t" - "subs rounds, rounds, #1 \n\t" - "bne enc_loop \n\t" - "str w0, [in] \n\t" - "str w4, [in, #4] \n\t" - "str w8, [in, #8] \n\t" - "str w12, [in, #12] \n\t" - "str w1, [in, #16] \n\t" - "str w5, [in, #20] \n\t" - "str w9, [in, #24] \n\t" - "str w13, [in, #28] \n\t" - "str w2, [in, #32] \n\t" - "str w6, [in, #36] \n\t" - "str w10, [in, #40] \n\t" - "str w14, [in, #44] \n\t" - "str w3, [in, #48] \n\t" - "str w7, [in, #52] \n\t" - "str w11, [in, #56] \n\t" - "str w15, [in, #60] \n\t" - ); +void Initialize(u32 *s, const unsigned char *npub, const unsigned char *k) { + packU128FormatToFourPacket(s, npub); + packU128FormatToFourPacket(s + 4, npub + 16); + packU128FormatToFourPacket(s + 8, k); + packU128FormatToFourPacket(s + 12, k + 16); + P512(s, constant7Format_aead, PR0_ROUNDS); } - - -int crypto_aead_encrypt( - unsigned char *c, unsigned long long *clen, - const unsigned char *m, unsigned long long mlen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *nsec, - const unsigned char *npub, - const unsigned char *k -) { - u32 i, j; - u32 s_temp[16] = { 0 }; - u32 t1, t2, t3, t5, t6, t8, t9, t11; - // initialization - u32 s[16] = { 0 }; +void ProcessAssocData(u32 *s, const u8* ad, unsigned long long adlen) { u32 dataFormat[4] = { 0 }; - u8 tempData[16] = {0}; - *clen = mlen + CRYPTO_ABYTES; - //initialization - packU128FormatToFourPacket(s, npub); - packU128FormatToFourPacket((s + 4), (npub + 16)); - packU128FormatToFourPacket((s + 8), k); - packU128FormatToFourPacket((s + 12), (k + 16)); - permutation512(s,PR0_ROUNDS,constant7Format_aead); - // process associated data + u8 tempData[16] = { 0 }; if (adlen) { while (adlen >= aead_RATE) { packU128FormatToFourPacket(dataFormat, 
ad); @@ -310,7 +21,7 @@ int crypto_aead_encrypt( s[1] ^= dataFormat[1]; s[2] ^= dataFormat[2]; s[3] ^= dataFormat[3]; - permutation512(s,PR_ROUNDS,constant7Format_aead); + P512(s, constant7Format_aead, PR_ROUNDS); adlen -= aead_RATE; ad += aead_RATE; } @@ -322,9 +33,14 @@ int crypto_aead_encrypt( s[1] ^= dataFormat[1]; s[2] ^= dataFormat[2]; s[3] ^= dataFormat[3]; - permutation512(s,PR_ROUNDS,constant7Format_aead); + P512(s, constant7Format_aead, PR_ROUNDS); } s[15] ^= 0x80000000; +} +void ProcessPlaintext(u32 *s, const u8* m, unsigned long long mlen, + unsigned char *c) { + u32 dataFormat[4] = { 0 }; + u8 tempData[16] = { 0 }; if (mlen) { while (mlen >= aead_RATE) { packU128FormatToFourPacket(dataFormat, m); @@ -333,14 +49,14 @@ int crypto_aead_encrypt( s[2] ^= dataFormat[2]; s[3] ^= dataFormat[3]; unpackU128FormatToFourPacket(c, s); - permutation512(s,PR_ROUNDS,constant7Format_aead); + P512(s, constant7Format_aead, PR_ROUNDS); mlen -= aead_RATE; m += aead_RATE; c += aead_RATE; } memset(tempData, 0, sizeof(tempData)); memcpy(tempData, m, mlen * sizeof(unsigned char)); - tempData[mlen]= 0x01; + tempData[mlen] = 0x01; packU128FormatToFourPacket(dataFormat, tempData); s[0] ^= dataFormat[0]; s[1] ^= dataFormat[1]; @@ -348,105 +64,92 @@ int crypto_aead_encrypt( s[3] ^= dataFormat[3]; unpackU128FormatToFourPacket(tempData, s); memcpy(c, tempData, mlen * sizeof(unsigned char)); - c += mlen; + //c += mlen; } - // finalization - permutation512(s,PRF_ROUNDS,constant7Format_aead); +} + +void Finalize_GenerateTag(u32 *s, unsigned char *c) { + P512(s, constant7Format_aead, PRF_ROUNDS); // return tag unpackU128FormatToFourPacket(c, s); - unpackU128FormatToFourPacket((c+16), (s+4)); - return 0; + unpackU128FormatToFourPacket(c + 16, s + 4); } - -int crypto_aead_decrypt( - unsigned char *m, unsigned long long *mlen, - unsigned char *nsec, - const unsigned char *c, unsigned long long clen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *npub, - const 
unsigned char *k -){ - u32 s_temp[16] = { 0 }; - u32 t1, t2, t3, t5, t6, t8, t9, t11; - u8 i, j; - // initialization - u32 s[16] = { 0 }; - u32 dataFormat[4] = { 0 }; - u32 dataFormat_1[4] = { 0 }; - u32 dataFormat_2[4] = { 0 }; - u8 tempData[16] = { 0 }; - u8 tempU8[64] = { 0 }; - - if (clen < CRYPTO_ABYTES) +int Finalize_VerifyTag(u32 *s, const unsigned char *c, unsigned char *m, + unsigned long long *mlen) { + u8 tempU8[32] = { 0 }; + P512(s, constant7Format_aead, PRF_ROUNDS); + unpackU128FormatToFourPacket(tempU8, s); + unpackU128FormatToFourPacket(tempU8 + 16, s + 4); + if (memcmp((void*) tempU8, (void*) (c), CRYPTO_ABYTES)) { + memset(m, 0, sizeof(unsigned char) * (*mlen)); + *mlen = 0; return -1; - *mlen = clen - CRYPTO_ABYTES; - //initialization - packU128FormatToFourPacket(s, npub); - packU128FormatToFourPacket((s + 4), (npub + 16)); - packU128FormatToFourPacket((s + 8), k); - packU128FormatToFourPacket((s + 12), (k + 16)); - permutation512(s,PR0_ROUNDS,constant7Format_aead); - // process associated data - if (adlen) { - while (adlen >= aead_RATE) { - packU128FormatToFourPacket(dataFormat, ad); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - s[2] ^= dataFormat[2]; - s[3] ^= dataFormat[3]; - permutation512(s,PR_ROUNDS,constant7Format_aead); - adlen -= aead_RATE; - ad += aead_RATE; - } - memset(tempData, 0, sizeof(tempData)); - - memcpy(tempData, ad, adlen * sizeof(unsigned char)); - tempData[adlen] = 0x01; - packU128FormatToFourPacket(dataFormat, tempData); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - s[2] ^= dataFormat[2]; - s[3] ^= dataFormat[3]; - permutation512(s,PR_ROUNDS,constant7Format_aead); } - s[15] ^= 0x80000000; - clen = clen - CRYPTO_KEYBYTES; - + return 0; +} +void ProcessCiphertext(u32 *s, unsigned char *m, const unsigned char *c, + unsigned long long clen) { + u32 dataFormat[8] = { 0 }; + u32 dataFormat_1[4] = { 0 }; + u8 i, tempU8[64] = { 0 }; if (clen) { while (clen >= aead_RATE) { - packU128FormatToFourPacket(dataFormat_2, c); 
- dataFormat_1[0] = s[0] ^ dataFormat_2[0]; - dataFormat_1[1] = s[1] ^ dataFormat_2[1]; - dataFormat_1[2] = s[2] ^ dataFormat_2[2]; - dataFormat_1[3] = s[3] ^ dataFormat_2[3]; + packU128FormatToFourPacket(dataFormat, c); + dataFormat_1[0] = s[0] ^ dataFormat[0]; + dataFormat_1[1] = s[1] ^ dataFormat[1]; + dataFormat_1[2] = s[2] ^ dataFormat[2]; + dataFormat_1[3] = s[3] ^ dataFormat[3]; unpackU128FormatToFourPacket(m, dataFormat_1); - s[0] = dataFormat_2[0]; - s[1] = dataFormat_2[1]; - s[2] = dataFormat_2[2]; - s[3] = dataFormat_2[3]; - permutation512(s,PR_ROUNDS,constant7Format_aead); + s[0] = dataFormat[0]; + s[1] = dataFormat[1]; + s[2] = dataFormat[2]; + s[3] = dataFormat[3]; + P512(s, constant7Format_aead, PR_ROUNDS); clen -= aead_RATE; m += aead_RATE; c += aead_RATE; } unpackU128FormatToFourPacket(tempU8, s); - for (i = 0; i < clen; ++i, ++m, ++c) - { + for (i = 0; i < clen; ++i, ++m, ++c) { *m = tempU8[i] ^ *c; tempU8[i] = *c; } tempU8[i] ^= 0x01; packU128FormatToFourPacket(s, tempU8); } +} + +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + u32 s[16] = { 0 }; + *clen = mlen + CRYPTO_ABYTES; + //initialization + Initialize(s, npub, k); + // process associated data + + ProcessAssocData(s, ad, adlen); + + ProcessPlaintext(s, m, mlen, c); + // finalization - permutation512(s,PRF_ROUNDS,constant7Format_aead); - // return tag - packU128FormatToFourPacket(dataFormat, c); - packU128FormatToFourPacket(dataFormat_1, (c + 16)); - if (dataFormat[0] != s[0] || dataFormat[1] != s[1] || dataFormat[2] != s[2] || dataFormat[3] != s[3] - || dataFormat_1[0] != s[4] || dataFormat_1[1] != s[5] || dataFormat_1[2] != s[6] || dataFormat_1[3] != s[7]) { - return -1; - } + Finalize_GenerateTag(s, c + mlen); return 0; - -} \ No newline at end of file +} +int 
crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + u32 s[16] = { 0 }; + if (clen < CRYPTO_ABYTES) + return -1; + *mlen = clen - CRYPTO_ABYTES; + //initialization + Initialize(s, npub, k); + ProcessAssocData(s, ad, adlen); + ProcessCiphertext(s, m, c, clen - CRYPTO_ABYTES); + // finalization + return Finalize_VerifyTag(s, c + clen - CRYPTO_KEYBYTES, m, mlen); +} diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_4/api.h b/knot/Implementations/crypto_aead/knot256/armcortexm_4/api.h new file mode 100644 index 0000000..3eb57e5 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_4/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 32 //256/8=32 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 32 +#define CRYPTO_ABYTES 32 +#define CRYPTO_NOOVERLAP 1 + diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_4/auxFormat.c b/knot/Implementations/crypto_aead/knot256/armcortexm_4/auxFormat.c new file mode 100644 index 0000000..63f20be --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_4/auxFormat.c @@ -0,0 +1,82 @@ +#include"auxFormat.h" + +void P512(unsigned int *s, unsigned char *round, unsigned char lunNum) { + u32 s_temp[16] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + unsigned char i; + for (i = 0; i < lunNum; i++) { + s[3] ^= (round[i] >> 6) & 0x3; + s[2] ^= (round[i] >> 4) & 0x3; + s[1] ^= (round[i] >> 2) & 0x3; + s[0] ^= round[i] & 0x3; + sbox(s[3], s[7], s[11], s[15], s_temp[7], s_temp[11], s_temp[15]); + sbox(s[2], s[6], s[10], s[14], s[7], s_temp[10], s_temp[14]); + sbox(s[1], s[5], s[9], s[13], s[6], s_temp[9], s_temp[13]); + sbox(s[0], s[4], s[8], s[12], s[5], s_temp[8], s_temp[12]); + s[4] = LOTR32(s_temp[7], 1); + BIT_LOTR32_16(s_temp[8], s_temp[9], s_temp[10], s_temp[11], s[8], s[9], + s[10], s[11]); 
+ BIT_LOTR32_25(s_temp[12], s_temp[13], s_temp[14], s_temp[15], s[12], + s[13], s[14], s[15]); + + } +} +void packU128FormatToFourPacket(u32 * out, u8 * in) { + u32 t0 = U32BIG(((u32* )in)[0]); + u32 t1 = U32BIG(((u32* )in)[1]); + u32 t2 = U32BIG(((u32* )in)[2]); + u32 t3 = U32BIG(((u32* )in)[3]); + u32 temp1; + puck32(t0); + puck32(t0); + puck32(t1); + puck32(t1); + puck32(t2); + puck32(t2); + puck32(t3); + puck32(t3); + out[3] = (t3 & 0xff000000) | ((t2 >> 8) & 0x00ff0000) + | ((t1 >> 16) & 0x0000ff00) | (t0 >> 24); + out[2] = ((t3 << 8) & 0xff000000) | (t2 & 0x00ff0000) + | ((t1 >> 8) & 0x0000ff00) | ((t0 >> 16) & 0x000000ff); + out[1] = ((t3 << 16) & 0xff000000) | ((t2 << 8) & 0x00ff0000) + | (t1 & 0x0000ff00) | ((t0 >> 8) & 0x000000ff); + out[0] = ((t3 << 24) & 0xff000000) | ((t2 << 16) & 0x00ff0000) + | ((t1 << 8) & 0x0000ff00) | (t0 & 0x000000ff); +} +void unpackU128FormatToFourPacket(u8 * out, u32 * in) { + u32 t[4] = { 0 }; + u32 r0; + t[3] = (in[3] & 0xff000000 )| ((in[2] >> 8) & 0x00ff0000) + | ((in[1] >> 16) & 0x0000ff00) | (in[0] >> 24); + t[2] = ((in[3] << 8) & 0xff000000) | (in[2] & 0x00ff0000) + | ((in[1] >> 8) & 0x0000ff00) | ((in[0] >> 16) & 0x000000ff); + t[1] = ((in[3] << 16) & 0xff000000) | ((in[2] << 8) & 0x00ff0000) + | (in[1] & 0x0000ff00) | ((in[0] >> 8) & 0x000000ff); + t[0] = ((in[3] << 24) & 0xff000000) | ((in[2] << 16) & 0x00ff0000) + | ((in[1] << 8) & 0x0000ff00) | (in[0] & 0x000000ff); + unpuck32(t[0]); + unpuck32(t[0]); + unpuck32(t[1]); + unpuck32(t[1]); + unpuck32(t[2]); + unpuck32(t[2]); + unpuck32(t[3]); + unpuck32(t[3]); + memcpy(out, t, 16 * sizeof(unsigned char)); +} + + +unsigned char constant7Format_aead[100] = { +/*constant7_aead_256*/ +0x1, 0x4, 0x10, 0x40, 0x2, 0x8, 0x21, 0x5, 0x14, 0x50, 0x42, 0xa, 0x29, 0x24, + 0x11, 0x44, 0x12, 0x48, 0x23, 0xd, 0x35, 0x55, 0x56, 0x5a, 0x6b, 0x2e, + 0x38, 0x60, 0x3, 0xc, 0x31, 0x45, 0x16, 0x58, 0x63, 0xf, 0x3d, 0x74, + 0x53, 0x4e, 0x3b, 0x6c, 0x32, 0x49, 0x27, 0x1d, 0x75, 0x57, 0x5e, 
0x7b, + 0x6e, 0x3a, 0x68, 0x22, 0x9, 0x25, 0x15, 0x54, 0x52, 0x4a, 0x2b, 0x2c, + 0x30, 0x41, 0x6, 0x18, 0x61, 0x7, 0x1c, 0x71, 0x47, 0x1e, 0x79, 0x66, + 0x1b, 0x6d, 0x36, 0x59, 0x67, 0x1f, 0x7d, 0x76, 0x5b, 0x6f, 0x3e, 0x78, + 0x62, 0xb, 0x2d, 0x34, 0x51, 0x46, 0x1a, 0x69, 0x26, 0x19, 0x65, 0x17, + 0x5c, 0x73, + +}; diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_4/auxFormat.h b/knot/Implementations/crypto_aead/knot256/armcortexm_4/auxFormat.h new file mode 100644 index 0000000..37a8c45 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_4/auxFormat.h @@ -0,0 +1,49 @@ +#include"crypto_aead.h" +#include"api.h" +#include +#define U32BIG(x) (x) + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) + +#define sbox(a, b, c, d, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} + +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; +void printU8(char name[], u8 var[], long len, int offset); + +//new +void puckU8FormatToFourPacket(u8 in, u8 *out); + +#define puck32(in)\ +{\ +temp1 = (in ^ (in >> 1)) & 0x22222222; in ^= temp1 ^ (temp1 << 1);\ +temp1 = (in ^ (in >> 2)) & 0x0C0C0C0C; in ^= temp1 ^ (temp1 << 2);\ +temp1 = (in ^ (in >> 4)) & 0x00F000F0; in ^= temp1 ^ (temp1 << 4);\ +temp1 = (in ^ (in >> 8)) & 0x0000FF00; in ^= temp1 ^ (temp1 << 8);\ +} +#define unpuck32(t0){\ + r0 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= r0 ^ (r0 << 8); \ + r0 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= r0 ^ (r0 << 4); \ + r0 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= r0 ^ (r0 << 2); \ + r0 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= r0 ^ (r0 << 1); \ +} + +#define BIT_LOTR32_16(t0,t1,t2,t3,t4,t5,t6,t7){\ +t4= LOTR32(t0, 4);\ +t5 = LOTR32(t1, 4);\ +t6 = LOTR32(t2, 4); \ +t7 = LOTR32(t3, 4); \ +} +#define BIT_LOTR32_25(t0,t1,t2,t3,t4,t5,t6,t7){\ +t4= LOTR32(t3, 7);\ +t5 = 
LOTR32(t0, 6);\ +t6 = LOTR32(t1, 6); \ +t7 = LOTR32(t2, 6); \ +} + +unsigned char constant7Format_aead[100]; diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_4/crypto_aead.h b/knot/Implementations/crypto_aead/knot256/armcortexm_4/crypto_aead.h new file mode 100644 index 0000000..10ecefb --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_4/crypto_aead.h @@ -0,0 +1,10 @@ +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k); + +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k); diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_4/encrypt.c b/knot/Implementations/crypto_aead/knot256/armcortexm_4/encrypt.c new file mode 100644 index 0000000..31a9ce9 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_4/encrypt.c @@ -0,0 +1,153 @@ +#include"auxFormat.h" + +#define aead_RATE (128 / 8) +#define PR0_ROUNDS 100 +#define PR_ROUNDS 52 +#define PRF_ROUNDS 56 +void Initialize(u32 *s, const unsigned char *npub, const unsigned char *k) { + packU128FormatToFourPacket(s, npub); + packU128FormatToFourPacket(s + 4, npub + 16); + packU128FormatToFourPacket(s + 8, k); + packU128FormatToFourPacket(s + 12, k + 16); + P512(s, constant7Format_aead, PR0_ROUNDS); +} +void ProcessAssocData(u32 *s, const u8* ad, unsigned long long adlen) { + + u32 dataFormat[4] = { 0 }; + u8 tempData[16] = { 0 }; + if (adlen) { + while (adlen >= aead_RATE) { + packU128FormatToFourPacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + s[3] ^= dataFormat[3]; + P512(s, constant7Format_aead, PR_ROUNDS); + 
adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; + packU128FormatToFourPacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + s[3] ^= dataFormat[3]; + P512(s, constant7Format_aead, PR_ROUNDS); + } + s[15] ^= 0x80000000; +} +void ProcessPlaintext(u32 *s, const u8* m, unsigned long long mlen, + unsigned char *c) { + u32 dataFormat[4] = { 0 }; + u8 tempData[16] = { 0 }; + if (mlen) { + while (mlen >= aead_RATE) { + packU128FormatToFourPacket(dataFormat, m); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + s[3] ^= dataFormat[3]; + unpackU128FormatToFourPacket(c, s); + P512(s, constant7Format_aead, PR_ROUNDS); + mlen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, m, mlen * sizeof(unsigned char)); + tempData[mlen] = 0x01; + packU128FormatToFourPacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + s[3] ^= dataFormat[3]; + unpackU128FormatToFourPacket(tempData, s); + memcpy(c, tempData, mlen * sizeof(unsigned char)); + //c += mlen; + } +} + +void Finalize_GenerateTag(u32 *s, unsigned char *c) { + P512(s, constant7Format_aead, PRF_ROUNDS); + // return tag + unpackU128FormatToFourPacket(c, s); + unpackU128FormatToFourPacket(c + 16, s + 4); +} +int Finalize_VerifyTag(u32 *s, const unsigned char *c, unsigned char *m, + unsigned long long *mlen) { + u8 tempU8[32] = { 0 }; + P512(s, constant7Format_aead, PRF_ROUNDS); + unpackU128FormatToFourPacket(tempU8, s); + unpackU128FormatToFourPacket(tempU8 + 16, s + 4); + if (memcmp((void*) tempU8, (void*) (c), CRYPTO_ABYTES)) { + memset(m, 0, sizeof(unsigned char) * (*mlen)); + *mlen = 0; + return -1; + } + return 0; +} +void ProcessCiphertext(u32 *s, unsigned char *m, const unsigned char *c, + unsigned long long clen) { + u32 
dataFormat[8] = { 0 }; + u32 dataFormat_1[4] = { 0 }; + u8 i, tempU8[64] = { 0 }; + if (clen) { + while (clen >= aead_RATE) { + packU128FormatToFourPacket(dataFormat, c); + dataFormat_1[0] = s[0] ^ dataFormat[0]; + dataFormat_1[1] = s[1] ^ dataFormat[1]; + dataFormat_1[2] = s[2] ^ dataFormat[2]; + dataFormat_1[3] = s[3] ^ dataFormat[3]; + unpackU128FormatToFourPacket(m, dataFormat_1); + s[0] = dataFormat[0]; + s[1] = dataFormat[1]; + s[2] = dataFormat[2]; + s[3] = dataFormat[3]; + P512(s, constant7Format_aead, PR_ROUNDS); + clen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + unpackU128FormatToFourPacket(tempU8, s); + for (i = 0; i < clen; ++i, ++m, ++c) { + *m = tempU8[i] ^ *c; + tempU8[i] = *c; + } + tempU8[i] ^= 0x01; + packU128FormatToFourPacket(s, tempU8); + } +} + +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + u32 s[16] = { 0 }; + *clen = mlen + CRYPTO_ABYTES; + //initialization + Initialize(s, npub, k); + // process associated data + ProcessAssocData(s, ad, adlen); + ProcessPlaintext(s, m, mlen, c); + // finalization + Finalize_GenerateTag(s, c + mlen); + return 0; +} +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + u32 s[16] = { 0 }; + if (clen < CRYPTO_ABYTES) + return -1; + *mlen = clen - CRYPTO_ABYTES; + //initialization + Initialize(s, npub, k); + ProcessAssocData(s, ad, adlen); + ProcessCiphertext(s, m, c, clen - CRYPTO_ABYTES); + // finalization + return Finalize_VerifyTag(s, c + clen - CRYPTO_KEYBYTES, m, mlen); +} -- libgit2 0.26.0