From 727508dcad43c07f01902ebfd15ab7f5f163258d Mon Sep 17 00:00:00 2001 From: Wentao Zhang Date: Wed, 18 Nov 2020 08:46:28 +0000 Subject: [PATCH] knot --- knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.c | 119 ++++++++++++++++++++++++++++++----------------------------------------------------------------------------------------- knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.h | 42 ++++++++++++++++++++++++++++-------------- knot/Implementations/crypto_aead/knot128v2/armcortexm_2/encrypt.c | 20 +++++++++++++------- knot/Implementations/crypto_aead/knot128v2/armcortexm_3/auxFormat.h | 180 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------------------------- knot/Implementations/crypto_aead/knot128v2/armcortexm_3/encrypt.c | 184 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------- knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.c | 120 ++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------ knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.h | 43 ++++++++++++++++++++++++++----------------- knot/Implementations/crypto_aead/knot128v2/armcortexm_4/encrypt.c | 20 +++++++++++++------- knot/Implementations/crypto_aead/knot128v2/armcortexm_5/auxFormat.c | 81 ++++++++++++++++++++++++++++++--------------------------------------------------- knot/Implementations/crypto_aead/knot128v2/armcortexm_5/auxFormat.h | 30 ++++++++++++++++++------------ knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.c | 81 ++++++++++++++++++++++++++++++--------------------------------------------------- knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.h | 36 ++++++++++++++++++++---------------- 
knot/Implementations/crypto_aead/knot128v2/opt_1/encrypt.c | 1 + knot/Implementations/crypto_aead/knot128v2/opt_3_ARR/encrypt.c | 1 + knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.c | 110 ++++++++++++++++++++++++++++++++------------------------------------------------------------------------------ knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.h | 35 ++++++++++++++++------------------- knot/Implementations/crypto_aead/knot192/armcortexm_2/encrypt.c | 27 ++++++++++++++++++++------- knot/Implementations/crypto_aead/knot192/armcortexm_3/auxFormat.h | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------------------------------------------------- knot/Implementations/crypto_aead/knot192/armcortexm_3/encrypt.c | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------- knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.c | 108 ++++++++++++++++++++++++++++++------------------------------------------------------------------------------ knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.h | 46 ++++++++++++++++++++++++---------------------- knot/Implementations/crypto_aead/knot192/armcortexm_4/encrypt.c | 34 +++++++++++++++++++++++++--------- knot/Implementations/crypto_aead/knot192/armcortexm_5/auxFormat.c | 134 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------- knot/Implementations/crypto_aead/knot192/armcortexm_5/auxFormat.h | 62 +++++++++++++++++++++++--------------------------------------- knot/Implementations/crypto_aead/knot192/armcortexm_5/encrypt.c | 46 ++++++++++------------------------------------ knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.c | 100 +++++++++++++++++++++++++++++----------------------------------------------------------------------- 
knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.h | 34 +++++++++++++++++++++------------- knot/Implementations/crypto_aead/knot192/opt_1/encrypt.c | 1 + knot/Implementations/crypto_aead/knot192/opt_2/encrypt.c | 1 + knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.c | 68 +++++++++++++++++++++++++++----------------------------------------- knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.h | 49 ++++++++++++++++++++++++++++--------------------- knot/Implementations/crypto_aead/knot256/armcortexm_2/encrypt.c | 36 ++++++++++++++++++++++++++++++------ knot/Implementations/crypto_aead/knot256/armcortexm_3/auxFormat.h | 102 +++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------- knot/Implementations/crypto_aead/knot256/armcortexm_3/encrypt.c | 309 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ knot/Implementations/crypto_aead/knot256/armcortexm_4/auxFormat.c | 26 ++++++++------------------ knot/Implementations/crypto_aead/knot256/armcortexm_4/auxFormat.h | 39 ++++++++++++++++++++++++--------------- knot/Implementations/crypto_aead/knot256/armcortexm_4/encrypt.c | 31 +++++++++++++++++++++++++------ knot/Implementations/crypto_aead/knot256/armcortexm_5/auxFormat.c | 40 +++++++++------------------------------- knot/Implementations/crypto_aead/knot256/armcortexm_5/auxFormat.h | 30 +++++++++++++++++++----------- knot/Implementations/crypto_aead/knot256/armcortexm_6/auxFormat.c | 41 ++++++++++------------------------------- knot/Implementations/crypto_aead/knot256/armcortexm_6/auxFormat.h | 30 +++++++++++++++++++----------- knot/Implementations/crypto_hash/knot256v1/LWC_HASH_KAT_256.txt | 1 + 
knot/Implementations/crypto_hash/knot256v2/armcortexm_2/auxFormat.c | 145 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------- knot/Implementations/crypto_hash/knot256v2/armcortexm_2/auxFormat.h | 28 ++++++++++++++++------------ knot/Implementations/crypto_hash/knot256v2/armcortexm_3/auxFormat.h | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------------------------------------- knot/Implementations/crypto_hash/knot256v2/armcortexm_3/hash.c | 5 +++-- knot/Implementations/crypto_hash/knot256v2/armcortexm_4/auxFormat.c | 155 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------- knot/Implementations/crypto_hash/knot256v2/armcortexm_4/auxFormat.h | 28 ++++++++++++++++------------ knot/Implementations/crypto_hash/knot256v2/armcortexm_4/hash.c | 5 +++-- knot/Implementations/crypto_hash/knot256v2/armcortexm_5/auxFormat.c | 147 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------------------------------- knot/Implementations/crypto_hash/knot256v2/armcortexm_5/auxFormat.h | 28 ++++++++++++++++------------ knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.c | 108 +++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------- knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.h | 28 ++++++++++++++++------------ knot/Implementations/crypto_hash/knot256v2/opt_1/hash.c | 1 + knot/Implementations/crypto_hash/knot384/LWC_HASH_KAT_384.txt | 2 ++ knot/Implementations/crypto_hash/knot384/armcortexm_2/auxFormat.c | 73 +++++++++++++++++++++++++++++-------------------------------------------- knot/Implementations/crypto_hash/knot384/armcortexm_2/auxFormat.h | 28 
++++++++++++++++------------ knot/Implementations/crypto_hash/knot384/armcortexm_3/auxFormat.h | 100 +++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------- knot/Implementations/crypto_hash/knot384/armcortexm_3/hash.c | 5 +++-- knot/Implementations/crypto_hash/knot384/armcortexm_4/auxFormat.c | 73 +++++++++++++++++++++++++++++-------------------------------------------- knot/Implementations/crypto_hash/knot384/armcortexm_4/auxFormat.h | 34 ++++++++++++++++++---------------- knot/Implementations/crypto_hash/knot384/armcortexm_4/hash.c | 5 +++-- knot/Implementations/crypto_hash/knot384/armcortexm_5/auxFormat.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------- knot/Implementations/crypto_hash/knot384/armcortexm_5/auxFormat.h | 3 +-- knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.c | 95 ++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------- knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.h | 28 ++++++++++++++++------------ knot/Implementations/crypto_hash/knot512/LWC_HASH_KAT_512.txt | 3 +++ knot/Implementations/crypto_hash/knot512/armcortexm_2/auxFormat.c | 23 ++++++++--------------- knot/Implementations/crypto_hash/knot512/armcortexm_2/auxFormat.h | 28 ++++++++++++++++------------ knot/Implementations/crypto_hash/knot512/armcortexm_3/auxFormat.h | 43 ++++++++++++++++++++++--------------------- knot/Implementations/crypto_hash/knot512/armcortexm_4/auxFormat.c | 23 ++++++++--------------- knot/Implementations/crypto_hash/knot512/armcortexm_4/auxFormat.h | 28 ++++++++++++++++------------ knot/Implementations/crypto_hash/knot512/armcortexm_5/auxFormat.c | 23 +++++++++-------------- knot/Implementations/crypto_hash/knot512/armcortexm_5/auxFormat.h | 27 ++++++++++++++++----------- knot/Implementations/crypto_hash/knot512/armcortexm_6/auxFormat.c | 23 +++++++++-------------- 
knot/Implementations/crypto_hash/knot512/armcortexm_6/auxFormat.h | 29 ++++++++++++++++------------- knot/Implementations/crypto_hash/knot512/opt_1/hash.c | 9 +++++---- knot/Implementations/crypto_hash/knot512/opt_SSE/hash.c | 1 + 78 files changed, 1911 insertions(+), 2490 deletions(-) diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.c b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.c index 6d00d13..b9e0da1 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.c +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.c @@ -1,96 +1,37 @@ #include"auxFormat.h" +void packU96FormatToThreePacket(u32 *out, u8 *in) { + u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ + puckU32ToThree_3(t0); \ + puckU32ToThree_3(t1); \ + puckU32ToThree_3(t2); \ -//puck begin// -void unpackU96FormatToThreePacket(u8 * out, u32 * in) { - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - u32 t1_32, t2_64, t2_65; - u32 t[3] = { 0 }; - temp0[0] = in[0] & 0xffe00000; - temp1[0] = (in[0] & 0x001ffc00) << 11; - temp2[0] = (in[0] & 0x000003ff) << 22; - temp0[1] = in[1] & 0xffe00000; - temp1[1] = (in[1] & 0x001ff800) << 11; - t2_64 = ((in[1] & 0x00000400) << 21); - temp2[1] = (in[1] & 0x000003ff) << 22; - temp0[2] = in[2] & 0xffc00000; - t1_32 = ((in[2] & 0x00200000) << 10); - temp1[2] = (in[2] & 0x001ff800) << 11; - t2_65 = ((in[2] & 0x00000400) << 20); - temp2[2] = (in[2] & 0x000003ff) << 22; - unpuckU32ToThree(temp0[0]); - unpuckU32ToThree(temp0[1]); - unpuckU32ToThree(temp0[2]); - t[2] = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; - unpuckU32ToThree(temp1[0]); - unpuckU32ToThree(temp1[1]); - unpuckU32ToThree(temp1[2]); - t[1] = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); - unpuckU32ToThree(temp2[0]); - unpuckU32ToThree(temp2[1]); - unpuckU32ToThree(temp2[2]); - t[0] = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 
2); - memcpy(out, t, 12 * sizeof(unsigned char)); +out[0] = ((((t2 >> 21) & 0x400) | (((t2 >> 18) & 0x300)) | ((t2 & 0xff00) >> 8)) << 21) | + ((((t1 >> 20) & 0x400) | ((t1 >> 16) & 0x300) | (t1 & 0xff)) << 10) | + (((t0 >> 20) & 0x300) | ((t0 & 0xff0000) >> 16)); +out[1] = ((((t2 >> 20) & 0x400) | ((t2 >> 16) & 0x300) | (t2 & 0xff)) << 21) | + ((((t1 >> 20) & 0x300) | ((t1 & 0xff0000) >> 16)) << 11) | + (((t0 >> 21) & 0x400) | ((t0 >> 18) & 0x300) | ((t0 & 0xff00) >> 8)); +out[2] = ((((t2 >> 20) & 0x300) | ((t2 & 0xff0000) >> 16)) << 22) | + ((((t1 >> 21) & 0x400) | ((t1 >> 18) & 0x300)|((t1 & 0xff00) >> 8)) << 11) | + (((t0 >> 20) & 0x400) | ((t0 >> 16) & 0x300) | (t0 & 0xff)); } -void packU96FormatToThreePacket(u32 * out, u8 * in) { - u32 t0 = U32BIG(((u32*)in)[2]); - u32 t1 = U32BIG(((u32*)in)[1]); - u32 t2 = U32BIG(((u32*)in)[0]); - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - u8 t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; - t1 = t1 << 1; - t2 = t2 << 2; - temp0[0] = t0; temp0[1] = t0 << 1; temp0[2] = t0 << 2; - puckU32ToThree(temp0[0]); - puckU32ToThree(temp0[1]); - puckU32ToThree(temp0[2]); - temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; - puckU32ToThree(temp1[0]); - puckU32ToThree(temp1[1]); - puckU32ToThree(temp1[2]); - temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; - puckU32ToThree(temp2[0]); - puckU32ToThree(temp2[1]); - puckU32ToThree(temp2[2]); - out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); - out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); - out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); -} - -void packU32FormatToThreePacket(u32 * out, u8 * in) { - u32 t2 = U32BIG(((u32*)in)[0]); - u32 temp2[3] = { 0 }; - u8 t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; - t2 = t2 << 2; - temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; - 
puckU32ToThree(temp2[0]); - puckU32ToThree(temp2[1]); - puckU32ToThree(temp2[2]); - out[0] = (temp2[0] >> 22); - out[1] = (((u32)t2_64) << 10) | (temp2[1] >> 22); - out[2] =(((u32)t2_65) << 10) | (temp2[2] >> 22); -} -void unpackU32FormatToThreePacket(u8 * out, u32 * in) { - u32 temp2[3] = { 0 }; - u32 t2_64, t2_65; - u32 t2; - temp2[0] = (in[0] & 0x000003ff) << 22; - - t2_64 = ((in[1] & 0x00000400) << 21); - temp2[1] = (in[1] & 0x000003ff) << 22; - - t2_65 = ((in[2] & 0x00000400) << 20); - temp2[2] = (in[2] & 0x000003ff) << 22; - - unpuckU32ToThree(temp2[0]); - unpuckU32ToThree(temp2[1]); - unpuckU32ToThree(temp2[2]); - t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); - *(u32*)(out) = U32BIG(t2); +void unpackU96FormatToThreePacket(u8 *out, u32 *in) { + u32 t[3] = { 0 } ;\ +u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ +t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ +((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ +((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ +t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ +((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ +((t0 & 0x3fc00) >> 10); \ +t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ +((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ +((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ +unpuckU32ToThree_3(t[0]); \ +unpuckU32ToThree_3(t[1]); \ +unpuckU32ToThree_3(t[2]); \ +memcpy(out, t, 12 * sizeof(unsigned char)); \ } void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) { u32 rci,t1,t2; diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.h index 301566c..aa7b91b 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.h +++ 
b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.h @@ -14,26 +14,40 @@ typedef unsigned long long u64; #define PR0_ROUNDS 76 #define PR_ROUNDS 28 #define PRF_ROUNDS 32 +/* +#define PR0_ROUNDS 76 +#define PR_ROUNDS 40 +#define PRF_ROUNDS 44 + +#define PR0_ROUNDS 76 +#define PR_ROUNDS 28 +#define PRF_ROUNDS 32 + * */ #define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) #define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) +void packU96FormatToThreePacket(u32 * out, u8 * in); +void unpackU96FormatToThreePacket(u8 * out, u32 * in); +void P384(unsigned int *s, unsigned char *round, unsigned char lunNum); -//////////////////puck begin -//&:5 <<:4 |:4 -#define puckU32ToThree(x){\ -x &= 0x92492492;\ -x = (x | (x << 2)) & 0xc30c30c3;\ -x = (x | (x << 4)) & 0xf00f00f0;\ -x = (x | (x << 8)) & 0xff0000ff;\ -x = (x | (x << 16)) & 0xfff00000;\ +#define puckU32ToThree_3(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ } -#define unpuckU32ToThree(x){\ -x &= 0xfff00000;\ -x = (x | (x >> 16)) & 0xff0000ff;\ -x = (x | (x >> 8)) & 0xf00f00f0;\ -x = (x | (x >> 4)) & 0xc30c30c3;\ -x = (x | (x >> 2)) & 0x92492492;\ +#define unpuckU32ToThree_3(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ } unsigned char constant7Format[80]; diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/encrypt.c 
b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/encrypt.c index 6c2bb34..011d8bf 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/encrypt.c +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/encrypt.c @@ -96,7 +96,7 @@ void ProcessCiphertext(u32 *s, unsigned char *m, const unsigned char *c, unsigne { u32 dataFormat[12] = { 0 }; u32 dataFormat_1[12] = { 0 }; - u8 tempU8[24] = { 0 },i; + u8 tempU8[24] = { 0 },tempData[24] = { 0 }; if (clen) { while (clen >= aead_RATE) { packU96FormatToThreePacket(dataFormat, c); @@ -123,12 +123,18 @@ void ProcessCiphertext(u32 *s, unsigned char *m, const unsigned char *c, unsigne } unpackU96FormatToThreePacket(tempU8, s); unpackU96FormatToThreePacket(tempU8 + 12, s + 3); - for (i = 0; i < clen; ++i, ++m, ++c) - { - *m = tempU8[i] ^ *c; - tempU8[i] = *c; - } - tempU8[i] ^= 0x01; + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, c, clen * sizeof(unsigned char)); + tempData[clen] = 0x01; + U32BIG(((u32*)tempU8)[0]) ^= U32BIG(((u32* )tempData)[0]); + U32BIG(((u32*)tempU8)[1]) ^= U32BIG(((u32* )tempData)[1]); + U32BIG(((u32*)tempU8)[2]) ^= U32BIG(((u32* )tempData)[2]); + U32BIG(((u32*)tempU8)[3]) ^= U32BIG(((u32* )tempData)[3]); + U32BIG(((u32*)tempU8)[4]) ^= U32BIG(((u32* )tempData)[4]); + U32BIG(((u32*)tempU8)[5]) ^= U32BIG(((u32* )tempData)[5]); + memcpy(m, tempU8, clen * sizeof(unsigned char)); + memcpy(tempU8, tempData, clen * sizeof(unsigned char)); + c += clen; packU96FormatToThreePacket(s, tempU8); packU96FormatToThreePacket(s + 3, tempU8 + 12); } diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/auxFormat.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/auxFormat.h index 73bf963..b545bcb 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/auxFormat.h @@ -12,99 +12,6 @@ typedef unsigned long long u64; #define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) #define 
LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) - -#define puckU32ToThree(x){\ -x &= 0x92492492;\ -x = (x | (x << 2)) & 0xc30c30c3;\ -x = (x | (x << 4)) & 0xf00f00f0;\ -x = (x | (x << 8)) & 0xff0000ff;\ -x = (x | (x << 16)) & 0xfff00000;\ -} -#define unpuckU32ToThree(x){\ -x &= 0xfff00000;\ -x = (x | (x >> 16)) & 0xff0000ff;\ -x = (x | (x >> 8)) & 0xf00f00f0;\ -x = (x | (x >> 4)) & 0xc30c30c3;\ -x = (x | (x >> 2)) & 0x92492492;\ -} -#define packU32FormatToThreePacket( out, in) {\ -t2 = U32BIG(((u32*)in)[0]); \ -t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \ -t2 = t2 << 2; \ -temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \ -puckU32ToThree(temp2[0]); \ -puckU32ToThree(temp2[1]); \ -puckU32ToThree(temp2[2]); \ -out[0] = (temp2[0] >> 22); \ -out[1] = (((u32)t2_64) << 10) | (temp2[1] >> 22); \ -out[2] =(((u32)t2_65) << 10) | (temp2[2] >> 22); \ -} -#define packU96FormatToThreePacket(out, in) {\ -t9 = U32BIG(((u32*)in)[2]); \ -t1 = U32BIG(((u32*)in)[1]); \ -t2 = U32BIG(((u32*)in)[0]); \ -t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \ -t1 = t1 << 1; \ -t2 = t2 << 2; \ -temp0[0] = t9; temp0[1] = t9 << 1; temp0[2] = t9 << 2; \ -puckU32ToThree(temp0[0]); \ -puckU32ToThree(temp0[1]); \ -puckU32ToThree(temp0[2]); \ -temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; \ -puckU32ToThree(temp1[0]); \ -puckU32ToThree(temp1[1]); \ -puckU32ToThree(temp1[2]); \ -temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \ -puckU32ToThree(temp2[0]); \ -puckU32ToThree(temp2[1]); \ -puckU32ToThree(temp2[2]); \ -out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); \ -out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); \ -out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); \ -} -#define unpackU32FormatToThreePacket(out, in) {\ -temp2[0] = (in[0] & 0x000003ff) << 22; \ -t2_64 = ((in[1] & 0x00000400) << 21); \ -temp2[1] = (in[1] & 0x000003ff) << 
22; \ -t2_65 = ((in[2] & 0x00000400) << 20); \ -temp2[2] = (in[2] & 0x000003ff) << 22; \ -unpuckU32ToThree(temp2[0]); \ -unpuckU32ToThree(temp2[1]); \ -unpuckU32ToThree(temp2[2]); \ -t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \ -*(u32*)(out) = U32BIG(t2); \ -} -#define unpackU96FormatToThreePacket( out, in) {\ -temp0[0] = in[0] & 0xffe00000; \ -temp1[0] = (in[0] & 0x001ffc00) << 11; \ -temp2[0] = (in[0] & 0x000003ff) << 22; \ -temp0[1] = in[1] & 0xffe00000; \ -temp1[1] = (in[1] & 0x001ff800) << 11; \ -t2_64 = ((in[1] & 0x00000400) << 21); \ -temp2[1] = (in[1] & 0x000003ff) << 22; \ -temp0[2] = in[2] & 0xffc00000; \ -t1_32 = ((in[2] & 0x00200000) << 10); \ -temp1[2] = (in[2] & 0x001ff800) << 11; \ -t2_65 = ((in[2] & 0x00000400) << 20); \ -temp2[2] = (in[2] & 0x000003ff) << 22; \ -unpuckU32ToThree(temp0[0]); \ -unpuckU32ToThree(temp0[1]); \ -unpuckU32ToThree(temp0[2]); \ -t9 = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; \ -unpuckU32ToThree(temp1[0]); \ -unpuckU32ToThree(temp1[1]); \ -unpuckU32ToThree(temp1[2]); \ -t1 = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); \ -unpuckU32ToThree(temp2[0]); \ -unpuckU32ToThree(temp2[1]); \ -unpuckU32ToThree(temp2[2]); \ -t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \ -*(u32*)(out) = U32BIG(t2); \ -*(u32*)(out + 4) = U32BIG(t1); \ -*(u32*)(out + 8) = U32BIG(t9); \ -} - -#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) #define sbox(a, b, c, d, f, g, h) \ { \ t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ @@ -120,11 +27,86 @@ t3= LOTR32(t1, 18); \ t4 = LOTR32(t2, 18);\ t5 = LOTR32(t0, 19); \ } -/* -s0 s1 s2 -s3 s4 s5 -s6 s7 s8 -s9 s10 s11 -*/ + +#define ROUND384(lunNum) {\ +s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;\ +s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;\ +s[2] ^= constant7Format[lunNum] & 0x7;\ +sbox(s[0], s[3], s[6], s[9] , s_temp[3], s_temp[6], 
s_temp[9]);\ +sbox(s[1], s[4], s[7], s[10], s[3] , s_temp[7], s_temp[10]);\ +sbox(s[2], s[5], s[8], s[11], s[4] , s_temp[8], s_temp[11]);\ +s[5] = LOTR32(s_temp[3], 1); \ +U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\ +U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\ +} + +//Processing_Data: +#define Processing_Data(data) \ +do { \ + packU96FormatToThreePacket(dataFormat, data); \ + s[0] ^= dataFormat[0]; \ + s[1] ^= dataFormat[1]; \ + s[2] ^= dataFormat[2]; \ + packU96FormatToThreePacket((dataFormat + 3), (data + 12)); \ + s[3] ^= dataFormat[3]; \ + s[4] ^= dataFormat[4]; \ + s[5] ^= dataFormat[5]; \ +} while (0) + + +#define puckU32ToThree_3(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +} +#define unpuckU32ToThree_3(lo){\ + u32 r0;\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +} + +#define packU96FormatToThreePacket( out, in) { \ + u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ + puckU32ToThree_3(t0); \ + puckU32ToThree_3(t1); \ + puckU32ToThree_3(t2); \ + out[0] = ((((t2 >> 21) & 0x400) | (((t2 >> 18) & 0x300)) | ((t2 & 0xff00) >> 8)) << 21) | \ + ((((t1 >> 20) & 0x400) | ((t1 >> 16) & 0x300) | (t1 & 0xff)) << 10) | \ + (((t0 >> 20) & 0x300) | ((t0 & 0xff0000) >> 16)); \ + out[1] = ((((t2 >> 20) & 0x400) | ((t2 >> 16) & 0x300) | 
(t2 & 0xff)) << 21) | \ + ((((t1 >> 20) & 0x300) | ((t1 & 0xff0000) >> 16)) << 11) | \ + (((t0 >> 21) & 0x400) | ((t0 >> 18) & 0x300) | ((t0 & 0xff00) >> 8)); \ + out[2] = ((((t2 >> 20) & 0x300) | ((t2 & 0xff0000) >> 16)) << 22) | \ + ((((t1 >> 21) & 0x400) | ((t1 >> 18) & 0x300)|((t1 & 0xff00) >> 8)) << 11) | \ + (((t0 >> 20) & 0x400) | ((t0 >> 16) & 0x300) | (t0 & 0xff)); \ +} + +#define unpackU96FormatToThreePacket( out, in) {\ + u32 t[3] = { 0 };\ + u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ + t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ + ((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ + ((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ + t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ + ((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ + ((t0 & 0x3fc00) >> 10); \ + t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ + ((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ + ((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ + unpuckU32ToThree_3(t[0]); \ + unpuckU32ToThree_3(t[1]); \ + unpuckU32ToThree_3(t[2]); \ + memcpy(out, t, 12 * sizeof(unsigned char)); \ +} + + diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/encrypt.c b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/encrypt.c index 28abf55..54608a1 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/encrypt.c +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/encrypt.c @@ -1,61 +1,47 @@ - #include"auxFormat.h" - - #define aead_RATE (192 / 8) + #define PR0_ROUNDS 76 #define PR_ROUNDS 28 #define PRF_ROUNDS 32 +/* -unsigned char constant7Format[127] = { - /*constant7Format[127]:*/ - 0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, - 0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, - 
0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, - 0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, - 0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, - 0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, - 0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,0xcf,0x5e,0xd3,0x9f, - 0xdc,0xc6,0x16,0x91,0x8d,0x4c,0x43,0x1a,0xd0,0x87,0x1c,0xc1, - 0x0f,0x59,0xca,0x57,0x9b,0xdd,0xce,0x56,0x93,0x9d,0xcc,0x46, - 0x13,0x98,0xc5,0x0e,0x51,0x8a,0x55,0x8b,0x5d,0xcb,0x5f,0xdb, - 0xdf,0xde,0xd6,0x96,0x94,0x84,0x04, }; -#define ROUND384(lunNum) {\ -s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;\ -s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;\ -s[2] ^= constant7Format[lunNum] & 0x7;\ -sbox(s[0], s[3], s[6], s[9] , s_temp[3], s_temp[6], s_temp[9]);\ -sbox(s[1], s[4], s[7], s[10], s[3] , s_temp[7], s_temp[10]);\ -sbox(s[2], s[5], s[8], s[11], s[4] , s_temp[8], s_temp[11]);\ -s[5] = LOTR32(s_temp[3], 1); \ -U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\ -U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\ -} + #define PR0_ROUNDS 76 + #define PR_ROUNDS 40 + #define PRF_ROUNDS 44 + + * */ +//12*6=72 +unsigned char constant7Format[76] = { +/*constant7Format[127]:*/ +0x01, 0x08, 0x40, 0x02, 0x10, 0x80, 0x05, 0x09, 0x48, 0x42, 0x12, 0x90, 0x85, + 0x0c, 0x41, 0x0a, 0x50, 0x82, 0x15, 0x89, 0x4d, 0x4b, 0x5a, 0xd2, 0x97, + 0x9c, 0xc4, 0x06, 0x11, 0x88, 0x45, 0x0b, 0x58, 0xc2, 0x17, 0x99, 0xcd, + 0x4e, 0x53, 0x9a, 0xd5, 0x8e, 0x54, 0x83, 0x1d, 0xc9, 0x4f, 0x5b, 0xda, + 0xd7, 0x9e, 0xd4, 0x86, 0x14, 0x81, 0x0d, 0x49, 0x4a, 0x52, 0x92, 0x95, + 0x8c, 0x44, 0x03, 0x18, 0xc0, 0x07, 0x19, 0xc8, 0x47, 0x1b, 0xd8, 0xc7, + 0x1e, 0xd1, 0x8f, }; int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, - const unsigned char *m, unsigned long long mlen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *nsec, const unsigned char *npub, - const unsigned char *k) { + const unsigned char *m, 
unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { u8 i; u32 s[12] = { 0 }; u8 tempData[24] = { 0 }; u32 dataFormat[6] = { 0 }; u32 s_temp[12] = { 0 }; u32 t1, t2, t3, t5, t6, t8, t9, t11; - u32 t1_32, t2_64, t2_65; - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; + u8 tempU8[24] = { 0 }; *clen = mlen + CRYPTO_ABYTES; // initialization packU96FormatToThreePacket(s, npub); - memcpy(tempData, npub+12, sizeof(unsigned char)*4); - memcpy(tempData+4, k, sizeof(unsigned char) * 16); + memcpy(tempData, npub + 12, sizeof(unsigned char) * 4); + memcpy(tempData + 4, k, sizeof(unsigned char) * 16); packU96FormatToThreePacket((s + 3), tempData); - packU96FormatToThreePacket((s + 6), (tempData+12)); - + packU96FormatToThreePacket((s + 6), (tempData + 12)); s[9] = 0x80000000; for (i = 0; i < PR0_ROUNDS; i++) { ROUND384(i); @@ -63,14 +49,8 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, // process associated data if (adlen) { while (adlen >= aead_RATE) { - packU96FormatToThreePacket(dataFormat, ad); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - s[2] ^= dataFormat[2]; - packU96FormatToThreePacket((dataFormat+3), (ad+12)); - s[3] ^= dataFormat[3]; - s[4] ^= dataFormat[4]; - s[5] ^= dataFormat[5]; + Processing_Data(ad); + for (i = 0; i < PR_ROUNDS; i++) { ROUND384(i); } @@ -80,31 +60,18 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, memset(tempData, 0, sizeof(tempData)); memcpy(tempData, ad, adlen * sizeof(unsigned char)); tempData[adlen] = 0x01; - packU96FormatToThreePacket(dataFormat, tempData); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - s[2] ^= dataFormat[2]; - packU96FormatToThreePacket((dataFormat + 3), (tempData + 12)); - s[3] ^= dataFormat[3]; - s[4] ^= dataFormat[4]; - s[5] ^= dataFormat[5]; + Processing_Data(tempData); for (i = 0; i < PR_ROUNDS; i++) { ROUND384(i); } } s[9] ^= 
0x80000000; + // process p data if (mlen) { while (mlen >= aead_RATE) { - packU96FormatToThreePacket(dataFormat, m); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - s[2] ^= dataFormat[2]; - packU96FormatToThreePacket((dataFormat + 3), (m + 12)); - s[3] ^= dataFormat[3]; - s[4] ^= dataFormat[4]; - s[5] ^= dataFormat[5]; + Processing_Data(m); unpackU96FormatToThreePacket(c, s); - unpackU96FormatToThreePacket((c+12), (s+3)); + unpackU96FormatToThreePacket((c + 12), (s + 3)); for (i = 0; i < PR_ROUNDS; i++) { ROUND384(i); } @@ -114,37 +81,30 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, } memset(tempData, 0, sizeof(tempData)); memcpy(tempData, m, mlen * sizeof(unsigned char)); - tempData[mlen]= 0x01; - packU96FormatToThreePacket(dataFormat, tempData); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - s[2] ^= dataFormat[2]; - packU96FormatToThreePacket((dataFormat + 3), (tempData + 12)); - s[3] ^= dataFormat[3]; - s[4] ^= dataFormat[4]; - s[5] ^= dataFormat[5]; + tempData[mlen] = 0x01; + Processing_Data(tempData); unpackU96FormatToThreePacket(tempData, s); - unpackU96FormatToThreePacket((tempData+12), (s+3)); - memcpy(c, tempData, mlen * sizeof(unsigned char)); + unpackU96FormatToThreePacket((tempData + 12), (s + 3)); + memcpy(c, tempData, mlen * sizeof(unsigned char)); c += mlen; } // finalization for (i = 0; i < PRF_ROUNDS; i++) { ROUND384(i); } - // return tag - unpackU96FormatToThreePacket(c, s); - unpackU96FormatToThreePacket(tempData, (s + 3)); - memcpy(c+12, tempData, sizeof(unsigned char) * 4); + unpackU96FormatToThreePacket(tempU8, s); + unpackU96FormatToThreePacket((tempU8 + 12), (s + 3)); + memcpy(c, tempU8, sizeof(unsigned char) * 12); + memcpy(c + 12, tempU8 + 12, sizeof(unsigned char) * 4); return 0; } int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, - unsigned char *nsec, const unsigned char *c, unsigned long long clen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *npub, const 
unsigned char *k) { + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { - u8 i, j; + u8 i; u32 s[12] = { 0 }; u32 s_temp[12] = { 0 }; u32 dataFormat[12] = { 0 }; @@ -152,10 +112,7 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, u8 tempData[24] = { 0 }; u8 tempU8[24] = { 0 }; u32 t1, t2, t3, t5, t6, t8, t9, t11; - u32 t1_32, t2_64, t2_65; - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; *mlen = clen - CRYPTO_ABYTES; + *mlen = clen - CRYPTO_ABYTES; if (clen < CRYPTO_ABYTES) return -1; // initialization @@ -164,7 +121,6 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, memcpy(tempData + 4, k, sizeof(unsigned char) * 16); packU96FormatToThreePacket((s + 3), tempData); packU96FormatToThreePacket((s + 6), (tempData + 12)); - s[9] = 0x80000000; for (i = 0; i < PR0_ROUNDS; i++) { ROUND384(i); @@ -172,14 +128,8 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, // process associated data if (adlen) { while (adlen >= aead_RATE) { - packU96FormatToThreePacket(dataFormat, ad); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - s[2] ^= dataFormat[2]; - packU96FormatToThreePacket((dataFormat + 3), (ad + 12)); - s[3] ^= dataFormat[3]; - s[4] ^= dataFormat[4]; - s[5] ^= dataFormat[5]; + Processing_Data(ad); + for (i = 0; i < PR_ROUNDS; i++) { ROUND384(i); } @@ -189,14 +139,7 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, memset(tempData, 0, sizeof(tempData)); memcpy(tempData, ad, adlen * sizeof(unsigned char)); tempData[adlen] = 0x01; - packU96FormatToThreePacket(dataFormat, tempData); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - s[2] ^= dataFormat[2]; - packU96FormatToThreePacket((dataFormat + 3), (tempData + 12)); - s[3] ^= dataFormat[3]; - s[4] ^= dataFormat[4]; - s[5] ^= dataFormat[5]; + Processing_Data(tempData); for (i = 0; i < 
PR_ROUNDS; i++) { ROUND384(i); } @@ -210,7 +153,7 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, dataFormat_1[0] = s[0] ^ dataFormat[0]; dataFormat_1[1] = s[1] ^ dataFormat[1]; dataFormat_1[2] = s[2] ^ dataFormat[2]; - packU96FormatToThreePacket((dataFormat+3), (c+12)); + packU96FormatToThreePacket((dataFormat + 3), (c + 12)); dataFormat_1[3] = s[3] ^ dataFormat[3]; dataFormat_1[4] = s[4] ^ dataFormat[4]; dataFormat_1[5] = s[5] ^ dataFormat[5]; @@ -230,13 +173,25 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, c += aead_RATE; } unpackU96FormatToThreePacket(tempU8, s); - unpackU96FormatToThreePacket((tempU8+12), (s+3)); - for (i = 0; i < clen; ++i, ++m, ++c) - { - *m = tempU8[i] ^ *c; - tempU8[i] = *c; - } - tempU8[i] ^= 0x01; + unpackU96FormatToThreePacket((tempU8 + 12), (s + 3)); + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, c, clen * sizeof(unsigned char)); + tempData[clen] = 0x01; + U32BIG(((u32*)tempU8)[0]) ^= U32BIG( + ((u32* )tempData)[0]); + U32BIG(((u32*)tempU8)[1]) ^= U32BIG( + ((u32* )tempData)[1]); + U32BIG(((u32*)tempU8)[2]) ^= U32BIG( + ((u32* )tempData)[2]); + U32BIG(((u32*)tempU8)[3]) ^= U32BIG( + ((u32* )tempData)[3]); + U32BIG(((u32*)tempU8)[4]) ^= U32BIG( + ((u32* )tempData)[4]); + U32BIG(((u32*)tempU8)[5]) ^= U32BIG( + ((u32* )tempData)[5]); + memcpy(m, tempU8, clen * sizeof(unsigned char)); + memcpy(tempU8, tempData, clen * sizeof(unsigned char)); + c += clen; packU96FormatToThreePacket(s, tempU8); packU96FormatToThreePacket((s + 3), (tempU8 + 12)); } @@ -244,12 +199,11 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, for (i = 0; i < PRF_ROUNDS; i++) { ROUND384(i); } - // return tag unpackU96FormatToThreePacket(tempU8, s); unpackU96FormatToThreePacket((tempU8 + 12), (s + 3)); - if (memcmp((void*)tempU8, (void*)c, CRYPTO_ABYTES)) { + if (memcmp((void*) tempU8, (void*) c, CRYPTO_ABYTES)) { + memset(m, 0, sizeof(unsigned char) * (*mlen)); *mlen = 0; - memset(m, 0, 
sizeof(unsigned char) * (clen - CRYPTO_ABYTES)); return -1; } return 0; diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.c b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.c index 2181f8f..8abb1d9 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.c +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.c @@ -1,96 +1,36 @@ #include"auxFormat.h" +void packU96FormatToThreePacket(u32 *out, u8 *in) { + u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ + puckU32ToThree_3(t0); \ + puckU32ToThree_3(t1); \ + puckU32ToThree_3(t2); \ + out[0] = ((((t2 >> 21) & 0x400) | (((t2 >> 18) & 0x300)) | ((t2 & 0xff00) >> 8)) << 21) | \ + ((((t1 >> 20) & 0x400) | ((t1 >> 16) & 0x300) | (t1 & 0xff)) << 10) | \ + (((t0 >> 20) & 0x300) | ((t0 & 0xff0000) >> 16)); \ + out[1] = ((((t2 >> 20) & 0x400) | ((t2 >> 16) & 0x300) | (t2 & 0xff)) << 21) | \ + ((((t1 >> 20) & 0x300) | ((t1 & 0xff0000) >> 16)) << 11) | \ + (((t0 >> 21) & 0x400) | ((t0 >> 18) & 0x300) | ((t0 & 0xff00) >> 8)); \ + out[2] = ((((t2 >> 20) & 0x300) | ((t2 & 0xff0000) >> 16)) << 22) | \ + ((((t1 >> 21) & 0x400) | ((t1 >> 18) & 0x300)|((t1 & 0xff00) >> 8)) << 11) | \ + (((t0 >> 20) & 0x400) | ((t0 >> 16) & 0x300) | (t0 & 0xff)); \ - -//puck begin// -void unpackU96FormatToThreePacket(u8 * out, u32 * in) { - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - u32 t1_32, t2_64, t2_65; - u32 t[3] = { 0 }; - temp0[0] = in[0] & 0xffe00000; - temp1[0] = (in[0] & 0x001ffc00) << 11; - temp2[0] = (in[0] & 0x000003ff) << 22; - temp0[1] = in[1] & 0xffe00000; - temp1[1] = (in[1] & 0x001ff800) << 11; - t2_64 = ((in[1] & 0x00000400) << 21); - temp2[1] = (in[1] & 0x000003ff) << 22; - temp0[2] = in[2] & 0xffc00000; - t1_32 = ((in[2] & 0x00200000) << 10); - temp1[2] = (in[2] & 0x001ff800) << 11; - t2_65 = ((in[2] & 0x00000400) << 20); - temp2[2] = (in[2] & 0x000003ff) << 22; - 
unpuckU32ToThree(temp0[0]); - unpuckU32ToThree(temp0[1]); - unpuckU32ToThree(temp0[2]); - t[2] = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; - unpuckU32ToThree(temp1[0]); - unpuckU32ToThree(temp1[1]); - unpuckU32ToThree(temp1[2]); - t[1] = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); - unpuckU32ToThree(temp2[0]); - unpuckU32ToThree(temp2[1]); - unpuckU32ToThree(temp2[2]); - t[0] = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); - memcpy(out, t, 12 * sizeof(unsigned char)); -} -void packU96FormatToThreePacket(u32 * out, u8 * in) { - u32 t0 = U32BIG(((u32*)in)[2]); - u32 t1 = U32BIG(((u32*)in)[1]); - u32 t2 = U32BIG(((u32*)in)[0]); - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - u8 t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; - t1 = t1 << 1; - t2 = t2 << 2; - temp0[0] = t0; temp0[1] = t0 << 1; temp0[2] = t0 << 2; - puckU32ToThree(temp0[0]); - puckU32ToThree(temp0[1]); - puckU32ToThree(temp0[2]); - temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; - puckU32ToThree(temp1[0]); - puckU32ToThree(temp1[1]); - puckU32ToThree(temp1[2]); - temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; - puckU32ToThree(temp2[0]); - puckU32ToThree(temp2[1]); - puckU32ToThree(temp2[2]); - out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); - out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); - out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); -} - -void packU32FormatToThreePacket(u32 * out, u8 * in) { - u32 t2 = U32BIG(((u32*)in)[0]); - u32 temp2[3] = { 0 }; - u8 t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; - t2 = t2 << 2; - temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; - puckU32ToThree(temp2[0]); - puckU32ToThree(temp2[1]); - puckU32ToThree(temp2[2]); - out[0] = (temp2[0] >> 22); - out[1] = (((u32)t2_64) << 10) | (temp2[1] >> 22); - out[2] =(((u32)t2_65) << 10) | 
(temp2[2] >> 22); } -void unpackU32FormatToThreePacket(u8 * out, u32 * in) { - u32 temp2[3] = { 0 }; - u32 t2_64, t2_65; - u32 t2; - temp2[0] = (in[0] & 0x000003ff) << 22; - - t2_64 = ((in[1] & 0x00000400) << 21); - temp2[1] = (in[1] & 0x000003ff) << 22; - - t2_65 = ((in[2] & 0x00000400) << 20); - temp2[2] = (in[2] & 0x000003ff) << 22; - - unpuckU32ToThree(temp2[0]); - unpuckU32ToThree(temp2[1]); - unpuckU32ToThree(temp2[2]); - t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); - *(u32*)(out) = U32BIG(t2); +void unpackU96FormatToThreePacket(u8 *out, u32 *in) { + u32 t[3] = { 0 } ;\ +u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ +t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ +((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ +((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ +t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ +((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ +((t0 & 0x3fc00) >> 10); \ +t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ +((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ +((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ +unpuckU32ToThree_3(t[0]); \ +unpuckU32ToThree_3(t[1]); \ +unpuckU32ToThree_3(t[2]); \ +memcpy(out, t, 12 * sizeof(unsigned char)); \ } void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) { u32 s_temp[12] = { 0 }; diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.h index 65d914f..445812e 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.h @@ -14,7 +14,14 @@ typedef unsigned long long u64; #define PR0_ROUNDS 76 #define PR_ROUNDS 28 #define PRF_ROUNDS 32 +/* +#define PR0_ROUNDS 76 +#define 
PR_ROUNDS 40 +#define PRF_ROUNDS 44 + + + * */ #define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) #define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) @@ -35,24 +42,26 @@ t4 = LOTR32(t2, 18);\ t5 = LOTR32(t0, 19); \ } -//////////////////puck begin -//&:5 <<:4 |:4 -#define puckU32ToThree(x){\ -x &= 0x92492492;\ -x = (x | (x << 2)) & 0xc30c30c3;\ -x = (x | (x << 4)) & 0xf00f00f0;\ -x = (x | (x << 8)) & 0xff0000ff;\ -x = (x | (x << 16)) & 0xfff00000;\ -} -#define unpuckU32ToThree(x){\ -x &= 0xfff00000;\ -x = (x | (x >> 16)) & 0xff0000ff;\ -x = (x | (x >> 8)) & 0xf00f00f0;\ -x = (x | (x >> 4)) & 0xc30c30c3;\ -x = (x | (x >> 2)) & 0x92492492;\ -} - void packU96FormatToThreePacket(u32 * out, u8 * in); void unpackU96FormatToThreePacket(u8 * out, u32 * in); unsigned char constant7Format[80]; + +#define puckU32ToThree_3(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +} +#define unpuckU32ToThree_3(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +} diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/encrypt.c b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/encrypt.c index 6c2bb34..011d8bf 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/encrypt.c +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/encrypt.c @@ -96,7 +96,7 @@ void ProcessCiphertext(u32 *s, unsigned char *m, 
const unsigned char *c, unsigne { u32 dataFormat[12] = { 0 }; u32 dataFormat_1[12] = { 0 }; - u8 tempU8[24] = { 0 },i; + u8 tempU8[24] = { 0 },tempData[24] = { 0 }; if (clen) { while (clen >= aead_RATE) { packU96FormatToThreePacket(dataFormat, c); @@ -123,12 +123,18 @@ void ProcessCiphertext(u32 *s, unsigned char *m, const unsigned char *c, unsigne } unpackU96FormatToThreePacket(tempU8, s); unpackU96FormatToThreePacket(tempU8 + 12, s + 3); - for (i = 0; i < clen; ++i, ++m, ++c) - { - *m = tempU8[i] ^ *c; - tempU8[i] = *c; - } - tempU8[i] ^= 0x01; + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, c, clen * sizeof(unsigned char)); + tempData[clen] = 0x01; + U32BIG(((u32*)tempU8)[0]) ^= U32BIG(((u32* )tempData)[0]); + U32BIG(((u32*)tempU8)[1]) ^= U32BIG(((u32* )tempData)[1]); + U32BIG(((u32*)tempU8)[2]) ^= U32BIG(((u32* )tempData)[2]); + U32BIG(((u32*)tempU8)[3]) ^= U32BIG(((u32* )tempData)[3]); + U32BIG(((u32*)tempU8)[4]) ^= U32BIG(((u32* )tempData)[4]); + U32BIG(((u32*)tempU8)[5]) ^= U32BIG(((u32* )tempData)[5]); + memcpy(m, tempU8, clen * sizeof(unsigned char)); + memcpy(tempU8, tempData, clen * sizeof(unsigned char)); + c += clen; packU96FormatToThreePacket(s, tempU8); packU96FormatToThreePacket(s + 3, tempU8 + 12); } diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_5/auxFormat.c b/knot/Implementations/crypto_aead/knot128v2/armcortexm_5/auxFormat.c index 59794dd..71bc26a 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_5/auxFormat.c +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_5/auxFormat.c @@ -17,58 +17,37 @@ void ROUND384_Three(unsigned int *s, unsigned char *c, int lunnum) { t++; } } - void packU96FormatToThreePacket(u32 *out, u8 *in) { - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - temp0[0] = U32BIG(((u32* )in)[0]); - temp0[1] = U32BIG(((u32*)in)[0]) >> 1; - temp0[2] = U32BIG(((u32*)in)[0]) >> 2; - puckU32ToThree_1(temp0[0]); - puckU32ToThree_1(temp0[1]); - 
puckU32ToThree_1(temp0[2]); - temp1[0] = U32BIG(((u32* )in)[1]); - temp1[1] = U32BIG(((u32*)in)[1]) >> 1; - temp1[2] = U32BIG(((u32*)in)[1]) >> 2; - puckU32ToThree_1(temp1[0]); - puckU32ToThree_1(temp1[1]); - puckU32ToThree_1(temp1[2]); - temp2[0] = U32BIG(((u32* )in)[2]); - temp2[1] = U32BIG(((u32*)in)[2]) >> 1; - temp2[2] = U32BIG(((u32*)in)[2]) >> 2; - puckU32ToThree_1(temp2[0]); - puckU32ToThree_1(temp2[1]); - puckU32ToThree_1(temp2[2]); - out[0] = (temp2[1] << 21) | (temp1[0] << 10) | temp0[2]; - out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; - out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; + u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ + puckU32ToThree_3(t0); \ + puckU32ToThree_3(t1); \ + puckU32ToThree_3(t2); \ + out[0] = ((((t2 >> 21) & 0x400) | (((t2 >> 18) & 0x300)) | ((t2 & 0xff00) >> 8)) << 21) | \ + ((((t1 >> 20) & 0x400) | ((t1 >> 16) & 0x300) | (t1 & 0xff)) << 10) | \ + (((t0 >> 20) & 0x300) | ((t0 & 0xff0000) >> 16)); \ + out[1] = ((((t2 >> 20) & 0x400) | ((t2 >> 16) & 0x300) | (t2 & 0xff)) << 21) | \ + ((((t1 >> 20) & 0x300) | ((t1 & 0xff0000) >> 16)) << 11) | \ + (((t0 >> 21) & 0x400) | ((t0 >> 18) & 0x300) | ((t0 & 0xff00) >> 8)); \ + out[2] = ((((t2 >> 20) & 0x300) | ((t2 & 0xff0000) >> 16)) << 22) | \ + ((((t1 >> 21) & 0x400) | ((t1 >> 18) & 0x300)|((t1 & 0xff00) >> 8)) << 11) | \ + (((t0 >> 20) & 0x400) | ((t0 >> 16) & 0x300) | (t0 & 0xff)); \ + } void unpackU96FormatToThreePacket(u8 *out, u32 *in) { - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - u32 t[3] = { 0 }; - temp0[0] = in[2] & 0x7ff; - temp0[1] = in[1] & 0x7ff; - temp0[2] = in[0] & 0x3ff; - temp1[0] = (in[0] >> 10) & 0x7ff; - temp1[1] = (in[2] >> 11) & 0x7ff; - temp1[2] = (in[1] >> 11) & 0x3ff; - temp2[0] = in[1] >> 21; - temp2[1] = in[0] >> 21; - temp2[2] = in[2] >> 22; - unpuckU32ToThree_1(temp0[0]); - unpuckU32ToThree_1(temp0[1]); - unpuckU32ToThree_1(temp0[2]); - t[0] = temp0[0] | temp0[1] 
<< 1 | temp0[2] << 2; - unpuckU32ToThree_1(temp1[0]); - unpuckU32ToThree_1(temp1[1]); - unpuckU32ToThree_1(temp1[2]); - t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; - unpuckU32ToThree_1(temp2[0]); - unpuckU32ToThree_1(temp2[1]); - unpuckU32ToThree_1(temp2[2]); - t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; - memcpy(out, t, 12 * sizeof(unsigned char)); + u32 t[3] = { 0 } ;\ +u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ +t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ +((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ +((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ +t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ +((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ +((t0 & 0x3fc00) >> 10); \ +t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ +((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ +((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ +unpuckU32ToThree_3(t[0]); \ +unpuckU32ToThree_3(t[1]); \ +unpuckU32ToThree_3(t[2]); \ +memcpy(out, t, 12 * sizeof(unsigned char)); \ } + diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_5/auxFormat.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_5/auxFormat.h index 1ebb614..0a4e119 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_5/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_5/auxFormat.h @@ -11,19 +11,25 @@ typedef unsigned long long u64; void ROUND384_Three(unsigned int *s, unsigned char *c,int lunnum); -#define puckU32ToThree_1(x){\ -x &= 0x49249249;\ -x = (x | (x >> 2)) & 0xc30c30c3;\ -x = (x | (x >>4)) & 0x0f00f00f;\ -x = (x | (x >> 8)) & 0xff0000ff;\ -x = (x | (x >> 16)) & 0xfff;\ + + +#define puckU32ToThree_3(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = 
(lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ } -#define unpuckU32ToThree_1(x){\ -x &= 0xfff;\ -x = (x | (x << 16)) & 0xff0000ff;\ -x = (x | (x << 8)) & 0x0f00f00f;\ -x = (x | (x << 4)) & 0xc30c30c3;\ -x = (x | (x << 2)) & 0x49249249;\ +#define unpuckU32ToThree_3(lo){\ + u32 r0;\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ } #define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.c b/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.c index e0b8ef8..452920a 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.c +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.c @@ -1,61 +1,40 @@ #include"auxFormat.h" - void packU96FormatToThreePacket(u32 *out, u8 *in) { - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - temp0[0] = U32BIG(((u32* )in)[0]); - temp0[1] = U32BIG(((u32*)in)[0]) >> 1; - temp0[2] = U32BIG(((u32*)in)[0]) >> 2; - puckU32ToThree_1(temp0[0]); - puckU32ToThree_1(temp0[1]); - puckU32ToThree_1(temp0[2]); - temp1[0] = U32BIG(((u32* )in)[1]); - temp1[1] = U32BIG(((u32*)in)[1]) >> 1; - temp1[2] = U32BIG(((u32*)in)[1]) >> 2; - puckU32ToThree_1(temp1[0]); - puckU32ToThree_1(temp1[1]); - puckU32ToThree_1(temp1[2]); - temp2[0] = U32BIG(((u32* )in)[2]); - temp2[1] = U32BIG(((u32*)in)[2]) >> 1; - temp2[2] = U32BIG(((u32*)in)[2]) >> 2; - puckU32ToThree_1(temp2[0]); - puckU32ToThree_1(temp2[1]); - 
puckU32ToThree_1(temp2[2]); - out[0] = (temp2[1] << 21) | (temp1[0] << 10) | temp0[2]; - out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; - out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; + u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ + puckU32ToThree_3(t0); \ + puckU32ToThree_3(t1); \ + puckU32ToThree_3(t2); \ + out[0] = ((((t2 >> 21) & 0x400) | (((t2 >> 18) & 0x300)) | ((t2 & 0xff00) >> 8)) << 21) | \ + ((((t1 >> 20) & 0x400) | ((t1 >> 16) & 0x300) | (t1 & 0xff)) << 10) | \ + (((t0 >> 20) & 0x300) | ((t0 & 0xff0000) >> 16)); \ + out[1] = ((((t2 >> 20) & 0x400) | ((t2 >> 16) & 0x300) | (t2 & 0xff)) << 21) | \ + ((((t1 >> 20) & 0x300) | ((t1 & 0xff0000) >> 16)) << 11) | \ + (((t0 >> 21) & 0x400) | ((t0 >> 18) & 0x300) | ((t0 & 0xff00) >> 8)); \ + out[2] = ((((t2 >> 20) & 0x300) | ((t2 & 0xff0000) >> 16)) << 22) | \ + ((((t1 >> 21) & 0x400) | ((t1 >> 18) & 0x300)|((t1 & 0xff00) >> 8)) << 11) | \ + (((t0 >> 20) & 0x400) | ((t0 >> 16) & 0x300) | (t0 & 0xff)); \ + } void unpackU96FormatToThreePacket(u8 *out, u32 *in) { - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - u32 t[3] = { 0 }; - temp0[0] = in[2] & 0x7ff; - temp0[1] = in[1] & 0x7ff; - temp0[2] = in[0] & 0x3ff; - temp1[0] = (in[0] >> 10) & 0x7ff; - temp1[1] = (in[2] >> 11) & 0x7ff; - temp1[2] = (in[1] >> 11) & 0x3ff; - temp2[0] = in[1] >> 21; - temp2[1] = in[0] >> 21; - temp2[2] = in[2] >> 22; - unpuckU32ToThree_1(temp0[0]); - unpuckU32ToThree_1(temp0[1]); - unpuckU32ToThree_1(temp0[2]); - t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; - unpuckU32ToThree_1(temp1[0]); - unpuckU32ToThree_1(temp1[1]); - unpuckU32ToThree_1(temp1[2]); - t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; - unpuckU32ToThree_1(temp2[0]); - unpuckU32ToThree_1(temp2[1]); - unpuckU32ToThree_1(temp2[2]); - t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; - memcpy(out, t, 12 * sizeof(unsigned char)); + u32 t[3] = { 0 } ;\ +u32 t0 = in[0], t1 = in[1], t2 = 
in[2]; \ +t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ +((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ +((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ +t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ +((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ +((t0 & 0x3fc00) >> 10); \ +t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ +((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ +((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ +unpuckU32ToThree_3(t[0]); \ +unpuckU32ToThree_3(t[1]); \ +unpuckU32ToThree_3(t[2]); \ +memcpy(out, t, 12 * sizeof(unsigned char)); \ } + void ROUND384_Three(unsigned int *s, unsigned char *c, int lunnum) { unsigned int t, t1, t2; u32 rci, temp; diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.h index f2d0b5c..c7b7c95 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.h @@ -11,19 +11,23 @@ typedef unsigned long long u64; void ROUND384_Three(unsigned int *s, unsigned char *c,int lunnum); -#define puckU32ToThree_1(x){\ -x &= 0x49249249;\ -x = (x | (x >> 2)) & 0xc30c30c3;\ -x = (x | (x >>4)) & 0x0f00f00f;\ -x = (x | (x >> 8)) & 0xff0000ff;\ -x = (x | (x >> 16)) & 0xfff;\ +#define puckU32ToThree_3(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ } -#define unpuckU32ToThree_1(x){\ -x &= 0xfff;\ -x = (x | (x << 16)) & 
0xff0000ff;\ -x = (x | (x << 8)) & 0x0f00f00f;\ -x = (x | (x << 4)) & 0xc30c30c3;\ -x = (x | (x << 2)) & 0x49249249;\ +#define unpuckU32ToThree_3(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ } #define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) @@ -171,10 +175,10 @@ void ROUND384_Three(unsigned int *s, unsigned char *c,int lunnum); u32 t1,rci;\ ROUND384_Three(s,round,lunNum);\ rci=round[lunNum*3+1];\ -ARC(rci);\ -SBOX1_ROR(s[0], s[4], s[8], s[10] );\ -SBOX2_ROR(s[1], s[5], s[6], s[11]);\ -SBOX3_ROR(s[2], s[3], s[7], s[9]);\ + ARC(rci);\ + SBOX1_ROR(s[0], s[4], s[8], s[10] );\ + SBOX2_ROR(s[1], s[5], s[6], s[11]);\ + SBOX3_ROR(s[2], s[3], s[7], s[9]);\ __asm__ __volatile__ ( \ "/*rotate shift left 1 bit [w9 w5 w1-> (w1,1) w9 w5] */ \n\t"\ "mov %[t1], %[S_4] \n\t"\ diff --git a/knot/Implementations/crypto_aead/knot128v2/opt_1/encrypt.c b/knot/Implementations/crypto_aead/knot128v2/opt_1/encrypt.c index 1809c4f..75d26fa 100644 --- a/knot/Implementations/crypto_aead/knot128v2/opt_1/encrypt.c +++ b/knot/Implementations/crypto_aead/knot128v2/opt_1/encrypt.c @@ -1,4 +1,5 @@ #include"api.h" +#include typedef unsigned char u8; typedef unsigned long long u64; typedef unsigned int u32; diff --git a/knot/Implementations/crypto_aead/knot128v2/opt_3_ARR/encrypt.c b/knot/Implementations/crypto_aead/knot128v2/opt_3_ARR/encrypt.c index 9cb8b3b..0645f40 100644 --- a/knot/Implementations/crypto_aead/knot128v2/opt_3_ARR/encrypt.c +++ b/knot/Implementations/crypto_aead/knot128v2/opt_3_ARR/encrypt.c @@ -1,4 +1,5 @@ #include"api.h" +#include typedef unsigned char u8; typedef unsigned long long u64; typedef unsigned int u32; diff --git 
a/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.c b/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.c index 896381a..e263a92 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.c +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.c @@ -12,85 +12,39 @@ void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) { } -//puck begin// -void unpackU96FormatToThreePacket(u8 * out, u32 * in) { - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - u32 t1_32, t2_64, t2_65; - u32 t[3] = { 0 }; - temp0[0] = in[0] & 0xffe00000; - temp1[0] = (in[0] & 0x001ffc00) << 11; - temp2[0] = (in[0] & 0x000003ff) << 22; - temp0[1] = in[1] & 0xffe00000; - temp1[1] = (in[1] & 0x001ff800) << 11; - t2_64 = ((in[1] & 0x00000400) << 21); - temp2[1] = (in[1] & 0x000003ff) << 22; - temp0[2] = in[2] & 0xffc00000; - t1_32 = ((in[2] & 0x00200000) << 10); - temp1[2] = (in[2] & 0x001ff800) << 11; - t2_65 = ((in[2] & 0x00000400) << 20); - temp2[2] = (in[2] & 0x000003ff) << 22; - unpuckU32ToThree(temp0[0]); - unpuckU32ToThree(temp0[1]); - unpuckU32ToThree(temp0[2]); - t[2] = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; - unpuckU32ToThree(temp1[0]); - unpuckU32ToThree(temp1[1]); - unpuckU32ToThree(temp1[2]); - t[1] = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); - unpuckU32ToThree(temp2[0]); - unpuckU32ToThree(temp2[1]); - unpuckU32ToThree(temp2[2]); - t[0] = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); - memcpy(out, t, 12 * sizeof(unsigned char)); -} -void packU96FormatToThreePacket(u32 * out, u8 * in) { - u32 t0 = U32BIG(((u32*)in)[2]); - u32 t1 = U32BIG(((u32*)in)[1]); - u32 t2 = U32BIG(((u32*)in)[0]); - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - u32 temp0[3] = { 0 }; - u8 t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; - t1 = t1 << 1; - t2 = t2 << 2; - temp0[0] = t0; temp0[1] = t0 << 1; temp0[2] = t0 << 2; - 
puckU32ToThree(temp0[0]); - puckU32ToThree(temp0[1]); - puckU32ToThree(temp0[2]); - temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; - puckU32ToThree(temp1[0]); - puckU32ToThree(temp1[1]); - puckU32ToThree(temp1[2]); - temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; - puckU32ToThree(temp2[0]); - puckU32ToThree(temp2[1]); - puckU32ToThree(temp2[2]); - out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); - out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); - out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); -} -void packU48FormatToThreePacket(u32 * out, u8 * in) { - u32 t1 = (u32)U16BIG(*(u16*)(in + 4)); - u32 t2 = U32BIG(*(u32*)(in)); - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - u8 t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; - t1 = t1 << 1; - t2 = t2 << 2; - temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; - puckU32ToThree(temp1[0]); - puckU32ToThree(temp1[1]); - puckU32ToThree(temp1[2]); - temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; - puckU32ToThree(temp2[0]); - puckU32ToThree(temp2[1]); - puckU32ToThree(temp2[2]); - out[0] = (temp1[0] >> 11) | (temp2[0] >> 22); - out[1] = (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); - out[2] = (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); +void packU96FormatToThreePacket(u32 *out, u8 *in) { + u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ + puckU32ToThree_3(t0); \ + puckU32ToThree_3(t1); \ + puckU32ToThree_3(t2); \ + + out[0] = ((((t2 >> 21) & 0x400) | (((t2 >> 18) & 0x300)) | ((t2 & 0xff00) >> 8)) << 21) | + ((((t1 >> 20) & 0x400) | ((t1 >> 16) & 0x300) | (t1 & 0xff)) << 10) | + (((t0 >> 20) & 0x300) | ((t0 & 0xff0000) >> 16)); + out[1] = ((((t2 >> 20) & 0x400) | ((t2 >> 16) & 0x300) | (t2 & 0xff)) << 21) | + ((((t1 >> 20) & 0x300) | ((t1 & 0xff0000) >> 16)) << 11) | + (((t0 >> 21) & 0x400) | ((t0 >> 18) & 0x300) | ((t0 
& 0xff00) >> 8)); + out[2] = ((((t2 >> 20) & 0x300) | ((t2 & 0xff0000) >> 16)) << 22) | + ((((t1 >> 21) & 0x400) | ((t1 >> 18) & 0x300)|((t1 & 0xff00) >> 8)) << 11) | + (((t0 >> 20) & 0x400) | ((t0 >> 16) & 0x300) | (t0 & 0xff)); +} +void unpackU96FormatToThreePacket(u8 *out, u32 *in) { + u32 t[3] = { 0 } ;\ +u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ +t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ +((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ +((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ +t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ +((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ +((t0 & 0x3fc00) >> 10); \ +t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ +((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ +((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ +unpuckU32ToThree_3(t[0]); \ +unpuckU32ToThree_3(t[1]); \ +unpuckU32ToThree_3(t[2]); \ +memcpy(out, t, 12 * sizeof(unsigned char)); \ } unsigned char constant7Format[76] = { /*constant7Format[127]:*/ diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.h b/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.h index 49f00e6..aaa41e6 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.h @@ -16,26 +16,23 @@ typedef unsigned long long u64; #define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) - -/* -s0 s1 s2 -s3 s4 s5 -s6 s7 s8 -s9 s10 s11 -*/ -#define puckU32ToThree(x){\ -x &= 0x92492492;\ -x = (x | (x << 2)) & 0xc30c30c3;\ -x = (x | (x << 4)) & 0xf00f00f0;\ -x = (x | (x << 8)) & 0xff0000ff;\ -x = (x | (x << 16)) & 0xfff00000;\ +#define puckU32ToThree_3(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 
>> 3);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ } -#define unpuckU32ToThree(x){\ -x &= 0xfff00000;\ -x = (x | (x >> 16)) & 0xff0000ff;\ -x = (x | (x >> 8)) & 0xf00f00f0;\ -x = (x | (x >> 4)) & 0xc30c30c3;\ -x = (x | (x >> 2)) & 0x92492492;\ +#define unpuckU32ToThree_3(lo){\ + u32 r0;\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ } unsigned char constant7Format[76]; diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_2/encrypt.c b/knot/Implementations/crypto_aead/knot192/armcortexm_2/encrypt.c index f4fc2ce..a539edd 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_2/encrypt.c +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_2/encrypt.c @@ -4,6 +4,16 @@ #define PR0_ROUNDS 76 #define PR_ROUNDS 40 #define PRF_ROUNDS 44 +/* + +#define PR0_ROUNDS 76 +#define PR_ROUNDS 56 +#define PRF_ROUNDS 60 + +#define PR0_ROUNDS 76 +#define PR_ROUNDS 40 +#define PRF_ROUNDS 44 + * */ void Initialize(u32 *s, const unsigned char *npub, const unsigned char *k) { packU96FormatToThreePacket(s, npub); packU96FormatToThreePacket(s + 3, npub + 12); @@ -91,7 +101,7 @@ void ProcessCiphertext(u32 *s, unsigned char *m, const unsigned char *c, unsigne { u32 dataFormat[6] = { 0 }; u32 dataFormat_1[3] = { 0 }; - u8 i,tempU8[48] = { 0 }; + u8 tempData[48] = { 0 },tempU8[48] = { 0 }; if (clen) { while (clen >= aead_RATE) { packU96FormatToThreePacket(dataFormat, c); @@ -108,12 +118,15 @@ void ProcessCiphertext(u32 *s, unsigned char *m, const unsigned char 
*c, unsigne c += aead_RATE; } unpackU96FormatToThreePacket(tempU8, s); - for (i = 0; i < clen; ++i, ++m, ++c) - { - *m = tempU8[i] ^ *c; - tempU8[i] = *c; - } - tempU8[i] ^= 0x01; + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, c, clen * sizeof(unsigned char)); + tempData[clen] = 0x01; + U32BIG(((u32*)tempU8)[0]) ^= U32BIG(((u32* )tempData)[0]); + U32BIG(((u32*)tempU8)[1]) ^= U32BIG(((u32* )tempData)[1]); + U32BIG(((u32*)tempU8)[2]) ^= U32BIG(((u32* )tempData)[2]); + memcpy(m, tempU8, clen * sizeof(unsigned char)); + memcpy(tempU8, tempData, clen * sizeof(unsigned char)); + c += clen; packU96FormatToThreePacket(s, tempU8); } diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_3/auxFormat.h b/knot/Implementations/crypto_aead/knot192/armcortexm_3/auxFormat.h index f48ef1d..0320037 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_3/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_3/auxFormat.h @@ -16,98 +16,65 @@ typedef unsigned long long u64; #define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) -#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +unsigned char constant7Format[76] ; #define sbox(a, b, c, d, f, g, h) \ { \ t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ } - -#define puckU32ToThree(x){\ -x &= 0x92492492;\ -x = (x | (x << 2)) & 0xc30c30c3;\ -x = (x | (x << 4)) & 0xf00f00f0;\ -x = (x | (x << 8)) & 0xff0000ff;\ -x = (x | (x << 16)) & 0xfff00000;\ + +#define puckU32ToThree_3(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ } -#define unpuckU32ToThree(x){\ -x &= 0xfff00000;\ 
-x = (x | (x >> 16)) & 0xff0000ff;\ -x = (x | (x >> 8)) & 0xf00f00f0;\ -x = (x | (x >> 4)) & 0xc30c30c3;\ -x = (x | (x >> 2)) & 0x92492492;\ -} -#define packU48FormatToThreePacket( out, in) {\ -t1 = (u32)U16BIG(*(u16*)(in + 4)); \ -t2 = U32BIG(*(u32*)(in)); \ -t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \ -t1 = t1 << 1; \ -t2 = t2 << 2; \ -temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; \ -puckU32ToThree(temp1[0]); \ -puckU32ToThree(temp1[1]); \ -puckU32ToThree(temp1[2]); \ -temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \ -puckU32ToThree(temp2[0]); \ -puckU32ToThree(temp2[1]); \ -puckU32ToThree(temp2[2]); \ -out[0] = (temp1[0] >> 11) | (temp2[0] >> 22); \ -out[1] = (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); \ -out[2] = (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); \ +#define unpuckU32ToThree_3(lo){\ + u32 r0;\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ } +#define packU96FormatToThreePacket( out, in) { \ + u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ + puckU32ToThree_3(t0); \ + puckU32ToThree_3(t1); \ + puckU32ToThree_3(t2); \ + out[0] = ((((t2 >> 21) & 0x400) | (((t2 >> 18) & 0x300)) | ((t2 & 0xff00) >> 8)) << 21) | \ + ((((t1 >> 20) & 0x400) | ((t1 >> 16) & 0x300) | (t1 & 0xff)) << 10) | \ + (((t0 >> 20) & 0x300) | ((t0 & 0xff0000) >> 16)); \ + out[1] = ((((t2 >> 20) & 0x400) | ((t2 >> 16) & 0x300) | (t2 & 0xff)) << 21) | \ + ((((t1 >> 20) & 0x300) | ((t1 & 0xff0000) >> 16)) << 11) | \ + (((t0 >> 21) & 0x400) | ((t0 >> 18) & 0x300) | ((t0 & 0xff00) >> 8)); \ + out[2] = ((((t2 >> 20) & 0x300) | ((t2 & 0xff0000) >> 16)) << 22) | \ + ((((t1 >> 21) & 
0x400) | ((t1 >> 18) & 0x300)|((t1 & 0xff00) >> 8)) << 11) | \ + (((t0 >> 20) & 0x400) | ((t0 >> 16) & 0x300) | (t0 & 0xff)); \ +} -#define packU96FormatToThreePacket(out, in) {\ -t9 = U32BIG(((u32*)in)[2]); \ -t1 = U32BIG(((u32*)in)[1]); \ -t2 = U32BIG(((u32*)in)[0]); \ -t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \ -t1 = t1 << 1; \ -t2 = t2 << 2; \ -temp0[0] = t9; temp0[1] = t9 << 1; temp0[2] = t9 << 2; \ -puckU32ToThree(temp0[0]); \ -puckU32ToThree(temp0[1]); \ -puckU32ToThree(temp0[2]); \ -temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; \ -puckU32ToThree(temp1[0]); \ -puckU32ToThree(temp1[1]); \ -puckU32ToThree(temp1[2]); \ -temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \ -puckU32ToThree(temp2[0]); \ -puckU32ToThree(temp2[1]); \ -puckU32ToThree(temp2[2]); \ -out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); \ -out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); \ -out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); \ -} #define unpackU96FormatToThreePacket( out, in) {\ -temp0[0] = in[0] & 0xffe00000; \ -temp1[0] = (in[0] & 0x001ffc00) << 11; \ -temp2[0] = (in[0] & 0x000003ff) << 22; \ -temp0[1] = in[1] & 0xffe00000; \ -temp1[1] = (in[1] & 0x001ff800) << 11; \ -t2_64 = ((in[1] & 0x00000400) << 21); \ -temp2[1] = (in[1] & 0x000003ff) << 22; \ -temp0[2] = in[2] & 0xffc00000; \ -t1_32 = ((in[2] & 0x00200000) << 10); \ -temp1[2] = (in[2] & 0x001ff800) << 11; \ -t2_65 = ((in[2] & 0x00000400) << 20); \ -temp2[2] = (in[2] & 0x000003ff) << 22; \ -unpuckU32ToThree(temp0[0]); \ -unpuckU32ToThree(temp0[1]); \ -unpuckU32ToThree(temp0[2]); \ -t9 = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; \ -unpuckU32ToThree(temp1[0]); \ -unpuckU32ToThree(temp1[1]); \ -unpuckU32ToThree(temp1[2]); \ -t1 = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); \ -unpuckU32ToThree(temp2[0]); \ -unpuckU32ToThree(temp2[1]); \ 
-unpuckU32ToThree(temp2[2]); \ -t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \ -*(u32*)(out) = U32BIG(t2); \ -*(u32*)(out + 4) = U32BIG(t1); \ -*(u32*)(out + 8) = U32BIG(t9); \ + u32 t[3] = { 0 };\ + u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ + t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ + ((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ + ((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ + t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ + ((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ + ((t0 & 0x3fc00) >> 10); \ + t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ + ((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ + ((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ + unpuckU32ToThree_3(t[0]); \ + unpuckU32ToThree_3(t[1]); \ + unpuckU32ToThree_3(t[2]); \ + memcpy(out, t, 12 * sizeof(unsigned char)); \ } + #define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\ t3= LOTR32(t2, 2);\ t4 =LOTR32(t0, 3);\ @@ -118,4 +85,24 @@ t3= LOTR32(t1, 18); \ t4 = LOTR32(t2, 18);\ t5 = LOTR32(t0, 19); \ } +#define ROUND384(lunNum) {\ +s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;\ +s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;\ +s[2] ^= constant7Format[lunNum] & 0x7;\ +sbox(s[0], s[3], s[6], s[9] , s_temp[3], s_temp[6], s_temp[9]);\ +sbox(s[1], s[4], s[7], s[10], s[3] , s_temp[7], s_temp[10]);\ +sbox(s[2], s[5], s[8], s[11], s[4] , s_temp[8], s_temp[11]);\ +s[5] = LOTR32(s_temp[3], 1); \ +U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\ +U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\ +} + +#define Processing_Data(data) \ +do { \ + packU96FormatToThreePacket(dataFormat, data); \ + s[0] ^= dataFormat[0]; \ + s[1] ^= dataFormat[1]; \ + s[2] ^= dataFormat[2]; \ +} while (0) + diff --git 
a/knot/Implementations/crypto_aead/knot192/armcortexm_3/encrypt.c b/knot/Implementations/crypto_aead/knot192/armcortexm_3/encrypt.c index 9513625..58a89db 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_3/encrypt.c +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_3/encrypt.c @@ -1,50 +1,40 @@ - #include"auxFormat.h" #define aead_RATE (96 / 8) #define PR0_ROUNDS 76 #define PR_ROUNDS 40 #define PRF_ROUNDS 44 -unsigned char constant7Format[127] = { - /*constant7Format[127]:*/ -0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, -0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, -0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, -0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, -0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, -0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, -0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,0xcf,0x5e,0xd3,0x9f, -0xdc,0xc6,0x16,0x91,0x8d,0x4c,0x43,0x1a,0xd0,0x87,0x1c,0xc1, -0x0f,0x59,0xca,0x57,0x9b,0xdd,0xce,0x56,0x93,0x9d,0xcc,0x46, -0x13,0x98,0xc5,0x0e,0x51,0x8a,0x55,0x8b,0x5d,0xcb,0x5f,0xdb, -0xdf,0xde,0xd6,0x96,0x94,0x84,0x04, }; -#define ROUND384(lunNum) {\ -s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;\ -s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;\ -s[2] ^= constant7Format[lunNum] & 0x7;\ -sbox(s[0], s[3], s[6], s[9] , s_temp[3], s_temp[6], s_temp[9]);\ -sbox(s[1], s[4], s[7], s[10], s[3] , s_temp[7], s_temp[10]);\ -sbox(s[2], s[5], s[8], s[11], s[4] , s_temp[8], s_temp[11]);\ -s[5] = LOTR32(s_temp[3], 1); \ -U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\ -U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\ -} +/* + + #define PR0_ROUNDS 76 + #define PR_ROUNDS 56 + #define PRF_ROUNDS 60 + + * */ +unsigned char constant7Format[76] = { +/*constant7Format[127]:*/ +0x01, 0x08, 0x40, 0x02, 0x10, 0x80, 0x05, 0x09, 0x48, 0x42, 0x12, 0x90, 0x85, + 0x0c, 0x41, 0x0a, 0x50, 0x82, 0x15, 0x89, 0x4d, 0x4b, 0x5a, 
0xd2, 0x97, + 0x9c, 0xc4, 0x06, 0x11, 0x88, 0x45, 0x0b, 0x58, 0xc2, 0x17, 0x99, 0xcd, + 0x4e, 0x53, 0x9a, 0xd5, 0x8e, 0x54, 0x83, 0x1d, 0xc9, 0x4f, 0x5b, 0xda, + 0xd7, 0x9e, 0xd4, 0x86, 0x14, 0x81, 0x0d, 0x49, 0x4a, 0x52, 0x92, 0x95, + 0x8c, 0x44, 0x03, 0x18, 0xc0, 0x07, 0x19, 0xc8, 0x47, 0x1b, 0xd8, 0xc7, + 0x1e, 0xd1, 0x8f, }; + int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, - const unsigned char *m, unsigned long long mlen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *nsec, const unsigned char *npub, - const unsigned char *k) { + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { - u8 i; + u8 i; u32 s[12] = { 0 }; u32 dataFormat[3] = { 0 }; u8 tempData[12] = { 0 }; + u8 tempU8[48] = { 0 }; u32 s_temp[12] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; - u32 t1_32, t2_64, t2_65; - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; *clen = mlen + CRYPTO_ABYTES; // initialization packU96FormatToThreePacket(s, npub); @@ -55,12 +45,10 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, ROUND384(i); } // process associated data + //PAD(adlen,ad); if (adlen) { while (adlen >= aead_RATE) { - packU96FormatToThreePacket(dataFormat, ad); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - s[2] ^= dataFormat[2]; + Processing_Data(ad); for (i = 0; i < PR_ROUNDS; i++) { ROUND384(i); } @@ -70,21 +58,16 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, memset(tempData, 0, sizeof(tempData)); memcpy(tempData, ad, adlen); tempData[adlen] = 0x01; - packU96FormatToThreePacket(dataFormat, tempData); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - s[2] ^= dataFormat[2]; + Processing_Data(tempData); for (i = 0; i < PR_ROUNDS; i++) { ROUND384(i); } } s[9] ^= 0x80000000; - if (mlen) { +// P_EN(mlen, m); + if (mlen) { while (mlen >= aead_RATE) 
{ - packU96FormatToThreePacket(dataFormat, m); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - s[2] ^= dataFormat[2]; + Processing_Data(m); unpackU96FormatToThreePacket(c, s); for (i = 0; i < PR_ROUNDS; i++) { ROUND384(i); @@ -96,10 +79,7 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, memset(tempData, 0, sizeof(tempData)); memcpy(tempData, m, mlen); tempData[mlen] = 0x01; - packU96FormatToThreePacket(dataFormat, tempData); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - s[2] ^= dataFormat[2]; + Processing_Data(tempData); unpackU96FormatToThreePacket(tempData, s); memcpy(c, tempData, mlen); c += mlen; @@ -108,17 +88,17 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, for (i = 0; i < PRF_ROUNDS; i++) { ROUND384(i); } - // return tag - unpackU96FormatToThreePacket(c, s); - unpackU96FormatToThreePacket((c + 12), (s + 3)); + unpackU96FormatToThreePacket(tempU8, s); + unpackU96FormatToThreePacket((tempU8 + 12), (s + 3)); + memcpy(c, tempU8, CRYPTO_ABYTES * sizeof(unsigned char)); return 0; } int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, - unsigned char *nsec, const unsigned char *c, unsigned long long clen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *npub, const unsigned char *k) { - u8 i, j; + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + u8 i; u32 s[12] = { 0 }; u32 dataFormat[6] = { 0 }; u32 dataFormat_1[3] = { 0 }; @@ -126,10 +106,6 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, u8 tempU8[48] = { 0 }; u32 s_temp[12] = { 0 }; u32 t1, t2, t3, t5, t6, t8, t9, t11; - u32 t1_32, t2_64, t2_65; - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; *mlen = clen - CRYPTO_ABYTES; if (clen < CRYPTO_ABYTES) return -1; @@ -144,10 +120,7 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long 
*mlen, // process associated data if (adlen) { while (adlen >= aead_RATE) { - packU96FormatToThreePacket(dataFormat, ad); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - s[2] ^= dataFormat[2]; + Processing_Data(ad); for (i = 0; i < PR_ROUNDS; i++) { ROUND384(i); } @@ -155,26 +128,24 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, ad += aead_RATE; } memset(tempData, 0, sizeof(tempData)); - memcpy(tempData, ad, adlen); tempData[adlen] = 0x01; - packU96FormatToThreePacket(dataFormat, tempData); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - s[2] ^= dataFormat[2]; + Processing_Data(tempData); for (i = 0; i < PR_ROUNDS; i++) { ROUND384(i); } } s[9] ^= 0x80000000; clen -= CRYPTO_ABYTES; +// P_DE(clen,c); if (clen) { while (clen >= aead_RATE) { packU96FormatToThreePacket(dataFormat, c); dataFormat_1[0] = s[0] ^ dataFormat[0]; dataFormat_1[1] = s[1] ^ dataFormat[1]; dataFormat_1[2] = s[2] ^ dataFormat[2]; - unpackU96FormatToThreePacket(m, dataFormat_1); + unpackU96FormatToThreePacket(m, dataFormat_1);\ + s[0] = dataFormat[0]; s[1] = dataFormat[1]; s[2] = dataFormat[2]; @@ -186,24 +157,29 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, c += aead_RATE; } unpackU96FormatToThreePacket(tempU8, s); - for (i = 0; i < clen; ++i, ++m, ++c) - { - *m = tempU8[i] ^ *c; - tempU8[i] = *c; - } - tempU8[i] ^= 0x01; + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, c, clen * sizeof(unsigned char)); + tempData[clen] = 0x01; + U32BIG(((u32*)tempU8)[0]) ^= U32BIG( + ((u32* )tempData)[0]); + U32BIG(((u32*)tempU8)[1]) ^= U32BIG( + ((u32* )tempData)[1]); + U32BIG(((u32*)tempU8)[2]) ^= U32BIG( + ((u32* )tempData)[2]); + memcpy(m, tempU8, clen * sizeof(unsigned char)); + memcpy(tempU8, tempData, clen * sizeof(unsigned char)); + c += clen; packU96FormatToThreePacket(s, tempU8); } // finalization for (i = 0; i < PRF_ROUNDS; i++) { ROUND384(i); } - // return tag unpackU96FormatToThreePacket(tempU8, s); 
unpackU96FormatToThreePacket((tempU8 + 12), (s + 3)); - if (memcmp((void*)tempU8, (void*)c, CRYPTO_ABYTES)) { + if (memcmp((void*) tempU8, (void*) c, CRYPTO_ABYTES)) { + memset(m, 0, sizeof(unsigned char) * (*mlen)); *mlen = 0; - memset(m, 0, sizeof(unsigned char) * (clen - CRYPTO_ABYTES)); return -1; } return 0; diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.c b/knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.c index 7c1f7cf..a31721d 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.c +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.c @@ -17,86 +17,38 @@ void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) { } } +void packU96FormatToThreePacket(u32 *out, u8 *in) { + u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ + puckU32ToThree_3(t0); \ + puckU32ToThree_3(t1); \ + puckU32ToThree_3(t2); \ + out[0] = ((((t2 >> 21) & 0x400) | (((t2 >> 18) & 0x300)) | ((t2 & 0xff00) >> 8)) << 21) | \ + ((((t1 >> 20) & 0x400) | ((t1 >> 16) & 0x300) | (t1 & 0xff)) << 10) | \ + (((t0 >> 20) & 0x300) | ((t0 & 0xff0000) >> 16)); \ + out[1] = ((((t2 >> 20) & 0x400) | ((t2 >> 16) & 0x300) | (t2 & 0xff)) << 21) | \ + ((((t1 >> 20) & 0x300) | ((t1 & 0xff0000) >> 16)) << 11) | \ + (((t0 >> 21) & 0x400) | ((t0 >> 18) & 0x300) | ((t0 & 0xff00) >> 8)); \ + out[2] = ((((t2 >> 20) & 0x300) | ((t2 & 0xff0000) >> 16)) << 22) | \ + ((((t1 >> 21) & 0x400) | ((t1 >> 18) & 0x300)|((t1 & 0xff00) >> 8)) << 11) | \ + (((t0 >> 20) & 0x400) | ((t0 >> 16) & 0x300) | (t0 & 0xff)); \ -//puck begin// -void unpackU96FormatToThreePacket(u8 * out, u32 * in) { - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - u32 t1_32, t2_64, t2_65; - u32 t[3] = { 0 }; - temp0[0] = in[0] & 0xffe00000; - temp1[0] = (in[0] & 0x001ffc00) << 11; - temp2[0] = (in[0] & 0x000003ff) << 22; - temp0[1] = in[1] & 0xffe00000; - temp1[1] = (in[1] & 0x001ff800) << 11; - 
t2_64 = ((in[1] & 0x00000400) << 21); - temp2[1] = (in[1] & 0x000003ff) << 22; - temp0[2] = in[2] & 0xffc00000; - t1_32 = ((in[2] & 0x00200000) << 10); - temp1[2] = (in[2] & 0x001ff800) << 11; - t2_65 = ((in[2] & 0x00000400) << 20); - temp2[2] = (in[2] & 0x000003ff) << 22; - unpuckU32ToThree(temp0[0]); - unpuckU32ToThree(temp0[1]); - unpuckU32ToThree(temp0[2]); - t[2] = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; - unpuckU32ToThree(temp1[0]); - unpuckU32ToThree(temp1[1]); - unpuckU32ToThree(temp1[2]); - t[1] = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); - unpuckU32ToThree(temp2[0]); - unpuckU32ToThree(temp2[1]); - unpuckU32ToThree(temp2[2]); - t[0] = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); - memcpy(out, t, 12 * sizeof(unsigned char)); } -void packU96FormatToThreePacket(u32 * out, u8 * in) { - u32 t0 = U32BIG(((u32*)in)[2]); - u32 t1 = U32BIG(((u32*)in)[1]); - u32 t2 = U32BIG(((u32*)in)[0]); - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - u32 temp0[3] = { 0 }; - u8 t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; - t1 = t1 << 1; - t2 = t2 << 2; - temp0[0] = t0; temp0[1] = t0 << 1; temp0[2] = t0 << 2; - puckU32ToThree(temp0[0]); - puckU32ToThree(temp0[1]); - puckU32ToThree(temp0[2]); - temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; - puckU32ToThree(temp1[0]); - puckU32ToThree(temp1[1]); - puckU32ToThree(temp1[2]); - temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; - puckU32ToThree(temp2[0]); - puckU32ToThree(temp2[1]); - puckU32ToThree(temp2[2]); - out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); - out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); - out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); -} - -void packU48FormatToThreePacket(u32 * out, u8 * in) { - u32 t1 = (u32)U16BIG(*(u16*)(in + 4)); - u32 t2 = U32BIG(*(u32*)(in)); - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - 
u8 t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; - t1 = t1 << 1; - t2 = t2 << 2; - temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; - puckU32ToThree(temp1[0]); - puckU32ToThree(temp1[1]); - puckU32ToThree(temp1[2]); - temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; - puckU32ToThree(temp2[0]); - puckU32ToThree(temp2[1]); - puckU32ToThree(temp2[2]); - out[0] = (temp1[0] >> 11) | (temp2[0] >> 22); - out[1] = (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); - out[2] = (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); +void unpackU96FormatToThreePacket(u8 *out, u32 *in) { + u32 t[3] = { 0 } ;\ +u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ +t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ +((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ +((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ +t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ +((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ +((t0 & 0x3fc00) >> 10); \ +t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ +((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ +((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ +unpuckU32ToThree_3(t[0]); \ +unpuckU32ToThree_3(t[1]); \ +unpuckU32ToThree_3(t[2]); \ +memcpy(out, t, 12 * sizeof(unsigned char)); \ } unsigned char constant7Format[76] = { /*constant7Format[127]:*/ diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.h b/knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.h index 5870426..5fa905e 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.h @@ -14,8 +14,10 @@ typedef unsigned long long u64; #define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) #define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) - -#define 
ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +void packU48FormatToThreePacket(u32 * out, u8 * in) ; +void unpackU96FormatToThreePacket(u8 * out, u32 * in) ; +void packU96FormatToThreePacket(u32 * out, u8 * in); +void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) ; #define sbox(a, b, c, d, f, g, h) \ { \ t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ @@ -32,26 +34,26 @@ t3= LOTR32(t1, 18); \ t4 = LOTR32(t2, 18);\ t5 = LOTR32(t0, 19); \ } -/* -s0 s1 s2 -s3 s4 s5 -s6 s7 s8 -s9 s10 s11 -*/ -#define puckU32ToThree(x){\ -x &= 0x92492492;\ -x = (x | (x << 2)) & 0xc30c30c3;\ -x = (x | (x << 4)) & 0xf00f00f0;\ -x = (x | (x << 8)) & 0xff0000ff;\ -x = (x | (x << 16)) & 0xfff00000;\ -} -#define unpuckU32ToThree(x){\ -x &= 0xfff00000;\ -x = (x | (x >> 16)) & 0xff0000ff;\ -x = (x | (x >> 8)) & 0xf00f00f0;\ -x = (x | (x >> 4)) & 0xc30c30c3;\ -x = (x | (x >> 2)) & 0x92492492;\ -} + unsigned char constant7Format[76]; + +#define puckU32ToThree_3(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +} +#define unpuckU32ToThree_3(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +} diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_4/encrypt.c b/knot/Implementations/crypto_aead/knot192/armcortexm_4/encrypt.c 
index f4fc2ce..0482586 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_4/encrypt.c +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_4/encrypt.c @@ -1,9 +1,20 @@ #include"auxFormat.h" #define aead_RATE (96 / 8) + +#define PR0_ROUNDS 76 +#define PR_ROUNDS 40 +#define PRF_ROUNDS 44 +/* + +#define PR0_ROUNDS 76 +#define PR_ROUNDS 56 +#define PRF_ROUNDS 60 + #define PR0_ROUNDS 76 #define PR_ROUNDS 40 #define PRF_ROUNDS 44 + * */ void Initialize(u32 *s, const unsigned char *npub, const unsigned char *k) { packU96FormatToThreePacket(s, npub); packU96FormatToThreePacket(s + 3, npub + 12); @@ -68,10 +79,12 @@ void ProcessPlaintext(u32 *s, const u8* m, unsigned long long mlen, unsigned cha } void Finalize_GenerateTag(u32 *s, unsigned char *c) { + u8 tempU8[32] = { 0 }; P384(s, constant7Format, PRF_ROUNDS); // return tag - unpackU96FormatToThreePacket(c, s); - unpackU96FormatToThreePacket(c + 12, s + 3); + unpackU96FormatToThreePacket(tempU8, s); + unpackU96FormatToThreePacket((tempU8 + 12), (s + 3)); + memcpy(c, tempU8, CRYPTO_ABYTES * sizeof(unsigned char)); } int Finalize_VerifyTag(u32 *s, const unsigned char *c, unsigned char *m, unsigned long long *mlen) { @@ -91,7 +104,7 @@ void ProcessCiphertext(u32 *s, unsigned char *m, const unsigned char *c, unsigne { u32 dataFormat[6] = { 0 }; u32 dataFormat_1[3] = { 0 }; - u8 i,tempU8[48] = { 0 }; + u8 tempData[48] = { 0 },tempU8[48] = { 0 }; if (clen) { while (clen >= aead_RATE) { packU96FormatToThreePacket(dataFormat, c); @@ -108,12 +121,15 @@ void ProcessCiphertext(u32 *s, unsigned char *m, const unsigned char *c, unsigne c += aead_RATE; } unpackU96FormatToThreePacket(tempU8, s); - for (i = 0; i < clen; ++i, ++m, ++c) - { - *m = tempU8[i] ^ *c; - tempU8[i] = *c; - } - tempU8[i] ^= 0x01; + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, c, clen * sizeof(unsigned char)); + tempData[clen] = 0x01; + U32BIG(((u32*)tempU8)[0]) ^= U32BIG(((u32* )tempData)[0]); + U32BIG(((u32*)tempU8)[1]) ^= 
U32BIG(((u32* )tempData)[1]); + U32BIG(((u32*)tempU8)[2]) ^= U32BIG(((u32* )tempData)[2]); + memcpy(m, tempU8, clen * sizeof(unsigned char)); + memcpy(tempU8, tempData, clen * sizeof(unsigned char)); + c += clen; packU96FormatToThreePacket(s, tempU8); } diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_5/auxFormat.c b/knot/Implementations/crypto_aead/knot192/armcortexm_5/auxFormat.c index f2bb626..6d4f512 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_5/auxFormat.c +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_5/auxFormat.c @@ -1,88 +1,64 @@ - #include"auxFormat.h" -void ROUND384_Three(unsigned int *s, unsigned char *c,int lunnum) {\ - unsigned int t,t1,t2; - u32 rci; - rci=c[0]; - ROUND384_1(rci); +void ROUND384_Three(unsigned int *s, unsigned char *c, int lunnum) { + unsigned int t, t1, t2; + u32 rci, temp; + rci = c[0]; + ARC(rci); + SBOX(s[0], s[3], s[6], s[9]); + SBOX(s[1], s[4], s[7], s[10]); + SBOX(s[2], s[5], s[8], s[11]); t = 1; while (lunnum--) { - rci=c[t]; - ROUND384_2(rci); - t++; - rci=c[t]; - ROUND384_3(rci); - t++; - rci=c[t]; - ROUND384_4(rci); - t++; + temp = ((u32*) (c + t))[0]; + rci = temp & 0xff; + ARC(rci); + SBOX1(s[0], s[4], s[8], s[10] ); + SBOX2(s[1], s[5], s[6], s[11]); + SBOX3(s[2], s[3], s[7], s[9]); + rci = (temp & 0xff00) >> 8; + ARC(rci); + SBOX1(s[0], s[5], s[7], s[11]); + SBOX2(s[1], s[3], s[8], s[9]); + SBOX3(s[2], s[4], s[6], s[10]); + rci = (temp & 0xff0000) >> 16; + ARC(rci); + SBOX1(s[0], s[3], s[6], s[9]); + SBOX2(s[1], s[4], s[7], s[10]); + SBOX3(s[2], s[5], s[8], s[11]); + t += 3; } } +void packU96FormatToThreePacket(u32 *out, u8 *in) { + u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ + puckU32ToThree_3(t0); \ + puckU32ToThree_3(t1); \ + puckU32ToThree_3(t2); \ + out[0] = ((((t2 >> 21) & 0x400) | (((t2 >> 18) & 0x300)) | ((t2 & 0xff00) >> 8)) << 21) | \ + ((((t1 >> 20) & 0x400) | ((t1 >> 16) & 0x300) | (t1 & 0xff)) << 10) | \ + (((t0 >> 
20) & 0x300) | ((t0 & 0xff0000) >> 16)); \ + out[1] = ((((t2 >> 20) & 0x400) | ((t2 >> 16) & 0x300) | (t2 & 0xff)) << 21) | \ + ((((t1 >> 20) & 0x300) | ((t1 & 0xff0000) >> 16)) << 11) | \ + (((t0 >> 21) & 0x400) | ((t0 >> 18) & 0x300) | ((t0 & 0xff00) >> 8)); \ + out[2] = ((((t2 >> 20) & 0x300) | ((t2 & 0xff0000) >> 16)) << 22) | \ + ((((t1 >> 21) & 0x400) | ((t1 >> 18) & 0x300)|((t1 & 0xff00) >> 8)) << 11) | \ + (((t0 >> 20) & 0x400) | ((t0 >> 16) & 0x300) | (t0 & 0xff)); \ - - -void packU96FormatToThreePacket(u32 * out, u8 * in) { - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; - puckU32ToThree_1(temp0[0]); - puckU32ToThree_1(temp0[1]); - puckU32ToThree_1(temp0[2]); - temp1[0] = U32BIG(((u32*)in)[1]); temp1[1] = U32BIG(((u32*)in)[1]) >>1; temp1[2] = U32BIG(((u32*)in)[1]) >> 2; - puckU32ToThree_1(temp1[0]); - puckU32ToThree_1(temp1[1]); - puckU32ToThree_1(temp1[2]); - temp2[0] = U32BIG(((u32*)in)[2]); temp2[1] = U32BIG(((u32*)in)[2]) >> 1; temp2[2] = U32BIG(((u32*)in)[2]) >> 2; - puckU32ToThree_1(temp2[0]); - puckU32ToThree_1(temp2[1]); - puckU32ToThree_1(temp2[2]); - out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; - out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; - out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; -} -void unpackU96FormatToThreePacket(u8 * out, u32 * in) { - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - u32 t[3] = { 0 }; - temp0[0] = in[2] & 0x7ff; - temp0[1] = in[1] & 0x7ff; - temp0[2] = in[0] & 0x3ff; - temp1[0] = (in[0]>>10) & 0x7ff; - temp1[1] = (in[2] >>11 ) & 0x7ff; - temp1[2] = (in[1] >> 11) & 0x3ff; - temp2[0] = in[1] >> 21; - temp2[1] = in[0] >> 21; - temp2[2] = in[2] >> 22; - unpuckU32ToThree_1(temp0[0]); - unpuckU32ToThree_1(temp0[1]); - unpuckU32ToThree_1(temp0[2]); - t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; - unpuckU32ToThree_1(temp1[0]); - 
unpuckU32ToThree_1(temp1[1]); - unpuckU32ToThree_1(temp1[2]); - t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; - unpuckU32ToThree_1(temp2[0]); - unpuckU32ToThree_1(temp2[1]); - unpuckU32ToThree_1(temp2[2]); - t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; - memcpy(out, t, 12 * sizeof(unsigned char)); } -void packU48FormatToThreePacket(u32 * out, u8 * in) { - u32 t1 = (u32)U16BIG(*(u16*)(in + 4)); - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; - puckU32ToThree_1(temp0[0]); - puckU32ToThree_1(temp0[1]); - puckU32ToThree_1(temp0[2]); - temp1[0] = t1; temp1[1] = t1 >> 1; temp1[2] = t1 >> 2; - puckU32ToThree_1(temp1[0]); - puckU32ToThree_1(temp1[1]); - puckU32ToThree_1(temp1[2]); - out[0] = (temp1[0] << 10) | temp0[2]; - out[1] = (temp1[2] << 11) | temp0[1]; - out[2] = (temp1[1] << 11) | temp0[0]; +void unpackU96FormatToThreePacket(u8 *out, u32 *in) { + u32 t[3] = { 0 },r0 ;\ +u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ +t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ +((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ +((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ +t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ +((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ +((t0 & 0x3fc00) >> 10); \ +t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ +((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ +((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ +unpuckU32ToThree_3(t[0]); \ +unpuckU32ToThree_3(t[1]); \ +unpuckU32ToThree_3(t[2]); \ +memcpy(out, t, 12 * sizeof(unsigned char)); \ } diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_5/auxFormat.h b/knot/Implementations/crypto_aead/knot192/armcortexm_5/auxFormat.h index bc59690..4bdeb02 100644 --- 
a/knot/Implementations/crypto_aead/knot192/armcortexm_5/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_5/auxFormat.h @@ -11,19 +11,23 @@ typedef unsigned char u8; typedef unsigned short u16; typedef unsigned int u32; typedef unsigned long long u64; -#define puckU32ToThree_1(x){\ -x &= 0x49249249;\ -x = (x | (x >> 2)) & 0xc30c30c3;\ -x = (x | (x >>4)) & 0x0f00f00f;\ -x = (x | (x >> 8)) & 0xff0000ff;\ -x = (x | (x >> 16)) & 0xfff;\ + +#define puckU32ToThree_3(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ } -#define unpuckU32ToThree_1(x){\ -x &= 0xfff;\ -x = (x | (x << 16)) & 0xff0000ff;\ -x = (x | (x << 8)) & 0x0f00f00f;\ -x = (x | (x << 4)) & 0xc30c30c3;\ -x = (x | (x << 2)) & 0x49249249;\ +#define unpuckU32ToThree_3(lo){\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ } void ROUND384_Three(unsigned int *s, unsigned char *c,int lunnum); #define ARC(rci) \ @@ -108,6 +112,7 @@ void ROUND384_Three(unsigned int *s, unsigned char *c,int lunnum); [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ : : );\ }while (0) + #define SBOX3(S1,S2,S3,S4) \ do { \ __asm__ __volatile__ ( \ @@ -132,30 +137,6 @@ void ROUND384_Three(unsigned int *s, unsigned char *c,int lunnum); [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ : : );\ }while (0) -#define ROUND384_1(rci) {\ - ARC(rci);\ 
-SBOX(s[0], s[3], s[6], s[9] );\ -SBOX(s[1], s[4], s[7], s[10]);\ -SBOX(s[2], s[5], s[8], s[11]);\ -} -#define ROUND384_2(rci) {\ - ARC(rci);\ -SBOX1(s[0], s[4], s[8], s[10] );\ -SBOX2(s[1], s[5], s[6], s[11]);\ -SBOX3(s[2], s[3], s[7], s[9]);\ -} -#define ROUND384_3(rci) {\ - ARC(rci);\ -SBOX1(s[0], s[5], s[7], s[11]);\ -SBOX2(s[1], s[3], s[8], s[9]);\ -SBOX3(s[2], s[4], s[6], s[10]);\ -} -#define ROUND384_4(rci) {\ - ARC(rci);\ -SBOX1(s[0], s[3], s[6], s[9]);\ -SBOX2(s[1], s[4], s[7], s[10]);\ -SBOX3(s[2], s[5], s[8], s[11]);\ -} #define P384_1( s, round, lunNum) {\ u32 t1;\ ROUND384_Three(s,round,lunNum);\ @@ -185,9 +166,12 @@ SBOX3(s[2], s[5], s[8], s[11]);\ #define P384_2( s, round, lunNum) {\ u32 t1,rci;\ ROUND384_Three(s,round,lunNum);\ - rci=round[lunNum*3+1];\ - ROUND384_2(rci);\ - __asm__ __volatile__ ( \ + rci=round[lunNum*3+1];\ + ARC(rci);\ + SBOX1(s[0], s[4], s[8], s[10] );\ + SBOX2(s[1], s[5], s[6], s[11]);\ + SBOX3(s[2], s[3], s[7], s[9]);\ + __asm__ __volatile__ ( \ "/*rotate shift left 1 bit [w9 w5 w1-> (w1,1) w9 w5] */ \n\t"\ "mov %[t1], %[S_4] \n\t"\ "mov %[S_4], %[S_3] \n\t"\ diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_5/encrypt.c b/knot/Implementations/crypto_aead/knot192/armcortexm_5/encrypt.c index 16c1d76..c6b6838 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_5/encrypt.c +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_5/encrypt.c @@ -6,20 +6,7 @@ #define PR0_ROUNDS 25 #define PR_ROUNDS 13 #define PRF_ROUNDS 14 -/* -#define PR0_ROUNDS 25 -#define PR_ROUNDS 18 -#define PRF_ROUNDS 19 - -#define PR0_ROUNDS 25 -#define PR_ROUNDS 13 -#define PRF_ROUNDS 14 - -#define PR0_ROUNDS 76 /3=25+1 -#define PR_ROUNDS 40 /3=13+1 -#define PRF_ROUNDS 44 /3=14+2 - * */ unsigned char constant7Format[76] = { 0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, 0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, @@ -35,12 +22,8 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, 
const unsigned char *k) { u32 s[12] = { 0 }; u32 dataFormat[3] = { 0 }; - u8 tempData[12] = { 0 }; + u8 tempData[24] = { 0 }; u32 t2; - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - u32 t[3] = { 0 }; *clen = mlen + CRYPTO_ABYTES; // initialization packU96FormatToThreePacket(s, npub); @@ -96,8 +79,9 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, // finalization P384_2(s, constant7Format,PRF_ROUNDS); // return tag - unpackU96FormatToThreePacket(c, s); - unpackU96FormatToThreePacket((c + 12), (s + 3)); + unpackU96FormatToThreePacket(tempData, s); + unpackU96FormatToThreePacket((tempData + 12), (s + 3)); + memcpy(c, tempData, CRYPTO_ABYTES * sizeof(unsigned char)); return 0; } @@ -111,10 +95,6 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, u8 tempData[12] = { 0 }; u8 tempU8[48] = { 0 }; u32 t2; - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - u32 t[3] = { 0 }; \ *mlen = clen - CRYPTO_ABYTES; if (clen < CRYPTO_ABYTES) return -1; @@ -123,7 +103,6 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, packU96FormatToThreePacket((s + 3), (npub + 12)); packU96FormatToThreePacket((s + 6), k); packU96FormatToThreePacket((s + 9), (k + 12)); - P384_1(s, constant7Format,PR0_ROUNDS); // process associated data if (adlen) { @@ -138,14 +117,12 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, ad += aead_RATE; } memset(tempData, 0, sizeof(tempData)); - memcpy(tempData, ad, adlen); tempData[adlen] = 0x01; packU96FormatToThreePacket(dataFormat, tempData); s[0] ^= dataFormat[0]; s[1] ^= dataFormat[1]; s[2] ^= dataFormat[2]; - P384_1(s, constant7Format,PR_ROUNDS); } s[9] ^= 0x80000000; @@ -153,14 +130,12 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, if (clen) { while (clen >= aead_RATE) { packU96FormatToThreePacket(dataFormat, c); - dataFormat_1[0] = s[0] ^ dataFormat[0]; - dataFormat_1[1] = s[1] ^ dataFormat[1]; + 
dataFormat_1[0] = s[0] ^ dataFormat[0]; dataFormat_1[1] = s[1] ^ dataFormat[1]; dataFormat_1[2] = s[2] ^ dataFormat[2]; unpackU96FormatToThreePacket(m, dataFormat_1); s[0] = dataFormat[0]; s[1] = dataFormat[1]; s[2] = dataFormat[2]; - P384_1(s, constant7Format,PR_ROUNDS); clen -= aead_RATE; m += aead_RATE; @@ -179,15 +154,14 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, c+=clen; } // finalization - P384_2(s, constant7Format,PRF_ROUNDS); // return tag - packU96FormatToThreePacket(dataFormat, c); - packU96FormatToThreePacket((dataFormat + 3), (c + 12)); - if (dataFormat[0] != s[0] || dataFormat[1] != s[1] || dataFormat[2] != s[2] || dataFormat[3] != s[3] - || dataFormat[4] != s[4] || dataFormat[5] != s[5]) { + unpackU96FormatToThreePacket(tempU8, s); + unpackU96FormatToThreePacket(tempU8 + 12, s + 3); + if (memcmp((void*)tempU8, (void*)(c), CRYPTO_ABYTES)) { + memset(m, 0, sizeof(unsigned char) * (*mlen)); + *mlen = 0; return -1; } - ////////// return 0; } diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.c b/knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.c index 9703080..0390123 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.c +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.c @@ -28,78 +28,36 @@ void ROUND384_Three(unsigned int *s, unsigned char *c, int lunnum) { t += 3; } } - void packU96FormatToThreePacket(u32 *out, u8 *in) { - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - temp0[0] = U32BIG(((u32* )in)[0]); - temp0[1] = U32BIG(((u32*)in)[0]) >> 1; - temp0[2] = U32BIG(((u32*)in)[0]) >> 2; - puckU32ToThree_1(temp0[0]); - puckU32ToThree_1(temp0[1]); - puckU32ToThree_1(temp0[2]); - temp1[0] = U32BIG(((u32* )in)[1]); - temp1[1] = U32BIG(((u32*)in)[1]) >> 1; - temp1[2] = U32BIG(((u32*)in)[1]) >> 2; - puckU32ToThree_1(temp1[0]); - puckU32ToThree_1(temp1[1]); - puckU32ToThree_1(temp1[2]); - temp2[0] = U32BIG(((u32* )in)[2]); - 
temp2[1] = U32BIG(((u32*)in)[2]) >> 1; - temp2[2] = U32BIG(((u32*)in)[2]) >> 2; - puckU32ToThree_1(temp2[0]); - puckU32ToThree_1(temp2[1]); - puckU32ToThree_1(temp2[2]); - out[0] = (temp2[1] << 21) | (temp1[0] << 10) | temp0[2]; - out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; - out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; + u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ + puckU32ToThree_3(t0); \ + puckU32ToThree_3(t1); \ + puckU32ToThree_3(t2); \ + out[0] = ((((t2 >> 21) & 0x400) | (((t2 >> 18) & 0x300)) | ((t2 & 0xff00) >> 8)) << 21) | \ + ((((t1 >> 20) & 0x400) | ((t1 >> 16) & 0x300) | (t1 & 0xff)) << 10) | \ + (((t0 >> 20) & 0x300) | ((t0 & 0xff0000) >> 16)); \ + out[1] = ((((t2 >> 20) & 0x400) | ((t2 >> 16) & 0x300) | (t2 & 0xff)) << 21) | \ + ((((t1 >> 20) & 0x300) | ((t1 & 0xff0000) >> 16)) << 11) | \ + (((t0 >> 21) & 0x400) | ((t0 >> 18) & 0x300) | ((t0 & 0xff00) >> 8)); \ + out[2] = ((((t2 >> 20) & 0x300) | ((t2 & 0xff0000) >> 16)) << 22) | \ + ((((t1 >> 21) & 0x400) | ((t1 >> 18) & 0x300)|((t1 & 0xff00) >> 8)) << 11) | \ + (((t0 >> 20) & 0x400) | ((t0 >> 16) & 0x300) | (t0 & 0xff)); \ + } void unpackU96FormatToThreePacket(u8 *out, u32 *in) { - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - u32 t[3] = { 0 }; - temp0[0] = in[2] & 0x7ff; - temp0[1] = in[1] & 0x7ff; - temp0[2] = in[0] & 0x3ff; - temp1[0] = (in[0] >> 10) & 0x7ff; - temp1[1] = (in[2] >> 11) & 0x7ff; - temp1[2] = (in[1] >> 11) & 0x3ff; - temp2[0] = in[1] >> 21; - temp2[1] = in[0] >> 21; - temp2[2] = in[2] >> 22; - unpuckU32ToThree_1(temp0[0]); - unpuckU32ToThree_1(temp0[1]); - unpuckU32ToThree_1(temp0[2]); - t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; - unpuckU32ToThree_1(temp1[0]); - unpuckU32ToThree_1(temp1[1]); - unpuckU32ToThree_1(temp1[2]); - t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; - unpuckU32ToThree_1(temp2[0]); - unpuckU32ToThree_1(temp2[1]); - unpuckU32ToThree_1(temp2[2]); - t[2] = 
temp2[0] | temp2[1] << 1 | temp2[2] << 2; - memcpy(out, t, 12 * sizeof(unsigned char)); -} -void packU48FormatToThreePacket(u32 *out, u8 *in) { - u32 t1 = (u32) U16BIG(*(u16* )(in + 4)); - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - temp0[0] = U32BIG(((u32* )in)[0]); - temp0[1] = U32BIG(((u32*)in)[0]) >> 1; - temp0[2] = U32BIG(((u32*)in)[0]) >> 2; - puckU32ToThree_1(temp0[0]); - puckU32ToThree_1(temp0[1]); - puckU32ToThree_1(temp0[2]); - temp1[0] = t1; - temp1[1] = t1 >> 1; - temp1[2] = t1 >> 2; - puckU32ToThree_1(temp1[0]); - puckU32ToThree_1(temp1[1]); - puckU32ToThree_1(temp1[2]); - out[0] = (temp1[0] << 10) | temp0[2]; - out[1] = (temp1[2] << 11) | temp0[1]; - out[2] = (temp1[1] << 11) | temp0[0]; + u32 t[3] = { 0 };\ +u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ +t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ +((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ +((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ +t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ +((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ +((t0 & 0x3fc00) >> 10); \ +t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ +((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ +((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ +unpuckU32ToThree_3(t[0]); \ +unpuckU32ToThree_3(t[1]); \ +unpuckU32ToThree_3(t[2]); \ +memcpy(out, t, 12 * sizeof(unsigned char)); \ } diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.h b/knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.h index 81c4cd6..58944ef 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.h @@ -11,21 +11,29 @@ typedef unsigned char u8; typedef unsigned short u16; typedef unsigned int u32; typedef unsigned long long u64; 
-#define puckU32ToThree_1(x){\ -x &= 0x49249249;\ -x = (x | (x >> 2)) & 0xc30c30c3;\ -x = (x | (x >>4)) & 0x0f00f00f;\ -x = (x | (x >> 8)) & 0xff0000ff;\ -x = (x | (x >> 16)) & 0xfff;\ + +void ROUND384_Three(unsigned int *s, unsigned char *c,int lunnum); + + +#define puckU32ToThree_3(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ } -#define unpuckU32ToThree_1(x){\ -x &= 0xfff;\ -x = (x | (x << 16)) & 0xff0000ff;\ -x = (x | (x << 8)) & 0x0f00f00f;\ -x = (x | (x << 4)) & 0xc30c30c3;\ -x = (x | (x << 2)) & 0x49249249;\ +#define unpuckU32ToThree_3(lo){\ + u32 r0;\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ } -void ROUND384_Three(unsigned int *s, unsigned char *c,int lunnum); + #define ARC(rci) \ do { \ __asm__ __volatile__ ( \ diff --git a/knot/Implementations/crypto_aead/knot192/opt_1/encrypt.c b/knot/Implementations/crypto_aead/knot192/opt_1/encrypt.c index eda0261..665fa68 100644 --- a/knot/Implementations/crypto_aead/knot192/opt_1/encrypt.c +++ b/knot/Implementations/crypto_aead/knot192/opt_1/encrypt.c @@ -1,4 +1,5 @@ #include"api.h" +#include typedef unsigned char u8; typedef unsigned long long u64; diff --git a/knot/Implementations/crypto_aead/knot192/opt_2/encrypt.c b/knot/Implementations/crypto_aead/knot192/opt_2/encrypt.c index 8fb1ea8..b2ba5e6 100644 --- a/knot/Implementations/crypto_aead/knot192/opt_2/encrypt.c +++ 
b/knot/Implementations/crypto_aead/knot192/opt_2/encrypt.c @@ -1,5 +1,6 @@ #include"api.h" +#include typedef unsigned char u8; typedef unsigned long long u64; typedef unsigned int u32; diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.c b/knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.c index 93f0bb8..bb2f858 100644 --- a/knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.c +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.c @@ -21,51 +21,37 @@ void P512(unsigned int *s, unsigned char *round, unsigned char rounds) { } } void packU128FormatToFourPacket(u32 * out, u8 * in) { - u32 t0 = U32BIG(((u32* )in)[0]); - u32 t1 = U32BIG(((u32* )in)[1]); - u32 t2 = U32BIG(((u32* )in)[2]); - u32 t3 = U32BIG(((u32* )in)[3]); - u32 temp1; - puck32(t0); - puck32(t0); - puck32(t1); - puck32(t1); - puck32(t2); - puck32(t2); - puck32(t3); - puck32(t3); - out[3] = (t3 & 0xff000000) | ((t2 >> 8) & 0x00ff0000) - | ((t1 >> 16) & 0x0000ff00) | (t0 >> 24); - out[2] = ((t3 << 8) & 0xff000000) | (t2 & 0x00ff0000) - | ((t1 >> 8) & 0x0000ff00) | ((t0 >> 16) & 0x000000ff); - out[1] = ((t3 << 16) & 0xff000000) | ((t2 << 8) & 0x00ff0000) - | (t1 & 0x0000ff00) | ((t0 >> 8) & 0x000000ff); - out[0] = ((t3 << 24) & 0xff000000) | ((t2 << 16) & 0x00ff0000) - | ((t1 << 8) & 0x0000ff00) | (t0 & 0x000000ff); + u32 t0 = U32BIG(((u32*)in)[0]); + u32 t1 = U32BIG(((u32*)in)[1]); + u32 t2 = U32BIG(((u32*)in)[2]); + u32 t3 = U32BIG(((u32*)in)[3]); + puckU32ToFour(t0); + puckU32ToFour(t1); + puckU32ToFour(t2); + puckU32ToFour(t3); + out[3] = (t3 & 0xff000000) | ((t2 >> 8) & 0x00ff0000) | ((t1 >> 16) & 0x0000ff00) | (t0 >> 24); + out[2] = ((t3 << 8) & 0xff000000) | (t2 & 0x00ff0000) | ((t1 >> 8) & 0x0000ff00) | ((t0 >> 16) & 0x000000ff); + out[1] = ((t3 << 16) & 0xff000000) | ((t2 << 8) & 0x00ff0000) | (t1 & 0x0000ff00) | ((t0 >> 8) & 0x000000ff); + out[0] = ((t3 << 24) & 0xff000000) | ((t2 << 16) & 0x00ff0000) | ((t1 << 8) & 0x0000ff00) 
| (t0 & 0x000000ff); } void unpackU128FormatToFourPacket(u8 * out, u32 * in) { - u32 t[4] = { 0 }; - u32 r0; - t[3] = (in[3] & 0xff000000 )| ((in[2] >> 8) & 0x00ff0000) - | ((in[1] >> 16) & 0x0000ff00) | (in[0] >> 24); - t[2] = ((in[3] << 8) & 0xff000000) | (in[2] & 0x00ff0000) - | ((in[1] >> 8) & 0x0000ff00) | ((in[0] >> 16) & 0x000000ff); - t[1] = ((in[3] << 16) & 0xff000000) | ((in[2] << 8) & 0x00ff0000) - | (in[1] & 0x0000ff00) | ((in[0] >> 8) & 0x000000ff); - t[0] = ((in[3] << 24) & 0xff000000) | ((in[2] << 16) & 0x00ff0000) - | ((in[1] << 8) & 0x0000ff00) | (in[0] & 0x000000ff); - unpuck32(t[0]); - unpuck32(t[0]); - unpuck32(t[1]); - unpuck32(t[1]); - unpuck32(t[2]); - unpuck32(t[2]); - unpuck32(t[3]); - unpuck32(t[3]); - memcpy(out, t, 16 * sizeof(unsigned char)); + u32 temp[4] = { 0 }; + u32 t0, t1, t2, t3; + memcpy(temp, in, sizeof(unsigned int) * 4); + t3 = (temp[3] & 0xff000000 )| ((temp[2] >> 8) & 0x00ff0000) | ((temp[1] >> 16) & 0x0000ff00) | (temp[0] >> 24); + t2 = ((temp[3] << 8) & 0xff000000) | (temp[2] & 0x00ff0000) | ((temp[1] >> 8) & 0x0000ff00) | ((temp[0] >> 16) & 0x000000ff); + t1 = ((temp[3] << 16) & 0xff000000) | ((temp[2] << 8) & 0x00ff0000) | (temp[1] & 0x0000ff00) | ((temp[0] >> 8) & 0x000000ff); + t0 = ((temp[3] << 24) & 0xff000000) | ((temp[2] << 16) & 0x00ff0000) | ((temp[1] << 8) & 0x0000ff00) | (temp[0] & 0x000000ff); + unpuckU32ToFour(t0); + unpuckU32ToFour(t1); + unpuckU32ToFour(t2); + unpuckU32ToFour(t3); + ((u32*)out)[0] = U32BIG(t0); + ((u32*)out)[1] = U32BIG(t1); + ((u32*)out)[2] = U32BIG(t2); + ((u32*)out)[3] = U32BIG(t3); } - unsigned char constant7Format_aead[100] = { /*constant7_aead_256*/ 0x1, 0x4, 0x10, 0x40, 0x2, 0x8, 0x21, 0x5, 0x14, 0x50, 0x42, 0xa, 0x29, 0x24, diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.h b/knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.h index b8a7864..96d1117 100644 --- a/knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.h +++ 
b/knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.h @@ -10,25 +10,32 @@ typedef unsigned char u8; typedef unsigned int u32; typedef unsigned long long u64; -//new -void puckU8FormatToFourPacket(u8 in, u8 *out); +void unpackU128FormatToFourPacket(u8 * out, u32 * in) ; -#define puck32(in)\ -{\ -temp1 = (in ^ (in >> 1)) & 0x22222222; in ^= temp1 ^ (temp1 << 1);\ -temp1 = (in ^ (in >> 2)) & 0x0C0C0C0C; in ^= temp1 ^ (temp1 << 2);\ -temp1 = (in ^ (in >> 4)) & 0x00F000F0; in ^= temp1 ^ (temp1 << 4);\ -temp1 = (in ^ (in >> 8)) & 0x0000FF00; in ^= temp1 ^ (temp1 << 8);\ -} -#define unpuck32(t0){\ - r0 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= r0 ^ (r0 << 8); \ - r0 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= r0 ^ (r0 << 4); \ - r0 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= r0 ^ (r0 << 2); \ - r0 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= r0 ^ (r0 << 1); \ -} +void packU128FormatToFourPacket(u32 * out, u8 * in) ; + +void P512(unsigned int *s, unsigned char *round, unsigned char rounds); unsigned char constant7Format_aead[100]; +#define puckU32ToFour(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 1)) & 0x44444444, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 8)) & 0x00ff0000, lo ^= r0 ^ (r0 >> 8);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +} +#define unpuckU32ToFour(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 8)) & 0x00ff0000, lo ^= r0 ^ (r0 >> 8);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 1)) & 0x44444444, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +} //t1 #define P512_ARC_1(rci) \ do { \ @@ -49,8 +56,8 @@ unsigned char constant7Format_aead[100]; }while (0) //t1 t2 #define P512_2SC(S1,S2,S3,S4,S5,S6,S7,S8) \ 
- do { \ - __asm__ __volatile__ ( \ +do { \ +__asm__ __volatile__ ( \ "/*sbox column*/ \n\t"\ "mvns %[S_0], %[S_0] \n\t"\ "ands %[t1], %[S_2], %[S_0] \n\t"\ @@ -71,14 +78,14 @@ unsigned char constant7Format_aead[100]; "orrs %[S_5], %[S_3], %[S_5] \n\t"\ "eors %[S_1], %[S_7], %[S_1] \n\t"\ "eors %[S_5], %[S_5], %[S_1] \n\t"\ - "eors %[t4], %[S_3], %[S_7] \n\t"\ + "eors %[t2], %[S_3], %[S_7] \n\t"\ "eors %[S_7], %[S_7], %[t3] \n\t"\ "ands %[S_1], %[t3] , %[S_1] \n\t"\ - "eors %[S_1], %[t4] , %[S_1] \n\t"\ - "ands %[S_3], %[S_5], %[t4] \n\t"\ + "eors %[S_1], %[t2] , %[S_1] \n\t"\ + "ands %[S_3], %[S_5], %[t2] \n\t"\ "eors %[S_3], %[t3] , %[S_3] \n\t"\ : /* output variables - including inputs that are changed */\ - [t1] "=r" (t1), [t2] "=r" (t2), [t3] "=r" (t3), [t4] "=r" (t9),\ + [t1] "=r" (t1), [t2] "=r" (t2), [t3] "=r" (t3),\ [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) ,\ [S_1] "+r" (S5), [S_3] "+r" (S6), [S_5] "+r" (S7), [S_7] "+r" (S8)\ : : );\ diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_2/encrypt.c b/knot/Implementations/crypto_aead/knot256/armcortexm_2/encrypt.c index d30e162..2bcb512 100644 --- a/knot/Implementations/crypto_aead/knot256/armcortexm_2/encrypt.c +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_2/encrypt.c @@ -1,9 +1,24 @@ #include"auxFormat.h" #define aead_RATE (128 / 8) + + +#define PR0_ROUNDS 100 +#define PR_ROUNDS 52 +#define PRF_ROUNDS 56 +/* + +#define PR0_ROUNDS 100 +#define PR_ROUNDS 76 +#define PRF_ROUNDS 80 + + + #define PR0_ROUNDS 100 #define PR_ROUNDS 52 #define PRF_ROUNDS 56 + + * */ void Initialize(u32 *s, const unsigned char *npub, const unsigned char *k) { packU128FormatToFourPacket(s, npub); packU128FormatToFourPacket(s + 4, npub + 16); @@ -91,7 +106,7 @@ void ProcessCiphertext(u32 *s, unsigned char *m, const unsigned char *c, unsigned long long clen) { u32 dataFormat[8] = { 0 }; u32 dataFormat_1[4] = { 0 }; - u8 i, tempU8[64] = { 0 }; + u8 tempData[64] = { 0 }, tempU8[64] = { 0 
}; if (clen) { while (clen >= aead_RATE) { packU128FormatToFourPacket(dataFormat, c); @@ -110,11 +125,20 @@ void ProcessCiphertext(u32 *s, unsigned char *m, const unsigned char *c, c += aead_RATE; } unpackU128FormatToFourPacket(tempU8, s); - for (i = 0; i < clen; ++i, ++m, ++c) { - *m = tempU8[i] ^ *c; - tempU8[i] = *c; - } - tempU8[i] ^= 0x01; + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, c, clen * sizeof(unsigned char)); + tempData[clen] = 0x01; + U32BIG(((u32*)tempU8)[0]) ^= U32BIG( + ((u32* )tempData)[0]); + U32BIG(((u32*)tempU8)[1]) ^= U32BIG( + ((u32* )tempData)[1]); + U32BIG(((u32*)tempU8)[2]) ^= U32BIG( + ((u32* )tempData)[2]); + U32BIG(((u32*)tempU8)[3]) ^= U32BIG( + ((u32* )tempData)[3]); + memcpy(m, tempU8, clen * sizeof(unsigned char)); + memcpy(tempU8, tempData, clen * sizeof(unsigned char)); + c += clen; packU128FormatToFourPacket(s, tempU8); } } diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_3/auxFormat.h b/knot/Implementations/crypto_aead/knot256/armcortexm_3/auxFormat.h index 29e78bc..c08105d 100644 --- a/knot/Implementations/crypto_aead/knot256/armcortexm_3/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_3/auxFormat.h @@ -1,82 +1,70 @@ - #include"crypto_aead.h" #include"api.h" #include +#include +#include #define U32BIG(x) (x) - #define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) #define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) - #define sbox(a, b, c, d, f, g, h) \ { \ t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ } - typedef unsigned char u8; typedef unsigned int u32; typedef unsigned long long u64; -void printU8(char name[], u8 var[], long len, int offset); -// t9 -#define puck32(in)\ -{\ -t9 = (in ^ (in >> 1)) & 0x22222222; in ^= t9 ^ (t9 << 1);\ -t9 = (in ^ (in >> 2)) & 0x0C0C0C0C; in ^= t9 ^ (t9 << 2);\ -t9 = (in ^ (in >> 4)) & 0x00F000F0; in ^= t9 ^ (t9 << 4);\ -t9 = (in ^ (in >> 8)) & 
0x0000FF00; in ^= t9 ^ (t9 << 8);\ +#define puckU32ToFour(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 2)) & 0x30303030;lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 1)) & 0x44444444; lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00; lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 2)) & 0x30303030; lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 8)) & 0x00ff0000; lo ^= r0 ^ (r0 >> 8);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00; lo ^= r0 ^ (r0 >> 4);\ } -// t9 -#define unpuck32(t0){\ - t9 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= t9 ^ (t9 << 8); \ - t9 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= t9 ^ (t9 << 4); \ - t9 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= t9 ^ (t9 << 2); \ - t9 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= t9 ^ (t9 << 1); \ +#define unpuckU32ToFour(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 8)) & 0x00ff0000, lo ^= r0 ^ (r0 >> 8);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 1)) & 0x44444444, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ } -//u32 t1, t2, t3,t8, #define packU128FormatToFourPacket(out,in) {\ t8 = U32BIG(((u32*)in)[0]); \ t1 = U32BIG(((u32*)in)[1]); \ t2 = U32BIG(((u32*)in)[2]); \ t3 = U32BIG(((u32*)in)[3]); \ - puck32(t8); puck32(t8); \ - puck32(t1); puck32(t1); \ - puck32(t2); puck32(t2); \ - puck32(t3); puck32(t3); \ - out[3] = t3 & 0xff000000 | ((t2 >> 8) & 0x00ff0000) | ((t1 >> 16) & 0x0000ff00) | (t8 >> 24); \ - out[2] = ((t3 << 8) & 0xff000000) | (t2 & 0x00ff0000) | ((t1 >> 8) & 0x0000ff00) | ((t8 >> 16) & 0x000000ff); \ - out[1] = ((t3 << 16) & 0xff000000) | ((t2 << 8) & 0x00ff0000) | (t1 & 0x0000ff00) | ((t8 >> 8) & 0x000000ff); \ - out[0] = ((t3 << 24) & 0xff000000) | ((t2 << 16) & 0x00ff0000) | ((t1 << 8) & 0x0000ff00) | (t8 & 0x000000ff); \ + puckU32ToFour(t8); \ + puckU32ToFour(t1); \ + puckU32ToFour(t2); \ + puckU32ToFour(t3); \ + out[3] =( (t3 & 0xff000000 )| ((t2 >> 8) & 
0x00ff0000) | ((t1 >> 16) & 0x0000ff00) | (t8 >> 24)); \ + out[2] = ((t3 << 8) & 0xff000000) | (t2 & 0x00ff0000) | ((t1 >> 8) & 0x0000ff00) | ((t8 >> 16) & 0x000000ff); \ + out[1] = ((t3 << 16) & 0xff000000) | ((t2 << 8) & 0x00ff0000) | (t1 & 0x0000ff00) | ((t8 >> 8) & 0x000000ff); \ + out[0] = ((t3 << 24) & 0xff000000) | ((t2 << 16) & 0x00ff0000) | ((t1 << 8) & 0x0000ff00) | (t8 & 0x000000ff); \ } //u32 u32 t1, t2, t3,t8, -#define unpackU128FormatToFourPacket( out, dataFormat) {\ -t3 = dataFormat[3] & 0xff000000 | ((dataFormat[2] >> 8) & 0x00ff0000) | ((dataFormat[1] >> 16) & 0x0000ff00) | (dataFormat[0] >> 24); \ -t2 = ((dataFormat[3] << 8) & 0xff000000) | (dataFormat[2] & 0x00ff0000) | ((dataFormat[1] >> 8) & 0x0000ff00) | ((dataFormat[0] >> 16) & 0x000000ff); \ -t1 = ((dataFormat[3] << 16) & 0xff000000) | ((dataFormat[2] << 8) & 0x00ff0000) | (dataFormat[1] & 0x0000ff00) | ((dataFormat[0] >> 8) & 0x000000ff); \ -t8 = ((dataFormat[3] << 24) & 0xff000000) | ((dataFormat[2] << 16) & 0x00ff0000) | ((dataFormat[1] << 8) & 0x0000ff00) | (dataFormat[0] & 0x000000ff); \ -unpuck32(t8); unpuck32(t8); \ -unpuck32(t1); unpuck32(t1); \ -unpuck32(t2); unpuck32(t2); \ -unpuck32(t3); unpuck32(t3); \ -((u32*)out)[0] = U32BIG(t8); \ -((u32*)out)[1] = U32BIG(t1); \ -((u32*)out)[2] = U32BIG(t2); \ -((u32*)out)[3] = U32BIG(t3); \ +#define unpackU128FormatToFourPacket( out, in) {\ + t[3] = (in[3] & 0xff000000 )| ((in[2] >> 8) & 0x00ff0000) \ + | ((in[1] >> 16) & 0x0000ff00) | (in[0] >> 24); \ + t[2] = ((in[3] << 8) & 0xff000000) | (in[2] & 0x00ff0000) \ + | ((in[1] >> 8) & 0x0000ff00) | ((in[0] >> 16) & 0x000000ff); \ + t[1] = ((in[3] << 16) & 0xff000000) | ((in[2] << 8) & 0x00ff0000) \ + | (in[1] & 0x0000ff00) | ((in[0] >> 8) & 0x000000ff); \ + t[0] = ((in[3] << 24) & 0xff000000) | ((in[2] << 16) & 0x00ff0000) \ + | ((in[1] << 8) & 0x0000ff00) | (in[0] & 0x000000ff); \ + unpuckU32ToFour(t[0]); \ + unpuckU32ToFour(t[1]); \ + unpuckU32ToFour(t[2]); \ + unpuckU32ToFour(t[3]); \ + 
memcpy(out, t, 16 * sizeof(unsigned char)); \ } -#define packU64FormatToFourPacket( out, in) {\ -t1 = U32BIG(((u32*)in)[0]); \ -t2 = U32BIG(((u32*)in)[1]); \ -puck32(t1); \ -puck32(t1); \ -puck32(t2); \ -puck32(t2); \ -out[3] = ((t2 >> 16) & 0x0000ff00) | ((t1 >> 24)); \ -out[2] = ((t2 >> 8) & 0x0000ff00) | ((t1 >> 16) & 0x000000ff); \ -out[1] = (t2 & 0x0000ff00) | ((t1 >> 8) & 0x000000ff); \ -out[0] = ((t2 << 8) & 0x0000ff00) | (t1 & 0x000000ff); \ -} #define BIT_LOTR32_16(t0,t1,t2,t3,t4,t5,t6,t7){\ t4= LOTR32(t0, 4);\ t5 = LOTR32(t1, 4);\ @@ -90,11 +78,11 @@ t6 = LOTR32(t1, 6); \ t7 = LOTR32(t2, 6); \ } -#define ROUND512( arr,lunNum) {\ -s[3] ^= (arr[lunNum] >> 6) & 0x3;\ -s[2] ^= (arr[lunNum] >> 4) & 0x3;\ -s[1] ^= (arr[lunNum] >> 2) & 0x3;\ -s[0] ^= arr[lunNum] & 0x3;\ +#define ROUND512( lunNum) {\ +s[3] ^= (constant7Format_aead[lunNum] >> 6) & 0x3;\ +s[2] ^= (constant7Format_aead[lunNum] >> 4) & 0x3;\ +s[1] ^= (constant7Format_aead[lunNum] >> 2) & 0x3;\ +s[0] ^= constant7Format_aead[lunNum] & 0x3;\ sbox(s[3], s[7], s[11], s[15], s_temp[7], s_temp[11], s_temp[15]);\ sbox(s[2], s[6], s[10], s[14], s[7] , s_temp[10], s_temp[14]);\ sbox(s[1], s[5], s[9], s[13], s[6] , s_temp[9], s_temp[13]);\ diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_3/encrypt.c b/knot/Implementations/crypto_aead/knot256/armcortexm_3/encrypt.c index d8d1476..b3c03e5 100644 --- a/knot/Implementations/crypto_aead/knot256/armcortexm_3/encrypt.c +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_3/encrypt.c @@ -1,157 +1,51 @@ - #include"auxFormat.h" #define aead_RATE (128 / 8) #define PR0_ROUNDS 100 #define PR_ROUNDS 52 #define PRF_ROUNDS 56 -unsigned char constant7Format_aead[127] = { - /*constant7_aead_256*/ -0x1, -0x4, -0x10, -0x40, -0x2, -0x8, -0x21, -0x5, -0x14, -0x50, -0x42, -0xa, -0x29, -0x24, -0x11, -0x44, -0x12, -0x48, -0x23, -0xd, -0x35, -0x55, -0x56, -0x5a, -0x6b, -0x2e, -0x38, -0x60, -0x3, -0xc, -0x31, -0x45, -0x16, -0x58, -0x63, -0xf, -0x3d, -0x74, -0x53, 
-0x4e, -0x3b, -0x6c, -0x32, -0x49, -0x27, -0x1d, -0x75, -0x57, -0x5e, -0x7b, -0x6e, -0x3a, -0x68, -0x22, -0x9, -0x25, -0x15, -0x54, -0x52, -0x4a, -0x2b, -0x2c, -0x30, -0x41, -0x6, -0x18, -0x61, -0x7, -0x1c, -0x71, -0x47, -0x1e, -0x79, -0x66, -0x1b, -0x6d, -0x36, -0x59, -0x67, -0x1f, -0x7d, -0x76, -0x5b, -0x6f, -0x3e, -0x78, -0x62, -0xb, -0x2d, -0x34, -0x51, -0x46, -0x1a, -0x69, -0x26, -0x19, -0x65, -0x17, -0x5c, -0x73, -0x4f, -0x3f, -0x7c, -0x72, -0x4b, -0x2f, -0x3c, -0x70, -0x43, -0xe, -0x39, -0x64, -0x13, -0x4c, -0x33, -0x4d, -0x37, -0x5d, -0x77, -0x5f, -0x7f, -0x7e, -0x7a, -0x6a, -0x2a, -0x28, -0x20, -}; +/* + + #define PR0_ROUNDS 100 + #define PR_ROUNDS 76 + #define PRF_ROUNDS 80 + #define PR0_ROUNDS 100 + #define PR_ROUNDS 52 + #define PRF_ROUNDS 56 + * */ +unsigned char constant7Format_aead[100] = { 0x01, 0x04, 0x10, 0x40, 0x02, 0x08, + 0x21, 0x05, 0x14, 0x50, 0x42, 0x0a, 0x29, 0x24, 0x11, 0x44, 0x12, 0x48, + 0x23, 0x0d, 0x35, 0x55, 0x56, 0x5a, 0x6b, 0x2e, 0x38, 0x60, 0x03, 0x0c, + 0x31, 0x45, 0x16, 0x58, 0x63, 0x0f, 0x3d, 0x74, 0x53, 0x4e, 0x3b, 0x6c, + 0x32, 0x49, 0x27, 0x1d, 0x75, 0x57, 0x5e, 0x7b, 0x6e, 0x3a, 0x68, 0x22, + 0x09, 0x25, 0x15, 0x54, 0x52, 0x4a, 0x2b, 0x2c, 0x30, 0x41, 0x06, 0x18, + 0x61, 0x07, 0x1c, 0x71, 0x47, 0x1e, 0x79, 0x66, 0x1b, 0x6d, 0x36, 0x59, + 0x67, 0x1f, 0x7d, 0x76, 0x5b, 0x6f, 0x3e, 0x78, 0x62, 0x0b, 0x2d, 0x34, + 0x51, 0x46, 0x1a, 0x69, 0x26, 0x19, 0x65, 0x17, 0x5c, 0x73, }; +#define Processing_Data(data) \ +do { \ + packU128FormatToFourPacket(dataFormat, data); \ + s[0] ^= dataFormat[0]; \ + s[1] ^= dataFormat[1]; \ + s[2] ^= dataFormat[2]; \ + s[3] ^= dataFormat[3]; \ +} while (0) -int crypto_aead_encrypt( - unsigned char *c, unsigned long long *clen, - const unsigned char *m, unsigned long long mlen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *nsec, - const unsigned char *npub, - const unsigned char *k -) { - u32 i ; +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const 
unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + u32 i; + u32 t[4] = { 0 }; u32 s_temp[16] = { 0 }; u32 t1, t2, t3, t5, t6, t8, t9, t11; u32 s[16] = { 0 }; u32 dataFormat[4] = { 0 }; - u8 tempData[16] = {0}; + u8 tempData[16] = { 0 }; + u8 tempU8[32] = { 0 }; *clen = mlen + CRYPTO_ABYTES; //initialization packU128FormatToFourPacket(s, npub); @@ -159,18 +53,15 @@ int crypto_aead_encrypt( packU128FormatToFourPacket((s + 8), k); packU128FormatToFourPacket((s + 12), (k + 16)); for (i = 0; i < PR0_ROUNDS; i++) { - ROUND512(constant7Format_aead,i); + ROUND512(i); } // process associated data + //PAD(adlen, ad); if (adlen) { while (adlen >= aead_RATE) { - packU128FormatToFourPacket(dataFormat, ad); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - s[2] ^= dataFormat[2]; - s[3] ^= dataFormat[3]; + Processing_Data(ad); for (i = 0; i < PR_ROUNDS; i++) { - ROUND512(constant7Format_aead, i); + ROUND512(i); } adlen -= aead_RATE; ad += aead_RATE; @@ -178,26 +69,19 @@ int crypto_aead_encrypt( memset(tempData, 0, sizeof(tempData)); memcpy(tempData, ad, adlen * sizeof(unsigned char)); tempData[adlen] = 0x01; - packU128FormatToFourPacket(dataFormat, tempData); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - s[2] ^= dataFormat[2]; - s[3] ^= dataFormat[3]; + Processing_Data(tempData); for (i = 0; i < PR_ROUNDS; i++) { - ROUND512(constant7Format_aead, i); + ROUND512(i); } } s[15] ^= 0x80000000; + // process p data if (mlen) { while (mlen >= aead_RATE) { - packU128FormatToFourPacket(dataFormat, m); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - s[2] ^= dataFormat[2]; - s[3] ^= dataFormat[3]; + Processing_Data(m); unpackU128FormatToFourPacket(c, s); for (i = 0; i < PR_ROUNDS; i++) { - ROUND512(constant7Format_aead, i); + ROUND512(i); } mlen -= aead_RATE; m += aead_RATE; @@ -205,44 +89,37 @@ int crypto_aead_encrypt( } memset(tempData, 0, 
sizeof(tempData)); memcpy(tempData, m, mlen * sizeof(unsigned char)); - tempData[mlen]= 0x01; - packU128FormatToFourPacket(dataFormat, tempData); - s[0] ^= dataFormat[0]; - s[1] ^= dataFormat[1]; - s[2] ^= dataFormat[2]; - s[3] ^= dataFormat[3]; + tempData[mlen] = 0x01; + Processing_Data(tempData); unpackU128FormatToFourPacket(tempData, s); memcpy(c, tempData, mlen * sizeof(unsigned char)); c += mlen; } // finalization for (i = 0; i < PRF_ROUNDS; i++) { - ROUND512(constant7Format_aead, i); + ROUND512(i); } - // return tag - unpackU128FormatToFourPacket(c, s); - unpackU128FormatToFourPacket((c+16), (s+4)); + unpackU128FormatToFourPacket(tempU8, s); + unpackU128FormatToFourPacket((tempU8 + 16), (s + 4)); + memcpy(c, tempU8, CRYPTO_ABYTES * sizeof(unsigned char)); return 0; } -int crypto_aead_decrypt( - unsigned char *m, unsigned long long *mlen, - unsigned char *nsec, - const unsigned char *c, unsigned long long clen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *npub, - const unsigned char *k -){ +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { u32 s_temp[16] = { 0 }; + u32 t[4] = { 0 }; u32 t1, t2, t3, t5, t6, t8, t9, t11; - u8 i ; + u8 i; // initialization u32 s[16] = { 0 }; u32 dataFormat_1[4] = { 0 }; - u32 dataFormat_2[4] = { 0 }; + u32 dataFormat[4] = { 0 }; u8 tempData[16] = { 0 }; u8 tempU8[64] = { 0 }; - + if (clen < CRYPTO_ABYTES) return -1; *mlen = clen - CRYPTO_ABYTES; @@ -252,77 +129,77 @@ int crypto_aead_decrypt( packU128FormatToFourPacket((s + 8), k); packU128FormatToFourPacket((s + 12), (k + 16)); for (i = 0; i < PR0_ROUNDS; i++) { - ROUND512(constant7Format_aead, i); + ROUND512(i); } // process associated data if (adlen) { while (adlen >= aead_RATE) { - packU128FormatToFourPacket(dataFormat_2, ad); - s[0] ^= 
dataFormat_2[0]; - s[1] ^= dataFormat_2[1]; - s[2] ^= dataFormat_2[2]; - s[3] ^= dataFormat_2[3]; + Processing_Data(ad); for (i = 0; i < PR_ROUNDS; i++) { - ROUND512(constant7Format_aead, i); + ROUND512(i); } adlen -= aead_RATE; ad += aead_RATE; } memset(tempData, 0, sizeof(tempData)); - memcpy(tempData, ad, adlen * sizeof(unsigned char)); tempData[adlen] = 0x01; - packU128FormatToFourPacket(dataFormat_2, tempData); - s[0] ^= dataFormat_2[0]; - s[1] ^= dataFormat_2[1]; - s[2] ^= dataFormat_2[2]; - s[3] ^= dataFormat_2[3]; + Processing_Data(tempData); for (i = 0; i < PR_ROUNDS; i++) { - ROUND512(constant7Format_aead, i); + ROUND512(i); } } s[15] ^= 0x80000000; + // process c data clen = clen - CRYPTO_KEYBYTES; - if (clen) { while (clen >= aead_RATE) { - packU128FormatToFourPacket(dataFormat_2, c); - dataFormat_1[0] = s[0] ^ dataFormat_2[0]; - dataFormat_1[1] = s[1] ^ dataFormat_2[1]; - dataFormat_1[2] = s[2] ^ dataFormat_2[2]; - dataFormat_1[3] = s[3] ^ dataFormat_2[3]; + packU128FormatToFourPacket(dataFormat, c); + dataFormat_1[0] = s[0] ^ dataFormat[0]; + dataFormat_1[1] = s[1] ^ dataFormat[1]; + dataFormat_1[2] = s[2] ^ dataFormat[2]; + dataFormat_1[3] = s[3] ^ dataFormat[3]; unpackU128FormatToFourPacket(m, dataFormat_1); - s[0] = dataFormat_2[0]; - s[1] = dataFormat_2[1]; - s[2] = dataFormat_2[2]; - s[3] = dataFormat_2[3]; + s[0] = dataFormat[0]; + s[1] = dataFormat[1]; + s[2] = dataFormat[2]; + s[3] = dataFormat[3]; for (i = 0; i < PR_ROUNDS; i++) { - ROUND512(constant7Format_aead, i); + ROUND512(i); } clen -= aead_RATE; m += aead_RATE; c += aead_RATE; } unpackU128FormatToFourPacket(tempU8, s); - for (i = 0; i < clen; ++i, ++m, ++c) - { - *m = tempU8[i] ^ *c; - tempU8[i] = *c; - } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, c, clen * sizeof(unsigned char)); + tempData[clen] = 0x01; + U32BIG(((u32*)tempU8)[0]) ^= U32BIG( + ((u32* )tempData)[0]); + U32BIG(((u32*)tempU8)[1]) ^= U32BIG( + ((u32* )tempData)[1]); + U32BIG(((u32*)tempU8)[2]) ^= U32BIG( 
+ ((u32* )tempData)[2]); + U32BIG(((u32*)tempU8)[3]) ^= U32BIG( + ((u32* )tempData)[3]); + memcpy(m, tempU8, clen * sizeof(unsigned char)); + memcpy(tempU8, tempData, clen * sizeof(unsigned char)); + c += clen; tempU8[i] ^= 0x01; packU128FormatToFourPacket(s, tempU8); } // finalization for (i = 0; i < PRF_ROUNDS; i++) { - ROUND512(constant7Format_aead, i); + ROUND512(i); + } - // return tag unpackU128FormatToFourPacket(tempU8, s); unpackU128FormatToFourPacket((tempU8 + 16), (s + 4)); - if (memcmp((void*)tempU8, (void*)c, CRYPTO_ABYTES)) { + if (memcmp((void*) tempU8, (void*) c, CRYPTO_ABYTES)) { + memset(m, 0, sizeof(unsigned char) * (*mlen)); *mlen = 0; - memset(m, 0, sizeof(unsigned char) * (clen - CRYPTO_ABYTES)); return -1; } return 0; -} \ No newline at end of file +} diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_4/auxFormat.c b/knot/Implementations/crypto_aead/knot256/armcortexm_4/auxFormat.c index 63f20be..0c1bb90 100644 --- a/knot/Implementations/crypto_aead/knot256/armcortexm_4/auxFormat.c +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_4/auxFormat.c @@ -26,15 +26,10 @@ void packU128FormatToFourPacket(u32 * out, u8 * in) { u32 t1 = U32BIG(((u32* )in)[1]); u32 t2 = U32BIG(((u32* )in)[2]); u32 t3 = U32BIG(((u32* )in)[3]); - u32 temp1; - puck32(t0); - puck32(t0); - puck32(t1); - puck32(t1); - puck32(t2); - puck32(t2); - puck32(t3); - puck32(t3); + puckU32ToFour(t0); \ + puckU32ToFour(t1); \ + puckU32ToFour(t2); \ + puckU32ToFour(t3); \ out[3] = (t3 & 0xff000000) | ((t2 >> 8) & 0x00ff0000) | ((t1 >> 16) & 0x0000ff00) | (t0 >> 24); out[2] = ((t3 << 8) & 0xff000000) | (t2 & 0x00ff0000) @@ -46,7 +41,6 @@ void packU128FormatToFourPacket(u32 * out, u8 * in) { } void unpackU128FormatToFourPacket(u8 * out, u32 * in) { u32 t[4] = { 0 }; - u32 r0; t[3] = (in[3] & 0xff000000 )| ((in[2] >> 8) & 0x00ff0000) | ((in[1] >> 16) & 0x0000ff00) | (in[0] >> 24); t[2] = ((in[3] << 8) & 0xff000000) | (in[2] & 0x00ff0000) @@ -55,14 +49,10 @@ void 
unpackU128FormatToFourPacket(u8 * out, u32 * in) { | (in[1] & 0x0000ff00) | ((in[0] >> 8) & 0x000000ff); t[0] = ((in[3] << 24) & 0xff000000) | ((in[2] << 16) & 0x00ff0000) | ((in[1] << 8) & 0x0000ff00) | (in[0] & 0x000000ff); - unpuck32(t[0]); - unpuck32(t[0]); - unpuck32(t[1]); - unpuck32(t[1]); - unpuck32(t[2]); - unpuck32(t[2]); - unpuck32(t[3]); - unpuck32(t[3]); + unpuckU32ToFour(t[0]); + unpuckU32ToFour(t[1]); + unpuckU32ToFour(t[2]); + unpuckU32ToFour(t[3]); memcpy(out, t, 16 * sizeof(unsigned char)); } diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_4/auxFormat.h b/knot/Implementations/crypto_aead/knot256/armcortexm_4/auxFormat.h index 37a8c45..14e62be 100644 --- a/knot/Implementations/crypto_aead/knot256/armcortexm_4/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_4/auxFormat.h @@ -1,6 +1,8 @@ #include"crypto_aead.h" #include"api.h" #include +#include +#include #define U32BIG(x) (x) #define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) @@ -14,24 +16,31 @@ typedef unsigned char u8; typedef unsigned int u32; typedef unsigned long long u64; -void printU8(char name[], u8 var[], long len, int offset); -//new -void puckU8FormatToFourPacket(u8 in, u8 *out); - -#define puck32(in)\ -{\ -temp1 = (in ^ (in >> 1)) & 0x22222222; in ^= temp1 ^ (temp1 << 1);\ -temp1 = (in ^ (in >> 2)) & 0x0C0C0C0C; in ^= temp1 ^ (temp1 << 2);\ -temp1 = (in ^ (in >> 4)) & 0x00F000F0; in ^= temp1 ^ (temp1 << 4);\ -temp1 = (in ^ (in >> 8)) & 0x0000FF00; in ^= temp1 ^ (temp1 << 8);\ +#define puckU32ToFour(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 1)) & 0x44444444, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 8)) & 0x00ff0000, lo ^= r0 ^ (r0 >> 8);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ } -#define unpuck32(t0){\ - r0 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= r0 ^ (r0 << 8); 
\ - r0 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= r0 ^ (r0 << 4); \ - r0 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= r0 ^ (r0 << 2); \ - r0 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= r0 ^ (r0 << 1); \ +#define unpuckU32ToFour(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 8)) & 0x00ff0000, lo ^= r0 ^ (r0 >> 8);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 1)) & 0x44444444, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ } +void unpackU128FormatToFourPacket(u8 * out, u32 * in) ; + +void packU128FormatToFourPacket(u32 * out, u8 * in) ; + +void P512(unsigned int *s, unsigned char *round, unsigned char rounds); + #define BIT_LOTR32_16(t0,t1,t2,t3,t4,t5,t6,t7){\ t4= LOTR32(t0, 4);\ diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_4/encrypt.c b/knot/Implementations/crypto_aead/knot256/armcortexm_4/encrypt.c index 31a9ce9..935a36d 100644 --- a/knot/Implementations/crypto_aead/knot256/armcortexm_4/encrypt.c +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_4/encrypt.c @@ -4,6 +4,16 @@ #define PR0_ROUNDS 100 #define PR_ROUNDS 52 #define PRF_ROUNDS 56 +/* + +#define PR0_ROUNDS 100 +#define PR_ROUNDS 76 +#define PRF_ROUNDS 80 + +#define PR0_ROUNDS 100 +#define PR_ROUNDS 52 +#define PRF_ROUNDS 56 + * */ void Initialize(u32 *s, const unsigned char *npub, const unsigned char *k) { packU128FormatToFourPacket(s, npub); packU128FormatToFourPacket(s + 4, npub + 16); @@ -92,7 +102,7 @@ void ProcessCiphertext(u32 *s, unsigned char *m, const unsigned char *c, unsigned long long clen) { u32 dataFormat[8] = { 0 }; u32 dataFormat_1[4] = { 0 }; - u8 i, tempU8[64] = { 0 }; + u8 tempData[64] = { 0 }, tempU8[64] = { 0 }; if (clen) { while (clen >= aead_RATE) { packU128FormatToFourPacket(dataFormat, c); @@ -111,11 +121,20 @@ void ProcessCiphertext(u32 *s, unsigned char *m, const unsigned char *c, c += 
aead_RATE; } unpackU128FormatToFourPacket(tempU8, s); - for (i = 0; i < clen; ++i, ++m, ++c) { - *m = tempU8[i] ^ *c; - tempU8[i] = *c; - } - tempU8[i] ^= 0x01; + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, c, clen * sizeof(unsigned char)); + tempData[clen] = 0x01; + U32BIG(((u32*)tempU8)[0]) ^= U32BIG( + ((u32* )tempData)[0]); + U32BIG(((u32*)tempU8)[1]) ^= U32BIG( + ((u32* )tempData)[1]); + U32BIG(((u32*)tempU8)[2]) ^= U32BIG( + ((u32* )tempData)[2]); + U32BIG(((u32*)tempU8)[3]) ^= U32BIG( + ((u32* )tempData)[3]); + memcpy(m, tempU8, clen * sizeof(unsigned char)); + memcpy(tempU8, tempData, clen * sizeof(unsigned char)); + c += clen; packU128FormatToFourPacket(s, tempU8); } } diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_5/auxFormat.c b/knot/Implementations/crypto_aead/knot256/armcortexm_5/auxFormat.c index 564849d..828caf4 100644 --- a/knot/Implementations/crypto_aead/knot256/armcortexm_5/auxFormat.c +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_5/auxFormat.c @@ -60,15 +60,10 @@ void packU128FormatToFourPacket(u32 *out, u8 *in) { u32 t1 = U32BIG(((u32* )in)[1]); u32 t2 = U32BIG(((u32* )in)[2]); u32 t3 = U32BIG(((u32* )in)[3]); - u32 temp1; - puck32(t0); - puck32(t0); - puck32(t1); - puck32(t1); - puck32(t2); - puck32(t2); - puck32(t3); - puck32(t3); + puckU32ToFour(t0); + puckU32ToFour(t1); + puckU32ToFour(t2); + puckU32ToFour(t3); out[3] = (t3 & 0xff000000) | ((t2 >> 8) & 0x00ff0000) | ((t1 >> 16) & 0x0000ff00) | (t0 >> 24); out[2] = ((t3 << 8) & 0xff000000) | (t2 & 0x00ff0000) @@ -80,7 +75,6 @@ void packU128FormatToFourPacket(u32 *out, u8 *in) { } void unpackU128FormatToFourPacket(u8 *out, u32 *in) { u32 t[4] = { 0 }; - u32 r0; t[3] = (in[3] & 0xff000000) | ((in[2] >> 8) & 0x00ff0000) | ((in[1] >> 16) & 0x0000ff00) | (in[0] >> 24); t[2] = ((in[3] << 8) & 0xff000000) | (in[2] & 0x00ff0000) @@ -89,26 +83,10 @@ void unpackU128FormatToFourPacket(u8 *out, u32 *in) { | (in[1] & 0x0000ff00) | ((in[0] >> 8) & 0x000000ff); 
t[0] = ((in[3] << 24) & 0xff000000) | ((in[2] << 16) & 0x00ff0000) | ((in[1] << 8) & 0x0000ff00) | (in[0] & 0x000000ff); - unpuck32(t[0]); - unpuck32(t[0]); - unpuck32(t[1]); - unpuck32(t[1]); - unpuck32(t[2]); - unpuck32(t[2]); - unpuck32(t[3]); - unpuck32(t[3]); + unpuckU32ToFour(t[0]); + unpuckU32ToFour(t[1]); + unpuckU32ToFour(t[2]); + unpuckU32ToFour(t[3]); memcpy(out, t, 16 * sizeof(unsigned char)); } -void packU64FormatToFourPacket(u32 *out, u8 *in) { - u32 t1, t2, temp1; - t1 = U32BIG(((u32* )in)[0]); - t2 = U32BIG(((u32* )in)[1]); - puck32(t1); - puck32(t1); - puck32(t2); - puck32(t2); - out[3] = ((t2 >> 16) & 0x0000ff00) | ((t1 >> 24)); - out[2] = ((t2 >> 8) & 0x0000ff00) | ((t1 >> 16) & 0x000000ff); - out[1] = (t2 & 0x0000ff00) | ((t1 >> 8) & 0x000000ff); - out[0] = ((t2 << 8) & 0x0000ff00) | (t1 & 0x000000ff); -} + diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_5/auxFormat.h b/knot/Implementations/crypto_aead/knot256/armcortexm_5/auxFormat.h index 288571a..9062aac 100644 --- a/knot/Implementations/crypto_aead/knot256/armcortexm_5/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_5/auxFormat.h @@ -1,6 +1,8 @@ #include"crypto_aead.h" #include"api.h" #include +#include +#include #define U32BIG(x) (x) void P512(unsigned int *s, unsigned char *round, unsigned char rounds); @@ -11,18 +13,24 @@ typedef unsigned long long u64; void printU8(char name[], u8 var[], long len, int offset); -#define puck32(in)\ -{\ -temp1 = (in ^ (in >> 1)) & 0x22222222; in ^= temp1 ^ (temp1 << 1);\ -temp1 = (in ^ (in >> 2)) & 0x0C0C0C0C; in ^= temp1 ^ (temp1 << 2);\ -temp1 = (in ^ (in >> 4)) & 0x00F000F0; in ^= temp1 ^ (temp1 << 4);\ -temp1 = (in ^ (in >> 8)) & 0x0000FF00; in ^= temp1 ^ (temp1 << 8);\ + +#define puckU32ToFour(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 1)) & 0x44444444, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 2)) & 
0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 8)) & 0x00ff0000, lo ^= r0 ^ (r0 >> 8);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ } -#define unpuck32(t0){\ - r0 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= r0 ^ (r0 << 8); \ - r0 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= r0 ^ (r0 << 4); \ - r0 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= r0 ^ (r0 << 2); \ - r0 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= r0 ^ (r0 << 1); \ +#define unpuckU32ToFour(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 8)) & 0x00ff0000, lo ^= r0 ^ (r0 >> 8);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 1)) & 0x44444444, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ } //t1 #define ARC(rci) \ diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_6/auxFormat.c b/knot/Implementations/crypto_aead/knot256/armcortexm_6/auxFormat.c index d38cf15..8699194 100644 --- a/knot/Implementations/crypto_aead/knot256/armcortexm_6/auxFormat.c +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_6/auxFormat.c @@ -63,15 +63,11 @@ void packU128FormatToFourPacket(u32 *out, u8 *in) { u32 t1 = U32BIG(((u32* )in)[1]); u32 t2 = U32BIG(((u32* )in)[2]); u32 t3 = U32BIG(((u32* )in)[3]); - u32 temp1; - puck32(t0); - puck32(t0); - puck32(t1); - puck32(t1); - puck32(t2); - puck32(t2); - puck32(t3); - puck32(t3); + puckU32ToFour(t0); + puckU32ToFour(t1); + puckU32ToFour(t2); + puckU32ToFour(t3); + out[3] = (t3 & 0xff000000) | ((t2 >> 8) & 0x00ff0000) | ((t1 >> 16) & 0x0000ff00) | (t0 >> 24); out[2] = ((t3 << 8) & 0xff000000) | (t2 & 0x00ff0000) @@ -83,7 +79,6 @@ void packU128FormatToFourPacket(u32 *out, u8 *in) { } void unpackU128FormatToFourPacket(u8 *out, u32 *in) { u32 t[4] = { 0 }; - u32 r0; t[3] = (in[3] & 0xff000000) | ((in[2] >> 8) & 0x00ff0000) | ((in[1] >> 16) & 0x0000ff00) | (in[0] >> 24); t[2] = ((in[3] << 8) & 
0xff000000) | (in[2] & 0x00ff0000) @@ -92,26 +87,10 @@ void unpackU128FormatToFourPacket(u8 *out, u32 *in) { | (in[1] & 0x0000ff00) | ((in[0] >> 8) & 0x000000ff); t[0] = ((in[3] << 24) & 0xff000000) | ((in[2] << 16) & 0x00ff0000) | ((in[1] << 8) & 0x0000ff00) | (in[0] & 0x000000ff); - unpuck32(t[0]); - unpuck32(t[0]); - unpuck32(t[1]); - unpuck32(t[1]); - unpuck32(t[2]); - unpuck32(t[2]); - unpuck32(t[3]); - unpuck32(t[3]); + unpuckU32ToFour(t[0]); + unpuckU32ToFour(t[1]); + unpuckU32ToFour(t[2]); + unpuckU32ToFour(t[3]); + memcpy(out, t, 16 * sizeof(unsigned char)); } -void packU64FormatToFourPacket(u32 *out, u8 *in) { - u32 t1, t2, temp1; - t1 = U32BIG(((u32* )in)[0]); - t2 = U32BIG(((u32* )in)[1]); - puck32(t1); - puck32(t1); - puck32(t2); - puck32(t2); - out[3] = ((t2 >> 16) & 0x0000ff00) | ((t1 >> 24)); - out[2] = ((t2 >> 8) & 0x0000ff00) | ((t1 >> 16) & 0x000000ff); - out[1] = (t2 & 0x0000ff00) | ((t1 >> 8) & 0x000000ff); - out[0] = ((t2 << 8) & 0x0000ff00) | (t1 & 0x000000ff); -} diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_6/auxFormat.h b/knot/Implementations/crypto_aead/knot256/armcortexm_6/auxFormat.h index 2314568..679abfd 100644 --- a/knot/Implementations/crypto_aead/knot256/armcortexm_6/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_6/auxFormat.h @@ -1,6 +1,8 @@ #include"crypto_aead.h" #include"api.h" #include +#include +#include #define U32BIG(x) (x) void P512(unsigned int *s, unsigned char *round, unsigned char rounds); @@ -10,19 +12,25 @@ typedef unsigned int u32; typedef unsigned long long u64; void printU8(char name[], u8 var[], long len, int offset); -#define puck32(in)\ -{\ -temp1 = (in ^ (in >> 1)) & 0x22222222; in ^= temp1 ^ (temp1 << 1);\ -temp1 = (in ^ (in >> 2)) & 0x0C0C0C0C; in ^= temp1 ^ (temp1 << 2);\ -temp1 = (in ^ (in >> 4)) & 0x00F000F0; in ^= temp1 ^ (temp1 << 4);\ -temp1 = (in ^ (in >> 8)) & 0x0000FF00; in ^= temp1 ^ (temp1 << 8);\ +#define puckU32ToFour(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 
2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 1)) & 0x44444444, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 8)) & 0x00ff0000, lo ^= r0 ^ (r0 >> 8);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ } -#define unpuck32(t0){\ - r0 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= r0 ^ (r0 << 8); \ - r0 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= r0 ^ (r0 << 4); \ - r0 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= r0 ^ (r0 << 2); \ - r0 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= r0 ^ (r0 << 1); \ +#define unpuckU32ToFour(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 8)) & 0x00ff0000, lo ^= r0 ^ (r0 >> 8);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 1)) & 0x44444444, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ } + //t1 #define P_512_ARC(rci) \ do { \ diff --git a/knot/Implementations/crypto_hash/knot256v1/LWC_HASH_KAT_256.txt b/knot/Implementations/crypto_hash/knot256v1/LWC_HASH_KAT_256.txt index 0a43090..1b55ad9 100644 --- a/knot/Implementations/crypto_hash/knot256v1/LWC_HASH_KAT_256.txt +++ b/knot/Implementations/crypto_hash/knot256v1/LWC_HASH_KAT_256.txt @@ -1028,6 +1028,7 @@ MD = 8F10A1FDCA1BACFC1CDBEDB01D88D58AA59EB73812DCC2931F2FB0C8D982290E Count = 258 Msg = 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F202122232425262728292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F404142434445464748494A4B4C4D4E4F505152535455565758595A5B5C5D5E5F606162636465666768696A6B6C6D6E6F707172737475767778797A7B7C7D7E7F808182838485868788898A8B8C8D8E8F909192939495969798999A9B9C9D9E9FA0A1A2A3A4A5A6A7A8A9AAABACADAEAFB0B1B2B3B4B5B6B7B8B9BABBBCBDBEBFC0C1C2C3C4C5C6C7C8C9CACBCCCDCECFD0D1D2D3D4D5D6D7D8D9DADBDCDDDEDFE0E1E2E3E4E5E6E7E8E9EAEBECEDEEEFF0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF00 + MD 
= 0824ECB2CE26B4448D8B0738872AB7BD2010F85042578EEE491DE9442790AF73 Count = 259 diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_2/auxFormat.c b/knot/Implementations/crypto_hash/knot256v2/armcortexm_2/auxFormat.c index ace0aa1..e46d5e6 100644 --- a/knot/Implementations/crypto_hash/knot256v2/armcortexm_2/auxFormat.c +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_2/auxFormat.c @@ -1,87 +1,82 @@ #include"auxFormat.h" -void packU32FormatToThreePacket(u32 * out, u8 * in) { - u32 t2 = U32BIG(((u32*)in)[0]); - out[2] = t2; out[1] = t2 >> 1; out[0] = t2 >> 2;//temp2[0] 0;temp2[1] 1;temp2[2] 2; - puckU32ToThree_1(out[0]); - puckU32ToThree_1(out[1]); - puckU32ToThree_1(out[2]); + +void packU32FormatToThreePacket(u32 *out, u8 *in) { + u32 t0 = U32BIG(((u32* )in)[0]); + puckU32ToThree_3(t0); + out[0] = ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); + out[1] = (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) + | ((t0 & 0xff00) >> 8)); + out[2] = (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) + | (t0 & 0x000000ff)); } -void unpackU32FormatToThreePacket(u8 * out, u32 * in) { - u32 temp0[3] = { 0 }; - temp0[0] = in[0] & 0x3ff; - temp0[1] = in[1] & 0x7ff; - temp0[2] = in[2] & 0x7ff; - unpuckU32ToThree_1(temp0[0]); - unpuckU32ToThree_1(temp0[1]); - unpuckU32ToThree_1(temp0[2]); - *(u32*)(out) = U32BIG(temp0[0]<<2 | temp0[1] << 1 | temp0[2]); +void unpackU32FormatToThreePacket(u8 *out, u32 *in) { + u32 t[3] = { 0 };\ + u32 t0 = in[0], t1 = in[1], t2 = in[2]; + t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | ((t0 & 0x300) << 20) + | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | ((t0 & 0xff) << 16) + | ((t1 & 0xff) << 8) | (t2 & 0xff); + unpuckU32ToThree_3(t[0]); + *(u32*) (out) = t[0]; } - - -void packU96FormatToThreePacket(u32 * out, u8 * in) { - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; 
- puckU32ToThree_1(temp0[0]); - puckU32ToThree_1(temp0[1]); - puckU32ToThree_1(temp0[2]); - temp1[0] = U32BIG(((u32*)in)[1]); temp1[1] = U32BIG(((u32*)in)[1]) >>1; temp1[2] = U32BIG(((u32*)in)[1]) >> 2; - puckU32ToThree_1(temp1[0]); - puckU32ToThree_1(temp1[1]); - puckU32ToThree_1(temp1[2]); - temp2[0] = U32BIG(((u32*)in)[2]); temp2[1] = U32BIG(((u32*)in)[2]) >> 1; temp2[2] = U32BIG(((u32*)in)[2]) >> 2; - puckU32ToThree_1(temp2[0]); - puckU32ToThree_1(temp2[1]); - puckU32ToThree_1(temp2[2]); - out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; - out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; - out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; +void packU96FormatToThreePacket(u32 *out, u8 *in) { + u32 t0 = U32BIG(((u32* )in)[0]), t1 = U32BIG(((u32* )in)[1]), t2 = U32BIG( + ((u32* )in)[2]); + puckU32ToThree_3(t0); + puckU32ToThree_3(t1); + puckU32ToThree_3(t2); + out[0] = (((t2 & 0x80000000)) | ((t2 & 0xc000000) << 3) + | ((t2 & 0xff00) << 13)) + | (((t1 & 0x40000000) >> 10) | ((t1 & 0x03000000) >> 6) + | ((t1 & 0x000000ff) << 10)) + | ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); + out[1] = (((t2 & 0x40000000) << 1) | ((t2 & 0x03000000) << 5) + | ((t2 & 0x000000ff) << 21)) + | ((((t1 & 0x30000000) >> 9) | ((t1 & 0xff0000) >> 5)) + | (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) + | ((t0 & 0xff00) >> 8))); + out[2] = ((((t2 & 0x30000000) << 2) | ((t2 & 0xff0000) << 6)) + | (((t1 & 0x80000000) >> 10) | ((t1 & 0xc000000) >> 7) + | ((t1 & 0xff00) << 3)) + | (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) + | (t0 & 0x000000ff))); } -void unpackU96FormatToThreePacket(u8 * out, u32 * in) { - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - u32 t[3] = { 0 }; - temp0[0] = in[2] & 0x7ff; - temp0[1] = in[1] & 0x7ff; - temp0[2] = in[0] & 0x3ff; - temp1[0] = (in[0]>>10) & 0x7ff; - temp1[1] = (in[2] >>11 ) & 0x7ff; - temp1[2] = (in[1] >> 11) & 0x3ff; - temp2[0] = in[1] >> 21; - temp2[1] = in[0] >> 21; - temp2[2] = 
in[2] >> 22; - unpuckU32ToThree_1(temp0[0]); - unpuckU32ToThree_1(temp0[1]); - unpuckU32ToThree_1(temp0[2]); - t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; - unpuckU32ToThree_1(temp1[0]); - unpuckU32ToThree_1(temp1[1]); - unpuckU32ToThree_1(temp1[2]); - t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; - unpuckU32ToThree_1(temp2[0]); - unpuckU32ToThree_1(temp2[1]); - unpuckU32ToThree_1(temp2[2]); - t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; +void unpackU96FormatToThreePacket(u8 *out, u32 *in) { + u32 t[3] = { 0 };\ + u32 t0 = in[0], t1 = in[1], t2 = in[2]; + t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | ((t0 & 0x300) << 20) + | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | ((t0 & 0xff) << 16) + | ((t1 & 0xff) << 8) | (t2 & 0xff); + t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) + | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) + | ((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) + | ((t2 & 0x7f800) >> 3) | ((t0 & 0x3fc00) >> 10); + t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) + | ((t2 & 0xc0000000) >> 2) | ((t0 & 0x60000000) >> 3) + | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) + | ((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); + unpuckU32ToThree_3(t[0]); + unpuckU32ToThree_3(t[1]); + unpuckU32ToThree_3(t[2]); memcpy(out, t, 12 * sizeof(unsigned char)); } -unsigned char constant7Format[80] = { - /*constant7Format[127]: 12*6=72*/ - 0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, - 0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, - 0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, - 0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, - 0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, - 0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, - 0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,}; + +unsigned char constant7Format[80] = { +/*constant7Format[127]: 12*6=72*/ +0x01, 0x08, 0x40, 0x02, 0x10, 0x80, 0x05, 0x09, 0x48, 0x42, 0x12, 0x90, 0x85, + 0x0c, 0x41, 0x0a, 
0x50, 0x82, 0x15, 0x89, 0x4d, 0x4b, 0x5a, 0xd2, 0x97, + 0x9c, 0xc4, 0x06, 0x11, 0x88, 0x45, 0x0b, 0x58, 0xc2, 0x17, 0x99, 0xcd, + 0x4e, 0x53, 0x9a, 0xd5, 0x8e, 0x54, 0x83, 0x1d, 0xc9, 0x4f, 0x5b, 0xda, + 0xd7, 0x9e, 0xd4, 0x86, 0x14, 0x81, 0x0d, 0x49, 0x4a, 0x52, 0x92, 0x95, + 0x8c, 0x44, 0x03, 0x18, 0xc0, 0x07, 0x19, 0xc8, 0x47, 0x1b, 0xd8, 0xc7, + 0x1e, 0xd1, 0x8f, 0x5c, 0xc3, 0x1f, 0xd9, }; void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) { - u32 rci,t1,t2; - unsigned char i; + u32 rci, t1, t2; + unsigned char i; for (i = 0; i < lunNum; i++) { - rci=constant7Format[i];\ - P384_ARC_SC1(rci,s[3],s[6],s[9]); \ + rci = constant7Format[i];\ + P384_ARC_SC1(rci, s[3], s[6], s[9]); P384_2SC(s[1],s[4],s[7],s[10],s[2],s[5],s[8],s[11]);\ P384_SR();\ + } } diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_2/auxFormat.h b/knot/Implementations/crypto_hash/knot256v2/armcortexm_2/auxFormat.h index 8de30c3..8bbc86f 100644 --- a/knot/Implementations/crypto_hash/knot256v2/armcortexm_2/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_2/auxFormat.h @@ -9,19 +9,23 @@ typedef unsigned char u8; typedef unsigned int u32; typedef unsigned long long u64; -#define puckU32ToThree_1(x){\ -x &= 0x49249249;\ -x = (x | (x >> 2)) & 0xc30c30c3;\ -x = (x | (x >>4)) & 0x0f00f00f;\ -x = (x | (x >> 8)) & 0xff0000ff;\ -x = (x | (x >> 16)) & 0xfff;\ +#define puckU32ToThree_3(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ } -#define unpuckU32ToThree_1(x){\ -x &= 0xfff;\ -x = (x | (x << 16)) & 0xff0000ff;\ -x = (x | (x << 8)) & 0x0f00f00f;\ -x = (x | (x << 4)) & 0xc30c30c3;\ -x = (x | (x << 2)) & 0x49249249;\ +#define 
unpuckU32ToThree_3(lo){\ + u32 r0;\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ } unsigned char constant7Format[80]; diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_3/auxFormat.h b/knot/Implementations/crypto_hash/knot256v2/armcortexm_3/auxFormat.h index 16bd047..3114169 100644 --- a/knot/Implementations/crypto_hash/knot256v2/armcortexm_3/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_3/auxFormat.h @@ -13,7 +13,6 @@ typedef unsigned long long u64; #define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) #define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) - #define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) #define sbox(a, b, c, d, f, g, h) \ { \ @@ -30,12 +29,6 @@ t3= LOTR32(t1, 18); \ t4 = LOTR32(t2, 18);\ t5 = LOTR32(t0, 19); \ } -/* -s0 s1 s2 -s3 s4 s5 -s6 s7 s8 -s9 s10 s11 -*/ #define ROUND384(lunNum) {\ s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;\ s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;\ @@ -48,82 +41,70 @@ U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\ U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\ } -#define puckU32ToThree_1(x){\ -x &= 0x49249249;\ -x = (x | (x >> 2)) & 0xc30c30c3;\ -x = (x | (x >>4)) & 0x0f00f00f;\ -x = (x | (x >> 8)) & 0xff0000ff;\ -x = (x | (x >> 16)) & 0xfff;\ +#define puckU32ToThree_3(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo 
^= r0 ^ (r0 >> 12);\ } -#define unpuckU32ToThree_1(x){\ -x &= 0xfff;\ -x = (x | (x << 16)) & 0xff0000ff;\ -x = (x | (x << 8)) & 0x0f00f00f;\ -x = (x | (x << 4)) & 0xc30c30c3;\ -x = (x | (x << 2)) & 0x49249249;\ +#define unpuckU32ToThree_3(lo){\ + u32 r0;\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ } -#define packU32FormatToThreePacket(out, in) { \ - u32 t2 = U32BIG(((u32*)in)[0]); \ - out[2] = t2; out[1] = t2 >> 1; out[0] = t2 >> 2;\ - puckU32ToThree_1(out[0]); \ - puckU32ToThree_1(out[1]); \ - puckU32ToThree_1(out[2]); \ +#define packU32FormatToThreePacket( out, in) {\ +u32 t0 = U32BIG(((u32*)in)[0]); \ +puckU32ToThree_3(t0); \ +out[0] = ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); \ +out[1] = (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) | ((t0 & 0xff00) >> 8)); \ +out[2] = (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) | (t0 & 0x000000ff)); \ } -#define unpackU32FormatToThreePacket(out, in) { \ - u32 temp0[3] = { 0 }; \ - temp0[0] = in[0] & 0x3ff; \ - temp0[1] = in[1] & 0x7ff; \ - temp0[2] = in[2] & 0x7ff; \ - unpuckU32ToThree_1(temp0[0]); \ - unpuckU32ToThree_1(temp0[1]); \ - unpuckU32ToThree_1(temp0[2]); \ - *(u32*)(out) = U32BIG(temp0[0]<<2 | temp0[1] << 1 | temp0[2]); \ +#define unpackU32FormatToThreePacket(out, in) {\ +u32 t[3] = { 0 } ;\ +u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ +t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ +((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ +((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ +unpuckU32ToThree_3(t[0]); \ +*(u32*)(out) = t[0]; \ } -#define packU96FormatToThreePacket(out, in) { \ - u32 temp0[3] = { 0 }; \ - u32 temp1[3] = { 0 }; \ 
- u32 temp2[3] = { 0 }; \ - temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; \ - puckU32ToThree_1(temp0[0]); \ - puckU32ToThree_1(temp0[1]); \ - puckU32ToThree_1(temp0[2]); \ - temp1[0] = U32BIG(((u32*)in)[1]); temp1[1] = U32BIG(((u32*)in)[1]) >>1; temp1[2] = U32BIG(((u32*)in)[1]) >> 2; \ - puckU32ToThree_1(temp1[0]); \ - puckU32ToThree_1(temp1[1]); \ - puckU32ToThree_1(temp1[2]); \ - temp2[0] = U32BIG(((u32*)in)[2]); temp2[1] = U32BIG(((u32*)in)[2]) >> 1; temp2[2] = U32BIG(((u32*)in)[2]) >> 2; \ - puckU32ToThree_1(temp2[0]); \ - puckU32ToThree_1(temp2[1]); \ - puckU32ToThree_1(temp2[2]); \ - out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; \ - out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; \ - out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; \ +#define packU96FormatToThreePacket( out, in) { \ + u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ + puckU32ToThree_3(t0); \ + puckU32ToThree_3(t1); \ + puckU32ToThree_3(t2); \ + out[0] = (((t2 & 0x80000000)) | ((t2 & 0xc000000) << 3) | ((t2 & 0xff00) << 13)) | \ + (((t1 & 0x40000000) >> 10) | ((t1 & 0x03000000) >> 6) | ((t1 & 0x000000ff) << 10)) | \ + ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); \ + out[1] = (((t2 & 0x40000000) << 1) | ((t2 & 0x03000000) << 5) | ((t2 & 0x000000ff) << 21)) | \ + ((((t1 & 0x30000000) >> 9) | ((t1 & 0xff0000) >> 5)) | \ + (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) | ((t0 & 0xff00) >> 8))); \ + out[2] = ((((t2 & 0x30000000) << 2) | ((t2 & 0xff0000) << 6)) | \ + (((t1 & 0x80000000) >> 10) | ((t1 & 0xc000000) >> 7) | ((t1 & 0xff00) << 3)) | \ + (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) | (t0 & 0x000000ff))); \ } -#define unpackU96FormatToThreePacket(out, in) { \ - u32 temp0[3] = { 0 }; \ - u32 temp1[3] = { 0 }; \ - u32 temp2[3] = { 0 }; \ - u32 t[3] = { 0 }; \ - temp0[0] = in[2] & 0x7ff; \ - temp0[1] = in[1] & 0x7ff; \ - temp0[2] = in[0] 
& 0x3ff; \ - temp1[0] = (in[0]>>10) & 0x7ff; \ - temp1[1] = (in[2] >>11 ) & 0x7ff; \ - temp1[2] = (in[1] >> 11) & 0x3ff; \ - temp2[0] = in[1] >> 21; \ - temp2[1] = in[0] >> 21; \ - temp2[2] = in[2] >> 22; \ - unpuckU32ToThree_1(temp0[0]); \ - unpuckU32ToThree_1(temp0[1]); \ - unpuckU32ToThree_1(temp0[2]); \ - t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; \ - unpuckU32ToThree_1(temp1[0]); \ - unpuckU32ToThree_1(temp1[1]); \ - unpuckU32ToThree_1(temp1[2]); \ - t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; \ - unpuckU32ToThree_1(temp2[0]); \ - unpuckU32ToThree_1(temp2[1]); \ - unpuckU32ToThree_1(temp2[2]); \ - t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; \ - memcpy(out, t, 12 * sizeof(unsigned char)); \ + +#define unpackU96FormatToThreePacket( out, in) {\ + u32 t[3] = { 0 };\ + u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ + t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ + ((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ + ((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ + t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ + ((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ + ((t0 & 0x3fc00) >> 10); \ + t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ + ((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ + ((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ + unpuckU32ToThree_3(t[0]); \ + unpuckU32ToThree_3(t[1]); \ + unpuckU32ToThree_3(t[2]); \ + memcpy(out, t, 12 * sizeof(unsigned char)); \ } diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_3/hash.c b/knot/Implementations/crypto_hash/knot256v2/armcortexm_3/hash.c index aa80405..e38acaa 100644 --- a/knot/Implementations/crypto_hash/knot256v2/armcortexm_3/hash.c +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_3/hash.c @@ -1,6 +1,7 @@ #include"auxFormat.h" -#define hash_RATE (128 / 8) 
+#define hash_RATE 16 +//#define hash_RATE (128 / 8) #define PRH_ROUNDS 80 //12*7=84 unsigned char constant7Format[80] = { @@ -43,7 +44,7 @@ int crypto_hash(unsigned char *out, const unsigned char *in, inlen -= hash_RATE; in += hash_RATE; } - memset(tempData, 0, sizeof(tempData)); + memset(tempData, 0, hash_RATE); memcpy(tempData, in, inlen * sizeof(unsigned char)); tempData[inlen] = 0x01; Processing_Data(tempData); diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_4/auxFormat.c b/knot/Implementations/crypto_hash/knot256v2/armcortexm_4/auxFormat.c index 6e79b71..6230f0b 100644 --- a/knot/Implementations/crypto_hash/knot256v2/armcortexm_4/auxFormat.c +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_4/auxFormat.c @@ -1,93 +1,86 @@ #include"auxFormat.h" -void packU32FormatToThreePacket(u32 * out, u8 * in) { - u32 t2 = U32BIG(((u32*)in)[0]); - out[2] = t2; out[1] = t2 >> 1; out[0] = t2 >> 2;//temp2[0] 0;temp2[1] 1;temp2[2] 2; - puckU32ToThree_1(out[0]); - puckU32ToThree_1(out[1]); - puckU32ToThree_1(out[2]); + +void packU32FormatToThreePacket(u32 *out, u8 *in) { + u32 t0 = U32BIG(((u32* )in)[0]); + puckU32ToThree_3(t0); + out[0] = ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); + out[1] = (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) + | ((t0 & 0xff00) >> 8)); + out[2] = (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) + | (t0 & 0x000000ff)); } -void unpackU32FormatToThreePacket(u8 * out, u32 * in) { - u32 temp0[3] = { 0 }; - temp0[0] = in[0] & 0x3ff; - temp0[1] = in[1] & 0x7ff; - temp0[2] = in[2] & 0x7ff; - unpuckU32ToThree_1(temp0[0]); - unpuckU32ToThree_1(temp0[1]); - unpuckU32ToThree_1(temp0[2]); - *(u32*)(out) = U32BIG(temp0[0]<<2 | temp0[1] << 1 | temp0[2]); +void unpackU32FormatToThreePacket(u8 *out, u32 *in) { + u32 t[3] = { 0 };\ + u32 t0 = in[0], t1 = in[1], t2 = in[2]; + t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | ((t0 & 0x300) << 20) + | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | ((t0 & 0xff) << 
16) + | ((t1 & 0xff) << 8) | (t2 & 0xff); + unpuckU32ToThree_3(t[0]); + *(u32*) (out) = t[0]; } - - -void packU96FormatToThreePacket(u32 * out, u8 * in) { - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; - puckU32ToThree_1(temp0[0]); - puckU32ToThree_1(temp0[1]); - puckU32ToThree_1(temp0[2]); - temp1[0] = U32BIG(((u32*)in)[1]); temp1[1] = U32BIG(((u32*)in)[1]) >>1; temp1[2] = U32BIG(((u32*)in)[1]) >> 2; - puckU32ToThree_1(temp1[0]); - puckU32ToThree_1(temp1[1]); - puckU32ToThree_1(temp1[2]); - temp2[0] = U32BIG(((u32*)in)[2]); temp2[1] = U32BIG(((u32*)in)[2]) >> 1; temp2[2] = U32BIG(((u32*)in)[2]) >> 2; - puckU32ToThree_1(temp2[0]); - puckU32ToThree_1(temp2[1]); - puckU32ToThree_1(temp2[2]); - out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; - out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; - out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; +void packU96FormatToThreePacket(u32 *out, u8 *in) { + u32 t0 = U32BIG(((u32* )in)[0]), t1 = U32BIG(((u32* )in)[1]), t2 = U32BIG( + ((u32* )in)[2]); + puckU32ToThree_3(t0); + puckU32ToThree_3(t1); + puckU32ToThree_3(t2); + out[0] = (((t2 & 0x80000000)) | ((t2 & 0xc000000) << 3) + | ((t2 & 0xff00) << 13)) + | (((t1 & 0x40000000) >> 10) | ((t1 & 0x03000000) >> 6) + | ((t1 & 0x000000ff) << 10)) + | ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); + out[1] = (((t2 & 0x40000000) << 1) | ((t2 & 0x03000000) << 5) + | ((t2 & 0x000000ff) << 21)) + | ((((t1 & 0x30000000) >> 9) | ((t1 & 0xff0000) >> 5)) + | (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) + | ((t0 & 0xff00) >> 8))); + out[2] = ((((t2 & 0x30000000) << 2) | ((t2 & 0xff0000) << 6)) + | (((t1 & 0x80000000) >> 10) | ((t1 & 0xc000000) >> 7) + | ((t1 & 0xff00) << 3)) + | (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) + | (t0 & 0x000000ff))); } -void unpackU96FormatToThreePacket(u8 * out, u32 * in) { - u32 
temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - u32 t[3] = { 0 }; - temp0[0] = in[2] & 0x7ff; - temp0[1] = in[1] & 0x7ff; - temp0[2] = in[0] & 0x3ff; - temp1[0] = (in[0]>>10) & 0x7ff; - temp1[1] = (in[2] >>11 ) & 0x7ff; - temp1[2] = (in[1] >> 11) & 0x3ff; - temp2[0] = in[1] >> 21; - temp2[1] = in[0] >> 21; - temp2[2] = in[2] >> 22; - unpuckU32ToThree_1(temp0[0]); - unpuckU32ToThree_1(temp0[1]); - unpuckU32ToThree_1(temp0[2]); - t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; - unpuckU32ToThree_1(temp1[0]); - unpuckU32ToThree_1(temp1[1]); - unpuckU32ToThree_1(temp1[2]); - t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; - unpuckU32ToThree_1(temp2[0]); - unpuckU32ToThree_1(temp2[1]); - unpuckU32ToThree_1(temp2[2]); - t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; +void unpackU96FormatToThreePacket(u8 *out, u32 *in) { + u32 t[3] = { 0 };\ + u32 t0 = in[0], t1 = in[1], t2 = in[2]; + t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | ((t0 & 0x300) << 20) + | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | ((t0 & 0xff) << 16) + | ((t1 & 0xff) << 8) | (t2 & 0xff); + t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) + | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) + | ((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) + | ((t2 & 0x7f800) >> 3) | ((t0 & 0x3fc00) >> 10); + t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) + | ((t2 & 0xc0000000) >> 2) | ((t0 & 0x60000000) >> 3) + | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) + | ((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); + unpuckU32ToThree_3(t[0]); + unpuckU32ToThree_3(t[1]); + unpuckU32ToThree_3(t[2]); memcpy(out, t, 12 * sizeof(unsigned char)); } -unsigned char constant7Format[80] = { - /*constant7Format[127]: 12*6=72*/ - 0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, - 0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, - 0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, - 0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, - 
0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, - 0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, - 0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,}; +unsigned char constant7Format[80] = { +/*constant7Format[127]: 12*6=72*/ +0x01, 0x08, 0x40, 0x02, 0x10, 0x80, 0x05, 0x09, 0x48, 0x42, 0x12, 0x90, 0x85, + 0x0c, 0x41, 0x0a, 0x50, 0x82, 0x15, 0x89, 0x4d, 0x4b, 0x5a, 0xd2, 0x97, + 0x9c, 0xc4, 0x06, 0x11, 0x88, 0x45, 0x0b, 0x58, 0xc2, 0x17, 0x99, 0xcd, + 0x4e, 0x53, 0x9a, 0xd5, 0x8e, 0x54, 0x83, 0x1d, 0xc9, 0x4f, 0x5b, 0xda, + 0xd7, 0x9e, 0xd4, 0x86, 0x14, 0x81, 0x0d, 0x49, 0x4a, 0x52, 0x92, 0x95, + 0x8c, 0x44, 0x03, 0x18, 0xc0, 0x07, 0x19, 0xc8, 0x47, 0x1b, 0xd8, 0xc7, + 0x1e, 0xd1, 0x8f, 0x5c, 0xc3, 0x1f, 0xd9, }; void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) { u32 s_temp[12] = { 0 }; u32 t1, t2, t3, t5, t6, t8, t9, t11; - unsigned char i; + unsigned char i; for (i = 0; i < lunNum; i++) { -s[0] ^= (round[i] >> 6) & 0x3;\ -s[1] ^= (round[i] >> 3) & 0x7;\ -s[2] ^= round[i] & 0x7;\ -sbox(s[0], s[3], s[6], s[9] , s_temp[3], s_temp[6], s_temp[9]);\ -sbox(s[1], s[4], s[7], s[10], s[3] , s_temp[7], s_temp[10]);\ -sbox(s[2], s[5], s[8], s[11], s[4] , s_temp[8], s_temp[11]);\ -s[5] = LOTR32(s_temp[3], 1); \ -U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\ -U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\ + s[0] ^= (round[i] >> 6) & 0x3; + s[1] ^= (round[i] >> 3) & 0x7; + s[2] ^= round[i] & 0x7; + sbox(s[0], s[3], s[6], s[9], s_temp[3], s_temp[6], s_temp[9]); + sbox(s[1], s[4], s[7], s[10], s[3], s_temp[7], s_temp[10]); + sbox(s[2], s[5], s[8], s[11], s[4], s_temp[8], s_temp[11]); + s[5] = LOTR32(s_temp[3], 1); + U96_BIT_LOTR32_8(s_temp[6], s_temp[7], s_temp[8], s[6], s[7], s[8]); + U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]); } } diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_4/auxFormat.h 
b/knot/Implementations/crypto_hash/knot256v2/armcortexm_4/auxFormat.h index 85f53af..ee7d2ea 100644 --- a/knot/Implementations/crypto_hash/knot256v2/armcortexm_4/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_4/auxFormat.h @@ -14,19 +14,23 @@ void unpackU96FormatToThreePacket(u8 * out, u32 * in) ; void packU96FormatToThreePacket(u32 * out, u8 * in); void unpackU32FormatToThreePacket(u8 * out, u32 * in); void packU32FormatToThreePacket(u32 * out, u8 * in); -#define puckU32ToThree_1(x){\ -x &= 0x49249249;\ -x = (x | (x >> 2)) & 0xc30c30c3;\ -x = (x | (x >>4)) & 0x0f00f00f;\ -x = (x | (x >> 8)) & 0xff0000ff;\ -x = (x | (x >> 16)) & 0xfff;\ +#define puckU32ToThree_3(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ } -#define unpuckU32ToThree_1(x){\ -x &= 0xfff;\ -x = (x | (x << 16)) & 0xff0000ff;\ -x = (x | (x << 8)) & 0x0f00f00f;\ -x = (x | (x << 4)) & 0xc30c30c3;\ -x = (x | (x << 2)) & 0x49249249;\ +#define unpuckU32ToThree_3(lo){\ + u32 r0;\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ } unsigned char constant7Format[80]; diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_4/hash.c b/knot/Implementations/crypto_hash/knot256v2/armcortexm_4/hash.c index 9c3dafd..8066529 100644 --- a/knot/Implementations/crypto_hash/knot256v2/armcortexm_4/hash.c +++ 
b/knot/Implementations/crypto_hash/knot256v2/armcortexm_4/hash.c @@ -1,8 +1,9 @@ #include"auxFormat.h" +#define hash_RATE 16 -#define hash_RATE (128 / 8) +//#define hash_RATE (128 / 8) #define PRH_ROUNDS 80 @@ -28,7 +29,7 @@ int crypto_hash(unsigned char *out, const unsigned char *in, inlen -= hash_RATE; in += hash_RATE; } - memset(tempData, 0, sizeof(tempData)); + memset(tempData, 0, hash_RATE); memcpy(tempData, in, inlen * sizeof(unsigned char)); tempData[inlen] = 0x01; packU96FormatToThreePacket(dataFormat, tempData); diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_5/auxFormat.c b/knot/Implementations/crypto_hash/knot256v2/armcortexm_5/auxFormat.c index 46cfe13..f7a5cc2 100644 --- a/knot/Implementations/crypto_hash/knot256v2/armcortexm_5/auxFormat.c +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_5/auxFormat.c @@ -1,96 +1,89 @@ #include"auxFormat.h" -void packU32FormatToThreePacket(u32 * out, u8 * in) { - u32 t2 = U32BIG(((u32*)in)[0]); - out[2] = t2; out[1] = t2 >> 1; out[0] = t2 >> 2;//temp2[0] 0;temp2[1] 1;temp2[2] 2; - puckU32ToThree_1(out[0]); - puckU32ToThree_1(out[1]); - puckU32ToThree_1(out[2]); + +void packU32FormatToThreePacket(u32 *out, u8 *in) { + u32 t0 = U32BIG(((u32* )in)[0]); + puckU32ToThree_3(t0); + out[0] = ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); + out[1] = (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) + | ((t0 & 0xff00) >> 8)); + out[2] = (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) + | (t0 & 0x000000ff)); } -void unpackU32FormatToThreePacket(u8 * out, u32 * in) { - u32 temp0[3] = { 0 }; - temp0[0] = in[0] & 0x3ff; - temp0[1] = in[1] & 0x7ff; - temp0[2] = in[2] & 0x7ff; - unpuckU32ToThree_1(temp0[0]); - unpuckU32ToThree_1(temp0[1]); - unpuckU32ToThree_1(temp0[2]); - *(u32*)(out) = U32BIG(temp0[0]<<2 | temp0[1] << 1 | temp0[2]); +void unpackU32FormatToThreePacket(u8 *out, u32 *in) { + u32 t[3] = { 0 };\ + u32 t0 = in[0], t1 = in[1], t2 = in[2]; + t[0] = ((t1 & 0x400) << 21) | 
((t2 & 0x400) << 20) | ((t0 & 0x300) << 20) + | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | ((t0 & 0xff) << 16) + | ((t1 & 0xff) << 8) | (t2 & 0xff); + unpuckU32ToThree_3(t[0]); + *(u32*) (out) = t[0]; } - - -void packU96FormatToThreePacket(u32 * out, u8 * in) { - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; - puckU32ToThree_1(temp0[0]); - puckU32ToThree_1(temp0[1]); - puckU32ToThree_1(temp0[2]); - temp1[0] = U32BIG(((u32*)in)[1]); temp1[1] = U32BIG(((u32*)in)[1]) >>1; temp1[2] = U32BIG(((u32*)in)[1]) >> 2; - puckU32ToThree_1(temp1[0]); - puckU32ToThree_1(temp1[1]); - puckU32ToThree_1(temp1[2]); - temp2[0] = U32BIG(((u32*)in)[2]); temp2[1] = U32BIG(((u32*)in)[2]) >> 1; temp2[2] = U32BIG(((u32*)in)[2]) >> 2; - puckU32ToThree_1(temp2[0]); - puckU32ToThree_1(temp2[1]); - puckU32ToThree_1(temp2[2]); - out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; - out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; - out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; +void packU96FormatToThreePacket(u32 *out, u8 *in) { + u32 t0 = U32BIG(((u32* )in)[0]), t1 = U32BIG(((u32* )in)[1]), t2 = U32BIG( + ((u32* )in)[2]); + puckU32ToThree_3(t0); + puckU32ToThree_3(t1); + puckU32ToThree_3(t2); + out[0] = (((t2 & 0x80000000)) | ((t2 & 0xc000000) << 3) + | ((t2 & 0xff00) << 13)) + | (((t1 & 0x40000000) >> 10) | ((t1 & 0x03000000) >> 6) + | ((t1 & 0x000000ff) << 10)) + | ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); + out[1] = (((t2 & 0x40000000) << 1) | ((t2 & 0x03000000) << 5) + | ((t2 & 0x000000ff) << 21)) + | ((((t1 & 0x30000000) >> 9) | ((t1 & 0xff0000) >> 5)) + | (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) + | ((t0 & 0xff00) >> 8))); + out[2] = ((((t2 & 0x30000000) << 2) | ((t2 & 0xff0000) << 6)) + | (((t1 & 0x80000000) >> 10) | ((t1 & 0xc000000) >> 7) + | ((t1 & 0xff00) << 3)) + | (((t0 & 0x40000000) >> 20) | ((t0 & 
0x03000000) >> 16) + | (t0 & 0x000000ff))); } -void unpackU96FormatToThreePacket(u8 * out, u32 * in) { - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - u32 t[3] = { 0 }; - temp0[0] = in[2] & 0x7ff; - temp0[1] = in[1] & 0x7ff; - temp0[2] = in[0] & 0x3ff; - temp1[0] = (in[0]>>10) & 0x7ff; - temp1[1] = (in[2] >>11 ) & 0x7ff; - temp1[2] = (in[1] >> 11) & 0x3ff; - temp2[0] = in[1] >> 21; - temp2[1] = in[0] >> 21; - temp2[2] = in[2] >> 22; - unpuckU32ToThree_1(temp0[0]); - unpuckU32ToThree_1(temp0[1]); - unpuckU32ToThree_1(temp0[2]); - t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; - unpuckU32ToThree_1(temp1[0]); - unpuckU32ToThree_1(temp1[1]); - unpuckU32ToThree_1(temp1[2]); - t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; - unpuckU32ToThree_1(temp2[0]); - unpuckU32ToThree_1(temp2[1]); - unpuckU32ToThree_1(temp2[2]); - t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; +void unpackU96FormatToThreePacket(u8 *out, u32 *in) { + u32 t[3] = { 0 };\ + u32 t0 = in[0], t1 = in[1], t2 = in[2]; + t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | ((t0 & 0x300) << 20) + | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | ((t0 & 0xff) << 16) + | ((t1 & 0xff) << 8) | (t2 & 0xff); + t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) + | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) + | ((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) + | ((t2 & 0x7f800) >> 3) | ((t0 & 0x3fc00) >> 10); + t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) + | ((t2 & 0xc0000000) >> 2) | ((t0 & 0x60000000) >> 3) + | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) + | ((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); + unpuckU32ToThree_3(t[0]); + unpuckU32ToThree_3(t[1]); + unpuckU32ToThree_3(t[2]); memcpy(out, t, 12 * sizeof(unsigned char)); } -void ROUND384_Three(unsigned int *s, unsigned char *c,int lunnum) {\ - unsigned int t,t1,t2; +void ROUND384_Three(unsigned int *s, unsigned char *c, int lunnum) { + unsigned int t, t1, t2; u32 rci; - rci=c[0]; + rci = 
c[0]; ROUND384_1(rci); t = 1; while (lunnum--) { - rci=c[t]; + rci = c[t]; ROUND384_2(rci); t++; - rci=c[t]; + rci = c[t]; ROUND384_3(rci); t++; - rci=c[t]; + rci = c[t]; ROUND384_4(rci); t++; } } -unsigned char constant7Format[80] = { - /*constant7Format[127]: 12*6=72*/ - 0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, - 0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, - 0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, - 0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, - 0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, - 0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, - 0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,}; +unsigned char constant7Format[80] = { +/*constant7Format[127]: 12*6=72*/ +0x01, 0x08, 0x40, 0x02, 0x10, 0x80, 0x05, 0x09, 0x48, 0x42, 0x12, 0x90, 0x85, + 0x0c, 0x41, 0x0a, 0x50, 0x82, 0x15, 0x89, 0x4d, 0x4b, 0x5a, 0xd2, 0x97, + 0x9c, 0xc4, 0x06, 0x11, 0x88, 0x45, 0x0b, 0x58, 0xc2, 0x17, 0x99, 0xcd, + 0x4e, 0x53, 0x9a, 0xd5, 0x8e, 0x54, 0x83, 0x1d, 0xc9, 0x4f, 0x5b, 0xda, + 0xd7, 0x9e, 0xd4, 0x86, 0x14, 0x81, 0x0d, 0x49, 0x4a, 0x52, 0x92, 0x95, + 0x8c, 0x44, 0x03, 0x18, 0xc0, 0x07, 0x19, 0xc8, 0x47, 0x1b, 0xd8, 0xc7, + 0x1e, 0xd1, 0x8f, 0x5c, 0xc3, 0x1f, 0xd9, }; diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_5/auxFormat.h b/knot/Implementations/crypto_hash/knot256v2/armcortexm_5/auxFormat.h index 7017560..b133d3c 100644 --- a/knot/Implementations/crypto_hash/knot256v2/armcortexm_5/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_5/auxFormat.h @@ -15,19 +15,23 @@ void unpackU96FormatToThreePacket(u8 * out, u32 * in) ; void packU96FormatToThreePacket(u32 * out, u8 * in); void unpackU32FormatToThreePacket(u8 * out, u32 * in); void packU32FormatToThreePacket(u32 * out, u8 * in); -#define puckU32ToThree_1(x){\ -x &= 0x49249249;\ -x = (x | (x >> 2)) & 0xc30c30c3;\ -x = (x | (x >>4)) & 0x0f00f00f;\ -x = (x | (x >> 8)) & 0xff0000ff;\ -x = (x | (x 
>> 16)) & 0xfff;\ +#define puckU32ToThree_3(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ } -#define unpuckU32ToThree_1(x){\ -x &= 0xfff;\ -x = (x | (x << 16)) & 0xff0000ff;\ -x = (x | (x << 8)) & 0x0f00f00f;\ -x = (x | (x << 4)) & 0xc30c30c3;\ -x = (x | (x << 2)) & 0x49249249;\ +#define unpuckU32ToThree_3(lo){\ + u32 r0;\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ } ////////////constant begin// unsigned char constant7Format[80]; diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.c b/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.c index 6cdc800..ac25779 100644 --- a/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.c +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.c @@ -1,70 +1,52 @@ #include"auxFormat.h" -void packU32FormatToThreePacket(u32 * out, u8 * in) { - u32 t2 = U32BIG(((u32*)in)[0]); - out[2] = t2; out[1] = t2 >> 1; out[0] = t2 >> 2;//temp2[0] 0;temp2[1] 1;temp2[2] 2; - puckU32ToThree_1(out[0]); - puckU32ToThree_1(out[1]); - puckU32ToThree_1(out[2]); + +void packU32FormatToThreePacket(u32 *out, u8 *in) { + u32 t0 = U32BIG(((u32*)in)[0]); \ + puckU32ToThree_3(t0); \ + out[0] = ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); \ +out[1] = (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) | ((t0 & 0xff00) >> 8)); \ +out[2] 
= (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) | (t0 & 0x000000ff)); \ } -void unpackU32FormatToThreePacket(u8 * out, u32 * in) { - u32 temp0[3] = { 0 }; - temp0[0] = in[0] & 0x3ff; - temp0[1] = in[1] & 0x7ff; - temp0[2] = in[2] & 0x7ff; - unpuckU32ToThree_1(temp0[0]); - unpuckU32ToThree_1(temp0[1]); - unpuckU32ToThree_1(temp0[2]); - *(u32*)(out) = U32BIG(temp0[0]<<2 | temp0[1] << 1 | temp0[2]); +void unpackU32FormatToThreePacket(u8 *out, u32 *in) { + u32 t[3] = { 0 } ;\ +u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ +t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ +((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ +((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ +unpuckU32ToThree_3(t[0]); \ +*(u32*)(out) = t[0]; \ } - - -void packU96FormatToThreePacket(u32 * out, u8 * in) { - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; - puckU32ToThree_1(temp0[0]); - puckU32ToThree_1(temp0[1]); - puckU32ToThree_1(temp0[2]); - temp1[0] = U32BIG(((u32*)in)[1]); temp1[1] = U32BIG(((u32*)in)[1]) >>1; temp1[2] = U32BIG(((u32*)in)[1]) >> 2; - puckU32ToThree_1(temp1[0]); - puckU32ToThree_1(temp1[1]); - puckU32ToThree_1(temp1[2]); - temp2[0] = U32BIG(((u32*)in)[2]); temp2[1] = U32BIG(((u32*)in)[2]) >> 1; temp2[2] = U32BIG(((u32*)in)[2]) >> 2; - puckU32ToThree_1(temp2[0]); - puckU32ToThree_1(temp2[1]); - puckU32ToThree_1(temp2[2]); - out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; - out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; - out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; +void packU96FormatToThreePacket(u32 *out, u8 *in) { + u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ + puckU32ToThree_3(t0); \ + puckU32ToThree_3(t1); \ + puckU32ToThree_3(t2); \ + out[0] = (((t2 & 0x80000000)) | ((t2 & 0xc000000) << 3) | ((t2 & 0xff00) << 13)) | \ 
+(((t1 & 0x40000000) >> 10) | ((t1 & 0x03000000) >> 6) | ((t1 & 0x000000ff) << 10)) | \ +((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); \ +out[1] = (((t2 & 0x40000000) << 1) | ((t2 & 0x03000000) << 5) | ((t2 & 0x000000ff) << 21)) | \ +((((t1 & 0x30000000) >> 9) | ((t1 & 0xff0000) >> 5)) | \ +(((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) | ((t0 & 0xff00) >> 8))); \ +out[2] = ((((t2 & 0x30000000) << 2) | ((t2 & 0xff0000) << 6)) | \ +(((t1 & 0x80000000) >> 10) | ((t1 & 0xc000000) >> 7) | ((t1 & 0xff00) << 3)) | \ +(((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) | (t0 & 0x000000ff))); \ } -void unpackU96FormatToThreePacket(u8 * out, u32 * in) { - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - u32 t[3] = { 0 }; - temp0[0] = in[2] & 0x7ff; - temp0[1] = in[1] & 0x7ff; - temp0[2] = in[0] & 0x3ff; - temp1[0] = (in[0]>>10) & 0x7ff; - temp1[1] = (in[2] >>11 ) & 0x7ff; - temp1[2] = (in[1] >> 11) & 0x3ff; - temp2[0] = in[1] >> 21; - temp2[1] = in[0] >> 21; - temp2[2] = in[2] >> 22; - unpuckU32ToThree_1(temp0[0]); - unpuckU32ToThree_1(temp0[1]); - unpuckU32ToThree_1(temp0[2]); - t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; - unpuckU32ToThree_1(temp1[0]); - unpuckU32ToThree_1(temp1[1]); - unpuckU32ToThree_1(temp1[2]); - t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; - unpuckU32ToThree_1(temp2[0]); - unpuckU32ToThree_1(temp2[1]); - unpuckU32ToThree_1(temp2[2]); - t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; - memcpy(out, t, 12 * sizeof(unsigned char)); +void unpackU96FormatToThreePacket(u8 *out, u32 *in) { + u32 t[3] = { 0 };\ +u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ +t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ +((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ +((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ +t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ +((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 
0x7f800) >> 3) | \ +((t0 & 0x3fc00) >> 10); \ +t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ +((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ +((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ +unpuckU32ToThree_3(t[0]); \ +unpuckU32ToThree_3(t[1]); \ +unpuckU32ToThree_3(t[2]); \ +memcpy(out, t, 12 * sizeof(unsigned char)); \ } void ROUND384_Three(unsigned int *s, unsigned char *c, int lunnum) { unsigned int t, t1, t2; diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.h b/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.h index 58eef3d..253d360 100644 --- a/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.h @@ -11,19 +11,23 @@ typedef unsigned int u32; typedef unsigned long long u64; void ROUND384_Three(unsigned int *s, unsigned char *c,int lunnum) ; -#define puckU32ToThree_1(x){\ -x &= 0x49249249;\ -x = (x | (x >> 2)) & 0xc30c30c3;\ -x = (x | (x >>4)) & 0x0f00f00f;\ -x = (x | (x >> 8)) & 0xff0000ff;\ -x = (x | (x >> 16)) & 0xfff;\ +#define puckU32ToThree_3(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ } -#define unpuckU32ToThree_1(x){\ -x &= 0xfff;\ -x = (x | (x << 16)) & 0xff0000ff;\ -x = (x | (x << 8)) & 0x0f00f00f;\ -x = (x | (x << 4)) & 0xc30c30c3;\ -x = (x | (x << 2)) & 0x49249249;\ +#define unpuckU32ToThree_3(lo){\ + u32 r0;\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo 
<< 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ } ////////////constant begin// unsigned char constant7Format[80]; diff --git a/knot/Implementations/crypto_hash/knot256v2/opt_1/hash.c b/knot/Implementations/crypto_hash/knot256v2/opt_1/hash.c index 976e4f1..a766a19 100644 --- a/knot/Implementations/crypto_hash/knot256v2/opt_1/hash.c +++ b/knot/Implementations/crypto_hash/knot256v2/opt_1/hash.c @@ -1,4 +1,5 @@ #include "api.h" +#include #define PRH_ROUNDS 80 typedef unsigned char u8; typedef unsigned long long u64; diff --git a/knot/Implementations/crypto_hash/knot384/LWC_HASH_KAT_384.txt b/knot/Implementations/crypto_hash/knot384/LWC_HASH_KAT_384.txt index 7703cd8..5abd4dd 100644 --- a/knot/Implementations/crypto_hash/knot384/LWC_HASH_KAT_384.txt +++ b/knot/Implementations/crypto_hash/knot384/LWC_HASH_KAT_384.txt @@ -512,6 +512,8 @@ MD = C4B43249BC06B1E248301DD99E41BA011CACAD9A9A32D554ED5CFE95FAA8468334D5A0F037F Count = 129 Msg = 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F202122232425262728292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F404142434445464748494A4B4C4D4E4F505152535455565758595A5B5C5D5E5F606162636465666768696A6B6C6D6E6F707172737475767778797A7B7C7D7E7F + + MD = 8686BA1F086BF103EE96DF3B4606CFDD911F3DAD5240BAE7BD2AE50C3959B060BE7773A1EA203F2188A9AED1C4CA2B3B Count = 130 diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_2/auxFormat.c b/knot/Implementations/crypto_hash/knot384/armcortexm_2/auxFormat.c index 9af9416..242a621 100644 --- a/knot/Implementations/crypto_hash/knot384/armcortexm_2/auxFormat.c +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_2/auxFormat.c @@ -1,52 +1,37 @@ #include"auxFormat.h" -void unpackU96FormatToThreePacket(u8 * out, u32 * in) { - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - u32 t[3] = { 0 }; - temp0[0] = in[2] & 0x7ff; - temp0[1] = in[1] & 0x7ff; 
- temp0[2] = in[0] & 0x3ff; - temp1[0] = (in[0]>>10) & 0x7ff; - temp1[1] = (in[2] >>11 ) & 0x7ff; - temp1[2] = (in[1] >> 11) & 0x3ff; - temp2[0] = in[1] >> 21; - temp2[1] = in[0] >> 21; - temp2[2] = in[2] >> 22; - unpuckU32ToThree_1(temp0[0]); - unpuckU32ToThree_1(temp0[1]); - unpuckU32ToThree_1(temp0[2]); - t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; - unpuckU32ToThree_1(temp1[0]); - unpuckU32ToThree_1(temp1[1]); - unpuckU32ToThree_1(temp1[2]); - t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; - unpuckU32ToThree_1(temp2[0]); - unpuckU32ToThree_1(temp2[1]); - unpuckU32ToThree_1(temp2[2]); - t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; - memcpy(out, t, 12 * sizeof(unsigned char)); -} +void unpackU96FormatToThreePacket(u8 *out, u32 *in) { + u32 t[3] = { 0 }; \ + u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ + t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ + ((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ + ((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ + t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ + ((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ + ((t0 & 0x3fc00) >> 10); \ + t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ + ((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ + ((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ + unpuckU32ToThree_3(t[0]); \ + unpuckU32ToThree_3(t[1]); \ + unpuckU32ToThree_3(t[2]); \ + memcpy(out, t, 12 * sizeof(unsigned char)); \ +} void packU48FormatToThreePacket(u32 * out, u8 * in) { - u32 t1 = (u32)U16BIG(*(u16*)(in + 4)); - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; - puckU32ToThree_1(temp0[0]); - puckU32ToThree_1(temp0[1]); - puckU32ToThree_1(temp0[2]); - temp1[0] = t1; temp1[1] = t1 >> 1; temp1[2] = t1 
>> 2; - puckU32ToThree_1(temp1[0]); - puckU32ToThree_1(temp1[1]); - puckU32ToThree_1(temp1[2]); - out[0] = (temp1[0] << 10) | temp0[2]; - out[1] = (temp1[2] << 11) | temp0[1]; - out[2] = (temp1[1] << 11) | temp0[0]; + u32 t0 = U32BIG(*(u32*)(in)), t1 = (u32)U16BIG(*(u16*)(in + 4)); \ + puckU32ToThree_3(t0); \ + puckU32ToThree_3(t1); \ + out[0] = \ + (((t1 & 0x40000000) >> 10) | ((t1 & 0x03000000) >> 6) | ((t1 & 0x000000ff) << 10)) | \ + ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); \ + out[1] = \ + ((((t1 & 0x30000000) >> 9) | ((t1 & 0xff0000) >> 5)) | \ + (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) | ((t0 & 0xff00) >> 8))); \ + out[2] = \ + (((t1 & 0x80000000) >> 10) | ((t1 & 0xc000000) >> 7) | ((t1 & 0xff00) << 3)) | \ + (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) | (t0 & 0x000000ff)); \ } - - unsigned char constant7Format[104] = { /*constant7Format[127]: 12*9=108*/ 0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_2/auxFormat.h b/knot/Implementations/crypto_hash/knot384/armcortexm_2/auxFormat.h index 374923b..3b9606b 100644 --- a/knot/Implementations/crypto_hash/knot384/armcortexm_2/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_2/auxFormat.h @@ -17,19 +17,23 @@ void packU48FormatToThreePacket(u32 * out, u8 * in) ; void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) ; void unpackU96FormatToThreePacket(u8 * out, u32 * in) ; -#define puckU32ToThree_1(x){\ -x &= 0x49249249;\ -x = (x | (x >> 2)) & 0xc30c30c3;\ -x = (x | (x >>4)) & 0x0f00f00f;\ -x = (x | (x >> 8)) & 0xff0000ff;\ -x = (x | (x >> 16)) & 0xfff;\ +#define puckU32ToThree_3(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 4)) & 
0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ } -#define unpuckU32ToThree_1(x){\ -x &= 0xfff;\ -x = (x | (x << 16)) & 0xff0000ff;\ -x = (x | (x << 8)) & 0x0f00f00f;\ -x = (x | (x << 4)) & 0xc30c30c3;\ -x = (x | (x << 2)) & 0x49249249;\ +#define unpuckU32ToThree_3(lo){\ + u32 r0;\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ } #define P384_ARC_SC1(rci,S2,S3,S4) \ do { \ diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_3/auxFormat.h b/knot/Implementations/crypto_hash/knot384/armcortexm_3/auxFormat.h index 6da2d0e..b2d5511 100644 --- a/knot/Implementations/crypto_hash/knot384/armcortexm_3/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_3/auxFormat.h @@ -22,66 +22,56 @@ typedef unsigned long long u64; { \ t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ } -#define puckU32ToThree_1(x){\ -x &= 0x49249249;\ -x = (x | (x >> 2)) & 0xc30c30c3;\ -x = (x | (x >>4)) & 0x0f00f00f;\ -x = (x | (x >> 8)) & 0xff0000ff;\ -x = (x | (x >> 16)) & 0xfff;\ +#define puckU32ToThree_3(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ } -#define unpuckU32ToThree_1(x){\ -x &= 0xfff;\ -x = (x | (x << 16)) & 0xff0000ff;\ -x = (x | (x << 8)) & 0x0f00f00f;\ -x = (x | (x << 
4)) & 0xc30c30c3;\ -x = (x | (x << 2)) & 0x49249249;\ +#define unpuckU32ToThree_3(lo){\ + u32 r0;\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ } + #define unpackU96FormatToThreePacket( out, in) {\ - u32 temp0[3] = { 0 }; \ - u32 temp1[3] = { 0 }; \ - u32 temp2[3] = { 0 }; \ - u32 t[3] = { 0 }; \ - temp0[0] = in[2] & 0x7ff; \ - temp0[1] = in[1] & 0x7ff; \ - temp0[2] = in[0] & 0x3ff; \ - temp1[0] = (in[0]>>10) & 0x7ff; \ - temp1[1] = (in[2] >>11 ) & 0x7ff; \ - temp1[2] = (in[1] >> 11) & 0x3ff; \ - temp2[0] = in[1] >> 21; \ - temp2[1] = in[0] >> 21; \ - temp2[2] = in[2] >> 22; \ - unpuckU32ToThree_1(temp0[0]); \ - unpuckU32ToThree_1(temp0[1]); \ - unpuckU32ToThree_1(temp0[2]); \ - t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; \ - unpuckU32ToThree_1(temp1[0]); \ - unpuckU32ToThree_1(temp1[1]); \ - unpuckU32ToThree_1(temp1[2]); \ - t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; \ - unpuckU32ToThree_1(temp2[0]); \ - unpuckU32ToThree_1(temp2[1]); \ - unpuckU32ToThree_1(temp2[2]); \ - t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; \ - memcpy(out, t, 12 * sizeof(unsigned char)); \ + u32 t[3] = { 0 };\ + u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ + t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ + ((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ + ((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ + t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ + ((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ + ((t0 & 0x3fc00) >> 10); \ + t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ + ((t0 & 0x60000000) >> 3) 
| ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ + ((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ + unpuckU32ToThree_3(t[0]); \ + unpuckU32ToThree_3(t[1]); \ + unpuckU32ToThree_3(t[2]); \ + memcpy(out, t, 12 * sizeof(unsigned char)); \ } - -#define packU48FormatToThreePacket( out, in) {\ - u32 t1 = (u32)U16BIG(*(u16*)(in + 4)); \ - u32 temp0[3] = { 0 }; \ - u32 temp1[3] = { 0 }; \ - temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; \ - puckU32ToThree_1(temp0[0]); \ - puckU32ToThree_1(temp0[1]); \ - puckU32ToThree_1(temp0[2]); \ - temp1[0] = t1; temp1[1] = t1 >> 1; temp1[2] = t1 >> 2; \ - puckU32ToThree_1(temp1[0]); \ - puckU32ToThree_1(temp1[1]); \ - puckU32ToThree_1(temp1[2]); \ - out[0] = (temp1[0] << 10) | temp0[2]; \ - out[1] = (temp1[2] << 11) | temp0[1]; \ - out[2] = (temp1[1] << 11) | temp0[0]; \ +#define packU48FormatToThreePacket(out, in) {\ + u32 t0 = U32BIG(*(u32*)(in)), t1 = (u32)U16BIG(*(u16*)(in + 4)); \ + puckU32ToThree_3(t0); \ + puckU32ToThree_3(t1); \ + out[0] = \ + (((t1 & 0x40000000) >> 10) | ((t1 & 0x03000000) >> 6) | ((t1 & 0x000000ff) << 10)) | \ + ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); \ + out[1] = \ + ((((t1 & 0x30000000) >> 9) | ((t1 & 0xff0000) >> 5)) | \ + (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) | ((t0 & 0xff00) >> 8))); \ + out[2] = \ + (((t1 & 0x80000000) >> 10) | ((t1 & 0xc000000) >> 7) | ((t1 & 0xff00) << 3)) | \ + (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) | (t0 & 0x000000ff)); \ } - #define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\ t3= LOTR32(t2, 2);\ t4 =LOTR32(t0, 3);\ diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_3/hash.c b/knot/Implementations/crypto_hash/knot384/armcortexm_3/hash.c index 52cc5c5..746235b 100644 --- a/knot/Implementations/crypto_hash/knot384/armcortexm_3/hash.c +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_3/hash.c @@ -1,6 +1,7 @@ #include"auxFormat.h" -#define 
hash_RATE (48 / 8) +//#define hash_RATE (48 / 8) +#define hash_RATE 6 #define PRH_ROUNDS 104 //12*9=108 @@ -42,7 +43,7 @@ int crypto_hash(unsigned char *out, const unsigned char *in, inlen -= hash_RATE; in += hash_RATE; } - memset(tempData, 0, sizeof(tempData)); + memset(tempData, 0, hash_RATE); memcpy(tempData, in, inlen * sizeof(unsigned char)); tempData[inlen] = 0x01; Processing_Data(tempData); diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_4/auxFormat.c b/knot/Implementations/crypto_hash/knot384/armcortexm_4/auxFormat.c index 1d799e6..46f3a49 100644 --- a/knot/Implementations/crypto_hash/knot384/armcortexm_4/auxFormat.c +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_4/auxFormat.c @@ -1,52 +1,37 @@ #include"auxFormat.h" -void unpackU96FormatToThreePacket(u8 * out, u32 * in) { - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - u32 t[3] = { 0 }; - temp0[0] = in[2] & 0x7ff; - temp0[1] = in[1] & 0x7ff; - temp0[2] = in[0] & 0x3ff; - temp1[0] = (in[0]>>10) & 0x7ff; - temp1[1] = (in[2] >>11 ) & 0x7ff; - temp1[2] = (in[1] >> 11) & 0x3ff; - temp2[0] = in[1] >> 21; - temp2[1] = in[0] >> 21; - temp2[2] = in[2] >> 22; - unpuckU32ToThree_1(temp0[0]); - unpuckU32ToThree_1(temp0[1]); - unpuckU32ToThree_1(temp0[2]); - t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; - unpuckU32ToThree_1(temp1[0]); - unpuckU32ToThree_1(temp1[1]); - unpuckU32ToThree_1(temp1[2]); - t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; - unpuckU32ToThree_1(temp2[0]); - unpuckU32ToThree_1(temp2[1]); - unpuckU32ToThree_1(temp2[2]); - t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; - memcpy(out, t, 12 * sizeof(unsigned char)); -} +void unpackU96FormatToThreePacket(u8 *out, u32 *in) { + u32 t[3] = { 0 }; \ + u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ + t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ + ((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ + ((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ + t[1] = ((t2 & 
0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ + ((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ + ((t0 & 0x3fc00) >> 10); \ + t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ + ((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ + ((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ + unpuckU32ToThree_3(t[0]); \ + unpuckU32ToThree_3(t[1]); \ + unpuckU32ToThree_3(t[2]); \ + memcpy(out, t, 12 * sizeof(unsigned char)); \ +} void packU48FormatToThreePacket(u32 * out, u8 * in) { - u32 t1 = (u32)U16BIG(*(u16*)(in + 4)); - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; - puckU32ToThree_1(temp0[0]); - puckU32ToThree_1(temp0[1]); - puckU32ToThree_1(temp0[2]); - temp1[0] = t1; temp1[1] = t1 >> 1; temp1[2] = t1 >> 2; - puckU32ToThree_1(temp1[0]); - puckU32ToThree_1(temp1[1]); - puckU32ToThree_1(temp1[2]); - out[0] = (temp1[0] << 10) | temp0[2]; - out[1] = (temp1[2] << 11) | temp0[1]; - out[2] = (temp1[1] << 11) | temp0[0]; + u32 t0 = U32BIG(*(u32*)(in)), t1 = (u32)U16BIG(*(u16*)(in + 4)); \ + puckU32ToThree_3(t0); \ + puckU32ToThree_3(t1); \ + out[0] = \ + (((t1 & 0x40000000) >> 10) | ((t1 & 0x03000000) >> 6) | ((t1 & 0x000000ff) << 10)) | \ + ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); \ + out[1] = \ + ((((t1 & 0x30000000) >> 9) | ((t1 & 0xff0000) >> 5)) | \ + (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) | ((t0 & 0xff00) >> 8))); \ + out[2] = \ + (((t1 & 0x80000000) >> 10) | ((t1 & 0xc000000) >> 7) | ((t1 & 0xff00) << 3)) | \ + (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) | (t0 & 0x000000ff)); \ } - - unsigned char constant7Format[104] = { /*constant7Format[127]: 12*9=108*/ 0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, diff --git 
a/knot/Implementations/crypto_hash/knot384/armcortexm_4/auxFormat.h b/knot/Implementations/crypto_hash/knot384/armcortexm_4/auxFormat.h index d9d24e4..e814e1d 100644 --- a/knot/Implementations/crypto_hash/knot384/armcortexm_4/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_4/auxFormat.h @@ -17,21 +17,6 @@ void packU48FormatToThreePacket(u32 * out, u8 * in) ; void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) ; void unpackU96FormatToThreePacket(u8 * out, u32 * in) ; -#define puckU32ToThree_1(x){\ -x &= 0x49249249;\ -x = (x | (x >> 2)) & 0xc30c30c3;\ -x = (x | (x >>4)) & 0x0f00f00f;\ -x = (x | (x >> 8)) & 0xff0000ff;\ -x = (x | (x >> 16)) & 0xfff;\ -} -#define unpuckU32ToThree_1(x){\ -x &= 0xfff;\ -x = (x | (x << 16)) & 0xff0000ff;\ -x = (x | (x << 8)) & 0x0f00f00f;\ -x = (x | (x << 4)) & 0xc30c30c3;\ -x = (x | (x << 2)) & 0x49249249;\ -} - #define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) #define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) @@ -54,4 +39,21 @@ t4 = LOTR32(t2, 18);\ t5 = LOTR32(t0, 19); \ } unsigned char constant7Format[104]; - +#define puckU32ToThree_3(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +} +#define unpuckU32ToThree_3(lo){\ + u32 r0;\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +} diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_4/hash.c 
b/knot/Implementations/crypto_hash/knot384/armcortexm_4/hash.c index 8abbbe3..020c61d 100644 --- a/knot/Implementations/crypto_hash/knot384/armcortexm_4/hash.c +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_4/hash.c @@ -1,6 +1,7 @@ #include"auxFormat.h" -#define hash_RATE (48 / 8) +//#define hash_RATE (48 / 8) +#define hash_RATE 6 #define PRH_ROUNDS 104 @@ -20,7 +21,7 @@ int crypto_hash(unsigned char *out, const unsigned char *in, inlen -= hash_RATE; in += hash_RATE; } - memset(tempData, 0, sizeof(tempData)); + memset(tempData, 0, hash_RATE); memcpy(tempData, in, inlen * sizeof(unsigned char)); tempData[inlen] = 0x01; packU48FormatToThreePacket(dataFormat, tempData); diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_5/auxFormat.c b/knot/Implementations/crypto_hash/knot384/armcortexm_5/auxFormat.c index 0693568..4612893 100644 --- a/knot/Implementations/crypto_hash/knot384/armcortexm_5/auxFormat.c +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_5/auxFormat.c @@ -1,51 +1,54 @@ #include"auxFormat.h" -void unpackU96FormatToThreePacket(u8 * out, u32 * in) { - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; - u32 t[3] = { 0 }; - temp0[0] = in[2] & 0x7ff; - temp0[1] = in[1] & 0x7ff; - temp0[2] = in[0] & 0x3ff; - temp1[0] = (in[0]>>10) & 0x7ff; - temp1[1] = (in[2] >>11 ) & 0x7ff; - temp1[2] = (in[1] >> 11) & 0x3ff; - temp2[0] = in[1] >> 21; - temp2[1] = in[0] >> 21; - temp2[2] = in[2] >> 22; - unpuckU32ToThree_1(temp0[0]); - unpuckU32ToThree_1(temp0[1]); - unpuckU32ToThree_1(temp0[2]); - t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; - unpuckU32ToThree_1(temp1[0]); - unpuckU32ToThree_1(temp1[1]); - unpuckU32ToThree_1(temp1[2]); - t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; - unpuckU32ToThree_1(temp2[0]); - unpuckU32ToThree_1(temp2[1]); - unpuckU32ToThree_1(temp2[2]); - t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; - memcpy(out, t, 12 * sizeof(unsigned char)); +#define puckU32ToThree_3(lo){\ +u32 r0;\ +r0 = 
(lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +} +#define unpuckU32ToThree_3(lo){\ + u32 r0;\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +} +void unpackU96FormatToThreePacket(u8 *out, u32 *in) { + u32 t[3] = { 0 }; \ + u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ + t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ + ((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ + ((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ + t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ + ((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ + ((t0 & 0x3fc00) >> 10); \ + t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ + ((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ + ((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ + unpuckU32ToThree_3(t[0]); \ + unpuckU32ToThree_3(t[1]); \ + unpuckU32ToThree_3(t[2]); \ + memcpy(out, t, 12 * sizeof(unsigned char)); \ } - void packU48FormatToThreePacket(u32 * out, u8 * in) { - u32 t1 = (u32)U16BIG(*(u16*)(in + 4)); - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; - puckU32ToThree_1(temp0[0]); - puckU32ToThree_1(temp0[1]); - 
puckU32ToThree_1(temp0[2]); - temp1[0] = t1; temp1[1] = t1 >> 1; temp1[2] = t1 >> 2; - puckU32ToThree_1(temp1[0]); - puckU32ToThree_1(temp1[1]); - puckU32ToThree_1(temp1[2]); - out[0] = (temp1[0] << 10) | temp0[2]; - out[1] = (temp1[2] << 11) | temp0[1]; - out[2] = (temp1[1] << 11) | temp0[0]; + u32 t0 = U32BIG(*(u32*)(in)), t1 = (u32)U16BIG(*(u16*)(in + 4)); \ + puckU32ToThree_3(t0); \ + puckU32ToThree_3(t1); \ + out[0] = \ + (((t1 & 0x40000000) >> 10) | ((t1 & 0x03000000) >> 6) | ((t1 & 0x000000ff) << 10)) | \ + ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); \ + out[1] = \ + ((((t1 & 0x30000000) >> 9) | ((t1 & 0xff0000) >> 5)) | \ + (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) | ((t0 & 0xff00) >> 8))); \ + out[2] = \ + (((t1 & 0x80000000) >> 10) | ((t1 & 0xc000000) >> 7) | ((t1 & 0xff00) << 3)) | \ + (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) | (t0 & 0x000000ff)); \ } - unsigned char constant7Format[104] = { /*constant7Format[127]: 12*9=108*/ 0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_5/auxFormat.h b/knot/Implementations/crypto_hash/knot384/armcortexm_5/auxFormat.h index 7d2a5a9..14d0701 100644 --- a/knot/Implementations/crypto_hash/knot384/armcortexm_5/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_5/auxFormat.h @@ -12,7 +12,6 @@ typedef unsigned short u16; typedef unsigned int u32; typedef unsigned long long u64; - #define puckU32ToThree_1(x){\ x &= 0x49249249;\ x = (x | (x >> 2)) & 0xc30c30c3;\ @@ -187,5 +186,5 @@ SBOX3(s[2], s[5], s[8], s[11]);\ : : );\ } -unsigned char constant7Format[104]; +unsigned char constant7Format[104]; diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.c b/knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.c index a12c2aa..b3757c3 100644 --- a/knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.c +++ 
b/knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.c @@ -1,62 +1,51 @@ #include"auxFormat.h" -void unpackU96FormatToThreePacket(u8 * out, u32 * in) { - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - u32 temp2[3] = { 0 }; +void unpackU96FormatToThreePacket(u8 *out, u32 *in) { u32 t[3] = { 0 }; - temp0[0] = in[2] & 0x7ff; - temp0[1] = in[1] & 0x7ff; - temp0[2] = in[0] & 0x3ff; - temp1[0] = (in[0]>>10) & 0x7ff; - temp1[1] = (in[2] >>11 ) & 0x7ff; - temp1[2] = (in[1] >> 11) & 0x3ff; - temp2[0] = in[1] >> 21; - temp2[1] = in[0] >> 21; - temp2[2] = in[2] >> 22; - unpuckU32ToThree_1(temp0[0]); - unpuckU32ToThree_1(temp0[1]); - unpuckU32ToThree_1(temp0[2]); - t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; - unpuckU32ToThree_1(temp1[0]); - unpuckU32ToThree_1(temp1[1]); - unpuckU32ToThree_1(temp1[2]); - t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; - unpuckU32ToThree_1(temp2[0]); - unpuckU32ToThree_1(temp2[1]); - unpuckU32ToThree_1(temp2[2]); - t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; + u32 t0 = in[0], t1 = in[1], t2 = in[2]; + t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | ((t0 & 0x300) << 20) + | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | ((t0 & 0xff) << 16) + | ((t1 & 0xff) << 8) | (t2 & 0xff); + t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) + | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) + | ((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) + | ((t2 & 0x7f800) >> 3) | ((t0 & 0x3fc00) >> 10); + t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) + | ((t2 & 0xc0000000) >> 2) | ((t0 & 0x60000000) >> 3) + | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) + | ((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); + unpuckU32ToThree_3(t[0]); + unpuckU32ToThree_3(t[1]); + unpuckU32ToThree_3(t[2]); memcpy(out, t, 12 * sizeof(unsigned char)); } - -void packU48FormatToThreePacket(u32 * out, u8 * in) { - u32 t1 = (u32)U16BIG(*(u16*)(in + 4)); - u32 temp0[3] = { 0 }; - u32 temp1[3] = { 0 }; - temp0[0] = U32BIG(((u32*)in)[0]); 
temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; - puckU32ToThree_1(temp0[0]); - puckU32ToThree_1(temp0[1]); - puckU32ToThree_1(temp0[2]); - temp1[0] = t1; temp1[1] = t1 >> 1; temp1[2] = t1 >> 2; - puckU32ToThree_1(temp1[0]); - puckU32ToThree_1(temp1[1]); - puckU32ToThree_1(temp1[2]); - out[0] = (temp1[0] << 10) | temp0[2]; - out[1] = (temp1[2] << 11) | temp0[1]; - out[2] = (temp1[1] << 11) | temp0[0]; +void packU48FormatToThreePacket(u32 *out, u8 *in) { + u32 t0 = U32BIG(*(u32* )(in)), t1 = (u32) U16BIG(*(u16* )(in + 4)); + puckU32ToThree_3(t0); + puckU32ToThree_3(t1); + out[0] = (((t1 & 0x40000000) >> 10) | ((t1 & 0x03000000) >> 6) + | ((t1 & 0x000000ff) << 10)) + | ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); + out[1] = ((((t1 & 0x30000000) >> 9) | ((t1 & 0xff0000) >> 5)) + | (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) + | ((t0 & 0xff00) >> 8))); + out[2] = (((t1 & 0x80000000) >> 10) | ((t1 & 0xc000000) >> 7) + | ((t1 & 0xff00) << 3)) + | (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) + | (t0 & 0x000000ff)); } -unsigned char constant7Format[104] = { - /*constant7Format[127]: 12*9=108*/ -0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, -0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, -0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, -0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, -0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, -0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, -0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,0xcf,0x5e,0xd3,0x9f, -0xdc,0xc6,0x16,0x91,0x8d,0x4c,0x43,0x1a,0xd0,0x87,0x1c,0xc1, -0x0f,0x59,0xca,0x57,0x9b,0xdd,0xce,0x56,}; +unsigned char constant7Format[104] = { +/*constant7Format[127]: 12*9=108*/ +0x01, 0x08, 0x40, 0x02, 0x10, 0x80, 0x05, 0x09, 0x48, 0x42, 0x12, 0x90, 0x85, + 0x0c, 0x41, 0x0a, 0x50, 0x82, 0x15, 0x89, 0x4d, 0x4b, 0x5a, 0xd2, 0x97, + 0x9c, 0xc4, 0x06, 0x11, 0x88, 0x45, 0x0b, 0x58, 0xc2, 0x17, 
0x99, 0xcd, + 0x4e, 0x53, 0x9a, 0xd5, 0x8e, 0x54, 0x83, 0x1d, 0xc9, 0x4f, 0x5b, 0xda, + 0xd7, 0x9e, 0xd4, 0x86, 0x14, 0x81, 0x0d, 0x49, 0x4a, 0x52, 0x92, 0x95, + 0x8c, 0x44, 0x03, 0x18, 0xc0, 0x07, 0x19, 0xc8, 0x47, 0x1b, 0xd8, 0xc7, + 0x1e, 0xd1, 0x8f, 0x5c, 0xc3, 0x1f, 0xd9, 0xcf, 0x5e, 0xd3, 0x9f, 0xdc, + 0xc6, 0x16, 0x91, 0x8d, 0x4c, 0x43, 0x1a, 0xd0, 0x87, 0x1c, 0xc1, 0x0f, + 0x59, 0xca, 0x57, 0x9b, 0xdd, 0xce, 0x56, }; void ROUND384_Three(unsigned int *s, unsigned char *c, int lunnum) { unsigned int t, t1, t2; u32 rci, temp; @@ -70,7 +59,7 @@ void ROUND384_Three(unsigned int *s, unsigned char *c, int lunnum) { temp = ((u32*) (c + t))[0]; rci = temp & 0xff; ARC(rci); - SBOX1_ROR(s[0], s[4], s[8], s[10] ); + SBOX1_ROR(s[0], s[4], s[8], s[10]); SBOX2_ROR(s[1], s[5], s[6], s[11]); SBOX3_ROR(s[2], s[3], s[7], s[9]); rci = (temp & 0xff00) >> 8; diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.h b/knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.h index 5353b39..39d3c6a 100644 --- a/knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.h @@ -13,19 +13,23 @@ typedef unsigned int u32; typedef unsigned long long u64; -#define puckU32ToThree_1(x){\ -x &= 0x49249249;\ -x = (x | (x >> 2)) & 0xc30c30c3;\ -x = (x | (x >>4)) & 0x0f00f00f;\ -x = (x | (x >> 8)) & 0xff0000ff;\ -x = (x | (x >> 16)) & 0xfff;\ +#define puckU32ToThree_3(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ } -#define unpuckU32ToThree_1(x){\ -x &= 0xfff;\ -x = (x | (x << 16)) & 0xff0000ff;\ -x = (x | (x << 8)) & 0x0f00f00f;\ -x = (x | (x << 4)) & 0xc30c30c3;\ 
-x = (x | (x << 2)) & 0x49249249;\ +#define unpuckU32ToThree_3(lo){\ + u32 r0;\ +r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ +r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ +r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ } #define ARC(rci) \ do { \ diff --git a/knot/Implementations/crypto_hash/knot512/LWC_HASH_KAT_512.txt b/knot/Implementations/crypto_hash/knot512/LWC_HASH_KAT_512.txt index 801bf50..49c7240 100644 --- a/knot/Implementations/crypto_hash/knot512/LWC_HASH_KAT_512.txt +++ b/knot/Implementations/crypto_hash/knot512/LWC_HASH_KAT_512.txt @@ -157,6 +157,9 @@ MD = AC246201475E1A9BAC4CF5641BB311770D257B5BC82046747F0677D238699638C86DF97A357 Count = 40 Msg = 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F20212223242526 MD = F076AE7C96A68F532B4DC97FA06669D044D5A24EFF341DFAF87BA1CB706E08AC4AA180FD33C3C347185494F83BE08878A38C9ED067395A36550439E7474D35C2 +Count = 40 +Msg = 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F20212223242526 +MD = F076AE7C96A68F532B4DC97FA06669D044D5A24EFF341DFAF87BA1CB706E08AC4AA180FD33C3C347185494F83BE08878A38C9ED067395A36550439E7474D35C2 Count = 41 Msg = 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F2021222324252627 diff --git a/knot/Implementations/crypto_hash/knot512/armcortexm_2/auxFormat.c b/knot/Implementations/crypto_hash/knot512/armcortexm_2/auxFormat.c index b665df7..e5d83c9 100644 --- a/knot/Implementations/crypto_hash/knot512/armcortexm_2/auxFormat.c +++ b/knot/Implementations/crypto_hash/knot512/armcortexm_2/auxFormat.c @@ -21,13 +21,11 @@ void P512(unsigned int *s, unsigned char *round, unsigned char rounds) { } } void packU64FormatToFourPacket(u32 *out, u8 *in) { - u32 t1, t2, temp1; + u32 t1, t2; t1 = U32BIG(((u32* )in)[0]); t2 = U32BIG(((u32* )in)[1]); 
- puck32(t1); - puck32(t1); - puck32(t2); - puck32(t2); + puckU32ToFour(t2); + puckU32ToFour(t1); out[3] = ((t2 >> 16) & 0x0000ff00) | ((t1 >> 24)); out[2] = ((t2 >> 8) & 0x0000ff00) | ((t1 >> 16) & 0x000000ff); out[1] = (t2 & 0x0000ff00) | ((t1 >> 8) & 0x000000ff); @@ -36,7 +34,6 @@ void packU64FormatToFourPacket(u32 *out, u8 *in) { void unpackU128FormatToFourPacket(u8 *out, u32 *in) { u32 t[4] = { 0 }; - u32 r0; t[3] = (in[3] & 0xff000000) | ((in[2] >> 8) & 0x00ff0000) | ((in[1] >> 16) & 0x0000ff00) | (in[0] >> 24); t[2] = ((in[3] << 8) & 0xff000000) | (in[2] & 0x00ff0000) @@ -45,14 +42,10 @@ void unpackU128FormatToFourPacket(u8 *out, u32 *in) { | (in[1] & 0x0000ff00) | ((in[0] >> 8) & 0x000000ff); t[0] = ((in[3] << 24) & 0xff000000) | ((in[2] << 16) & 0x00ff0000) | ((in[1] << 8) & 0x0000ff00) | (in[0] & 0x000000ff); - unpuck32(t[0]); - unpuck32(t[0]); - unpuck32(t[1]); - unpuck32(t[1]); - unpuck32(t[2]); - unpuck32(t[2]); - unpuck32(t[3]); - unpuck32(t[3]); + unpuckU32ToFour(t[0]); + unpuckU32ToFour(t[1]); + unpuckU32ToFour(t[2]); + unpuckU32ToFour(t[3]); + memcpy(out, t, 16 * sizeof(unsigned char)); } - diff --git a/knot/Implementations/crypto_hash/knot512/armcortexm_2/auxFormat.h b/knot/Implementations/crypto_hash/knot512/armcortexm_2/auxFormat.h index 4f12a00..428354e 100644 --- a/knot/Implementations/crypto_hash/knot512/armcortexm_2/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot512/armcortexm_2/auxFormat.h @@ -10,21 +10,25 @@ typedef unsigned char u8; typedef unsigned int u32; typedef unsigned long long u64; -#define puck32(in)\ -{\ -temp1 = (in ^ (in >> 1)) & 0x22222222; in ^= temp1 ^ (temp1 << 1);\ -temp1 = (in ^ (in >> 2)) & 0x0C0C0C0C; in ^= temp1 ^ (temp1 << 2);\ -temp1 = (in ^ (in >> 4)) & 0x00F000F0; in ^= temp1 ^ (temp1 << 4);\ -temp1 = (in ^ (in >> 8)) & 0x0000FF00; in ^= temp1 ^ (temp1 << 8);\ +#define puckU32ToFour(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 1)) & 0x44444444, lo ^= r0 ^ 
(r0 >> 1);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 8)) & 0x00ff0000, lo ^= r0 ^ (r0 >> 8);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ } -#define unpuck32(t0){\ - r0 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= r0 ^ (r0 << 8); \ - r0 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= r0 ^ (r0 << 4); \ - r0 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= r0 ^ (r0 << 2); \ - r0 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= r0 ^ (r0 << 1); \ +#define unpuckU32ToFour(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 8)) & 0x00ff0000, lo ^= r0 ^ (r0 >> 8);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 1)) & 0x44444444, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ } - //t1 #define P512_ARC_1(rci) \ do { \ diff --git a/knot/Implementations/crypto_hash/knot512/armcortexm_3/auxFormat.h b/knot/Implementations/crypto_hash/knot512/armcortexm_3/auxFormat.h index 566afab..c6de2d5 100644 --- a/knot/Implementations/crypto_hash/knot512/armcortexm_3/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot512/armcortexm_3/auxFormat.h @@ -19,20 +19,23 @@ typedef unsigned int u32; typedef unsigned long long u64; void printU8(char name[], u8 var[], long len, int offset); -// t9 -#define puck32(in)\ -{\ -t9 = (in ^ (in >> 1)) & 0x22222222; in ^= t9 ^ (t9 << 1);\ -t9 = (in ^ (in >> 2)) & 0x0C0C0C0C; in ^= t9 ^ (t9 << 2);\ -t9 = (in ^ (in >> 4)) & 0x00F000F0; in ^= t9 ^ (t9 << 4);\ -t9 = (in ^ (in >> 8)) & 0x0000FF00; in ^= t9 ^ (t9 << 8);\ +#define puckU32ToFour(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 1)) & 0x44444444, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 
8)) & 0x00ff0000, lo ^= r0 ^ (r0 >> 8);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ } -// t9 -#define unpuck32(t0){\ - t9 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= t9 ^ (t9 << 8); \ - t9 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= t9 ^ (t9 << 4); \ - t9 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= t9 ^ (t9 << 2); \ - t9 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= t9 ^ (t9 << 1); \ +#define unpuckU32ToFour(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 8)) & 0x00ff0000, lo ^= r0 ^ (r0 >> 8);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 1)) & 0x44444444, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ } //u32 u32 t1, t2, t3,t8, #define unpackU128FormatToFourPacket( out, dataFormat) {\ @@ -40,10 +43,10 @@ t3 = (dataFormat[3] & 0xff000000) | ((dataFormat[2] >> 8) & 0x00ff0000) | ((data t2 = ((dataFormat[3] << 8) & 0xff000000) | (dataFormat[2] & 0x00ff0000) | ((dataFormat[1] >> 8) & 0x0000ff00) | ((dataFormat[0] >> 16) & 0x000000ff); \ t1 = ((dataFormat[3] << 16) & 0xff000000) | ((dataFormat[2] << 8) & 0x00ff0000) | (dataFormat[1] & 0x0000ff00) | ((dataFormat[0] >> 8) & 0x000000ff); \ t8 = ((dataFormat[3] << 24) & 0xff000000) | ((dataFormat[2] << 16) & 0x00ff0000) | ((dataFormat[1] << 8) & 0x0000ff00) | (dataFormat[0] & 0x000000ff); \ -unpuck32(t8); unpuck32(t8); \ -unpuck32(t1); unpuck32(t1); \ -unpuck32(t2); unpuck32(t2); \ -unpuck32(t3); unpuck32(t3); \ +unpuckU32ToFour(t8);\ +unpuckU32ToFour(t1); \ +unpuckU32ToFour(t2); \ +unpuckU32ToFour(t3); \ ((u32*)out)[0] = U32BIG(t8); \ ((u32*)out)[1] = U32BIG(t1); \ ((u32*)out)[2] = U32BIG(t2); \ @@ -52,10 +55,8 @@ unpuck32(t3); unpuck32(t3); \ #define packU64FormatToFourPacket( out, in) {\ t1 = U32BIG(((u32*)in)[0]); \ t2 = U32BIG(((u32*)in)[1]); \ -puck32(t1); \ -puck32(t1); \ -puck32(t2); \ -puck32(t2); \ +puckU32ToFour(t1); \ +puckU32ToFour(t2); \ 
out[3] = ((t2 >> 16) & 0x0000ff00) | ((t1 >> 24)); \ out[2] = ((t2 >> 8) & 0x0000ff00) | ((t1 >> 16) & 0x000000ff); \ out[1] = (t2 & 0x0000ff00) | ((t1 >> 8) & 0x000000ff); \ diff --git a/knot/Implementations/crypto_hash/knot512/armcortexm_4/auxFormat.c b/knot/Implementations/crypto_hash/knot512/armcortexm_4/auxFormat.c index 356bae8..9b30793 100644 --- a/knot/Implementations/crypto_hash/knot512/armcortexm_4/auxFormat.c +++ b/knot/Implementations/crypto_hash/knot512/armcortexm_4/auxFormat.c @@ -22,13 +22,11 @@ void P512(unsigned int *s, unsigned char *round, unsigned char lunNum) { } } void packU64FormatToFourPacket(u32 *out, u8 *in) { - u32 t1, t2, temp1; + u32 t1, t2; t1 = U32BIG(((u32* )in)[0]); t2 = U32BIG(((u32* )in)[1]); - puck32(t1); - puck32(t1); - puck32(t2); - puck32(t2); + puckU32ToFour(t2); + puckU32ToFour(t1); out[3] = ((t2 >> 16) & 0x0000ff00) | ((t1 >> 24)); out[2] = ((t2 >> 8) & 0x0000ff00) | ((t1 >> 16) & 0x000000ff); out[1] = (t2 & 0x0000ff00) | ((t1 >> 8) & 0x000000ff); @@ -37,7 +35,6 @@ void packU64FormatToFourPacket(u32 *out, u8 *in) { void unpackU128FormatToFourPacket(u8 *out, u32 *in) { u32 t[4] = { 0 }; - u32 r0; t[3] = (in[3] & 0xff000000) | ((in[2] >> 8) & 0x00ff0000) | ((in[1] >> 16) & 0x0000ff00) | (in[0] >> 24); t[2] = ((in[3] << 8) & 0xff000000) | (in[2] & 0x00ff0000) @@ -46,14 +43,10 @@ void unpackU128FormatToFourPacket(u8 *out, u32 *in) { | (in[1] & 0x0000ff00) | ((in[0] >> 8) & 0x000000ff); t[0] = ((in[3] << 24) & 0xff000000) | ((in[2] << 16) & 0x00ff0000) | ((in[1] << 8) & 0x0000ff00) | (in[0] & 0x000000ff); - unpuck32(t[0]); - unpuck32(t[0]); - unpuck32(t[1]); - unpuck32(t[1]); - unpuck32(t[2]); - unpuck32(t[2]); - unpuck32(t[3]); - unpuck32(t[3]); + unpuckU32ToFour(t[0]); + unpuckU32ToFour(t[1]); + unpuckU32ToFour(t[2]); + unpuckU32ToFour(t[3]); + memcpy(out, t, 16 * sizeof(unsigned char)); } - diff --git a/knot/Implementations/crypto_hash/knot512/armcortexm_4/auxFormat.h 
b/knot/Implementations/crypto_hash/knot512/armcortexm_4/auxFormat.h index 489c679..c2c082a 100644 --- a/knot/Implementations/crypto_hash/knot512/armcortexm_4/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot512/armcortexm_4/auxFormat.h @@ -10,20 +10,24 @@ typedef unsigned char u8; typedef unsigned int u32; typedef unsigned long long u64; -#define puck32(in)\ -{\ -temp1 = (in ^ (in >> 1)) & 0x22222222; in ^= temp1 ^ (temp1 << 1);\ -temp1 = (in ^ (in >> 2)) & 0x0C0C0C0C; in ^= temp1 ^ (temp1 << 2);\ -temp1 = (in ^ (in >> 4)) & 0x00F000F0; in ^= temp1 ^ (temp1 << 4);\ -temp1 = (in ^ (in >> 8)) & 0x0000FF00; in ^= temp1 ^ (temp1 << 8);\ +#define puckU32ToFour(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 1)) & 0x44444444, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 8)) & 0x00ff0000, lo ^= r0 ^ (r0 >> 8);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ } -#define unpuck32(t0){\ - r0 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= r0 ^ (r0 << 8); \ - r0 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= r0 ^ (r0 << 4); \ - r0 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= r0 ^ (r0 << 2); \ - r0 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= r0 ^ (r0 << 1); \ +#define unpuckU32ToFour(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 8)) & 0x00ff0000, lo ^= r0 ^ (r0 >> 8);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 1)) & 0x44444444, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ } - #define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) #define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) diff --git a/knot/Implementations/crypto_hash/knot512/armcortexm_5/auxFormat.c b/knot/Implementations/crypto_hash/knot512/armcortexm_5/auxFormat.c index 00dbfda..1028787 100644 
--- a/knot/Implementations/crypto_hash/knot512/armcortexm_5/auxFormat.c +++ b/knot/Implementations/crypto_hash/knot512/armcortexm_5/auxFormat.c @@ -57,13 +57,11 @@ void P512(unsigned int *s, unsigned char *round, unsigned char rounds) { } void packU64FormatToFourPacket(u32 *out, u8 *in) { - u32 t1, t2, temp1; + u32 t1, t2; t1 = U32BIG(((u32* )in)[0]); t2 = U32BIG(((u32* )in)[1]); - puck32(t1); - puck32(t1); - puck32(t2); - puck32(t2); + puckU32ToFour(t2); + puckU32ToFour(t1); out[3] = ((t2 >> 16) & 0x0000ff00) | ((t1 >> 24)); out[2] = ((t2 >> 8) & 0x0000ff00) | ((t1 >> 16) & 0x000000ff); out[1] = (t2 & 0x0000ff00) | ((t1 >> 8) & 0x000000ff); @@ -72,7 +70,6 @@ void packU64FormatToFourPacket(u32 *out, u8 *in) { void unpackU128FormatToFourPacket(u8 *out, u32 *in) { u32 t[4] = { 0 }; - u32 r0; t[3] = (in[3] & 0xff000000) | ((in[2] >> 8) & 0x00ff0000) | ((in[1] >> 16) & 0x0000ff00) | (in[0] >> 24); t[2] = ((in[3] << 8) & 0xff000000) | (in[2] & 0x00ff0000) @@ -81,14 +78,12 @@ void unpackU128FormatToFourPacket(u8 *out, u32 *in) { | (in[1] & 0x0000ff00) | ((in[0] >> 8) & 0x000000ff); t[0] = ((in[3] << 24) & 0xff000000) | ((in[2] << 16) & 0x00ff0000) | ((in[1] << 8) & 0x0000ff00) | (in[0] & 0x000000ff); - unpuck32(t[0]); - unpuck32(t[0]); - unpuck32(t[1]); - unpuck32(t[1]); - unpuck32(t[2]); - unpuck32(t[2]); - unpuck32(t[3]); - unpuck32(t[3]); + unpuckU32ToFour(t[0]); + unpuckU32ToFour(t[1]); + unpuckU32ToFour(t[2]); + unpuckU32ToFour(t[3]); + memcpy(out, t, 16 * sizeof(unsigned char)); } + diff --git a/knot/Implementations/crypto_hash/knot512/armcortexm_5/auxFormat.h b/knot/Implementations/crypto_hash/knot512/armcortexm_5/auxFormat.h index b31e7e5..144611e 100644 --- a/knot/Implementations/crypto_hash/knot512/armcortexm_5/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot512/armcortexm_5/auxFormat.h @@ -10,18 +10,23 @@ typedef unsigned char u8; typedef unsigned int u32; typedef unsigned long long u64; -#define puck32(in)\ -{\ -temp1 = (in ^ (in >> 1)) & 0x22222222; 
in ^= temp1 ^ (temp1 << 1);\ -temp1 = (in ^ (in >> 2)) & 0x0C0C0C0C; in ^= temp1 ^ (temp1 << 2);\ -temp1 = (in ^ (in >> 4)) & 0x00F000F0; in ^= temp1 ^ (temp1 << 4);\ -temp1 = (in ^ (in >> 8)) & 0x0000FF00; in ^= temp1 ^ (temp1 << 8);\ +#define puckU32ToFour(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 1)) & 0x44444444, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 8)) & 0x00ff0000, lo ^= r0 ^ (r0 >> 8);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ } -#define unpuck32(t0){\ - r0 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= r0 ^ (r0 << 8); \ - r0 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= r0 ^ (r0 << 4); \ - r0 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= r0 ^ (r0 << 2); \ - r0 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= r0 ^ (r0 << 1); \ +#define unpuckU32ToFour(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 8)) & 0x00ff0000, lo ^= r0 ^ (r0 >> 8);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 1)) & 0x44444444, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ } void P512(unsigned int *s, unsigned char *round, unsigned char rounds); diff --git a/knot/Implementations/crypto_hash/knot512/armcortexm_6/auxFormat.c b/knot/Implementations/crypto_hash/knot512/armcortexm_6/auxFormat.c index 4d2cbf5..af6d4c3 100644 --- a/knot/Implementations/crypto_hash/knot512/armcortexm_6/auxFormat.c +++ b/knot/Implementations/crypto_hash/knot512/armcortexm_6/auxFormat.c @@ -55,14 +55,13 @@ void P512(unsigned int *s, unsigned char *round, unsigned char rounds) { SBOX3(s[3], s[4], s[11], s[12]); SR(s[4], s[8], s[9], s[10], s[11], s[12], s[13], s[14], s[15]); } + void packU64FormatToFourPacket(u32 *out, u8 *in) { - u32 t1, t2, temp1; + u32 t1, t2; t1 = 
U32BIG(((u32* )in)[0]); t2 = U32BIG(((u32* )in)[1]); - puck32(t1); - puck32(t1); - puck32(t2); - puck32(t2); + puckU32ToFour(t2); + puckU32ToFour(t1); out[3] = ((t2 >> 16) & 0x0000ff00) | ((t1 >> 24)); out[2] = ((t2 >> 8) & 0x0000ff00) | ((t1 >> 16) & 0x000000ff); out[1] = (t2 & 0x0000ff00) | ((t1 >> 8) & 0x000000ff); @@ -71,7 +70,6 @@ void packU64FormatToFourPacket(u32 *out, u8 *in) { void unpackU128FormatToFourPacket(u8 *out, u32 *in) { u32 t[4] = { 0 }; - u32 r0; t[3] = (in[3] & 0xff000000) | ((in[2] >> 8) & 0x00ff0000) | ((in[1] >> 16) & 0x0000ff00) | (in[0] >> 24); t[2] = ((in[3] << 8) & 0xff000000) | (in[2] & 0x00ff0000) @@ -80,14 +78,11 @@ void unpackU128FormatToFourPacket(u8 *out, u32 *in) { | (in[1] & 0x0000ff00) | ((in[0] >> 8) & 0x000000ff); t[0] = ((in[3] << 24) & 0xff000000) | ((in[2] << 16) & 0x00ff0000) | ((in[1] << 8) & 0x0000ff00) | (in[0] & 0x000000ff); - unpuck32(t[0]); - unpuck32(t[0]); - unpuck32(t[1]); - unpuck32(t[1]); - unpuck32(t[2]); - unpuck32(t[2]); - unpuck32(t[3]); - unpuck32(t[3]); + unpuckU32ToFour(t[0]); + unpuckU32ToFour(t[1]); + unpuckU32ToFour(t[2]); + unpuckU32ToFour(t[3]); + memcpy(out, t, 16 * sizeof(unsigned char)); } diff --git a/knot/Implementations/crypto_hash/knot512/armcortexm_6/auxFormat.h b/knot/Implementations/crypto_hash/knot512/armcortexm_6/auxFormat.h index 07ff59e..9aa6a93 100644 --- a/knot/Implementations/crypto_hash/knot512/armcortexm_6/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot512/armcortexm_6/auxFormat.h @@ -10,23 +10,26 @@ typedef unsigned char u8; typedef unsigned int u32; typedef unsigned long long u64; -#define puck32(in)\ -{\ -temp1 = (in ^ (in >> 1)) & 0x22222222; in ^= temp1 ^ (temp1 << 1);\ -temp1 = (in ^ (in >> 2)) & 0x0C0C0C0C; in ^= temp1 ^ (temp1 << 2);\ -temp1 = (in ^ (in >> 4)) & 0x00F000F0; in ^= temp1 ^ (temp1 << 4);\ -temp1 = (in ^ (in >> 8)) & 0x0000FF00; in ^= temp1 ^ (temp1 << 8);\ +#define puckU32ToFour(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 
2);\ +r0 = (lo ^ (lo << 1)) & 0x44444444, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 8)) & 0x00ff0000, lo ^= r0 ^ (r0 >> 8);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ } -#define unpuck32(t0){\ - r0 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= r0 ^ (r0 << 8); \ - r0 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= r0 ^ (r0 << 4); \ - r0 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= r0 ^ (r0 << 2); \ - r0 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= r0 ^ (r0 << 1); \ +#define unpuckU32ToFour(lo){\ +u32 r0;\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 8)) & 0x00ff0000, lo ^= r0 ^ (r0 >> 8);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ +r0 = (lo ^ (lo << 4)) & 0x0f000f00, lo ^= r0 ^ (r0 >> 4);\ +r0 = (lo ^ (lo << 1)) & 0x44444444, lo ^= r0 ^ (r0 >> 1);\ +r0 = (lo ^ (lo << 2)) & 0x30303030, lo ^= r0 ^ (r0 >> 2);\ } void P512(unsigned int *s, unsigned char *round, unsigned char rounds); -void unpackU128FormatToFourPacket(u8 *out, u32 *in); -void packU64FormatToFourPacket(u32 *out, u8 *in) ; #define ARC(rci) \ do { \ __asm__ __volatile__ ( \ diff --git a/knot/Implementations/crypto_hash/knot512/opt_1/hash.c b/knot/Implementations/crypto_hash/knot512/opt_1/hash.c index 91b3aa8..21b7ccb 100644 --- a/knot/Implementations/crypto_hash/knot512/opt_1/hash.c +++ b/knot/Implementations/crypto_hash/knot512/opt_1/hash.c @@ -1,4 +1,5 @@ #include "api.h" +#include <string.h> typedef unsigned char u8; typedef unsigned long long u64; @@ -57,7 +58,7 @@ int crypto_hash(unsigned char *out, const unsigned char *in, u64 b01, b11, b21, b31, b00, b10, b20, b30; u64 t1, t2, t3, t5, t6, t8, t9, t11; u64 x30 = 0, x20 = 0, x10 = 0, x00 = 0, x31 = 0, x21 = 0, x11 = 0, x01 = 0; - u64 i; + u64 i; u8 tempData[32]; // initialization @@ -69,9 +70,9 @@ int crypto_hash(unsigned char *out, const unsigned char *in, } inlen -= RATE; in += RATE; - } - 
memset(tempData, 0, RATE); - memcpy(tempData, in, inlen * sizeof(unsigned char)); + } + memset(tempData, 0, RATE); + memcpy(tempData, in, inlen * sizeof(unsigned char)); tempData[inlen] = 0x01; x00 ^= U64BIG(((u64*)tempData)[0]); diff --git a/knot/Implementations/crypto_hash/knot512/opt_SSE/hash.c b/knot/Implementations/crypto_hash/knot512/opt_SSE/hash.c index 6d33b6b..190962e 100644 --- a/knot/Implementations/crypto_hash/knot512/opt_SSE/hash.c +++ b/knot/Implementations/crypto_hash/knot512/opt_SSE/hash.c @@ -1,4 +1,5 @@ #include "api.h" +#include <string.h> typedef unsigned char u8; typedef unsigned long long u64; -- libgit2 0.26.0