diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.c b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.c index b9e0da1..5d7ce39 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.c +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.c @@ -1,38 +1,60 @@ #include"auxFormat.h" -void packU96FormatToThreePacket(u32 *out, u8 *in) { - u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ - puckU32ToThree_3(t0); \ - puckU32ToThree_3(t1); \ - puckU32ToThree_3(t2); \ - -out[0] = ((((t2 >> 21) & 0x400) | (((t2 >> 18) & 0x300)) | ((t2 & 0xff00) >> 8)) << 21) | - ((((t1 >> 20) & 0x400) | ((t1 >> 16) & 0x300) | (t1 & 0xff)) << 10) | - (((t0 >> 20) & 0x300) | ((t0 & 0xff0000) >> 16)); -out[1] = ((((t2 >> 20) & 0x400) | ((t2 >> 16) & 0x300) | (t2 & 0xff)) << 21) | - ((((t1 >> 20) & 0x300) | ((t1 & 0xff0000) >> 16)) << 11) | - (((t0 >> 21) & 0x400) | ((t0 >> 18) & 0x300) | ((t0 & 0xff00) >> 8)); -out[2] = ((((t2 >> 20) & 0x300) | ((t2 & 0xff0000) >> 16)) << 22) | - ((((t1 >> 21) & 0x400) | ((t1 >> 18) & 0x300)|((t1 & 0xff00) >> 8)) << 11) | - (((t0 >> 20) & 0x400) | ((t0 >> 16) & 0x300) | (t0 & 0xff)); +void packU96FormatToThreePacket(u32 * out, u8 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t1=U32BIG(((u32*)in)[0]); + temp0[0] = t1; temp0[1] = t1 >> 1; temp0[2] = t1>> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + t1=U32BIG(((u32*)in)[1]); + temp1[0] = t1; temp1[1] = t1>>1; temp1[2] = t1 >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + t1=U32BIG(((u32*)in)[2]); + temp2[0] = t1; temp2[1] =t1 >> 1; temp2[2] = t1>> 2; + puckU32ToThree_1(temp2[0]); + puckU32ToThree_1(temp2[1]); + puckU32ToThree_1(temp2[2]); + out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; } -void unpackU96FormatToThreePacket(u8 *out, u32 *in) { - u32 t[3] = { 0 } ;\ -u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ -t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ -((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ -((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ -t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ -((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ -((t0 & 0x3fc00) >> 10); \ -t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ -((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ -((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ -unpuckU32ToThree_3(t[0]); \ -unpuckU32ToThree_3(t[1]); \ -unpuckU32ToThree_3(t[2]); \ -memcpy(out, t, 12 * sizeof(unsigned char)); \ +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + u32 t0=in[0] ; + u32 t1=in[1] ; + u32 t2=in[2] ; + temp0[0] = t2 & 0x7ff; + temp0[1] = t1 & 0x7ff; + temp0[2] = t0 & 0x3ff; + temp1[0] = (t0>>10) & 0x7ff; + temp1[1] = (t2 >>11 ) & 0x7ff; + temp1[2] = (t1 >> 11) & 0x3ff; + temp2[0] = t1 >> 21; + temp2[1] = t0 >> 21; + temp2[2] = t2 >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; + memcpy(out, t, 12 * sizeof(unsigned char)); } + void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) { u32 rci,t1,t2; unsigned char i; diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.h index aa7b91b..9bc38ac 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.h @@ -31,24 +31,21 @@ void packU96FormatToThreePacket(u32 * out, u8 * in); void unpackU96FormatToThreePacket(u8 * out, u32 * in); void P384(unsigned int *s, unsigned char *round, unsigned char lunNum); -#define puckU32ToThree_3(lo){\ -u32 r0;\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ } -#define unpuckU32ToThree_3(lo){\ -u32 r0;\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ } + unsigned char constant7Format[80]; #define P384_ARC_SC1(rci,S2,S3,S4) \ diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/auxFormat.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/auxFormat.h index b545bcb..f612bf3 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/auxFormat.h @@ -52,6 +52,20 @@ do { \ s[4] ^= dataFormat[4]; \ s[5] ^= dataFormat[5]; \ } while (0) +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ +} +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ +} #define puckU32ToThree_3(lo){\ @@ -74,37 +88,52 @@ r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ } #define packU96FormatToThreePacket( out, in) { \ - u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ - puckU32ToThree_3(t0); \ - puckU32ToThree_3(t1); \ - puckU32ToThree_3(t2); \ - out[0] = ((((t2 >> 21) & 0x400) | (((t2 >> 18) & 0x300)) | ((t2 & 0xff00) >> 8)) << 21) | \ - ((((t1 >> 20) & 0x400) | ((t1 >> 16) & 0x300) | (t1 & 0xff)) << 10) | \ - (((t0 >> 20) & 0x300) | ((t0 & 0xff0000) >> 16)); \ - out[1] = ((((t2 >> 20) & 0x400) | ((t2 >> 16) & 0x300) | (t2 & 0xff)) << 21) | \ - ((((t1 >> 20) & 0x300) | ((t1 & 0xff0000) >> 16)) << 11) | \ - (((t0 >> 21) & 0x400) | ((t0 >> 18) & 0x300) | ((t0 & 0xff00) >> 8)); \ - out[2] = ((((t2 >> 20) & 0x300) | ((t2 & 0xff0000) >> 16)) << 22) | \ - ((((t1 >> 21) & 0x400) | ((t1 >> 18) & 0x300)|((t1 & 0xff00) >> 8)) << 11) | \ - (((t0 >> 20) & 0x400) | ((t0 >> 16) & 0x300) | (t0 & 0xff)); \ + t1=U32BIG(((u32*)in)[0]); \ + temp0[0] = t1; temp0[1] = t1 >> 1; temp0[2] = t1>> 2; \ + puckU32ToThree_1(temp0[0]); \ + puckU32ToThree_1(temp0[1]); \ + puckU32ToThree_1(temp0[2]); \ + t1=U32BIG(((u32*)in)[1]); \ + temp1[0] = t1; temp1[1] = t1>>1; temp1[2] = t1 >> 2; \ + puckU32ToThree_1(temp1[0]); \ + puckU32ToThree_1(temp1[1]); \ + puckU32ToThree_1(temp1[2]); \ + t1=U32BIG(((u32*)in)[2]); \ + temp2[0] = t1; temp2[1] =t1 >> 1; temp2[2] = t1>> 2; \ + puckU32ToThree_1(temp2[0]); \ + puckU32ToThree_1(temp2[1]); \ + puckU32ToThree_1(temp2[2]); \ + out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; \ + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; \ + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; \ } #define unpackU96FormatToThreePacket( out, in) {\ - u32 t[3] = { 0 };\ - u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ - t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ - ((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ - ((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ - t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ - ((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ - ((t0 & 0x3fc00) >> 10); \ - t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ - ((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ - ((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ - unpuckU32ToThree_3(t[0]); \ - unpuckU32ToThree_3(t[1]); \ - unpuckU32ToThree_3(t[2]); \ - memcpy(out, t, 12 * sizeof(unsigned char)); \ + t3=in[0] ; \ + t1=in[1] ; \ + t2=in[2] ; \ + temp0[0] = t2 & 0x7ff; \ + temp0[1] = t1 & 0x7ff; \ + temp0[2] = t3 & 0x3ff; \ + temp1[0] = (t3>>10) & 0x7ff; \ + temp1[1] = (t2 >>11 ) & 0x7ff; \ + temp1[2] = (t1 >> 11) & 0x3ff; \ + temp2[0] = t1 >> 21; \ + temp2[1] = t3 >> 21; \ + temp2[2] = t2 >> 22; \ + unpuckU32ToThree_1(temp0[0]); \ + unpuckU32ToThree_1(temp0[1]); \ + unpuckU32ToThree_1(temp0[2]); \ + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; \ + unpuckU32ToThree_1(temp1[0]); \ + unpuckU32ToThree_1(temp1[1]); \ + unpuckU32ToThree_1(temp1[2]); \ + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; \ + unpuckU32ToThree_1(temp2[0]); \ + unpuckU32ToThree_1(temp2[1]); \ + unpuckU32ToThree_1(temp2[2]); \ + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; \ + memcpy(out, t, 12 * sizeof(unsigned char)); \ } diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/encrypt.c b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/encrypt.c index 54608a1..b0c479e 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/encrypt.c +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/encrypt.c @@ -34,6 +34,10 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, u32 dataFormat[6] = { 0 }; u32 s_temp[12] = { 0 }; u32 t1, t2, t3, t5, t6, t8, t9, t11; + u32 t[3] = { 0 }; + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; u8 tempU8[24] = { 0 }; *clen = mlen + CRYPTO_ABYTES; // initialization @@ -112,6 +116,10 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, u8 tempData[24] = { 0 }; u8 tempU8[24] = { 0 }; u32 t1, t2, t3, t5, t6, t8, t9, t11; + u32 t[3] = { 0 }; + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; *mlen = clen - CRYPTO_ABYTES; if (clen < CRYPTO_ABYTES) return -1; diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.c b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.c index 8abb1d9..05be750 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.c +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.c @@ -1,36 +1,57 @@ #include"auxFormat.h" -void packU96FormatToThreePacket(u32 *out, u8 *in) { - u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ - puckU32ToThree_3(t0); \ - puckU32ToThree_3(t1); \ - puckU32ToThree_3(t2); \ - out[0] = ((((t2 >> 21) & 0x400) | (((t2 >> 18) & 0x300)) | ((t2 & 0xff00) >> 8)) << 21) | \ - ((((t1 >> 20) & 0x400) | ((t1 >> 16) & 0x300) | (t1 & 0xff)) << 10) | \ - (((t0 >> 20) & 0x300) | ((t0 & 0xff0000) >> 16)); \ - out[1] = ((((t2 >> 20) & 0x400) | ((t2 >> 16) & 0x300) | (t2 & 0xff)) << 21) | \ - ((((t1 >> 20) & 0x300) | ((t1 & 0xff0000) >> 16)) << 11) | \ - (((t0 >> 21) & 0x400) | ((t0 >> 18) & 0x300) | ((t0 & 0xff00) >> 8)); \ - out[2] = ((((t2 >> 20) & 0x300) | ((t2 & 0xff0000) >> 16)) << 22) | \ - ((((t1 >> 21) & 0x400) | ((t1 >> 18) & 0x300)|((t1 & 0xff00) >> 8)) << 11) | \ - (((t0 >> 20) & 0x400) | ((t0 >> 16) & 0x300) | (t0 & 0xff)); \ - +void packU96FormatToThreePacket(u32 * out, u8 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t1=U32BIG(((u32*)in)[0]); + temp0[0] = t1; temp0[1] = t1 >> 1; temp0[2] = t1>> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + t1=U32BIG(((u32*)in)[1]); + temp1[0] = t1; temp1[1] = t1>>1; temp1[2] = t1 >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + t1=U32BIG(((u32*)in)[2]); + temp2[0] = t1; temp2[1] =t1 >> 1; temp2[2] = t1>> 2; + puckU32ToThree_1(temp2[0]); + puckU32ToThree_1(temp2[1]); + puckU32ToThree_1(temp2[2]); + out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; } -void unpackU96FormatToThreePacket(u8 *out, u32 *in) { - u32 t[3] = { 0 } ;\ -u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ -t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ -((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ -((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ -t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ -((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ -((t0 & 0x3fc00) >> 10); \ -t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ -((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ -((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ -unpuckU32ToThree_3(t[0]); \ -unpuckU32ToThree_3(t[1]); \ -unpuckU32ToThree_3(t[2]); \ -memcpy(out, t, 12 * sizeof(unsigned char)); \ +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + u32 t0=in[0] ; + u32 t1=in[1] ; + u32 t2=in[2] ; + temp0[0] = t2 & 0x7ff; + temp0[1] = t1 & 0x7ff; + temp0[2] = t0 & 0x3ff; + temp1[0] = (t0>>10) & 0x7ff; + temp1[1] = (t2 >>11 ) & 0x7ff; + temp1[2] = (t1 >> 11) & 0x3ff; + temp2[0] = t1 >> 21; + temp2[1] = t0 >> 21; + temp2[2] = t2 >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; + memcpy(out, t, 12 * sizeof(unsigned char)); } void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) { u32 s_temp[12] = { 0 }; diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.h index 445812e..da048b8 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4/auxFormat.h @@ -46,22 +46,18 @@ void packU96FormatToThreePacket(u32 * out, u8 * in); void unpackU96FormatToThreePacket(u8 * out, u32 * in); unsigned char constant7Format[80]; - -#define puckU32ToThree_3(lo){\ -u32 r0;\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ } -#define unpuckU32ToThree_3(lo){\ -u32 r0;\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ } + diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_5/auxFormat.c b/knot/Implementations/crypto_aead/knot128v2/armcortexm_5/auxFormat.c index 71bc26a..7c1e5b9 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_5/auxFormat.c +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_5/auxFormat.c @@ -17,37 +17,58 @@ void ROUND384_Three(unsigned int *s, unsigned char *c, int lunnum) { t++; } } -void packU96FormatToThreePacket(u32 *out, u8 *in) { - u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ - puckU32ToThree_3(t0); \ - puckU32ToThree_3(t1); \ - puckU32ToThree_3(t2); \ - out[0] = ((((t2 >> 21) & 0x400) | (((t2 >> 18) & 0x300)) | ((t2 & 0xff00) >> 8)) << 21) | \ - ((((t1 >> 20) & 0x400) | ((t1 >> 16) & 0x300) | (t1 & 0xff)) << 10) | \ - (((t0 >> 20) & 0x300) | ((t0 & 0xff0000) >> 16)); \ - out[1] = ((((t2 >> 20) & 0x400) | ((t2 >> 16) & 0x300) | (t2 & 0xff)) << 21) | \ - ((((t1 >> 20) & 0x300) | ((t1 & 0xff0000) >> 16)) << 11) | \ - (((t0 >> 21) & 0x400) | ((t0 >> 18) & 0x300) | ((t0 & 0xff00) >> 8)); \ - out[2] = ((((t2 >> 20) & 0x300) | ((t2 & 0xff0000) >> 16)) << 22) | \ - ((((t1 >> 21) & 0x400) | ((t1 >> 18) & 0x300)|((t1 & 0xff00) >> 8)) << 11) | \ - (((t0 >> 20) & 0x400) | ((t0 >> 16) & 0x300) | (t0 & 0xff)); \ - +void packU96FormatToThreePacket(u32 * out, u8 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t1=U32BIG(((u32*)in)[0]); + temp0[0] = t1; temp0[1] = t1 >> 1; temp0[2] = t1>> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + t1=U32BIG(((u32*)in)[1]); + temp1[0] = t1; temp1[1] = t1>>1; temp1[2] = t1 >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + t1=U32BIG(((u32*)in)[2]); + temp2[0] = t1; temp2[1] =t1 >> 1; temp2[2] = t1>> 2; + puckU32ToThree_1(temp2[0]); + puckU32ToThree_1(temp2[1]); + puckU32ToThree_1(temp2[2]); + out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; } -void unpackU96FormatToThreePacket(u8 *out, u32 *in) { - u32 t[3] = { 0 } ;\ -u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ -t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ -((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ -((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ -t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ -((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ -((t0 & 0x3fc00) >> 10); \ -t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ -((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ -((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ -unpuckU32ToThree_3(t[0]); \ -unpuckU32ToThree_3(t[1]); \ -unpuckU32ToThree_3(t[2]); \ -memcpy(out, t, 12 * sizeof(unsigned char)); \ +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + u32 t0=in[0] ; + u32 t1=in[1] ; + u32 t2=in[2] ; + temp0[0] = t2 & 0x7ff; + temp0[1] = t1 & 0x7ff; + temp0[2] = t0 & 0x3ff; + temp1[0] = (t0>>10) & 0x7ff; + temp1[1] = (t2 >>11 ) & 0x7ff; + temp1[2] = (t1 >> 11) & 0x3ff; + temp2[0] = t1 >> 21; + temp2[1] = t0 >> 21; + temp2[2] = t2 >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; + memcpy(out, t, 12 * sizeof(unsigned char)); } diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_5/auxFormat.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_5/auxFormat.h index 0a4e119..6b25cc4 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_5/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_5/auxFormat.h @@ -13,25 +13,22 @@ void ROUND384_Three(unsigned int *s, unsigned char *c,int lunnum); -#define puckU32ToThree_3(lo){\ -u32 r0;\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ } -#define unpuckU32ToThree_3(lo){\ - u32 r0;\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ } + #define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) //Processing_Data: #define Processing_Data(data) \ diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.c b/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.c index 452920a..c1a6729 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.c +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.c @@ -1,40 +1,59 @@ #include"auxFormat.h" - -void packU96FormatToThreePacket(u32 *out, u8 *in) { - u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ - puckU32ToThree_3(t0); \ - puckU32ToThree_3(t1); \ - puckU32ToThree_3(t2); \ - out[0] = ((((t2 >> 21) & 0x400) | (((t2 >> 18) & 0x300)) | ((t2 & 0xff00) >> 8)) << 21) | \ - ((((t1 >> 20) & 0x400) | ((t1 >> 16) & 0x300) | (t1 & 0xff)) << 10) | \ - (((t0 >> 20) & 0x300) | ((t0 & 0xff0000) >> 16)); \ - out[1] = ((((t2 >> 20) & 0x400) | ((t2 >> 16) & 0x300) | (t2 & 0xff)) << 21) | \ - ((((t1 >> 20) & 0x300) | ((t1 & 0xff0000) >> 16)) << 11) | \ - (((t0 >> 21) & 0x400) | ((t0 >> 18) & 0x300) | ((t0 & 0xff00) >> 8)); \ - out[2] = ((((t2 >> 20) & 0x300) | ((t2 & 0xff0000) >> 16)) << 22) | \ - ((((t1 >> 21) & 0x400) | ((t1 >> 18) & 0x300)|((t1 & 0xff00) >> 8)) << 11) | \ - (((t0 >> 20) & 0x400) | ((t0 >> 16) & 0x300) | (t0 & 0xff)); \ - +void packU96FormatToThreePacket(u32 * out, u8 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t1=U32BIG(((u32*)in)[0]); + temp0[0] = t1; temp0[1] = t1 >> 1; temp0[2] = t1>> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + t1=U32BIG(((u32*)in)[1]); + temp1[0] = t1; temp1[1] = t1>>1; temp1[2] = t1 >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + t1=U32BIG(((u32*)in)[2]); + temp2[0] = t1; temp2[1] =t1 >> 1; temp2[2] = t1>> 2; + puckU32ToThree_1(temp2[0]); + puckU32ToThree_1(temp2[1]); + puckU32ToThree_1(temp2[2]); + out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; } -void unpackU96FormatToThreePacket(u8 *out, u32 *in) { - u32 t[3] = { 0 } ;\ -u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ -t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ -((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ -((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ -t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ -((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ -((t0 & 0x3fc00) >> 10); \ -t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ -((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ -((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ -unpuckU32ToThree_3(t[0]); \ -unpuckU32ToThree_3(t[1]); \ -unpuckU32ToThree_3(t[2]); \ -memcpy(out, t, 12 * sizeof(unsigned char)); \ +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + u32 t0=in[0] ; + u32 t1=in[1] ; + u32 t2=in[2] ; + temp0[0] = t2 & 0x7ff; + temp0[1] = t1 & 0x7ff; + temp0[2] = t0 & 0x3ff; + temp1[0] = (t0>>10) & 0x7ff; + temp1[1] = (t2 >>11 ) & 0x7ff; + temp1[2] = (t1 >> 11) & 0x3ff; + temp2[0] = t1 >> 21; + temp2[1] = t0 >> 21; + temp2[2] = t2 >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; + memcpy(out, t, 12 * sizeof(unsigned char)); } - void ROUND384_Three(unsigned int *s, unsigned char *c, int lunnum) { unsigned int t, t1, t2; u32 rci, temp; diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.h index c7b7c95..5a1e141 100644 --- a/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.h @@ -11,25 +11,22 @@ typedef unsigned long long u64; void ROUND384_Three(unsigned int *s, unsigned char *c,int lunnum); -#define puckU32ToThree_3(lo){\ -u32 r0;\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ } -#define unpuckU32ToThree_3(lo){\ -u32 r0;\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ } + #define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) //Processing_Data: #define Processing_Data(data) \ diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.c b/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.c index e263a92..0f52ffc 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.c +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.c @@ -10,42 +10,61 @@ void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) { P384_SR();\ } } - - - -void packU96FormatToThreePacket(u32 *out, u8 *in) { - u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ - puckU32ToThree_3(t0); \ - puckU32ToThree_3(t1); \ - puckU32ToThree_3(t2); \ - - out[0] = ((((t2 >> 21) & 0x400) | (((t2 >> 18) & 0x300)) | ((t2 & 0xff00) >> 8)) << 21) | - ((((t1 >> 20) & 0x400) | ((t1 >> 16) & 0x300) | (t1 & 0xff)) << 10) | - (((t0 >> 20) & 0x300) | ((t0 & 0xff0000) >> 16)); - out[1] = ((((t2 >> 20) & 0x400) | ((t2 >> 16) & 0x300) | (t2 & 0xff)) << 21) | - ((((t1 >> 20) & 0x300) | ((t1 & 0xff0000) >> 16)) << 11) | - (((t0 >> 21) & 0x400) | ((t0 >> 18) & 0x300) | ((t0 & 0xff00) >> 8)); - out[2] = ((((t2 >> 20) & 0x300) | ((t2 & 0xff0000) >> 16)) << 22) | - ((((t1 >> 21) & 0x400) | ((t1 >> 18) & 0x300)|((t1 & 0xff00) >> 8)) << 11) | - (((t0 >> 20) & 0x400) | ((t0 >> 16) & 0x300) | (t0 & 0xff)); +void packU96FormatToThreePacket(u32 * out, u8 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t1=U32BIG(((u32*)in)[0]); + temp0[0] = t1; temp0[1] = t1 >> 1; temp0[2] = t1>> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + t1=U32BIG(((u32*)in)[1]); + temp1[0] = t1; temp1[1] = t1>>1; temp1[2] = t1 >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + t1=U32BIG(((u32*)in)[2]); + temp2[0] = t1; temp2[1] =t1 >> 1; temp2[2] = t1>> 2; + puckU32ToThree_1(temp2[0]); + puckU32ToThree_1(temp2[1]); + puckU32ToThree_1(temp2[2]); + out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; } -void unpackU96FormatToThreePacket(u8 *out, u32 *in) { - u32 t[3] = { 0 } ;\ -u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ -t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ -((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ -((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ -t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ -((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ -((t0 & 0x3fc00) >> 10); \ -t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ -((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ -((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ -unpuckU32ToThree_3(t[0]); \ -unpuckU32ToThree_3(t[1]); \ -unpuckU32ToThree_3(t[2]); \ -memcpy(out, t, 12 * sizeof(unsigned char)); \ +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + u32 t0=in[0] ; + u32 t1=in[1] ; + u32 t2=in[2] ; + temp0[0] = t2 & 0x7ff; + temp0[1] = t1 & 0x7ff; + temp0[2] = t0 & 0x3ff; + temp1[0] = (t0>>10) & 0x7ff; + temp1[1] = (t2 >>11 ) & 0x7ff; + temp1[2] = (t1 >> 11) & 0x3ff; + temp2[0] = t1 >> 21; + temp2[1] = t0 >> 21; + temp2[2] = t2 >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; + memcpy(out, t, 12 * sizeof(unsigned char)); } + unsigned char constant7Format[76] = { /*constant7Format[127]:*/ 0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.h b/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.h index aaa41e6..056e4a2 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.h @@ -16,25 +16,22 @@ typedef unsigned long long u64; #define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) -#define puckU32ToThree_3(lo){\ -u32 r0;\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ } -#define unpuckU32ToThree_3(lo){\ - u32 r0;\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ } + unsigned char constant7Format[76]; #define P384_ARC_SC1(rci,S2,S3,S4) \ do { \ diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_3/auxFormat.h b/knot/Implementations/crypto_aead/knot192/armcortexm_3/auxFormat.h index 0320037..fa3a611 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_3/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_3/auxFormat.h @@ -22,59 +22,77 @@ unsigned char constant7Format[76] ; t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ } -#define puckU32ToThree_3(lo){\ -u32 r0;\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ } -#define unpuckU32ToThree_3(lo){\ - u32 r0;\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ } + + #define packU96FormatToThreePacket( out, in) { \ - u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ - puckU32ToThree_3(t0); \ - puckU32ToThree_3(t1); \ - puckU32ToThree_3(t2); \ - out[0] = ((((t2 >> 21) & 0x400) | (((t2 >> 18) & 0x300)) | ((t2 & 0xff00) >> 8)) << 21) | \ - ((((t1 >> 20) & 0x400) | ((t1 >> 16) & 0x300) | (t1 & 0xff)) << 10) | \ - (((t0 >> 20) & 0x300) | ((t0 & 0xff0000) >> 16)); \ - out[1] = ((((t2 >> 20) & 0x400) | ((t2 >> 16) & 0x300) | (t2 & 0xff)) << 21) | \ - ((((t1 >> 20) & 0x300) | ((t1 & 0xff0000) >> 16)) << 11) | \ - (((t0 >> 21) & 0x400) | ((t0 >> 18) & 0x300) | ((t0 & 0xff00) >> 8)); \ - out[2] = ((((t2 >> 20) & 0x300) | ((t2 & 0xff0000) >> 16)) << 22) | \ - ((((t1 >> 21) & 0x400) | ((t1 >> 18) & 0x300)|((t1 & 0xff00) >> 8)) << 11) | \ - (((t0 >> 20) & 0x400) | ((t0 >> 16) & 0x300) | (t0 & 0xff)); \ + t1=U32BIG(((u32*)in)[0]); \ + temp0[0] = t1; temp0[1] = t1 >> 1; temp0[2] = t1>> 2; \ + puckU32ToThree_1(temp0[0]); \ + puckU32ToThree_1(temp0[1]); \ + puckU32ToThree_1(temp0[2]); \ + t1=U32BIG(((u32*)in)[1]); \ + temp1[0] = t1; temp1[1] = t1>>1; temp1[2] = t1 >> 2; \ + puckU32ToThree_1(temp1[0]); \ + puckU32ToThree_1(temp1[1]); \ + puckU32ToThree_1(temp1[2]); \ + t1=U32BIG(((u32*)in)[2]); \ + temp2[0] = t1; temp2[1] =t1 >> 1; temp2[2] = t1>> 2; \ + puckU32ToThree_1(temp2[0]); \ + puckU32ToThree_1(temp2[1]); \ + puckU32ToThree_1(temp2[2]); \ + out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; \ + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; \ + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; \ } #define unpackU96FormatToThreePacket( out, in) {\ - u32 t[3] = { 0 };\ - u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ - t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ - ((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ - ((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ - t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ - ((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ - ((t0 & 0x3fc00) >> 10); \ - t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ - ((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ - ((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ - unpuckU32ToThree_3(t[0]); \ - unpuckU32ToThree_3(t[1]); \ - unpuckU32ToThree_3(t[2]); \ - memcpy(out, t, 12 * sizeof(unsigned char)); \ + t3=in[0] ; \ + t1=in[1] ; \ + t2=in[2] ; \ + temp0[0] = t2 & 0x7ff; \ + temp0[1] = t1 & 0x7ff; \ + temp0[2] = t3 & 0x3ff; \ + temp1[0] = (t3>>10) & 0x7ff; \ + temp1[1] = (t2 >>11 ) & 0x7ff; \ + temp1[2] = (t1 >> 11) & 0x3ff; \ + temp2[0] = t1 >> 21; \ + temp2[1] = t3 >> 21; \ + temp2[2] = t2 >> 22; \ + unpuckU32ToThree_1(temp0[0]); \ + unpuckU32ToThree_1(temp0[1]); \ + unpuckU32ToThree_1(temp0[2]); \ + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; \ + unpuckU32ToThree_1(temp1[0]); \ + unpuckU32ToThree_1(temp1[1]); \ + unpuckU32ToThree_1(temp1[2]); \ + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; \ + unpuckU32ToThree_1(temp2[0]); \ + unpuckU32ToThree_1(temp2[1]); \ + unpuckU32ToThree_1(temp2[2]); \ + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; \ + memcpy(out, t, 12 * sizeof(unsigned char)); \ } + + + + + #define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\ t3= LOTR32(t2, 2);\ t4 =LOTR32(t0, 3);\ diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_3/encrypt.c b/knot/Implementations/crypto_aead/knot192/armcortexm_3/encrypt.c index 58a89db..318e9c5 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_3/encrypt.c +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_3/encrypt.c @@ -34,6 +34,10 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, u8 tempU8[48] = { 0 }; u32 s_temp[12] = { 0 }; + u32 t[3] = { 0 }; + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; u32 t1, t2, t3, t5, t6, t8, t9, t11; *clen = mlen + CRYPTO_ABYTES; // initialization @@ -106,6 +110,10 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, u8 tempU8[48] = { 0 }; u32 s_temp[12] = { 0 }; u32 t1, t2, t3, t5, t6, t8, t9, t11; + u32 t[3] = { 0 }; + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; *mlen = clen - CRYPTO_ABYTES; if (clen < CRYPTO_ABYTES) return -1; diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.c b/knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.c index a31721d..a9bfb17 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.c +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.c @@ -17,39 +17,61 @@ void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) { } } -void packU96FormatToThreePacket(u32 *out, u8 *in) { - u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ - puckU32ToThree_3(t0); \ - puckU32ToThree_3(t1); \ - puckU32ToThree_3(t2); \ - out[0] = ((((t2 >> 21) & 0x400) | (((t2 >> 18) & 0x300)) | ((t2 & 0xff00) >> 8)) << 21) | \ - ((((t1 >> 20) & 0x400) | ((t1 >> 16) & 0x300) | (t1 & 0xff)) << 10) | \ - (((t0 >> 20) & 0x300) | ((t0 & 0xff0000) >> 16)); \ - out[1] = ((((t2 >> 20) & 0x400) | ((t2 >> 16) & 0x300) | (t2 & 0xff)) << 21) | \ - ((((t1 >> 20) & 0x300) | ((t1 & 0xff0000) >> 16)) << 11) | \ - (((t0 >> 21) & 0x400) | ((t0 >> 18) & 0x300) | ((t0 & 0xff00) >> 8)); \ - out[2] = ((((t2 >> 20) & 0x300) | ((t2 & 0xff0000) >> 16)) << 22) | \ - ((((t1 >> 21) & 0x400) | ((t1 >> 18) & 0x300)|((t1 & 0xff00) >> 8)) << 11) | \ - (((t0 >> 20) & 0x400) | ((t0 >> 16) & 0x300) | (t0 & 0xff)); \ - +void packU96FormatToThreePacket(u32 * out, u8 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t1=U32BIG(((u32*)in)[0]); + temp0[0] = t1; temp0[1] = t1 >> 1; temp0[2] = t1>> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + t1=U32BIG(((u32*)in)[1]); + temp1[0] = t1; temp1[1] = t1>>1; temp1[2] = t1 >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + t1=U32BIG(((u32*)in)[2]); + temp2[0] = t1; temp2[1] =t1 >> 1; temp2[2] = t1>> 2; + puckU32ToThree_1(temp2[0]); + puckU32ToThree_1(temp2[1]); + puckU32ToThree_1(temp2[2]); + out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; } -void unpackU96FormatToThreePacket(u8 *out, u32 *in) { - u32 t[3] = { 0 } ;\ -u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ -t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ -((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ -((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ -t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ -((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ -((t0 & 0x3fc00) >> 10); \ -t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ -((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ -((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ -unpuckU32ToThree_3(t[0]); \ -unpuckU32ToThree_3(t[1]); \ -unpuckU32ToThree_3(t[2]); \ -memcpy(out, t, 12 * sizeof(unsigned char)); \ +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + u32 t0=in[0] ; + u32 t1=in[1] ; + u32 t2=in[2] ; + temp0[0] = t2 & 0x7ff; + temp0[1] = t1 & 0x7ff; + temp0[2] = t0 & 0x3ff; + temp1[0] = (t0>>10) & 0x7ff; + temp1[1] = (t2 >>11 ) & 0x7ff; + temp1[2] = (t1 >> 11) & 0x3ff; + temp2[0] = t1 >> 21; + temp2[1] = t0 >> 21; + temp2[2] = t2 >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; + memcpy(out, t, 12 * sizeof(unsigned char)); } + unsigned char constant7Format[76] = { /*constant7Format[127]:*/ 0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.h b/knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.h index 5fa905e..e1b4dd7 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_4/auxFormat.h @@ -38,22 +38,17 @@ t5 = LOTR32(t0, 19); \ unsigned char constant7Format[76]; - -#define puckU32ToThree_3(lo){\ -u32 r0;\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ } -#define unpuckU32ToThree_3(lo){\ -u32 r0;\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ } diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_5/auxFormat.c b/knot/Implementations/crypto_aead/knot192/armcortexm_5/auxFormat.c index 6d4f512..e1c069f 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_5/auxFormat.c +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_5/auxFormat.c @@ -29,36 +29,57 @@ void ROUND384_Three(unsigned int *s, unsigned char *c, int lunnum) { } } -void packU96FormatToThreePacket(u32 *out, u8 *in) { - u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ - puckU32ToThree_3(t0); \ - puckU32ToThree_3(t1); \ - puckU32ToThree_3(t2); \ - out[0] = ((((t2 >> 21) & 0x400) | (((t2 >> 18) & 0x300)) | ((t2 & 0xff00) >> 8)) << 21) | \ - ((((t1 >> 20) & 0x400) | ((t1 >> 16) & 0x300) | (t1 & 0xff)) << 10) | \ - (((t0 >> 20) & 0x300) | ((t0 & 0xff0000) >> 16)); \ - out[1] = ((((t2 >> 20) & 0x400) | ((t2 >> 16) & 0x300) | (t2 & 0xff)) << 21) | \ - ((((t1 >> 20) & 0x300) | ((t1 & 0xff0000) >> 16)) << 11) | \ - (((t0 >> 21) & 0x400) | ((t0 >> 18) & 0x300) | ((t0 & 0xff00) >> 8)); \ - out[2] = ((((t2 >> 20) & 0x300) | ((t2 & 0xff0000) >> 16)) << 22) | \ - ((((t1 >> 21) & 0x400) | ((t1 >> 18) & 0x300)|((t1 & 0xff00) >> 8)) << 11) | \ - (((t0 >> 20) & 0x400) | ((t0 >> 16) & 0x300) | (t0 & 0xff)); \ - +void packU96FormatToThreePacket(u32 * out, u8 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t1=U32BIG(((u32*)in)[0]); + temp0[0] = t1; temp0[1] = t1 >> 1; temp0[2] = t1>> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + t1=U32BIG(((u32*)in)[1]); + temp1[0] = t1; temp1[1] = t1>>1; temp1[2] = t1 >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + t1=U32BIG(((u32*)in)[2]); + temp2[0] = t1; temp2[1] =t1 >> 1; temp2[2] = t1>> 2; + puckU32ToThree_1(temp2[0]); + puckU32ToThree_1(temp2[1]); + puckU32ToThree_1(temp2[2]); + out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; } -void unpackU96FormatToThreePacket(u8 *out, u32 *in) { - u32 t[3] = { 0 },r0 ;\ -u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ -t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ -((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ -((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ -t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ -((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ -((t0 & 0x3fc00) >> 10); \ -t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ -((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ -((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ -unpuckU32ToThree_3(t[0]); \ -unpuckU32ToThree_3(t[1]); \ -unpuckU32ToThree_3(t[2]); \ -memcpy(out, t, 12 * sizeof(unsigned char)); \ +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + u32 t0=in[0] ; + u32 t1=in[1] ; + u32 t2=in[2] ; + temp0[0] = t2 & 0x7ff; + temp0[1] = t1 & 0x7ff; + temp0[2] = t0 & 0x3ff; + temp1[0] = (t0>>10) & 0x7ff; + temp1[1] = (t2 >>11 ) & 0x7ff; + temp1[2] = (t1 >> 11) & 0x3ff; + temp2[0] = t1 >> 21; + temp2[1] = t0 >> 21; + temp2[2] = t2 >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; + memcpy(out, t, 12 * sizeof(unsigned char)); } diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_5/auxFormat.h b/knot/Implementations/crypto_aead/knot192/armcortexm_5/auxFormat.h index 4bdeb02..02902c3 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_5/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_5/auxFormat.h @@ -12,22 +12,19 @@ typedef unsigned short u16; typedef unsigned int u32; typedef unsigned long long u64; -#define puckU32ToThree_3(lo){\ -u32 r0;\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ } -#define unpuckU32ToThree_3(lo){\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ } void ROUND384_Three(unsigned int *s, unsigned char *c,int lunnum); #define ARC(rci) \ diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.c b/knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.c index 0390123..d26cb27 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.c +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.c @@ -28,36 +28,57 @@ void ROUND384_Three(unsigned int *s, unsigned char *c, int lunnum) { t += 3; } } -void packU96FormatToThreePacket(u32 *out, u8 *in) { - u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ - puckU32ToThree_3(t0); \ - puckU32ToThree_3(t1); \ - puckU32ToThree_3(t2); \ - out[0] = ((((t2 >> 21) & 0x400) | (((t2 >> 18) & 0x300)) | ((t2 & 0xff00) >> 8)) << 21) | \ - ((((t1 >> 20) & 0x400) | ((t1 >> 16) & 0x300) | (t1 & 0xff)) << 10) | \ - (((t0 >> 20) & 0x300) | ((t0 & 0xff0000) >> 16)); \ - out[1] = ((((t2 >> 20) & 0x400) | ((t2 >> 16) & 0x300) | (t2 & 0xff)) << 21) | \ - ((((t1 >> 20) & 0x300) | ((t1 & 0xff0000) >> 16)) << 11) | \ - (((t0 >> 21) & 0x400) | ((t0 >> 18) & 0x300) | ((t0 & 0xff00) >> 8)); \ - out[2] = ((((t2 >> 20) & 0x300) | ((t2 & 0xff0000) >> 16)) << 22) | \ - ((((t1 >> 21) & 0x400) | ((t1 >> 18) & 0x300)|((t1 & 0xff00) >> 8)) << 11) | \ - (((t0 >> 20) & 0x400) | ((t0 >> 16) & 0x300) | (t0 & 0xff)); \ - +void packU96FormatToThreePacket(u32 * out, u8 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t1=U32BIG(((u32*)in)[0]); + temp0[0] = t1; temp0[1] = t1 >> 1; temp0[2] = t1>> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + t1=U32BIG(((u32*)in)[1]); + temp1[0] = t1; temp1[1] = t1>>1; temp1[2] = t1 >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + t1=U32BIG(((u32*)in)[2]); + temp2[0] = t1; temp2[1] =t1 >> 1; temp2[2] = t1>> 2; + puckU32ToThree_1(temp2[0]); + puckU32ToThree_1(temp2[1]); + puckU32ToThree_1(temp2[2]); + out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; } -void unpackU96FormatToThreePacket(u8 *out, u32 *in) { - u32 t[3] = { 0 };\ -u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ -t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ -((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ -((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ -t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ -((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ -((t0 & 0x3fc00) >> 10); \ -t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ -((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ -((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ -unpuckU32ToThree_3(t[0]); \ -unpuckU32ToThree_3(t[1]); \ -unpuckU32ToThree_3(t[2]); \ -memcpy(out, t, 12 * sizeof(unsigned char)); \ +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + u32 t0=in[0] ; + u32 t1=in[1] ; + u32 t2=in[2] ; + temp0[0] = t2 & 0x7ff; + temp0[1] = t1 & 0x7ff; + temp0[2] = t0 & 0x3ff; + temp1[0] = (t0>>10) & 0x7ff; + temp1[1] = (t2 >>11 ) & 0x7ff; + temp1[2] = (t1 >> 11) & 0x3ff; + temp2[0] = t1 >> 21; + temp2[1] = t0 >> 21; + temp2[2] = t2 >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; + memcpy(out, t, 12 * sizeof(unsigned char)); } diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.h b/knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.h index 58944ef..cd415a7 100644 --- a/knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.h +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.h @@ -14,26 +14,20 @@ typedef unsigned long long u64; void ROUND384_Three(unsigned int *s, unsigned char *c,int lunnum); - -#define puckU32ToThree_3(lo){\ -u32 r0;\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ } -#define unpuckU32ToThree_3(lo){\ - u32 r0;\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ } - #define ARC(rci) \ do { \ __asm__ __volatile__ ( \ diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_2/auxFormat.c b/knot/Implementations/crypto_hash/knot256v2/armcortexm_2/auxFormat.c index e46d5e6..a256012 100644 --- a/knot/Implementations/crypto_hash/knot256v2/armcortexm_2/auxFormat.c +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_2/auxFormat.c @@ -1,65 +1,79 @@ #include"auxFormat.h" -void packU32FormatToThreePacket(u32 *out, u8 *in) { - u32 t0 = U32BIG(((u32* )in)[0]); - puckU32ToThree_3(t0); - out[0] = ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); - out[1] = (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) - | ((t0 & 0xff00) >> 8)); - out[2] = (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) - | (t0 & 0x000000ff)); +void packU32FormatToThreePacket(u32 * out, u8 * in) { + u32 t2 = U32BIG(((u32*)in)[0]); + out[2] = t2; out[1] = t2 >> 1; out[0] = t2 >> 2;//temp2[0] 0;temp2[1] 1;temp2[2] 2; + puckU32ToThree_1(out[0]); + puckU32ToThree_1(out[1]); + puckU32ToThree_1(out[2]); } -void unpackU32FormatToThreePacket(u8 *out, u32 *in) { - u32 t[3] = { 0 };\ - u32 t0 = in[0], t1 = in[1], t2 = in[2]; - t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | ((t0 & 0x300) << 20) - | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | ((t0 & 0xff) << 16) - | ((t1 & 0xff) << 8) | (t2 & 0xff); - unpuckU32ToThree_3(t[0]); - *(u32*) (out) = t[0]; +void unpackU32FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + temp0[0] = in[0] & 0x3ff; + temp0[1] = in[1] & 0x7ff; + temp0[2] = in[2] & 0x7ff; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + *(u32*)(out) = U32BIG(temp0[0]<<2 | temp0[1] << 1 | temp0[2]); } -void packU96FormatToThreePacket(u32 *out, u8 *in) { - u32 t0 = U32BIG(((u32* )in)[0]), t1 = U32BIG(((u32* )in)[1]), t2 = U32BIG( - ((u32* )in)[2]); - puckU32ToThree_3(t0); - puckU32ToThree_3(t1); - puckU32ToThree_3(t2); - out[0] = (((t2 & 0x80000000)) | ((t2 & 0xc000000) << 3) - | ((t2 & 0xff00) << 13)) - | (((t1 & 0x40000000) >> 10) | ((t1 & 0x03000000) >> 6) - | ((t1 & 0x000000ff) << 10)) - | ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); - out[1] = (((t2 & 0x40000000) << 1) | ((t2 & 0x03000000) << 5) - | ((t2 & 0x000000ff) << 21)) - | ((((t1 & 0x30000000) >> 9) | ((t1 & 0xff0000) >> 5)) - | (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) - | ((t0 & 0xff00) >> 8))); - out[2] = ((((t2 & 0x30000000) << 2) | ((t2 & 0xff0000) << 6)) - | (((t1 & 0x80000000) >> 10) | ((t1 & 0xc000000) >> 7) - | ((t1 & 0xff00) << 3)) - | (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) - | (t0 & 0x000000ff))); + +void packU96FormatToThreePacket(u32 * out, u8 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t1=U32BIG(((u32*)in)[0]); + temp0[0] = t1; temp0[1] = t1 >> 1; temp0[2] = t1>> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + t1=U32BIG(((u32*)in)[1]); + temp1[0] = t1; temp1[1] = t1>>1; temp1[2] = t1 >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + t1=U32BIG(((u32*)in)[2]); + temp2[0] = t1; temp2[1] =t1 >> 1; temp2[2] = t1>> 2; + puckU32ToThree_1(temp2[0]); + puckU32ToThree_1(temp2[1]); + puckU32ToThree_1(temp2[2]); + out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; } -void unpackU96FormatToThreePacket(u8 *out, u32 *in) { - u32 t[3] = { 0 };\ - u32 t0 = in[0], t1 = in[1], t2 = in[2]; - t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | ((t0 & 0x300) << 20) - | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | ((t0 & 0xff) << 16) - | ((t1 & 0xff) << 8) | (t2 & 0xff); - t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) - | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) - | ((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) - | ((t2 & 0x7f800) >> 3) | ((t0 & 0x3fc00) >> 10); - t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) - | ((t2 & 0xc0000000) >> 2) | ((t0 & 0x60000000) >> 3) - | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) - | ((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); - unpuckU32ToThree_3(t[0]); - unpuckU32ToThree_3(t[1]); - unpuckU32ToThree_3(t[2]); +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + u32 t0=in[0] ; + u32 t1=in[1] ; + u32 t2=in[2] ; + temp0[0] = t2 & 0x7ff; + temp0[1] = t1 & 0x7ff; + temp0[2] = t0 & 0x3ff; + temp1[0] = (t0>>10) & 0x7ff; + temp1[1] = (t2 >>11 ) & 0x7ff; + temp1[2] = (t1 >> 11) & 0x3ff; + temp2[0] = t1 >> 21; + temp2[1] = t0 >> 21; + temp2[2] = t2 >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; memcpy(out, t, 12 * sizeof(unsigned char)); } + unsigned char constant7Format[80] = { /*constant7Format[127]: 12*6=72*/ 0x01, 0x08, 0x40, 0x02, 0x10, 0x80, 0x05, 0x09, 0x48, 0x42, 0x12, 0x90, 0x85, diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_2/auxFormat.h b/knot/Implementations/crypto_hash/knot256v2/armcortexm_2/auxFormat.h index 8bbc86f..8de30c3 100644 --- a/knot/Implementations/crypto_hash/knot256v2/armcortexm_2/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_2/auxFormat.h @@ -9,23 +9,19 @@ typedef unsigned char u8; typedef unsigned int u32; typedef unsigned long long u64; -#define puckU32ToThree_3(lo){\ -u32 r0;\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ } -#define unpuckU32ToThree_3(lo){\ - u32 r0;\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ } unsigned char constant7Format[80]; diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_2/hash.c b/knot/Implementations/crypto_hash/knot256v2/armcortexm_2/hash.c index 9c3dafd..ffc7947 100644 --- a/knot/Implementations/crypto_hash/knot256v2/armcortexm_2/hash.c +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_2/hash.c @@ -12,6 +12,10 @@ int crypto_hash(unsigned char *out, const unsigned char *in, u32 s[12] = { 0 }; u32 dataFormat[6] = { 0 }; u8 tempData[24] = { 0 }; + u32 t[3] = { 0 }; + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; // initialization s[9] = 0x80000000; //absorb diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_3/auxFormat.h b/knot/Implementations/crypto_hash/knot256v2/armcortexm_3/auxFormat.h index 3114169..b7114a7 100644 --- a/knot/Implementations/crypto_hash/knot256v2/armcortexm_3/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_3/auxFormat.h @@ -41,70 +41,84 @@ U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\ U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\ } -#define puckU32ToThree_3(lo){\ -u32 r0;\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ } -#define unpuckU32ToThree_3(lo){\ - u32 r0;\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ } + #define packU32FormatToThreePacket( out, in) {\ -u32 t0 = U32BIG(((u32*)in)[0]); \ -puckU32ToThree_3(t0); \ -out[0] = ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); \ -out[1] = (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) | ((t0 & 0xff00) >> 8)); \ -out[2] = (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) | (t0 & 0x000000ff)); \ + u32 t2 = U32BIG(((u32*)in)[0]); \ + out[2] = t2; out[1] = t2 >> 1; out[0] = t2 >> 2; \ + puckU32ToThree_1(out[0]); \ + puckU32ToThree_1(out[1]); \ + puckU32ToThree_1(out[2]); \ } #define unpackU32FormatToThreePacket(out, in) {\ -u32 t[3] = { 0 } ;\ -u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ -t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ -((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ -((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ -unpuckU32ToThree_3(t[0]); \ -*(u32*)(out) = t[0]; \ + u32 temp0[3] = { 0 }; \ + temp0[0] = in[0] & 0x3ff; \ + temp0[1] = in[1] & 0x7ff; \ + temp0[2] = in[2] & 0x7ff; \ + unpuckU32ToThree_1(temp0[0]); \ + unpuckU32ToThree_1(temp0[1]); \ + unpuckU32ToThree_1(temp0[2]); \ + *(u32*)(out) = U32BIG(temp0[0]<<2 | temp0[1] << 1 | temp0[2]); \ } #define packU96FormatToThreePacket( out, in) { \ - u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ - puckU32ToThree_3(t0); \ - puckU32ToThree_3(t1); \ - puckU32ToThree_3(t2); \ - out[0] = (((t2 & 0x80000000)) | ((t2 & 0xc000000) << 3) | ((t2 & 0xff00) << 13)) | \ - (((t1 & 0x40000000) >> 10) | ((t1 & 0x03000000) >> 6) | ((t1 & 0x000000ff) << 10)) | \ - ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); \ - out[1] = (((t2 & 0x40000000) << 1) | ((t2 & 0x03000000) << 5) | ((t2 & 0x000000ff) << 21)) | \ - ((((t1 & 0x30000000) >> 9) | ((t1 & 0xff0000) >> 5)) | \ - (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) | ((t0 & 0xff00) >> 8))); \ - out[2] = ((((t2 & 0x30000000) << 2) | ((t2 & 0xff0000) << 6)) | \ - (((t1 & 0x80000000) >> 10) | ((t1 & 0xc000000) >> 7) | ((t1 & 0xff00) << 3)) | \ - (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) | (t0 & 0x000000ff))); \ + t1=U32BIG(((u32*)in)[0]); \ + temp0[0] = t1; temp0[1] = t1 >> 1; temp0[2] = t1>> 2; \ + puckU32ToThree_1(temp0[0]); \ + puckU32ToThree_1(temp0[1]); \ + puckU32ToThree_1(temp0[2]); \ + t1=U32BIG(((u32*)in)[1]); \ + temp1[0] = t1; temp1[1] = t1>>1; temp1[2] = t1 >> 2; \ + puckU32ToThree_1(temp1[0]); \ + puckU32ToThree_1(temp1[1]); \ + puckU32ToThree_1(temp1[2]); \ + t1=U32BIG(((u32*)in)[2]); \ + temp2[0] = t1; temp2[1] =t1 >> 1; temp2[2] = t1>> 2; \ + puckU32ToThree_1(temp2[0]); \ + puckU32ToThree_1(temp2[1]); \ + puckU32ToThree_1(temp2[2]); \ + out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; \ + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; \ + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; \ } #define unpackU96FormatToThreePacket( out, in) {\ - u32 t[3] = { 0 };\ - u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ - t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ - ((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ - ((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ - t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ - ((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ - ((t0 & 0x3fc00) >> 10); \ - t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ - ((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ - ((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ - unpuckU32ToThree_3(t[0]); \ - unpuckU32ToThree_3(t[1]); \ - unpuckU32ToThree_3(t[2]); \ - memcpy(out, t, 12 * sizeof(unsigned char)); \ + t3=in[0] ; \ + t1=in[1] ; \ + t2=in[2] ; \ + temp0[0] = t2 & 0x7ff; \ + temp0[1] = t1 & 0x7ff; \ + temp0[2] = t3 & 0x3ff; \ + temp1[0] = (t3>>10) & 0x7ff; \ + temp1[1] = (t2 >>11 ) & 0x7ff; \ + temp1[2] = (t1 >> 11) & 0x3ff; \ + temp2[0] = t1 >> 21; \ + temp2[1] = t3 >> 21; \ + temp2[2] = t2 >> 22; \ + unpuckU32ToThree_1(temp0[0]); \ + unpuckU32ToThree_1(temp0[1]); \ + unpuckU32ToThree_1(temp0[2]); \ + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; \ + unpuckU32ToThree_1(temp1[0]); \ + unpuckU32ToThree_1(temp1[1]); \ + unpuckU32ToThree_1(temp1[2]); \ + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; \ + unpuckU32ToThree_1(temp2[0]); \ + unpuckU32ToThree_1(temp2[1]); \ + unpuckU32ToThree_1(temp2[2]); \ + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; \ + memcpy(out, t, 12 * sizeof(unsigned char)); \ } + diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_3/hash.c b/knot/Implementations/crypto_hash/knot256v2/armcortexm_3/hash.c index e38acaa..80682d6 100644 --- a/knot/Implementations/crypto_hash/knot256v2/armcortexm_3/hash.c +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_3/hash.c @@ -33,6 +33,10 @@ int crypto_hash(unsigned char *out, const unsigned char *in, u8 i, tempData[24] = { 0 }; u32 s_temp[12] = { 0 }; u32 t1, t2, t3, t5, t6, t8, t9, t11; + u32 t[3] = { 0 }; + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; // initialization s[9] = 0x80000000; //absorb diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_4/auxFormat.c b/knot/Implementations/crypto_hash/knot256v2/armcortexm_4/auxFormat.c index 6230f0b..cf94db4 100644 --- a/knot/Implementations/crypto_hash/knot256v2/armcortexm_4/auxFormat.c +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_4/auxFormat.c @@ -1,64 +1,78 @@ #include"auxFormat.h" -void packU32FormatToThreePacket(u32 *out, u8 *in) { - u32 t0 = U32BIG(((u32* )in)[0]); - puckU32ToThree_3(t0); - out[0] = ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); - out[1] = (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) - | ((t0 & 0xff00) >> 8)); - out[2] = (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) - | (t0 & 0x000000ff)); +void packU32FormatToThreePacket(u32 * out, u8 * in) { + u32 t2 = U32BIG(((u32*)in)[0]); + out[2] = t2; out[1] = t2 >> 1; out[0] = t2 >> 2;//temp2[0] 0;temp2[1] 1;temp2[2] 2; + puckU32ToThree_1(out[0]); + puckU32ToThree_1(out[1]); + puckU32ToThree_1(out[2]); } -void unpackU32FormatToThreePacket(u8 *out, u32 *in) { - u32 t[3] = { 0 };\ - u32 t0 = in[0], t1 = in[1], t2 = in[2]; - t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | ((t0 & 0x300) << 20) - | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | ((t0 & 0xff) << 16) - | ((t1 & 0xff) << 8) | (t2 & 0xff); - unpuckU32ToThree_3(t[0]); - *(u32*) (out) = t[0]; +void unpackU32FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + temp0[0] = in[0] & 0x3ff; + temp0[1] = in[1] & 0x7ff; + temp0[2] = in[2] & 0x7ff; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + *(u32*)(out) = U32BIG(temp0[0]<<2 | temp0[1] << 1 | temp0[2]); } -void packU96FormatToThreePacket(u32 *out, u8 *in) { - u32 t0 = U32BIG(((u32* )in)[0]), t1 = U32BIG(((u32* )in)[1]), t2 = U32BIG( - ((u32* )in)[2]); - puckU32ToThree_3(t0); - puckU32ToThree_3(t1); - puckU32ToThree_3(t2); - out[0] = (((t2 & 0x80000000)) | ((t2 & 0xc000000) << 3) - | ((t2 & 0xff00) << 13)) - | (((t1 & 0x40000000) >> 10) | ((t1 & 0x03000000) >> 6) - | ((t1 & 0x000000ff) << 10)) - | ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); - out[1] = (((t2 & 0x40000000) << 1) | ((t2 & 0x03000000) << 5) - | ((t2 & 0x000000ff) << 21)) - | ((((t1 & 0x30000000) >> 9) | ((t1 & 0xff0000) >> 5)) - | (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) - | ((t0 & 0xff00) >> 8))); - out[2] = ((((t2 & 0x30000000) << 2) | ((t2 & 0xff0000) << 6)) - | (((t1 & 0x80000000) >> 10) | ((t1 & 0xc000000) >> 7) - | ((t1 & 0xff00) << 3)) - | (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) - | (t0 & 0x000000ff))); + +void packU96FormatToThreePacket(u32 * out, u8 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t1=U32BIG(((u32*)in)[0]); + temp0[0] = t1; temp0[1] = t1 >> 1; temp0[2] = t1>> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + t1=U32BIG(((u32*)in)[1]); + temp1[0] = t1; temp1[1] = t1>>1; temp1[2] = t1 >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + t1=U32BIG(((u32*)in)[2]); + temp2[0] = t1; temp2[1] =t1 >> 1; temp2[2] = t1>> 2; + puckU32ToThree_1(temp2[0]); + puckU32ToThree_1(temp2[1]); + puckU32ToThree_1(temp2[2]); + out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; } -void unpackU96FormatToThreePacket(u8 *out, u32 *in) { - u32 t[3] = { 0 };\ - u32 t0 = in[0], t1 = in[1], t2 = in[2]; - t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | ((t0 & 0x300) << 20) - | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | ((t0 & 0xff) << 16) - | ((t1 & 0xff) << 8) | (t2 & 0xff); - t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) - | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) - | ((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) - | ((t2 & 0x7f800) >> 3) | ((t0 & 0x3fc00) >> 10); - t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) - | ((t2 & 0xc0000000) >> 2) | ((t0 & 0x60000000) >> 3) - | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) - | ((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); - unpuckU32ToThree_3(t[0]); - unpuckU32ToThree_3(t[1]); - unpuckU32ToThree_3(t[2]); +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + u32 t0=in[0] ; + u32 t1=in[1] ; + u32 t2=in[2] ; + temp0[0] = t2 & 0x7ff; + temp0[1] = t1 & 0x7ff; + temp0[2] = t0 & 0x3ff; + temp1[0] = (t0>>10) & 0x7ff; + temp1[1] = (t2 >>11 ) & 0x7ff; + temp1[2] = (t1 >> 11) & 0x3ff; + temp2[0] = t1 >> 21; + temp2[1] = t0 >> 21; + temp2[2] = t2 >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; memcpy(out, t, 12 * sizeof(unsigned char)); } + unsigned char constant7Format[80] = { /*constant7Format[127]: 12*6=72*/ 0x01, 0x08, 0x40, 0x02, 0x10, 0x80, 0x05, 0x09, 0x48, 0x42, 0x12, 0x90, 0x85, diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_4/auxFormat.h b/knot/Implementations/crypto_hash/knot256v2/armcortexm_4/auxFormat.h index ee7d2ea..85f53af 100644 --- a/knot/Implementations/crypto_hash/knot256v2/armcortexm_4/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_4/auxFormat.h @@ -14,23 +14,19 @@ void unpackU96FormatToThreePacket(u8 * out, u32 * in) ; void packU96FormatToThreePacket(u32 * out, u8 * in); void unpackU32FormatToThreePacket(u8 * out, u32 * in); void packU32FormatToThreePacket(u32 * out, u8 * in); -#define puckU32ToThree_3(lo){\ -u32 r0;\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ } -#define unpuckU32ToThree_3(lo){\ - u32 r0;\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ } unsigned char constant7Format[80]; diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_5/auxFormat.c b/knot/Implementations/crypto_hash/knot256v2/armcortexm_5/auxFormat.c index f7a5cc2..e5f279e 100644 --- a/knot/Implementations/crypto_hash/knot256v2/armcortexm_5/auxFormat.c +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_5/auxFormat.c @@ -1,64 +1,79 @@ #include"auxFormat.h" -void packU32FormatToThreePacket(u32 *out, u8 *in) { - u32 t0 = U32BIG(((u32* )in)[0]); - puckU32ToThree_3(t0); - out[0] = ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); - out[1] = (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) - | ((t0 & 0xff00) >> 8)); - out[2] = (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) - | (t0 & 0x000000ff)); + +void packU32FormatToThreePacket(u32 * out, u8 * in) { + u32 t2 = U32BIG(((u32*)in)[0]); + out[2] = t2; out[1] = t2 >> 1; out[0] = t2 >> 2;//temp2[0] 0;temp2[1] 1;temp2[2] 2; + puckU32ToThree_1(out[0]); + puckU32ToThree_1(out[1]); + puckU32ToThree_1(out[2]); } -void unpackU32FormatToThreePacket(u8 *out, u32 *in) { - u32 t[3] = { 0 };\ - u32 t0 = in[0], t1 = in[1], t2 = in[2]; - t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | ((t0 & 0x300) << 20) - | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | ((t0 & 0xff) << 16) - | ((t1 & 0xff) << 8) | (t2 & 0xff); - unpuckU32ToThree_3(t[0]); - *(u32*) (out) = t[0]; +void unpackU32FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + temp0[0] = in[0] & 0x3ff; + temp0[1] = in[1] & 0x7ff; + temp0[2] = in[2] & 0x7ff; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + *(u32*)(out) = U32BIG(temp0[0]<<2 | temp0[1] << 1 | temp0[2]); } -void packU96FormatToThreePacket(u32 *out, u8 *in) { - u32 t0 = U32BIG(((u32* )in)[0]), t1 = U32BIG(((u32* )in)[1]), t2 = U32BIG( - ((u32* )in)[2]); - puckU32ToThree_3(t0); - puckU32ToThree_3(t1); - puckU32ToThree_3(t2); - out[0] = (((t2 & 0x80000000)) | ((t2 & 0xc000000) << 3) - | ((t2 & 0xff00) << 13)) - | (((t1 & 0x40000000) >> 10) | ((t1 & 0x03000000) >> 6) - | ((t1 & 0x000000ff) << 10)) - | ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); - out[1] = (((t2 & 0x40000000) << 1) | ((t2 & 0x03000000) << 5) - | ((t2 & 0x000000ff) << 21)) - | ((((t1 & 0x30000000) >> 9) | ((t1 & 0xff0000) >> 5)) - | (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) - | ((t0 & 0xff00) >> 8))); - out[2] = ((((t2 & 0x30000000) << 2) | ((t2 & 0xff0000) << 6)) - | (((t1 & 0x80000000) >> 10) | ((t1 & 0xc000000) >> 7) - | ((t1 & 0xff00) << 3)) - | (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) - | (t0 & 0x000000ff))); + +void packU96FormatToThreePacket(u32 * out, u8 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t1=U32BIG(((u32*)in)[0]); + temp0[0] = t1; temp0[1] = t1 >> 1; temp0[2] = t1>> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + t1=U32BIG(((u32*)in)[1]); + temp1[0] = t1; temp1[1] = t1>>1; temp1[2] = t1 >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + t1=U32BIG(((u32*)in)[2]); + temp2[0] = t1; temp2[1] =t1 >> 1; temp2[2] = t1>> 2; + puckU32ToThree_1(temp2[0]); + puckU32ToThree_1(temp2[1]); + puckU32ToThree_1(temp2[2]); + out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; } -void unpackU96FormatToThreePacket(u8 *out, u32 *in) { - u32 t[3] = { 0 };\ - u32 t0 = in[0], t1 = in[1], t2 = in[2]; - t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | ((t0 & 0x300) << 20) - | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | ((t0 & 0xff) << 16) - | ((t1 & 0xff) << 8) | (t2 & 0xff); - t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) - | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) - | ((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) - | ((t2 & 0x7f800) >> 3) | ((t0 & 0x3fc00) >> 10); - t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) - | ((t2 & 0xc0000000) >> 2) | ((t0 & 0x60000000) >> 3) - | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) - | ((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); - unpuckU32ToThree_3(t[0]); - unpuckU32ToThree_3(t[1]); - unpuckU32ToThree_3(t[2]); +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + u32 t0=in[0] ; + u32 t1=in[1] ; + u32 t2=in[2] ; + temp0[0] = t2 & 0x7ff; + temp0[1] = t1 & 0x7ff; + temp0[2] = t0 & 0x3ff; + temp1[0] = (t0>>10) & 0x7ff; + temp1[1] = (t2 >>11 ) & 0x7ff; + temp1[2] = (t1 >> 11) & 0x3ff; + temp2[0] = t1 >> 21; + temp2[1] = t0 >> 21; + temp2[2] = t2 >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; memcpy(out, t, 12 * sizeof(unsigned char)); } + void ROUND384_Three(unsigned int *s, unsigned char *c, int lunnum) { unsigned int t, t1, t2; u32 rci; diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_5/auxFormat.h b/knot/Implementations/crypto_hash/knot256v2/armcortexm_5/auxFormat.h index b133d3c..7017560 100644 --- a/knot/Implementations/crypto_hash/knot256v2/armcortexm_5/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_5/auxFormat.h @@ -15,23 +15,19 @@ void unpackU96FormatToThreePacket(u8 * out, u32 * in) ; void packU96FormatToThreePacket(u32 * out, u8 * in); void unpackU32FormatToThreePacket(u8 * out, u32 * in); void packU32FormatToThreePacket(u32 * out, u8 * in); -#define puckU32ToThree_3(lo){\ -u32 r0;\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ } -#define unpuckU32ToThree_3(lo){\ - u32 r0;\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ } ////////////constant begin// unsigned char constant7Format[80]; diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.c b/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.c index ac25779..ac81dee 100644 --- a/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.c +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.c @@ -1,52 +1,77 @@ #include"auxFormat.h" -void packU32FormatToThreePacket(u32 *out, u8 *in) { - u32 t0 = U32BIG(((u32*)in)[0]); \ - puckU32ToThree_3(t0); \ - out[0] = ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); \ -out[1] = (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) | ((t0 & 0xff00) >> 8)); \ -out[2] = (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) | (t0 & 0x000000ff)); \ + +void packU32FormatToThreePacket(u32 * out, u8 * in) { + u32 t2 = U32BIG(((u32*)in)[0]); + out[2] = t2; out[1] = t2 >> 1; out[0] = t2 >> 2;//temp2[0] 0;temp2[1] 1;temp2[2] 2; + puckU32ToThree_1(out[0]); + puckU32ToThree_1(out[1]); + puckU32ToThree_1(out[2]); } -void unpackU32FormatToThreePacket(u8 *out, u32 *in) { - u32 t[3] = { 0 } ;\ -u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ -t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ -((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ -((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ -unpuckU32ToThree_3(t[0]); \ -*(u32*)(out) = t[0]; \ +void unpackU32FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + temp0[0] = in[0] & 0x3ff; + temp0[1] = in[1] & 0x7ff; + temp0[2] = in[2] & 0x7ff; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + *(u32*)(out) = U32BIG(temp0[0]<<2 | temp0[1] << 1 | temp0[2]); } -void packU96FormatToThreePacket(u32 *out, u8 *in) { - u32 t0 = U32BIG(((u32*)in)[0]), t1 = U32BIG(((u32*)in)[1]), t2 = U32BIG(((u32*)in)[2]); \ - puckU32ToThree_3(t0); \ - puckU32ToThree_3(t1); \ - puckU32ToThree_3(t2); \ - out[0] = (((t2 & 0x80000000)) | ((t2 & 0xc000000) << 3) | ((t2 & 0xff00) << 13)) | \ -(((t1 & 0x40000000) >> 10) | ((t1 & 0x03000000) >> 6) | ((t1 & 0x000000ff) << 10)) | \ -((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); \ -out[1] = (((t2 & 0x40000000) << 1) | ((t2 & 0x03000000) << 5) | ((t2 & 0x000000ff) << 21)) | \ -((((t1 & 0x30000000) >> 9) | ((t1 & 0xff0000) >> 5)) | \ -(((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) | ((t0 & 0xff00) >> 8))); \ -out[2] = ((((t2 & 0x30000000) << 2) | ((t2 & 0xff0000) << 6)) | \ -(((t1 & 0x80000000) >> 10) | ((t1 & 0xc000000) >> 7) | ((t1 & 0xff00) << 3)) | \ -(((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) | (t0 & 0x000000ff))); \ + +void packU96FormatToThreePacket(u32 * out, u8 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t1=U32BIG(((u32*)in)[0]); + temp0[0] = t1; temp0[1] = t1 >> 1; temp0[2] = t1>> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + t1=U32BIG(((u32*)in)[1]); + temp1[0] = t1; temp1[1] = t1>>1; temp1[2] = t1 >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + t1=U32BIG(((u32*)in)[2]); + temp2[0] = t1; temp2[1] =t1 >> 1; temp2[2] = t1>> 2; + puckU32ToThree_1(temp2[0]); + puckU32ToThree_1(temp2[1]); + puckU32ToThree_1(temp2[2]); + out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; } -void unpackU96FormatToThreePacket(u8 *out, u32 *in) { - u32 t[3] = { 0 };\ -u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ -t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ -((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ -((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ -t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ -((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ -((t0 & 0x3fc00) >> 10); \ -t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ -((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ -((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ -unpuckU32ToThree_3(t[0]); \ -unpuckU32ToThree_3(t[1]); \ -unpuckU32ToThree_3(t[2]); \ -memcpy(out, t, 12 * sizeof(unsigned char)); \ +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + u32 t0=in[0] ; + u32 t1=in[1] ; + u32 t2=in[2] ; + temp0[0] = t2 & 0x7ff; + temp0[1] = t1 & 0x7ff; + temp0[2] = t0 & 0x3ff; + temp1[0] = (t0>>10) & 0x7ff; + temp1[1] = (t2 >>11 ) & 0x7ff; + temp1[2] = (t1 >> 11) & 0x3ff; + temp2[0] = t1 >> 21; + temp2[1] = t0 >> 21; + temp2[2] = t2 >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; + memcpy(out, t, 12 * sizeof(unsigned char)); } void ROUND384_Three(unsigned int *s, unsigned char *c, int lunnum) { unsigned int t, t1, t2; diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.h b/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.h index 253d360..fbbd733 100644 --- a/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.h @@ -11,24 +11,21 @@ typedef unsigned int u32; typedef unsigned long long u64; void ROUND384_Three(unsigned int *s, unsigned char *c,int lunnum) ; -#define puckU32ToThree_3(lo){\ -u32 r0;\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ } -#define unpuckU32ToThree_3(lo){\ - u32 r0;\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ } + ////////////constant begin// unsigned char constant7Format[80]; #define ARC(rci) \ diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_2/auxFormat.c b/knot/Implementations/crypto_hash/knot384/armcortexm_2/auxFormat.c index 242a621..ace73d1 100644 --- a/knot/Implementations/crypto_hash/knot384/armcortexm_2/auxFormat.c +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_2/auxFormat.c @@ -1,35 +1,48 @@ #include"auxFormat.h" - -void unpackU96FormatToThreePacket(u8 *out, u32 *in) { - u32 t[3] = { 0 }; \ - u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ - t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ - ((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ - ((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ - t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ - ((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ - ((t0 & 0x3fc00) >> 10); \ - t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ - ((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ - ((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ - unpuckU32ToThree_3(t[0]); \ - unpuckU32ToThree_3(t[1]); \ - unpuckU32ToThree_3(t[2]); \ - memcpy(out, t, 12 * sizeof(unsigned char)); \ +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + temp0[0] = in[2] & 0x7ff; + temp0[1] = in[1] & 0x7ff; + temp0[2] = in[0] & 0x3ff; + temp1[0] = (in[0]>>10) & 0x7ff; + temp1[1] = (in[2] >>11 ) & 0x7ff; + temp1[2] = (in[1] >> 11) & 0x3ff; + temp2[0] = in[1] >> 21; + temp2[1] = in[0] >> 21; + temp2[2] = in[2] >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; + memcpy(out, t, 12 * sizeof(unsigned char)); } + void packU48FormatToThreePacket(u32 * out, u8 * in) { - u32 t0 = U32BIG(*(u32*)(in)), t1 = (u32)U16BIG(*(u16*)(in + 4)); \ - puckU32ToThree_3(t0); \ - puckU32ToThree_3(t1); \ - out[0] = \ - (((t1 & 0x40000000) >> 10) | ((t1 & 0x03000000) >> 6) | ((t1 & 0x000000ff) << 10)) | \ - ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); \ - out[1] = \ - ((((t1 & 0x30000000) >> 9) | ((t1 & 0xff0000) >> 5)) | \ - (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) | ((t0 & 0xff00) >> 8))); \ - out[2] = \ - (((t1 & 0x80000000) >> 10) | ((t1 & 0xc000000) >> 7) | ((t1 & 0xff00) << 3)) | \ - (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) | (t0 & 0x000000ff)); \ + u32 t1 = (u32)U16BIG(*(u16*)(in + 4)); + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + temp1[0] = t1; temp1[1] = t1 >> 1; temp1[2] = t1 >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + out[0] = (temp1[0] << 10) | temp0[2]; + out[1] = (temp1[2] << 11) | temp0[1]; + out[2] = (temp1[1] << 11) | temp0[0]; } unsigned char constant7Format[104] = { diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_2/auxFormat.h b/knot/Implementations/crypto_hash/knot384/armcortexm_2/auxFormat.h index 3b9606b..8e092aa 100644 --- a/knot/Implementations/crypto_hash/knot384/armcortexm_2/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_2/auxFormat.h @@ -17,24 +17,21 @@ void packU48FormatToThreePacket(u32 * out, u8 * in) ; void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) ; void unpackU96FormatToThreePacket(u8 * out, u32 * in) ; -#define puckU32ToThree_3(lo){\ -u32 r0;\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ } -#define unpuckU32ToThree_3(lo){\ - u32 r0;\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ } + #define P384_ARC_SC1(rci,S2,S3,S4) \ do { \ __asm__ __volatile__ ( \ diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_3/auxFormat.h b/knot/Implementations/crypto_hash/knot384/armcortexm_3/auxFormat.h index b2d5511..9dd0130 100644 --- a/knot/Implementations/crypto_hash/knot384/armcortexm_3/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_3/auxFormat.h @@ -22,55 +22,62 @@ typedef unsigned long long u64; { \ t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ } -#define puckU32ToThree_3(lo){\ -u32 r0;\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ } -#define unpuckU32ToThree_3(lo){\ - u32 r0;\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ } + #define unpackU96FormatToThreePacket( out, in) {\ - u32 t[3] = { 0 };\ - u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ - t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ - ((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ - ((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ - t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ - ((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ - ((t0 & 0x3fc00) >> 10); \ - t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ - ((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ - ((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ - unpuckU32ToThree_3(t[0]); \ - unpuckU32ToThree_3(t[1]); \ - unpuckU32ToThree_3(t[2]); \ - memcpy(out, t, 12 * sizeof(unsigned char)); \ + t3=in[0] ; \ + t1=in[1] ; \ + t2=in[2] ; \ + temp0[0] = t2 & 0x7ff; \ + temp0[1] = t1 & 0x7ff; \ + temp0[2] = t3 & 0x3ff; \ + temp1[0] = (t3>>10) & 0x7ff; \ + temp1[1] = (t2 >>11 ) & 0x7ff; \ + temp1[2] = (t1 >> 11) & 0x3ff; \ + temp2[0] = t1 >> 21; \ + temp2[1] = t3 >> 21; \ + temp2[2] = t2 >> 22; \ + unpuckU32ToThree_1(temp0[0]); \ + unpuckU32ToThree_1(temp0[1]); \ + unpuckU32ToThree_1(temp0[2]); \ + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; \ + unpuckU32ToThree_1(temp1[0]); \ + unpuckU32ToThree_1(temp1[1]); \ + unpuckU32ToThree_1(temp1[2]); \ + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; \ + unpuckU32ToThree_1(temp2[0]); \ + unpuckU32ToThree_1(temp2[1]); \ + unpuckU32ToThree_1(temp2[2]); \ + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; \ + memcpy(out, t, 12 * sizeof(unsigned char)); \ } #define packU48FormatToThreePacket(out, in) {\ - u32 t0 = U32BIG(*(u32*)(in)), t1 = (u32)U16BIG(*(u16*)(in + 4)); \ - puckU32ToThree_3(t0); \ - puckU32ToThree_3(t1); \ - out[0] = \ - (((t1 & 0x40000000) >> 10) | ((t1 & 0x03000000) >> 6) | ((t1 & 0x000000ff) << 10)) | \ - ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); \ - out[1] = \ - ((((t1 & 0x30000000) >> 9) | ((t1 & 0xff0000) >> 5)) | \ - (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) | ((t0 & 0xff00) >> 8))); \ - out[2] = \ - (((t1 & 0x80000000) >> 10) | ((t1 & 0xc000000) >> 7) | ((t1 & 0xff00) << 3)) | \ - (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) | (t0 & 0x000000ff)); \ + t1 = (u32)U16BIG(*(u16*)(in + 4)); \ + temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; \ + puckU32ToThree_1(temp0[0]); \ + puckU32ToThree_1(temp0[1]); \ + puckU32ToThree_1(temp0[2]); \ + temp1[0] = t1; temp1[1] = t1 >> 1; temp1[2] = t1 >> 2; \ + puckU32ToThree_1(temp1[0]); \ + puckU32ToThree_1(temp1[1]); \ + puckU32ToThree_1(temp1[2]); \ + out[0] = (temp1[0] << 10) | temp0[2]; \ + out[1] = (temp1[2] << 11) | temp0[1]; \ + out[2] = (temp1[1] << 11) | temp0[0]; \ } #define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\ t3= LOTR32(t2, 2);\ diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_3/hash.c b/knot/Implementations/crypto_hash/knot384/armcortexm_3/hash.c index 746235b..11be73a 100644 --- a/knot/Implementations/crypto_hash/knot384/armcortexm_3/hash.c +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_3/hash.c @@ -31,6 +31,10 @@ int crypto_hash(unsigned char *out, const unsigned char *in, u32 s_temp[12] = { 0 }; u8 i; u32 dataFormat[3] = { 0 }; + u32 t[3] = { 0 }; + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; // initialization u32 s[12] = { 0 }; u8 tempData[12]; diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_4/auxFormat.c b/knot/Implementations/crypto_hash/knot384/armcortexm_4/auxFormat.c index 46f3a49..b32f2c5 100644 --- a/knot/Implementations/crypto_hash/knot384/armcortexm_4/auxFormat.c +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_4/auxFormat.c @@ -1,35 +1,49 @@ #include"auxFormat.h" -void unpackU96FormatToThreePacket(u8 *out, u32 *in) { - u32 t[3] = { 0 }; \ - u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ - t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ - ((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ - ((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ - t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ - ((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ - ((t0 & 0x3fc00) >> 10); \ - t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ - ((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ - ((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ - unpuckU32ToThree_3(t[0]); \ - unpuckU32ToThree_3(t[1]); \ - unpuckU32ToThree_3(t[2]); \ - memcpy(out, t, 12 * sizeof(unsigned char)); \ +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + temp0[0] = in[2] & 0x7ff; + temp0[1] = in[1] & 0x7ff; + temp0[2] = in[0] & 0x3ff; + temp1[0] = (in[0]>>10) & 0x7ff; + temp1[1] = (in[2] >>11 ) & 0x7ff; + temp1[2] = (in[1] >> 11) & 0x3ff; + temp2[0] = in[1] >> 21; + temp2[1] = in[0] >> 21; + temp2[2] = in[2] >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; + memcpy(out, t, 12 * sizeof(unsigned char)); } + void packU48FormatToThreePacket(u32 * out, u8 * in) { - u32 t0 = U32BIG(*(u32*)(in)), t1 = (u32)U16BIG(*(u16*)(in + 4)); \ - puckU32ToThree_3(t0); \ - puckU32ToThree_3(t1); \ - out[0] = \ - (((t1 & 0x40000000) >> 10) | ((t1 & 0x03000000) >> 6) | ((t1 & 0x000000ff) << 10)) | \ - ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); \ - out[1] = \ - ((((t1 & 0x30000000) >> 9) | ((t1 & 0xff0000) >> 5)) | \ - (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) | ((t0 & 0xff00) >> 8))); \ - out[2] = \ - (((t1 & 0x80000000) >> 10) | ((t1 & 0xc000000) >> 7) | ((t1 & 0xff00) << 3)) | \ - (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) | (t0 & 0x000000ff)); \ + u32 t1 = (u32)U16BIG(*(u16*)(in + 4)); + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + temp1[0] = t1; temp1[1] = t1 >> 1; temp1[2] = t1 >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + out[0] = (temp1[0] << 10) | temp0[2]; + out[1] = (temp1[2] << 11) | temp0[1]; + out[2] = (temp1[1] << 11) | temp0[0]; } unsigned char constant7Format[104] = { diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_4/auxFormat.h b/knot/Implementations/crypto_hash/knot384/armcortexm_4/auxFormat.h index e814e1d..b836c02 100644 --- a/knot/Implementations/crypto_hash/knot384/armcortexm_4/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_4/auxFormat.h @@ -39,21 +39,17 @@ t4 = LOTR32(t2, 18);\ t5 = LOTR32(t0, 19); \ } unsigned char constant7Format[104]; -#define puckU32ToThree_3(lo){\ -u32 r0;\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ } -#define unpuckU32ToThree_3(lo){\ - u32 r0;\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ } diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_5/auxFormat.c b/knot/Implementations/crypto_hash/knot384/armcortexm_5/auxFormat.c index 4612893..0693568 100644 --- a/knot/Implementations/crypto_hash/knot384/armcortexm_5/auxFormat.c +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_5/auxFormat.c @@ -1,54 +1,51 @@ #include"auxFormat.h" -#define puckU32ToThree_3(lo){\ -u32 r0;\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ -} -#define unpuckU32ToThree_3(lo){\ - u32 r0;\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ -} -void unpackU96FormatToThreePacket(u8 *out, u32 *in) { - u32 t[3] = { 0 }; \ - u32 t0 = in[0], t1 = in[1], t2 = in[2]; \ - t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | \ - ((t0 & 0x300) << 20) | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | \ - ((t0 & 0xff) << 16) | ((t1 & 0xff) << 8) | (t2 & 0xff); \ - t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) | \ - ((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) | ((t2 & 0x7f800) >> 3) | \ - ((t0 & 0x3fc00) >> 10); \ - t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) | ((t2 & 0xc0000000) >> 2) | \ - ((t0 & 0x60000000) >> 3) | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) | \ - ((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); \ - unpuckU32ToThree_3(t[0]); \ - unpuckU32ToThree_3(t[1]); \ - unpuckU32ToThree_3(t[2]); \ - memcpy(out, t, 12 * sizeof(unsigned char)); \ +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + temp0[0] = in[2] & 0x7ff; + temp0[1] = in[1] & 0x7ff; + temp0[2] = in[0] & 0x3ff; + temp1[0] = (in[0]>>10) & 0x7ff; + temp1[1] = (in[2] >>11 ) & 0x7ff; + temp1[2] = (in[1] >> 11) & 0x3ff; + temp2[0] = in[1] >> 21; + temp2[1] = in[0] >> 21; + temp2[2] = in[2] >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; + memcpy(out, t, 12 * sizeof(unsigned char)); } + void packU48FormatToThreePacket(u32 * out, u8 * in) { - u32 t0 = U32BIG(*(u32*)(in)), t1 = (u32)U16BIG(*(u16*)(in + 4)); \ - puckU32ToThree_3(t0); \ - puckU32ToThree_3(t1); \ - out[0] = \ - (((t1 & 0x40000000) >> 10) | ((t1 & 0x03000000) >> 6) | ((t1 & 0x000000ff) << 10)) | \ - ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); \ - out[1] = \ - ((((t1 & 0x30000000) >> 9) | ((t1 & 0xff0000) >> 5)) | \ - (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) | ((t0 & 0xff00) >> 8))); \ - out[2] = \ - (((t1 & 0x80000000) >> 10) | ((t1 & 0xc000000) >> 7) | ((t1 & 0xff00) << 3)) | \ - (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) | (t0 & 0x000000ff)); \ + u32 t1 = (u32)U16BIG(*(u16*)(in + 4)); + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + temp1[0] = t1; temp1[1] = t1 >> 1; temp1[2] = t1 >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + out[0] = (temp1[0] << 10) | temp0[2]; + out[1] = (temp1[2] << 11) | temp0[1]; + out[2] = (temp1[1] << 11) | temp0[0]; } + unsigned char constant7Format[104] = { /*constant7Format[127]: 12*9=108*/ 0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.c b/knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.c index b3757c3..8e1858a 100644 --- a/knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.c +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.c @@ -1,40 +1,50 @@ #include"auxFormat.h" -void unpackU96FormatToThreePacket(u8 *out, u32 *in) { +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; u32 t[3] = { 0 }; - u32 t0 = in[0], t1 = in[1], t2 = in[2]; - t[0] = ((t1 & 0x400) << 21) | ((t2 & 0x400) << 20) | ((t0 & 0x300) << 20) - | ((t1 & 0x300) << 18) | ((t2 & 0x300) << 16) | ((t0 & 0xff) << 16) - | ((t1 & 0xff) << 8) | (t2 & 0xff); - t[1] = ((t2 & 0x200000) << 10) | ((t0 & 0x100000) << 10) - | ((t1 & 0x180000) << 9) | ((t2 & 0x180000) << 7) - | ((t0 & 0xc0000) << 6) | ((t1 & 0x7f800) << 5) - | ((t2 & 0x7f800) >> 3) | ((t0 & 0x3fc00) >> 10); - t[2] = ((t0 & 0x80000000)) | ((t1 & 0x80000000) >> 1) - | ((t2 & 0xc0000000) >> 2) | ((t0 & 0x60000000) >> 3) - | ((t1 & 0x60000000) >> 5) | ((t2 & 0x3fc00000) >> 6) - | ((t0 & 0x1fe00000) >> 13) | ((t1 & 0x1fe00000) >> 21); - unpuckU32ToThree_3(t[0]); - unpuckU32ToThree_3(t[1]); - unpuckU32ToThree_3(t[2]); + temp0[0] = in[2] & 0x7ff; + temp0[1] = in[1] & 0x7ff; + temp0[2] = in[0] & 0x3ff; + temp1[0] = (in[0]>>10) & 0x7ff; + temp1[1] = (in[2] >>11 ) & 0x7ff; + temp1[2] = (in[1] >> 11) & 0x3ff; + temp2[0] = in[1] >> 21; + temp2[1] = in[0] >> 21; + temp2[2] = in[2] >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; memcpy(out, t, 12 * sizeof(unsigned char)); } -void packU48FormatToThreePacket(u32 *out, u8 *in) { - u32 t0 = U32BIG(*(u32* )(in)), t1 = (u32) U16BIG(*(u16* )(in + 4)); - puckU32ToThree_3(t0); - puckU32ToThree_3(t1); - out[0] = (((t1 & 0x40000000) >> 10) | ((t1 & 0x03000000) >> 6) - | ((t1 & 0x000000ff) << 10)) - | ((((t0 & 0x30000000) >> 20) | ((t0 & 0xff0000) >> 16))); - out[1] = ((((t1 & 0x30000000) >> 9) | ((t1 & 0xff0000) >> 5)) - | (((t0 & 0x80000000) >> 21) | ((t0 & 0xc000000) >> 18) - | ((t0 & 0xff00) >> 8))); - out[2] = (((t1 & 0x80000000) >> 10) | ((t1 & 0xc000000) >> 7) - | ((t1 & 0xff00) << 3)) - | (((t0 & 0x40000000) >> 20) | ((t0 & 0x03000000) >> 16) - | (t0 & 0x000000ff)); -} +void packU48FormatToThreePacket(u32 * out, u8 * in) { + u32 t1 = (u32)U16BIG(*(u16*)(in + 4)); + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + temp1[0] = t1; temp1[1] = t1 >> 1; temp1[2] = t1 >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + out[0] = (temp1[0] << 10) | temp0[2]; + out[1] = (temp1[2] << 11) | temp0[1]; + out[2] = (temp1[1] << 11) | temp0[0]; +} unsigned char constant7Format[104] = { /*constant7Format[127]: 12*9=108*/ 0x01, 0x08, 0x40, 0x02, 0x10, 0x80, 0x05, 0x09, 0x48, 0x42, 0x12, 0x90, 0x85, diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.h b/knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.h index 39d3c6a..032200d 100644 --- a/knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.h +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.h @@ -12,24 +12,19 @@ typedef unsigned short u16; typedef unsigned int u32; typedef unsigned long long u64; - -#define puckU32ToThree_3(lo){\ -u32 r0;\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ } -#define unpuckU32ToThree_3(lo){\ - u32 r0;\ -r0 = (lo ^ (lo << 12)) & 0x000f0000, lo ^= r0 ^ (r0 >> 12);\ -r0 = (lo ^ (lo << 4)) & 0x000f0f00, lo ^= r0 ^ (r0 >> 4);\ -r0 = (lo ^ (lo << 6)) & 0x00300300, lo ^= r0 ^ (r0 >> 6);\ -r0 = (lo ^ (lo << 2)) & 0x00330330, lo ^= r0 ^ (r0 >> 2);\ -r0 = (lo ^ (lo << 3)) & 0x10410410, lo ^= r0 ^ (r0 >> 3);\ -r0 = (lo ^ (lo << 1)) & 0x14514514, lo ^= r0 ^ (r0 >> 1);\ +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ } #define ARC(rci) \ do { \ diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_6/hash.c b/knot/Implementations/crypto_hash/knot384/armcortexm_6/hash.c index 3a9b7b5..7a2c661 100644 --- a/knot/Implementations/crypto_hash/knot384/armcortexm_6/hash.c +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_6/hash.c @@ -8,7 +8,7 @@ int crypto_hash(unsigned char *out, const unsigned char *in, unsigned long long inlen) { - u32 dataFormat[3] = { 0 }, t1, t2; + u32 dataFormat[3] = { 0 }, t2; // initialization u32 s[12] = { 0 }; u8 tempData[12];