diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_6/api.h b/knot/Implementations/crypto_aead/knot128v1/armcortexm_6/api.h
new file mode 100644
index 0000000..732ae75
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_6/api.h
@@ -0,0 +1,8 @@
+
+#define CRYPTO_KEYBYTES 16 //
+#define CRYPTO_NSECBYTES 0
+#define CRYPTO_NPUBBYTES 16
+#define CRYPTO_ABYTES 16
+#define CRYPTO_NOOVERLAP 1
+
+
diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_6/auxFormat.c b/knot/Implementations/crypto_aead/knot128v1/armcortexm_6/auxFormat.c
new file mode 100644
index 0000000..860c9f8
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_6/auxFormat.c
@@ -0,0 +1,205 @@
+#include"auxFormat.h"
+/*
+ * Apply the 256-bit permutation to the eight state words s[0..7] in place.
+ * One round runs before the loop, the loop body holds two rounds and is
+ * executed `rounds` times, and one round runs after it, so 2 * rounds + 2
+ * rounds are applied in total. One byte is read from rc per round.
+ * rounds must be non-zero: the counter is decremented before it is tested.
+ */
+void P256(unsigned int *s, unsigned char *rc, unsigned char rounds)
+{
+	unsigned int reg1, reg2;
+	asm volatile (
+		"/*add round const*/ \n\t"
+		"ldrb %[reg1], [%[rc]] \n\t"
+		"and %[reg2], %[reg1], 0xf \n\t"
+		"eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/
+		"eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/
+		"adds %[rc], %[rc], #1 \n\t"
+		"/*sbox first column 0,2,4,6 sbox1(s[0], s[2], s[4], s[6]); */ \n\t"
+		"mvns %[S_0], %[S_0] \n\t"
+		"ands %[reg1], %[S_2], %[S_0] \n\t"
+		"eors %[reg1], %[S_4], %[reg1] \n\t"
+		"orrs %[S_4], %[S_2], %[S_4] \n\t"
+		"eors %[S_0], %[S_6], %[S_0] \n\t"
+		"eors %[S_4], %[S_4], %[S_0] \n\t"
+		"eors %[reg2], %[S_2], %[S_6] \n\t"
+		"eors %[S_6], %[S_6], %[reg1] \n\t"
+		"ands %[S_0], %[reg1],%[S_0] \n\t"
+		"eors %[S_0], %[reg2],%[S_0] \n\t"
+		"ands %[S_2], %[S_4], %[reg2] \n\t"
+		"eors %[S_2], %[reg1], %[S_2] \n\t"
+		"/*sbox first column 1,3,5,7 sbox1(s[1], s[3], s[5], s[7]) */ \n\t"
+		"mvns %[S_1], %[S_1] \n\t"
+		"ands %[reg1], %[S_3], %[S_1] \n\t"
+		"eors %[reg1], %[S_5], %[reg1] \n\t"
+		"orrs %[S_5], %[S_3], %[S_5] \n\t"
+		"eors %[S_1], %[S_7], %[S_1] \n\t"
+		"eors %[S_5], %[S_5], %[S_1] \n\t"
+		"eors %[reg2], %[S_3], %[S_7] \n\t"
+		"eors %[S_7], %[S_7], %[reg1] \n\t"
+		"ands %[S_1], %[reg1],%[S_1] \n\t"
+		"eors %[S_1], %[reg2],%[S_1] \n\t"
+		"ands %[S_3], %[S_5], %[reg2] \n\t"
+		"eors %[S_3], %[reg1], %[S_3] \n\t"
+		"1: \n\t" /* numeric local label: stays unique if this asm is duplicated */
+		"/*add round const*/ \n\t"
+		"ldrb %[reg1], [%[rc]] \n\t"
+		"and %[reg2], %[reg1], 0xf \n\t"
+		"eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/
+		"eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/
+		"adds %[rc], %[rc], #1 \n\t"
+		"/*sbox first column 0,3,4,7 sbox1(s[0], s[3],ROR(s[4], 28), ROR(s[7], 20)); */ \n\t"
+		"mvns %[S_0], %[S_0] \n\t"
+		"ands %[reg1], %[S_3], %[S_0] \n\t"
+		"eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t"
+		"orrs %[S_4], %[S_3], %[S_4] , ROR #28 \n\t"
+		"eors %[S_0], %[S_0], %[S_7] , ROR #20 \n\t"
+		"eors %[S_4], %[S_4], %[S_0] \n\t"
+		"eors %[reg2], %[S_3], %[S_7] , ROR #20 \n\t"
+		"eors %[S_7], %[reg1],%[S_7] , ROR #20 \n\t"
+		"ands %[S_0], %[reg1],%[S_0] \n\t"
+		"eors %[S_0], %[reg2],%[S_0] \n\t"
+		"ands %[S_3], %[S_4], %[reg2] \n\t"
+		"eors %[S_3], %[reg1], %[S_3] \n\t"
+		"/*sbox first column 1,2,5,6 sbox1(s[1], ROR(s[2], 31), ROR(s[5], 28), ROR(s[6], 19)); */ \n\t"
+		"mvns %[S_1], %[S_1] \n\t"
+		"ands %[reg1], %[S_1], %[S_2] , ROR #31 \n\t"
+		"eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t"
+		"/*orrs %[S_5], %[S_5], ROR #28 %[S_2], ROR #31 31-28=3*/ \n\t"
+		"orrs %[S_5], %[S_5], %[S_2] , ROR #3 \n\t"
+		"eors %[S_1], %[S_1], %[S_6] , ROR #19 \n\t"
+		"eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t"
+		"/*eors %[reg2], %[S_6] , ROR #19, %[S_2] , ROR #31 31-19=12*/ \n\t"
+		"eors %[reg2], %[S_6], %[S_2] , ROR #12 \n\t"
+		"eors %[S_6], %[reg1],%[S_6] , ROR #19 \n\t"
+		"ands %[S_1], %[reg1],%[S_1] \n\t"
+		"eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
+		"ands %[S_2], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
+		"eors %[S_2], %[reg1],%[S_2] \n\t"
+		"/*add round const*/ \n\t"
+		"ldrb %[reg1], [%[rc]] \n\t"
+		"and %[reg2], %[reg1], 0xf \n\t"
+		"eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/
+		"eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/
+		"adds %[rc], %[rc], #1 \n\t"
+		"/*sbox first column 0,2,4,6 sbox1(s[0], s[2], ROR(s[4], 28), ROR(s[6], 20)); */ \n\t"
+		"mvns %[S_0], %[S_0] \n\t"
+		"ands %[reg1], %[S_2], %[S_0] \n\t"
+		"eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t"
+		"orrs %[S_4], %[S_2], %[S_4] , ROR #28 \n\t"
+		"eors %[S_0], %[S_0], %[S_6] , ROR #20 \n\t"
+		"eors %[S_4], %[S_4], %[S_0] \n\t"
+		"eors %[reg2], %[S_2], %[S_6] , ROR #20 \n\t"
+		"eors %[S_6], %[reg1],%[S_6] , ROR #20 \n\t"
+		"ands %[S_0], %[reg1],%[S_0] \n\t"
+		"eors %[S_0], %[reg2],%[S_0] \n\t"
+		"ands %[S_2], %[S_4], %[reg2] \n\t"
+		"eors %[S_2], %[reg1], %[S_2] \n\t"
+		"/*sbox first column 1,3,5,7 sbox1(s[1], ROR(s[3], 31), ROR(s[5], 28), ROR(s[7], 19)); */ \n\t"
+		"mvns %[S_1], %[S_1] \n\t"
+		"ands %[reg1], %[S_1], %[S_3] , ROR #31 \n\t"
+		"eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t"
+		"/*orrs %[S_5], %[S_5], ROR #28 %[S_3], ROR #31 31-28=3*/ \n\t"
+		"orrs %[S_5], %[S_5], %[S_3] , ROR #3 \n\t"
+		"eors %[S_1], %[S_1], %[S_7] , ROR #19 \n\t"
+		"eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t"
+		"/*eors %[reg2], %[S_7] , ROR #19, %[S_3] , ROR #31 31-19=12*/ \n\t"
+		"eors %[reg2], %[S_7], %[S_3] , ROR #12 \n\t"
+		"eors %[S_7], %[reg1],%[S_7] , ROR #19 \n\t"
+		"ands %[S_1], %[reg1],%[S_1] \n\t"
+		"eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
+		"ands %[S_3], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
+		"eors %[S_3], %[reg1],%[S_3] \n\t"
+		"/*loop control*/ \n\t"
+		"subs %[ro], %[ro], #1 \n\t"
+		"bne 1b \n\t"
+		"/*add round const*/ \n\t"
+		"ldrb %[reg1], [%[rc]] \n\t"
+		"and %[reg2], %[reg1], 0xf \n\t"
+		"eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/
+		"eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/
+		"/*sbox first column 0,3,4,7 sbox1(s[0], s[3],ROR(s[4], 28), ROR(s[7], 20)); */ \n\t"
+		"mvns %[S_0], %[S_0] \n\t"
+		"ands %[reg1], %[S_3], %[S_0] \n\t"
+		"eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t"
+		"orrs %[S_4], %[S_3], %[S_4] , ROR #28 \n\t"
+		"eors %[S_0], %[S_0], %[S_7] , ROR #20 \n\t"
+		"eors %[S_4], %[S_4], %[S_0] \n\t"
+		"eors %[reg2], %[S_3], %[S_7] , ROR #20 \n\t"
+		"eors %[S_7], %[reg1],%[S_7] , ROR #20 \n\t"
+		"ands %[S_0], %[reg1],%[S_0] \n\t"
+		"eors %[S_0], %[reg2],%[S_0] \n\t"
+		"ands %[S_3], %[S_4], %[reg2] \n\t"
+		"eors %[S_3], %[reg1], %[S_3] \n\t"
+		"/*sbox first column 1,2,5,6 sbox1(s[1], ROR(s[2], 31), ROR(s[5], 28), ROR(s[6], 19)); */ \n\t"
+		"mvns %[S_1], %[S_1] \n\t"
+		"ands %[reg1], %[S_1], %[S_2] , ROR #31 \n\t"
+		"eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t"
+		"/*orrs %[S_5], %[S_5], ROR #28 %[S_2], ROR #31 31-28=3*/ \n\t"
+		"orrs %[S_5], %[S_5], %[S_2] , ROR #3 \n\t"
+		"eors %[S_1], %[S_1], %[S_6] , ROR #19 \n\t"
+		"eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t"
+		"/*eors %[reg2], %[S_6] , ROR #19, %[S_2] , ROR #31 31-19=12*/ \n\t"
+		"eors %[reg2], %[S_6], %[S_2] , ROR #12 \n\t"
+		"eors %[S_6], %[reg1],%[S_6] , ROR #19 \n\t"
+		"ands %[S_1], %[reg1],%[S_1] \n\t"
+		"eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
+		"ands %[S_2], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
+		"eors %[S_2], %[reg1],%[S_2] \n\t"
+		"ROR %[S_3], #31 \n\t"
+		"ROR %[S_4], #28 \n\t"
+		"ROR %[S_5], #28 \n\t"
+		"ROR %[S_6], #20 \n\t"
+		"ROR %[S_7], #19 \n\t"
+		: /* output variables - including inputs that are changed */
+		[ro] "+r" (rounds),[reg1] "=r" (reg1), [reg2] "=r" (reg2), [rc] "+r" (rc),
+		[S_0] "+r" (s[0]), [S_2] "+r" (s[2]), [S_4] "+r" (s[4]), [S_6] "+r" (s[6]) ,
+		[S_1] "+r" (s[1]), [S_3] "+r" (s[3]), [S_5] "+r" (s[5]), [S_7] "+r" (s[7])
+		: /* input variables */
+		: "cc", "memory" /* flags are updated; the table behind rc is read */
+	);
+}
+
+/*
+ * Bit-interleave the 8 bytes at `in` into the two packed words out[0] and
+ * out[1]. unpackFormat() applies the inverse transformation.
+ */
+void packFormat(u32 * out, const u8 * in) {
+	u32 t0, t1;
+	u32 r0, r1;
+	/* Load through memcpy: `in` may be unaligned and is not a u32 object. */
+	memcpy(&t0, in, sizeof t0);
+	memcpy(&t1, in + 4, sizeof t1);
+	t0 = U32BIG(t0);
+	t1 = U32BIG(t1);
+	r0 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= r0 ^ (r0 << 1);
+	r0 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= r0 ^ (r0 << 2);
+	r0 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= r0 ^ (r0 << 4);
+	r0 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= r0 ^ (r0 << 8); //t0 odd even
+	r1 = (t1 ^ (t1 >> 1)) & 0x22222222, t1 ^= r1 ^ (r1 << 1);
+	r1 = (t1 ^ (t1 >> 2)) & 0x0C0C0C0C, t1 ^= r1 ^ (r1 << 2);
+	r1 = (t1 ^ (t1 >> 4)) & 0x00F000F0, t1 ^= r1 ^ (r1 << 4);
+	r1 = (t1 ^ (t1 >> 8)) & 0x0000FF00, t1 ^= r1 ^ (r1 << 8); //t1 odd even
+	out[0] = (t1 & 0xFFFF0000) | (t0 >> 16); // t1.odd|t0.odd
+	out[1] = (t1 << 16) | (t0 & 0x0000FFFF); // t1.even|t0.even
+}
+/*
+ * Inverse of packFormat(): rebuild the 8 bytes at `out` from in[0] and in[1].
+ */
+void unpackFormat(u8 * out, const u32 * in) {
+	u32 t[2] = { 0 };
+	t[1] = (in[0] & 0xFFFF0000) | (in[1] >> 16);
+	t[0] = (in[1] & 0x0000FFFF) | (in[0] << 16);
+	u32 r0, r1;
+	r0 = (t[0] ^ (t[0] >> 8)) & 0x0000FF00, t[0] ^= r0 ^ (r0 << 8);
+	r0 = (t[0] ^ (t[0] >> 4)) & 0x00F000F0, t[0] ^= r0 ^ (r0 << 4);
+	r0 = (t[0] ^ (t[0] >> 2)) & 0x0C0C0C0C, t[0] ^= r0 ^ (r0 << 2);
+	r0 = (t[0] ^ (t[0] >> 1)) & 0x22222222, t[0] ^= r0 ^ (r0 << 1);
+	r1 = (t[1] ^ (t[1] >> 8)) & 0x0000FF00, t[1] ^= r1 ^ (r1 << 8);
+	r1 = (t[1] ^ (t[1] >> 4)) & 0x00F000F0, t[1] ^= r1 ^ (r1 << 4);
+	r1 = (t[1] ^ (t[1] >> 2)) & 0x0C0C0C0C, t[1] ^= r1 ^ (r1 << 2);
+	r1 = (t[1] ^ (t[1] >> 1)) & 0x22222222, t[1] ^= r1 ^ (r1 << 1);
+	memcpy(out, t, 8 * sizeof(unsigned char));
+}
+
diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_6/auxFormat.h b/knot/Implementations/crypto_aead/knot128v1/armcortexm_6/auxFormat.h
new file mode 100644
index 0000000..56fdf63
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_6/auxFormat.h
@@ -0,0 +1,15 @@
+#ifndef KNOT_AUXFORMAT_H
+#define KNOT_AUXFORMAT_H
+#include"crypto_aead.h"
+#include"api.h"
+#include <string.h> /* memcpy, memset */
+#define U32BIG(x) (x)
+
+typedef unsigned char u8;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+void P256(unsigned int *s, unsigned char *rc, unsigned char rounds);
+void packFormat(u32 * out, const u8 * in);
+void unpackFormat(u8 * out, const u32 * in);
+#endif /* KNOT_AUXFORMAT_H */
diff --git
a/knot/Implementations/crypto_aead/knot128v1/armcortexm_6/crypto_aead.h b/knot/Implementations/crypto_aead/knot128v1/armcortexm_6/crypto_aead.h
new file mode 100644
index 0000000..862d176
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_6/crypto_aead.h
@@ -0,0 +1,18 @@
+
+int crypto_aead_encrypt(
+	unsigned char *c, unsigned long long *clen,
+	const unsigned char *m, unsigned long long mlen,
+	const unsigned char *ad, unsigned long long adlen,
+	const unsigned char *nsec,
+	const unsigned char *npub,
+	const unsigned char *k
+);
+
+int crypto_aead_decrypt(
+	unsigned char *m, unsigned long long *mlen,
+	unsigned char *nsec,
+	const unsigned char *c, unsigned long long clen,
+	const unsigned char *ad, unsigned long long adlen,
+	const unsigned char *npub,
+	const unsigned char *k
+);
diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_6/encrypt.c b/knot/Implementations/crypto_aead/knot128v1/armcortexm_6/encrypt.c
new file mode 100644
index 0000000..f2d3c60
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_6/encrypt.c
@@ -0,0 +1,190 @@
+#include"auxFormat.h"
+
+//#define RATE (64 / 8)
+#define RATE 8
+
+#define PR0_ROUNDS 25
+#define PR_ROUNDS 13
+#define PRF_ROUNDS 15
+
+unsigned char constant6Format[52] = { 0x01, 0x10, 0x02, 0x20, 0x04, 0x41, 0x11,
+		0x12, 0x22, 0x24, 0x45, 0x50, 0x03, 0x30, 0x06, 0x61, 0x15, 0x53, 0x33,
+		0x36, 0x67, 0x74, 0x46, 0x60, 0x05, 0x51, 0x13, 0x32, 0x26, 0x65, 0x54,
+		0x42, 0x21, 0x14, 0x43, 0x31, 0x16, 0x63, 0x35, 0x57, 0x72, 0x27, 0x75,
+		0x56, 0x62, 0x25, 0x55, 0x52, 0x23, 0x34, 0x47, 0x70, };
+
+/*
+ * Compare two CRYPTO_ABYTES-byte tags without an early exit, so the running
+ * time does not depend on where they first differ. Returns non-zero when
+ * the tags differ.
+ */
+static int tagsDiffer(const u8 *a, const u8 *b) {
+	u8 diff = 0;
+	unsigned int i;
+	for (i = 0; i < CRYPTO_ABYTES; i++) {
+		diff |= (u8) (a[i] ^ b[i]);
+	}
+	return diff != 0;
+}
+
+/*
+ * Encrypt mlen bytes at m under key k and nonce npub, absorbing adlen bytes
+ * of associated data first. Writes the ciphertext followed by a
+ * CRYPTO_ABYTES-byte tag to c and stores mlen + CRYPTO_ABYTES in *clen.
+ * nsec is not used. Always returns 0.
+ */
+int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
+		const unsigned char *m, unsigned long long mlen,
+		const unsigned char *ad, unsigned long long adlen,
+		const unsigned char *nsec, const unsigned char *npub,
+		const unsigned char *k) {
+	u32 s[8] = { 0 };
+	u32 dataFormat[2] = { 0 };
+	u8 tempData[16];
+	//initialization
+	*clen = mlen + CRYPTO_ABYTES;
+	packFormat(s, npub);
+	packFormat((s + 2), (npub + 8));
+	packFormat((s + 4), k);
+	packFormat((s + 6), (k + 8));
+	P256(s, constant6Format, PR0_ROUNDS);
+	// process associated data
+	if (adlen) {
+		while (adlen >= RATE) {
+			packFormat(dataFormat, ad);
+			s[0] ^= dataFormat[0];
+			s[1] ^= dataFormat[1];
+			P256(s, constant6Format, PR_ROUNDS);
+			adlen -= RATE;
+			ad += RATE;
+		}
+		memset(tempData, 0, sizeof(tempData));
+		memcpy(tempData, ad, adlen * sizeof(unsigned char));
+		tempData[adlen] = 0x01;
+		packFormat(dataFormat, tempData);
+		s[0] ^= dataFormat[0];
+		s[1] ^= dataFormat[1];
+		P256(s, constant6Format, PR_ROUNDS);
+	}
+	s[6] ^= 0x80000000;
+	//Encryption:
+	if (mlen) {
+		while (mlen >= RATE) {
+			packFormat(dataFormat, m);
+			s[0] ^= dataFormat[0];
+			s[1] ^= dataFormat[1];
+			unpackFormat(c, s);
+
+			P256(s, constant6Format, PR_ROUNDS);
+			mlen -= RATE;
+			m += RATE;
+			c += RATE;
+		}
+		memset(tempData, 0, sizeof(tempData));
+		memcpy(tempData, m, mlen * sizeof(unsigned char));
+		tempData[mlen] = 0x01;
+		packFormat(dataFormat, tempData);
+		s[0] ^= dataFormat[0];
+		s[1] ^= dataFormat[1];
+		unpackFormat(tempData, s);
+		memcpy(c, tempData, mlen * sizeof(unsigned char));
+		c += mlen;
+	}
+	// finalization
+	P256(s, constant6Format, PRF_ROUNDS);
+	unpackFormat(tempData, s);
+	unpackFormat((tempData + 8), (s + 2));
+	memcpy(c, tempData, CRYPTO_ABYTES);
+	return 0;
+}
+
+/*
+ * Decrypt and verify clen bytes at c (ciphertext followed by a
+ * CRYPTO_ABYTES-byte tag). On success stores clen - CRYPTO_ABYTES in *mlen,
+ * writes that many plaintext bytes to m and returns 0. Returns -1 with
+ * *mlen set to 0 when clen is shorter than a tag or the tag does not match;
+ * on a tag mismatch the plaintext already written to m is zeroed.
+ * nsec is not used.
+ */
+int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
+		unsigned char *nsec, const unsigned char *c, unsigned long long clen,
+		const unsigned char *ad, unsigned long long adlen,
+		const unsigned char *npub, const unsigned char *k) {
+	u32 s[8] = { 0 };
+	u32 dataFormat[4] = { 0 };
+	u32 dataFormat_1[2] = { 0 };
+	u8 tempU8[32] = { 0 };
+	u8 tempData[16];
+	unsigned char *const mStart = m; /* m itself is advanced per block below */
+	unsigned int i;
+	if (clen < CRYPTO_ABYTES) {
+		*mlen = 0;
+		return -1;
+	}
+	*mlen = clen - CRYPTO_ABYTES;
+	//initialization
+	packFormat(s, npub);
+	packFormat((s + 2), (npub + 8));
+	packFormat((s + 4), k);
+	packFormat((s + 6), (k + 8));
+	P256(s, constant6Format, PR0_ROUNDS);
+	// process associated data
+	if (adlen) {
+		while (adlen >= RATE) {
+			packFormat(dataFormat, ad);
+			s[0] ^= dataFormat[0];
+			s[1] ^= dataFormat[1];
+			P256(s, constant6Format, PR_ROUNDS);
+			adlen -= RATE;
+			ad += RATE;
+		}
+		memset(tempData, 0, sizeof(tempData));
+		memcpy(tempData, ad, adlen * sizeof(unsigned char));
+		tempData[adlen] = 0x01;
+		packFormat(dataFormat, tempData);
+		s[0] ^= dataFormat[0];
+		s[1] ^= dataFormat[1];
+		P256(s, constant6Format, PR_ROUNDS);
+	}
+
+	s[6] ^= 0x80000000;
+	// process c
+	clen = clen - CRYPTO_ABYTES; /* strip the tag length, not the key length */
+	if (clen) {
+		while (clen >= RATE) {
+			packFormat(dataFormat, c);
+			dataFormat_1[0] = s[0] ^ dataFormat[0];
+			dataFormat_1[1] = s[1] ^ dataFormat[1];
+			unpackFormat(m, dataFormat_1);
+			s[0] = dataFormat[0];
+			s[1] = dataFormat[1];
+			P256(s, constant6Format, PR_ROUNDS);
+			clen -= RATE;
+			m += RATE;
+			c += RATE;
+		}
+		unpackFormat(tempU8, s);
+		memset(tempData, 0, sizeof(tempData));
+		memcpy(tempData, c, clen * sizeof(unsigned char));
+		tempData[clen] = 0x01;
+		/* bytewise XOR: the u8 buffers must not be accessed through u32* */
+		for (i = 0; i < RATE; i++) {
+			tempU8[i] ^= tempData[i];
+		}
+		memcpy(m, tempU8, clen * sizeof(unsigned char));
+		memcpy(tempU8, tempData, clen * sizeof(unsigned char));
+		c += clen;
+
+		packFormat(s, tempU8);
+	}
+	// finalization
+	P256(s, constant6Format, PRF_ROUNDS);
+	unpackFormat(tempData, s);
+	unpackFormat((tempData + 8), (s + 2));
+	if (tagsDiffer(tempData, c)) {
+		memset(mStart, 0, sizeof(unsigned char) * (*mlen));
+		*mlen = 0;
+		return -1;
+	}
+	return 0;
+}
diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_3_1/api.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3_1/api.h
new file mode 100644
index 0000000..d8257f4
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3_1/api.h
@@ -0,0 +1,7 @@
+#define CRYPTO_KEYBYTES 16
+#define CRYPTO_NSECBYTES 0
+#define
CRYPTO_NPUBBYTES 16
+#define CRYPTO_ABYTES 16
+#define CRYPTO_NOOVERLAP 1
+
+
diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_3_1/auxFormat.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3_1/auxFormat.h
new file mode 100644
index 0000000..a19c96a
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3_1/auxFormat.h
@@ -0,0 +1,122 @@
+
+#ifndef KNOT_AUXFORMAT_H
+#define KNOT_AUXFORMAT_H
+#include"crypto_aead.h"
+#include"api.h"
+#include <string.h> /* memcpy, memset */
+#include <stdio.h> /* NOTE(review): header name lost in extraction; assumed */
+#include <stdlib.h> /* NOTE(review): header name lost in extraction; assumed */
+#define U32BIG(x) (x)
+
+typedef unsigned char u8;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+#define ARR_SIZE(a) (sizeof(a)/sizeof((a)[0]))
+#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n))))
+#define sbox(a, b, c, d, f, g, h) \
+{ \
+	t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
+}
+
+#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\
+t3= LOTR32(t2, 2);\
+t4 =LOTR32(t0, 3);\
+t5 = LOTR32(t1, 3); \
+}
+#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\
+t3= LOTR32(t1, 18); \
+t4 = LOTR32(t2, 18);\
+t5 = LOTR32(t0, 19); \
+}
+
+/* One permutation round on s[0..11]; needs s, s_temp and t1..t11 in scope. */
+#define ROUND384(lunNum) {\
+s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;\
+s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;\
+s[2] ^= constant7Format[lunNum] & 0x7;\
+sbox(s[0], s[3], s[6], s[9] , s_temp[3], s_temp[6], s_temp[9]);\
+sbox(s[1], s[4], s[7], s[10], s[3] , s_temp[7], s_temp[10]);\
+sbox(s[2], s[5], s[8], s[11], s[4] , s_temp[8], s_temp[11]);\
+s[5] = LOTR32(s_temp[3], 1); \
+U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\
+U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\
+}
+
+/* XOR the 24 bytes at data into s[0..5]; needs s, dataFormat, temp0..temp2 in scope. */
+#define Processing_Data(data) \
+do { \
+	packU96FormatToThreePacket(dataFormat, data); \
+	s[0] ^= dataFormat[0]; \
+	s[1] ^= dataFormat[1]; \
+	s[2] ^= dataFormat[2]; \
+	packU96FormatToThreePacket((dataFormat + 3), (data + 12)); \
+	s[3] ^= dataFormat[3]; \
+	s[4] ^= dataFormat[4]; \
+	s[5] ^= dataFormat[5]; \
+} while (0)
+
+#define puckU32ToThree_1(x){\
+x &= 0x49249249;\
+x = (x | (x >> 2)) & 0xc30c30c3;\
+x = (x | (x >>4)) & 0x0f00f00f;\
+x = (x | (x >> 8)) & 0xff0000ff;\
+x = (x | (x >> 16)) & 0xfff;\
+}
+#define unpuckU32ToThree_1(x){\
+x &= 0xfff;\
+x = (x | (x << 16)) & 0xff0000ff;\
+x = (x | (x << 8)) & 0x0f00f00f;\
+x = (x | (x << 4)) & 0xc30c30c3;\
+x = (x | (x << 2)) & 0x49249249;\
+}
+
+/* Pack the 12 bytes at in into out[0..2] (memcpy loads: in may be unaligned); needs temp0..temp2 in scope. */
+#define packU96FormatToThreePacket( out, in) { \
+	memcpy(&temp0[0], (const u8 *) (in), 4); temp0[0] = U32BIG(temp0[0]); temp0[1] = temp0[0] >> 1; temp0[2] = temp0[0] >> 2; \
+	puckU32ToThree_1(temp0[0]); \
+	puckU32ToThree_1(temp0[1]); \
+	puckU32ToThree_1(temp0[2]); \
+	memcpy(&temp1[0], (const u8 *) (in) + 4, 4); temp1[0] = U32BIG(temp1[0]); temp1[1] = temp1[0] >> 1; temp1[2] = temp1[0] >> 2; \
+	puckU32ToThree_1(temp1[0]); \
+	puckU32ToThree_1(temp1[1]); \
+	puckU32ToThree_1(temp1[2]); \
+	memcpy(&temp2[0], (const u8 *) (in) + 8, 4); temp2[0] = U32BIG(temp2[0]); temp2[1] = temp2[0] >> 1; temp2[2] = temp2[0] >> 2; \
+	puckU32ToThree_1(temp2[0]); \
+	puckU32ToThree_1(temp2[1]); \
+	puckU32ToThree_1(temp2[2]); \
+	out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; \
+	out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; \
+	out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; \
+}
+
+/* Unpack in[0..2] into the 12 bytes at out; needs temp0..temp2 and t in scope. */
+#define unpackU96FormatToThreePacket( out, in) {\
+	temp0[0] = in[2] & 0x7ff; \
+	temp0[1] = in[1] & 0x7ff; \
+	temp0[2] = in[0] & 0x3ff; \
+	temp1[0] = (in[0]>>10) & 0x7ff; \
+	temp1[1] = (in[2] >>11 ) & 0x7ff; \
+	temp1[2] = (in[1] >> 11) & 0x3ff; \
+	temp2[0] = in[1] >> 21; \
+	temp2[1] = in[0] >> 21; \
+	temp2[2] = in[2] >> 22; \
+	unpuckU32ToThree_1(temp0[0]); \
+	unpuckU32ToThree_1(temp0[1]); \
+	unpuckU32ToThree_1(temp0[2]); \
+	t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; \
+	unpuckU32ToThree_1(temp1[0]); \
+	unpuckU32ToThree_1(temp1[1]); \
+	unpuckU32ToThree_1(temp1[2]); \
+	t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; \
+	unpuckU32ToThree_1(temp2[0]); \
+	unpuckU32ToThree_1(temp2[1]); \
+	unpuckU32ToThree_1(temp2[2]); \
+	t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; \
+	memcpy(out, t, 12 * sizeof(unsigned char)); \
+}
+
+#endif /* KNOT_AUXFORMAT_H */
+
+
+
diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_3_1/crypto_aead.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3_1/crypto_aead.h
new file mode 100644
index 0000000..cdfdf19
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3_1/crypto_aead.h
@@ -0,0 +1,17 @@
+int crypto_aead_encrypt(
+	unsigned char *c, unsigned long long *clen,
+	const unsigned char *m, unsigned long long mlen,
+	const unsigned char *ad, unsigned long long adlen,
+	const unsigned char *nsec,
+	const unsigned char *npub,
+	const unsigned char *k
+);
+
+int crypto_aead_decrypt(
+	unsigned char *m, unsigned long long *mlen,
+	unsigned char *nsec,
+	const unsigned char *c, unsigned long long clen,
+	const unsigned char *ad, unsigned long long adlen,
+	const unsigned char *npub,
+	const unsigned char *k
+);
diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_3_1/encrypt.c b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3_1/encrypt.c
new file mode 100644
index 0000000..6c76253
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3_1/encrypt.c
@@ -0,0 +1,233 @@
+#include"auxFormat.h"
+
+#define aead_RATE (192 / 8)
+
+#define PR0_ROUNDS 76
+#define PR_ROUNDS 28
+#define PRF_ROUNDS 32
+unsigned char constant7Format[76] = {
+/*constant7Format[127]:*/
+0x01, 0x08, 0x40, 0x02, 0x10, 0x80, 0x05, 0x09, 0x48, 0x42, 0x12, 0x90, 0x85,
+		0x0c, 0x41, 0x0a, 0x50, 0x82, 0x15, 0x89, 0x4d, 0x4b, 0x5a, 0xd2, 0x97,
+		0x9c, 0xc4, 0x06, 0x11, 0x88, 0x45, 0x0b, 0x58, 0xc2, 0x17, 0x99, 0xcd,
+		0x4e, 0x53, 0x9a, 0xd5, 0x8e, 0x54, 0x83, 0x1d, 0xc9, 0x4f, 0x5b, 0xda,
+		0xd7, 0x9e, 0xd4, 0x86, 0x14, 0x81, 0x0d, 0x49, 0x4a, 0x52, 0x92, 0x95,
+		0x8c, 0x44, 0x03, 0x18, 0xc0, 0x07, 0x19, 0xc8, 0x47, 0x1b, 0xd8, 0xc7,
+		0x1e, 0xd1, 0x8f, };
+
+/*
+ * Compare two CRYPTO_ABYTES-byte tags without an early exit, so the running
+ * time does not depend on where they first differ. Returns non-zero when
+ * the tags differ.
+ */
+static int tagsDiffer(const u8 *a, const u8 *b) {
+	u8 diff = 0;
+	unsigned int n;
+	for (n = 0; n < CRYPTO_ABYTES; n++) {
+		diff |= (u8) (a[n] ^ b[n]);
+	}
+	return diff != 0;
+}
+
+/*
+ * Encrypt mlen bytes at m under key k and nonce npub, absorbing adlen bytes
+ * of associated data first. Writes the ciphertext followed by a
+ * CRYPTO_ABYTES-byte tag to c and stores mlen + CRYPTO_ABYTES in *clen.
+ * nsec is not used. Always returns 0.
+ */
+int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
+		const unsigned char *m, unsigned long long mlen,
+		const unsigned char *ad, unsigned long long adlen,
+		const unsigned char *nsec, const unsigned char *npub,
+		const unsigned char *k) {
+	u8 i;
+	u32 s[12] = { 0 };
+	u8 tempData[24] = { 0 };
+	u32 dataFormat[6] = { 0 };
+	u32 s_temp[12] = { 0 };
+	u32 t1, t2, t3, t5, t6, t8, t9, t11;
+	u32 temp0[3] = { 0 };
+	u32 temp1[3] = { 0 };
+	u32 temp2[3] = { 0 };
+	u32 t[3] = { 0 };
+	u8 tempU8[24] = { 0 };
+	*clen = mlen + CRYPTO_ABYTES;
+	// initialization
+	packU96FormatToThreePacket(s, npub);
+	memcpy(tempData, npub + 12, sizeof(unsigned char) * 4);
+	memcpy(tempData + 4, k, sizeof(unsigned char) * 16);
+	packU96FormatToThreePacket((s + 3), tempData);
+	packU96FormatToThreePacket((s + 6), (tempData + 12));
+	s[9] = 0x80000000;
+	for (i = 0; i < PR0_ROUNDS; i++) {
+		ROUND384(i);
+	}
+	// process associated data
+	if (adlen) {
+		while (adlen >= aead_RATE) {
+			Processing_Data(ad);
+
+			for (i = 0; i < PR_ROUNDS; i++) {
+				ROUND384(i);
+			}
+			adlen -= aead_RATE;
+			ad += aead_RATE;
+		}
+		memset(tempData, 0, sizeof(tempData));
+		memcpy(tempData, ad, adlen * sizeof(unsigned char));
+		tempData[adlen] = 0x01;
+		Processing_Data(tempData);
+		for (i = 0; i < PR_ROUNDS; i++) {
+			ROUND384(i);
+		}
+	}
+	s[9] ^= 0x80000000;
+	// process p data
+	if (mlen) {
+		while (mlen >= aead_RATE) {
+			Processing_Data(m);
+			unpackU96FormatToThreePacket(c, s);
+			unpackU96FormatToThreePacket((c + 12), (s + 3));
+			for (i = 0; i < PR_ROUNDS; i++) {
+				ROUND384(i);
+			}
+			mlen -= aead_RATE;
+			m += aead_RATE;
+			c += aead_RATE;
+		}
+		memset(tempData, 0, aead_RATE);
+		memcpy(tempData, m, mlen * sizeof(unsigned char));
+		tempData[mlen] = 0x01;
+		Processing_Data(tempData);
+		unpackU96FormatToThreePacket(tempData, s);
+		unpackU96FormatToThreePacket((tempData + 12), (s + 3));
+		memcpy(c, tempData, mlen * sizeof(unsigned char));
+		c += mlen;
+	}
+	// finalization
+	for (i = 0; i < PRF_ROUNDS; i++) {
+		ROUND384(i);
+	}
+	unpackU96FormatToThreePacket(tempU8, s);
+	unpackU96FormatToThreePacket((tempU8 + 12), (s + 3));
+	memcpy(c, tempU8, sizeof(unsigned char) * CRYPTO_ABYTES);
+	return 0;
+}
+
+/*
+ * Decrypt and verify clen bytes at c (ciphertext followed by a
+ * CRYPTO_ABYTES-byte tag). On success stores clen - CRYPTO_ABYTES in *mlen,
+ * writes that many plaintext bytes to m and returns 0. Returns -1 with
+ * *mlen set to 0 when clen is shorter than a tag or the tag does not match;
+ * on a tag mismatch the plaintext already written to m is zeroed.
+ * nsec is not used.
+ */
+int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
+		unsigned char *nsec, const unsigned char *c, unsigned long long clen,
+		const unsigned char *ad, unsigned long long adlen,
+		const unsigned char *npub, const unsigned char *k) {
+
+	u8 i;
+	u32 s[12] = { 0 };
+	u32 s_temp[12] = { 0 };
+	u32 dataFormat[12] = { 0 };
+	u32 dataFormat_1[12] = { 0 };
+	u8 tempData[24] = { 0 };
+	u8 tempU8[24] = { 0 };
+	u32 t1, t2, t3, t5, t6, t8, t9, t11;
+
+	u32 temp0[3] = { 0 };
+	u32 temp1[3] = { 0 };
+	u32 temp2[3] = { 0 };
+	u32 t[3] = { 0 };
+	unsigned char *const mStart = m; /* m itself is advanced per block below */
+	if (clen < CRYPTO_ABYTES) {
+		*mlen = 0;
+		return -1;
+	}
+	*mlen = clen - CRYPTO_ABYTES;
+	// initialization
+	packU96FormatToThreePacket(s, npub);
+	memcpy(tempData, npub + 12, sizeof(unsigned char) * 4);
+	memcpy(tempData + 4, k, sizeof(unsigned char) * 16);
+	packU96FormatToThreePacket((s + 3), tempData);
+	packU96FormatToThreePacket((s + 6), (tempData + 12));
+	s[9] = 0x80000000;
+	for (i = 0; i < PR0_ROUNDS; i++) {
+		ROUND384(i);
+	}
+	// process associated data
+	if (adlen) {
+		while (adlen >= aead_RATE) {
+			Processing_Data(ad);
+
+			for (i = 0; i < PR_ROUNDS; i++) {
+				ROUND384(i);
+			}
+			adlen -= aead_RATE;
+			ad += aead_RATE;
+		}
+		memset(tempData, 0, aead_RATE);
+		memcpy(tempData, ad, adlen * sizeof(unsigned char));
+		tempData[adlen] = 0x01;
+		Processing_Data(tempData);
+		for (i = 0; i < PR_ROUNDS; i++) {
+			ROUND384(i);
+		}
+	}
+	s[9] ^= 0x80000000;
+	///////////
+	clen -= CRYPTO_ABYTES;
+	if (clen) {
+		while (clen >= aead_RATE) {
+			packU96FormatToThreePacket(dataFormat, c);
+			dataFormat_1[0] = s[0] ^ dataFormat[0];
+			dataFormat_1[1] = s[1] ^ dataFormat[1];
+			dataFormat_1[2] = s[2] ^ dataFormat[2];
+			packU96FormatToThreePacket((dataFormat + 3), (c + 12));
+			dataFormat_1[3] = s[3] ^ dataFormat[3];
+			dataFormat_1[4] = s[4] ^ dataFormat[4];
+			dataFormat_1[5] = s[5] ^ dataFormat[5];
+			unpackU96FormatToThreePacket(m, dataFormat_1);
+			unpackU96FormatToThreePacket((m + 12), (dataFormat_1 + 3));
+			s[0] = dataFormat[0];
+			s[1] = dataFormat[1];
+			s[2] = dataFormat[2];
+			s[3] = dataFormat[3];
+			s[4] = dataFormat[4];
+			s[5] = dataFormat[5];
+			for (i = 0; i < PR_ROUNDS; i++) {
+				ROUND384(i);
+			}
+			clen -= aead_RATE;
+			m += aead_RATE;
+			c += aead_RATE;
+		}
+		unpackU96FormatToThreePacket(tempU8, s);
+		unpackU96FormatToThreePacket((tempU8 + 12), (s + 3));
+		memset(tempData, 0, aead_RATE);
+		memcpy(tempData, c, clen * sizeof(unsigned char));
+		tempData[clen] = 0x01;
+		/* bytewise XOR: the u8 buffers must not be accessed through u32* */
+		for (i = 0; i < aead_RATE; i++) {
+			tempU8[i] ^= tempData[i];
+		}
+		memcpy(m, tempU8, clen * sizeof(unsigned char));
+		memcpy(tempU8, tempData, clen * sizeof(unsigned char));
+		c += clen;
+		packU96FormatToThreePacket(s, tempU8);
+		packU96FormatToThreePacket((s + 3), (tempU8 + 12));
+	}
+	// finalization
+	for (i = 0; i < PRF_ROUNDS; i++) {
+		ROUND384(i);
+	}
+	unpackU96FormatToThreePacket(tempU8, s);
+	unpackU96FormatToThreePacket((tempU8 + 12), (s + 3));
+	if (tagsDiffer(tempU8, c)) {
+		memset(mStart, 0, sizeof(unsigned char) * (*mlen));
+		*mlen = 0;
+		return -1;
+	}
+	return 0;
+}
diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_4_1/api.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4_1/api.h
new file mode 100644
index 0000000..d8257f4
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4_1/api.h
@@ -0,0 +1,7 @@
+#define CRYPTO_KEYBYTES 16
+#define CRYPTO_NSECBYTES 0
+#define CRYPTO_NPUBBYTES 16
+#define CRYPTO_ABYTES 16
+#define CRYPTO_NOOVERLAP 1
+
+
diff --git
a/knot/Implementations/crypto_aead/knot128v2/armcortexm_4_1/auxFormat.c b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4_1/auxFormat.c new file mode 100644 index 0000000..83e66c4 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4_1/auxFormat.c @@ -0,0 +1,76 @@ +#include"auxFormat.h" + +void packU96FormatToThreePacket(u32 * out, u8 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + temp1[0] = U32BIG(((u32*)in)[1]); temp1[1] = U32BIG(((u32*)in)[1]) >>1; temp1[2] = U32BIG(((u32*)in)[1]) >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + temp2[0] = U32BIG(((u32*)in)[2]); temp2[1] = U32BIG(((u32*)in)[2]) >> 1; temp2[2] = U32BIG(((u32*)in)[2]) >> 2; + puckU32ToThree_1(temp2[0]); + puckU32ToThree_1(temp2[1]); + puckU32ToThree_1(temp2[2]); + out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; +} +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + temp0[0] = in[2] & 0x7ff; + temp0[1] = in[1] & 0x7ff; + temp0[2] = in[0] & 0x3ff; + temp1[0] = (in[0]>>10) & 0x7ff; + temp1[1] = (in[2] >>11 ) & 0x7ff; + temp1[2] = (in[1] >> 11) & 0x3ff; + temp2[0] = in[1] >> 21; + temp2[1] = in[0] >> 21; + temp2[2] = in[2] >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + 
unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; + memcpy(out, t, 12 * sizeof(unsigned char)); +} +void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) { + u32 s_temp[12] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + unsigned char i; + for (i = 0; i < lunNum; i++) { +s[0] ^= (round[i] >> 6) & 0x3;\ +s[1] ^= (round[i] >> 3) & 0x7;\ +s[2] ^= round[i] & 0x7;\ +sbox(s[0], s[3], s[6], s[9] , s_temp[3], s_temp[6], s_temp[9]);\ +sbox(s[1], s[4], s[7], s[10], s[3] , s_temp[7], s_temp[10]);\ +sbox(s[2], s[5], s[8], s[11], s[4] , s_temp[8], s_temp[11]);\ +s[5] = LOTR32(s_temp[3], 1); \ +U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\ +U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\ + } +} +//12*7=84 +unsigned char constant7Format[80] = { + /*constant7Format[127]:*/ + 0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, + 0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, + 0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, + 0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, + 0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, + 0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, + 0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,}; diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_4_1/auxFormat.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4_1/auxFormat.h new file mode 100644 index 0000000..1073708 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4_1/auxFormat.h @@ -0,0 +1,53 @@ + +#include"crypto_aead.h" +#include"api.h" +#include +#include +#include +#define U32BIG(x) (x) + +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; + +#define aead_RATE 24 +//#define aead_RATE (192 / 8) +#define PR0_ROUNDS 76 +#define PR_ROUNDS 28 +#define PRF_ROUNDS 32 +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define 
LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) + +#define sbox(a, b, c, d, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} + +#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t2, 2);\ +t4 =LOTR32(t0, 3);\ +t5 = LOTR32(t1, 3); \ +} +//55=3*18+1 +#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t1, 18); \ +t4 = LOTR32(t2, 18);\ +t5 = LOTR32(t0, 19); \ +} + +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ +} +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ +} + +unsigned char constant7Format[80]; diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_4_1/crypto_aead.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4_1/crypto_aead.h new file mode 100644 index 0000000..862d176 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4_1/crypto_aead.h @@ -0,0 +1,18 @@ + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k +); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k +); diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_4_1/encrypt.c b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4_1/encrypt.c new file mode 100644 index 0000000..30b9547 --- /dev/null +++ 
b/knot/Implementations/crypto_aead/knot128v2/armcortexm_4_1/encrypt.c @@ -0,0 +1,195 @@ +#include"auxFormat.h" + +void Initialize(u32 *s, const unsigned char *npub, const unsigned char *k) { /* Fills s[0..8] from the 16-byte nonce and 16-byte key, sets s[9], then runs PR0_ROUNDS rounds. */ + u8 tempData[24] = { 0 }; + packU96FormatToThreePacket(s, npub); + memcpy(tempData, npub + 12, sizeof(unsigned char) * 4); /* tempData = npub[12..15] followed by k[0..15]; the last 4 bytes stay zero */ + memcpy(tempData + 4, k, sizeof(unsigned char) * 16); + packU96FormatToThreePacket(s + 3, tempData); + packU96FormatToThreePacket(s + 6, tempData + 12); + s[9] = 0x80000000; + P384(s, constant7Format, PR0_ROUNDS); +} +void ProcessAssocData(u32 *s, const u8 *ad, unsigned long long adlen) { /* Absorbs adlen bytes of associated data into s[0..5] in aead_RATE-byte blocks, then flips the top bit of s[9]. */ + u32 dataFormat[6] = { 0 }; + u8 tempData[24] = { 0 }; + if (adlen) { + while (adlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket(dataFormat + 3, ad + 12); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + P384(s, constant7Format, PR_ROUNDS); + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, aead_RATE); + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; /* padding byte after the remaining (possibly zero) bytes; adlen < aead_RATE here */ + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket(dataFormat + 3, tempData + 12); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + + P384(s, constant7Format, PR_ROUNDS); + } + s[9] ^= 0x80000000; +} + +void ProcessPlaintext(u32 *s, const u8 *m, unsigned long long mlen, + unsigned char *c) { /* Absorbs mlen plaintext bytes and writes mlen ciphertext bytes to c (the unpacked s[0..5] after each absorb). */ + u32 dataFormat[6] = { 0 }; + u8 tempData[24] = { 0 }; + if (mlen) { + while (mlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, m); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket(dataFormat + 3, m + 12); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + unpackU96FormatToThreePacket(c, s); + 
unpackU96FormatToThreePacket(c + 12, s + 3); + + P384(s, constant7Format, PR_ROUNDS); + mlen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + memset(tempData, 0, aead_RATE); + memcpy(tempData, m, mlen * sizeof(unsigned char)); + tempData[mlen] = 0x01; /* padding byte; mlen < aead_RATE here */ + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket(dataFormat + 3, tempData + 12); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + unpackU96FormatToThreePacket(tempData, s); + unpackU96FormatToThreePacket(tempData + 12, s + 3); + memcpy(c, tempData, mlen * sizeof(unsigned char)); /* only the mlen real bytes are emitted; no permutation follows the last block */ + } +} +void Finalize_GenerateTag(u32 *s, unsigned char *c) { /* Runs PRF_ROUNDS rounds and writes a 16-byte tag to c: 12 bytes from s[0..2] plus the first 4 bytes unpacked from s[3..5]. */ + u8 tempData[12] = { 0 }; + P384(s, constant7Format, PRF_ROUNDS); + // return tag + unpackU96FormatToThreePacket(c, s); + unpackU96FormatToThreePacket(tempData, s + 3); + memcpy(c + 12, tempData, sizeof(unsigned char) * 4); +} +void ProcessCiphertext(u32 *s, unsigned char *m, const unsigned char *c, + unsigned long long clen) { /* Decrypts clen ciphertext bytes into m and replaces s[0..5] with the ciphertext blocks. */ + u32 dataFormat[12] = { 0 }; + u32 dataFormat_1[12] = { 0 }; + u8 tempU8[24] = { 0 }, tempData[24] = { 0 }; + if (clen) { + while (clen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, c); + dataFormat_1[0] = s[0] ^ dataFormat[0]; + dataFormat_1[1] = s[1] ^ dataFormat[1]; + dataFormat_1[2] = s[2] ^ dataFormat[2]; + packU96FormatToThreePacket(dataFormat + 3, c + 12); + dataFormat_1[3] = s[3] ^ dataFormat[3]; + dataFormat_1[4] = s[4] ^ dataFormat[4]; + dataFormat_1[5] = s[5] ^ dataFormat[5]; + unpackU96FormatToThreePacket(m, dataFormat_1); + unpackU96FormatToThreePacket(m + 12, dataFormat_1 + 3); + s[0] = dataFormat[0]; + s[1] = dataFormat[1]; + s[2] = dataFormat[2]; + s[3] = dataFormat[3]; + s[4] = dataFormat[4]; + s[5] = dataFormat[5]; + + P384(s, constant7Format, PR_ROUNDS); + clen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + unpackU96FormatToThreePacket(tempU8, s); + 
unpackU96FormatToThreePacket(tempU8 + 12, s + 3); + memset(tempData, 0, aead_RATE); + memcpy(tempData, c, clen * sizeof(unsigned char)); + tempData[clen] = 0x01; /* padding byte; clen < aead_RATE here */ + U32BIG(((u32*)tempU8)[0]) ^= U32BIG( /* NOTE(review): word-wise XOR through u8 arrays assumes both buffers are u32-aligned - confirm for the target */ + ((u32* )tempData)[0]); + U32BIG(((u32*)tempU8)[1]) ^= U32BIG( + ((u32* )tempData)[1]); + U32BIG(((u32*)tempU8)[2]) ^= U32BIG( + ((u32* )tempData)[2]); + U32BIG(((u32*)tempU8)[3]) ^= U32BIG( + ((u32* )tempData)[3]); + U32BIG(((u32*)tempU8)[4]) ^= U32BIG( + ((u32* )tempData)[4]); + U32BIG(((u32*)tempU8)[5]) ^= U32BIG( + ((u32* )tempData)[5]); + memcpy(m, tempU8, clen * sizeof(unsigned char)); /* first clen bytes are the recovered plaintext */ + memcpy(tempU8, tempData, clen * sizeof(unsigned char)); /* state keeps the ciphertext bytes in front of the padded tail */ + c += clen; + packU96FormatToThreePacket(s, tempU8); + packU96FormatToThreePacket(s + 3, tempU8 + 12); + } +} +int Finalize_VerifyTag(u32 *s, const unsigned char *c, unsigned char *m, + unsigned long long *mlen) { /* Recomputes the tag and compares it with the CRYPTO_ABYTES bytes at c. Returns 0 on match; on mismatch zeroes m[0..*mlen), sets *mlen to 0 and returns -1. */ + u8 tempU8[24] = { 0 }; + u8 i, tagDiff = 0; + P384(s, constant7Format, PRF_ROUNDS); + // return tag + unpackU96FormatToThreePacket(tempU8, s); + unpackU96FormatToThreePacket(tempU8 + 12, s + 3); + for (i = 0; i < CRYPTO_ABYTES; i++) /* OR all byte differences together so the running time does not depend on where the tags first differ */ + tagDiff |= tempU8[i] ^ c[i]; + if (tagDiff) { + memset(m, 0, sizeof(unsigned char) * (*mlen)); + *mlen = 0; + return -1; + } + return 0; +} +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { /* Writes mlen ciphertext bytes followed by a CRYPTO_ABYTES tag to c and sets *clen; nsec is not used. Always returns 0. */ + u32 s[12] = { 0 }; + *clen = mlen + CRYPTO_ABYTES; + // initialization + Initialize(s, npub, k); + // process associated data + ProcessAssocData(s, ad, adlen); + ProcessPlaintext(s, m, mlen, c); + // finalization + Finalize_GenerateTag(s, c + mlen); + return 0; +} + +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { /* Decrypts c (ciphertext followed by a CRYPTO_ABYTES tag) into m. Returns 0 on success, -1 if clen is shorter than the tag or the tag does not verify; nsec is not used. */ + u32 s[12] = { 0 }; + 
*mlen = (clen < CRYPTO_ABYTES) ? 0 : clen - CRYPTO_ABYTES; /* never report a wrapped-around length for input shorter than the tag */ + if (clen < CRYPTO_ABYTES) + return -1; + // initialization + Initialize(s, npub, k); + // process associated data + ProcessAssocData(s, ad, adlen); + ProcessCiphertext(s, m, c, clen - CRYPTO_ABYTES); + // finalization + return Finalize_VerifyTag(s, c + clen - CRYPTO_ABYTES, m, mlen); /* the tag occupies the last CRYPTO_ABYTES bytes of c */ +} diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/api.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/api.h new file mode 100644 index 0000000..d8257f4 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/api.h @@ -0,0 +1,7 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 + + diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.c b/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.c new file mode 100644 index 0000000..e0b8ef8 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.c @@ -0,0 +1,87 @@ +#include"auxFormat.h" + + + +void packU96FormatToThreePacket(u32 *out, u8 *in) { /* Reads 12 bytes at in as three u32 words and writes three words to out, each built from bit-gathered pieces of all three inputs. NOTE(review): the u32 loads assume in is suitably aligned - confirm for the target. */ + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + temp0[0] = U32BIG(((u32* )in)[0]); + temp0[1] = U32BIG(((u32*)in)[0]) >> 1; + temp0[2] = U32BIG(((u32*)in)[0]) >> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + temp1[0] = U32BIG(((u32* )in)[1]); + temp1[1] = U32BIG(((u32*)in)[1]) >> 1; + temp1[2] = U32BIG(((u32*)in)[1]) >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + temp2[0] = U32BIG(((u32* )in)[2]); + temp2[1] = U32BIG(((u32*)in)[2]) >> 1; + temp2[2] = U32BIG(((u32*)in)[2]) >> 2; + puckU32ToThree_1(temp2[0]); + puckU32ToThree_1(temp2[1]); + puckU32ToThree_1(temp2[2]); + out[0] = (temp2[1] << 21) | (temp1[0] << 10) | temp0[2]; + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | 
temp0[0]; +} +void unpackU96FormatToThreePacket(u8 *out, u32 *in) { /* Inverse of packU96FormatToThreePacket: splits the three words at in into fields, spreads the bits back out and copies 12 bytes to out. */ + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + temp0[0] = in[2] & 0x7ff; + temp0[1] = in[1] & 0x7ff; + temp0[2] = in[0] & 0x3ff; + temp1[0] = (in[0] >> 10) & 0x7ff; + temp1[1] = (in[2] >> 11) & 0x7ff; + temp1[2] = (in[1] >> 11) & 0x3ff; + temp2[0] = in[1] >> 21; + temp2[1] = in[0] >> 21; + temp2[2] = in[2] >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; + memcpy(out, t, 12 * sizeof(unsigned char)); +} +void ROUND384_Three(unsigned int *s, unsigned char *c, int lunnum) { /* Runs 1 + 3*lunnum rounds on the state s, consuming the constant bytes c[0 .. 3*lunnum] and nothing beyond them. */ + unsigned int t, t1, t2; /* t indexes the constant table; t1, t2 are scratch registers named by the ARC/SBOX macros */ + u32 rci; + rci = c[0]; + ARC(rci); + SBOX(s[0], s[3], s[6], s[9]); + SBOX(s[1], s[4], s[7], s[10]); + SBOX(s[2], s[5], s[8], s[11]); + t = 1; + while (lunnum--) { + const unsigned char *rc = c + t; /* the three constants of this group, read as bytes: no unaligned word load and no read past c[t + 2] */ + rci = rc[0]; + ARC(rci); + SBOX1_ROR(s[0], s[4], s[8], s[10] ); + SBOX2_ROR(s[1], s[5], s[6], s[11]); + SBOX3_ROR(s[2], s[3], s[7], s[9]); + rci = rc[1]; + ARC(rci); + SBOX1_ROR(s[0], s[5], s[7], s[11]); + SBOX2_ROR(s[1], s[3], s[8], s[9]); + SBOX3_ROR(s[2], s[4], s[6], s[10]); + rci = rc[2]; + ARC(rci); + SBOX1_ROR(s[0], s[3], s[6], s[9]); + SBOX2_ROR(s[1], s[4], s[7], s[10]); + SBOX3_ROR(s[2], s[5], s[8], s[11]); + t += 3; + } +} diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.h new file mode 100644 index 0000000..f2d0b5c --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/auxFormat.h @@ -0,0 +1,201 @@ 
+#include"crypto_aead.h" +#include"api.h" +#include +#include +#include +#define U32BIG(x) (x) + +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; + +void ROUND384_Three(unsigned int *s, unsigned char *c,int lunnum); + +#define puckU32ToThree_1(x){ /* in place: keeps the bits selected by 0x49249249 (every third bit from bit 0) and gathers them into the low bits of x */ \ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ +} +#define unpuckU32ToThree_1(x){ /* in place: reverse of puckU32ToThree_1, spreading the low 12 bits of x onto the 0x49249249 positions */ \ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ +} + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +//Processing_Data: +#define Processing_Data(data) /* XORs the 24 bytes at data into s[0..5]; expects s and dataFormat (at least 6 words) in the caller's scope */ \ +do { \ + packU96FormatToThreePacket(dataFormat, data); \ + s[0] ^= dataFormat[0]; \ + s[1] ^= dataFormat[1]; \ + s[2] ^= dataFormat[2]; \ + packU96FormatToThreePacket((dataFormat + 3), (data + 12)); \ + s[3] ^= dataFormat[3]; \ + s[4] ^= dataFormat[4]; \ + s[5] ^= dataFormat[5]; \ +} while (0) +///////////////////////// +void ROUND384_Three(unsigned int *s, unsigned char *c,int lunnum); +#define ARC(rci) /* Adds round constant rci to s[0], s[1], s[2]; expects s and scratch t1 in the caller's scope. The flag-setting instructions are why "cc" is in the clobber list. */ \ + do { \ + __asm__ __volatile__ ( \ + "/*add round const s0 s1 s2 */ \n\t"\ + "ands %[t1], %[rci], #0xc0\n\t" \ + "eors %[S_0], %[S_0], %[t1], LSR #6 \n\t" /*s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;*/\ + "ands %[t1], %[rci], #0x38\n\t" \ + "eors %[S_1], %[S_1], %[t1], LSR #3 \n\t" /*s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;*/\ + "ands %[t1], %[rci], #0x7\n\t" \ + "eors %[S_3], %[S_3], %[t1] \n\t" /*s[2] ^= constant7Format[lunNum] & 0x7;*/\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [rci] "+r" (rci), \ + [S_0] "+r" (s[0]), [S_1] "+r" (s[1]), [S_3] "+r" (s[2]) /* the operand named S_3 is bound to s[2] */ \ + : : "cc");\ +}while (0) +#define SBOX(S1,S2,S3,S4) /* S-box on one column (S1..S4 are updated in place); expects scratch t1, t2 in the caller's scope */ \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[S_4], %[t1] \n\t"\ + "orrs %[S_4], 
%[S_2], %[S_4] \n\t"\ + "eors %[S_0], %[S_6], %[S_0] \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] \n\t"\ + "eors %[S_6], %[S_6], %[t1] \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], %[t2] , %[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : : "cc");\ +}while (0) +#define SBOX1_ROR(S1,S2,S3,S4) /* same S-box with S3 read rotated right by 30 and S4 by 14; expects scratch t1, t2 in the caller's scope */ \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[t1] , %[S_4] ,ROR #30 \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] ,ROR #30 \n\t"\ + "eors %[S_0], %[S_0], %[S_6] ,ROR #14 \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] ,ROR #14 \n\t"\ + "eors %[S_6], %[t1], %[S_6] ,ROR #14 \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], %[t2] , %[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : : "cc");\ +}while (0) +#define SBOX2_ROR(S1,S2,S3,S4) /* same S-box with S3 read rotated right by 29 and S4 by 14; expects scratch t1, t2 in the caller's scope */ \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[t1] , %[S_4] ,ROR #29 \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] ,ROR #29 \n\t"\ + "eors %[S_0], %[S_0], %[S_6] ,ROR #14 \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] ,ROR #14 \n\t"\ + "eors %[S_6], %[t1], %[S_6] ,ROR #14 \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], %[t2] , %[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), 
[S_4] "+r" (S3), [S_6] "+r" (S4) \ + : : "cc");\ +}while (0) + +#define SBOX3_ROR(S1,S2,S3,S4) /* S-box variant whose S2, S3 and S4 inputs are all read through rotations (see the ROR operands); expects scratch t1, t2 in the caller's scope */ \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_0], %[S_2] ,ROR #31 \n\t"\ + "eors %[t1], %[t1], %[S_4] ,ROR #29 \n\t"\ + "orrs %[S_4], %[S_4], %[S_2] ,ROR #2 \n\t"\ + "eors %[S_0], %[S_0], %[S_6] ,ROR #13 \n\t"\ + "eors %[S_4], %[S_0], %[S_4] ,ROR 29 \n\t"\ + "eors %[t2], %[S_6], %[S_2] ,ROR #18 \n\t"\ + "eors %[S_6], %[t1] , %[S_6] ,ROR #13 \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], %[S_0] , %[t2] ,ROR #13 \n\t"\ + "ands %[S_2], %[S_4], %[t2] ,ROR #13 \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : : "cc");\ +}while (0) +#define P384_1( s, round, lunNum) { /* Runs ROUND384_Three(s, round, lunNum), i.e. 1 + 3*lunNum rounds, then permutes and rotates s[3..11]; mov/ROR here do not set flags, so no clobber is listed */ \ + u32 t1;\ + ROUND384_Three(s,round,lunNum);\ + __asm__ __volatile__ ( \ + "/*rotate shift left 1 bit [w9 w5 w1-> (w1,1) w9 w5] */ \n\t"\ + "mov %[t1], %[S_3] \n\t"\ + "mov %[S_3], %[S_4] \n\t"\ + "mov %[S_4], %[S_5] \n\t"\ + "ROR %[S_5], %[t1] , #31 \n\t"\ + "/*rotate shift left 8 bits [w10 w6 w2-> (w6,3) (w2,3) ( w10,2)]*/ \n\t"\ + "mov %[t1], %[S_8] \n\t"\ + "ROR %[S_8], %[S_7] , #29 \n\t"\ + "ROR %[S_7], %[S_6] , #29 \n\t"\ + "ROR %[S_6], %[t1] , #30 \n\t"\ + "/*rotate shift left 55 bit [w11 w7 w3-> (w3,13) (w11,14) ( w7,14)] */ \n\t"\ + "mov %[t1], %[S_9] \n\t"\ + "ROR %[S_9], %[S_10] , #14 \n\t"\ + "ROR %[S_10], %[S_11] , #14 \n\t"\ + "ROR %[S_11], %[t1] , #13 \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1),\ + [S_3] "+r" (s[3]), [S_6] "+r" (s[6]), [S_9] "+r" (s[9]) ,\ + [S_4] "+r" (s[4]), [S_7] "+r" (s[7]), [S_10] "+r" (s[10]),\ + [S_5] "+r" (s[5]), [S_8] "+r" (s[8]), [S_11] "+r" (s[11])\ + : : );\ +}
+#define P384_2( s, round, lunNum) { /* Runs ROUND384_Three, then one extra round using round[lunNum*3+1], then a closing permutation of s[3..11]; expects scratch t2 in the caller's scope */ \ + u32 t1,rci;\ + ROUND384_Three(s,round,lunNum);\ + rci=round[lunNum*3+1];\ 
+ARC(rci);\ +SBOX1_ROR(s[0], s[4], s[8], s[10] );\ +SBOX2_ROR(s[1], s[5], s[6], s[11]);\ +SBOX3_ROR(s[2], s[3], s[7], s[9]);\ + __asm__ __volatile__ ( \ + "/*rotate shift left 1 bit [w9 w5 w1-> (w1,1) w9 w5] */ \n\t"\ + "mov %[t1], %[S_4] \n\t"\ + "mov %[S_4], %[S_3] \n\t"\ + "mov %[S_3], %[S_5] \n\t"\ + "ROR %[S_5], %[t1] , #31 \n\t"\ + "/*rotate shift left 8 bits [w10 w6 w2-> (w6,3) (w2,3) ( w10,2)]*/ \n\t"\ + "mov %[t1], %[S_8] \n\t"\ + "ROR %[S_8], %[S_6] , #29 \n\t"\ + "ROR %[S_6], %[S_7] , #30 \n\t"\ + "ROR %[S_7], %[t1] , #29 \n\t"\ + "/*rotate shift left 55 bit [w11 w7 w3-> (w3,13) (w11,14) ( w7,14)] */ \n\t"\ + "mov %[t1], %[S_10] \n\t"\ + "ROR %[S_10], %[S_9] , #14 \n\t"\ + "ROR %[S_9], %[S_11] , #14 \n\t"\ + "ROR %[S_11], %[t1] , #13 \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1),\ + [S_3] "+r" (s[3]), [S_6] "+r" (s[6]), [S_9] "+r" (s[9]) ,\ + [S_4] "+r" (s[4]), [S_7] "+r" (s[7]), [S_10] "+r" (s[10]),\ + [S_5] "+r" (s[5]), [S_8] "+r" (s[8]), [S_11] "+r" (s[11])\ + : : );\ +} + diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/crypto_aead.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/crypto_aead.h new file mode 100644 index 0000000..cdfdf19 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/crypto_aead.h @@ -0,0 +1,17 @@ +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k +); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k +); diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/encrypt.c 
b/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/encrypt.c new file mode 100644 index 0000000..9a004a3 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_6/encrypt.c @@ -0,0 +1,170 @@ +#include"auxFormat.h" + +//#define aead_RATE (192 / 8) +#define aead_RATE 24 + +#define PR0_ROUNDS 25 /* these three values are passed as the lunNum argument of P384_1 / P384_2 */ +#define PR_ROUNDS 9 +#define PRF_ROUNDS 10 +unsigned char constant7Format[76] = { 0x01, 0x08, 0x40, 0x02, 0x10, 0x80, 0x05, + 0x09, 0x48, 0x42, 0x12, 0x90, 0x85, 0x0c, 0x41, 0x0a, 0x50, 0x82, 0x15, + 0x89, 0x4d, 0x4b, 0x5a, 0xd2, 0x97, 0x9c, 0xc4, 0x06, 0x11, 0x88, 0x45, + 0x0b, 0x58, 0xc2, 0x17, 0x99, 0xcd, 0x4e, 0x53, 0x9a, 0xd5, 0x8e, 0x54, + 0x83, 0x1d, 0xc9, 0x4f, 0x5b, 0xda, 0xd7, 0x9e, 0xd4, 0x86, 0x14, 0x81, + 0x0d, 0x49, 0x4a, 0x52, 0x92, 0x95, 0x8c, 0x44, 0x03, 0x18, 0xc0, 0x07, + 0x19, 0xc8, 0x47, 0x1b, 0xd8, 0xc7, 0x1e, 0xd1, 0x8f, }; + +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { /* Writes mlen ciphertext bytes followed by a CRYPTO_ABYTES tag to c and sets *clen; nsec is not used. Always returns 0. */ + u32 s[12] = { 0 }; + u8 tempData[24] = { 0 }; + u32 dataFormat[6] = { 0 }; + u8 tempU8[24] = { 0 }; + u32 t2; /* scratch referenced by name inside the P384_2 macro expansion */ + *clen = mlen + CRYPTO_ABYTES; + // initialization + packU96FormatToThreePacket(s, npub); + memcpy(tempData, npub + 12, sizeof(unsigned char) * 4); /* tempData = npub[12..15] followed by k[0..15]; the last 4 bytes stay zero */ + memcpy(tempData + 4, k, sizeof(unsigned char) * 16); + packU96FormatToThreePacket((s + 3), tempData); + packU96FormatToThreePacket((s + 6), (tempData + 12)); + s[9] = 0x80000000; + P384_1(s, constant7Format, PR0_ROUNDS); + // process associated data + if (adlen) { + while (adlen >= aead_RATE) { + Processing_Data(ad); + P384_1(s, constant7Format, PR_ROUNDS); + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; /* padding byte; adlen < aead_RATE here */ + Processing_Data(tempData); + P384_1(s, constant7Format, PR_ROUNDS); + } + 
s[9] ^= 0x80000000; + // process p data + if (mlen) { + while (mlen >= aead_RATE) { + Processing_Data(m); + unpackU96FormatToThreePacket(c, s); + unpackU96FormatToThreePacket((c + 12), (s + 3)); + P384_1(s, constant7Format, PR_ROUNDS); + mlen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, m, mlen * sizeof(unsigned char)); + tempData[mlen] = 0x01; /* padding byte; mlen < aead_RATE here */ + Processing_Data(tempData); + unpackU96FormatToThreePacket(tempData, s); + unpackU96FormatToThreePacket((tempData + 12), (s + 3)); + memcpy(c, tempData, mlen * sizeof(unsigned char)); + c += mlen; + } + // finalization + P384_2(s, constant7Format, PRF_ROUNDS); + unpackU96FormatToThreePacket(tempU8, s); + unpackU96FormatToThreePacket((tempU8 + 12), (s + 3)); + memcpy(c, tempU8, sizeof(unsigned char) * CRYPTO_ABYTES); /* c now points just past the ciphertext, so this appends the tag */ + return 0; +} + +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { /* Decrypts c (ciphertext followed by a CRYPTO_ABYTES tag) into m. Returns 0 on success; returns -1 if clen is shorter than the tag or the tag does not verify, and in the latter case m is zeroed. nsec is not used. */ + u32 s[12] = { 0 }; + u32 dataFormat[12] = { 0 }; + u32 dataFormat_1[12] = { 0 }; + u8 tempData[24] = { 0 }; + u8 tempU8[24] = { 0 }; + u32 t2; /* scratch referenced by name inside the P384_2 macro expansion */ + u8 idx, tagDiff = 0; + *mlen = (clen < CRYPTO_ABYTES) ? 0 : clen - CRYPTO_ABYTES; /* never report a wrapped-around length for input shorter than the tag */ + if (clen < CRYPTO_ABYTES) + return -1; + // initialization + packU96FormatToThreePacket(s, npub); + memcpy(tempData, npub + 12, sizeof(unsigned char) * 4); + memcpy(tempData + 4, k, sizeof(unsigned char) * 16); + packU96FormatToThreePacket((s + 3), tempData); + packU96FormatToThreePacket((s + 6), (tempData + 12)); + s[9] = 0x80000000; + P384_1(s, constant7Format, PR0_ROUNDS); + // process associated data + if (adlen) { + while (adlen >= aead_RATE) { + Processing_Data(ad); + P384_1(s, constant7Format, PR_ROUNDS); + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; + Processing_Data(tempData); + 
P384_1(s, constant7Format, PR_ROUNDS); + } + s[9] ^= 0x80000000; + clen -= CRYPTO_ABYTES; /* from here on clen counts ciphertext bytes only */ + if (clen) { + while (clen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, c); + dataFormat_1[0] = s[0] ^ dataFormat[0]; + dataFormat_1[1] = s[1] ^ dataFormat[1]; + dataFormat_1[2] = s[2] ^ dataFormat[2]; + packU96FormatToThreePacket((dataFormat + 3), (c + 12)); + dataFormat_1[3] = s[3] ^ dataFormat[3]; + dataFormat_1[4] = s[4] ^ dataFormat[4]; + dataFormat_1[5] = s[5] ^ dataFormat[5]; + unpackU96FormatToThreePacket(m, dataFormat_1); + unpackU96FormatToThreePacket((m + 12), (dataFormat_1 + 3)); + s[0] = dataFormat[0]; + s[1] = dataFormat[1]; + s[2] = dataFormat[2]; + s[3] = dataFormat[3]; + s[4] = dataFormat[4]; + s[5] = dataFormat[5]; + P384_1(s, constant7Format, PR_ROUNDS); + clen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + unpackU96FormatToThreePacket(tempU8, s); + unpackU96FormatToThreePacket((tempU8 + 12), (s + 3)); + + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, c, clen * sizeof(unsigned char)); + tempData[clen] = 0x01; /* padding byte; clen < aead_RATE here */ + U32BIG(((u32*)tempU8)[0]) ^= U32BIG(((u32* )tempData)[0]); /* NOTE(review): word-wise XOR through u8 arrays assumes both buffers are u32-aligned - confirm for the target */ + U32BIG(((u32*)tempU8)[1]) ^= U32BIG(((u32* )tempData)[1]); + U32BIG(((u32*)tempU8)[2]) ^= U32BIG(((u32* )tempData)[2]); + U32BIG(((u32*)tempU8)[3]) ^= U32BIG(((u32* )tempData)[3]); + U32BIG(((u32*)tempU8)[4]) ^= U32BIG(((u32* )tempData)[4]); + U32BIG(((u32*)tempU8)[5]) ^= U32BIG(((u32* )tempData)[5]); + memcpy(m, tempU8, clen * sizeof(unsigned char)); /* first clen bytes are the recovered plaintext */ + memcpy(tempU8, tempData, clen * sizeof(unsigned char)); /* state keeps the ciphertext bytes in front of the padded tail */ + c +=clen; + packU96FormatToThreePacket(s, tempU8); + packU96FormatToThreePacket((s + 3), (tempU8 + 12)); + } + // finalization + P384_2(s, constant7Format, PRF_ROUNDS); + unpackU96FormatToThreePacket(tempU8, s); + unpackU96FormatToThreePacket((tempU8 + 12), (s + 3)); + for (idx = 0; idx < CRYPTO_ABYTES; idx++) /* OR all byte differences together so the running time does not depend on where the tags first differ */ + tagDiff |= tempU8[idx] ^ c[idx]; + if (tagDiff) { + memset(m, 0, sizeof(unsigned char) * (*mlen)); + *mlen = 0; + return -1; + } + return 0; +} diff --git 
a/knot/Implementations/crypto_aead/knot192/armcortexm_3_1/api.h b/knot/Implementations/crypto_aead/knot192/armcortexm_3_1/api.h new file mode 100644 index 0000000..c3cb1d9 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_3_1/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 24 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 24 +#define CRYPTO_ABYTES 24 +#define CRYPTO_NOOVERLAP 1 + diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_3_1/auxFormat.h b/knot/Implementations/crypto_aead/knot192/armcortexm_3_1/auxFormat.h new file mode 100644 index 0000000..2350eb4 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_3_1/auxFormat.h @@ -0,0 +1,123 @@ + +#include"crypto_aead.h" +#include"api.h" +#include +#include +#include +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) /* 32-bit rotate left; n must be 1..31 because n == 0 would shift by 32 */ + + +extern unsigned char constant7Format[76] ; /* declaration only; encrypt.c provides the initialized definition */ +#define sbox(a, b, c, d, f, g, h) /* Overwrites a and stores the other three outputs in f, g, h; b, c, d are only read. Needs t1,t2,t3,t5,t6,t8,t9,t11 in the caller's scope. */ \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} +#define puckU32ToThree_1(x){ /* in place: keeps the bits selected by 0x49249249 (every third bit from bit 0) and gathers them into the low bits of x */ \ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ +} +#define unpuckU32ToThree_1(x){ /* in place: reverse of puckU32ToThree_1, spreading the low 12 bits of x onto the 0x49249249 positions */ \ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ +} +#define packU96FormatToThreePacket( out, in) { /* Packs the 12 bytes at in into out[0..2]; uses temp0, temp1, temp2 (u32[3]) from the caller's scope. NOTE(review): the u32 loads assume in is suitably aligned - confirm for the target. */ \ + temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; \ + puckU32ToThree_1(temp0[0]); \ + puckU32ToThree_1(temp0[1]); \ + puckU32ToThree_1(temp0[2]); \ + temp1[0] = U32BIG(((u32*)in)[1]); temp1[1] = 
U32BIG(((u32*)in)[1]) >>1; temp1[2] = U32BIG(((u32*)in)[1]) >> 2; \ + puckU32ToThree_1(temp1[0]); \ + puckU32ToThree_1(temp1[1]); \ + puckU32ToThree_1(temp1[2]); \ + temp2[0] = U32BIG(((u32*)in)[2]); temp2[1] = U32BIG(((u32*)in)[2]) >> 1; temp2[2] = U32BIG(((u32*)in)[2]) >> 2; \ + puckU32ToThree_1(temp2[0]); \ + puckU32ToThree_1(temp2[1]); \ + puckU32ToThree_1(temp2[2]); \ + out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; \ + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; \ + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; \ +} +#define unpackU96FormatToThreePacket( out, in) { /* Inverse of packU96FormatToThreePacket: writes 12 bytes to out; uses temp0, temp1, temp2 and t (u32[3]) from the caller's scope */ \ + temp0[0] = in[2] & 0x7ff; \ + temp0[1] = in[1] & 0x7ff; \ + temp0[2] = in[0] & 0x3ff; \ + temp1[0] = (in[0]>>10) & 0x7ff; \ + temp1[1] = (in[2] >>11 ) & 0x7ff; \ + temp1[2] = (in[1] >> 11) & 0x3ff; \ + temp2[0] = in[1] >> 21; \ + temp2[1] = in[0] >> 21; \ + temp2[2] = in[2] >> 22; \ + unpuckU32ToThree_1(temp0[0]); \ + unpuckU32ToThree_1(temp0[1]); \ + unpuckU32ToThree_1(temp0[2]); \ + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; \ + unpuckU32ToThree_1(temp1[0]); \ + unpuckU32ToThree_1(temp1[1]); \ + unpuckU32ToThree_1(temp1[2]); \ + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; \ + unpuckU32ToThree_1(temp2[0]); \ + unpuckU32ToThree_1(temp2[1]); \ + unpuckU32ToThree_1(temp2[2]); \ + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; \ + memcpy(out, t, 12 * sizeof(unsigned char)); \ +} +#define packU48FormatToThreePacket( out, in) { /* Variant that reads 6 bytes at in (one u32 plus one u16) into out[0..2]; uses t1, temp0 and temp1 from the caller's scope */ \ + t1 = (u32)U16BIG(*(u16*)(in + 4)); \ + temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; \ + puckU32ToThree_1(temp0[0]); \ + puckU32ToThree_1(temp0[1]); \ + puckU32ToThree_1(temp0[2]); \ + temp1[0] = t1; temp1[1] = t1 >> 1; temp1[2] = t1 >> 2; \ + puckU32ToThree_1(temp1[0]); \ + puckU32ToThree_1(temp1[1]); \ + puckU32ToThree_1(temp1[2]); \ + out[0] = (temp1[0] << 10) | temp0[2]; \ + out[1] = (temp1[2] << 11) | temp0[1]; \ + out[2] = (temp1[1] << 11) | temp0[0]; \ +} 
+#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){ /* inputs t0..t2, outputs t3..t5; per the name, a rotate by 8 of a row stored across three words */ \ +t3= LOTR32(t2, 2);\ +t4 =LOTR32(t0, 3);\ +t5 = LOTR32(t1, 3); \ +} +#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){ /* inputs t0..t2, outputs t3..t5 */ \ +t3= LOTR32(t1, 18); \ +t4 = LOTR32(t2, 18);\ +t5 = LOTR32(t0, 19); \ +} +#define ROUND384(lunNum) { /* One round using constant7Format[lunNum]; operates on s, s_temp and the sbox() scratch variables from the caller's scope */ \ +s[0] ^= (constant7Format[lunNum] >> 6) & 0x3; /* constant bits 7..6 */ \ +s[1] ^= (constant7Format[lunNum] >> 3) & 0x7; /* constant bits 5..3 */ \ +s[2] ^= constant7Format[lunNum] & 0x7; /* constant bits 2..0 */ \ +sbox(s[0], s[3], s[6], s[9] , s_temp[3], s_temp[6], s_temp[9]);\ +sbox(s[1], s[4], s[7], s[10], s[3] , s_temp[7], s_temp[10]); /* s[3] was already consumed by the call above, so this output is stored there directly */ \ +sbox(s[2], s[5], s[8], s[11], s[4] , s_temp[8], s_temp[11]);\ +s[5] = LOTR32(s_temp[3], 1); \ +U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\ +U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\ +} + +#define Processing_Data(data) /* XORs the 12 bytes at data into s[0..2]; expects s, dataFormat and the pack macro's temporaries in the caller's scope */ \ +do { \ + packU96FormatToThreePacket(dataFormat, data); \ + s[0] ^= dataFormat[0]; \ + s[1] ^= dataFormat[1]; \ + s[2] ^= dataFormat[2]; \ +} while (0) + + diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_3_1/crypto_aead.h b/knot/Implementations/crypto_aead/knot192/armcortexm_3_1/crypto_aead.h new file mode 100644 index 0000000..862d176 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_3_1/crypto_aead.h @@ -0,0 +1,18 @@ + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k +); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k +); diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_3_1/encrypt.c b/knot/Implementations/crypto_aead/knot192/armcortexm_3_1/encrypt.c new file mode 100644 index 0000000..0d1b13a --- 
/dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_3_1/encrypt.c @@ -0,0 +1,196 @@ +#include"auxFormat.h" + +#define aead_RATE 12 +//#define aead_RATE (96 / 8) +#define PR0_ROUNDS 76 +#define PR_ROUNDS 40 +#define PRF_ROUNDS 44 + +unsigned char constant7Format[76] = { +/*constant7Format[127]:*/ +0x01, 0x08, 0x40, 0x02, 0x10, 0x80, 0x05, 0x09, 0x48, 0x42, 0x12, 0x90, 0x85, + 0x0c, 0x41, 0x0a, 0x50, 0x82, 0x15, 0x89, 0x4d, 0x4b, 0x5a, 0xd2, 0x97, + 0x9c, 0xc4, 0x06, 0x11, 0x88, 0x45, 0x0b, 0x58, 0xc2, 0x17, 0x99, 0xcd, + 0x4e, 0x53, 0x9a, 0xd5, 0x8e, 0x54, 0x83, 0x1d, 0xc9, 0x4f, 0x5b, 0xda, + 0xd7, 0x9e, 0xd4, 0x86, 0x14, 0x81, 0x0d, 0x49, 0x4a, 0x52, 0x92, 0x95, + 0x8c, 0x44, 0x03, 0x18, 0xc0, 0x07, 0x19, 0xc8, 0x47, 0x1b, 0xd8, 0xc7, + 0x1e, 0xd1, 0x8f, }; + +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { /* Writes mlen ciphertext bytes followed by a CRYPTO_ABYTES tag to c and sets *clen; nsec is not used. Always returns 0. */ + + u8 i; + u32 s[12] = { 0 }; + u32 dataFormat[3] = { 0 }; + u8 tempData[12] = { 0 }; + u8 tempU8[48] = { 0 }; + u32 s_temp[12] = { 0 }; + + u32 t1, t2, t3, t5, t6, t8, t9, t11; /* scratch variables referenced by name inside the sbox()/ROUND384 macros */ + u32 temp0[3] = { 0 }; /* temp0..temp2 and t are scratch for the pack/unpack macros */ + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + *clen = mlen + CRYPTO_ABYTES; + // initialization + packU96FormatToThreePacket(s, npub); + packU96FormatToThreePacket((s + 3), (npub + 12)); + packU96FormatToThreePacket((s + 6), k); + packU96FormatToThreePacket((s + 9), (k + 12)); + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND384(i); + } + // process associated data + if (adlen) { + while (adlen >= aead_RATE) { + Processing_Data(ad); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, aead_RATE); + memcpy(tempData, ad, adlen); + tempData[adlen] = 0x01; /* padding byte; adlen < aead_RATE here */ + Processing_Data(tempData); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + } + s[9] ^= 
0x80000000; + if (mlen) { + while (mlen >= aead_RATE) { + Processing_Data(m); + unpackU96FormatToThreePacket(c, s); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + mlen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + memset(tempData, 0, aead_RATE); + memcpy(tempData, m, mlen); + tempData[mlen] = 0x01; /* padding byte; mlen < aead_RATE here */ + Processing_Data(tempData); + unpackU96FormatToThreePacket(tempData, s); + memcpy(c, tempData, mlen); + c += mlen; + } + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + + ROUND384(i); + + } + + unpackU96FormatToThreePacket(tempU8, s); + + unpackU96FormatToThreePacket((tempU8 + 12), (s + 3)); + + memcpy(c, tempU8, CRYPTO_ABYTES * sizeof(unsigned char)); /* c now points just past the ciphertext, so this appends the tag */ + return 0; +} + +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { /* Decrypts c (ciphertext followed by a CRYPTO_ABYTES tag) into m. Returns 0 on success; returns -1 if clen is shorter than the tag or the tag does not verify, and in the latter case m is zeroed. nsec is not used. */ + u8 i; + u8 tagDiff = 0; + u32 s[12] = { 0 }; + u32 dataFormat[6] = { 0 }; + u32 dataFormat_1[3] = { 0 }; + u8 tempData[12] = { 0 }; + u8 tempU8[48] = { 0 }; + u32 s_temp[12] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; /* scratch variables referenced by name inside the sbox()/ROUND384 macros */ + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + *mlen = (clen < CRYPTO_ABYTES) ? 0 : clen - CRYPTO_ABYTES; /* never report a wrapped-around length for input shorter than the tag */ + if (clen < CRYPTO_ABYTES) + return -1; + // initialization + packU96FormatToThreePacket(s, npub); + packU96FormatToThreePacket((s + 3), (npub + 12)); + packU96FormatToThreePacket((s + 6), k); + packU96FormatToThreePacket((s + 9), (k + 12)); + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND384(i); + } + // process associated data + if (adlen) { + while (adlen >= aead_RATE) { + Processing_Data(ad); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, aead_RATE); + memcpy(tempData, ad, adlen); + tempData[adlen] = 0x01; + Processing_Data(tempData); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + } + s[9] ^= 0x80000000; + clen -= 
CRYPTO_ABYTES; /* from here on clen counts ciphertext bytes only */ + if (clen) { + while (clen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, c); + dataFormat_1[0] = s[0] ^ dataFormat[0]; + dataFormat_1[1] = s[1] ^ dataFormat[1]; + dataFormat_1[2] = s[2] ^ dataFormat[2]; + unpackU96FormatToThreePacket(m, dataFormat_1); + + s[0] = dataFormat[0]; + s[1] = dataFormat[1]; + s[2] = dataFormat[2]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + clen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + unpackU96FormatToThreePacket(tempU8, s); + memset(tempData, 0, aead_RATE); + memcpy(tempData, c, clen * sizeof(unsigned char)); + tempData[clen] = 0x01; /* padding byte; clen < aead_RATE here */ + U32BIG(((u32*)tempU8)[0]) ^= U32BIG( /* NOTE(review): word-wise XOR through u8 arrays assumes both buffers are u32-aligned - confirm for the target */ + ((u32* )tempData)[0]); + U32BIG(((u32*)tempU8)[1]) ^= U32BIG( + ((u32* )tempData)[1]); + U32BIG(((u32*)tempU8)[2]) ^= U32BIG( + ((u32* )tempData)[2]); + memcpy(m, tempU8, clen * sizeof(unsigned char)); /* first clen bytes are the recovered plaintext */ + memcpy(tempU8, tempData, clen * sizeof(unsigned char)); /* state keeps the ciphertext bytes in front of the padded tail */ + c += clen; + packU96FormatToThreePacket(s, tempU8); + } + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + ROUND384(i); + + } + unpackU96FormatToThreePacket(tempU8, s); + unpackU96FormatToThreePacket((tempU8 + 12), (s + 3)); + + for (i = 0; i < CRYPTO_ABYTES; i++) /* OR all byte differences together so the running time does not depend on where the tags first differ */ + tagDiff |= tempU8[i] ^ c[i]; + if (tagDiff) { + memset(m, 0, sizeof(unsigned char) * (*mlen)); + *mlen = 0; + return -1; + } + return 0; +} diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_4_1/api.h b/knot/Implementations/crypto_aead/knot192/armcortexm_4_1/api.h new file mode 100644 index 0000000..c3cb1d9 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_4_1/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 24 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 24 +#define CRYPTO_ABYTES 24 +#define CRYPTO_NOOVERLAP 1 + diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_4_1/auxFormat.c b/knot/Implementations/crypto_aead/knot192/armcortexm_4_1/auxFormat.c new file mode 100644 index 0000000..4940898 --- /dev/null +++ 
b/knot/Implementations/crypto_aead/knot192/armcortexm_4_1/auxFormat.c @@ -0,0 +1,102 @@ +#include"auxFormat.h" + +void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) { + u32 s_temp[12] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + unsigned char i; + for (i = 0; i < lunNum; i++) { + s[0] ^= (round[i] >> 6) & 0x3; + s[1] ^= (round[i] >> 3) & 0x7; + s[2] ^= round[i] & 0x7; + sbox(s[0], s[3], s[6], s[9], s_temp[3], s_temp[6], s_temp[9]); + sbox(s[1], s[4], s[7], s[10], s[3], s_temp[7], s_temp[10]); + sbox(s[2], s[5], s[8], s[11], s[4], s_temp[8], s_temp[11]); + s[5] = LOTR32(s_temp[3], 1); + U96_BIT_LOTR32_8(s_temp[6], s_temp[7], s_temp[8], s[6], s[7], s[8]); + U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]); + } +} + +void packU96FormatToThreePacket(u32 *out, u8 *in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + temp0[0] = U32BIG(((u32* )in)[0]); + temp0[1] = U32BIG(((u32*)in)[0]) >> 1; + temp0[2] = U32BIG(((u32*)in)[0]) >> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + temp1[0] = U32BIG(((u32* )in)[1]); + temp1[1] = U32BIG(((u32*)in)[1]) >> 1; + temp1[2] = U32BIG(((u32*)in)[1]) >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + temp2[0] = U32BIG(((u32* )in)[2]); + temp2[1] = U32BIG(((u32*)in)[2]) >> 1; + temp2[2] = U32BIG(((u32*)in)[2]) >> 2; + puckU32ToThree_1(temp2[0]); + puckU32ToThree_1(temp2[1]); + puckU32ToThree_1(temp2[2]); + out[0] = (temp2[1] << 21) | (temp1[0] << 10) | temp0[2]; + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; +} +void unpackU96FormatToThreePacket(u8 *out, u32 *in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + temp0[0] = in[2] & 0x7ff; + temp0[1] = in[1] & 0x7ff; + temp0[2] = in[0] & 0x3ff; + temp1[0] = (in[0] >> 10) & 0x7ff; + temp1[1] = (in[2] >> 11) & 
0x7ff; + temp1[2] = (in[1] >> 11) & 0x3ff; + temp2[0] = in[1] >> 21; + temp2[1] = in[0] >> 21; + temp2[2] = in[2] >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; + memcpy(out, t, 12 * sizeof(unsigned char)); +} +void packU48FormatToThreePacket(u32 *out, u8 *in) { + u32 t1 = (u32) U16BIG(*(u16* )(in + 4)); + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + temp0[0] = U32BIG(((u32* )in)[0]); + temp0[1] = U32BIG(((u32*)in)[0]) >> 1; + temp0[2] = U32BIG(((u32*)in)[0]) >> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + temp1[0] = t1; + temp1[1] = t1 >> 1; + temp1[2] = t1 >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + out[0] = (temp1[0] << 10) | temp0[2]; + out[1] = (temp1[2] << 11) | temp0[1]; + out[2] = (temp1[1] << 11) | temp0[0]; +} +unsigned char constant7Format[76] = { +/*constant7Format[127]:*/ +0x01, 0x08, 0x40, 0x02, 0x10, 0x80, 0x05, 0x09, 0x48, 0x42, 0x12, 0x90, 0x85, + 0x0c, 0x41, 0x0a, 0x50, 0x82, 0x15, 0x89, 0x4d, 0x4b, 0x5a, 0xd2, 0x97, + 0x9c, 0xc4, 0x06, 0x11, 0x88, 0x45, 0x0b, 0x58, 0xc2, 0x17, 0x99, 0xcd, + 0x4e, 0x53, 0x9a, 0xd5, 0x8e, 0x54, 0x83, 0x1d, 0xc9, 0x4f, 0x5b, 0xda, + 0xd7, 0x9e, 0xd4, 0x86, 0x14, 0x81, 0x0d, 0x49, 0x4a, 0x52, 0x92, 0x95, + 0x8c, 0x44, 0x03, 0x18, 0xc0, 0x07, 0x19, 0xc8, 0x47, 0x1b, 0xd8, 0xc7, + 0x1e, 0xd1, 0x8f }; diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_4_1/auxFormat.h b/knot/Implementations/crypto_aead/knot192/armcortexm_4_1/auxFormat.h new file mode 100644 index 0000000..82cdaa3 --- /dev/null +++ 
b/knot/Implementations/crypto_aead/knot192/armcortexm_4_1/auxFormat.h @@ -0,0 +1,53 @@ +#include"crypto_aead.h" +#include"api.h" +#include +#include +#include +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) + +void packU48FormatToThreePacket(u32 * out, u8 * in) ; +void unpackU96FormatToThreePacket(u8 * out, u32 * in) ; +void packU96FormatToThreePacket(u32 * out, u8 * in); +void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) ; +#define sbox(a, b, c, d, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} + +#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t2, 2);\ +t4 =LOTR32(t0, 3);\ +t5 = LOTR32(t1, 3); \ +} +//55=3*18+1 +#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t1, 18); \ +t4 = LOTR32(t2, 18);\ +t5 = LOTR32(t0, 19); \ +} + +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ +} +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ +} +unsigned char constant7Format[76]; + diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_4_1/crypto_aead.h b/knot/Implementations/crypto_aead/knot192/armcortexm_4_1/crypto_aead.h new file mode 100644 index 0000000..862d176 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_4_1/crypto_aead.h @@ -0,0 +1,18 @@ + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, 
unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k +); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k +); diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_4_1/encrypt.c b/knot/Implementations/crypto_aead/knot192/armcortexm_4_1/encrypt.c new file mode 100644 index 0000000..cf28b4d --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_4_1/encrypt.c @@ -0,0 +1,167 @@ +#include"auxFormat.h" + +#define aead_RATE (96 / 8) + +#define PR0_ROUNDS 76 +#define PR_ROUNDS 40 +#define PRF_ROUNDS 44 +/* + +#define PR0_ROUNDS 76 +#define PR_ROUNDS 56 +#define PRF_ROUNDS 60 + +#define PR0_ROUNDS 76 +#define PR_ROUNDS 40 +#define PRF_ROUNDS 44 + * */ +void Initialize(u32 *s, const unsigned char *npub, const unsigned char *k) { + packU96FormatToThreePacket(s, npub); + packU96FormatToThreePacket(s + 3, npub + 12); + packU96FormatToThreePacket(s + 6, k); + packU96FormatToThreePacket(s + 9, k + 12); + P384(s, constant7Format, PR0_ROUNDS); +} + +void ProcessAssocData(u32 *s, const u8* ad, unsigned long long adlen) { + + u32 dataFormat[3] = { 0 }; + u8 tempData[12] = { 0 }; + if (adlen) { + while (adlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + P384(s, constant7Format, PR_ROUNDS); + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + P384(s, constant7Format, PR_ROUNDS); + } + s[9] ^= 0x80000000; + +} +void ProcessPlaintext(u32 *s, const u8* m, unsigned long long 
mlen, unsigned char *c) { + + u32 dataFormat[3] = { 0 }; + u8 tempData[12] = { 0 }; + if (mlen) { + while (mlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, m); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + unpackU96FormatToThreePacket(c, s); + P384(s, constant7Format, PR_ROUNDS); + mlen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, m, mlen * sizeof(unsigned char)); + tempData[mlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + unpackU96FormatToThreePacket(tempData, s); + memcpy(c, tempData, mlen * sizeof(unsigned char)); + c += mlen; + } +} + +void Finalize_GenerateTag(u32 *s, unsigned char *c) { + u8 tempU8[32] = { 0 }; + P384(s, constant7Format, PRF_ROUNDS); + // return tag + unpackU96FormatToThreePacket(tempU8, s); + unpackU96FormatToThreePacket((tempU8 + 12), (s + 3)); + memcpy(c, tempU8, CRYPTO_ABYTES * sizeof(unsigned char)); + +} +int Finalize_VerifyTag(u32 *s, const unsigned char *c, unsigned char *m, unsigned long long *mlen) { + u8 tempU8[32] = { 0 }; + P384(s, constant7Format, PRF_ROUNDS); + // return tag + unpackU96FormatToThreePacket(tempU8, s); + unpackU96FormatToThreePacket(tempU8 + 12, s + 3); + if (memcmp((void*)tempU8, (void*)(c), CRYPTO_ABYTES)) { + memset(m, 0, sizeof(unsigned char) * (*mlen)); + *mlen = 0; + return -1; + } + return 0; +} +void ProcessCiphertext(u32 *s, unsigned char *m, const unsigned char *c, unsigned long long clen) +{ + u32 dataFormat[6] = { 0 }; + u32 dataFormat_1[3] = { 0 }; + u8 tempData[48] = { 0 },tempU8[48] = { 0 }; + if (clen) { + while (clen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, c); + dataFormat_1[0] = s[0] ^ dataFormat[0]; + dataFormat_1[1] = s[1] ^ dataFormat[1]; + dataFormat_1[2] = s[2] ^ dataFormat[2]; + unpackU96FormatToThreePacket(m, dataFormat_1); + s[0] = dataFormat[0]; + s[1] = 
dataFormat[1]; + s[2] = dataFormat[2]; + P384(s, constant7Format, PR_ROUNDS); + clen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + unpackU96FormatToThreePacket(tempU8, s); + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, c, clen * sizeof(unsigned char)); + tempData[clen] = 0x01; + U32BIG(((u32*)tempU8)[0]) ^= U32BIG(((u32* )tempData)[0]); + U32BIG(((u32*)tempU8)[1]) ^= U32BIG(((u32* )tempData)[1]); + U32BIG(((u32*)tempU8)[2]) ^= U32BIG(((u32* )tempData)[2]); + memcpy(m, tempU8, clen * sizeof(unsigned char)); + memcpy(tempU8, tempData, clen * sizeof(unsigned char)); + c += clen; + packU96FormatToThreePacket(s, tempU8); + } + +} +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + u32 s[12] = { 0 }; + *clen = mlen + CRYPTO_ABYTES; + // initialization + Initialize(s, npub, k); + // process associated data + ProcessAssocData(s, ad, adlen); + ProcessPlaintext(s, m, mlen, c); + Finalize_GenerateTag(s, c + mlen); + return 0; +} + +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + u32 s[12] = { 0 }; + *mlen = clen - CRYPTO_ABYTES; + if (clen < CRYPTO_ABYTES) + return -1; + Initialize(s, npub, k); + // process associated data + ProcessAssocData(s, ad, adlen); + ProcessCiphertext(s, m, c, clen - CRYPTO_ABYTES); + // finalization + return Finalize_VerifyTag(s, c + clen - CRYPTO_KEYBYTES, m, mlen); +} diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_6/api.h b/knot/Implementations/crypto_aead/knot192/armcortexm_6/api.h new file mode 100644 index 0000000..c3cb1d9 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_6/api.h @@ 
-0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 24 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 24 +#define CRYPTO_ABYTES 24 +#define CRYPTO_NOOVERLAP 1 + diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.c b/knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.c new file mode 100644 index 0000000..9703080 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.c @@ -0,0 +1,105 @@ +#include"auxFormat.h" +void ROUND384_Three(unsigned int *s, unsigned char *c, int lunnum) { + unsigned int t, t1, t2; + u32 rci, temp; + rci = c[0]; + ARC(rci); + SBOX(s[0], s[3], s[6], s[9]); + SBOX(s[1], s[4], s[7], s[10]); + SBOX(s[2], s[5], s[8], s[11]); + t = 1; + while (lunnum--) { + temp = ((u32*) (c + t))[0]; + rci = temp & 0xff; + ARC(rci); + SBOX1_ROR(s[0], s[4], s[8], s[10] ); + SBOX2_ROR(s[1], s[5], s[6], s[11]); + SBOX3_ROR(s[2], s[3], s[7], s[9]); + rci = (temp & 0xff00) >> 8; + ARC(rci); + SBOX1_ROR(s[0], s[5], s[7], s[11]); + SBOX2_ROR(s[1], s[3], s[8], s[9]); + SBOX3_ROR(s[2], s[4], s[6], s[10]); + rci = (temp & 0xff0000) >> 16; + ARC(rci); + SBOX1_ROR(s[0], s[3], s[6], s[9]); + SBOX2_ROR(s[1], s[4], s[7], s[10]); + SBOX3_ROR(s[2], s[5], s[8], s[11]); + t += 3; + } +} + +void packU96FormatToThreePacket(u32 *out, u8 *in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + temp0[0] = U32BIG(((u32* )in)[0]); + temp0[1] = U32BIG(((u32*)in)[0]) >> 1; + temp0[2] = U32BIG(((u32*)in)[0]) >> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + temp1[0] = U32BIG(((u32* )in)[1]); + temp1[1] = U32BIG(((u32*)in)[1]) >> 1; + temp1[2] = U32BIG(((u32*)in)[1]) >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + temp2[0] = U32BIG(((u32* )in)[2]); + temp2[1] = U32BIG(((u32*)in)[2]) >> 1; + temp2[2] = U32BIG(((u32*)in)[2]) >> 2; + puckU32ToThree_1(temp2[0]); + puckU32ToThree_1(temp2[1]); + puckU32ToThree_1(temp2[2]); 
+ out[0] = (temp2[1] << 21) | (temp1[0] << 10) | temp0[2]; + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; +} +void unpackU96FormatToThreePacket(u8 *out, u32 *in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + temp0[0] = in[2] & 0x7ff; + temp0[1] = in[1] & 0x7ff; + temp0[2] = in[0] & 0x3ff; + temp1[0] = (in[0] >> 10) & 0x7ff; + temp1[1] = (in[2] >> 11) & 0x7ff; + temp1[2] = (in[1] >> 11) & 0x3ff; + temp2[0] = in[1] >> 21; + temp2[1] = in[0] >> 21; + temp2[2] = in[2] >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; + memcpy(out, t, 12 * sizeof(unsigned char)); +} +void packU48FormatToThreePacket(u32 *out, u8 *in) { + u32 t1 = (u32) U16BIG(*(u16* )(in + 4)); + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + temp0[0] = U32BIG(((u32* )in)[0]); + temp0[1] = U32BIG(((u32*)in)[0]) >> 1; + temp0[2] = U32BIG(((u32*)in)[0]) >> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + temp1[0] = t1; + temp1[1] = t1 >> 1; + temp1[2] = t1 >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + out[0] = (temp1[0] << 10) | temp0[2]; + out[1] = (temp1[2] << 11) | temp0[1]; + out[2] = (temp1[1] << 11) | temp0[0]; +} diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.h b/knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.h new file mode 100644 index 0000000..81c4cd6 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_6/auxFormat.h @@ -0,0 
+1,186 @@ + +#include"crypto_aead.h" +#include"api.h" +#include +#include +#include +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ +} +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ +} +void ROUND384_Three(unsigned int *s, unsigned char *c,int lunnum); +#define ARC(rci) \ + do { \ + __asm__ __volatile__ ( \ + "/*add round const s0 s1 s2 */ \n\t"\ + "ands %[t1], %[rci], #0xc0\n\t" \ + "eors %[S_0], %[S_0], %[t1], LSR #6 \n\t" /*s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;*/\ + "ands %[t1], %[rci], #0x38\n\t" \ + "eors %[S_1], %[S_1], %[t1], LSR #3 \n\t" /*s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;*/\ + "ands %[t1], %[rci], #0x7\n\t" \ + "eors %[S_3], %[S_3], %[t1] \n\t" /*s[2] ^= constant7Format[lunNum] & 0x7;*/\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [rci] "+r" (rci), \ + [S_0] "+r" (s[0]), [S_1] "+r" (s[1]), [S_3] "+r" (s[2])\ + : : );\ +}while (0) +#define SBOX(S1,S2,S3,S4) \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[S_4], %[t1] \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] \n\t"\ + "eors %[S_0], %[S_6], %[S_0] \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] \n\t"\ + "eors %[S_6], %[S_6], %[t1] \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], %[t2] , %[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" 
(S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : : );\ +}while (0) +#define SBOX1_ROR(S1,S2,S3,S4) \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[t1] , %[S_4] ,ROR #30 \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] ,ROR #30 \n\t"\ + "eors %[S_0], %[S_0], %[S_6] ,ROR #14 \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] ,ROR #14 \n\t"\ + "eors %[S_6], %[t1], %[S_6] ,ROR #14 \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], %[t2] , %[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : : );\ +}while (0) +#define SBOX2_ROR(S1,S2,S3,S4) \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[t1] , %[S_4] ,ROR #29 \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] ,ROR #29 \n\t"\ + "eors %[S_0], %[S_0], %[S_6] ,ROR #14 \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] ,ROR #14 \n\t"\ + "eors %[S_6], %[t1], %[S_6] ,ROR #14 \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], %[t2] , %[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : : );\ +}while (0) + +#define SBOX3_ROR(S1,S2,S3,S4) \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_0], %[S_2] ,ROR #31 \n\t"\ + "eors %[t1], %[t1], %[S_4] ,ROR #29 \n\t"\ + "orrs %[S_4], %[S_4], %[S_2] ,ROR #2 \n\t"\ + "eors %[S_0], %[S_0], %[S_6] ,ROR #13 \n\t"\ + "eors %[S_4], %[S_0], %[S_4] ,ROR 29 \n\t"\ + "eors 
%[t2], %[S_6], %[S_2] ,ROR #18 \n\t"\ + "eors %[S_6], %[t1] , %[S_6] ,ROR #13 \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], %[S_0] , %[t2] ,ROR #13 \n\t"\ + "ands %[S_2], %[S_4], %[t2] ,ROR #13 \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : : );\ +}while (0) +#define P384_1( s, round, lunNum) {\ + u32 t1;\ + ROUND384_Three(s,round,lunNum);\ + __asm__ __volatile__ ( \ + "/*rotate shift left 1 bit [w9 w5 w1-> (w1,1) w9 w5] */ \n\t"\ + "mov %[t1], %[S_3] \n\t"\ + "mov %[S_3], %[S_4] \n\t"\ + "mov %[S_4], %[S_5] \n\t"\ + "ROR %[S_5], %[t1] , #31 \n\t"\ + "/*rotate shift left 8 bits [w10 w6 w2-> (w6,3) (w2,3) ( w10,2)]*/ \n\t"\ + "mov %[t1], %[S_8] \n\t"\ + "ROR %[S_8], %[S_7] , #29 \n\t"\ + "ROR %[S_7], %[S_6] , #29 \n\t"\ + "ROR %[S_6], %[t1] , #30 \n\t"\ + "/*rotate shift left 55 bit [w11 w7 w3-> (w3,13) (w11,14) ( w7,14)] */ \n\t"\ + "mov %[t1], %[S_9] \n\t"\ + "ROR %[S_9], %[S_10] , #14 \n\t"\ + "ROR %[S_10], %[S_11] , #14 \n\t"\ + "ROR %[S_11], %[t1] , #13 \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1),\ + [S_3] "+r" (s[3]), [S_6] "+r" (s[6]), [S_9] "+r" (s[9]) ,\ + [S_4] "+r" (s[4]), [S_7] "+r" (s[7]), [S_10] "+r" (s[10]),\ + [S_5] "+r" (s[5]), [S_8] "+r" (s[8]), [S_11] "+r" (s[11])\ + : : );\ +} +#define P384_2( s, round, lunNum) {\ + u32 t1,rci;\ + ROUND384_Three(s,round,lunNum);\ + rci=round[lunNum*3+1];\ + ARC(rci);\ + SBOX1_ROR(s[0], s[4], s[8], s[10] );\ + SBOX2_ROR(s[1], s[5], s[6], s[11]);\ + SBOX3_ROR(s[2], s[3], s[7], s[9]);\ + __asm__ __volatile__ ( \ + "/*rotate shift left 1 bit [w9 w5 w1-> (w1,1) w9 w5] */ \n\t"\ + "mov %[t1], %[S_4] \n\t"\ + "mov %[S_4], %[S_3] \n\t"\ + "mov %[S_3], %[S_5] \n\t"\ + "ROR %[S_5], %[t1] , #31 \n\t"\ + "/*rotate shift left 8 bits [w10 w6 w2-> (w6,3) (w2,3) ( w10,2)]*/ \n\t"\ + "mov %[t1], %[S_8] 
\n\t"\ + "ROR %[S_8], %[S_6] , #29 \n\t"\ + "ROR %[S_6], %[S_7] , #30 \n\t"\ + "ROR %[S_7], %[t1] , #29 \n\t"\ + "/*rotate shift left 55 bit [w11 w7 w3-> (w3,13) (w11,14) ( w7,14)] */ \n\t"\ + "mov %[t1], %[S_10] \n\t"\ + "ROR %[S_10], %[S_9] , #14 \n\t"\ + "ROR %[S_9], %[S_11] , #14 \n\t"\ + "ROR %[S_11], %[t1] , #13 \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1),\ + [S_3] "+r" (s[3]), [S_6] "+r" (s[6]), [S_9] "+r" (s[9]) ,\ + [S_4] "+r" (s[4]), [S_7] "+r" (s[7]), [S_10] "+r" (s[10]),\ + [S_5] "+r" (s[5]), [S_8] "+r" (s[8]), [S_11] "+r" (s[11])\ + : : );\ +} + diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_6/crypto_aead.h b/knot/Implementations/crypto_aead/knot192/armcortexm_6/crypto_aead.h new file mode 100644 index 0000000..862d176 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_6/crypto_aead.h @@ -0,0 +1,18 @@ + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k +); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k +); diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_6/encrypt.c b/knot/Implementations/crypto_aead/knot192/armcortexm_6/encrypt.c new file mode 100644 index 0000000..92c55ba --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_6/encrypt.c @@ -0,0 +1,169 @@ + +#include"auxFormat.h" + +#define aead_RATE (96 / 8) + +#define PR0_ROUNDS 25 +#define PR_ROUNDS 13 +#define PRF_ROUNDS 14 + +unsigned char constant7Format[76] = { +0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, 
+0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, +0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, +0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, +0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, +0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, +0xc7,0x1e,0xd1,0x8f}; +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + u32 s[12] = { 0 }; + u32 dataFormat[3] = { 0 }; + u8 tempData[24] = { 0 }; + u32 t2; + *clen = mlen + CRYPTO_ABYTES; + // initialization + packU96FormatToThreePacket(s, npub); + packU96FormatToThreePacket((s + 3), (npub + 12)); + packU96FormatToThreePacket((s + 6), k); + packU96FormatToThreePacket((s + 9), (k + 12)); + + P384_1(s, constant7Format,PR0_ROUNDS); + // process associated data + if (adlen) { + while (adlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + P384_1(s, constant7Format,PR_ROUNDS); + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen); + tempData[adlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + P384_1(s, constant7Format,PR_ROUNDS); + } + s[9] ^= 0x80000000; + if (mlen) { + while (mlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, m); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + unpackU96FormatToThreePacket(c, s); + P384_1(s, constant7Format,PR_ROUNDS); + mlen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, m, mlen); + tempData[mlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= 
dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + unpackU96FormatToThreePacket(tempData, s); + memcpy(c, tempData, mlen); + c += mlen; + } + // finalization + P384_2(s, constant7Format,PRF_ROUNDS); + // return tag + unpackU96FormatToThreePacket(tempData, s); + unpackU96FormatToThreePacket((tempData + 12), (s + 3)); + memcpy(c, tempData, CRYPTO_ABYTES * sizeof(unsigned char)); + return 0; +} + +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + u32 s[12] = { 0 }; + u32 dataFormat[6] = { 0 }; + u32 dataFormat_1[3] = { 0 }; + u8 tempData[12] = { 0 }; + u8 tempU8[48] = { 0 }; + u32 t2; + *mlen = clen - CRYPTO_ABYTES; + if (clen < CRYPTO_ABYTES) + return -1; + // initialization + packU96FormatToThreePacket(s, npub); + packU96FormatToThreePacket((s + 3), (npub + 12)); + packU96FormatToThreePacket((s + 6), k); + packU96FormatToThreePacket((s + 9), (k + 12)); + + P384_1(s, constant7Format,PR0_ROUNDS); + // process associated data + if (adlen) { + while (adlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + + P384_1(s, constant7Format,PR_ROUNDS); + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen); + tempData[adlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + P384_1(s, constant7Format,PR_ROUNDS); + } + s[9] ^= 0x80000000; + clen -= CRYPTO_ABYTES; + if (clen) { + while (clen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, c); + dataFormat_1[0] = s[0] ^ dataFormat[0]; + dataFormat_1[1] = s[1] ^ dataFormat[1]; + dataFormat_1[2] = s[2] ^ dataFormat[2]; + unpackU96FormatToThreePacket(m, dataFormat_1); + s[0] = 
dataFormat[0]; + s[1] = dataFormat[1]; + s[2] = dataFormat[2]; + P384_1(s, constant7Format,PR_ROUNDS); + clen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + unpackU96FormatToThreePacket(tempU8, s); + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, c, clen * sizeof(unsigned char)); + tempData[clen] = 0x01; + U32BIG(((u32*)tempU8)[0]) ^= U32BIG(((u32* )tempData)[0]); + U32BIG(((u32*)tempU8)[1]) ^= U32BIG(((u32* )tempData)[1]); + U32BIG(((u32*)tempU8)[2]) ^= U32BIG(((u32* )tempData)[2]); + memcpy(m, tempU8, clen * sizeof(unsigned char)); + memcpy(tempU8, tempData, clen * sizeof(unsigned char)); + packU96FormatToThreePacket(s, tempU8); + c+=clen; + } + // finalization + P384_2(s, constant7Format,PRF_ROUNDS); + // return tag + unpackU96FormatToThreePacket(tempU8, s); + unpackU96FormatToThreePacket(tempU8 + 12, s + 3); + if (memcmp((void*)tempU8, (void*)(c), CRYPTO_ABYTES)) { + memset(m, 0, sizeof(unsigned char) * (*mlen)); + *mlen = 0; + return -1; + } + return 0; +}
diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_6/api.h b/knot/Implementations/crypto_aead/knot256/armcortexm_6/api.h new file mode 100644 index 0000000..396f722 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_6/api.h @@ -0,0 +1,7 @@
/* Size parameters of this AEAD variant, in bytes. */
#define CRYPTO_KEYBYTES 32 //256/8=32
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 32 /* encrypt.c reads npub[0..15] and npub[16..31] */
#define CRYPTO_ABYTES 32    /* tag length appended to the ciphertext by encrypt.c */
#define CRYPTO_NOOVERLAP 1


diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_6/auxFormat.c b/knot/Implementations/crypto_aead/knot256/armcortexm_6/auxFormat.c new file mode 100644 index 0000000..d38cf15 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_6/auxFormat.c @@ -0,0 +1,117 @@
#include"auxFormat.h"

/*
 * P512 - apply the 512-bit permutation to the 16-word state `s` in place.
 *
 * s      : 16 x 32-bit state words (packed representation produced by
 *          packU128FormatToFourPacket).
 * c      : round-constant table; one byte is consumed per round, starting
 *          at c[0].
 * rounds : number of 4-round groups executed by the main loop.  The function
 *          performs 1 + 4*rounds + 3 rounds in total and therefore reads
 *          exactly c[0] .. c[4*rounds + 3].
 *
 * The round constants are fetched one byte at a time.  The previous version
 * fetched four at once through a (u32 *) cast of c + rcNum, which
 *   - is a misaligned, type-punned load (c + 1 is never 4-byte aligned), and
 *   - read one byte past the 100-entry table on the last fetch when
 *     rounds == 24 (bytes 97..100 of a 100-byte array).
 * Byte fetches yield the same values as the little-endian word fetch did.
 *
 * The rotations of the state rows are folded into the operand shifts of the
 * P_512_SBOX2_ROR / P_512_SBOX3_ROR macros; the word arguments passed to them
 * cycle with period four, and P_512_SR applies the rotations still pending
 * after the last round.
 */
void P512(unsigned int *s, unsigned char *c, unsigned char rounds) {
    /* t1, t2, t3, t9 are scratch variables referenced by name inside the
     * P_512_* asm macros; rci must be an lvalue for P_512_ARC. */
    u32 rci, t1, t2, t3, t9;
    unsigned char rcNum = 0;

    /* Round 0: plain S-box on the four columns, no pending rotations yet. */
    rci = c[rcNum++];
    P_512_ARC(rci);
    P_512_SBOX1(s[0], s[4], s[8 ], s[12]);
    P_512_SBOX1(s[1], s[5], s[9 ], s[13]);
    P_512_SBOX1(s[2], s[6], s[10], s[14]);
    P_512_SBOX1(s[3], s[7], s[11], s[15]);

    while (rounds--) {
        rci = c[rcNum];
        P_512_ARC(rci);
        P_512_SBOX2_ROR(s[0], s[7], s[8 ], s[15]);
        P_512_SBOX3_ROR(s[1], s[4], s[9 ], s[12]);
        P_512_SBOX3_ROR(s[2], s[5], s[10], s[13]);
        P_512_SBOX3_ROR(s[3], s[6], s[11], s[14]);

        rci = c[rcNum + 1];
        P_512_ARC(rci);
        P_512_SBOX2_ROR(s[0], s[6], s[8 ], s[14]);
        P_512_SBOX3_ROR(s[1], s[7], s[9 ], s[15]);
        P_512_SBOX3_ROR(s[2], s[4], s[10], s[12]);
        P_512_SBOX3_ROR(s[3], s[5], s[11], s[13]);

        rci = c[rcNum + 2];
        P_512_ARC(rci);
        P_512_SBOX2_ROR(s[0], s[5], s[8 ], s[13]);
        P_512_SBOX3_ROR(s[1], s[6], s[9 ], s[14]);
        P_512_SBOX3_ROR(s[2], s[7], s[10], s[15]);
        P_512_SBOX3_ROR(s[3], s[4], s[11], s[12]);

        rci = c[rcNum + 3];
        P_512_ARC(rci);
        P_512_SBOX2_ROR(s[0], s[4], s[8 ], s[12]);
        P_512_SBOX3_ROR(s[1], s[5], s[9 ], s[13]);
        P_512_SBOX3_ROR(s[2], s[6], s[10], s[14]);
        P_512_SBOX3_ROR(s[3], s[7], s[11], s[15]);

        rcNum += 4;
    }

    /* Final three rounds: same argument pattern as the first three rounds
     * of a loop iteration. */
    rci = c[rcNum];
    P_512_ARC(rci);
    P_512_SBOX2_ROR(s[0], s[7], s[8 ], s[15]);
    P_512_SBOX3_ROR(s[1], s[4], s[9 ], s[12]);
    P_512_SBOX3_ROR(s[2], s[5], s[10], s[13]);
    P_512_SBOX3_ROR(s[3], s[6], s[11], s[14]);

    rci = c[rcNum + 1];
    P_512_ARC(rci);
    P_512_SBOX2_ROR(s[0], s[6], s[8 ], s[14]);
    P_512_SBOX3_ROR(s[1], s[7], s[9 ], s[15]);
    P_512_SBOX3_ROR(s[2], s[4], s[10], s[12]);
    P_512_SBOX3_ROR(s[3], s[5], s[11], s[13]);

    rci = c[rcNum + 2];
    P_512_ARC(rci);
    P_512_SBOX2_ROR(s[0], s[5], s[8 ], s[13]);
    P_512_SBOX3_ROR(s[1], s[6], s[9 ], s[14]);
    P_512_SBOX3_ROR(s[2], s[7], s[10], s[15]);
    P_512_SBOX3_ROR(s[3], s[4], s[11], s[12]);

    /* Materialise the rotations that the *_ROR macros left pending. */
    P_512_SR(s[4], s[8], s[9], s[10], s[11], s[12], s[13], s[14], s[15]);
}

/*
 * packU128FormatToFourPacket - convert 16 bytes at `in` into four packed
 * state words at `out`.
 *
 * Each 32-bit input word has the puck32 bit permutation applied twice; the
 * four results are then byte-transposed: out[j] collects byte j of words
 * 0..3 (word 0 in the low byte, word 3 in the high byte).
 *
 * `in` is copied with memcpy instead of being dereferenced through a
 * (u32 *) cast, because callers pass byte pointers of arbitrary alignment
 * (message, AD and ciphertext buffers).
 */
void packU128FormatToFourPacket(u32 *out, u8 *in) {
    u32 w[4];
    u32 t0, t1, t2, t3;
    u32 temp1; /* scratch variable referenced by name inside puck32() */

    memcpy(w, in, sizeof(w));
    t0 = U32BIG(w[0]);
    t1 = U32BIG(w[1]);
    t2 = U32BIG(w[2]);
    t3 = U32BIG(w[3]);

    puck32(t0);
    puck32(t0);
    puck32(t1);
    puck32(t1);
    puck32(t2);
    puck32(t2);
    puck32(t3);
    puck32(t3);

    out[3] = (t3 & 0xff000000) | ((t2 >> 8) & 0x00ff0000)
            | ((t1 >> 16) & 0x0000ff00) | (t0 >> 24);
    out[2] = ((t3 << 8) & 0xff000000) | (t2 & 0x00ff0000)
            | ((t1 >> 8) & 0x0000ff00) | ((t0 >> 16) & 0x000000ff);
    out[1] = ((t3 << 16) & 0xff000000) | ((t2 << 8) & 0x00ff0000)
            | (t1 & 0x0000ff00) | ((t0 >> 8) & 0x000000ff);
    out[0] = ((t3 << 24) & 0xff000000) | ((t2 << 16) & 0x00ff0000)
            | ((t1 << 8) & 0x0000ff00) | (t0 & 0x000000ff);
}

/*
 * unpackU128FormatToFourPacket - inverse of packU128FormatToFourPacket:
 * byte-transpose the four packed words at `in`, apply unpuck32 twice to each
 * result and store the 16 resulting bytes at `out`.
 */
void unpackU128FormatToFourPacket(u8 *out, u32 *in) {
    u32 t[4] = { 0 };
    u32 r0; /* scratch variable referenced by name inside unpuck32() */

    t[3] = (in[3] & 0xff000000) | ((in[2] >> 8) & 0x00ff0000)
            | ((in[1] >> 16) & 0x0000ff00) | (in[0] >> 24);
    t[2] = ((in[3] << 8) & 0xff000000) | (in[2] & 0x00ff0000)
            | ((in[1] >> 8) & 0x0000ff00) | ((in[0] >> 16) & 0x000000ff);
    t[1] = ((in[3] << 16) & 0xff000000) | ((in[2] << 8) & 0x00ff0000)
            | (in[1] & 0x0000ff00) | ((in[0] >> 8) & 0x000000ff);
    t[0] = ((in[3] << 24) & 0xff000000) | ((in[2] << 16) & 0x00ff0000)
            | ((in[1] << 8) & 0x0000ff00) | (in[0] & 0x000000ff);

    unpuck32(t[0]);
    unpuck32(t[0]);
    unpuck32(t[1]);
    unpuck32(t[1]);
    unpuck32(t[2]);
    unpuck32(t[2]);
    unpuck32(t[3]);
    unpuck32(t[3]);

    memcpy(out, t, 16 * sizeof(unsigned char));
}

/*
 * packU64FormatToFourPacket - 8-byte variant of packU128FormatToFourPacket:
 * only two input words exist, so each out[j] holds byte j of word 0 in its
 * low byte and byte j of word 1 in the next byte; the upper half is zero.
 */
void packU64FormatToFourPacket(u32 *out, u8 *in) {
    u32 w[2];
    u32 t1, t2;
    u32 temp1; /* scratch variable referenced by name inside puck32() */

    /* memcpy instead of a (u32 *) cast: `in` may be unaligned. */
    memcpy(w, in, sizeof(w));
    t1 = U32BIG(w[0]);
    t2 = U32BIG(w[1]);

    puck32(t1);
    puck32(t1);
    puck32(t2);
    puck32(t2);

    out[3] = ((t2 >> 16) & 0x0000ff00) | ((t1 >> 24));
    out[2] = ((t2 >> 8) & 0x0000ff00) | ((t1 >> 16) & 0x000000ff);
    out[1] = (t2 & 0x0000ff00) | ((t1 >> 8) & 0x000000ff);
    out[0] = ((t2 << 8) & 0x0000ff00) | (t1 & 0x000000ff);
}
diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_6/auxFormat.h b/knot/Implementations/crypto_aead/knot256/armcortexm_6/auxFormat.h new file mode 100644 index 0000000..2314568 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_6/auxFormat.h @@ -0,0 +1,149 @@ +#include"crypto_aead.h" +#include"api.h"
+#include +#define U32BIG(x) (x) + +void P512(unsigned int *s, unsigned char *round, unsigned char rounds); + +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; +void printU8(char name[], u8 var[], long len, int offset); + +#define puck32(in)\ +{\ +temp1 = (in ^ (in >> 1)) & 0x22222222; in ^= temp1 ^ (temp1 << 1);\ +temp1 = (in ^ (in >> 2)) & 0x0C0C0C0C; in ^= temp1 ^ (temp1 << 2);\ +temp1 = (in ^ (in >> 4)) & 0x00F000F0; in ^= temp1 ^ (temp1 << 4);\ +temp1 = (in ^ (in >> 8)) & 0x0000FF00; in ^= temp1 ^ (temp1 << 8);\ +} +#define unpuck32(t0){\ + r0 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= r0 ^ (r0 << 8); \ + r0 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= r0 ^ (r0 << 4); \ + r0 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= r0 ^ (r0 << 2); \ + r0 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= r0 ^ (r0 << 1); \ +} +//t1 +#define P_512_ARC(rci) \ + do { \ + __asm__ __volatile__ ( \ + "/*add round const s0 s1 s2 s3*/ \n\t"\ + "ands %[t1] , %[rci], #0xc0\n\t" \ + "eors %[S_3], %[S_3], %[t1], LSR #6 \n\t" /*s[3] ^= (constant7Format_aead[lunNum] >> 6) & 0x3;*/\ + "ands %[t2] , %[rci], #0x30\n\t" \ + "eors %[S_2], %[S_2], %[t2], LSR #4 \n\t" /*s[2] ^= (constant7Format_aead[lunNum] >> 4) & 0x3;*/\ + "ands %[t3] , %[rci], #0xc\n\t" \ + "eors %[S_1], %[S_1], %[t3], LSR #2 \n\t" /*s[1] ^= (constant7Format_aead[lunNum] >> 2) & 0x3;*/\ + "ands %[t4] , %[rci], #0x3\n\t" \ + "eors %[S_0], %[S_0], %[t4] \n\t" /*s[0] ^= constant7Format_aead[lunNum] & 0x3;*/\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2), [t3] "=r" (t3), [t4] "=r" (t9), [rci] "+r" (rci),\ + [S_0] "+r" (s[0]), [S_1] "+r" (s[1]), [S_2] "+r" (s[2]),[S_3] "+r" (s[3])\ + : : );\ +}while (0) + +#define P_512_SR(S4,S8,S9,S10,S11,S12,S13,S14,S15) \ + do { \ + __asm__ __volatile__ ( \ + "ROR %[S_4] , #31 \n\t"\ + "ROR %[S_11] , #28 \n\t"\ + "ROR %[S_10] , #28 \n\t"\ + "ROR %[S_9] , #28 \n\t"\ + "ROR %[S_8] , #28 \n\t"\ + "ROR %[S_12] , #25 \n\t"\ + "ROR %[S_13] 
, #26 \n\t"\ + "ROR %[S_14] , #26 \n\t"\ + "ROR %[S_15] , #26 \n\t"\ + : /* output variables - including inputs that are changed */\ + [S_4] "+r" (S4),\ + [S_12] "+r" (S12), [S_8] "+r" (S8) ,\ + [S_13] "+r" (S13), [S_9] "+r" (S9) ,\ + [S_14] "+r" (S14), [S_10] "+r" (S10),\ + [S_15] "+r" (S15), [S_11] "+r" (S11)\ + : : );\ +}while (0) +unsigned char constant7Format_aead[100]; +#define P_512_SBOX2_ROR(S1,S2,S3,S4) \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox first column 0,3,4,7 sbox1(s[0], ROR(s[3], 31),ROR(s[4], 28), ROR(s[7], 25)); */ \n\t" \ + "mvns %[S_0], %[S_0] \n\t" \ + "ands %[t1], %[S_0], %[S_3] , ROR #31 \n\t" \ + "eors %[t1], %[t1], %[S_4] , ROR #28 \n\t" \ + "orrs %[S_4], %[S_4], %[S_3] , ROR #3 \n\t" \ + "eors %[S_0], %[S_0], %[S_7] , ROR #25 \n\t" \ + "eors %[S_4], %[S_0], %[S_4] , ROR #28 /* 31-28=3*/ \n\t" \ + "eors %[t2], %[S_7], %[S_3] , ROR #6 \n\t" \ + "eors %[S_7], %[t1],%[S_7] , ROR #25 \n\t" \ + "ands %[S_0], %[t1],%[S_0] \n\t" \ + "eors %[S_0], %[S_0], %[t2] , ROR #25 /* 31-25=6*/ \n\t" \ + "ands %[S_3], %[S_4], %[t2] , ROR #25 /* 31-25=6*/ \n\t" \ + "eors %[S_3], %[t1],%[S_3] \n\t" \ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_3] "+r" (S2), [S_4] "+r" (S3), [S_7] "+r" (S4) \ + : : );\ +}while (0) +#define P_512_SBOX3_ROR(S1,S2,S3,S4) \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox first column 0,3,4,7 sbox1(s[0], s[3],ROR(s[4], 28), ROR(s[7], 26)); */ \n\t" \ + "mvns %[S_0], %[S_0] \n\t" \ + "ands %[t1], %[S_3], %[S_0] \n\t" \ + "eors %[t1], %[t1], %[S_4] , ROR #28 \n\t" \ + "orrs %[S_4], %[S_3], %[S_4] , ROR #28 \n\t" \ + "eors %[S_0], %[S_0], %[S_7] , ROR #26 \n\t" \ + "eors %[S_4], %[S_4], %[S_0] \n\t" \ + "eors %[t2], %[S_3], %[S_7] , ROR #26 \n\t" \ + "eors %[S_7], %[t1] , %[S_7] , ROR #26 \n\t" \ + "ands %[S_0], %[t1],%[S_0] \n\t" \ + "eors %[S_0], %[t2],%[S_0] \n\t" \ + "ands %[S_3], %[S_4], %[t2] \n\t" \ + "eors %[S_3], %[t1], %[S_3] \n\t" \ + : /* output 
variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_3] "+r" (S2), [S_4] "+r" (S3), [S_7] "+r" (S4) \ + : : );\ +}while (0) + +#define P_512_SBOX1(S1,S2,S3,S4) \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[S_4], %[t1] \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] \n\t"\ + "eors %[S_0], %[S_6], %[S_0] \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] \n\t"\ + "eors %[S_6], %[S_6], %[t1] \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], %[t2] , %[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : : );\ +}while (0) +#define P_512_SR(S4,S8,S9,S10,S11,S12,S13,S14,S15) \ + do { \ + __asm__ __volatile__ ( \ + "ROR %[S_4] , #31 \n\t"\ + "ROR %[S_11] , #28 \n\t"\ + "ROR %[S_10] , #28 \n\t"\ + "ROR %[S_9] , #28 \n\t"\ + "ROR %[S_8] , #28 \n\t"\ + "ROR %[S_12] , #25 \n\t"\ + "ROR %[S_13] , #26 \n\t"\ + "ROR %[S_14] , #26 \n\t"\ + "ROR %[S_15] , #26 \n\t"\ + : /* output variables - including inputs that are changed */\ + [S_4] "+r" (S4),\ + [S_12] "+r" (S12), [S_8] "+r" (S8) ,\ + [S_13] "+r" (S13), [S_9] "+r" (S9) ,\ + [S_14] "+r" (S14), [S_10] "+r" (S10),\ + [S_15] "+r" (S15), [S_11] "+r" (S11)\ + : : );\ +}while (0) diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_6/crypto_aead.h b/knot/Implementations/crypto_aead/knot256/armcortexm_6/crypto_aead.h new file mode 100644 index 0000000..cdfdf19 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_6/crypto_aead.h @@ -0,0 +1,17 @@ +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + 
const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k +); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k +); diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_6/encrypt.c b/knot/Implementations/crypto_aead/knot256/armcortexm_6/encrypt.c new file mode 100644 index 0000000..99e9e44 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_6/encrypt.c @@ -0,0 +1,186 @@ +#include"auxFormat.h" + +#define aead_RATE 16 +//#define aead_RATE (128 / 8) +#define PR0_ROUNDS 24 +#define PR_ROUNDS 12 +#define PRF_ROUNDS 13 +unsigned char constant7Format_aead[100] = { 0x01, 0x04, 0x10, 0x40, 0x02, 0x08, + 0x21, 0x05, 0x14, 0x50, 0x42, 0x0a, 0x29, 0x24, 0x11, 0x44, 0x12, 0x48, + 0x23, 0x0d, 0x35, 0x55, 0x56, 0x5a, 0x6b, 0x2e, 0x38, 0x60, 0x03, 0x0c, + 0x31, 0x45, 0x16, 0x58, 0x63, 0x0f, 0x3d, 0x74, 0x53, 0x4e, 0x3b, 0x6c, + 0x32, 0x49, 0x27, 0x1d, 0x75, 0x57, 0x5e, 0x7b, 0x6e, 0x3a, 0x68, 0x22, + 0x09, 0x25, 0x15, 0x54, 0x52, 0x4a, 0x2b, 0x2c, 0x30, 0x41, 0x06, 0x18, + 0x61, 0x07, 0x1c, 0x71, 0x47, 0x1e, 0x79, 0x66, 0x1b, 0x6d, 0x36, 0x59, + 0x67, 0x1f, 0x7d, 0x76, 0x5b, 0x6f, 0x3e, 0x78, 0x62, 0x0b, 0x2d, 0x34, + 0x51, 0x46, 0x1a, 0x69, 0x26, 0x19, 0x65, 0x17, 0x5c, 0x73, }; +//initialization + +#define Processing_Data(data) \ +do { \ + packU128FormatToFourPacket(dataFormat, data); \ + s[0] ^= dataFormat[0]; \ + s[1] ^= dataFormat[1]; \ + s[2] ^= dataFormat[2]; \ + s[3] ^= dataFormat[3]; \ +} while (0) + +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + u32 s[16] = { 0 }; + u32 dataFormat[4] = { 0 }; + u8 
tempData[16] = { 0 }; + u8 tempU8[32] = { 0 }; + *clen = mlen + CRYPTO_ABYTES; + //initialization + packU128FormatToFourPacket(s, npub); + packU128FormatToFourPacket((s + 4), (npub + 16)); + packU128FormatToFourPacket((s + 8), k); + packU128FormatToFourPacket((s + 12), (k + 16)); + P512(s, constant7Format_aead, PR0_ROUNDS); + + // process associated data + if (adlen) { + while (adlen >= aead_RATE) { + packU128FormatToFourPacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + s[3] ^= dataFormat[3]; + P512(s, constant7Format_aead, PR_ROUNDS); + + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; + packU128FormatToFourPacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + s[3] ^= dataFormat[3]; + P512(s, constant7Format_aead, PR_ROUNDS); + + } + s[15] ^= 0x80000000; + // process p data + if (mlen) { + while (mlen >= aead_RATE) { + Processing_Data(m); + unpackU128FormatToFourPacket(c, s); + + P512(s, constant7Format_aead, PR_ROUNDS); + mlen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, m, mlen * sizeof(unsigned char)); + tempData[mlen] = 0x01; + Processing_Data(tempData); + unpackU128FormatToFourPacket(tempData, s); + memcpy(c, tempData, mlen * sizeof(unsigned char)); + c += mlen; + } + // finalization + P512(s, constant7Format_aead, PRF_ROUNDS); + unpackU128FormatToFourPacket(tempU8, s); + unpackU128FormatToFourPacket((tempU8 + 16), (s + 4)); + memcpy(c, tempU8, CRYPTO_ABYTES * sizeof(unsigned char)); + return 0; +} + +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + // initialization + u32 
s[16] = { 0 }; + u32 dataFormat_1[4] = { 0 }; + u32 dataFormat[4] = { 0 }; + u8 tempData[32] = { 0 }; + u8 tempU8[64] = { 0 }; + if (clen < CRYPTO_ABYTES) + return -1; + *mlen = clen - CRYPTO_ABYTES; + //initialization + packU128FormatToFourPacket(s, npub); + packU128FormatToFourPacket((s + 4), (npub + 16)); + packU128FormatToFourPacket((s + 8), k); + packU128FormatToFourPacket((s + 12), (k + 16)); + P512(s, constant7Format_aead, PR0_ROUNDS); + // process associated data + if (adlen) { + while (adlen >= aead_RATE) { + packU128FormatToFourPacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + s[3] ^= dataFormat[3]; + P512(s, constant7Format_aead, PR_ROUNDS); + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; + packU128FormatToFourPacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + s[3] ^= dataFormat[3]; + P512(s, constant7Format_aead, PR_ROUNDS); + + } + s[15] ^= 0x80000000; + // process c data + clen = clen - CRYPTO_KEYBYTES; + if (clen) { + while (clen >= aead_RATE) { + packU128FormatToFourPacket(dataFormat, c); + dataFormat_1[0] = s[0] ^ dataFormat[0]; + dataFormat_1[1] = s[1] ^ dataFormat[1]; + dataFormat_1[2] = s[2] ^ dataFormat[2]; + dataFormat_1[3] = s[3] ^ dataFormat[3]; + unpackU128FormatToFourPacket(m, dataFormat_1); + s[0] = dataFormat[0]; + s[1] = dataFormat[1]; + s[2] = dataFormat[2]; + s[3] = dataFormat[3]; + P512(s, constant7Format_aead, PR_ROUNDS); + clen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + unpackU128FormatToFourPacket(tempU8, s); + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, c, clen * sizeof(unsigned char)); + tempData[clen] = 0x01; + U32BIG(((u32*)tempU8)[0]) ^= U32BIG( + ((u32* )tempData)[0]); + U32BIG(((u32*)tempU8)[1]) ^= U32BIG( + ((u32* )tempData)[1]); + U32BIG(((u32*)tempU8)[2]) ^= U32BIG( + 
((u32* )tempData)[2]); + U32BIG(((u32*)tempU8)[3]) ^= U32BIG( + ((u32* )tempData)[3]); + memcpy(m, tempU8, clen * sizeof(unsigned char)); + memcpy(tempU8, tempData, clen * sizeof(unsigned char)); + packU128FormatToFourPacket(s, tempU8); + c += clen; + } + // finalization + P512(s, constant7Format_aead, PRF_ROUNDS); + unpackU128FormatToFourPacket(tempU8, s); + unpackU128FormatToFourPacket((tempU8 + 16), (s + 4)); + if (memcmp((void*) tempU8, (void*) c, CRYPTO_ABYTES)) { + memset(m, 0, sizeof(unsigned char) * (*mlen)); + *mlen = 0; + return -1; + } + return 0; +} diff --git a/knot/Implementations/crypto_hash/genkat.c b/knot/Implementations/crypto_hash/genkat.c new file mode 100644 index 0000000..76a7bcb --- /dev/null +++ b/knot/Implementations/crypto_hash/genkat.c @@ -0,0 +1,166 @@ +// +// NIST-developed software is provided by NIST as a public service. +// You may use, copy and distribute copies of the software in any medium, +// provided that you keep intact this entire notice. You may improve, +// modify and create derivative works of the software or any portion of +// the software, and you may copy and distribute such modifications or +// works. Modified works should carry a notice stating that you changed +// the software and should note the date and nature of any such change. +// Please explicitly acknowledge the National Institute of Standards and +// Technology as the source of the software. +// +// NIST-developed software is expressly provided "AS IS." NIST MAKES NO +// WARRANTY OF ANY KIND, EXPRESS, IMPLIED, IN FACT OR ARISING BY OPERATION +// OF LAW, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT AND DATA ACCURACY. NIST +// NEITHER REPRESENTS NOR WARRANTS THAT THE OPERATION OF THE SOFTWARE WILL BE +// UNINTERRUPTED OR ERROR-FREE, OR THAT ANY DEFECTS WILL BE CORRECTED. 
NIST +// DOES NOT WARRANT OR MAKE ANY REPRESENTATIONS REGARDING THE USE OF THE SOFTWARE +// OR THE RESULTS THEREOF, INCLUDING BUT NOT LIMITED TO THE CORRECTNESS, ACCURACY, +// RELIABILITY, OR USEFULNESS OF THE SOFTWARE. +// +// You are solely responsible for determining the appropriateness of using and +// distributing the software and you assume all risks associated with its use, +// including but not limited to the risks and costs of program errors, compliance +// with applicable laws, damage to or loss of data, programs or equipment, and +// the unavailability or interruption of operation. This software is not intended +// to be used in any situation where a failure could cause risk of injury or +// damage to property. The software developed by NIST employees is not subject to +// copyright protection within the United States. +// + +// disable deprecation for sprintf and fopen +#ifdef _MSC_VER +#define _CRT_SECURE_NO_WARNINGS +#endif + +#include +#include + +#include "crypto_hash.h" +#include "api.h" + +#define KAT_SUCCESS 0 +#define KAT_FILE_OPEN_ERROR -1 +#define KAT_DATA_ERROR -3 +#define KAT_CRYPTO_FAILURE -4 + + +#define MAX_FILE_NAME 256 +#define MAX_MESSAGE_LENGTH 32 +#define MAX_ASSOCIATED_DATA_LENGTH 32 + +void init_buffer(unsigned char *buffer, unsigned long long numbytes); + +void fprint_bstr(FILE *fp, const char *label, const unsigned char *data, unsigned long long length); + +int generate_test_vectors_aead(); +int generate_test_vectors_hash(); + + +#define MAX_FILE_NAME_HASH 256 +#define MAX_MESSAGE_LENGTH_HASH 32 +int generate_test_vectors_hash() +{ + unsigned char msg[32]={ + 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, + 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f, + }; + unsigned char digest[CRYPTO_BYTES]; + int ret_val = KAT_SUCCESS; + + + for (unsigned long long mlen = 0; mlen <= MAX_MESSAGE_LENGTH_HASH; mlen++) { + ret_val = crypto_hash(digest, msg, mlen); + } + + return 
ret_val; +} +/* +#define MAX_MESSAGE_LENGTH_HASH 1024 +int generate_test_vectors_hash() +{ + unsigned char msg[MAX_MESSAGE_LENGTH_HASH]={ + 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, + 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f, + 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f, + 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f, + 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f, + 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f, + 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f, + 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f, + 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, + 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f, + 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf, + 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf, + 0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf, + 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, + 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef, + 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff, + 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, + 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f, + 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f, + 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f, + 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f, + 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f, + 
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f, + 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f, + 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, + 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f, + 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf, + 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf, + 0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf, + 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, + 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef, + 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff, + 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, + 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f, + 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f, + 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f, + 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f, + 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f, + 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f, + 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f, + 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, + 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f, + 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf, + 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf, + 0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf, + 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, + 
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef, + 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff, + 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, + 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f, + 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f, + 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f, + 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f, + 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f, + 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f, + 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f, + 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, + 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f, + 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf, + 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf, + 0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf, + 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, + 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef, + 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff, + }; + unsigned char digest[CRYPTO_BYTES]; + int ret_val = KAT_SUCCESS; + + + for (unsigned long long mlen = 0; mlen <= MAX_MESSAGE_LENGTH_HASH; mlen++) { + + + ret_val = crypto_hash(digest, msg, mlen); + + if (ret_val != 0) { + printf("error on mlen=%d:\n",mlen); + break; + } + } + + return ret_val; +} +*/ diff --git a/knot/Implementations/crypto_hash/knot256v1/armcortexm_6/api.h b/knot/Implementations/crypto_hash/knot256v1/armcortexm_6/api.h new file mode 100644 index 0000000..ba6bd20 --- /dev/null 
+++ b/knot/Implementations/crypto_hash/knot256v1/armcortexm_6/api.h @@ -0,0 +1 @@ +#define CRYPTO_BYTES 32 diff --git a/knot/Implementations/crypto_hash/knot256v1/armcortexm_6/auxFormat.c b/knot/Implementations/crypto_hash/knot256v1/armcortexm_6/auxFormat.c new file mode 100644 index 0000000..4195ea3 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v1/armcortexm_6/auxFormat.c @@ -0,0 +1,182 @@ +#include"auxFormat.h" + +void P256(unsigned int *s, unsigned char *rc, unsigned char rounds) +{ + unsigned int reg1, reg2; + asm volatile ( + "/*add round const*/ \n\t" + "ldrb %[reg1], [%[rc]] \n\t" + "and %[reg2], %[reg1], 0xf \n\t" + "eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/\ + "eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/\ + "adds %[rc], %[rc], #1 \n\t" + "/*sbox first column 0,2,4,6 sbox1(s[0], s[2], s[4], s[6]); */ \n\t" + "mvns %[S_0], %[S_0] \n\t" + "ands %[reg1], %[S_2], %[S_0] \n\t" + "eors %[reg1], %[S_4], %[reg1] \n\t" + "orrs %[S_4], %[S_2], %[S_4] \n\t" + "eors %[S_0], %[S_6], %[S_0] \n\t" + "eors %[S_4], %[S_4], %[S_0] \n\t" + "eors %[reg2], %[S_2], %[S_6] \n\t" + "eors %[S_6], %[S_6], %[reg1] \n\t" + "ands %[S_0], %[reg1],%[S_0] \n\t" + "eors %[S_0], %[reg2],%[S_0] \n\t" + "ands %[S_2], %[S_4], %[reg2] \n\t" + "eors %[S_2], %[reg1], %[S_2] \n\t" + "/*sbox first column 1,3,5,7 sbox1(s[1], s[3], s[5], s[7]) */ \n\t" + "mvns %[S_1], %[S_1] \n\t" + "ands %[reg1], %[S_3], %[S_1] \n\t" + "eors %[reg1], %[S_5], %[reg1] \n\t" + "orrs %[S_5], %[S_3], %[S_5] \n\t" + "eors %[S_1], %[S_7], %[S_1] \n\t" + "eors %[S_5], %[S_5], %[S_1] \n\t" + "eors %[reg2], %[S_3], %[S_7] \n\t" + "eors %[S_7], %[S_7], %[reg1] \n\t" + "ands %[S_1], %[reg1],%[S_1] \n\t" + "eors %[S_1], %[reg2],%[S_1] \n\t" + "ands %[S_3], %[S_5], %[reg2] \n\t" + "eors %[S_3], %[reg1], %[S_3] \n\t" + "enc_loop2: \n\t" + "/*add round const*/ \n\t" + "ldrb %[reg1], [%[rc]] \n\t" + "and %[reg2], %[reg1], 0xf \n\t" + "eors 
%[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/ + "eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/ + "adds %[rc], %[rc], #1 \n\t" + "/*sbox first column 0,3,4,7 sbox1(s[0], s[3],ROR(s[4], 28), ROR(s[7], 20)); */ \n\t" + "mvns %[S_0], %[S_0] \n\t" + "ands %[reg1], %[S_3], %[S_0] \n\t" + "eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t" + "orrs %[S_4], %[S_3], %[S_4] , ROR #28 \n\t" + "eors %[S_0], %[S_0], %[S_7] , ROR #20 \n\t" + "eors %[S_4], %[S_4], %[S_0] \n\t" + "eors %[reg2], %[S_3], %[S_7] , ROR #20 \n\t" + "eors %[S_7], %[reg1],%[S_7] , ROR #20 \n\t" + "ands %[S_0], %[reg1],%[S_0] \n\t" + "eors %[S_0], %[reg2],%[S_0] \n\t" + "ands %[S_3], %[S_4], %[reg2] \n\t" + "eors %[S_3], %[reg1], %[S_3] \n\t" + "/*sbox first column 1,2,5,6 sbox1(s[1], ROR(s[2], 31), ROR(s[5], 28), ROR(s[6], 19)); */ \n\t" + "mvns %[S_1], %[S_1] \n\t" + "ands %[reg1], %[S_1], %[S_2] , ROR #31 \n\t" + "eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t" +"/*orrs %[S_5], %[S_5], ROR #28 %[S_2], ROR #31 31-28=3*/ \n\t" + "orrs %[S_5], %[S_5], %[S_2] , ROR #3 \n\t" + "eors %[S_1], %[S_1], %[S_6] , ROR #19 \n\t" + "eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t" +"/*eors %[reg2], %[S_6] , ROR #19, %[S_2] , ROR #31 31-19=12*/ \n\t" + "eors %[reg2], %[S_6], %[S_2] , ROR #12 \n\t" + "eors %[S_6], %[reg1],%[S_6] , ROR #19 \n\t" + "ands %[S_1], %[reg1],%[S_1] \n\t" + "eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t" + "ands %[S_2], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t" + "eors %[S_2], %[reg1],%[S_2] \n\t" + "/*add round const*/ \n\t" + "ldrb %[reg1], [%[rc]] \n\t" + "and %[reg2], %[reg1], 0xf \n\t" + "eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/\ + "eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/\ + "adds %[rc], %[rc], #1 \n\t" + "/*sbox first column 0,2,4,6 sbox1(s[0], s[2], ROR(s[4], 28), ROR(s[6], 20)); */ \n\t" + "mvns %[S_0], %[S_0] \n\t" + "ands 
%[reg1], %[S_2], %[S_0] \n\t" + "eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t" + "orrs %[S_4], %[S_2], %[S_4] , ROR #28 \n\t" + "eors %[S_0], %[S_0], %[S_6] , ROR #20 \n\t" + "eors %[S_4], %[S_4], %[S_0] \n\t" + "eors %[reg2], %[S_2], %[S_6] , ROR #20 \n\t" + "eors %[S_6], %[reg1],%[S_6] , ROR #20 \n\t" + "ands %[S_0], %[reg1],%[S_0] \n\t" + "eors %[S_0], %[reg2],%[S_0] \n\t" + "ands %[S_2], %[S_4], %[reg2] \n\t" + "eors %[S_2], %[reg1], %[S_2] \n\t" + "/*sbox first column 1,3,5,7 sbox1(s[1], ROR(s[3], 31), ROR(s[5], 28), ROR(s[7], 19)); */ \n\t" + "mvns %[S_1], %[S_1] \n\t" + "ands %[reg1], %[S_1], %[S_3] , ROR #31 \n\t" + "eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t" +"/*orrs %[S_5], %[S_5], ROR #28 %[S_3], ROR #31 31-28=3*/ \n\t" + "orrs %[S_5], %[S_5], %[S_3] , ROR #3 \n\t" + "eors %[S_1], %[S_1], %[S_7] , ROR #19 \n\t" + "eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t" +"/*eors %[reg2], %[S_7] , ROR #19, %[S_3] , ROR #31 31-19=12*/ \n\t" + "eors %[reg2], %[S_7], %[S_3] , ROR #12 \n\t" + "eors %[S_7], %[reg1],%[S_7] , ROR #19 \n\t" + "ands %[S_1], %[reg1],%[S_1] \n\t" + "eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t" + "ands %[S_3], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t" + "eors %[S_3], %[reg1],%[S_3] \n\t" + "/*loop control*/ \n\t" + "subs %[ro], %[ro], #1 \n\t" + "bne enc_loop2 \n\t" + "/*add round const*/ \n\t" + "ldrb %[reg1], [%[rc]] \n\t" + "and %[reg2], %[reg1], 0xf \n\t" + "eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/\ + "eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/\ + "/*sbox first column 0,3,4,7 sbox1(s[0], s[3],ROR(s[4], 28), ROR(s[7], 20)); */ \n\t" + "mvns %[S_0], %[S_0] \n\t" + "ands %[reg1], %[S_3], %[S_0] \n\t" + "eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t" + "orrs %[S_4], %[S_3], %[S_4] , ROR #28 \n\t" + "eors %[S_0], %[S_0], %[S_7] , ROR #20 \n\t" + "eors %[S_4], %[S_4], %[S_0] \n\t" + "eors %[reg2], %[S_3], %[S_7] , ROR #20 \n\t" + "eors 
%[S_7], %[reg1],%[S_7] , ROR #20 \n\t" + "ands %[S_0], %[reg1],%[S_0] \n\t" + "eors %[S_0], %[reg2],%[S_0] \n\t" + "ands %[S_3], %[S_4], %[reg2] \n\t" + "eors %[S_3], %[reg1], %[S_3] \n\t" + "/*sbox first column 1,2,5,6 sbox1(s[1], ROR(s[2], 31), ROR(s[5], 28), ROR(s[6], 19)); */ \n\t" + "mvns %[S_1], %[S_1] \n\t" + "ands %[reg1], %[S_1], %[S_2] , ROR #31 \n\t" + "eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t" +"/*orrs %[S_5], %[S_5], ROR #28 %[S_2], ROR #31 31-28=3*/ \n\t" + "orrs %[S_5], %[S_5], %[S_2] , ROR #3 \n\t" + "eors %[S_1], %[S_1], %[S_6] , ROR #19 \n\t" + "eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t" +"/*eors %[reg2], %[S_6] , ROR #19, %[S_2] , ROR #31 31-19=12*/ \n\t" + "eors %[reg2], %[S_6], %[S_2] , ROR #12 \n\t" + "eors %[S_6], %[reg1],%[S_6] , ROR #19 \n\t" + "ands %[S_1], %[reg1],%[S_1] \n\t" + "eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t" + "ands %[S_2], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t" + "eors %[S_2], %[reg1],%[S_2] \n\t" + "ROR %[S_3], #31 \n\t" + "ROR %[S_4], #28 \n\t" + "ROR %[S_5], #28 \n\t" + "ROR %[S_6], #20 \n\t" + "ROR %[S_7], #19 \n\t" + : /* output variables - including inputs that are changed */ + [ro] "+r" (rounds),[reg1] "=r" (reg1), [reg2] "=r" (reg2), [rc] "+r" (rc), + [S_0] "+r" (s[0]), [S_2] "+r" (s[2]), [S_4] "+r" (s[4]), [S_6] "+r" (s[6]) , + [S_1] "+r" (s[1]), [S_3] "+r" (s[3]), [S_5] "+r" (s[5]), [S_7] "+r" (s[7]) + : /* input variables */ + : /* clobber registers for temporary values */ + ); +} + +void unpackFormat(u8 * out, u32 * in) { + u32 t[2] = { 0 }; + t[1] = (in[0] & 0xFFFF0000) | (in[1] >> 16); + t[0] = (in[1] & 0x0000FFFF) | (in[0] << 16); + u32 r0, r1; + r0 = (t[0] ^ (t[0] >> 8)) & 0x0000FF00, t[0] ^= r0 ^ (r0 << 8); + r0 = (t[0] ^ (t[0] >> 4)) & 0x00F000F0, t[0] ^= r0 ^ (r0 << 4); + r0 = (t[0] ^ (t[0] >> 2)) & 0x0C0C0C0C, t[0] ^= r0 ^ (r0 << 2); + r0 = (t[0] ^ (t[0] >> 1)) & 0x22222222, t[0] ^= r0 ^ (r0 << 1); + r1 = (t[1] ^ (t[1] >> 8)) & 0x0000FF00, t[1] ^= r1 ^ (r1 << 8); 
+ r1 = (t[1] ^ (t[1] >> 4)) & 0x00F000F0, t[1] ^= r1 ^ (r1 << 4); + r1 = (t[1] ^ (t[1] >> 2)) & 0x0C0C0C0C, t[1] ^= r1 ^ (r1 << 2); + r1 = (t[1] ^ (t[1] >> 1)) & 0x22222222, t[1] ^= r1 ^ (r1 << 1); + memcpy(out, t, 8 * sizeof(unsigned char)); +} + + +void getU32Format(u32 *out, const u8* in) { + u32 r0, lo = U32BIG(((u32* )in)[0]); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + *out = lo; +} diff --git a/knot/Implementations/crypto_hash/knot256v1/armcortexm_6/auxFormat.h b/knot/Implementations/crypto_hash/knot256v1/armcortexm_6/auxFormat.h new file mode 100644 index 0000000..c2c9aa5 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v1/armcortexm_6/auxFormat.h @@ -0,0 +1,15 @@ +#include"crypto_hash.h" +#include"api.h" +#define U32BIG(x) (x) +#include + + +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; + +void getU32Format(u32 *out, const u8* in); +void unpackFormat(u8 * out, u32 * in) ; +void P256(unsigned int *s, unsigned char *rc, unsigned char rounds); + + diff --git a/knot/Implementations/crypto_hash/knot256v1/armcortexm_6/crypto_hash.h b/knot/Implementations/crypto_hash/knot256v1/armcortexm_6/crypto_hash.h new file mode 100644 index 0000000..646e161 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v1/armcortexm_6/crypto_hash.h @@ -0,0 +1,7 @@ + + +int crypto_hash( + unsigned char *out, + const unsigned char *in, + unsigned long long inlen + ); diff --git a/knot/Implementations/crypto_hash/knot256v1/armcortexm_6/hash.c b/knot/Implementations/crypto_hash/knot256v1/armcortexm_6/hash.c new file mode 100644 index 0000000..a08aefb --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v1/armcortexm_6/hash.c @@ -0,0 +1,114 @@ +#include"auxFormat.h" + + +//#define hash_RATE (32 / 8) +#define hash_RATE 4 + +#define 
PRH_ROUNDS 33 +//#define PRH_ROUNDS 68 +unsigned char constant7Format[68] = { +/*constant7_hash_256v1:*/ + 0x1, + 0x10, + 0x2, + 0x20, + 0x4, + 0x40, + 0x9, + 0x11, + 0x12, + 0x22, + 0x24, + 0x44, + 0x49, + 0x18, + 0x3, + 0x30, + 0x6, + 0x60, + 0xd, + 0x51, + 0x1b, + 0x33, + 0x36, + 0x66, + 0x6d, + 0x5c, + 0x4a, + 0x28, + 0x5, + 0x50, + 0xb, + 0x31, + 0x16, + 0x62, + 0x2d, + 0x55, + 0x5b, + 0x3a, + 0x27, + 0x74, + 0x4f, + 0x78, + 0xe, + 0x61, + 0x1d, + 0x53, + 0x3b, + 0x37, + 0x76, + 0x6f, + 0x7c, + 0x4e, + 0x68, + 0xc, + 0x41, + 0x19, + 0x13, + 0x32, + 0x26, + 0x64, + 0x4d, + 0x58, + 0xa, + 0x21, + 0x14, + 0x42, + 0x29, + 0x15, +}; +int crypto_hash(unsigned char *out, const unsigned char *in, + unsigned long long inlen) { + u32 dataFormat[2] = { 0 }; + // initialization + u32 s[8] = { 0 }; + u8 tempData[32]; + //absorb + //RATE=4 + while (inlen >= hash_RATE) { + getU32Format(dataFormat, in); + s[0] ^= dataFormat[0] >>16; + s[1] ^= dataFormat[0] &0xffff; + P256(s, constant7Format, PRH_ROUNDS); + inlen -= hash_RATE; + in += hash_RATE; + } + memset(tempData, 0, hash_RATE); + memcpy(tempData, in, inlen * sizeof(unsigned char)); + tempData[inlen] = 0x01; + getU32Format(dataFormat, tempData); + s[0] ^= dataFormat[0] >> 16; + s[1] ^= dataFormat[0] & 0xffff; + P256(s, constant7Format, PRH_ROUNDS); + //sequeez + unpackFormat(out, s); + unpackFormat((out + 8), (s + 2)); + P256(s, constant7Format, PRH_ROUNDS); + out += CRYPTO_BYTES / 2; + unpackFormat(out, s); + unpackFormat((out + 8), (s + 2)); + return 0; +} + + + diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_3_1/api.h b/knot/Implementations/crypto_hash/knot256v2/armcortexm_3_1/api.h new file mode 100644 index 0000000..efe69ac --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_3_1/api.h @@ -0,0 +1,3 @@ + +#define CRYPTO_BYTES 32 + diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_3_1/auxFormat.h 
b/knot/Implementations/crypto_hash/knot256v2/armcortexm_3_1/auxFormat.h new file mode 100644 index 0000000..de4d658 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_3_1/auxFormat.h @@ -0,0 +1,123 @@ +#include +#include"crypto_hash.h" +#include"api.h" +#include +#include +#include +#define U32BIG(x) (x) + +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) + + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define sbox(a, b, c, d, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} + +#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t2, 2);\ +t4 =LOTR32(t0, 3);\ +t5 = LOTR32(t1, 3); \ +} +#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t1, 18); \ +t4 = LOTR32(t2, 18);\ +t5 = LOTR32(t0, 19); \ +} +#define ROUND384(lunNum) {\ +s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;\ +s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;\ +s[2] ^= constant7Format[lunNum] & 0x7;\ +sbox(s[0], s[3], s[6], s[9] , s_temp[3], s_temp[6], s_temp[9]);\ +sbox(s[1], s[4], s[7], s[10], s[3] , s_temp[7], s_temp[10]);\ +sbox(s[2], s[5], s[8], s[11], s[4] , s_temp[8], s_temp[11]);\ +s[5] = LOTR32(s_temp[3], 1); \ +U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\ +U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\ +} + +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ +} +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ +} +#define packU32FormatToThreePacket(out, in) { \ + u32 t2 = 
U32BIG(((u32*)in)[0]); \ + out[2] = t2; out[1] = t2 >> 1; out[0] = t2 >> 2;\ + puckU32ToThree_1(out[0]); \ + puckU32ToThree_1(out[1]); \ + puckU32ToThree_1(out[2]); \ +} +#define unpackU32FormatToThreePacket(out, in) { \ + u32 temp0[3] = { 0 }; \ + temp0[0] = in[0] & 0x3ff; \ + temp0[1] = in[1] & 0x7ff; \ + temp0[2] = in[2] & 0x7ff; \ + unpuckU32ToThree_1(temp0[0]); \ + unpuckU32ToThree_1(temp0[1]); \ + unpuckU32ToThree_1(temp0[2]); \ + *(u32*)(out) = U32BIG(temp0[0]<<2 | temp0[1] << 1 | temp0[2]); \ +} +#define packU96FormatToThreePacket(out, in) { \ + u32 temp0[3] = { 0 }; \ + u32 temp1[3] = { 0 }; \ + u32 temp2[3] = { 0 }; \ + temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; \ + puckU32ToThree_1(temp0[0]); \ + puckU32ToThree_1(temp0[1]); \ + puckU32ToThree_1(temp0[2]); \ + temp1[0] = U32BIG(((u32*)in)[1]); temp1[1] = U32BIG(((u32*)in)[1]) >>1; temp1[2] = U32BIG(((u32*)in)[1]) >> 2; \ + puckU32ToThree_1(temp1[0]); \ + puckU32ToThree_1(temp1[1]); \ + puckU32ToThree_1(temp1[2]); \ + temp2[0] = U32BIG(((u32*)in)[2]); temp2[1] = U32BIG(((u32*)in)[2]) >> 1; temp2[2] = U32BIG(((u32*)in)[2]) >> 2; \ + puckU32ToThree_1(temp2[0]); \ + puckU32ToThree_1(temp2[1]); \ + puckU32ToThree_1(temp2[2]); \ + out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; \ + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; \ + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; \ +} +#define unpackU96FormatToThreePacket(out, in) { \ + u32 temp0[3] = { 0 }; \ + u32 temp1[3] = { 0 }; \ + u32 temp2[3] = { 0 }; \ + u32 t[3] = { 0 }; \ + temp0[0] = in[2] & 0x7ff; \ + temp0[1] = in[1] & 0x7ff; \ + temp0[2] = in[0] & 0x3ff; \ + temp1[0] = (in[0]>>10) & 0x7ff; \ + temp1[1] = (in[2] >>11 ) & 0x7ff; \ + temp1[2] = (in[1] >> 11) & 0x3ff; \ + temp2[0] = in[1] >> 21; \ + temp2[1] = in[0] >> 21; \ + temp2[2] = in[2] >> 22; \ + unpuckU32ToThree_1(temp0[0]); \ + unpuckU32ToThree_1(temp0[1]); \ + unpuckU32ToThree_1(temp0[2]); \ + t[0] = 
temp0[0] | temp0[1] << 1 | temp0[2] << 2; \ + unpuckU32ToThree_1(temp1[0]); \ + unpuckU32ToThree_1(temp1[1]); \ + unpuckU32ToThree_1(temp1[2]); \ + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; \ + unpuckU32ToThree_1(temp2[0]); \ + unpuckU32ToThree_1(temp2[1]); \ + unpuckU32ToThree_1(temp2[2]); \ + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; \ + memcpy(out, t, 12 * sizeof(unsigned char)); \ +} diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_3_1/crypto_hash.h b/knot/Implementations/crypto_hash/knot256v2/armcortexm_3_1/crypto_hash.h new file mode 100644 index 0000000..0632591 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_3_1/crypto_hash.h @@ -0,0 +1,6 @@ + +int crypto_hash( + unsigned char *out, + const unsigned char *in, + unsigned long long inlen + ); \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_3_1/hash.c b/knot/Implementations/crypto_hash/knot256v2/armcortexm_3_1/hash.c new file mode 100644 index 0000000..e38acaa --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_3_1/hash.c @@ -0,0 +1,65 @@ +#include"auxFormat.h" + +#define hash_RATE 16 +//#define hash_RATE (128 / 8) +#define PRH_ROUNDS 80 +//12*7=84 +unsigned char constant7Format[80] = { +/*constant7Format[127]:*/ +0x01, 0x08, 0x40, 0x02, 0x10, 0x80, 0x05, 0x09, 0x48, 0x42, 0x12, 0x90, 0x85, + 0x0c, 0x41, 0x0a, 0x50, 0x82, 0x15, 0x89, 0x4d, 0x4b, 0x5a, 0xd2, 0x97, + 0x9c, 0xc4, 0x06, 0x11, 0x88, 0x45, 0x0b, 0x58, 0xc2, 0x17, 0x99, 0xcd, + 0x4e, 0x53, 0x9a, 0xd5, 0x8e, 0x54, 0x83, 0x1d, 0xc9, 0x4f, 0x5b, 0xda, + 0xd7, 0x9e, 0xd4, 0x86, 0x14, 0x81, 0x0d, 0x49, 0x4a, 0x52, 0x92, 0x95, + 0x8c, 0x44, 0x03, 0x18, 0xc0, 0x07, 0x19, 0xc8, 0x47, 0x1b, 0xd8, 0xc7, + 0x1e, 0xd1, 0x8f, 0x5c, 0xc3, 0x1f, 0xd9, }; +#define Processing_Data(data) \ +do { \ +packU96FormatToThreePacket(dataFormat, data);\ +s[0] ^= dataFormat[0];\ +s[1] ^= dataFormat[1];\ +s[2] ^= dataFormat[2];\ 
+packU32FormatToThreePacket((dataFormat + 3), (data + 12));\ +s[3] ^= dataFormat[3];\ +s[4] ^= dataFormat[4];\ +s[5] ^= dataFormat[5];\ +} while (0) + +int crypto_hash(unsigned char *out, const unsigned char *in, + unsigned long long inlen) { + + u32 s[12] = { 0 }; + u32 dataFormat[6] = { 0 }; + u8 i, tempData[24] = { 0 }; + u32 s_temp[12] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + // initialization + s[9] = 0x80000000; + //absorb + while (inlen >= hash_RATE) { + Processing_Data(in); + for (i = 0; i < PRH_ROUNDS; i++) { + ROUND384(i); + } + inlen -= hash_RATE; + in += hash_RATE; + } + memset(tempData, 0, hash_RATE); + memcpy(tempData, in, inlen * sizeof(unsigned char)); + tempData[inlen] = 0x01; + Processing_Data(tempData); + for (i = 0; i < PRH_ROUNDS; i++) { + ROUND384(i); + } + //sequeez + unpackU96FormatToThreePacket(out, s); + unpackU32FormatToThreePacket((out + 12), (s + 3)); + for (i = 0; i < PRH_ROUNDS; i++) { + ROUND384(i); + } + out += CRYPTO_BYTES / 2; + unpackU96FormatToThreePacket(out, s); + unpackU32FormatToThreePacket((out + 12), (s + 3)); + return 0; +} + diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_4_1/api.h b/knot/Implementations/crypto_hash/knot256v2/armcortexm_4_1/api.h new file mode 100644 index 0000000..7715d8e --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_4_1/api.h @@ -0,0 +1,2 @@ +#define CRYPTO_BYTES 32 + diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_4_1/auxFormat.c b/knot/Implementations/crypto_hash/knot256v2/armcortexm_4_1/auxFormat.c new file mode 100644 index 0000000..6e79b71 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_4_1/auxFormat.c @@ -0,0 +1,93 @@ +#include"auxFormat.h" +void packU32FormatToThreePacket(u32 * out, u8 * in) { + u32 t2 = U32BIG(((u32*)in)[0]); + out[2] = t2; out[1] = t2 >> 1; out[0] = t2 >> 2;//temp2[0] 0;temp2[1] 1;temp2[2] 2; + puckU32ToThree_1(out[0]); + puckU32ToThree_1(out[1]); + puckU32ToThree_1(out[2]); +} 
+void unpackU32FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + temp0[0] = in[0] & 0x3ff; + temp0[1] = in[1] & 0x7ff; + temp0[2] = in[2] & 0x7ff; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + *(u32*)(out) = U32BIG(temp0[0]<<2 | temp0[1] << 1 | temp0[2]); +} + + +void packU96FormatToThreePacket(u32 * out, u8 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + temp1[0] = U32BIG(((u32*)in)[1]); temp1[1] = U32BIG(((u32*)in)[1]) >>1; temp1[2] = U32BIG(((u32*)in)[1]) >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + temp2[0] = U32BIG(((u32*)in)[2]); temp2[1] = U32BIG(((u32*)in)[2]) >> 1; temp2[2] = U32BIG(((u32*)in)[2]) >> 2; + puckU32ToThree_1(temp2[0]); + puckU32ToThree_1(temp2[1]); + puckU32ToThree_1(temp2[2]); + out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; +} +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + temp0[0] = in[2] & 0x7ff; + temp0[1] = in[1] & 0x7ff; + temp0[2] = in[0] & 0x3ff; + temp1[0] = (in[0]>>10) & 0x7ff; + temp1[1] = (in[2] >>11 ) & 0x7ff; + temp1[2] = (in[1] >> 11) & 0x3ff; + temp2[0] = in[1] >> 21; + temp2[1] = in[0] >> 21; + temp2[2] = in[2] >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + 
unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; + memcpy(out, t, 12 * sizeof(unsigned char)); +} +unsigned char constant7Format[80] = { + /*constant7Format[127]: 12*6=72*/ + 0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, + 0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, + 0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, + 0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, + 0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, + 0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, + 0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,}; +void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) { + u32 s_temp[12] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + unsigned char i; + for (i = 0; i < lunNum; i++) { +s[0] ^= (round[i] >> 6) & 0x3;\ +s[1] ^= (round[i] >> 3) & 0x7;\ +s[2] ^= round[i] & 0x7;\ +sbox(s[0], s[3], s[6], s[9] , s_temp[3], s_temp[6], s_temp[9]);\ +sbox(s[1], s[4], s[7], s[10], s[3] , s_temp[7], s_temp[10]);\ +sbox(s[2], s[5], s[8], s[11], s[4] , s_temp[8], s_temp[11]);\ +s[5] = LOTR32(s_temp[3], 1); \ +U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\ +U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\ + } +} diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_4_1/auxFormat.h b/knot/Implementations/crypto_hash/knot256v2/armcortexm_4_1/auxFormat.h new file mode 100644 index 0000000..85f53af --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_4_1/auxFormat.h @@ -0,0 +1,53 @@ +#include +#include"crypto_hash.h" +#include"api.h" +#include +#include +#include +#define U32BIG(x) (x) + +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; +void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) ; +void unpackU96FormatToThreePacket(u8 * out, u32 * in) ; +void packU96FormatToThreePacket(u32 * out, 
u8 * in); +void unpackU32FormatToThreePacket(u8 * out, u32 * in); +void packU32FormatToThreePacket(u32 * out, u8 * in); +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ +} +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ +} +unsigned char constant7Format[80]; + + + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) + +#define sbox(a, b, c, d, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} + +#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t2, 2);\ +t4 =LOTR32(t0, 3);\ +t5 = LOTR32(t1, 3); \ +} +//55=3*18+1 +#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t1, 18); \ +t4 = LOTR32(t2, 18);\ +t5 = LOTR32(t0, 19); \ +} diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_4_1/crypto_hash.h b/knot/Implementations/crypto_hash/knot256v2/armcortexm_4_1/crypto_hash.h new file mode 100644 index 0000000..0632591 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_4_1/crypto_hash.h @@ -0,0 +1,6 @@ + +int crypto_hash( + unsigned char *out, + const unsigned char *in, + unsigned long long inlen + ); \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_4_1/hash.c b/knot/Implementations/crypto_hash/knot256v2/armcortexm_4_1/hash.c new file mode 100644 index 0000000..8066529 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_4_1/hash.c @@ -0,0 +1,57 @@ +#include"auxFormat.h" + + +#define hash_RATE 16 + +//#define hash_RATE (128 / 8) +#define PRH_ROUNDS 80 + + +int crypto_hash(unsigned char *out, const unsigned char *in, + 
unsigned long long inlen) { + + u32 s[12] = { 0 }; + u32 dataFormat[6] = { 0 }; + u8 tempData[24] = { 0 }; + // initialization + s[9] = 0x80000000; + //absorb + while (inlen >= hash_RATE) { + packU96FormatToThreePacket(dataFormat, in); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU32FormatToThreePacket(dataFormat + 3, in + 12); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + P384(s, constant7Format, PRH_ROUNDS); + inlen -= hash_RATE; + in += hash_RATE; + } + memset(tempData, 0, hash_RATE); + memcpy(tempData, in, inlen * sizeof(unsigned char)); + tempData[inlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU32FormatToThreePacket(dataFormat + 3, tempData + 12); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + + P384(s, constant7Format, PRH_ROUNDS); + //sequeez + unpackU96FormatToThreePacket(out, s); + unpackU32FormatToThreePacket(out + 12, s + 3); + P384(s, constant7Format, PRH_ROUNDS); + out += CRYPTO_BYTES / 2; + unpackU96FormatToThreePacket(out, s); + unpackU32FormatToThreePacket(out + 12, s + 3); + return 0; + +} + + + diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/api.h b/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/api.h new file mode 100644 index 0000000..7715d8e --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/api.h @@ -0,0 +1,2 @@ +#define CRYPTO_BYTES 32 + diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.c b/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.c new file mode 100644 index 0000000..6cdc800 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.c @@ -0,0 +1,106 @@ +#include"auxFormat.h" +void packU32FormatToThreePacket(u32 * out, u8 * in) { + u32 t2 = U32BIG(((u32*)in)[0]); + out[2] = t2; out[1] = t2 >> 1; out[0] = t2 >> 
2;//temp2[0] 0;temp2[1] 1;temp2[2] 2; + puckU32ToThree_1(out[0]); + puckU32ToThree_1(out[1]); + puckU32ToThree_1(out[2]); +} +void unpackU32FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + temp0[0] = in[0] & 0x3ff; + temp0[1] = in[1] & 0x7ff; + temp0[2] = in[2] & 0x7ff; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + *(u32*)(out) = U32BIG(temp0[0]<<2 | temp0[1] << 1 | temp0[2]); +} + + +void packU96FormatToThreePacket(u32 * out, u8 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + temp1[0] = U32BIG(((u32*)in)[1]); temp1[1] = U32BIG(((u32*)in)[1]) >>1; temp1[2] = U32BIG(((u32*)in)[1]) >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + temp2[0] = U32BIG(((u32*)in)[2]); temp2[1] = U32BIG(((u32*)in)[2]) >> 1; temp2[2] = U32BIG(((u32*)in)[2]) >> 2; + puckU32ToThree_1(temp2[0]); + puckU32ToThree_1(temp2[1]); + puckU32ToThree_1(temp2[2]); + out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; +} +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + temp0[0] = in[2] & 0x7ff; + temp0[1] = in[1] & 0x7ff; + temp0[2] = in[0] & 0x3ff; + temp1[0] = (in[0]>>10) & 0x7ff; + temp1[1] = (in[2] >>11 ) & 0x7ff; + temp1[2] = (in[1] >> 11) & 0x3ff; + temp2[0] = in[1] >> 21; + temp2[1] = in[0] >> 21; + temp2[2] = in[2] >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + 
unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; + memcpy(out, t, 12 * sizeof(unsigned char)); +} +void ROUND384_Three(unsigned int *s, unsigned char *c, int lunnum) { + unsigned int t, t1, t2; + u32 rci, temp; + rci = c[0]; + ARC(rci); + SBOX(s[0], s[3], s[6], s[9]); + SBOX(s[1], s[4], s[7], s[10]); + SBOX(s[2], s[5], s[8], s[11]); + t = 1; + while (lunnum--) { + temp = ((u32*) (c + t))[0]; + rci = temp & 0xff; + ARC(rci); + SBOX1_ROR(s[0], s[4], s[8], s[10] ); + SBOX2_ROR(s[1], s[5], s[6], s[11]); + SBOX3_ROR(s[2], s[3], s[7], s[9]); + rci = (temp & 0xff00) >> 8; + ARC(rci); + SBOX1_ROR(s[0], s[5], s[7], s[11]); + SBOX2_ROR(s[1], s[3], s[8], s[9]); + SBOX3_ROR(s[2], s[4], s[6], s[10]); + rci = (temp & 0xff0000) >> 16; + ARC(rci); + SBOX1_ROR(s[0], s[3], s[6], s[9]); + SBOX2_ROR(s[1], s[4], s[7], s[10]); + SBOX3_ROR(s[2], s[5], s[8], s[11]); + t += 3; + } +} +unsigned char constant7Format[80] = { + /*constant7Format[127]: 12*6=72*/ + 0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, + 0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, + 0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, + 0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, + 0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, + 0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, + 0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,}; diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.h b/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.h new file mode 100644 index 0000000..58eef3d --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/auxFormat.h @@ -0,0 +1,161 @@ +#include +#include"crypto_hash.h" +#include"api.h" +#include +#include +#include +#define U32BIG(x) (x) + +typedef unsigned char u8; +typedef 
unsigned int u32; +typedef unsigned long long u64; + +void ROUND384_Three(unsigned int *s, unsigned char *c,int lunnum) ; +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ +} +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ +} +////////////constant begin// +unsigned char constant7Format[80]; +#define ARC(rci) \ + do { \ + __asm__ __volatile__ ( \ + "/*add round const s0 s1 s2 */ \n\t"\ + "ands %[t1], %[rci], #0xc0\n\t" \ + "eors %[S_0], %[S_0], %[t1], LSR #6 \n\t" /*s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;*/\ + "ands %[t1], %[rci], #0x38\n\t" \ + "eors %[S_1], %[S_1], %[t1], LSR #3 \n\t" /*s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;*/\ + "ands %[t1], %[rci], #0x7\n\t" \ + "eors %[S_3], %[S_3], %[t1] \n\t" /*s[2] ^= constant7Format[lunNum] & 0x7;*/\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [rci] "+r" (rci), \ + [S_0] "+r" (s[0]), [S_1] "+r" (s[1]), [S_3] "+r" (s[2])\ + : : );\ +}while (0) +#define SBOX(S1,S2,S3,S4) \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[S_4], %[t1] \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] \n\t"\ + "eors %[S_0], %[S_6], %[S_0] \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] \n\t"\ + "eors %[S_6], %[S_6], %[t1] \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], %[t2] , %[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : : );\ +}while (0) +#define SBOX1_ROR(S1,S2,S3,S4) \ + do { \ + 
__asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[t1] , %[S_4] ,ROR #30 \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] ,ROR #30 \n\t"\ + "eors %[S_0], %[S_0], %[S_6] ,ROR #14 \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] ,ROR #14 \n\t"\ + "eors %[S_6], %[t1], %[S_6] ,ROR #14 \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], %[t2] , %[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : : );\ +}while (0) +#define SBOX2_ROR(S1,S2,S3,S4) \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[t1] , %[S_4] ,ROR #29 \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] ,ROR #29 \n\t"\ + "eors %[S_0], %[S_0], %[S_6] ,ROR #14 \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] ,ROR #14 \n\t"\ + "eors %[S_6], %[t1], %[S_6] ,ROR #14 \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], %[t2] , %[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : : );\ +}while (0) + +#define SBOX3_ROR(S1,S2,S3,S4) \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_0], %[S_2] ,ROR #31 \n\t"\ + "eors %[t1], %[t1], %[S_4] ,ROR #29 \n\t"\ + "orrs %[S_4], %[S_4], %[S_2] ,ROR #2 \n\t"\ + "eors %[S_0], %[S_0], %[S_6] ,ROR #13 \n\t"\ + "eors %[S_4], %[S_0], %[S_4] ,ROR 29 \n\t"\ + "eors %[t2], %[S_6], %[S_2] ,ROR #18 \n\t"\ + "eors %[S_6], %[t1] , %[S_6] ,ROR #13 \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + 
"eors %[S_0], %[S_0] , %[t2] ,ROR #13 \n\t"\ + "ands %[S_2], %[S_4], %[t2] ,ROR #13 \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : : );\ +}while (0) + +#define P384_2( s, round, lunNum) {\ + u32 t1,rci;\ + ROUND384_Three(s,round,lunNum);\ + rci=round[lunNum*3+1];\ + ARC(rci);\ + SBOX1_ROR(s[0], s[4], s[8], s[10] );\ + SBOX2_ROR(s[1], s[5], s[6], s[11]);\ + SBOX3_ROR(s[2], s[3], s[7], s[9]);\ + __asm__ __volatile__ ( \ + "/*rotate shift left 1 bit [w9 w5 w1-> (w1,1) w9 w5] */ \n\t"\ + "mov %[t1], %[S_4] \n\t"\ + "mov %[S_4], %[S_3] \n\t"\ + "mov %[S_3], %[S_5] \n\t"\ + "ROR %[S_5], %[t1] , #31 \n\t"\ + "/*rotate shift left 8 bits [w10 w6 w2-> (w6,3) (w2,3) ( w10,2)]*/ \n\t"\ + "mov %[t1], %[S_8] \n\t"\ + "ROR %[S_8], %[S_6] , #29 \n\t"\ + "ROR %[S_6], %[S_7] , #30 \n\t"\ + "ROR %[S_7], %[t1] , #29 \n\t"\ + "/*rotate shift left 55 bit [w11 w7 w3-> (w3,13) (w11,14) ( w7,14)] */ \n\t"\ + "mov %[t1], %[S_10] \n\t"\ + "ROR %[S_10], %[S_9] , #14 \n\t"\ + "ROR %[S_9], %[S_11] , #14 \n\t"\ + "ROR %[S_11], %[t1] , #13 \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1),\ + [S_3] "+r" (s[3]), [S_6] "+r" (s[6]), [S_9] "+r" (s[9]) ,\ + [S_4] "+r" (s[4]), [S_7] "+r" (s[7]), [S_10] "+r" (s[10]),\ + [S_5] "+r" (s[5]), [S_8] "+r" (s[8]), [S_11] "+r" (s[11])\ + : : );\ +} diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/crypto_hash.h b/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/crypto_hash.h new file mode 100644 index 0000000..0632591 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/crypto_hash.h @@ -0,0 +1,6 @@ + +int crypto_hash( + unsigned char *out, + const unsigned char *in, + unsigned long long inlen + ); \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/hash.c 
b/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/hash.c new file mode 100644 index 0000000..321855b --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/armcortexm_6/hash.c @@ -0,0 +1,56 @@ +#include"auxFormat.h" +//#define hash_RATE (128 / 8) +#define hash_RATE 16 +//#define PRH_ROUNDS 80 +#define PRH_ROUNDS 26 + + +int crypto_hash(unsigned char *out, const unsigned char *in, + unsigned long long inlen) { + + u32 s[12] = { 0 }; + u32 dataFormat[6] = { 0 }; + u8 tempData[24] = { 0 }; + u32 t2; + // initialization + s[9] = 0x80000000; + //absorb + while (inlen >= hash_RATE) { + packU96FormatToThreePacket(dataFormat, in); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU32FormatToThreePacket(dataFormat + 3, in + 12); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + P384_2(s, constant7Format, PRH_ROUNDS); + inlen -= hash_RATE; + in += hash_RATE; + } + memset(tempData, 0, hash_RATE); + memcpy(tempData, in, inlen * sizeof(unsigned char)); + tempData[inlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU32FormatToThreePacket(dataFormat + 3, tempData + 12); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + P384_2(s, constant7Format, PRH_ROUNDS); + //sequeez + + unpackU96FormatToThreePacket(out, s); + unpackU32FormatToThreePacket(out + 12, s + 3); + P384_2(s, constant7Format, PRH_ROUNDS); + out += CRYPTO_BYTES / 2; + unpackU96FormatToThreePacket(out, s); + unpackU32FormatToThreePacket(out + 12, s + 3); + return 0; + +} + + + diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_3_1/api.h b/knot/Implementations/crypto_hash/knot384/armcortexm_3_1/api.h new file mode 100644 index 0000000..249c3fd --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_3_1/api.h @@ -0,0 +1,2 @@ + +#define CRYPTO_BYTES 48 diff --git 
a/knot/Implementations/crypto_hash/knot384/armcortexm_3_1/auxFormat.h b/knot/Implementations/crypto_hash/knot384/armcortexm_3_1/auxFormat.h new file mode 100644 index 0000000..6da2d0e --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_3_1/auxFormat.h @@ -0,0 +1,107 @@ +#include +#include"crypto_hash.h" +#include"api.h" +#include +#include +#include +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) + + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) + +#define sbox(a, b, c, d, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ +} +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ +} +#define unpackU96FormatToThreePacket( out, in) {\ + u32 temp0[3] = { 0 }; \ + u32 temp1[3] = { 0 }; \ + u32 temp2[3] = { 0 }; \ + u32 t[3] = { 0 }; \ + temp0[0] = in[2] & 0x7ff; \ + temp0[1] = in[1] & 0x7ff; \ + temp0[2] = in[0] & 0x3ff; \ + temp1[0] = (in[0]>>10) & 0x7ff; \ + temp1[1] = (in[2] >>11 ) & 0x7ff; \ + temp1[2] = (in[1] >> 11) & 0x3ff; \ + temp2[0] = in[1] >> 21; \ + temp2[1] = in[0] >> 21; \ + temp2[2] = in[2] >> 22; \ + unpuckU32ToThree_1(temp0[0]); \ + unpuckU32ToThree_1(temp0[1]); \ + unpuckU32ToThree_1(temp0[2]); \ + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; \ + unpuckU32ToThree_1(temp1[0]); \ + unpuckU32ToThree_1(temp1[1]); \ + unpuckU32ToThree_1(temp1[2]); \ + t[1] = temp1[0] | temp1[1] << 
1 | temp1[2] << 2; \ + unpuckU32ToThree_1(temp2[0]); \ + unpuckU32ToThree_1(temp2[1]); \ + unpuckU32ToThree_1(temp2[2]); \ + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; \ + memcpy(out, t, 12 * sizeof(unsigned char)); \ +} + +#define packU48FormatToThreePacket( out, in) {\ + u32 t1 = (u32)U16BIG(*(u16*)(in + 4)); \ + u32 temp0[3] = { 0 }; \ + u32 temp1[3] = { 0 }; \ + temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; \ + puckU32ToThree_1(temp0[0]); \ + puckU32ToThree_1(temp0[1]); \ + puckU32ToThree_1(temp0[2]); \ + temp1[0] = t1; temp1[1] = t1 >> 1; temp1[2] = t1 >> 2; \ + puckU32ToThree_1(temp1[0]); \ + puckU32ToThree_1(temp1[1]); \ + puckU32ToThree_1(temp1[2]); \ + out[0] = (temp1[0] << 10) | temp0[2]; \ + out[1] = (temp1[2] << 11) | temp0[1]; \ + out[2] = (temp1[1] << 11) | temp0[0]; \ +} + +#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t2, 2);\ +t4 =LOTR32(t0, 3);\ +t5 = LOTR32(t1, 3); \ +} +//55=3*18+1 +#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t1, 18); \ +t4 = LOTR32(t2, 18);\ +t5 = LOTR32(t0, 19); \ +} +#define ROUND384(lunNum) {\ +s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;\ +s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;\ +s[2] ^= constant7Format[lunNum] & 0x7;\ +sbox(s[0], s[3], s[6], s[9] , s_temp[3], s_temp[6], s_temp[9]);\ +sbox(s[1], s[4], s[7], s[10], s[3] , s_temp[7], s_temp[10]);\ +sbox(s[2], s[5], s[8], s[11], s[4] , s_temp[8], s_temp[11]);\ +s[5] = LOTR32(s_temp[3], 1); \ +U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\ +U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\ +} + diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_3_1/crypto_hash.h b/knot/Implementations/crypto_hash/knot384/armcortexm_3_1/crypto_hash.h new file mode 100644 index 0000000..0632591 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_3_1/crypto_hash.h @@ -0,0 +1,6 @@ + +int crypto_hash( + unsigned 
char *out, + const unsigned char *in, + unsigned long long inlen + ); \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_3_1/hash.c b/knot/Implementations/crypto_hash/knot384/armcortexm_3_1/hash.c new file mode 100644 index 0000000..746235b --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_3_1/hash.c @@ -0,0 +1,64 @@ +#include"auxFormat.h" + +//#define hash_RATE (48 / 8) +#define hash_RATE 6 + +#define PRH_ROUNDS 104 +//12*9=108 +unsigned char constant7Format[104] = { +/*constant7Format[127]:*/ +0x01, 0x08, 0x40, 0x02, 0x10, 0x80, 0x05, 0x09, 0x48, 0x42, 0x12, 0x90, 0x85, + 0x0c, 0x41, 0x0a, 0x50, 0x82, 0x15, 0x89, 0x4d, 0x4b, 0x5a, 0xd2, 0x97, + 0x9c, 0xc4, 0x06, 0x11, 0x88, 0x45, 0x0b, 0x58, 0xc2, 0x17, 0x99, 0xcd, + 0x4e, 0x53, 0x9a, 0xd5, 0x8e, 0x54, 0x83, 0x1d, 0xc9, 0x4f, 0x5b, 0xda, + 0xd7, 0x9e, 0xd4, 0x86, 0x14, 0x81, 0x0d, 0x49, 0x4a, 0x52, 0x92, 0x95, + 0x8c, 0x44, 0x03, 0x18, 0xc0, 0x07, 0x19, 0xc8, 0x47, 0x1b, 0xd8, 0xc7, + 0x1e, 0xd1, 0x8f, 0x5c, 0xc3, 0x1f, 0xd9, 0xcf, 0x5e, 0xd3, 0x9f, 0xdc, + 0xc6, 0x16, 0x91, 0x8d, 0x4c, 0x43, 0x1a, 0xd0, 0x87, 0x1c, 0xc1, 0x0f, + 0x59, 0xca, 0x57, 0x9b, 0xdd, 0xce, 0x56, }; +#define Processing_Data(data) \ +do { \ +packU48FormatToThreePacket(dataFormat, data);\ +s[0] ^= dataFormat[0];\ +s[1] ^= dataFormat[1];\ +s[2] ^= dataFormat[2];\ +} while (0) + +int crypto_hash(unsigned char *out, const unsigned char *in, + unsigned long long inlen) { + + u32 t1, t2, t3, t5, t6, t8, t9, t11; + u32 s_temp[12] = { 0 }; + u8 i; + u32 dataFormat[3] = { 0 }; + // initialization + u32 s[12] = { 0 }; + u8 tempData[12]; + //absorb + while (inlen >= hash_RATE) { + Processing_Data(in); + for (i = 0; i < PRH_ROUNDS; i++) { + ROUND384(i); + } + inlen -= hash_RATE; + in += hash_RATE; +} + memset(tempData, 0, hash_RATE); + memcpy(tempData, in, inlen * sizeof(unsigned char)); + tempData[inlen] = 0x01; + Processing_Data(tempData); + for (i = 0; i < PRH_ROUNDS; i++) { + 
ROUND384(i); + } + //sequeez + unpackU96FormatToThreePacket(out, s); + unpackU96FormatToThreePacket((out + 12), (s + 3)); + for (i = 0; i < PRH_ROUNDS; i++) { + ROUND384(i); + } + out += CRYPTO_BYTES / 2; + unpackU96FormatToThreePacket(out, s); + unpackU96FormatToThreePacket((out + 12), (s + 3)); + return 0; +} + diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_4_1/api.h b/knot/Implementations/crypto_hash/knot384/armcortexm_4_1/api.h new file mode 100644 index 0000000..249c3fd --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_4_1/api.h @@ -0,0 +1,2 @@ + +#define CRYPTO_BYTES 48 diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_4_1/auxFormat.c b/knot/Implementations/crypto_hash/knot384/armcortexm_4_1/auxFormat.c new file mode 100644 index 0000000..1d799e6 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_4_1/auxFormat.c @@ -0,0 +1,76 @@ +#include"auxFormat.h" +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + temp0[0] = in[2] & 0x7ff; + temp0[1] = in[1] & 0x7ff; + temp0[2] = in[0] & 0x3ff; + temp1[0] = (in[0]>>10) & 0x7ff; + temp1[1] = (in[2] >>11 ) & 0x7ff; + temp1[2] = (in[1] >> 11) & 0x3ff; + temp2[0] = in[1] >> 21; + temp2[1] = in[0] >> 21; + temp2[2] = in[2] >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; + memcpy(out, t, 12 * sizeof(unsigned char)); +} + +void packU48FormatToThreePacket(u32 * out, u8 * in) { + u32 t1 = (u32)U16BIG(*(u16*)(in + 4)); + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; 
+ temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + temp1[0] = t1; temp1[1] = t1 >> 1; temp1[2] = t1 >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + out[0] = (temp1[0] << 10) | temp0[2]; + out[1] = (temp1[2] << 11) | temp0[1]; + out[2] = (temp1[1] << 11) | temp0[0]; +} + + + +unsigned char constant7Format[104] = { + /*constant7Format[127]: 12*9=108*/ +0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, +0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, +0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, +0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, +0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, +0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, +0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,0xcf,0x5e,0xd3,0x9f, +0xdc,0xc6,0x16,0x91,0x8d,0x4c,0x43,0x1a,0xd0,0x87,0x1c,0xc1, +0x0f,0x59,0xca,0x57,0x9b,0xdd,0xce,0x56,}; +void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) { + u32 s_temp[12] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + unsigned char i; + for (i = 0; i < lunNum; i++) { + s[0] ^= (round[i] >> 6) & 0x3; + s[1] ^= (round[i] >> 3) & 0x7; + s[2] ^= round[i] & 0x7; + sbox(s[0], s[3], s[6], s[9], s_temp[3], s_temp[6], s_temp[9]); + sbox(s[1], s[4], s[7], s[10], s[3], s_temp[7], s_temp[10]); + sbox(s[2], s[5], s[8], s[11], s[4], s_temp[8], s_temp[11]); + s[5] = LOTR32(s_temp[3], 1); + U96_BIT_LOTR32_8(s_temp[6], s_temp[7], s_temp[8], s[6], s[7], s[8]); + U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]); + } +} diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_4_1/auxFormat.h b/knot/Implementations/crypto_hash/knot384/armcortexm_4_1/auxFormat.h new file mode 100644 index 0000000..d9d24e4 --- /dev/null +++ 
b/knot/Implementations/crypto_hash/knot384/armcortexm_4_1/auxFormat.h @@ -0,0 +1,57 @@ +#include +#include"crypto_hash.h" +#include"api.h" +#include +#include +#include +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; + +void packU48FormatToThreePacket(u32 * out, u8 * in) ; + +void P384(unsigned int *s, unsigned char *round, unsigned char lunNum) ; +void unpackU96FormatToThreePacket(u8 * out, u32 * in) ; +/* puckU32ToThree_1 keeps every third bit of x (mask 0x49249249) and gathers those bits into the low bits, masked to 0xfff; unpuckU32ToThree_1 is the reverse spread. Both modify x in place. */ +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ +} +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ +} +/* ARR_SIZE: element count of a real array (not a pointer). LOTR32: 32-bit rotate left; n must be 1..31 because n == 0 would shift right by 32. */ +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) + + +/* sbox: reads a,b,c,d; overwrites a in place and writes the other three results to f,g,h. Uses the temporaries t1,t2,t3,t5,t6,t8,t9,t11, which the caller must declare. */ +#define sbox(a, b, c, d, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} +/* Writes t3,t4,t5 from rotated copies of t2,t0,t1; presumably a 96-bit left rotation by 8 (= 3*2+2) over three bit-interleaved words - confirm against the packing format. */ +#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t2, 2);\ +t4 =LOTR32(t0, 3);\ +t5 = LOTR32(t1, 3); \ +} +//55=3*18+1 +#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t1, 18); \ +t4 = LOTR32(t2, 18);\ +t5 = LOTR32(t0, 19); \ +} +extern unsigned char constant7Format[104]; /* declaration only: the table is defined once in auxFormat.c, so including this header no longer creates a second definition */ + diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_4_1/crypto_hash.h b/knot/Implementations/crypto_hash/knot384/armcortexm_4_1/crypto_hash.h new file mode 100644 index 0000000..0632591 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_4_1/crypto_hash.h @@ -0,0 +1,6 @@ +/* Hashes in[0..inlen) and writes CRYPTO_BYTES bytes to out; the implementation in hash.c always returns 0. */ +int crypto_hash( + unsigned char *out, + const unsigned char *in, + unsigned long long inlen + ); \ No newline at end of file diff --git
a/knot/Implementations/crypto_hash/knot384/armcortexm_4_1/hash.c b/knot/Implementations/crypto_hash/knot384/armcortexm_4_1/hash.c new file mode 100644 index 0000000..020c61d --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_4_1/hash.c @@ -0,0 +1,44 @@ +#include"auxFormat.h" + +//#define hash_RATE (48 / 8) +#define hash_RATE 6 + +#define PRH_ROUNDS 104 + +int crypto_hash(unsigned char *out, const unsigned char *in, + unsigned long long inlen) { + u32 dataFormat[3] = { 0 }; + // initialization + u32 s[12] = { 0 }; + u8 tempData[12]; + //absorb + while (inlen >= hash_RATE) { + packU48FormatToThreePacket(dataFormat, in); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + P384(s, constant7Format, PRH_ROUNDS); + inlen -= hash_RATE; + in += hash_RATE; + } + memset(tempData, 0, hash_RATE); + memcpy(tempData, in, inlen * sizeof(unsigned char)); + tempData[inlen] = 0x01; + packU48FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + + P384(s, constant7Format, PRH_ROUNDS); + //sequeez + + unpackU96FormatToThreePacket(out, s); + unpackU96FormatToThreePacket(out + 12, s + 3); + + P384(s, constant7Format, PRH_ROUNDS); + out += CRYPTO_BYTES / 2; + unpackU96FormatToThreePacket(out, s); + unpackU96FormatToThreePacket(out + 12, s + 3); + return 0; +} + diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_6/api.h b/knot/Implementations/crypto_hash/knot384/armcortexm_6/api.h new file mode 100644 index 0000000..249c3fd --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_6/api.h @@ -0,0 +1,2 @@ + +#define CRYPTO_BYTES 48 diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.c b/knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.c new file mode 100644 index 0000000..a12c2aa --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.c @@ -0,0 +1,88 @@ +#include"auxFormat.h" + +void 
unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + temp0[0] = in[2] & 0x7ff; + temp0[1] = in[1] & 0x7ff; + temp0[2] = in[0] & 0x3ff; + temp1[0] = (in[0]>>10) & 0x7ff; + temp1[1] = (in[2] >>11 ) & 0x7ff; + temp1[2] = (in[1] >> 11) & 0x3ff; + temp2[0] = in[1] >> 21; + temp2[1] = in[0] >> 21; + temp2[2] = in[2] >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; + memcpy(out, t, 12 * sizeof(unsigned char)); +} + +void packU48FormatToThreePacket(u32 * out, u8 * in) { + u32 t1 = (u32)U16BIG(*(u16*)(in + 4)); + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + temp0[0] = U32BIG(((u32*)in)[0]); temp0[1] = U32BIG(((u32*)in)[0]) >> 1; temp0[2] = U32BIG(((u32*)in)[0]) >> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + temp1[0] = t1; temp1[1] = t1 >> 1; temp1[2] = t1 >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + out[0] = (temp1[0] << 10) | temp0[2]; + out[1] = (temp1[2] << 11) | temp0[1]; + out[2] = (temp1[1] << 11) | temp0[0]; +} + +unsigned char constant7Format[104] = { + /*constant7Format[127]: 12*9=108*/ +0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, +0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, +0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, +0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, +0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, +0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, 
+0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,0xcf,0x5e,0xd3,0x9f, +0xdc,0xc6,0x16,0x91,0x8d,0x4c,0x43,0x1a,0xd0,0x87,0x1c,0xc1, +0x0f,0x59,0xca,0x57,0x9b,0xdd,0xce,0x56,}; +void ROUND384_Three(unsigned int *s, unsigned char *c, int lunnum) { + unsigned int t, t1, t2; + u32 rci, temp; + rci = c[0]; + ARC(rci); + SBOX(s[0], s[3], s[6], s[9]); + SBOX(s[1], s[4], s[7], s[10]); + SBOX(s[2], s[5], s[8], s[11]); + t = 1; + while (lunnum--) { + temp = ((u32*) (c + t))[0]; + rci = temp & 0xff; + ARC(rci); + SBOX1_ROR(s[0], s[4], s[8], s[10] ); + SBOX2_ROR(s[1], s[5], s[6], s[11]); + SBOX3_ROR(s[2], s[3], s[7], s[9]); + rci = (temp & 0xff00) >> 8; + ARC(rci); + SBOX1_ROR(s[0], s[5], s[7], s[11]); + SBOX2_ROR(s[1], s[3], s[8], s[9]); + SBOX3_ROR(s[2], s[4], s[6], s[10]); + rci = (temp & 0xff0000) >> 16; + ARC(rci); + SBOX1_ROR(s[0], s[3], s[6], s[9]); + SBOX2_ROR(s[1], s[4], s[7], s[10]); + SBOX3_ROR(s[2], s[5], s[8], s[11]); + t += 3; + } +} diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.h b/knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.h new file mode 100644 index 0000000..5353b39 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_6/auxFormat.h @@ -0,0 +1,164 @@ +#include +#include"crypto_hash.h" +#include"api.h" +#include +#include +#include +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; + + +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ +} +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ +} +#define ARC(rci) \ + do { \ + __asm__ __volatile__ ( \ + "/*add round const s0 s1 s2 */ \n\t"\ + "ands %[t1], %[rci], #0xc0\n\t" \ + 
"eors %[S_0], %[S_0], %[t1], LSR #6 \n\t" /*s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;*/\ + "ands %[t1], %[rci], #0x38\n\t" \ + "eors %[S_1], %[S_1], %[t1], LSR #3 \n\t" /*s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;*/\ + "ands %[t1], %[rci], #0x7\n\t" \ + "eors %[S_3], %[S_3], %[t1] \n\t" /*s[2] ^= constant7Format[lunNum] & 0x7;*/\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [rci] "+r" (rci), \ + [S_0] "+r" (s[0]), [S_1] "+r" (s[1]), [S_3] "+r" (s[2])\ + : : );\ +}while (0) +#define SBOX(S1,S2,S3,S4) \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[S_4], %[t1] \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] \n\t"\ + "eors %[S_0], %[S_6], %[S_0] \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] \n\t"\ + "eors %[S_6], %[S_6], %[t1] \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], %[t2] , %[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : : );\ +}while (0) +#define SBOX1_ROR(S1,S2,S3,S4) \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[t1] , %[S_4] ,ROR #30 \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] ,ROR #30 \n\t"\ + "eors %[S_0], %[S_0], %[S_6] ,ROR #14 \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] ,ROR #14 \n\t"\ + "eors %[S_6], %[t1], %[S_6] ,ROR #14 \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], %[t2] , %[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) 
\ + : : );\ +}while (0) +#define SBOX2_ROR(S1,S2,S3,S4) \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[t1] , %[S_4] ,ROR #29 \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] ,ROR #29 \n\t"\ + "eors %[S_0], %[S_0], %[S_6] ,ROR #14 \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] ,ROR #14 \n\t"\ + "eors %[S_6], %[t1], %[S_6] ,ROR #14 \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], %[t2] , %[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : : );\ +}while (0) + +#define SBOX3_ROR(S1,S2,S3,S4) \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_0], %[S_2] ,ROR #31 \n\t"\ + "eors %[t1], %[t1], %[S_4] ,ROR #29 \n\t"\ + "orrs %[S_4], %[S_4], %[S_2] ,ROR #2 \n\t"\ + "eors %[S_0], %[S_0], %[S_6] ,ROR #13 \n\t"\ + "eors %[S_4], %[S_0], %[S_4] ,ROR 29 \n\t"\ + "eors %[t2], %[S_6], %[S_2] ,ROR #18 \n\t"\ + "eors %[S_6], %[t1] , %[S_6] ,ROR #13 \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], %[S_0] , %[t2] ,ROR #13 \n\t"\ + "ands %[S_2], %[S_4], %[t2] ,ROR #13 \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : : );\ +}while (0) + +#define P384_2( s, round, lunNum) {\ + u32 t1,rci;\ + ROUND384_Three(s,round,lunNum);\ + rci=round[lunNum*3+1];\ + ARC(rci);\ + SBOX1_ROR(s[0], s[4], s[8], s[10] );\ + SBOX2_ROR(s[1], s[5], s[6], s[11]);\ + SBOX3_ROR(s[2], s[3], s[7], s[9]);\ + __asm__ __volatile__ ( \ + "/*rotate shift left 1 bit [w9 w5 w1-> (w1,1) w9 w5] */ \n\t"\ + "mov %[t1], %[S_4] \n\t"\ + "mov %[S_4], %[S_3] 
\n\t"\ + "mov %[S_3], %[S_5] \n\t"\ + "ROR %[S_5], %[t1] , #31 \n\t"\ + "/*rotate shift left 8 bits [w10 w6 w2-> (w6,3) (w2,3) ( w10,2)]*/ \n\t"\ + "mov %[t1], %[S_8] \n\t"\ + "ROR %[S_8], %[S_6] , #29 \n\t"\ + "ROR %[S_6], %[S_7] , #30 \n\t"\ + "ROR %[S_7], %[t1] , #29 \n\t"\ + "/*rotate shift left 55 bit [w11 w7 w3-> (w3,13) (w11,14) ( w7,14)] */ \n\t"\ + "mov %[t1], %[S_10] \n\t"\ + "ROR %[S_10], %[S_9] , #14 \n\t"\ + "ROR %[S_9], %[S_11] , #14 \n\t"\ + "ROR %[S_11], %[t1] , #13 \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1),\ + [S_3] "+r" (s[3]), [S_6] "+r" (s[6]), [S_9] "+r" (s[9]) ,\ + [S_4] "+r" (s[4]), [S_7] "+r" (s[7]), [S_10] "+r" (s[10]),\ + [S_5] "+r" (s[5]), [S_8] "+r" (s[8]), [S_11] "+r" (s[11])\ + : : );\ +} + +/* Prototypes for the helpers defined in auxFormat.c. hash.c calls them (ROUND384_Three through the P384_2 macro) and previously had no declaration in scope. */ +void ROUND384_Three(unsigned int *s, unsigned char *c, int lunnum); +void packU48FormatToThreePacket(u32 * out, u8 * in); +void unpackU96FormatToThreePacket(u8 * out, u32 * in); +/* Declaration only: the 104-entry round-constant table is defined once in auxFormat.c. */ +extern unsigned char constant7Format[104]; + diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_6/crypto_hash.h b/knot/Implementations/crypto_hash/knot384/armcortexm_6/crypto_hash.h new file mode 100644 index 0000000..0632591 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_6/crypto_hash.h @@ -0,0 +1,6 @@ +/* Hashes in[0..inlen) and writes CRYPTO_BYTES bytes to out; the implementation in hash.c always returns 0. */ +int crypto_hash( + unsigned char *out, + const unsigned char *in, + unsigned long long inlen + ); \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot384/armcortexm_6/hash.c b/knot/Implementations/crypto_hash/knot384/armcortexm_6/hash.c new file mode 100644 index 0000000..3a9b7b5 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/armcortexm_6/hash.c @@ -0,0 +1,44 @@ +#include"auxFormat.h" + +#define hash_RATE 6 +//#define hash_RATE (48 / 8) + +#define PRH_ROUNDS 34 +//#define PRH_ROUNDS 104 104/3=34+2 + +int crypto_hash(unsigned char *out, const unsigned char *in, + unsigned long long inlen) { + u32 dataFormat[3] = { 0 }, t1, t2; + // initialization + u32 s[12] = { 0 }; + u8 tempData[12]; + //absorb + while (inlen >= hash_RATE) { + packU48FormatToThreePacket(dataFormat, in); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^=
dataFormat[2]; + P384_2(s, constant7Format, PRH_ROUNDS); + inlen -= hash_RATE; + in += hash_RATE; + } + memset(tempData, 0, hash_RATE); + memcpy(tempData, in, inlen * sizeof(unsigned char)); + tempData[inlen] = 0x01; + packU48FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + P384_2(s, constant7Format, PRH_ROUNDS); + //sequeez + + unpackU96FormatToThreePacket(out, s); + unpackU96FormatToThreePacket(out + 12, s + 3); + + P384_2(s, constant7Format, PRH_ROUNDS); + out += CRYPTO_BYTES / 2; + unpackU96FormatToThreePacket(out, s); + unpackU96FormatToThreePacket(out + 12, s + 3); + return 0; +} + diff --git a/knot/Implementations/crypto_hash/knot512/armcortexm_6/api.h b/knot/Implementations/crypto_hash/knot512/armcortexm_6/api.h new file mode 100644 index 0000000..5055cbf --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/armcortexm_6/api.h @@ -0,0 +1,2 @@ +#define CRYPTO_BYTES 64 + diff --git a/knot/Implementations/crypto_hash/knot512/armcortexm_6/auxFormat.c b/knot/Implementations/crypto_hash/knot512/armcortexm_6/auxFormat.c new file mode 100644 index 0000000..4d2cbf5 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/armcortexm_6/auxFormat.c @@ -0,0 +1,93 @@ +#include"auxFormat.h" + +void P512(unsigned int *s, unsigned char *round, unsigned char rounds) { + u32 rci, t1, t2, t3, t9; + unsigned char rcNum = 0; + rci = round[rcNum++]; + ARC(rci); + SBOX1(s[0], s[4], s[8 ], s[12]); + SBOX1(s[1], s[5], s[9 ], s[13]); + SBOX1(s[2], s[6], s[10], s[14]); + SBOX1(s[3], s[7], s[11], s[15]); + while (rounds--) { + rci = round[rcNum++]; + ARC(rci); + SBOX2(s[0], s[7], s[8], s[15]); + SBOX3(s[1], s[4], s[9 ], s[12]); + SBOX3(s[2], s[5], s[10], s[13]); + SBOX3(s[3], s[6], s[11], s[14]); + rci = round[rcNum++]; + ARC(rci); + SBOX2(s[0], s[6], s[8], s[14]); + SBOX3(s[1], s[7], s[9 ], s[15]); + SBOX3(s[2], s[4], s[10], s[12]); + SBOX3(s[3], s[5], s[11], s[13]); + rci = round[rcNum++]; + ARC(rci); + 
SBOX2(s[0], s[5], s[8], s[13]); + SBOX3(s[1], s[6], s[9 ], s[14]); + SBOX3(s[2], s[7], s[10], s[15]); + SBOX3(s[3], s[4], s[11], s[12]); + rci = round[rcNum++]; + ARC(rci); + SBOX2(s[0], s[4], s[8 ], s[12]); + SBOX3(s[1], s[5], s[9 ], s[13]); + SBOX3(s[2], s[6], s[10], s[14]); + SBOX3(s[3], s[7], s[11], s[15]); + } + rci = round[rcNum++]; + ARC(rci); + SBOX2(s[0], s[7], s[8], s[15]); + SBOX3(s[1], s[4], s[9 ], s[12]); + SBOX3(s[2], s[5], s[10], s[13]); + SBOX3(s[3], s[6], s[11], s[14]); + rci = round[rcNum++]; + ARC(rci); + SBOX2(s[0], s[6], s[8], s[14]); + SBOX3(s[1], s[7], s[9 ], s[15]); + SBOX3(s[2], s[4], s[10], s[12]); + SBOX3(s[3], s[5], s[11], s[13]); + rci = round[rcNum++]; + ARC(rci); + SBOX2(s[0], s[5], s[8], s[13]); + SBOX3(s[1], s[6], s[9 ], s[14]); + SBOX3(s[2], s[7], s[10], s[15]); + SBOX3(s[3], s[4], s[11], s[12]); + SR(s[4], s[8], s[9], s[10], s[11], s[12], s[13], s[14], s[15]); +} +void packU64FormatToFourPacket(u32 *out, u8 *in) { + u32 t1, t2, temp1; + t1 = U32BIG(((u32* )in)[0]); + t2 = U32BIG(((u32* )in)[1]); + puck32(t1); + puck32(t1); + puck32(t2); + puck32(t2); + out[3] = ((t2 >> 16) & 0x0000ff00) | ((t1 >> 24)); + out[2] = ((t2 >> 8) & 0x0000ff00) | ((t1 >> 16) & 0x000000ff); + out[1] = (t2 & 0x0000ff00) | ((t1 >> 8) & 0x000000ff); + out[0] = ((t2 << 8) & 0x0000ff00) | (t1 & 0x000000ff); +} + +void unpackU128FormatToFourPacket(u8 *out, u32 *in) { + u32 t[4] = { 0 }; + u32 r0; + t[3] = (in[3] & 0xff000000) | ((in[2] >> 8) & 0x00ff0000) + | ((in[1] >> 16) & 0x0000ff00) | (in[0] >> 24); + t[2] = ((in[3] << 8) & 0xff000000) | (in[2] & 0x00ff0000) + | ((in[1] >> 8) & 0x0000ff00) | ((in[0] >> 16) & 0x000000ff); + t[1] = ((in[3] << 16) & 0xff000000) | ((in[2] << 8) & 0x00ff0000) + | (in[1] & 0x0000ff00) | ((in[0] >> 8) & 0x000000ff); + t[0] = ((in[3] << 24) & 0xff000000) | ((in[2] << 16) & 0x00ff0000) + | ((in[1] << 8) & 0x0000ff00) | (in[0] & 0x000000ff); + unpuck32(t[0]); + unpuck32(t[0]); + unpuck32(t[1]); + unpuck32(t[1]); + unpuck32(t[2]); + 
unpuck32(t[2]); + unpuck32(t[3]); + unpuck32(t[3]); + memcpy(out, t, 16 * sizeof(unsigned char)); +} + diff --git a/knot/Implementations/crypto_hash/knot512/armcortexm_6/auxFormat.h b/knot/Implementations/crypto_hash/knot512/armcortexm_6/auxFormat.h new file mode 100644 index 0000000..07ff59e --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/armcortexm_6/auxFormat.h @@ -0,0 +1,135 @@ +#include +#include +#include"crypto_hash.h" +#include"api.h" +#include +#define U32BIG(x) (x) + + +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; + +#define puck32(in)\ +{\ +temp1 = (in ^ (in >> 1)) & 0x22222222; in ^= temp1 ^ (temp1 << 1);\ +temp1 = (in ^ (in >> 2)) & 0x0C0C0C0C; in ^= temp1 ^ (temp1 << 2);\ +temp1 = (in ^ (in >> 4)) & 0x00F000F0; in ^= temp1 ^ (temp1 << 4);\ +temp1 = (in ^ (in >> 8)) & 0x0000FF00; in ^= temp1 ^ (temp1 << 8);\ +} +#define unpuck32(t0){\ + r0 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= r0 ^ (r0 << 8); \ + r0 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= r0 ^ (r0 << 4); \ + r0 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= r0 ^ (r0 << 2); \ + r0 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= r0 ^ (r0 << 1); \ +} + +void P512(unsigned int *s, unsigned char *round, unsigned char rounds); +void unpackU128FormatToFourPacket(u8 *out, u32 *in); +void packU64FormatToFourPacket(u32 *out, u8 *in) ; +#define ARC(rci) \ + do { \ + __asm__ __volatile__ ( \ + "/*add round const s0 s1 s2 s3*/ \n\t"\ + "ands %[t1] , %[rci], #0xc0\n\t" \ + "eors %[S_3], %[S_3], %[t1], LSR #6 \n\t" /*s[3] ^= (constant7Format_aead[lunNum] >> 6) & 0x3;*/\ + "ands %[t2] , %[rci], #0x30\n\t" \ + "eors %[S_2], %[S_2], %[t2], LSR #4 \n\t" /*s[2] ^= (constant7Format_aead[lunNum] >> 4) & 0x3;*/\ + "ands %[t3] , %[rci], #0xc\n\t" \ + "eors %[S_1], %[S_1], %[t3], LSR #2 \n\t" /*s[1] ^= (constant7Format_aead[lunNum] >> 2) & 0x3;*/\ + "ands %[t4] , %[rci], #0x3\n\t" \ + "eors %[S_0], %[S_0], %[t4] \n\t" /*s[0] ^= constant7Format_aead[lunNum] & 0x3;*/\ + : /* output variables 
- including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2), [t3] "=r" (t3), [t4] "=r" (t9), [rci] "+r" (rci),\ + [S_0] "+r" (s[0]), [S_1] "+r" (s[1]), [S_2] "+r" (s[2]),[S_3] "+r" (s[3])\ + : : );\ +}while (0) +#define SBOX2(S1,S2,S3,S4) \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "ROR %[S_2] , #31 \n\t"\ + "ROR %[S_4] , #28 \n\t"\ + "ROR %[S_6] , #25 \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[S_4], %[t1] \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] \n\t"\ + "eors %[S_0], %[S_6], %[S_0] \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] \n\t"\ + "eors %[S_6], %[S_6], %[t1] \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], %[t2] , %[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : : );\ +}while (0) +#define SBOX3(S1,S2,S3,S4) \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "ROR %[S_4] , #28 \n\t"\ + "ROR %[S_6] , #26 \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[S_4], %[t1] \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] \n\t"\ + "eors %[S_0], %[S_6], %[S_0] \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] \n\t"\ + "eors %[S_6], %[S_6], %[t1] \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], %[t2] , %[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : : );\ +}while (0) + +#define SBOX1(S1,S2,S3,S4) \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[S_4], 
%[t1] \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] \n\t"\ + "eors %[S_0], %[S_6], %[S_0] \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] \n\t"\ + "eors %[S_6], %[S_6], %[t1] \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], %[t2] , %[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : : );\ +}while (0) +#define SR(S4,S8,S9,S10,S11,S12,S13,S14,S15) \ + do { \ + __asm__ __volatile__ ( \ + "ROR %[S_4] , #31 \n\t"\ + "ROR %[S_11] , #28 \n\t"\ + "ROR %[S_10] , #28 \n\t"\ + "ROR %[S_9] , #28 \n\t"\ + "ROR %[S_8] , #28 \n\t"\ + "ROR %[S_12] , #25 \n\t"\ + "ROR %[S_13] , #26 \n\t"\ + "ROR %[S_14] , #26 \n\t"\ + "ROR %[S_15] , #26 \n\t"\ + : /* output variables - including inputs that are changed */\ + [S_4] "+r" (S4),\ + [S_12] "+r" (S12), [S_8] "+r" (S8) ,\ + [S_13] "+r" (S13), [S_9] "+r" (S9) ,\ + [S_14] "+r" (S14), [S_10] "+r" (S10),\ + [S_15] "+r" (S15), [S_11] "+r" (S11)\ + : : );\ +}while (0) diff --git a/knot/Implementations/crypto_hash/knot512/armcortexm_6/crypto_hash.h b/knot/Implementations/crypto_hash/knot512/armcortexm_6/crypto_hash.h new file mode 100644 index 0000000..0632591 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/armcortexm_6/crypto_hash.h @@ -0,0 +1,6 @@ + +int crypto_hash( + unsigned char *out, + const unsigned char *in, + unsigned long long inlen + ); \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot512/armcortexm_6/hash.c b/knot/Implementations/crypto_hash/knot512/armcortexm_6/hash.c new file mode 100644 index 0000000..d98e140 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/armcortexm_6/hash.c @@ -0,0 +1,64 @@ +#include"auxFormat.h" + +//#define PRH_ROUNDS 140 /4=35 + +#define PRH_ROUNDS 34 +#define hash_RATE 8 +//#define hash_RATE (64 / 8) +unsigned char 
constant8Format_hash[140] = { +/*constant8_hash_512*/ +0x1, 0x4, 0x10, 0x40, 0x3, 0xd, 0x35, 0xd4, 0x52, 0x4a, 0x2b, 0xac, 0xb0, 0xc1, + 0x6, 0x19, 0x65, 0x97, 0x5c, 0x72, 0xca, 0x2a, 0xa8, 0xa0, 0x81, 0x5, + 0x14, 0x50, 0x43, 0xe, 0x38, 0xe1, 0x86, 0x18, 0x61, 0x87, 0x1c, 0x71, + 0xc7, 0x1f, 0x7c, 0xf2, 0xcb, 0x2e, 0xb8, 0xe0, 0x82, 0x8, 0x21, 0x84, + 0x11, 0x44, 0x13, 0x4d, 0x36, 0xd9, 0x67, 0x9e, 0x79, 0xe6, 0x9b, 0x6d, + 0xb6, 0xd8, 0x63, 0x8e, 0x39, 0xe5, 0x96, 0x58, 0x62, 0x8a, 0x29, 0xa5, + 0x95, 0x55, 0x57, 0x5e, 0x7b, 0xef, 0xbe, 0xf9, 0xe7, 0x9f, 0x7d, 0xf6, + 0xdb, 0x6e, 0xbb, 0xed, 0xb7, 0xdc, 0x73, 0xce, 0x3a, 0xe8, 0xa3, 0x8c, + 0x30, 0xc0, 0x2, 0x9, 0x25, 0x94, 0x51, 0x47, 0x1e, 0x78, 0xe2, 0x8b, + 0x2d, 0xb5, 0xd5, 0x56, 0x5a, 0x6b, 0xaf, 0xbd, 0xf4, 0xd2, 0x4b, 0x2f, + 0xbc, 0xf0, 0xc2, 0xb, 0x2c, 0xb1, 0xc5, 0x16, 0x59, 0x66, 0x9a, 0x69, + 0xa6, 0x98, 0x60, 0x83, 0xc, 0x31, }; + +int crypto_hash(unsigned char *out, const unsigned char *in, + unsigned long long inlen) { + + u32 dataFormat[4] = { 0 }; + // initialization + u32 s[16] = { 0 }; + u8 tempData[32]; + //absorb + + while (inlen >= hash_RATE) { + packU64FormatToFourPacket(dataFormat, in); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + s[3] ^= dataFormat[3]; + P512(s, constant8Format_hash, PRH_ROUNDS); + inlen -= hash_RATE; + in += hash_RATE; + } + memset(tempData, 0, hash_RATE); + + memcpy(tempData, in, inlen * sizeof(unsigned char)); + tempData[inlen] = 0x01; + packU64FormatToFourPacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + s[3] ^= dataFormat[3]; + + P512(s, constant8Format_hash, PRH_ROUNDS); + //sequeez + + unpackU128FormatToFourPacket(out, s); + unpackU128FormatToFourPacket((out + 16), (s + 4)); + + P512(s, constant8Format_hash, PRH_ROUNDS); + out += CRYPTO_BYTES / 2; + unpackU128FormatToFourPacket(out, s); + unpackU128FormatToFourPacket((out + 16), (s + 4)); + return 0; +} + diff --git 
a/knot/Implementations/crypto_hash/main.c b/knot/Implementations/crypto_hash/main.c new file mode 100644 index 0000000..a8f8c82 --- /dev/null +++ b/knot/Implementations/crypto_hash/main.c @@ -0,0 +1,213 @@ +/* USER CODE BEGIN Header */ +/** + ****************************************************************************** + * @file : main.c + * @brief : Main program body + ****************************************************************************** + * @attention + * + *

© Copyright (c) 2020 STMicroelectronics. + * All rights reserved.

+ * + * This software component is licensed by ST under BSD 3-Clause license, + * the "License"; You may not use this file except in compliance with the + * License. You may obtain a copy of the License at: + * opensource.org/licenses/BSD-3-Clause + * + ****************************************************************************** + */ +/* USER CODE END Header */ +/* Includes ------------------------------------------------------------------*/ +#include "main.h" +#include "stdio.h" +/* USER CODE END Includes */ +/* USER CODE BEGIN 0 */ +/* NOTE(review): the rest of _write and the names of the two headers that follow appear to be missing from this copy of the patch; restore them from the original source before applying. */ +int _write(int file , char *ptr,int len) +{ + int i = 0; + for(i = 0;i +#include +#include + +#include "crypto_hash.h" +#include "api.h" + +#include"auxFormat.h" +//#include "test_TimingMbps.h" +//#include "timingCpb.h" + +/* Element count of a true array (not a pointer); the argument is parenthesised before indexing. */ +#define ARR_SIZE(a) (sizeof(a)/sizeof((a)[0])) + +#define PRINTU8 +/* Hex-dumps len bytes of var, starting at var[offset], as "xx|" groups with 16 bytes per row under a "name[len]:" heading; does nothing when PRINTU8 is not defined. */ +void printU8(char name[], u8 var[], long len, int offset) { +#ifdef PRINTU8 + long i; int t = 512 / 32; /* t: bytes printed per row */ + printf("%s[%ld]:\n", name, len); /* len is long, so the conversion must be %ld */ + for (i = 0; i < len; i++) { + printf("%02x|", var[i + offset]); + if (i%t == t - 1) + printf("\n"); + } + printf("\n"); +#endif +} +/* Demo: hashes the 14-byte message 00..0D and prints the input and the digest. Reference vector (presumably for the 256-bit variant - confirm): +Count = 15 +Msg = 000102030405060708090A0B0C0D +MD = BAF461E205000D0141E5520F1B23B22F18D1C18AF571ABB967A2536084A911F5 +*/ +void test_demohash() { + + // u8 in[] = {}; // empty-message case, disabled + u8 in[14] = { 0 }; // message, filled below with 0x00..0x0d + u8 out[CRYPTO_BYTES]; // digest buffer; sized by CRYPTO_BYTES (expected from api.h) so a longer-digest variant cannot overflow it + unsigned int i; + for (i = 0; i < ARR_SIZE(in); i++) { + in[i] = (u8)i; + } + printU8("in", in, ARR_SIZE(in), 0); + + crypto_hash(out, in, ARR_SIZE(in)); + printU8("out", out, ARR_SIZE(out), 0); +} + +int zxf=0; /* counts the passes started by the loop in main */ +/* Entry point: initialises the HAL and the system clock, prints one demo hash, then calls generate_test_vectors_hash() in an endless loop. */ +int main(void) +{ + HAL_Init(); + SystemClock_Config(); + test_demohash() ; + while (1) + { + zxf++; + generate_test_vectors_hash(); + } +} +/** + * @brief System Clock Configuration + * @retval None + */ +void SystemClock_Config(void) +{ + RCC_OscInitTypeDef RCC_OscInitStruct = {0}; + RCC_ClkInitTypeDef RCC_ClkInitStruct = {0}; + + /** Configure the main internal regulator output voltage + */ + __HAL_RCC_PWR_CLK_ENABLE(); +
__HAL_PWR_VOLTAGESCALING_CONFIG(PWR_REGULATOR_VOLTAGE_SCALE1); + /** Initializes the RCC Oscillators according to the specified parameters + * in the RCC_OscInitTypeDef structure. + */ + RCC_OscInitStruct.OscillatorType = RCC_OSCILLATORTYPE_HSI; + RCC_OscInitStruct.HSIState = RCC_HSI_ON; + RCC_OscInitStruct.HSICalibrationValue = RCC_HSICALIBRATION_DEFAULT; + RCC_OscInitStruct.PLL.PLLState = RCC_PLL_ON; + RCC_OscInitStruct.PLL.PLLSource = RCC_PLLSOURCE_HSI; + RCC_OscInitStruct.PLL.PLLM = 8; + RCC_OscInitStruct.PLL.PLLN = 216; + RCC_OscInitStruct.PLL.PLLP = RCC_PLLP_DIV2; + RCC_OscInitStruct.PLL.PLLQ = 2; + if (HAL_RCC_OscConfig(&RCC_OscInitStruct) != HAL_OK) + { + Error_Handler(); + } + /** Activate the Over-Drive mode + */ + if (HAL_PWREx_EnableOverDrive() != HAL_OK) + { + Error_Handler(); + } + /** Initializes the CPU, AHB and APB buses clocks + */ + RCC_ClkInitStruct.ClockType = RCC_CLOCKTYPE_HCLK|RCC_CLOCKTYPE_SYSCLK + |RCC_CLOCKTYPE_PCLK1|RCC_CLOCKTYPE_PCLK2; + RCC_ClkInitStruct.SYSCLKSource = RCC_SYSCLKSOURCE_PLLCLK; + RCC_ClkInitStruct.AHBCLKDivider = RCC_SYSCLK_DIV1; + RCC_ClkInitStruct.APB1CLKDivider = RCC_HCLK_DIV4; + RCC_ClkInitStruct.APB2CLKDivider = RCC_HCLK_DIV2; + + if (HAL_RCC_ClockConfig(&RCC_ClkInitStruct, FLASH_LATENCY_7) != HAL_OK) + { + Error_Handler(); + } +} + +/* USER CODE BEGIN 4 */ + +/* USER CODE END 4 */ + +/** + * @brief This function is executed in case of error occurrence. + * @retval None + */ +void Error_Handler(void) +{ + /* USER CODE BEGIN Error_Handler_Debug */ + /* User can add his own implementation to report the HAL error return state */ + + /* USER CODE END Error_Handler_Debug */ +} + +#ifdef USE_FULL_ASSERT +/** + * @brief Reports the name of the source file and the source line number + * where the assert_param error has occurred.
+ * @param file: pointer to the source file name + * @param line: assert_param error line source number + * @retval None + */ +void assert_failed(uint8_t *file, uint32_t line) +{ + /* USER CODE BEGIN 6 */ + /* User can add his own implementation to report the file name and line number, + ex: printf("Wrong parameters value: file %s on line %d\r\n", file, line) */ + /* USER CODE END 6 */ +} +#endif /* USE_FULL_ASSERT */ + +/************************ (C) COPYRIGHT STMicroelectronics *****END OF FILE****/