#include "auxFormat.h"

/* Number of bytes absorbed/squeezed per permutation call (192-bit rate). */
#define aead_RATE (192 / 8)
/* Round counts passed to permutation384 for the three phases. */
#define PR0_ROUNDS 76   /* initialization */
#define PR_ROUNDS 28    /* after each associated-data / message block */
#define PRF_ROUNDS 32   /* finalization, before the tag is extracted */

/*
 * Round-constant table: permutation384 consumes one byte per round, starting
 * at the first entry. The largest round count used in this file is
 * PR0_ROUNDS (76), which is within the 127 entries.
 */
unsigned char constant7Format[127] = {
	/*constant7Format[127]:*/
	0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90,
	0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2,
	0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99,
	0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b,
	0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92,
	0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8,
	0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,0xcf,0x5e,0xd3,0x9f,
	0xdc,0xc6,0x16,0x91,0x8d,0x4c,0x43,0x1a,0xd0,0x87,0x1c,0xc1,
	0x0f,0x59,0xca,0x57,0x9b,0xdd,0xce,0x56,0x93,0x9d,0xcc,0x46,
	0x13,0x98,0xc5,0x0e,0x51,0x8a,0x55,0x8b,0x5d,0xcb,0x5f,0xdb,
	0xdf,0xde,0xd6,0x96,0x94,0x84,0x04,
};

/* State
 * w8 w4 w0
 * w9 w5 w1
 * w10 w6 w2
 * w11 w7 w3
 */
/*
 * Apply `rounds` rounds of the 384-bit permutation to the 12-word state `in`,
 * in place.
 *
 * in     : 12 x 32-bit words; word j is loaded from byte offset 4*j into the
 *          register-like variables in the order w0,w4,w8,w1,w5,w9,w2,w6,w10,
 *          w3,w7,w11 and stored back the same way.
 * rounds : number of rounds. The loop decrements and tests AFTER the body
 *          runs, so the value must be >= 1.
 * rc     : round-constant bytes; one byte is read per round and the pointer
 *          is advanced, so at least `rounds` bytes must be readable.
 *
 * Each round: bits 0-2 of the constant byte are XORed into w8, bits 3-5 into
 * w4 and bits 6-7 into w0; the s-box sequence is applied to the three columns
 * (w0..w3, w4..w7, w8..w11); then the three lower rows are rotated as
 * described by the comments embedded in the assembly text.
 *
 * NOTE(review): the asm statement refers to C variables by name and carries
 * no operand or clobber lists, so it relies on a toolchain whose inline
 * assembler resolves C identifiers itself - confirm against the target
 * compiler before porting. The instruction strings are kept verbatim.
 */
static void permutation384(unsigned int *in, int rounds, unsigned char *rc) {
	uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11;
	uint32_t s0, s1, s2;
	uint32_t i=0;
	__asm volatile(
		"ldr w0, [in] \n\t"
		"ldr w4, [in, #4] \n\t"
		"ldr w8, [in, #8] \n\t"
		"ldr w1, [in, #12] \n\t"
		"ldr w5, [in, #16] \n\t"
		"ldr w9, [in, #20] \n\t"
		"ldr w2, [in, #24] \n\t"
		"ldr w6, [in, #28] \n\t"
		"ldr w10, [in, #32] \n\t"
		"ldr w3, [in, #36] \n\t"
		"ldr w7, [in, #40] \n\t"
		"ldr w11, [in, #44] \n\t"
		"enc_loop: \n\t"
		"/*add round const s0 s1*/ \n\t"
		"ldrb s0, [rc] \n\t"
		"LSR s1, s0, #6 \n\t"
		"and s1, s1, 0x3 \n\t"
		"LSR s2, s0, #3 \n\t"
		"and s2, s2, 0x7 \n\t"
		"and s0, s0, 0x7 \n\t"
		"eors w8, w8, s0 \n\t"
		"eors w4, w4, s2 \n\t"
		"eors w0, w0, s1 \n\t"
		"/*sbox first column*/ \n\t"
		"mvns w0, w0 \n\t"
		"ands s0, w1, w0 \n\t"
		"eors s0, w2, s0 \n\t"
		"orrs w2, w1, w2 \n\t"
		"eors w0, w3, w0 \n\t"
		"eors w2, w2, w0 \n\t"
		"eors s1, w1, w3 \n\t"
		"eors w3, w3, s0 \n\t"
		"ands w0, s0, w0 \n\t"
		"eors w0, s1, w0 \n\t"
		"ands w1, w2, s1 \n\t"
		"eors w1, s0, w1 \n\t"
		"/*sbox second column*/ \n\t"
		"mvns w4, w4 \n\t"
		"ands s0, w5, w4 \n\t"
		"eors s0, w6, s0 \n\t"
		"orrs w6, w5, w6 \n\t"
		"eors w4, w7, w4 \n\t"
		"eors w6, w6, w4 \n\t"
		"eors s1, w5, w7 \n\t"
		"eors w7, w7, s0 \n\t"
		"ands w4, s0, w4 \n\t"
		"eors w4, s1, w4 \n\t"
		"ands w5, w6, s1 \n\t"
		"eors w5, s0, w5 \n\t"
		"/*sbox third column*/ \n\t"
		"mvns w8, w8 \n\t"
		"ands s0, w9, w8 \n\t"
		"eors s0, w10, s0 \n\t"
		"orrs w10, w9, w10 \n\t"
		"eors w8, w11, w8 \n\t"
		"eors w10, w10, w8 \n\t"
		"eors s1, w9, w11 \n\t"
		"eors w11, w11, s0 \n\t"
		"ands w8, s0, w8 \n\t"
		"eors w8, s1, w8 \n\t"
		"ands w9, w10, s1 \n\t"
		"eors w9, s0, w9 \n\t"
		"/*rotate shift left 1 bit [w9 w5 w1-> (w1,1) w9 w5] */ \n\t"
		"mov s0, w1 \n\t"
		"mov w1, w5 \n\t"
		"mov w5, w9 \n\t"
		"ROR w9, s0, #31 \n\t"
		"/*rotate shift left 8 bits [w10 w6 w2-> £¨w6,3) (w2,3) ( w10,2)]*/ \n\t"
		"mov s0, w10 \n\t"
		"ROR w10, w6 , #29 \n\t"
		"ROR w6, w2 , #29 \n\t"
		"ROR w2, s0, #30 \n\t"
		"/*rotate shift left 55 bit [w11 w7 w3-> £¨w3,13) (w11,14) ( w7,14)] */ \n\t"
		"mov s0, w3 \n\t"
		"ROR w3, w7 , #14 \n\t"
		"ROR w7, w11 , #14 \n\t"
		"ROR w11, s0, #13 \n\t"
		"/*loop control*/ \n\t"
		"adds rc, rc, #1 \n\t"
		"subs rounds, rounds, #1 \n\t"
		"bne enc_loop \n\t"
		"str w0, [in] \n\t"
		"str w4, [in, #4] \n\t"
		"str w8, [in, #8] \n\t"
		"str w1, [in, #12] \n\t"
		"str w5, [in, #16] \n\t"
		"str w9, [in, #20] \n\t"
		"str w2, [in, #24] \n\t"
		"str w6, [in, #28] \n\t"
		"str w10, [in, #32] \n\t"
		"str w3, [in, #36] \n\t"
		"str w7, [in, #40] \n\t"
		"str w11, [in, #44] \n\t"
	);
}

/*
 * Set up the state from nonce and key and run the initialization rounds.
 * `s` must be zero on entry. npub[0..11] goes to s[0..2]; npub[12..15]
 * followed by k[0..15] (zero-padded to 24 bytes) goes to s[3..8];
 * s[9] is set to 0x80000000.
 */
static void aead_init_state(u32 *s, const unsigned char *npub,
		const unsigned char *k) {
	u8 nonce_key[aead_RATE] = { 0 };

	packU96FormatToThreePacket(s, npub);
	memcpy(nonce_key, npub + 12, sizeof(unsigned char) * 4);
	memcpy(nonce_key + 4, k, sizeof(unsigned char) * 16);
	packU96FormatToThreePacket((s + 3), nonce_key);
	packU96FormatToThreePacket((s + 6), (nonce_key + 12));
	s[9] = 0x80000000;
	permutation384(s, PR0_ROUNDS, constant7Format);
}

/*
 * XOR one aead_RATE-byte block into s[0..5]: the first 12 bytes are packed
 * and XORed into s[0..2], then the next 12 bytes into s[3..5].
 */
static void aead_xor_block(u32 *s, const unsigned char *block) {
	u32 packed[6] = { 0 };

	packU96FormatToThreePacket(packed, block);
	s[0] ^= packed[0];
	s[1] ^= packed[1];
	s[2] ^= packed[2];
	packU96FormatToThreePacket((packed + 3), (block + 12));
	s[3] ^= packed[3];
	s[4] ^= packed[4];
	s[5] ^= packed[5];
}

/*
 * Absorb the associated data. Does nothing when adlen is 0. Otherwise every
 * full block is XORed in and followed by PR_ROUNDS rounds, and a final block
 * holding the remaining bytes (possibly none), a 0x01 byte and zero fill is
 * always absorbed the same way.
 */
static void aead_absorb_ad(u32 *s, const unsigned char *ad,
		unsigned long long adlen) {
	u8 padded[aead_RATE] = { 0 };

	if (!adlen) {
		return;
	}
	while (adlen >= aead_RATE) {
		aead_xor_block(s, ad);
		permutation384(s, PR_ROUNDS, constant7Format);
		adlen -= aead_RATE;
		ad += aead_RATE;
	}
	/* Here adlen < aead_RATE, so the 0x01 marker always fits in the buffer. */
	memcpy(padded, ad, adlen * sizeof(unsigned char));
	padded[adlen] = 0x01;
	aead_xor_block(s, padded);
	permutation384(s, PR_ROUNDS, constant7Format);
}

/*
 * Compare the 16-byte computed tag with the received one.
 * Returns non-zero when they differ. The comparison is done byte by byte
 * because `received` is a plain byte pointer with no alignment guarantee,
 * and it accumulates the differences instead of exiting early so that the
 * running time does not depend on the position of the first mismatch.
 */
static int aead_tag_differs(const u8 *computed, const unsigned char *received) {
	unsigned int diff = 0;
	unsigned int idx;

	for (idx = 0; idx < 16; ++idx) {
		diff |= (unsigned int)(computed[idx] ^ received[idx]);
	}
	return diff != 0;
}

/*
 * Encrypt `mlen` bytes of `m` and authenticate them together with `adlen`
 * bytes of `ad`.
 *
 * c     : output, receives mlen ciphertext bytes followed by a 16-byte tag.
 * clen  : receives mlen + CRYPTO_ABYTES.
 * nsec  : unused.
 * npub  : 16-byte nonce (bytes 0..15 are read).
 * k     : 16-byte key (bytes 0..15 are read).
 * Returns 0.
 */
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
		const unsigned char *m, unsigned long long mlen,
		const unsigned char *ad, unsigned long long adlen,
		const unsigned char *nsec, const unsigned char *npub,
		const unsigned char *k) {
	u32 s[12] = { 0 };
	u8 tempData[24] = { 0 };

	(void)nsec;
	*clen = mlen + CRYPTO_ABYTES;

	// initialization
	aead_init_state(s, npub, k);

	// process associated data
	aead_absorb_ad(s, ad, adlen);
	s[9] ^= 0x80000000;

	if (mlen) {
		while (mlen >= aead_RATE) {
			aead_xor_block(s, m);
			/* The ciphertext block is the rate part of the state after the XOR. */
			unpackU96FormatToThreePacket(c, s);
			unpackU96FormatToThreePacket((c + 12), (s + 3));
			permutation384(s, PR_ROUNDS, constant7Format);
			mlen -= aead_RATE;
			m += aead_RATE;
			c += aead_RATE;
		}
		/* Last block: remaining bytes, a 0x01 byte, zero fill. */
		memset(tempData, 0, sizeof(tempData));
		memcpy(tempData, m, mlen * sizeof(unsigned char));
		tempData[mlen] = 0x01;
		aead_xor_block(s, tempData);
		unpackU96FormatToThreePacket(tempData, s);
		unpackU96FormatToThreePacket((tempData + 12), (s + 3));
		/* Only the bytes that correspond to real message bytes are emitted. */
		memcpy(c, tempData, mlen * sizeof(unsigned char));
		c += mlen;
	}

	// finalization
	permutation384(s, PRF_ROUNDS, constant7Format);

	// return tag: 12 bytes from s[0..2] plus the first 4 bytes from s[3..5]
	unpackU96FormatToThreePacket(c, s);
	unpackU96FormatToThreePacket(tempData, (s + 3));
	memcpy(c + 12, tempData, sizeof(unsigned char) * 4);
	return 0;
}

/*
 * Verify and decrypt `clen` bytes of `c` (ciphertext followed by a 16-byte
 * tag), authenticated together with `adlen` bytes of `ad`.
 *
 * m     : output, receives clen - CRYPTO_ABYTES plaintext bytes.
 * mlen  : receives clen - CRYPTO_ABYTES; left untouched when clen is too
 *         short to contain a tag.
 * nsec  : unused.
 * npub  : 16-byte nonce.
 * k     : 16-byte key.
 * Returns 0 when the tag matches, -1 when clen < CRYPTO_ABYTES or the tag
 * does not match.
 *
 * NOTE(review): the plaintext is written to `m` before the tag is checked
 * and is not cleared on failure; callers must discard `m` when -1 is
 * returned.
 */
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
		unsigned char *nsec, const unsigned char *c, unsigned long long clen,
		const unsigned char *ad, unsigned long long adlen,
		const unsigned char *npub, const unsigned char *k) {
	u8 i;
	u32 s[12] = { 0 };
	u32 dataFormat[6] = { 0 };
	u32 dataFormat_1[6] = { 0 };
	u8 tempU8[24] = { 0 };

	(void)nsec;
	/* Reject short input before the subtraction can wrap around. */
	if (clen < CRYPTO_ABYTES) {
		return -1;
	}
	*mlen = clen - CRYPTO_ABYTES;

	// initialization
	aead_init_state(s, npub, k);

	// process associated data
	aead_absorb_ad(s, ad, adlen);
	s[9] ^= 0x80000000;

	clen -= CRYPTO_ABYTES;
	if (clen) {
		while (clen >= aead_RATE) {
			/* plaintext = state rate part XOR ciphertext */
			packU96FormatToThreePacket(dataFormat, c);
			dataFormat_1[0] = s[0] ^ dataFormat[0];
			dataFormat_1[1] = s[1] ^ dataFormat[1];
			dataFormat_1[2] = s[2] ^ dataFormat[2];
			packU96FormatToThreePacket((dataFormat + 3), (c + 12));
			dataFormat_1[3] = s[3] ^ dataFormat[3];
			dataFormat_1[4] = s[4] ^ dataFormat[4];
			dataFormat_1[5] = s[5] ^ dataFormat[5];
			unpackU96FormatToThreePacket(m, dataFormat_1);
			unpackU96FormatToThreePacket((m + 12), (dataFormat_1 + 3));
			/* The rate part of the state is replaced by the ciphertext block. */
			s[0] = dataFormat[0];
			s[1] = dataFormat[1];
			s[2] = dataFormat[2];
			s[3] = dataFormat[3];
			s[4] = dataFormat[4];
			s[5] = dataFormat[5];
			permutation384(s, PR_ROUNDS, constant7Format);
			clen -= aead_RATE;
			m += aead_RATE;
			c += aead_RATE;
		}
		/*
		 * Last block, handled on the byte image of s[0..5]: the first clen
		 * bytes yield plaintext and are replaced by ciphertext, the byte
		 * after them gets the 0x01 marker XORed in, the rest is unchanged.
		 */
		unpackU96FormatToThreePacket(tempU8, s);
		unpackU96FormatToThreePacket((tempU8 + 12), (s + 3));
		for (i = 0; i < clen; ++i, ++m, ++c) {
			*m = tempU8[i] ^ *c;
			tempU8[i] = *c;
		}
		tempU8[i] ^= 0x01;
		packU96FormatToThreePacket(s, tempU8);
		packU96FormatToThreePacket((s + 3), (tempU8 + 12));
	}

	// finalization
	permutation384(s, PRF_ROUNDS, constant7Format);

	// check tag: c now points at the 16 tag bytes
	unpackU96FormatToThreePacket(tempU8, s);
	unpackU96FormatToThreePacket((tempU8 + 12), (s + 3));
	if (aead_tag_differs(tempU8, c)) {
		return -1;
	}
	return 0;
}