diff --git a/romulus/Implementations/crypto_aead/romulusm1+/opt32_NEC/api.h b/romulus/Implementations/crypto_aead/romulusm1+/opt32_NEC/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1+/opt32_NEC/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/romulus/Implementations/crypto_aead/romulusm1+/opt32_NEC/encrypt.c b/romulus/Implementations/crypto_aead/romulusm1+/opt32_NEC/encrypt.c new file mode 100644 index 0000000..4bc24fa --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1+/opt32_NEC/encrypt.c @@ -0,0 +1,1245 @@ +/* + * Date: 29 November 2018 + * Contact: Thomas Peyrin - thomas.peyrin@gmail.com + * Mustafa Khairallah - mustafam001@e.ntu.edu.sg + */ + +#include "crypto_aead.h" +#include "api.h" +#include "skinny.h" +#include +#include + +void pad (const unsigned char* m, unsigned char* mp, int len8) { + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&mp[0]) = 0; + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = (len8 & 0x0f); + for (int i = 0; i < len8; i++) { + mp[i] = m[i]; + } + +#else + + mp[0] = 0; + mp[1] = 0; + mp[2] = 0; + mp[3] = 0; + mp[4] = 0; + mp[5] = 0; + mp[6] = 0; + mp[7] = 0; + mp[8] = 0; + mp[9] = 0; + mp[10] = 0; + mp[11] = 0; + mp[12] = 0; + mp[13] = 0; + mp[14] = 0; + mp[15] = (len8 & 0x0f); + for (int i = 0; i < len8; i++) { + mp[i] = m[i]; + } + +#endif + +} + +void g8A (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_WORD_CAST + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 
0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#else + + uint32_t s0, s1, s2, s3; + uint32_t c0, c1, c2, c3; + + pack_word(s[0], s[1], s[2], s[3], s0); + pack_word(s[4], s[5], s[6], s[7], s1); + pack_word(s[8], s[9], s[10], s[11], s2); + pack_word(s[12], s[13], s[14], s[15], s3); + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + unpack_word(c[0], c[1], c[2], c[3], c0); + unpack_word(c[4], c[5], c[6], c[7], c1); + unpack_word(c[8], c[9], c[10], c[11], c2); + unpack_word(c[12], c[13], c[14], c[15], c3); + +#endif + +} + +#ifdef ___ENABLE_WORD_CAST + +void g8A_for_Tag_Generation (unsigned char* s, unsigned char* c) { + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + // use byte access because of memory alignment. + // c is not always in word(4 byte) alignment. 
+ c[0] = c0 &0xFF; + c[1] = (c0>>8) &0xFF; + c[2] = (c0>>16)&0xFF; + c[3] = c0>>24; + c[4] = c1 &0xFF; + c[5] = (c1>>8) &0xFF; + c[6] = (c1>>16)&0xFF; + c[7] = c1>>24; + c[8] = c2 &0xFF; + c[9] = (c2>>8) &0xFF; + c[10] = (c2>>16)&0xFF; + c[11] = c2>>24; + c[12] = c3 &0xFF; + c[13] = (c3>>8) &0xFF; + c[14] = (c3>>16)&0xFF; + c[15] = c3>>24; + +} + +#endif + +#define rho_ad_eqov16_macro(i) \ + s[i] = s[i] ^ m[i]; + +void rho_ad_eqov16 ( + const unsigned char* m, + unsigned char* s) { + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&m[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&m[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&m[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&m[12]); + +#else + + rho_ad_eqov16_macro(0); + rho_ad_eqov16_macro(1); + rho_ad_eqov16_macro(2); + rho_ad_eqov16_macro(3); + rho_ad_eqov16_macro(4); + rho_ad_eqov16_macro(5); + rho_ad_eqov16_macro(6); + rho_ad_eqov16_macro(7); + rho_ad_eqov16_macro(8); + rho_ad_eqov16_macro(9); + rho_ad_eqov16_macro(10); + rho_ad_eqov16_macro(11); + rho_ad_eqov16_macro(12); + rho_ad_eqov16_macro(13); + rho_ad_eqov16_macro(14); + rho_ad_eqov16_macro(15); + +#endif + +} + +#define rho_ad_ud16_macro(i) \ + s[i] = s[i] ^ mp[i]; + +void rho_ad_ud16 ( + const unsigned char* m, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + pad(m,mp,len8); + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&mp[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&mp[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&mp[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&mp[12]); + +#else + + rho_ad_ud16_macro(0); + rho_ad_ud16_macro(1); + rho_ad_ud16_macro(2); + rho_ad_ud16_macro(3); + rho_ad_ud16_macro(4); + rho_ad_ud16_macro(5); + rho_ad_ud16_macro(6); + rho_ad_ud16_macro(7); + rho_ad_ud16_macro(8); + rho_ad_ud16_macro(9); + rho_ad_ud16_macro(10); + rho_ad_ud16_macro(11); + rho_ad_ud16_macro(12); + rho_ad_ud16_macro(13); + rho_ad_ud16_macro(14); + rho_ad_ud16_macro(15); + +#endif + +} + +void rho_eqov16 ( + 
const unsigned char* m, + unsigned char* c, + unsigned char* s) { + + g8A(s,c); + +#ifdef ___ENABLE_WORD_CAST + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= m0; + s1 ^= m1; + s2 ^= m2; + s3 ^= m3; + + c0 ^= m0; + c1 ^= m1; + c2 ^= m2; + c3 ^= m3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#else + + uint32_t c0, c1, c2, c3; + uint32_t s0, s1, s2, s3; + uint32_t m0, m1, m2, m3; + + pack_word(m[0], m[1], m[2], m[3], m0); + pack_word(m[4], m[5], m[6], m[7], m1); + pack_word(m[8], m[9], m[10], m[11], m2); + pack_word(m[12], m[13], m[14], m[15], m3); + + pack_word(s[0], s[1], s[2], s[3], s0); + pack_word(s[4], s[5], s[6], s[7], s1); + pack_word(s[8], s[9], s[10], s[11], s2); + pack_word(s[12], s[13], s[14], s[15], s3); + + pack_word(c[0], c[1], c[2], c[3], c0); + pack_word(c[4], c[5], c[6], c[7], c1); + pack_word(c[8], c[9], c[10], c[11], c2); + pack_word(c[12], c[13], c[14], c[15], c3); + + s0 ^= m0; + s1 ^= m1; + s2 ^= m2; + s3 ^= m3; + + c0 ^= m0; + c1 ^= m1; + c2 ^= m2; + c3 ^= m3; + + unpack_word(s[0], s[1], s[2], s[3], s0); + unpack_word(s[4], s[5], s[6], s[7], s1); + unpack_word(s[8], s[9], s[10], s[11], s2); + unpack_word(s[12], s[13], s[14], s[15], s3); + + unpack_word(c[0], c[1], c[2], c[3], c0); + unpack_word(c[4], c[5], c[6], c[7], c1); + unpack_word(c[8], c[9], c[10], c[11], c2); + unpack_word(c[12], c[13], c[14], c[15], c3); + +#endif + +} + +#define 
rho_ud16_macro(i) \ + s[i] = s[i] ^ mp[i]; + +void rho_ud16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + + pad(m,mp,len8); + + g8A(s,c); +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&mp[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&mp[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&mp[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&mp[12]); + + for (int i = 0; i < 16; i++) { + if (i < len8) { + c[i] = c[i] ^ mp[i]; + } + else { + c[i] = 0; + } + } + +#else + + rho_ud16_macro(0); + rho_ud16_macro(1); + rho_ud16_macro(2); + rho_ud16_macro(3); + rho_ud16_macro(4); + rho_ud16_macro(5); + rho_ud16_macro(6); + rho_ud16_macro(7); + rho_ud16_macro(8); + rho_ud16_macro(9); + rho_ud16_macro(10); + rho_ud16_macro(11); + rho_ud16_macro(12); + rho_ud16_macro(13); + rho_ud16_macro(14); + rho_ud16_macro(15); + + for (int i = 0; i < 16; i++) { + if (i < len8) { + c[i] = c[i] ^ mp[i]; + } + else { + c[i] = 0; + } + } + +#endif + +} + +void irho_eqov16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s) { + + g8A(s,m); + +#ifdef ___ENABLE_WORD_CAST + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + s2 ^= c2 ^ m2; + s3 ^= c3 ^ m3; + + m0 ^= c0; + m1 ^= c1; + m2 ^= c2; + m3 ^= c3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&m[0]) = m0; + *(uint32_t*)(&m[4]) = m1; + *(uint32_t*)(&m[8]) = m2; + *(uint32_t*)(&m[12]) = m3; + +#else + + uint32_t c0, c1, c2, c3; + uint32_t s0, 
s1, s2, s3; + uint32_t m0, m1, m2, m3; + + pack_word(m[0], m[1], m[2], m[3], m0); + pack_word(m[4], m[5], m[6], m[7], m1); + pack_word(m[8], m[9], m[10], m[11], m2); + pack_word(m[12], m[13], m[14], m[15], m3); + + pack_word(s[0], s[1], s[2], s[3], s0); + pack_word(s[4], s[5], s[6], s[7], s1); + pack_word(s[8], s[9], s[10], s[11], s2); + pack_word(s[12], s[13], s[14], s[15], s3); + + pack_word(c[0], c[1], c[2], c[3], c0); + pack_word(c[4], c[5], c[6], c[7], c1); + pack_word(c[8], c[9], c[10], c[11], c2); + pack_word(c[12], c[13], c[14], c[15], c3); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + s2 ^= c2 ^ m2; + s3 ^= c3 ^ m3; + + m0 ^= c0; + m1 ^= c1; + m2 ^= c2; + m3 ^= c3; + + unpack_word(s[0], s[1], s[2], s[3], s0); + unpack_word(s[4], s[5], s[6], s[7], s1); + unpack_word(s[8], s[9], s[10], s[11], s2); + unpack_word(s[12], s[13], s[14], s[15], s3); + + unpack_word(m[0], m[1], m[2], m[3], m0); + unpack_word(m[4], m[5], m[6], m[7], m1); + unpack_word(m[8], m[9], m[10], m[11], m2); + unpack_word(m[12], m[13], m[14], m[15], m3); + +#endif + +} + +#define irho_ud16_macro(i) \ + s[i] = s[i] ^ cp[i]; + +void irho_ud16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char cp [16]; + + pad(c,cp,len8); + + g8A(s,m); + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&cp[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&cp[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&cp[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&cp[12]); + + for (int i = 0; i < len8; i++) { + s[i] ^= m[i]; + } + + for (int i = 0; i < 16; i++) { + if (i < len8) { + m[i] = m[i] ^ cp[i]; + } + else { + m[i] = 0; + } + } + +#else + + irho_ud16_macro(0); + irho_ud16_macro(1); + irho_ud16_macro(2); + irho_ud16_macro(3); + irho_ud16_macro(4); + irho_ud16_macro(5); + irho_ud16_macro(6); + irho_ud16_macro(7); + irho_ud16_macro(8); + irho_ud16_macro(9); + irho_ud16_macro(10); + irho_ud16_macro(11); + irho_ud16_macro(12); + irho_ud16_macro(13); + irho_ud16_macro(14); 
+ irho_ud16_macro(15); + + for (int i = 0; i < len8; i++) { + s[i] ^= m[i]; + } + + for (int i = 0; i < 16; i++) { + if (i < len8) { + m[i] = m[i] ^ cp[i]; + } + else { + m[i] = 0; + } + } + +#endif + +} + +void reset_lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&CNT[0]) = 0x00000001; // CNT3 CNT2 CNT1 CNT0 + *(uint32_t*)(&CNT[4]) = 0x00000000; // CNT7 CNT6 CNT5 CNT4 + +#else + + CNT[0] = 0x01; + CNT[1] = 0x00; + CNT[2] = 0x00; + CNT[3] = 0x00; + CNT[4] = 0x00; + CNT[5] = 0x00; + CNT[6] = 0x00; + +#endif + +} + +void lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_WORD_CAST + + uint32_t C0; + uint32_t C1; + uint32_t fb0; + + C0 = *(uint32_t*)(&CNT[0]); // CNT3 CNT2 CNT1 CNT0 + C1 = *(uint32_t*)(&CNT[4]); // CNT7 CNT6 CNT5 CNT4 + + fb0 = 0; + if (CNT[6] & 0x80) { + fb0 = 0x95; + } + + C1 = C1 << 1 | C0 >> 31; + C0 = C0 << 1 ^ fb0; + + *(uint32_t*)(&CNT[0]) = C0; + *(uint32_t*)(&CNT[4]) = C1; + +#else + + uint32_t fb0 = CNT[6] >> 7; + + CNT[6] = (CNT[6] << 1) | (CNT[5] >> 7); + CNT[5] = (CNT[5] << 1) | (CNT[4] >> 7); + CNT[4] = (CNT[4] << 1) | (CNT[3] >> 7); + CNT[3] = (CNT[3] << 1) | (CNT[2] >> 7); + CNT[2] = (CNT[2] << 1) | (CNT[1] >> 7); + CNT[1] = (CNT[1] << 1) | (CNT[0] >> 7); + if (fb0 == 1) { + CNT[0] = (CNT[0] << 1) ^ 0x95; + } + else { + CNT[0] = (CNT[0] << 1); + } + +#endif + +} + +void block_cipher( + unsigned char* s, + const unsigned char* k, unsigned char* T, + unsigned char* CNT, unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + CNT[7] = D; + p_skinny_ctrl->func_skinny_128_384_enc(s, p_skinny_ctrl, CNT, T, k); + +} + +void nonce_encryption ( + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + block_cipher(s,k,(unsigned char*)N,CNT,D,p_skinny_ctrl); + +} + +void generate_tag ( + unsigned char** c, unsigned char* s, + unsigned long long* clen) { + +#ifdef ___ENABLE_WORD_CAST + + g8A_for_Tag_Generation(s, *c); + +#else + 
+ g8A(s, *c); + +#endif + *c = *c + 16; + *c = *c - *clen; + +} + +unsigned long long msg_encryption ( + const unsigned char** M, unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* l_skinny_ctrl) { + + int len8; + + if (mlen >= 16) { + len8 = 16; + mlen = mlen - 16; + rho_eqov16(*M, *c, s); + } + else { + len8 = mlen; + mlen = 0; + rho_ud16(*M, *c, s, len8); + } + *c = *c + len8; + *M = *M + len8; + lfsr_gf56(CNT); + if (mlen != 0) { + nonce_encryption(N,CNT,s,k,D,l_skinny_ctrl); + } + return mlen; + +} + +unsigned long long msg_decryption ( + unsigned char** M, const unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long clen, + skinny_ctrl* l_skinny_ctrl) { + + int len8; + + if (clen >= 16) { + len8 = 16; + clen = clen - 16; + irho_eqov16(*M, *c, s); + } + else { + len8 = clen; + clen = 0; + irho_ud16(*M, *c, s, len8); + } + *c = *c + len8; + *M = *M + len8; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,l_skinny_ctrl); + return clen; + +} + +unsigned long long ad2msg_encryption ( + const unsigned char** M, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* l_skinny_ctrl) { + + unsigned char T [16]; + int len8; + + if (mlen <= 16) { + len8 = mlen; + mlen = 0; + } + else { + len8 = 16; + mlen = mlen - 16; + } + + pad (*M,T,len8); + block_cipher(s,k,T,CNT,D,l_skinny_ctrl); + lfsr_gf56(CNT); + *M = *M + len8; + + return mlen; + +} + +unsigned long long ad_encryption ( + const unsigned char** A, unsigned char* s, + const unsigned char* k, unsigned long long adlen, + unsigned char* CNT, + unsigned char D, + skinny_ctrl* l_skinny_ctrl) { + + unsigned char T [16]; + int len8; + + if (adlen >= 16) { + len8 = 16; + adlen = adlen - 16; + + rho_ad_eqov16(*A, s); + } + else { + len8 = 
adlen; + adlen = 0; + rho_ad_ud16(*A, s, len8); + } + *A = *A + len8; + lfsr_gf56(CNT); + if (adlen != 0) { + if (adlen >= 16) { + len8 = 16; + adlen = adlen - 16; + } + else { + len8 = adlen; + adlen = 0; + } + pad(*A, T, len8); + *A = *A + len8; + block_cipher(s,k,T,CNT,D,l_skinny_ctrl); + lfsr_gf56(CNT); + } + + return adlen; + +} + +int crypto_aead_encrypt ( + unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, + const unsigned char* npub, + const unsigned char* k) { + + unsigned char s[16]; + unsigned char CNT[8]; + unsigned char T[16]; + const unsigned char* N; + unsigned char w; + unsigned long long xlen; + + skinny_ctrl l_skinny_ctrl; + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void)nsec; + N = npub; + + xlen = mlen; + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#else + + s[0] = 0; + s[1] = 0; + s[2] = 0; + s[3] = 0; + s[4] = 0; + s[5] = 0; + s[6] = 0; + s[7] = 0; + s[8] = 0; + s[9] = 0; + s[10] = 0; + s[11] = 0; + s[12] = 0; + s[13] = 0; + s[14] = 0; + s[15] = 0; + +#endif + + reset_lfsr_gf56(CNT); + + w = 48; + + if (adlen == 0) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 0) { + w = w ^ 8; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) < 16) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { 
+ w = w ^ 5; + } + } + else if (adlen%(32) == 16) { + w = w ^ 0; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else { + w = w ^ 10; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + } + else while (adlen > 0) { + adlen = ad_encryption(&ad,s,k,adlen,CNT,40,&l_skinny_ctrl); + } + + if ((w & 8) == 0) { + xlen = ad2msg_encryption (&m,CNT,s,k,44,xlen,&l_skinny_ctrl); + } + else if (mlen == 0) { + lfsr_gf56(CNT); + } + while (xlen > 0) { + xlen = ad_encryption(&m,s,k,xlen,CNT,44,&l_skinny_ctrl); + } + nonce_encryption(N,CNT,s,k,w,&l_skinny_ctrl); + + // Tag generation + g8A(s, T); + + m = m - mlen; + + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) = *(uint32_t*)(&T[0]); + *(uint32_t*)(&s[4]) = *(uint32_t*)(&T[4]); + *(uint32_t*)(&s[8]) = *(uint32_t*)(&T[8]); + *(uint32_t*)(&s[12]) = *(uint32_t*)(&T[12]); + +#else + + s[0] = T[0]; + s[1] = T[1]; + s[2] = T[2]; + s[3] = T[3]; + s[4] = T[4]; + s[5] = T[5]; + s[6] = T[6]; + s[7] = T[7]; + s[8] = T[8]; + s[9] = T[9]; + s[10] = T[10]; + s[11] = T[11]; + s[12] = T[12]; + s[13] = T[13]; + s[14] = T[14]; + s[15] = T[15]; + +#endif + + *clen = mlen + 16; + + if (mlen > 0) { + nonce_encryption(N,CNT,s,k,36,&l_skinny_ctrl); + while (mlen > 16) { + mlen = msg_encryption(&m,&c,N,CNT,s,k,36,mlen,&l_skinny_ctrl); + } + rho_ud16(m, c, s, mlen); + c = c + mlen; + m = m + mlen; + } + + // Tag Concatenation + c[0] = T[0]; + c[1] = T[1]; + c[2] = T[2]; + c[3] = T[3]; + c[4] = T[4]; + c[5] = T[5]; + c[6] = T[6]; + c[7] = T[7]; + c[8] = T[8]; + c[9] = T[9]; + c[10] = T[10]; 
+  c[11] = T[11];
+  c[12] = T[12];
+  c[13] = T[13];
+  c[14] = T[14];
+  c[15] = T[15];
+
+  c = c - *clen;
+
+  return 0;
+
+}
+
+/*
+ * Compute the domain-separation byte that is passed as D to the final
+ * nonce_encryption() call of the authentication pass.
+ *
+ * Starts from 48 and XORs in one constant chosen by adlen (0, or its
+ * residue mod 32) and one chosen by msglen (0, or its residue mod 32).
+ * This is the same decision chain crypto_aead_encrypt spells out inline.
+ *
+ *   adlen  : length of the associated data in bytes
+ *   msglen : length of the message (plaintext) in bytes
+ *   returns: the domain byte w
+ */
+static unsigned char romulus_m_domain_w (
+  unsigned long long adlen,
+  unsigned long long msglen) {
+
+  unsigned char w = 48;
+  unsigned int ad_rem = (unsigned int)(adlen % 32);
+  unsigned int msg_rem = (unsigned int)(msglen % 32);
+
+  if (adlen == 0) {
+    w = w ^ 2;
+  }
+  else if (ad_rem == 0) {
+    w = w ^ 8;
+  }
+  else if (ad_rem < 16) {
+    w = w ^ 2;
+  }
+  else if (ad_rem > 16) {
+    w = w ^ 10;
+  }
+  // ad_rem == 16 : w is left unchanged
+
+  if (msglen == 0) {
+    w = w ^ 1;
+  }
+  else if (msg_rem == 0) {
+    w = w ^ 4;
+  }
+  else if (msg_rem < 16) {
+    w = w ^ 1;
+  }
+  else if (msg_rem > 16) {
+    w = w ^ 5;
+  }
+  // msg_rem == 16 : w is left unchanged
+
+  return w;
+
+}
+
+/*
+ * Decrypt and verify a ciphertext produced by crypto_aead_encrypt.
+ *
+ *   m     : output buffer, receives clen - 16 plaintext bytes
+ *   mlen  : receives the plaintext length (0 if clen is too short)
+ *   nsec  : unused
+ *   c     : ciphertext followed by the 16-byte tag (must not overlap m,
+ *           see CRYPTO_NOOVERLAP in api.h)
+ *   clen  : total length of c in bytes, tag included
+ *   ad    : associated data, adlen bytes
+ *   npub  : 16-byte nonce
+ *   k     : 16-byte key
+ *
+ * Returns 0 when the tag recomputed over (ad, recovered plaintext) equals
+ * the received tag, -1 otherwise. On -1 the plaintext written to m is
+ * wiped.
+ *
+ * Order of operations: the received tag seeds the state for decryption,
+ * so the message is decrypted first; the tag is then recomputed over the
+ * recovered plaintext exactly as crypto_aead_encrypt computes it, and
+ * only then compared. The previous version authenticated m before it was
+ * written and compared the received tag with itself, so it accepted every
+ * ciphertext.
+ */
+int crypto_aead_decrypt(
+  unsigned char *m,unsigned long long *mlen,
+  unsigned char *nsec,
+  const unsigned char *c,unsigned long long clen,
+  const unsigned char *ad,unsigned long long adlen,
+  const unsigned char *npub,
+  const unsigned char *k) {
+
+  unsigned char s[16];
+  unsigned char CNT[8];
+  unsigned char T[16];
+  uint32_t last_w[4];   // word-aligned scratch for the final partial block
+  unsigned char* last;
+  unsigned char w;
+  unsigned char diff;
+  unsigned long long msglen;
+  unsigned long long xlen;
+  unsigned char* mstart;
+  const unsigned char* mauth;
+  const unsigned char* tag;
+  const unsigned char* N;
+
+  skinny_ctrl l_skinny_ctrl;
+
+  (void)nsec;
+
+  // A valid ciphertext always carries the 16-byte tag.
+  // Without this check clen - 16 wraps and c is read out of bounds.
+  if (clen < 16) {
+    *mlen = 0;
+    return -1;
+  }
+
+  N = npub;
+  last = (unsigned char*)last_w;
+  mstart = m;
+  mauth = m;
+  msglen = clen - 16;
+  tag = c + msglen;
+  *mlen = msglen;
+
+  // ---- Phase 1 : decryption, state seeded with the received tag ----
+
+  for (int i = 0; i < 16; i++) {
+    s[i] = tag[i];
+  }
+
+  reset_lfsr_gf56(CNT);
+
+  // NOTE(review): assumes skinny_128_384_enc123_12 builds the complete key
+  // schedule from scratch (crypto_aead_encrypt relies on that for its
+  // first call) and that skinny_128_384_enc1_1 may follow once the
+  // tweakey part derived from N and k is in place - confirm against
+  // skinny_main.c.
+  l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12;
+
+  xlen = msglen;
+  if (xlen > 0) {
+    nonce_encryption(N,CNT,s,k,36,&l_skinny_ctrl);
+
+    l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1;
+
+    while (xlen > 16) {
+      xlen = msg_decryption(&m,&c,N,CNT,s,k,36,xlen,&l_skinny_ctrl);
+    }
+
+    // irho_ud16 always writes 16 bytes to its output. Decrypt the last
+    // block (1..16 bytes) into scratch and copy only the real bytes so
+    // that m is never written past msglen.
+    irho_ud16(last, c, s, (int)xlen);
+    for (unsigned long long i = 0; i < xlen; i++) {
+      m[i] = last[i];
+    }
+  }
+
+  // ---- Phase 2 : recompute the tag over ad and the recovered plaintext ----
+
+  for (int i = 0; i < 16; i++) {
+    s[i] = 0;
+  }
+
+  reset_lfsr_gf56(CNT);
+
+  // The tweak changes with every block again, so restart from the full
+  // key schedule exactly like crypto_aead_encrypt does.
+  l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12;
+
+  w = romulus_m_domain_w(adlen, msglen);
+
+  if (adlen == 0) { // AD is an empty string
+    lfsr_gf56(CNT);
+  }
+  else while (adlen > 0) {
+    adlen = ad_encryption(&ad,s,k,adlen,CNT,40,&l_skinny_ctrl);
+  }
+
+  xlen = msglen;
+  if ((w & 8) == 0) {
+    xlen = ad2msg_encryption (&mauth,CNT,s,k,44,xlen,&l_skinny_ctrl);
+  }
+  else if (msglen == 0) {
+    lfsr_gf56(CNT);
+  }
+  while (xlen > 0) {
+    xlen = ad_encryption(&mauth,s,k,xlen,CNT,44,&l_skinny_ctrl);
+  }
+  nonce_encryption(N,CNT,s,k,w,&l_skinny_ctrl);
+
+  // Tag generation
+  g8A(s, T);
+
+  // ---- Phase 3 : compare without an early exit ----
+
+  diff = 0;
+  for (int i = 0; i < 16; i++) {
+    diff |= (unsigned char)(T[i] ^ tag[i]);
+  }
+
+  if (diff != 0) {
+    // do not hand unauthenticated plaintext back to the caller
+    for (unsigned long long i = 0; i < msglen; i++) {
+      mstart[i] = 0;
+    }
+    return -1;
+  }
+
+  return 0;
+
+}
diff --git a/romulus/Implementations/crypto_aead/romulusm1+/opt32_NEC/skinny.h b/romulus/Implementations/crypto_aead/romulusm1+/opt32_NEC/skinny.h
new file mode 100644
index 0000000..d9f4a34
--- /dev/null
+++ b/romulus/Implementations/crypto_aead/romulusm1+/opt32_NEC/skinny.h
@@ -0,0 +1,69 @@
+#define ___SKINNY_LOOP
+//#define ___NUM_OF_ROUNDS_56
+#define ___ENABLE_WORD_CAST
+
+#include <stdint.h>
+
+typedef struct ___skinny_ctrl {
+#ifdef ___NUM_OF_ROUNDS_56
+  uint32_t roundKeys[240]; // number of rounds : 56
+#else
+  uint32_t roundKeys[176]; // number of rounds : 40
+#endif
+  void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K);
+} skinny_ctrl;
+
+extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
+extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
+extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
+
+#define pack_word(x0, x1, x2, x3, w) /* bytes are widened first: shifting a promoted int by 24 is undefined for bytes >= 0x80 */ \
+  w = ((uint32_t)(x3) << 24) ^ \
+  ((uint32_t)(x2) << 16) ^ \
+  ((uint32_t)(x1) << 8) ^ \
+  (uint32_t)(x0);
+
+#define unpack_word(x0, x1, x2, x3, w) \
+  x0 = ((w) & 0xff); \
+  x1 = (((w) >> 8) & 0xff); \
+  x2 = (((w) >> 16) & 0xff); \
+  x3 = ((w) >> 24);
+
+#define PERMUTATION() \
+/* permutation */ \
+ \
+  /* 7 6 5 4 3 2 1 0 */ \
+  /* 5 7 2 3 6 0 4 1 */ \
+ \
+  /* w0 (3 2 1 0) */ \
+  /* w1 (7 6 5 4) */ \
+ \
+  /* w0 (6 0 4 1) */ \
+  /* w1 (5 7 2 3) */ \
+ \
+  t0 = w1 << 8; /* 6 5 4 - */ \
+  t0 = t0 & 0xff00ff00; /* 6 - 4 - */ \
+ \
+  t1 = w1 << 16; /* 5 4 - - */ \
+  t1 = t1 & 0xff000000; 
/* 5 - - - */ \ + \ + t2 = w1 & 0xff000000; /* 7 - - - */ \ + t2 = t2 >> 8; /* - 7 - - */ \ + t1 = t1 ^ t2; /* 5 7 - - */ \ + \ + t2 = w0 & 0xff000000; /* 3 - - - */ \ + t2 = t2 >> 24; /* - - - 3 */ \ + t1 = t1 ^ t2; /* 5 7 - 3 */ \ + \ + w1 = w0 >> 8; /* - 3 2 1 */ \ + w1 = w1 & 0x0000ff00; /* - - 2 - */ \ + w1 = w1 ^ t1; /* 5 7 2 3 */ \ + \ + t2 = w0 & 0x0000ff00; /* - - 1 - */ \ + t2 = t2 >> 8; /* - - - 1 */ \ + t0 = t0 ^ t2; /* 6 - 4 1 */ \ + \ + w0 = w0 << 16; /* 1 0 - - */ \ + w0 = w0 & 0x00ff0000; /* - 0 - - */ \ + w0 = w0 ^ t0; /* 6 0 4 1 */ + diff --git a/romulus/Implementations/crypto_aead/romulusm1+/opt32_NEC/skinny_key_schedule2.c b/romulus/Implementations/crypto_aead/romulusm1+/opt32_NEC/skinny_key_schedule2.c new file mode 100644 index 0000000..923d4b8 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1+/opt32_NEC/skinny_key_schedule2.c @@ -0,0 +1,227 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * load * AC(c0 c1) ^ TK3 + * calc AC(c0 c1) ^ TK2 -> store + * ART(TK2) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + w0 = ((w0 << 1) & 0xfefefefe) ^ \ + (((w0 >> 7) ^ (w0 >> 5)) & 0x01010101); \ + w1 = ((w1 << 1) & 0xfefefefe) ^ \ + (((w1 >> 7) ^ (w1 >> 5)) & 0x01010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2++ = w0 ^ *tk3++; \ + *tk2++ = w1 ^ *tk3++; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(uint32_t *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = roundKeys[4]; + w1 = roundKeys[5]; + + tk2 = &roundKeys[16]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[96]; +#else + tk3 = &roundKeys[128]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th,43th, ... 
,51th,53th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + w0 = roundKeys[6]; + w1 = roundKeys[7]; + + tk2 = &roundKeys[18]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[98]; +#else + tk3 = &roundKeys[130]; +#endif + + // 2nd,4th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else + +void RunEncryptionKeyScheduleTK2(uint32_t *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = roundKeys[4]; + w1 = roundKeys[5]; + + tk2 = &roundKeys[16]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[96]; +#else + tk3 = &roundKeys[128]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... 
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + w0 = roundKeys[6]; + w1 = roundKeys[7]; + + tk2 = &roundKeys[18]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[98]; +#else + tk3 = &roundKeys[130]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif diff --git a/romulus/Implementations/crypto_aead/romulusm1+/opt32_NEC/skinny_key_schedule3.c b/romulus/Implementations/crypto_aead/romulusm1+/opt32_NEC/skinny_key_schedule3.c new file mode 100644 index 0000000..39254a6 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1+/opt32_NEC/skinny_key_schedule3.c @@ -0,0 +1,228 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ *
+ *****************************************************************************/
+
+/*
+ * SKINNY-128-384
+ *
+ * AC(c0 c1) ^ TK3 -> store
+ * ART(TK3)
+ *
+ * number of rounds : 40 or 56
+ */
+
+#include "skinny.h"
+/*
+ * PERMUTATION_TK3(c0Val, c1Val)
+ *
+ * Advances the TK3 half-state by one step and stores one round-key entry.
+ *
+ * Relies on names in the expanding scope: w0 and w1 (the two state words)
+ * and tk3 (output cursor).  PERMUTATION() is not defined in this file
+ * (presumably it comes from skinny.h and is the user of the t0..t2 scratch
+ * locals the callers declare - TODO confirm).
+ *
+ * Steps, in order:
+ *   1. PERMUTATION()
+ *   2. per-byte LFSR on w0 and w1: every byte is shifted right by one and
+ *      its new top bit is (old bit 0) ^ (old bit 6)
+ *   3. tk3[0] = w0 ^ c0Val, tk3[1] = w1 ^ c1Val
+ * tk3 ends up 4 words further on: two words written, two words skipped.
+ */
+#define PERMUTATION_TK3(c0Val, c1Val) \
+ \
+ /* permutation */ \
+ \
+ PERMUTATION() \
+ \
+ /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \
+ w0 = ((w0 >> 1) & 0x7f7f7f7f) ^ \
+ (((w0 << 7) ^ (w0 << 1)) & 0x80808080); \
+ w1 = ((w1 >> 1) & 0x7f7f7f7f) ^ \
+ (((w1 << 7) ^ (w1 << 1)) & 0x80808080); \
+ \
+ /* K3^AC(c0 c1) */ \
+ /* store */ \
+ *tk3++ = w0 ^ c0Val; \
+ *tk3++ = w1 ^ c1Val; \
+ tk3 += 2; /* leave the next two words to the other (odd/even) pass */
+
+#ifndef ___SKINNY_LOOP
+/*
+ * RunEncryptionKeyScheduleTK3 - unrolled build (___SKINNY_LOOP not defined).
+ *
+ * Reads the four TK3 words roundKeys[8..11] and fills the round-key area that
+ * starts at roundKeys[96] (40 rounds) or roundKeys[128] (56 rounds, when
+ * ___NUM_OF_ROUNDS_56 is defined) with "TK3 ^ round constant", two words per
+ * round.
+ *
+ * Odd rounds are derived from words 8/9 and even rounds from words 10/11.
+ * Each pass writes two words and skips two, so the two passes interleave in
+ * the output area (odd pass starts at +0, even pass at +2).
+ *
+ * The round constants are the literal arguments of PERMUTATION_TK3; the first
+ * argument is XORed into w0 and the second into w1.
+ *
+ * roundKeys: in/out word array; must extend at least to the last slot
+ *            written here (size is defined by the caller, not visible here).
+ */
+void RunEncryptionKeyScheduleTK3(uint32_t *roundKeys)
+{
+  uint32_t *tk3;
+  uint32_t t0; // used in MACRO
+  uint32_t t1; // used in MACRO
+  uint32_t t2; // used in MACRO
+  uint32_t w0;
+  uint32_t w1;
+
+  // odd
+
+  // load master key
+  w0 = roundKeys[8];
+  w1 = roundKeys[9];
+
+#ifndef ___NUM_OF_ROUNDS_56
+  tk3 = &roundKeys[96];
+#else
+  tk3 = &roundKeys[128];
+#endif
+
+  // 1st round
+  *tk3++ = w0 ^ 0x01; // round-1 constant; no permutation/LFSR step yet
+  *tk3++ = w1;
+  tk3 += 2;           // skip the slot of round 2 (written by the even pass)
+
+  // 3rd,5th, ... ,37th,39th round
+  PERMUTATION_TK3(0x7, 0x000);
+  PERMUTATION_TK3(0xf, 0x100);
+  PERMUTATION_TK3(0xd, 0x300);
+  PERMUTATION_TK3(0x7, 0x300);
+  PERMUTATION_TK3(0xe, 0x100);
+  PERMUTATION_TK3(0x9, 0x300);
+  PERMUTATION_TK3(0x7, 0x200);
+  PERMUTATION_TK3(0xd, 0x100);
+  PERMUTATION_TK3(0x5, 0x300);
+
+  PERMUTATION_TK3(0x6, 0x100);
+  PERMUTATION_TK3(0x8, 0x100);
+  PERMUTATION_TK3(0x1, 0x200);
+  PERMUTATION_TK3(0x5, 0x000);
+  PERMUTATION_TK3(0x7, 0x100);
+  PERMUTATION_TK3(0xc, 0x100);
+  PERMUTATION_TK3(0x1, 0x300);
+  PERMUTATION_TK3(0x6, 0x000);
+  PERMUTATION_TK3(0xb, 0x100);
+  PERMUTATION_TK3(0xd, 0x200);
+
+#ifdef ___NUM_OF_ROUNDS_56
+
+  // 41st,43rd, ... ,53rd,55th round
+  PERMUTATION_TK3(0x4, 0x300);
+  PERMUTATION_TK3(0x2, 0x100);
+  PERMUTATION_TK3(0x8, 0x000);
+  PERMUTATION_TK3(0x2, 0x200);
+  PERMUTATION_TK3(0x9, 0x000);
+  PERMUTATION_TK3(0x6, 0x200);
+  PERMUTATION_TK3(0x9, 0x100);
+  PERMUTATION_TK3(0x5, 0x200);
+
+#endif
+
+  // even
+
+  // load master key
+  w0 = roundKeys[10];
+  w1 = roundKeys[11];
+
+
+#ifndef ___NUM_OF_ROUNDS_56
+  tk3 = &roundKeys[98];
+#else
+  tk3 = &roundKeys[130];
+#endif
+
+  // 2nd,4th, ... ,38th,40th round
+  PERMUTATION_TK3(0x3, 0x000);
+  PERMUTATION_TK3(0xf, 0x000);
+  PERMUTATION_TK3(0xe, 0x300);
+  PERMUTATION_TK3(0xb, 0x300);
+  PERMUTATION_TK3(0xf, 0x200);
+  PERMUTATION_TK3(0xc, 0x300);
+  PERMUTATION_TK3(0x3, 0x300);
+  PERMUTATION_TK3(0xe, 0x000);
+  PERMUTATION_TK3(0xa, 0x300);
+  PERMUTATION_TK3(0xb, 0x200);
+
+  PERMUTATION_TK3(0xc, 0x200);
+  PERMUTATION_TK3(0x0, 0x300);
+  PERMUTATION_TK3(0x2, 0x000);
+  PERMUTATION_TK3(0xb, 0x000);
+  PERMUTATION_TK3(0xe, 0x200);
+  PERMUTATION_TK3(0x8, 0x300);
+  PERMUTATION_TK3(0x3, 0x200);
+  PERMUTATION_TK3(0xd, 0x000);
+  PERMUTATION_TK3(0x6, 0x300);
+  PERMUTATION_TK3(0xa, 0x100);
+
+#ifdef ___NUM_OF_ROUNDS_56
+
+  // 42nd,44th, ... ,54th,56th round
+  PERMUTATION_TK3(0x9, 0x200);
+  PERMUTATION_TK3(0x4, 0x200);
+  PERMUTATION_TK3(0x1, 0x100);
+  PERMUTATION_TK3(0x4, 0x000);
+  PERMUTATION_TK3(0x3, 0x100);
+  PERMUTATION_TK3(0xc, 0x000);
+  PERMUTATION_TK3(0x2, 0x300);
+  PERMUTATION_TK3(0xa, 0x000);
+
+#endif
+
+}
+
+#else
+/*
+ * RunEncryptionKeyScheduleTK3 - looped build (___SKINNY_LOOP defined).
+ *
+ * Same reads and writes on roundKeys as the unrolled build, but the round
+ * constants are read from a table instead of being literal macro arguments.
+ *
+ * roundKeys: in/out word array; words 8..11 are read, the area starting at
+ *            roundKeys[96] (40 rounds) or roundKeys[128] (56 rounds) is
+ *            written.
+ * pRC:       round-constant table read as two bytes per round: the first
+ *            byte is used as c0, the second is shifted left by 8 and used
+ *            as c1.  The table must hold at least 80 bytes (40 rounds) or
+ *            112 bytes (56 rounds), given the offsets walked below.
+ */
+void RunEncryptionKeyScheduleTK3(uint32_t *roundKeys, unsigned char *pRC)
+{
+  uint32_t *tk3;
+  uint32_t t0; // used in MACRO
+  uint32_t t1; // used in MACRO
+  uint32_t t2; // used in MACRO
+  uint32_t w0;
+  uint32_t w1;
+  uint16_t c0;
+  uint16_t c1;
+
+  // odd
+
+  // load master key
+  w0 = roundKeys[8];
+  w1 = roundKeys[9];
+
+#ifndef ___NUM_OF_ROUNDS_56
+  tk3 = &roundKeys[96];
+#else
+  tk3 = &roundKeys[128];
+#endif
+
+  // 1st round
+  *tk3++ = w0 ^ 0x01; // round-1 constant is hard-coded, not read from pRC
+  *tk3++ = w1;
+  tk3 += 2;
+
+  pRC += 4; // skip the table entries of rounds 1 and 2; next read is round 3
+  // 3rd,5th, ...
+#ifndef ___NUM_OF_ROUNDS_56
+  for(int i=0;i<19;i++)
+#else
+  for(int i=0;i<27;i++)
+#endif
+  {
+    c0 = *pRC++;
+    c1 = *pRC++;
+    c1 <<= 8;
+    pRC += 2; // step over the even round's entry
+    PERMUTATION_TK3(c0, c1);
+  }
+
+  // even
+
+  // load master key
+  w0 = roundKeys[10];
+  w1 = roundKeys[11];
+
+#ifndef ___NUM_OF_ROUNDS_56
+  pRC -= 78;  // 4 + 19*4 = 80 bytes consumed; rewind to offset 2 (round 2)
+  tk3 = &roundKeys[98];
+#else
+  pRC -= 110; // 4 + 27*4 = 112 bytes consumed; rewind to offset 2 (round 2)
+  tk3 = &roundKeys[130];
+#endif
+
+  // 2nd,4th, ...
+#ifndef ___NUM_OF_ROUNDS_56
+  for(int i=0;i<20;i++)
+#else
+  for(int i=0;i<28;i++)
+#endif
+  {
+    c0 = *pRC++;
+    c1 = *pRC++;
+    c1 <<= 8;
+    pRC += 2; // step over the odd round's entry
+    PERMUTATION_TK3(c0, c1);
+  }
+
+}
+
+#endif
diff --git a/romulus/Implementations/crypto_aead/romulusm1+/opt32_NEC/skinny_main.c b/romulus/Implementations/crypto_aead/romulusm1+/opt32_NEC/skinny_main.c
new file mode 100644
index 0000000..74222ee
--- /dev/null
+++ b/romulus/Implementations/crypto_aead/romulusm1+/opt32_NEC/skinny_main.c
@@ -0,0 +1,537 @@
+/******************************************************************************
+ * Copyright (c) 2020, NEC Corporation.
+ *
+ * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND.
+ *
+ *****************************************************************************/
+
+/*
+ * SKINNY-128-384
+ *
+ * ART(TK1) -> store
+ * load AC(c0 c1) ^ TK3 ^ TK2
+ * load TK1
+ * calc AC(c0 c1) ^ TK3 ^ TK2 ^ TK1 -> use at (AC->ART)
+ * SC->SR->(AC->ART)->MC
+ *
+ * number of rounds : 40 or 56
+ */
+
+#include "skinny.h"
+
+/*
+ * S-BOX
+ *
+ * 256-entry byte substitution table, indexed by the input byte (16 rows of
+ * 16 entries; row r holds the outputs for inputs 16*r .. 16*r+15).
+ * In this file it is handed to Encrypt() as the `sbox` argument.
+ * The table is not declared const because Encrypt() takes a non-const
+ * pointer; nothing visible in this file writes to it.
+ */
+unsigned char SBOX[]
+= {
+  // Original
+  0x65, 0x4c, 0x6a, 0x42, 0x4b, 0x63, 0x43, 0x6b, 0x55, 0x75, 0x5a, 0x7a, 0x53, 0x73, 0x5b, 0x7b,
+  0x35, 0x8c, 0x3a, 0x81, 0x89, 0x33, 0x80, 0x3b, 0x95, 0x25, 0x98, 0x2a, 0x90, 0x23, 0x99, 0x2b,
+  0xe5, 0xcc, 0xe8, 0xc1, 0xc9, 0xe0, 0xc0, 0xe9, 0xd5, 0xf5, 0xd8, 0xf8, 0xd0, 0xf0, 0xd9, 0xf9,
+  0xa5, 0x1c, 0xa8, 0x12, 0x1b, 0xa0, 0x13, 0xa9, 0x05, 0xb5, 0x0a, 0xb8, 0x03, 0xb0, 0x0b, 0xb9,
+  0x32, 0x88, 0x3c, 0x85, 0x8d, 0x34, 0x84, 0x3d, 0x91, 0x22, 0x9c, 0x2c, 0x94, 0x24, 0x9d, 0x2d,
+  0x62, 0x4a, 0x6c, 0x45, 0x4d, 0x64, 0x44, 0x6d, 0x52, 0x72, 0x5c, 0x7c, 0x54, 0x74, 0x5d, 0x7d,
+  0xa1, 0x1a, 0xac, 0x15, 0x1d, 0xa4, 0x14, 0xad, 0x02, 0xb1, 0x0c, 0xbc, 0x04, 0xb4, 0x0d, 0xbd,
+  0xe1, 0xc8, 0xec, 0xc5, 0xcd, 0xe4, 0xc4, 0xed, 0xd1, 0xf1, 0xdc, 0xfc, 0xd4, 0xf4, 0xdd, 0xfd,
+  0x36, 0x8e, 0x38, 0x82, 0x8b, 0x30, 0x83, 0x39, 0x96, 0x26, 0x9a, 0x28, 0x93, 0x20, 0x9b, 0x29,
+  0x66, 0x4e, 0x68, 0x41, 0x49, 0x60, 0x40, 0x69, 0x56, 0x76, 0x58, 0x78, 0x50, 0x70, 0x59, 0x79,
+  0xa6, 0x1e, 0xaa, 0x11, 0x19, 0xa3, 0x10, 0xab, 0x06, 0xb6, 0x08, 0xba, 0x00, 0xb3, 0x09, 0xbb,
+  0xe6, 0xce, 0xea, 0xc2, 0xcb, 0xe3, 0xc3, 0xeb, 0xd6, 0xf6, 0xda, 0xfa, 0xd3, 0xf3, 0xdb, 0xfb,
+  0x31, 0x8a, 0x3e, 0x86, 0x8f, 0x37, 0x87, 0x3f, 0x92, 0x21, 0x9e, 0x2e, 0x97, 0x27, 0x9f, 0x2f,
+  0x61, 0x48, 0x6e, 0x46, 0x4f, 0x67, 0x47, 0x6f, 0x51, 0x71, 0x5e, 0x7e, 0x57, 0x77, 0x5f, 0x7f,
+  0xa2, 0x18, 0xae, 0x16, 0x1f, 0xa7, 0x17, 0xaf, 0x01, 0xb2, 0x0e, 0xbe, 0x07, 0xb7, 0x0f, 0xbf,
+  0xe2, 0xca, 0xee, 0xc6, 0xcf, 0xe7, 0xc7, 0xef, 0xd2, 0xf2, 0xde, 0xfe, 0xd7, 0xf7, 0xdf, 0xff,
+};
+
+ /*
+ * S-BOX ^ AC(c2)
+ */
+unsigned char SBOX2[] += { // Original ^ c2(0x02) + 0x67, 0x4e, 0x68, 0x40, 0x49, 0x61, 0x41, 0x69, 0x57, 0x77, 0x58, 0x78, 0x51, 0x71, 0x59, 0x79, + 0x37, 0x8e, 0x38, 0x83, 0x8b, 0x31, 0x82, 0x39, 0x97, 0x27, 0x9a, 0x28, 0x92, 0x21, 0x9b, 0x29, + 0xe7, 0xce, 0xea, 0xc3, 0xcb, 0xe2, 0xc2, 0xeb, 0xd7, 0xf7, 0xda, 0xfa, 0xd2, 0xf2, 0xdb, 0xfb, + 0xa7, 0x1e, 0xaa, 0x10, 0x19, 0xa2, 0x11, 0xab, 0x07, 0xb7, 0x08, 0xba, 0x01, 0xb2, 0x09, 0xbb, + 0x30, 0x8a, 0x3e, 0x87, 0x8f, 0x36, 0x86, 0x3f, 0x93, 0x20, 0x9e, 0x2e, 0x96, 0x26, 0x9f, 0x2f, + 0x60, 0x48, 0x6e, 0x47, 0x4f, 0x66, 0x46, 0x6f, 0x50, 0x70, 0x5e, 0x7e, 0x56, 0x76, 0x5f, 0x7f, + 0xa3, 0x18, 0xae, 0x17, 0x1f, 0xa6, 0x16, 0xaf, 0x00, 0xb3, 0x0e, 0xbe, 0x06, 0xb6, 0x0f, 0xbf, + 0xe3, 0xca, 0xee, 0xc7, 0xcf, 0xe6, 0xc6, 0xef, 0xd3, 0xf3, 0xde, 0xfe, 0xd6, 0xf6, 0xdf, 0xff, + 0x34, 0x8c, 0x3a, 0x80, 0x89, 0x32, 0x81, 0x3b, 0x94, 0x24, 0x98, 0x2a, 0x91, 0x22, 0x99, 0x2b, + 0x64, 0x4c, 0x6a, 0x43, 0x4b, 0x62, 0x42, 0x6b, 0x54, 0x74, 0x5a, 0x7a, 0x52, 0x72, 0x5b, 0x7b, + 0xa4, 0x1c, 0xa8, 0x13, 0x1b, 0xa1, 0x12, 0xa9, 0x04, 0xb4, 0x0a, 0xb8, 0x02, 0xb1, 0x0b, 0xb9, + 0xe4, 0xcc, 0xe8, 0xc0, 0xc9, 0xe1, 0xc1, 0xe9, 0xd4, 0xf4, 0xd8, 0xf8, 0xd1, 0xf1, 0xd9, 0xf9, + 0x33, 0x88, 0x3c, 0x84, 0x8d, 0x35, 0x85, 0x3d, 0x90, 0x23, 0x9c, 0x2c, 0x95, 0x25, 0x9d, 0x2d, + 0x63, 0x4a, 0x6c, 0x44, 0x4d, 0x65, 0x45, 0x6d, 0x53, 0x73, 0x5c, 0x7c, 0x55, 0x75, 0x5d, 0x7d, + 0xa0, 0x1a, 0xac, 0x14, 0x1d, 0xa5, 0x15, 0xad, 0x03, 0xb0, 0x0c, 0xbc, 0x05, 0xb5, 0x0d, 0xbd, + 0xe0, 0xc8, 0xec, 0xc4, 0xcd, 0xe5, 0xc5, 0xed, 0xd0, 0xf0, 0xdc, 0xfc, 0xd5, 0xf5, 0xdd, 0xfd, +}; + +#ifdef ___SKINNY_LOOP +/* + * Round Constants + */ +unsigned char RC[] += { + 0x01, 0x00, 0x03, 0x00, 0x07, 0x00, 0x0f, 0x00, 0x0f, 0x01, 0x0e, 0x03, 0x0d, 0x03, 0x0b, 0x03, + 0x07, 0x03, 0x0f, 0x02, 0x0e, 0x01, 0x0c, 0x03, 0x09, 0x03, 0x03, 0x03, 0x07, 0x02, 0x0e, 0x00, + 0x0d, 0x01, 0x0a, 0x03, 0x05, 0x03, 0x0b, 0x02, 0x06, 0x01, 0x0c, 0x02, 0x08, 0x01, 0x00, 0x03, + 
0x01, 0x02, 0x02, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x07, 0x01, 0x0e, 0x02, 0x0c, 0x01, 0x08, 0x03, + 0x01, 0x03, 0x03, 0x02, 0x06, 0x00, 0x0d, 0x00, 0x0b, 0x01, 0x06, 0x03, 0x0d, 0x02, 0x0a, 0x01, +#ifdef ___NUM_OF_ROUNDS_56 + 0x04, 0x03, 0x09, 0x02, 0x02, 0x01, 0x04, 0x02, 0x08, 0x00, 0x01, 0x01, 0x02, 0x02, 0x04, 0x00, + 0x09, 0x00, 0x03, 0x01, 0x06, 0x02, 0x0c, 0x00, 0x09, 0x01, 0x02, 0x03, 0x05, 0x02, 0x0a, 0x00, +#endif + }; +#endif + +extern void Encrypt(unsigned char *block, uint32_t *roundKeys, unsigned char *sbox, unsigned char *sbox2); +extern void RunEncryptionKeyScheduleTK2(uint32_t *roundKeys); +#ifdef ___SKINNY_LOOP +extern void RunEncryptionKeyScheduleTK3(uint32_t *roundKeys, unsigned char *pRC); +#else +extern void RunEncryptionKeyScheduleTK3(uint32_t *roundKeys); +#endif + +void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; +#ifndef ___ENABLE_WORD_CAST + pack_word(CNT[0], CNT[1], CNT[2], CNT[3], pt[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pack_word(T[0], T[1], T[2], T[3], pt[4]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pack_word(T[8], T[9], T[10], T[11], pt[6]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + pack_word(K[0], K[1], K[2], K[3], pt[8]); + pack_word(K[7], K[4], K[5], K[6], pt[9]); + pack_word(K[8], K[9], K[10], K[11], pt[10]); + pack_word(K[15], K[12], K[13], K[14], pt[11]); +#else + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + pt[8] = *(uint32_t*)(&K[0]); + pack_word(K[7], K[4], K[5], K[6], pt[9]); + pt[10] = *(uint32_t*)(&K[8]); + pack_word(K[15], K[12], K[13], K[14], pt[11]); +#endif + +#ifdef ___SKINNY_LOOP + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys, RC); +#else 
+ RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys); +#endif + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + + pskinny_ctrl->func_skinny_128_384_enc = skinny_128_384_enc12_12; + +} + +void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)K; + + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; +#ifndef ___ENABLE_WORD_CAST + pack_word(CNT[0], CNT[1], CNT[2], CNT[3], pt[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pack_word(T[0], T[1], T[2], T[3], pt[4]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pack_word(T[8], T[9], T[10], T[11], pt[6]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); +#else + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); +#endif + + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)T; + (void)K; + + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; +#ifndef ___ENABLE_WORD_CAST + pack_word(CNT[0], CNT[1], CNT[2], CNT[3], pt[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); +#else + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); +#endif + + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +#define PERMUTATION_TK1() \ +/* permutation */ \ + \ + PERMUTATION(); \ + \ + /* store */ \ + \ + *tk1++ = w0; \ + *tk1++ = w1; + +#define SBOX_0(w) \ + \ + t0 = (w) & 0xff; \ + t1 = (w >> 8) & 0xff; \ + t2 = (w >> 16) & 0xff; \ + t3 = (w >> 24); \ + \ + t0 = sbox[t0]; \ + t1 = sbox[t1]; \ + t2 = sbox[t2]; \ + t3 = sbox[t3]; \ + 
\ + w = (t0) ^ \ + (t1 << 8) ^ \ + (t2 << 16) ^ \ + (t3 << 24); + +#define SBOX_8(w) \ + \ + t0 = (w) & 0xff; \ + t1 = (w >> 8) & 0xff; \ + t2 = (w >> 16) & 0xff; \ + t3 = (w >> 24); \ + \ + t0 = sbox[t0]; \ + t1 = sbox[t1]; \ + t2 = sbox[t2]; \ + t3 = sbox[t3]; \ + \ + w = (t0 << 8) ^ \ + (t1 << 16) ^ \ + (t2 << 24) ^ \ + (t3); + +#define SBOX_16(w) \ + \ + t0 = (w) & 0xff; \ + t1 = (w >> 8) & 0xff; \ + t2 = (w >> 16) & 0xff; \ + t3 = (w >> 24); \ + \ + t0 = sbox2[t0]; /* AC(c2) */ \ + t1 = sbox[t1]; \ + t2 = sbox[t2]; \ + t3 = sbox[t3]; \ + \ + w = (t0 << 16) ^ \ + (t1 << 24) ^ \ + (t2) ^ \ + (t3 << 8); + +#define SBOX_24(w) \ + \ + t0 = (w) & 0xff; \ + t1 = (w >> 8) & 0xff; \ + t2 = (w >> 16) & 0xff; \ + t3 = (w >> 24); \ + \ + t0 = sbox[t0]; \ + t1 = sbox[t1]; \ + t2 = sbox[t2]; \ + t3 = sbox[t3]; \ + \ + w = (t0 << 24) ^ \ + (t1) ^ \ + (t2 << 8) ^ \ + (t3 << 16); + +#define SKINNY_MAIN() \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows) */ \ + \ + SBOX_0(w0); \ + SBOX_8(w1); \ + SBOX_16(w2); \ + SBOX_24(w3); \ + \ + /* LUT(with ShiftRows & AC(c2) */ \ + \ + w0 ^= *tk2++; \ + w1 ^= *tk2++; \ + \ + /* Load TK1 */ \ + \ + w0 ^= *tk1++; \ + w1 ^= *tk1++; \ + \ + /* MC */ \ + /* 0 2 3 */ \ + /* 0 */ \ + /* 1 2 */ \ + /* 0 2 */ \ + \ + /* 0^2 */ \ + t0 = w0 ^ w2; \ + \ + /* 1^2 */ \ + w2 = w1 ^ w2; \ + \ + /* 0 */ \ + w1 = w0; \ + \ + /* 0^2^3 */ \ + w0 = t0 ^ w3; \ + \ + /* 0^2 */ \ + w3 = t0; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2) */ \ + \ + SBOX_0(w0); \ + SBOX_8(w1); \ + SBOX_16(w2); \ + SBOX_24(w3); \ + \ + /* Load TK2^TK3^AC(c0 c1) */ \ + \ + w0 ^= *tk2++; \ + w1 ^= *tk2++; \ + \ + /* MC */ \ + /* 0 2 3 */ \ + /* 0 */ \ + /* 1 2 */ \ + /* 0 2 */ \ + \ + /* 0^2 */ \ + t0 = w0 ^ w2; \ + \ + /* 1^2 */ \ + w2 = w1 ^ w2; \ + \ + /* 0 */ \ + w1 = w0; \ + \ + /* 0^2^3 */ \ + w0 = t0 ^ w3; \ + \ + /* 0^2 */ \ + w3 = t0; + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, uint32_t *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + 
uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t w0; + uint32_t w1; + uint32_t w2; + uint32_t w3; + +// TK1 + + // load master key + w0 = roundKeys[0]; + w1 = roundKeys[1]; + + // 1st round + // not need to store + + tk1 = &roundKeys[2]; + + // 2nd, ... ,8th round + PERMUTATION_TK1(); + PERMUTATION_TK1(); + PERMUTATION_TK1(); + PERMUTATION_TK1(); + PERMUTATION_TK1(); + PERMUTATION_TK1(); + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + +#ifndef ___ENABLE_WORD_CAST + pack_word(block[0], block[1], block[2], block[3], w0); + pack_word(block[4], block[5], block[6], block[7], w1); + pack_word(block[8], block[9], block[10], block[11], w2); + pack_word(block[12], block[13], block[14], block[15], w3); +#else + w0 = *(uint32_t*)(&block[0]); + w1 = *(uint32_t*)(&block[4]); + w2 = *(uint32_t*)(&block[8]); + w3 = *(uint32_t*)(&block[12]); +#endif + + tk2 = &roundKeys[16]; + tk1 = &roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = &roundKeys[0]; + + // 17th, ...,32th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = &roundKeys[0]; + + // 33th, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = &roundKeys[0]; + + // 49th, ... 
,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +#ifndef ___ENABLE_WORD_CAST + unpack_word(block[0], block[1], block[2], block[3], w0); + unpack_word(block[4], block[5], block[6], block[7], w1); + unpack_word(block[8], block[9], block[10], block[11], w2); + unpack_word(block[12], block[13], block[14], block[15], w3); +#else + *(uint32_t*)(&block[0]) = w0; + *(uint32_t*)(&block[4]) = w1; + *(uint32_t*)(&block[8]) = w2; + *(uint32_t*)(&block[12]) = w3; +#endif + +} + +#else + +void Encrypt(unsigned char *block, uint32_t *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t w0; + uint32_t w1; + uint32_t w2; + uint32_t w3; + +// TK1 + + // load master key + w0 = roundKeys[0]; + w1 = roundKeys[1]; + + // 1st round + // not need to store + + tk1 = &roundKeys[2]; + + // 2nd, ... ,8th round + for(int i=0;i<7;i++) + { + PERMUTATION_TK1(); + } + +// SB+AC+ShR+MC + +#ifndef ___ENABLE_WORD_CAST + pack_word(block[0], block[1], block[2], block[3], w0); + pack_word(block[4], block[5], block[6], block[7], w1); + pack_word(block[8], block[9], block[10], block[11], w2); + pack_word(block[12], block[13], block[14], block[15], w3); +#else + w0 = *(uint32_t*)(&block[0]); + w1 = *(uint32_t*)(&block[4]); + w2 = *(uint32_t*)(&block[8]); + w3 = *(uint32_t*)(&block[12]); +#endif + + tk2 = &roundKeys[16]; + + // 1st, ... ,32th or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = &roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33th , ... ,40th or 49th, .... 
,56th round + { + tk1 = &roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +#ifndef ___ENABLE_WORD_CAST + unpack_word(block[0], block[1], block[2], block[3], w0); + unpack_word(block[4], block[5], block[6], block[7], w1); + unpack_word(block[8], block[9], block[10], block[11], w2); + unpack_word(block[12], block[13], block[14], block[15], w3); +#else + *(uint32_t*)(&block[0]) = w0; + *(uint32_t*)(&block[4]) = w1; + *(uint32_t*)(&block[8]) = w2; + *(uint32_t*)(&block[12]) = w3; +#endif + +} + +#endif diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32_NEC/api.h b/romulus/Implementations/crypto_aead/romulusm1/opt32_NEC/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1/opt32_NEC/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32_NEC/encrypt.c b/romulus/Implementations/crypto_aead/romulusm1/opt32_NEC/encrypt.c new file mode 100644 index 0000000..4bc24fa --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1/opt32_NEC/encrypt.c @@ -0,0 +1,1245 @@ +/* + * Date: 29 November 2018 + * Contact: Thomas Peyrin - thomas.peyrin@gmail.com + * Mustafa Khairallah - mustafam001@e.ntu.edu.sg + */ + +#include "crypto_aead.h" +#include "api.h" +#include "skinny.h" +#include +#include + +void pad (const unsigned char* m, unsigned char* mp, int len8) { + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&mp[0]) = 0; + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = (len8 & 0x0f); + for (int i = 0; i < len8; i++) { + mp[i] = m[i]; + } + +#else + + mp[0] = 0; + mp[1] = 0; + mp[2] = 0; + mp[3] = 0; + mp[4] = 0; + mp[5] = 0; + mp[6] = 0; + mp[7] = 0; + mp[8] = 0; + mp[9] = 0; + mp[10] = 0; + mp[11] = 0; + mp[12] = 0; + mp[13] = 0; + mp[14] = 0; + mp[15] = (len8 & 0x0f); + for 
(int i = 0; i < len8; i++) { + mp[i] = m[i]; + } + +#endif + +} + +void g8A (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_WORD_CAST + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#else + + uint32_t s0, s1, s2, s3; + uint32_t c0, c1, c2, c3; + + pack_word(s[0], s[1], s[2], s[3], s0); + pack_word(s[4], s[5], s[6], s[7], s1); + pack_word(s[8], s[9], s[10], s[11], s2); + pack_word(s[12], s[13], s[14], s[15], s3); + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + unpack_word(c[0], c[1], c[2], c[3], c0); + unpack_word(c[4], c[5], c[6], c[7], c1); + unpack_word(c[8], c[9], c[10], c[11], c2); + unpack_word(c[12], c[13], c[14], c[15], c3); + +#endif + +} + +#ifdef ___ENABLE_WORD_CAST + +void g8A_for_Tag_Generation (unsigned char* s, unsigned char* c) { + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + // use byte access because of 
memory alignment. + // c is not always in word(4 byte) alignment. + c[0] = c0 &0xFF; + c[1] = (c0>>8) &0xFF; + c[2] = (c0>>16)&0xFF; + c[3] = c0>>24; + c[4] = c1 &0xFF; + c[5] = (c1>>8) &0xFF; + c[6] = (c1>>16)&0xFF; + c[7] = c1>>24; + c[8] = c2 &0xFF; + c[9] = (c2>>8) &0xFF; + c[10] = (c2>>16)&0xFF; + c[11] = c2>>24; + c[12] = c3 &0xFF; + c[13] = (c3>>8) &0xFF; + c[14] = (c3>>16)&0xFF; + c[15] = c3>>24; + +} + +#endif + +#define rho_ad_eqov16_macro(i) \ + s[i] = s[i] ^ m[i]; + +void rho_ad_eqov16 ( + const unsigned char* m, + unsigned char* s) { + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&m[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&m[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&m[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&m[12]); + +#else + + rho_ad_eqov16_macro(0); + rho_ad_eqov16_macro(1); + rho_ad_eqov16_macro(2); + rho_ad_eqov16_macro(3); + rho_ad_eqov16_macro(4); + rho_ad_eqov16_macro(5); + rho_ad_eqov16_macro(6); + rho_ad_eqov16_macro(7); + rho_ad_eqov16_macro(8); + rho_ad_eqov16_macro(9); + rho_ad_eqov16_macro(10); + rho_ad_eqov16_macro(11); + rho_ad_eqov16_macro(12); + rho_ad_eqov16_macro(13); + rho_ad_eqov16_macro(14); + rho_ad_eqov16_macro(15); + +#endif + +} + +#define rho_ad_ud16_macro(i) \ + s[i] = s[i] ^ mp[i]; + +void rho_ad_ud16 ( + const unsigned char* m, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + pad(m,mp,len8); + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&mp[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&mp[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&mp[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&mp[12]); + +#else + + rho_ad_ud16_macro(0); + rho_ad_ud16_macro(1); + rho_ad_ud16_macro(2); + rho_ad_ud16_macro(3); + rho_ad_ud16_macro(4); + rho_ad_ud16_macro(5); + rho_ad_ud16_macro(6); + rho_ad_ud16_macro(7); + rho_ad_ud16_macro(8); + rho_ad_ud16_macro(9); + rho_ad_ud16_macro(10); + rho_ad_ud16_macro(11); + rho_ad_ud16_macro(12); + rho_ad_ud16_macro(13); + 
rho_ad_ud16_macro(14); + rho_ad_ud16_macro(15); + +#endif + +} + +void rho_eqov16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s) { + + g8A(s,c); + +#ifdef ___ENABLE_WORD_CAST + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= m0; + s1 ^= m1; + s2 ^= m2; + s3 ^= m3; + + c0 ^= m0; + c1 ^= m1; + c2 ^= m2; + c3 ^= m3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#else + + uint32_t c0, c1, c2, c3; + uint32_t s0, s1, s2, s3; + uint32_t m0, m1, m2, m3; + + pack_word(m[0], m[1], m[2], m[3], m0); + pack_word(m[4], m[5], m[6], m[7], m1); + pack_word(m[8], m[9], m[10], m[11], m2); + pack_word(m[12], m[13], m[14], m[15], m3); + + pack_word(s[0], s[1], s[2], s[3], s0); + pack_word(s[4], s[5], s[6], s[7], s1); + pack_word(s[8], s[9], s[10], s[11], s2); + pack_word(s[12], s[13], s[14], s[15], s3); + + pack_word(c[0], c[1], c[2], c[3], c0); + pack_word(c[4], c[5], c[6], c[7], c1); + pack_word(c[8], c[9], c[10], c[11], c2); + pack_word(c[12], c[13], c[14], c[15], c3); + + s0 ^= m0; + s1 ^= m1; + s2 ^= m2; + s3 ^= m3; + + c0 ^= m0; + c1 ^= m1; + c2 ^= m2; + c3 ^= m3; + + unpack_word(s[0], s[1], s[2], s[3], s0); + unpack_word(s[4], s[5], s[6], s[7], s1); + unpack_word(s[8], s[9], s[10], s[11], s2); + unpack_word(s[12], s[13], s[14], s[15], s3); + + unpack_word(c[0], c[1], c[2], c[3], c0); + unpack_word(c[4], c[5], c[6], c[7], c1); + unpack_word(c[8], c[9], c[10], c[11], c2); 
+ unpack_word(c[12], c[13], c[14], c[15], c3); + +#endif + +} + +#define rho_ud16_macro(i) \ + s[i] = s[i] ^ mp[i]; + +void rho_ud16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + + pad(m,mp,len8); + + g8A(s,c); +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&mp[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&mp[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&mp[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&mp[12]); + + for (int i = 0; i < 16; i++) { + if (i < len8) { + c[i] = c[i] ^ mp[i]; + } + else { + c[i] = 0; + } + } + +#else + + rho_ud16_macro(0); + rho_ud16_macro(1); + rho_ud16_macro(2); + rho_ud16_macro(3); + rho_ud16_macro(4); + rho_ud16_macro(5); + rho_ud16_macro(6); + rho_ud16_macro(7); + rho_ud16_macro(8); + rho_ud16_macro(9); + rho_ud16_macro(10); + rho_ud16_macro(11); + rho_ud16_macro(12); + rho_ud16_macro(13); + rho_ud16_macro(14); + rho_ud16_macro(15); + + for (int i = 0; i < 16; i++) { + if (i < len8) { + c[i] = c[i] ^ mp[i]; + } + else { + c[i] = 0; + } + } + +#endif + +} + +void irho_eqov16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s) { + + g8A(s,m); + +#ifdef ___ENABLE_WORD_CAST + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + s2 ^= c2 ^ m2; + s3 ^= c3 ^ m3; + + m0 ^= c0; + m1 ^= c1; + m2 ^= c2; + m3 ^= c3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&m[0]) = m0; + *(uint32_t*)(&m[4]) = m1; + *(uint32_t*)(&m[8]) = m2; + 
*(uint32_t*)(&m[12]) = m3; + +#else + + uint32_t c0, c1, c2, c3; + uint32_t s0, s1, s2, s3; + uint32_t m0, m1, m2, m3; + + pack_word(m[0], m[1], m[2], m[3], m0); + pack_word(m[4], m[5], m[6], m[7], m1); + pack_word(m[8], m[9], m[10], m[11], m2); + pack_word(m[12], m[13], m[14], m[15], m3); + + pack_word(s[0], s[1], s[2], s[3], s0); + pack_word(s[4], s[5], s[6], s[7], s1); + pack_word(s[8], s[9], s[10], s[11], s2); + pack_word(s[12], s[13], s[14], s[15], s3); + + pack_word(c[0], c[1], c[2], c[3], c0); + pack_word(c[4], c[5], c[6], c[7], c1); + pack_word(c[8], c[9], c[10], c[11], c2); + pack_word(c[12], c[13], c[14], c[15], c3); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + s2 ^= c2 ^ m2; + s3 ^= c3 ^ m3; + + m0 ^= c0; + m1 ^= c1; + m2 ^= c2; + m3 ^= c3; + + unpack_word(s[0], s[1], s[2], s[3], s0); + unpack_word(s[4], s[5], s[6], s[7], s1); + unpack_word(s[8], s[9], s[10], s[11], s2); + unpack_word(s[12], s[13], s[14], s[15], s3); + + unpack_word(m[0], m[1], m[2], m[3], m0); + unpack_word(m[4], m[5], m[6], m[7], m1); + unpack_word(m[8], m[9], m[10], m[11], m2); + unpack_word(m[12], m[13], m[14], m[15], m3); + +#endif + +} + +#define irho_ud16_macro(i) \ + s[i] = s[i] ^ cp[i]; + +void irho_ud16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char cp [16]; + + pad(c,cp,len8); + + g8A(s,m); + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&cp[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&cp[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&cp[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&cp[12]); + + for (int i = 0; i < len8; i++) { + s[i] ^= m[i]; + } + + for (int i = 0; i < 16; i++) { + if (i < len8) { + m[i] = m[i] ^ cp[i]; + } + else { + m[i] = 0; + } + } + +#else + + irho_ud16_macro(0); + irho_ud16_macro(1); + irho_ud16_macro(2); + irho_ud16_macro(3); + irho_ud16_macro(4); + irho_ud16_macro(5); + irho_ud16_macro(6); + irho_ud16_macro(7); + irho_ud16_macro(8); + irho_ud16_macro(9); + irho_ud16_macro(10); + 
irho_ud16_macro(11);
+  irho_ud16_macro(12);
+  irho_ud16_macro(13);
+  irho_ud16_macro(14);
+  irho_ud16_macro(15);
+
+  for (int i = 0; i < len8; i++) {
+    s[i] ^= m[i];
+  }
+
+  for (int i = 0; i < 16; i++) {
+    if (i < len8) {
+      m[i] = m[i] ^ cp[i];
+    }
+    else {
+      m[i] = 0;
+    }
+  }
+
+#endif
+
+}
+/*
+ * reset_lfsr_gf56 - set the block counter to its initial value 1.
+ *
+ * CNT: counter buffer; CNT[0] is the least significant byte.
+ *
+ * The byte-wise path writes CNT[0..6] only and leaves CNT[7] untouched; the
+ * word-cast path also zeroes CNT[7].  block_cipher() overwrites CNT[7] before
+ * every cipher call, so the difference is not observable through that path.
+ *
+ * NOTE(review): the ___ENABLE_WORD_CAST path stores through uint32_t* casts,
+ * which only matches the byte-wise path on a little-endian target and needs
+ * CNT to be 4-byte aligned - confirm for every platform that enables it.
+ */
+void reset_lfsr_gf56 (unsigned char* CNT) {
+
+#ifdef ___ENABLE_WORD_CAST
+
+  *(uint32_t*)(&CNT[0]) = 0x00000001; // CNT3 CNT2 CNT1 CNT0
+  *(uint32_t*)(&CNT[4]) = 0x00000000; // CNT7 CNT6 CNT5 CNT4
+
+#else
+
+  CNT[0] = 0x01;
+  CNT[1] = 0x00;
+  CNT[2] = 0x00;
+  CNT[3] = 0x00;
+  CNT[4] = 0x00;
+  CNT[5] = 0x00;
+  CNT[6] = 0x00;
+
+#endif
+
+}
+/*
+ * lfsr_gf56 - advance the 56-bit counter held in CNT[0..6] by one step.
+ *
+ * The 7 bytes are shifted left by one bit as a single little-endian value;
+ * when the bit shifted out of CNT[6] was set, 0x95 is XORed into CNT[0].
+ *
+ * The word-cast path shifts CNT[7] along with the rest (so CNT[7] is
+ * clobbered), whereas the byte-wise path never touches CNT[7].  Callers in
+ * this file set CNT[7] again in block_cipher() before it is used.
+ *
+ * NOTE(review): same little-endian / alignment assumption as
+ * reset_lfsr_gf56() on the ___ENABLE_WORD_CAST path.
+ */
+void lfsr_gf56 (unsigned char* CNT) {
+
+#ifdef ___ENABLE_WORD_CAST
+
+  uint32_t C0;
+  uint32_t C1;
+  uint32_t fb0;
+
+  C0 = *(uint32_t*)(&CNT[0]); // CNT3 CNT2 CNT1 CNT0
+  C1 = *(uint32_t*)(&CNT[4]); // CNT7 CNT6 CNT5 CNT4
+
+  fb0 = 0;
+  if (CNT[6] & 0x80) { // top bit of the 56-bit value is about to fall out
+    fb0 = 0x95;
+  }
+
+  C1 = C1 << 1 | C0 >> 31; // carry bit 31 of the low word into the high word
+  C0 = C0 << 1 ^ fb0;
+
+  *(uint32_t*)(&CNT[0]) = C0;
+  *(uint32_t*)(&CNT[4]) = C1;
+
+#else
+
+  uint32_t fb0 = CNT[6] >> 7; // bit that falls out of the 56-bit value
+
+  // shift from the most significant byte down so each carry is read before
+  // its source byte is overwritten
+  CNT[6] = (CNT[6] << 1) | (CNT[5] >> 7);
+  CNT[5] = (CNT[5] << 1) | (CNT[4] >> 7);
+  CNT[4] = (CNT[4] << 1) | (CNT[3] >> 7);
+  CNT[3] = (CNT[3] << 1) | (CNT[2] >> 7);
+  CNT[2] = (CNT[2] << 1) | (CNT[1] >> 7);
+  CNT[1] = (CNT[1] << 1) | (CNT[0] >> 7);
+  if (fb0 == 1) {
+    CNT[0] = (CNT[0] << 1) ^ 0x95;
+  }
+  else {
+    CNT[0] = (CNT[0] << 1);
+  }
+
+#endif
+
+}
+/*
+ * block_cipher - run the currently selected SKINNY-128-384 routine on s.
+ *
+ * s:             16-byte state, passed as the cipher's in/out block
+ * k:             key bytes, forwarded unchanged
+ * T:             16-byte tweak input, forwarded unchanged
+ * CNT:           counter buffer; CNT[7] is overwritten with D here
+ * D:             byte stored in CNT[7] (presumably the domain-separation
+ *                value - TODO confirm against the specification)
+ * p_skinny_ctrl: holds the function pointer that is called; the callee may
+ *                replace that pointer for later calls
+ */
+void block_cipher(
+  unsigned char* s,
+  const unsigned char* k, unsigned char* T,
+  unsigned char* CNT, unsigned char D,
+  skinny_ctrl* p_skinny_ctrl) {
+
+  CNT[7] = D;
+  p_skinny_ctrl->func_skinny_128_384_enc(s, p_skinny_ctrl, CNT, T, k);
+
+}
+/*
+ * nonce_encryption - block_cipher() with the nonce N as the tweak input.
+ *
+ * The const qualifier of N is cast away only to match block_cipher()'s
+ * signature (presumably the callee only reads it - TODO confirm).
+ */
+void nonce_encryption (
+  const unsigned char* N,
+  unsigned char* CNT,
+  unsigned char*s, const unsigned char* k,
+  unsigned char D,
+  skinny_ctrl* p_skinny_ctrl) {
+
+  block_cipher(s,k,(unsigned char*)N,CNT,D,p_skinny_ctrl);
+
+}
+/*
+ * generate_tag - write the 16-byte output of g8A(s) at *c, then reposition
+ * *c.
+ *
+ * c:    in/out cursor; on return *c == (old *c) + 16 - *clen
+ * s:    16-byte state the tag is derived from
+ * clen: only read
+ *
+ * With ___ENABLE_WORD_CAST the byte-wise g8A_for_Tag_Generation() is used
+ * instead of g8A().
+ */
+void generate_tag (
+  unsigned char** c, unsigned char* s,
+  unsigned long long* clen) {
+
+#ifdef ___ENABLE_WORD_CAST
+
+  g8A_for_Tag_Generation(s, *c);
+
+#else
+
+  g8A(s, *c);
+
+#endif
+  *c = *c + 16;
+  *c = *c - *clen;
+
+}
+/*
+ * msg_encryption - encrypt one block (at most 16 bytes) of *M into *c.
+ *
+ * A full block goes through rho_eqov16(), a shorter final block through
+ * rho_ud16().  Both cursors advance by the number of bytes consumed, the
+ * counter is stepped once, and the state is run through nonce_encryption()
+ * only when more input remains.
+ *
+ * Returns the number of message bytes still to be processed.
+ */
+unsigned long long msg_encryption (
+  const unsigned char** M, unsigned char** c,
+  const unsigned char* N,
+  unsigned char* CNT,
+  unsigned char*s, const unsigned char* k,
+  unsigned char D,
+  unsigned long long mlen,
+  skinny_ctrl* l_skinny_ctrl) {
+
+  int len8;
+
+  if (mlen >= 16) {
+    len8 = 16;
+    mlen = mlen - 16;
+    rho_eqov16(*M, *c, s);
+  }
+  else {
+    len8 = mlen; // mlen < 16 here, so the narrowing is lossless
+    mlen = 0;
+    rho_ud16(*M, *c, s, len8);
+  }
+  *c = *c + len8;
+  *M = *M + len8;
+  lfsr_gf56(CNT);
+  if (mlen != 0) {
+    nonce_encryption(N,CNT,s,k,D,l_skinny_ctrl);
+  }
+  return mlen;
+
+}
+/*
+ * msg_decryption - decrypt one block (at most 16 bytes) of *c into *M.
+ *
+ * Mirror of msg_encryption() using irho_eqov16() / irho_ud16(), except that
+ * nonce_encryption() is called unconditionally, also after the last block.
+ *
+ * Returns the number of ciphertext bytes still to be processed.
+ */
+unsigned long long msg_decryption (
+  unsigned char** M, const unsigned char** c,
+  const unsigned char* N,
+  unsigned char* CNT,
+  unsigned char*s, const unsigned char* k,
+  unsigned char D,
+  unsigned long long clen,
+  skinny_ctrl* l_skinny_ctrl) {
+
+  int len8;
+
+  if (clen >= 16) {
+    len8 = 16;
+    clen = clen - 16;
+    irho_eqov16(*M, *c, s);
+  }
+  else {
+    len8 = clen; // clen < 16 here, so the narrowing is lossless
+    clen = 0;
+    irho_ud16(*M, *c, s, len8);
+  }
+  *c = *c + len8;
+  *M = *M + len8;
+  lfsr_gf56(CNT);
+  nonce_encryption(N,CNT,s,k,D,l_skinny_ctrl);
+  return clen;
+
+}
+/*
+ * ad2msg_encryption - consume up to 16 bytes of *M as a tweak block.
+ *
+ * The bytes are padded into a local 16-byte buffer with pad() and passed to
+ * block_cipher() as its T argument; the state s is the cipher block.  The
+ * counter is stepped once afterwards and *M advances by the bytes consumed.
+ *
+ * Returns the number of message bytes still to be processed.
+ */
+unsigned long long ad2msg_encryption (
+  const unsigned char** M,
+  unsigned char* CNT,
+  unsigned char*s, const unsigned char* k,
+  unsigned char D,
+  unsigned long long mlen,
+  skinny_ctrl* l_skinny_ctrl) {
+
+  unsigned char T [16];
+  int len8;
+
+  if (mlen <= 16) {
+    len8 = mlen;
+    mlen = 0;
+  }
+  else {
+    len8 = 16;
+    mlen = mlen - 16;
+  }
+
+  pad (*M,T,len8);
+  block_cipher(s,k,T,CNT,D,l_skinny_ctrl);
+  lfsr_gf56(CNT);
+  *M = *M + len8;
+
+  return mlen;
+
+}
+/*
+ * ad_encryption - absorb up to two 16-byte blocks from *A.
+ *
+ * The first block is XORed into the state s; a second block, if any input
+ * remains, is padded and used as the T argument of block_cipher().  The
+ * counter is stepped after each block.  Returns the bytes still remaining.
+ */
+unsigned long long ad_encryption (
+  const unsigned char** A, unsigned char* s,
+  const unsigned char* k, unsigned long long adlen,
+  unsigned char* CNT,
+  unsigned char D,
+  skinny_ctrl* l_skinny_ctrl) {
+
+  unsigned char T [16];
+  int len8;
+
+  if (adlen >=
16) { + len8 = 16; + adlen = adlen - 16; + + rho_ad_eqov16(*A, s); + } + else { + len8 = adlen; + adlen = 0; + rho_ad_ud16(*A, s, len8); + } + *A = *A + len8; + lfsr_gf56(CNT); + if (adlen != 0) { + if (adlen >= 16) { + len8 = 16; + adlen = adlen - 16; + } + else { + len8 = adlen; + adlen = 0; + } + pad(*A, T, len8); + *A = *A + len8; + block_cipher(s,k,T,CNT,D,l_skinny_ctrl); + lfsr_gf56(CNT); + } + + return adlen; + +} + +int crypto_aead_encrypt ( + unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, + const unsigned char* npub, + const unsigned char* k) { + + unsigned char s[16]; + unsigned char CNT[8]; + unsigned char T[16]; + const unsigned char* N; + unsigned char w; + unsigned long long xlen; + + skinny_ctrl l_skinny_ctrl; + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void)nsec; + N = npub; + + xlen = mlen; + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#else + + s[0] = 0; + s[1] = 0; + s[2] = 0; + s[3] = 0; + s[4] = 0; + s[5] = 0; + s[6] = 0; + s[7] = 0; + s[8] = 0; + s[9] = 0; + s[10] = 0; + s[11] = 0; + s[12] = 0; + s[13] = 0; + s[14] = 0; + s[15] = 0; + +#endif + + reset_lfsr_gf56(CNT); + + w = 48; + + if (adlen == 0) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 0) { + w = w ^ 8; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) < 16) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if 
(xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 16) { + w = w ^ 0; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else { + w = w ^ 10; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + } + else while (adlen > 0) { + adlen = ad_encryption(&ad,s,k,adlen,CNT,40,&l_skinny_ctrl); + } + + if ((w & 8) == 0) { + xlen = ad2msg_encryption (&m,CNT,s,k,44,xlen,&l_skinny_ctrl); + } + else if (mlen == 0) { + lfsr_gf56(CNT); + } + while (xlen > 0) { + xlen = ad_encryption(&m,s,k,xlen,CNT,44,&l_skinny_ctrl); + } + nonce_encryption(N,CNT,s,k,w,&l_skinny_ctrl); + + // Tag generation + g8A(s, T); + + m = m - mlen; + + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) = *(uint32_t*)(&T[0]); + *(uint32_t*)(&s[4]) = *(uint32_t*)(&T[4]); + *(uint32_t*)(&s[8]) = *(uint32_t*)(&T[8]); + *(uint32_t*)(&s[12]) = *(uint32_t*)(&T[12]); + +#else + + s[0] = T[0]; + s[1] = T[1]; + s[2] = T[2]; + s[3] = T[3]; + s[4] = T[4]; + s[5] = T[5]; + s[6] = T[6]; + s[7] = T[7]; + s[8] = T[8]; + s[9] = T[9]; + s[10] = T[10]; + s[11] = T[11]; + s[12] = T[12]; + s[13] = T[13]; + s[14] = T[14]; + s[15] = T[15]; + +#endif + + *clen = mlen + 16; + + if (mlen > 0) { + nonce_encryption(N,CNT,s,k,36,&l_skinny_ctrl); + while (mlen > 16) { + mlen = msg_encryption(&m,&c,N,CNT,s,k,36,mlen,&l_skinny_ctrl); + } + rho_ud16(m, c, s, mlen); + c = c + mlen; + m = m + mlen; + } + + // Tag Concatenation + c[0] = T[0]; + c[1] = T[1]; + c[2] = T[2]; + c[3] = T[3]; + c[4] = T[4]; 
+ c[5] = T[5]; + c[6] = T[6]; + c[7] = T[7]; + c[8] = T[8]; + c[9] = T[9]; + c[10] = T[10]; + c[11] = T[11]; + c[12] = T[12]; + c[13] = T[13]; + c[14] = T[14]; + c[15] = T[15]; + + c = c - *clen; + + return 0; + +} + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) { + + unsigned char s[16]; + unsigned char CNT[8]; + unsigned char T[16]; + const unsigned char* N; + unsigned char w; + unsigned long long xlen; + const unsigned char* mauth; + unsigned char* p1; + unsigned char* p2; + + skinny_ctrl l_skinny_ctrl; + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void)nsec; + mauth = m; + + N = npub; + + xlen = clen-16; + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#else + + s[0] = 0; + s[1] = 0; + s[2] = 0; + s[3] = 0; + s[4] = 0; + s[5] = 0; + s[6] = 0; + s[7] = 0; + s[8] = 0; + s[9] = 0; + s[10] = 0; + s[11] = 0; + s[12] = 0; + s[13] = 0; + s[14] = 0; + s[15] = 0; + +#endif + + reset_lfsr_gf56(CNT); + + w = 48; + + if (adlen == 0) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 0) { + w = w ^ 8; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) < 16) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if 
(adlen%(32) == 16) { + w = w ^ 0; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else { + w = w ^ 10; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + } + else while (adlen > 0) { + adlen = ad_encryption(&ad,s,k,adlen,CNT,40,&l_skinny_ctrl); + } + + if ((w & 8) == 0) { + xlen = ad2msg_encryption (&mauth,CNT,s,k,44,xlen,&l_skinny_ctrl); + } + else if (clen == 0) { + lfsr_gf56(CNT); + } + while (xlen > 0) { + xlen = ad_encryption(&mauth,s,k,xlen,CNT,44,&l_skinny_ctrl); + } + nonce_encryption(N,CNT,s,k,w,&l_skinny_ctrl); + + // Tag generation + g8A(s, T); + + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + + p1 = T; + p2 = (unsigned char*)&c[clen - 16]; + + p1[0] = p2[0]; + p1[1] = p2[1]; + p1[2] = p2[2]; + p1[3] = p2[3]; + p1[4] = p2[4]; + p1[5] = p2[5]; + p1[6] = p2[6]; + p1[7] = p2[7]; + p1[8] = p2[8]; + p1[9] = p2[9]; + p1[10] = p2[10]; + p1[11] = p2[11]; + p1[12] = p2[12]; + p1[13] = p2[13]; + p1[14] = p2[14]; + p1[15] = p2[15]; + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) = *(uint32_t*)(&T[0]); + *(uint32_t*)(&s[4]) = *(uint32_t*)(&T[4]); + *(uint32_t*)(&s[8]) = *(uint32_t*)(&T[8]); + *(uint32_t*)(&s[12]) = *(uint32_t*)(&T[12]); + +#else + + s[0] = T[0]; + s[1] = T[1]; + s[2] = T[2]; + s[3] = T[3]; + s[4] = T[4]; + s[5] = T[5]; + s[6] = T[6]; + s[7] = T[7]; + s[8] = T[8]; + s[9] = T[9]; + s[10] = T[10]; + s[11] = T[11]; + s[12] = T[12]; + s[13] = T[13]; + s[14] = T[14]; + s[15] = T[15]; + +#endif + + clen = clen - 16; + *mlen = clen; + + if (clen > 0) { + nonce_encryption(N,CNT,s,k,36,&l_skinny_ctrl); + + 
l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + while (clen > 16) { + clen = msg_decryption(&m,&c,N,CNT,s,k,36,clen,&l_skinny_ctrl); + } + irho_ud16(m, c, s, clen); + c = c + clen; + m = m + clen; + } + + for (int i = 0; i < 16; i++) { + if (T[i] != (*(c+i))) { + return -1; + } + } + + return 0; + +} diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32_NEC/skinny.h b/romulus/Implementations/crypto_aead/romulusm1/opt32_NEC/skinny.h new file mode 100644 index 0000000..5b36459 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1/opt32_NEC/skinny.h @@ -0,0 +1,69 @@ +#define ___SKINNY_LOOP +#define ___NUM_OF_ROUNDS_56 +#define ___ENABLE_WORD_CAST + +#include + +typedef struct ___skinny_ctrl { +#ifdef ___NUM_OF_ROUNDS_56 + uint32_t roundKeys[240]; // number of rounds : 56 +#else + uint32_t roundKeys[176]; // number of rounds : 40 +#endif + void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K); +} skinny_ctrl; + +extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); + +#define pack_word(x0, x1, x2, x3, w) \ + w = ((x3) << 24) ^ \ + ((x2) << 16) ^ \ + ((x1) << 8) ^ \ + (x0); + +#define unpack_word(x0, x1, x2, x3, w) \ + x0 = ((w) & 0xff); \ + x1 = (((w) >> 8) & 0xff); \ + x2 = (((w) >> 16) & 0xff); \ + x3 = ((w) >> 24); + +#define PERMUTATION() \ +/* permutation */ \ + \ + /* 7 6 5 4 3 2 1 0 */ \ + /* 5 7 2 3 6 0 4 1 */ \ + \ + /* w0 (3 2 1 0) */ \ + /* w1 (7 6 5 4) */ \ + \ + /* w0 (6 0 4 1) */ \ + /* w1 (5 7 2 3) */ \ + \ + t0 = w1 << 8; /* 6 5 4 - */ \ + t0 = t0 
& 0xff00ff00; /* 6 - 4 - */ \ + \ + t1 = w1 << 16; /* 5 4 - - */ \ + t1 = t1 & 0xff000000; /* 5 - - - */ \ + \ + t2 = w1 & 0xff000000; /* 7 - - - */ \ + t2 = t2 >> 8; /* - 7 - - */ \ + t1 = t1 ^ t2; /* 5 7 - - */ \ + \ + t2 = w0 & 0xff000000; /* 3 - - - */ \ + t2 = t2 >> 24; /* - - - 3 */ \ + t1 = t1 ^ t2; /* 5 7 - 3 */ \ + \ + w1 = w0 >> 8; /* - 3 2 1 */ \ + w1 = w1 & 0x0000ff00; /* - - 2 - */ \ + w1 = w1 ^ t1; /* 5 7 2 3 */ \ + \ + t2 = w0 & 0x0000ff00; /* - - 1 - */ \ + t2 = t2 >> 8; /* - - - 1 */ \ + t0 = t0 ^ t2; /* 6 - 4 1 */ \ + \ + w0 = w0 << 16; /* 1 0 - - */ \ + w0 = w0 & 0x00ff0000; /* - 0 - - */ \ + w0 = w0 ^ t0; /* 6 0 4 1 */ + diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32_NEC/skinny_key_schedule2.c b/romulus/Implementations/crypto_aead/romulusm1/opt32_NEC/skinny_key_schedule2.c new file mode 100644 index 0000000..923d4b8 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1/opt32_NEC/skinny_key_schedule2.c @@ -0,0 +1,227 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * load * AC(c0 c1) ^ TK3 + * calc AC(c0 c1) ^ TK2 -> store + * ART(TK2) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + w0 = ((w0 << 1) & 0xfefefefe) ^ \ + (((w0 >> 7) ^ (w0 >> 5)) & 0x01010101); \ + w1 = ((w1 << 1) & 0xfefefefe) ^ \ + (((w1 >> 7) ^ (w1 >> 5)) & 0x01010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2++ = w0 ^ *tk3++; \ + *tk2++ = w1 ^ *tk3++; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(uint32_t *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = roundKeys[4]; + w1 = roundKeys[5]; + + tk2 = &roundKeys[16]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[96]; +#else + tk3 = &roundKeys[128]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th,43th, ... 
,51th,53th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + w0 = roundKeys[6]; + w1 = roundKeys[7]; + + tk2 = &roundKeys[18]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[98]; +#else + tk3 = &roundKeys[130]; +#endif + + // 2nd,4th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else + +void RunEncryptionKeyScheduleTK2(uint32_t *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = roundKeys[4]; + w1 = roundKeys[5]; + + tk2 = &roundKeys[16]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[96]; +#else + tk3 = &roundKeys[128]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... 
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + w0 = roundKeys[6]; + w1 = roundKeys[7]; + + tk2 = &roundKeys[18]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[98]; +#else + tk3 = &roundKeys[130]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32_NEC/skinny_key_schedule3.c b/romulus/Implementations/crypto_aead/romulusm1/opt32_NEC/skinny_key_schedule3.c new file mode 100644 index 0000000..39254a6 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1/opt32_NEC/skinny_key_schedule3.c @@ -0,0 +1,228 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * AC(c0 c1) ^ TK3 -> store + * ART(TK3) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + w0 = ((w0 >> 1) & 0x7f7f7f7f) ^ \ + (((w0 << 7) ^ (w0 << 1)) & 0x80808080); \ + w1 = ((w1 >> 1) & 0x7f7f7f7f) ^ \ + (((w1 << 7) ^ (w1 << 1)) & 0x80808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + *tk3++ = w0 ^ c0Val; \ + *tk3++ = w1 ^ c1Val; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(uint32_t *roundKeys) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = roundKeys[8]; + w1 = roundKeys[9]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[96]; +#else + tk3 = &roundKeys[128]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK3(0x7, 0x000); + PERMUTATION_TK3(0xf, 0x100); + PERMUTATION_TK3(0xd, 0x300); + PERMUTATION_TK3(0x7, 0x300); + PERMUTATION_TK3(0xe, 0x100); + PERMUTATION_TK3(0x9, 0x300); + PERMUTATION_TK3(0x7, 0x200); + PERMUTATION_TK3(0xd, 0x100); + PERMUTATION_TK3(0x5, 0x300); + + PERMUTATION_TK3(0x6, 0x100); + PERMUTATION_TK3(0x8, 0x100); + PERMUTATION_TK3(0x1, 0x200); + PERMUTATION_TK3(0x5, 0x000); + PERMUTATION_TK3(0x7, 0x100); + PERMUTATION_TK3(0xc, 0x100); + PERMUTATION_TK3(0x1, 0x300); + PERMUTATION_TK3(0x6, 0x000); + PERMUTATION_TK3(0xb, 0x100); + PERMUTATION_TK3(0xd, 0x200); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41td,43th, ... 
,53th,55th round + PERMUTATION_TK3(0x4, 0x300); + PERMUTATION_TK3(0x2, 0x100); + PERMUTATION_TK3(0x8, 0x000); + PERMUTATION_TK3(0x2, 0x200); + PERMUTATION_TK3(0x9, 0x000); + PERMUTATION_TK3(0x6, 0x200); + PERMUTATION_TK3(0x9, 0x100); + PERMUTATION_TK3(0x5, 0x200); + +#endif + + // even + + // load master key + w0 = roundKeys[10]; + w1 = roundKeys[11]; + + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[98]; +#else + tk3 = &roundKeys[130]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x000); + PERMUTATION_TK3(0xf, 0x000); + PERMUTATION_TK3(0xe, 0x300); + PERMUTATION_TK3(0xb, 0x300); + PERMUTATION_TK3(0xf, 0x200); + PERMUTATION_TK3(0xc, 0x300); + PERMUTATION_TK3(0x3, 0x300); + PERMUTATION_TK3(0xe, 0x000); + PERMUTATION_TK3(0xa, 0x300); + PERMUTATION_TK3(0xb, 0x200); + + PERMUTATION_TK3(0xc, 0x200); + PERMUTATION_TK3(0x0, 0x300); + PERMUTATION_TK3(0x2, 0x000); + PERMUTATION_TK3(0xb, 0x000); + PERMUTATION_TK3(0xe, 0x200); + PERMUTATION_TK3(0x8, 0x300); + PERMUTATION_TK3(0x3, 0x200); + PERMUTATION_TK3(0xd, 0x000); + PERMUTATION_TK3(0x6, 0x300); + PERMUTATION_TK3(0xa, 0x100); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK3(0x9, 0x200); + PERMUTATION_TK3(0x4, 0x200); + PERMUTATION_TK3(0x1, 0x100); + PERMUTATION_TK3(0x4, 0x000); + PERMUTATION_TK3(0x3, 0x100); + PERMUTATION_TK3(0xc, 0x000); + PERMUTATION_TK3(0x2, 0x300); + PERMUTATION_TK3(0xa, 0x000); + +#endif + +} + +#else + +void RunEncryptionKeyScheduleTK3(uint32_t *roundKeys, unsigned char *pRC) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + uint16_t c0; + uint16_t c1; + + // odd + + // load master key + w0 = roundKeys[8]; + w1 = roundKeys[9]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[96]; +#else + tk3 = &roundKeys[128]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + pRC += 4; + // 3rd,5th, ... 
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + c1 <<= 8; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + w0 = roundKeys[10]; + w1 = roundKeys[11]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; + tk3 = &roundKeys[98]; +#else + pRC -= 110; + tk3 = &roundKeys[130]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + c1 <<= 8; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32_NEC/skinny_main.c b/romulus/Implementations/crypto_aead/romulusm1/opt32_NEC/skinny_main.c new file mode 100644 index 0000000..74222ee --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1/opt32_NEC/skinny_main.c @@ -0,0 +1,537 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * ART(TK1) -> store + * load AC(c0 c1) ^ TK3 ^ TK2 + * load TK1 + * calc AC(c0 c1) ^ TK3 ^ TK2 ^ TK1 -> use at (AC->ART) + * SC->SR->(AC->ART)->MC + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +/* + * S-BOX + */ +unsigned char SBOX[] += { + // Original + 0x65, 0x4c, 0x6a, 0x42, 0x4b, 0x63, 0x43, 0x6b, 0x55, 0x75, 0x5a, 0x7a, 0x53, 0x73, 0x5b, 0x7b, + 0x35, 0x8c, 0x3a, 0x81, 0x89, 0x33, 0x80, 0x3b, 0x95, 0x25, 0x98, 0x2a, 0x90, 0x23, 0x99, 0x2b, + 0xe5, 0xcc, 0xe8, 0xc1, 0xc9, 0xe0, 0xc0, 0xe9, 0xd5, 0xf5, 0xd8, 0xf8, 0xd0, 0xf0, 0xd9, 0xf9, + 0xa5, 0x1c, 0xa8, 0x12, 0x1b, 0xa0, 0x13, 0xa9, 0x05, 0xb5, 0x0a, 0xb8, 0x03, 0xb0, 0x0b, 0xb9, + 0x32, 0x88, 0x3c, 0x85, 0x8d, 0x34, 0x84, 0x3d, 0x91, 0x22, 0x9c, 0x2c, 0x94, 0x24, 0x9d, 0x2d, + 0x62, 0x4a, 0x6c, 0x45, 0x4d, 0x64, 0x44, 0x6d, 0x52, 0x72, 0x5c, 0x7c, 0x54, 0x74, 0x5d, 0x7d, + 0xa1, 0x1a, 0xac, 0x15, 0x1d, 0xa4, 0x14, 0xad, 0x02, 0xb1, 0x0c, 0xbc, 0x04, 0xb4, 0x0d, 0xbd, + 0xe1, 0xc8, 0xec, 0xc5, 0xcd, 0xe4, 0xc4, 0xed, 0xd1, 0xf1, 0xdc, 0xfc, 0xd4, 0xf4, 0xdd, 0xfd, + 0x36, 0x8e, 0x38, 0x82, 0x8b, 0x30, 0x83, 0x39, 0x96, 0x26, 0x9a, 0x28, 0x93, 0x20, 0x9b, 0x29, + 0x66, 0x4e, 0x68, 0x41, 0x49, 0x60, 0x40, 0x69, 0x56, 0x76, 0x58, 0x78, 0x50, 0x70, 0x59, 0x79, + 0xa6, 0x1e, 0xaa, 0x11, 0x19, 0xa3, 0x10, 0xab, 0x06, 0xb6, 0x08, 0xba, 0x00, 0xb3, 0x09, 0xbb, + 0xe6, 0xce, 0xea, 0xc2, 0xcb, 0xe3, 0xc3, 0xeb, 0xd6, 0xf6, 0xda, 0xfa, 0xd3, 0xf3, 0xdb, 0xfb, + 0x31, 0x8a, 0x3e, 0x86, 0x8f, 0x37, 0x87, 0x3f, 0x92, 0x21, 0x9e, 0x2e, 0x97, 0x27, 0x9f, 0x2f, + 0x61, 0x48, 0x6e, 0x46, 0x4f, 0x67, 0x47, 0x6f, 0x51, 0x71, 0x5e, 0x7e, 0x57, 0x77, 0x5f, 0x7f, + 0xa2, 0x18, 0xae, 0x16, 0x1f, 0xa7, 0x17, 0xaf, 0x01, 0xb2, 0x0e, 0xbe, 0x07, 0xb7, 0x0f, 0xbf, + 0xe2, 0xca, 0xee, 0xc6, 0xcf, 0xe7, 0xc7, 0xef, 0xd2, 0xf2, 0xde, 0xfe, 0xd7, 0xf7, 0xdf, 0xff, +}; + + /* + * S-BOX ^ AC(c2) + */ 
+unsigned char SBOX2[] += { // Original ^ c2(0x02) + 0x67, 0x4e, 0x68, 0x40, 0x49, 0x61, 0x41, 0x69, 0x57, 0x77, 0x58, 0x78, 0x51, 0x71, 0x59, 0x79, + 0x37, 0x8e, 0x38, 0x83, 0x8b, 0x31, 0x82, 0x39, 0x97, 0x27, 0x9a, 0x28, 0x92, 0x21, 0x9b, 0x29, + 0xe7, 0xce, 0xea, 0xc3, 0xcb, 0xe2, 0xc2, 0xeb, 0xd7, 0xf7, 0xda, 0xfa, 0xd2, 0xf2, 0xdb, 0xfb, + 0xa7, 0x1e, 0xaa, 0x10, 0x19, 0xa2, 0x11, 0xab, 0x07, 0xb7, 0x08, 0xba, 0x01, 0xb2, 0x09, 0xbb, + 0x30, 0x8a, 0x3e, 0x87, 0x8f, 0x36, 0x86, 0x3f, 0x93, 0x20, 0x9e, 0x2e, 0x96, 0x26, 0x9f, 0x2f, + 0x60, 0x48, 0x6e, 0x47, 0x4f, 0x66, 0x46, 0x6f, 0x50, 0x70, 0x5e, 0x7e, 0x56, 0x76, 0x5f, 0x7f, + 0xa3, 0x18, 0xae, 0x17, 0x1f, 0xa6, 0x16, 0xaf, 0x00, 0xb3, 0x0e, 0xbe, 0x06, 0xb6, 0x0f, 0xbf, + 0xe3, 0xca, 0xee, 0xc7, 0xcf, 0xe6, 0xc6, 0xef, 0xd3, 0xf3, 0xde, 0xfe, 0xd6, 0xf6, 0xdf, 0xff, + 0x34, 0x8c, 0x3a, 0x80, 0x89, 0x32, 0x81, 0x3b, 0x94, 0x24, 0x98, 0x2a, 0x91, 0x22, 0x99, 0x2b, + 0x64, 0x4c, 0x6a, 0x43, 0x4b, 0x62, 0x42, 0x6b, 0x54, 0x74, 0x5a, 0x7a, 0x52, 0x72, 0x5b, 0x7b, + 0xa4, 0x1c, 0xa8, 0x13, 0x1b, 0xa1, 0x12, 0xa9, 0x04, 0xb4, 0x0a, 0xb8, 0x02, 0xb1, 0x0b, 0xb9, + 0xe4, 0xcc, 0xe8, 0xc0, 0xc9, 0xe1, 0xc1, 0xe9, 0xd4, 0xf4, 0xd8, 0xf8, 0xd1, 0xf1, 0xd9, 0xf9, + 0x33, 0x88, 0x3c, 0x84, 0x8d, 0x35, 0x85, 0x3d, 0x90, 0x23, 0x9c, 0x2c, 0x95, 0x25, 0x9d, 0x2d, + 0x63, 0x4a, 0x6c, 0x44, 0x4d, 0x65, 0x45, 0x6d, 0x53, 0x73, 0x5c, 0x7c, 0x55, 0x75, 0x5d, 0x7d, + 0xa0, 0x1a, 0xac, 0x14, 0x1d, 0xa5, 0x15, 0xad, 0x03, 0xb0, 0x0c, 0xbc, 0x05, 0xb5, 0x0d, 0xbd, + 0xe0, 0xc8, 0xec, 0xc4, 0xcd, 0xe5, 0xc5, 0xed, 0xd0, 0xf0, 0xdc, 0xfc, 0xd5, 0xf5, 0xdd, 0xfd, +}; + +#ifdef ___SKINNY_LOOP +/* + * Round Constants + */ +unsigned char RC[] += { + 0x01, 0x00, 0x03, 0x00, 0x07, 0x00, 0x0f, 0x00, 0x0f, 0x01, 0x0e, 0x03, 0x0d, 0x03, 0x0b, 0x03, + 0x07, 0x03, 0x0f, 0x02, 0x0e, 0x01, 0x0c, 0x03, 0x09, 0x03, 0x03, 0x03, 0x07, 0x02, 0x0e, 0x00, + 0x0d, 0x01, 0x0a, 0x03, 0x05, 0x03, 0x0b, 0x02, 0x06, 0x01, 0x0c, 0x02, 0x08, 0x01, 0x00, 0x03, + 
0x01, 0x02, 0x02, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x07, 0x01, 0x0e, 0x02, 0x0c, 0x01, 0x08, 0x03, + 0x01, 0x03, 0x03, 0x02, 0x06, 0x00, 0x0d, 0x00, 0x0b, 0x01, 0x06, 0x03, 0x0d, 0x02, 0x0a, 0x01, +#ifdef ___NUM_OF_ROUNDS_56 + 0x04, 0x03, 0x09, 0x02, 0x02, 0x01, 0x04, 0x02, 0x08, 0x00, 0x01, 0x01, 0x02, 0x02, 0x04, 0x00, + 0x09, 0x00, 0x03, 0x01, 0x06, 0x02, 0x0c, 0x00, 0x09, 0x01, 0x02, 0x03, 0x05, 0x02, 0x0a, 0x00, +#endif + }; +#endif + +extern void Encrypt(unsigned char *block, uint32_t *roundKeys, unsigned char *sbox, unsigned char *sbox2); +extern void RunEncryptionKeyScheduleTK2(uint32_t *roundKeys); +#ifdef ___SKINNY_LOOP +extern void RunEncryptionKeyScheduleTK3(uint32_t *roundKeys, unsigned char *pRC); +#else +extern void RunEncryptionKeyScheduleTK3(uint32_t *roundKeys); +#endif + +void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; +#ifndef ___ENABLE_WORD_CAST + pack_word(CNT[0], CNT[1], CNT[2], CNT[3], pt[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pack_word(T[0], T[1], T[2], T[3], pt[4]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pack_word(T[8], T[9], T[10], T[11], pt[6]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + pack_word(K[0], K[1], K[2], K[3], pt[8]); + pack_word(K[7], K[4], K[5], K[6], pt[9]); + pack_word(K[8], K[9], K[10], K[11], pt[10]); + pack_word(K[15], K[12], K[13], K[14], pt[11]); +#else + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + pt[8] = *(uint32_t*)(&K[0]); + pack_word(K[7], K[4], K[5], K[6], pt[9]); + pt[10] = *(uint32_t*)(&K[8]); + pack_word(K[15], K[12], K[13], K[14], pt[11]); +#endif + +#ifdef ___SKINNY_LOOP + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys, RC); +#else 
+ RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys); +#endif + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + + pskinny_ctrl->func_skinny_128_384_enc = skinny_128_384_enc12_12; + +} + +void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)K; + + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; +#ifndef ___ENABLE_WORD_CAST + pack_word(CNT[0], CNT[1], CNT[2], CNT[3], pt[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pack_word(T[0], T[1], T[2], T[3], pt[4]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pack_word(T[8], T[9], T[10], T[11], pt[6]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); +#else + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); +#endif + + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)T; + (void)K; + + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; +#ifndef ___ENABLE_WORD_CAST + pack_word(CNT[0], CNT[1], CNT[2], CNT[3], pt[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); +#else + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); +#endif + + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +#define PERMUTATION_TK1() \ +/* permutation */ \ + \ + PERMUTATION(); \ + \ + /* store */ \ + \ + *tk1++ = w0; \ + *tk1++ = w1; + +#define SBOX_0(w) \ + \ + t0 = (w) & 0xff; \ + t1 = (w >> 8) & 0xff; \ + t2 = (w >> 16) & 0xff; \ + t3 = (w >> 24); \ + \ + t0 = sbox[t0]; \ + t1 = sbox[t1]; \ + t2 = sbox[t2]; \ + t3 = sbox[t3]; \ + 
\ + w = (t0) ^ \ + (t1 << 8) ^ \ + (t2 << 16) ^ \ + (t3 << 24); + +#define SBOX_8(w) \ + \ + t0 = (w) & 0xff; \ + t1 = (w >> 8) & 0xff; \ + t2 = (w >> 16) & 0xff; \ + t3 = (w >> 24); \ + \ + t0 = sbox[t0]; \ + t1 = sbox[t1]; \ + t2 = sbox[t2]; \ + t3 = sbox[t3]; \ + \ + w = (t0 << 8) ^ \ + (t1 << 16) ^ \ + (t2 << 24) ^ \ + (t3); + +#define SBOX_16(w) \ + \ + t0 = (w) & 0xff; \ + t1 = (w >> 8) & 0xff; \ + t2 = (w >> 16) & 0xff; \ + t3 = (w >> 24); \ + \ + t0 = sbox2[t0]; /* AC(c2) */ \ + t1 = sbox[t1]; \ + t2 = sbox[t2]; \ + t3 = sbox[t3]; \ + \ + w = (t0 << 16) ^ \ + (t1 << 24) ^ \ + (t2) ^ \ + (t3 << 8); + +#define SBOX_24(w) \ + \ + t0 = (w) & 0xff; \ + t1 = (w >> 8) & 0xff; \ + t2 = (w >> 16) & 0xff; \ + t3 = (w >> 24); \ + \ + t0 = sbox[t0]; \ + t1 = sbox[t1]; \ + t2 = sbox[t2]; \ + t3 = sbox[t3]; \ + \ + w = (t0 << 24) ^ \ + (t1) ^ \ + (t2 << 8) ^ \ + (t3 << 16); + +#define SKINNY_MAIN() \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows) */ \ + \ + SBOX_0(w0); \ + SBOX_8(w1); \ + SBOX_16(w2); \ + SBOX_24(w3); \ + \ + /* LUT(with ShiftRows & AC(c2) */ \ + \ + w0 ^= *tk2++; \ + w1 ^= *tk2++; \ + \ + /* Load TK1 */ \ + \ + w0 ^= *tk1++; \ + w1 ^= *tk1++; \ + \ + /* MC */ \ + /* 0 2 3 */ \ + /* 0 */ \ + /* 1 2 */ \ + /* 0 2 */ \ + \ + /* 0^2 */ \ + t0 = w0 ^ w2; \ + \ + /* 1^2 */ \ + w2 = w1 ^ w2; \ + \ + /* 0 */ \ + w1 = w0; \ + \ + /* 0^2^3 */ \ + w0 = t0 ^ w3; \ + \ + /* 0^2 */ \ + w3 = t0; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2) */ \ + \ + SBOX_0(w0); \ + SBOX_8(w1); \ + SBOX_16(w2); \ + SBOX_24(w3); \ + \ + /* Load TK2^TK3^AC(c0 c1) */ \ + \ + w0 ^= *tk2++; \ + w1 ^= *tk2++; \ + \ + /* MC */ \ + /* 0 2 3 */ \ + /* 0 */ \ + /* 1 2 */ \ + /* 0 2 */ \ + \ + /* 0^2 */ \ + t0 = w0 ^ w2; \ + \ + /* 1^2 */ \ + w2 = w1 ^ w2; \ + \ + /* 0 */ \ + w1 = w0; \ + \ + /* 0^2^3 */ \ + w0 = t0 ^ w3; \ + \ + /* 0^2 */ \ + w3 = t0; + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, uint32_t *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + 
uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t w0; + uint32_t w1; + uint32_t w2; + uint32_t w3; + +// TK1 + + // load master key + w0 = roundKeys[0]; + w1 = roundKeys[1]; + + // 1st round + // not need to store + + tk1 = &roundKeys[2]; + + // 2nd, ... ,8th round + PERMUTATION_TK1(); + PERMUTATION_TK1(); + PERMUTATION_TK1(); + PERMUTATION_TK1(); + PERMUTATION_TK1(); + PERMUTATION_TK1(); + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + +#ifndef ___ENABLE_WORD_CAST + pack_word(block[0], block[1], block[2], block[3], w0); + pack_word(block[4], block[5], block[6], block[7], w1); + pack_word(block[8], block[9], block[10], block[11], w2); + pack_word(block[12], block[13], block[14], block[15], w3); +#else + w0 = *(uint32_t*)(&block[0]); + w1 = *(uint32_t*)(&block[4]); + w2 = *(uint32_t*)(&block[8]); + w3 = *(uint32_t*)(&block[12]); +#endif + + tk2 = &roundKeys[16]; + tk1 = &roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = &roundKeys[0]; + + // 17th, ...,32th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = &roundKeys[0]; + + // 33th, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = &roundKeys[0]; + + // 49th, ... 
,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +#ifndef ___ENABLE_WORD_CAST + unpack_word(block[0], block[1], block[2], block[3], w0); + unpack_word(block[4], block[5], block[6], block[7], w1); + unpack_word(block[8], block[9], block[10], block[11], w2); + unpack_word(block[12], block[13], block[14], block[15], w3); +#else + *(uint32_t*)(&block[0]) = w0; + *(uint32_t*)(&block[4]) = w1; + *(uint32_t*)(&block[8]) = w2; + *(uint32_t*)(&block[12]) = w3; +#endif + +} + +#else + +void Encrypt(unsigned char *block, uint32_t *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t w0; + uint32_t w1; + uint32_t w2; + uint32_t w3; + +// TK1 + + // load master key + w0 = roundKeys[0]; + w1 = roundKeys[1]; + + // 1st round + // not need to store + + tk1 = &roundKeys[2]; + + // 2nd, ... ,8th round + for(int i=0;i<7;i++) + { + PERMUTATION_TK1(); + } + +// SB+AC+ShR+MC + +#ifndef ___ENABLE_WORD_CAST + pack_word(block[0], block[1], block[2], block[3], w0); + pack_word(block[4], block[5], block[6], block[7], w1); + pack_word(block[8], block[9], block[10], block[11], w2); + pack_word(block[12], block[13], block[14], block[15], w3); +#else + w0 = *(uint32_t*)(&block[0]); + w1 = *(uint32_t*)(&block[4]); + w2 = *(uint32_t*)(&block[8]); + w3 = *(uint32_t*)(&block[12]); +#endif + + tk2 = &roundKeys[16]; + + // 1st, ... ,32th or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = &roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33th , ... ,40th or 49th, .... 
,56th round + { + tk1 = &roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +#ifndef ___ENABLE_WORD_CAST + unpack_word(block[0], block[1], block[2], block[3], w0); + unpack_word(block[4], block[5], block[6], block[7], w1); + unpack_word(block[8], block[9], block[10], block[11], w2); + unpack_word(block[12], block[13], block[14], block[15], w3); +#else + *(uint32_t*)(&block[0]) = w0; + *(uint32_t*)(&block[4]) = w1; + *(uint32_t*)(&block[8]) = w2; + *(uint32_t*)(&block[12]) = w3; +#endif + +} + +#endif diff --git a/romulus/Implementations/crypto_aead/romulusn1+/opt32_NEC/api.h b/romulus/Implementations/crypto_aead/romulusn1+/opt32_NEC/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1+/opt32_NEC/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/romulus/Implementations/crypto_aead/romulusn1+/opt32_NEC/encrypt.c b/romulus/Implementations/crypto_aead/romulusn1+/opt32_NEC/encrypt.c new file mode 100644 index 0000000..8668d91 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1+/opt32_NEC/encrypt.c @@ -0,0 +1,1024 @@ +/* + * Date: 29 November 2018 + * Contact: Thomas Peyrin - thomas.peyrin@gmail.com + * Mustafa Khairallah - mustafam001@e.ntu.edu.sg + */ + +#include "crypto_aead.h" +#include "api.h" +#include "skinny.h" +#include +#include + +void pad (const unsigned char* m, unsigned char* mp, int len8) { + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&mp[0]) = 0; + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = (len8 & 0x0f); + for (int i = 0; i < len8; i++) { + mp[i] = m[i]; + } + +#else + + mp[0] = 0; + mp[1] = 0; + mp[2] = 0; + mp[3] = 0; + mp[4] = 0; + mp[5] = 0; + mp[6] = 0; + mp[7] = 0; + mp[8] = 0; + mp[9] = 0; + mp[10] = 0; + mp[11] = 0; + mp[12] = 0; + mp[13] = 0; + mp[14] = 0; + mp[15] = (len8 & 0x0f); 
+ for (int i = 0; i < len8; i++) { + mp[i] = m[i]; + } + +#endif + +} + +void g8A (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_WORD_CAST + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#else + + uint32_t s0, s1, s2, s3; + uint32_t c0, c1, c2, c3; + + pack_word(s[0], s[1], s[2], s[3], s0); + pack_word(s[4], s[5], s[6], s[7], s1); + pack_word(s[8], s[9], s[10], s[11], s2); + pack_word(s[12], s[13], s[14], s[15], s3); + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + unpack_word(c[0], c[1], c[2], c[3], c0); + unpack_word(c[4], c[5], c[6], c[7], c1); + unpack_word(c[8], c[9], c[10], c[11], c2); + unpack_word(c[12], c[13], c[14], c[15], c3); + +#endif + +} + +#ifdef ___ENABLE_WORD_CAST + +void g8A_for_Tag_Generation (unsigned char* s, unsigned char* c) { + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + // use byte access because 
of memory alignment. + // c is not always in word(4 byte) alignment. + c[0] = c0 &0xFF; + c[1] = (c0>>8) &0xFF; + c[2] = (c0>>16)&0xFF; + c[3] = c0>>24; + c[4] = c1 &0xFF; + c[5] = (c1>>8) &0xFF; + c[6] = (c1>>16)&0xFF; + c[7] = c1>>24; + c[8] = c2 &0xFF; + c[9] = (c2>>8) &0xFF; + c[10] = (c2>>16)&0xFF; + c[11] = c2>>24; + c[12] = c3 &0xFF; + c[13] = (c3>>8) &0xFF; + c[14] = (c3>>16)&0xFF; + c[15] = c3>>24; + +} + +#endif + +#define rho_ad_eqov16_macro(i) \ + s[i] = s[i] ^ m[i]; + +void rho_ad_eqov16 ( + const unsigned char* m, + unsigned char* s) { + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&m[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&m[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&m[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&m[12]); + +#else + + rho_ad_eqov16_macro(0); + rho_ad_eqov16_macro(1); + rho_ad_eqov16_macro(2); + rho_ad_eqov16_macro(3); + rho_ad_eqov16_macro(4); + rho_ad_eqov16_macro(5); + rho_ad_eqov16_macro(6); + rho_ad_eqov16_macro(7); + rho_ad_eqov16_macro(8); + rho_ad_eqov16_macro(9); + rho_ad_eqov16_macro(10); + rho_ad_eqov16_macro(11); + rho_ad_eqov16_macro(12); + rho_ad_eqov16_macro(13); + rho_ad_eqov16_macro(14); + rho_ad_eqov16_macro(15); + +#endif + +} + +#define rho_ad_ud16_macro(i) \ + s[i] = s[i] ^ mp[i]; + +void rho_ad_ud16 ( + const unsigned char* m, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + pad(m,mp,len8); + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&mp[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&mp[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&mp[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&mp[12]); + +#else + + rho_ad_ud16_macro(0); + rho_ad_ud16_macro(1); + rho_ad_ud16_macro(2); + rho_ad_ud16_macro(3); + rho_ad_ud16_macro(4); + rho_ad_ud16_macro(5); + rho_ad_ud16_macro(6); + rho_ad_ud16_macro(7); + rho_ad_ud16_macro(8); + rho_ad_ud16_macro(9); + rho_ad_ud16_macro(10); + rho_ad_ud16_macro(11); + rho_ad_ud16_macro(12); + rho_ad_ud16_macro(13); + 
rho_ad_ud16_macro(14); + rho_ad_ud16_macro(15); + +#endif + +} + +void rho_eqov16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s) { + + g8A(s,c); + +#ifdef ___ENABLE_WORD_CAST + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= m0; + s1 ^= m1; + s2 ^= m2; + s3 ^= m3; + + c0 ^= m0; + c1 ^= m1; + c2 ^= m2; + c3 ^= m3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#else + + uint32_t c0, c1, c2, c3; + uint32_t s0, s1, s2, s3; + uint32_t m0, m1, m2, m3; + + pack_word(m[0], m[1], m[2], m[3], m0); + pack_word(m[4], m[5], m[6], m[7], m1); + pack_word(m[8], m[9], m[10], m[11], m2); + pack_word(m[12], m[13], m[14], m[15], m3); + + pack_word(s[0], s[1], s[2], s[3], s0); + pack_word(s[4], s[5], s[6], s[7], s1); + pack_word(s[8], s[9], s[10], s[11], s2); + pack_word(s[12], s[13], s[14], s[15], s3); + + pack_word(c[0], c[1], c[2], c[3], c0); + pack_word(c[4], c[5], c[6], c[7], c1); + pack_word(c[8], c[9], c[10], c[11], c2); + pack_word(c[12], c[13], c[14], c[15], c3); + + s0 ^= m0; + s1 ^= m1; + s2 ^= m2; + s3 ^= m3; + + c0 ^= m0; + c1 ^= m1; + c2 ^= m2; + c3 ^= m3; + + unpack_word(s[0], s[1], s[2], s[3], s0); + unpack_word(s[4], s[5], s[6], s[7], s1); + unpack_word(s[8], s[9], s[10], s[11], s2); + unpack_word(s[12], s[13], s[14], s[15], s3); + + unpack_word(c[0], c[1], c[2], c[3], c0); + unpack_word(c[4], c[5], c[6], c[7], c1); + unpack_word(c[8], c[9], c[10], c[11], c2); 
+ unpack_word(c[12], c[13], c[14], c[15], c3); + +#endif + +} + +#define rho_ud16_macro(i) \ + s[i] = s[i] ^ mp[i]; + +void rho_ud16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + + pad(m,mp,len8); + + g8A(s,c); +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&mp[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&mp[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&mp[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&mp[12]); + + for (int i = 0; i < 16; i++) { + if (i < len8) { + c[i] = c[i] ^ mp[i]; + } + else { + c[i] = 0; + } + } + +#else + + rho_ud16_macro(0); + rho_ud16_macro(1); + rho_ud16_macro(2); + rho_ud16_macro(3); + rho_ud16_macro(4); + rho_ud16_macro(5); + rho_ud16_macro(6); + rho_ud16_macro(7); + rho_ud16_macro(8); + rho_ud16_macro(9); + rho_ud16_macro(10); + rho_ud16_macro(11); + rho_ud16_macro(12); + rho_ud16_macro(13); + rho_ud16_macro(14); + rho_ud16_macro(15); + + for (int i = 0; i < 16; i++) { + if (i < len8) { + c[i] = c[i] ^ mp[i]; + } + else { + c[i] = 0; + } + } + +#endif + +} + +void irho_eqov16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s) { + + g8A(s,m); + +#ifdef ___ENABLE_WORD_CAST + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + s2 ^= c2 ^ m2; + s3 ^= c3 ^ m3; + + m0 ^= c0; + m1 ^= c1; + m2 ^= c2; + m3 ^= c3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&m[0]) = m0; + *(uint32_t*)(&m[4]) = m1; + *(uint32_t*)(&m[8]) = m2; + 
*(uint32_t*)(&m[12]) = m3; + +#else + + uint32_t c0, c1, c2, c3; + uint32_t s0, s1, s2, s3; + uint32_t m0, m1, m2, m3; + + pack_word(m[0], m[1], m[2], m[3], m0); + pack_word(m[4], m[5], m[6], m[7], m1); + pack_word(m[8], m[9], m[10], m[11], m2); + pack_word(m[12], m[13], m[14], m[15], m3); + + pack_word(s[0], s[1], s[2], s[3], s0); + pack_word(s[4], s[5], s[6], s[7], s1); + pack_word(s[8], s[9], s[10], s[11], s2); + pack_word(s[12], s[13], s[14], s[15], s3); + + pack_word(c[0], c[1], c[2], c[3], c0); + pack_word(c[4], c[5], c[6], c[7], c1); + pack_word(c[8], c[9], c[10], c[11], c2); + pack_word(c[12], c[13], c[14], c[15], c3); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + s2 ^= c2 ^ m2; + s3 ^= c3 ^ m3; + + m0 ^= c0; + m1 ^= c1; + m2 ^= c2; + m3 ^= c3; + + unpack_word(s[0], s[1], s[2], s[3], s0); + unpack_word(s[4], s[5], s[6], s[7], s1); + unpack_word(s[8], s[9], s[10], s[11], s2); + unpack_word(s[12], s[13], s[14], s[15], s3); + + unpack_word(m[0], m[1], m[2], m[3], m0); + unpack_word(m[4], m[5], m[6], m[7], m1); + unpack_word(m[8], m[9], m[10], m[11], m2); + unpack_word(m[12], m[13], m[14], m[15], m3); + +#endif + +} + +#define irho_ud16_macro(i) \ + s[i] = s[i] ^ cp[i]; + +void irho_ud16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char cp [16]; + + pad(c,cp,len8); + + g8A(s,m); + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&cp[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&cp[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&cp[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&cp[12]); + + for (int i = 0; i < len8; i++) { + s[i] ^= m[i]; + } + + for (int i = 0; i < 16; i++) { + if (i < len8) { + m[i] = m[i] ^ cp[i]; + } + else { + m[i] = 0; + } + } + +#else + + irho_ud16_macro(0); + irho_ud16_macro(1); + irho_ud16_macro(2); + irho_ud16_macro(3); + irho_ud16_macro(4); + irho_ud16_macro(5); + irho_ud16_macro(6); + irho_ud16_macro(7); + irho_ud16_macro(8); + irho_ud16_macro(9); + irho_ud16_macro(10); + 
irho_ud16_macro(11); + irho_ud16_macro(12); + irho_ud16_macro(13); + irho_ud16_macro(14); + irho_ud16_macro(15); + + for (int i = 0; i < len8; i++) { + s[i] ^= m[i]; + } + + for (int i = 0; i < 16; i++) { + if (i < len8) { + m[i] = m[i] ^ cp[i]; + } + else { + m[i] = 0; + } + } + +#endif + +} + +void reset_lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&CNT[0]) = 0x00000001; // CNT3 CNT2 CNT1 CNT0 + *(uint32_t*)(&CNT[4]) = 0x00000000; // CNT7 CNT6 CNT5 CNT4 + +#else + + CNT[0] = 0x01; + CNT[1] = 0x00; + CNT[2] = 0x00; + CNT[3] = 0x00; + CNT[4] = 0x00; + CNT[5] = 0x00; + CNT[6] = 0x00; + +#endif + +} + +void lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_WORD_CAST + + uint32_t C0; + uint32_t C1; + uint32_t fb0; + + C0 = *(uint32_t*)(&CNT[0]); // CNT3 CNT2 CNT1 CNT0 + C1 = *(uint32_t*)(&CNT[4]); // CNT7 CNT6 CNT5 CNT4 + + fb0 = 0; + if (CNT[6] & 0x80) { + fb0 = 0x95; + } + + C1 = C1 << 1 | C0 >> 31; + C0 = C0 << 1 ^ fb0; + + *(uint32_t*)(&CNT[0]) = C0; + *(uint32_t*)(&CNT[4]) = C1; + +#else + + uint32_t fb0 = CNT[6] >> 7; + + CNT[6] = (CNT[6] << 1) | (CNT[5] >> 7); + CNT[5] = (CNT[5] << 1) | (CNT[4] >> 7); + CNT[4] = (CNT[4] << 1) | (CNT[3] >> 7); + CNT[3] = (CNT[3] << 1) | (CNT[2] >> 7); + CNT[2] = (CNT[2] << 1) | (CNT[1] >> 7); + CNT[1] = (CNT[1] << 1) | (CNT[0] >> 7); + if (fb0 == 1) { + CNT[0] = (CNT[0] << 1) ^ 0x95; + } + else { + CNT[0] = (CNT[0] << 1); + } + +#endif + +} + +void block_cipher( + unsigned char* s, + const unsigned char* k, unsigned char* T, + unsigned char* CNT, unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + CNT[7] = D; + p_skinny_ctrl->func_skinny_128_384_enc(s, p_skinny_ctrl, CNT, T, k); + +} + +void nonce_encryption ( + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + block_cipher(s,k,(unsigned char*)N,CNT,D,p_skinny_ctrl); + +} + +void generate_tag ( + unsigned char** c, unsigned char* s, + unsigned long 
long* clen) { + +#ifdef ___ENABLE_WORD_CAST + + g8A_for_Tag_Generation(s, *c); + +#else + + g8A(s, *c); + +#endif + *c = *c + 16; + *c = *c - *clen; + +} + +unsigned long long msg_encryption_eqov16 ( + const unsigned char** M, unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* p_skinny_ctrl) { + + rho_eqov16(*M, *c, s); + *c = *c + 16; + *M = *M + 16; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return mlen - 16; + +} + +unsigned long long msg_encryption_ud16 ( + const unsigned char** M, unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* p_skinny_ctrl) { + + rho_ud16(*M, *c, s, mlen); + *c = *c + mlen; + *M = *M + mlen; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return 0; + +} + +unsigned long long msg_decryption_eqov16 ( + unsigned char** M, const unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long clen, + skinny_ctrl* p_skinny_ctrl) { + + irho_eqov16(*M, *c, s); + *c = *c + 16; + *M = *M + 16; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return clen - 16; + +} +unsigned long long msg_decryption_ud16 ( + unsigned char** M, const unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long clen, + skinny_ctrl* p_skinny_ctrl) { + + irho_ud16(*M, *c, s, clen); + *c = *c + clen; + *M = *M + clen; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return 0; + +} + +unsigned long long ad_encryption_eqov32 ( + const unsigned char** A, unsigned char* s, + const unsigned char* k, unsigned long long adlen, + unsigned char* CNT, + unsigned char D, + skinny_ctrl* 
p_skinny_ctrl) { + + unsigned char T [16]; + + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&T[0]) = *(uint32_t*)(&(*A)[0]); + *(uint32_t*)(&T[4]) = *(uint32_t*)(&(*A)[4]); + *(uint32_t*)(&T[8]) = *(uint32_t*)(&(*A)[8]); + *(uint32_t*)(&T[12]) = *(uint32_t*)(&(*A)[12]); + +#else + + T[0] = (*A)[0]; + T[1] = (*A)[1]; + T[2] = (*A)[2]; + T[3] = (*A)[3]; + T[4] = (*A)[4]; + T[5] = (*A)[5]; + T[6] = (*A)[6]; + T[7] = (*A)[7]; + T[8] = (*A)[8]; + T[9] = (*A)[9]; + T[10] = (*A)[10]; + T[11] = (*A)[11]; + T[12] = (*A)[12]; + T[13] = (*A)[13]; + T[14] = (*A)[14]; + T[15] = (*A)[15]; + +#endif + + *A = *A + 16; + block_cipher(s,k,T,CNT,D,p_skinny_ctrl); + lfsr_gf56(CNT); + + return adlen - 32; + +} + +unsigned long long ad_encryption_ov16 ( + const unsigned char** A, unsigned char* s, + const unsigned char* k, unsigned long long adlen, + unsigned char* CNT, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + unsigned char T [16]; + + adlen = adlen - 16; + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + + pad(*A, T, adlen); + *A = *A + adlen; + block_cipher(s,k,T,CNT,D,p_skinny_ctrl); + lfsr_gf56(CNT); + + return 0; + +} + +unsigned long long ad_encryption_eq16 ( + const unsigned char** A, unsigned char* s, + unsigned char* CNT) { + + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + + return 0; + +} + +unsigned long long ad_encryption_ud16( + const unsigned char** A, unsigned char* s, + unsigned long long adlen, + unsigned char* CNT) { + + rho_ad_ud16(*A, s, adlen); + *A = *A + adlen; + lfsr_gf56(CNT); + + return 0; + +} + +int crypto_aead_encrypt ( + unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, + const unsigned char* npub, + const unsigned char* k) { + + unsigned char s[16]; + unsigned char CNT[8]; + const unsigned char* A; + const unsigned char* M; + const unsigned 
char* N; + + skinny_ctrl ctrl; + ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void) nsec; + A = ad; + M = m; + N = npub; + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#else + + s[0] = 0; + s[1] = 0; + s[2] = 0; + s[3] = 0; + s[4] = 0; + s[5] = 0; + s[6] = 0; + s[7] = 0; + s[8] = 0; + s[9] = 0; + s[10] = 0; + s[11] = 0; + s[12] = 0; + s[13] = 0; + s[14] = 0; + s[15] = 0; + +#endif + + reset_lfsr_gf56(CNT); + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else while (adlen > 0) { + if (adlen < 16) { // The last block of AD is odd and incomplete + adlen = ad_encryption_ud16(&A,s,adlen,CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 16) { // The last block of AD is odd and complete + adlen = ad_encryption_eq16(&A,s,CNT); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else if (adlen < 32) { // The last block of AD is even and incomplete + adlen = ad_encryption_ov16(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 32) { // The last block of AD is even and complete + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else { // A normal full pair of blocks of AD + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + } + } + + ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + + *clen = mlen + 16; + + if (mlen == 0) { // M is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x15,&ctrl); + } + else while (mlen > 0) { + if (mlen < 16) { // The last block of M is incomplete + mlen = msg_encryption_ud16(&M,&c,N,CNT,s,k,0x15,mlen,&ctrl); + } + else if (mlen == 16) { // The last block of M is complete + mlen = msg_encryption_eqov16(&M,&c,N,CNT,s,k,0x14,mlen,&ctrl); + } + else { // A normal full message block + mlen = 
msg_encryption_eqov16(&M,&c,N,CNT,s,k,0x04,mlen,&ctrl); + } + } + + // Tag generation + generate_tag(&c,s,clen); + + return 0; + +} + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) { + + unsigned char s[16]; + unsigned char T[16]; + unsigned char CNT[8]; + const unsigned char* A; + unsigned char* M; + const unsigned char* N; + + skinny_ctrl ctrl; + ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void) nsec; + A = ad; + M = m; + N = npub; + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#else + + s[0] = 0; + s[1] = 0; + s[2] = 0; + s[3] = 0; + s[4] = 0; + s[5] = 0; + s[6] = 0; + s[7] = 0; + s[8] = 0; + s[9] = 0; + s[10] = 0; + s[11] = 0; + s[12] = 0; + s[13] = 0; + s[14] = 0; + s[15] = 0; + +#endif + + reset_lfsr_gf56(CNT); + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else while (adlen > 0) { + if (adlen < 16) { // The last block of AD is odd and incomplete + adlen = ad_encryption_ud16(&A,s,adlen,CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 16) { // The last block of AD is odd and complete + adlen = ad_encryption_eq16(&A,s,CNT); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else if (adlen < 32) { // The last block of AD is even and incomplete + adlen = ad_encryption_ov16(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 32) { // The last block of AD is even and complete + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else { // A normal full pair of blocks of AD + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + } + } + + 
ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + + clen = clen -16; + *mlen = clen; + + if (clen == 0) { // C is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x15,&ctrl); + } + else while (clen > 0) { + if (clen < 16) { // The last block of C is incomplete + clen = msg_decryption_ud16(&M,&c,N,CNT,s,k,0x15,clen,&ctrl); + } + else if (clen == 16) { // The last block of C is complete + clen = msg_decryption_eqov16(&M,&c,N,CNT,s,k,0x14,clen,&ctrl); + } + else { // A normal full message block + clen = msg_decryption_eqov16(&M,&c,N,CNT,s,k,0x04,clen,&ctrl); + } + } + + // Tag generation +#ifdef ___ENABLE_WORD_CAST + + g8A_for_Tag_Generation(s, T); + +#else + + g8A(s, T); + +#endif + for (int i = 0; i < 16; i++) { + if (T[i] != (*(c+i))) { + return -1; + } + } + + return 0; + +} diff --git a/romulus/Implementations/crypto_aead/romulusn1+/opt32_NEC/skinny.h b/romulus/Implementations/crypto_aead/romulusn1+/opt32_NEC/skinny.h new file mode 100644 index 0000000..d9f4a34 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1+/opt32_NEC/skinny.h @@ -0,0 +1,69 @@ +#define ___SKINNY_LOOP +//#define ___NUM_OF_ROUNDS_56 +#define ___ENABLE_WORD_CAST + +#include + +typedef struct ___skinny_ctrl { +#ifdef ___NUM_OF_ROUNDS_56 + uint32_t roundKeys[240]; // number of rounds : 56 +#else + uint32_t roundKeys[176]; // number of rounds : 40 +#endif + void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K); +} skinny_ctrl; + +extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned 
char* K); + +#define pack_word(x0, x1, x2, x3, w) \ + w = ((x3) << 24) ^ \ + ((x2) << 16) ^ \ + ((x1) << 8) ^ \ + (x0); + +#define unpack_word(x0, x1, x2, x3, w) \ + x0 = ((w) & 0xff); \ + x1 = (((w) >> 8) & 0xff); \ + x2 = (((w) >> 16) & 0xff); \ + x3 = ((w) >> 24); + +#define PERMUTATION() \ +/* permutation */ \ + \ + /* 7 6 5 4 3 2 1 0 */ \ + /* 5 7 2 3 6 0 4 1 */ \ + \ + /* w0 (3 2 1 0) */ \ + /* w1 (7 6 5 4) */ \ + \ + /* w0 (6 0 4 1) */ \ + /* w1 (5 7 2 3) */ \ + \ + t0 = w1 << 8; /* 6 5 4 - */ \ + t0 = t0 & 0xff00ff00; /* 6 - 4 - */ \ + \ + t1 = w1 << 16; /* 5 4 - - */ \ + t1 = t1 & 0xff000000; /* 5 - - - */ \ + \ + t2 = w1 & 0xff000000; /* 7 - - - */ \ + t2 = t2 >> 8; /* - 7 - - */ \ + t1 = t1 ^ t2; /* 5 7 - - */ \ + \ + t2 = w0 & 0xff000000; /* 3 - - - */ \ + t2 = t2 >> 24; /* - - - 3 */ \ + t1 = t1 ^ t2; /* 5 7 - 3 */ \ + \ + w1 = w0 >> 8; /* - 3 2 1 */ \ + w1 = w1 & 0x0000ff00; /* - - 2 - */ \ + w1 = w1 ^ t1; /* 5 7 2 3 */ \ + \ + t2 = w0 & 0x0000ff00; /* - - 1 - */ \ + t2 = t2 >> 8; /* - - - 1 */ \ + t0 = t0 ^ t2; /* 6 - 4 1 */ \ + \ + w0 = w0 << 16; /* 1 0 - - */ \ + w0 = w0 & 0x00ff0000; /* - 0 - - */ \ + w0 = w0 ^ t0; /* 6 0 4 1 */ + diff --git a/romulus/Implementations/crypto_aead/romulusn1+/opt32_NEC/skinny_key_schedule2.c b/romulus/Implementations/crypto_aead/romulusn1+/opt32_NEC/skinny_key_schedule2.c new file mode 100644 index 0000000..923d4b8 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1+/opt32_NEC/skinny_key_schedule2.c @@ -0,0 +1,227 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * load * AC(c0 c1) ^ TK3 + * calc AC(c0 c1) ^ TK2 -> store + * ART(TK2) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + w0 = ((w0 << 1) & 0xfefefefe) ^ \ + (((w0 >> 7) ^ (w0 >> 5)) & 0x01010101); \ + w1 = ((w1 << 1) & 0xfefefefe) ^ \ + (((w1 >> 7) ^ (w1 >> 5)) & 0x01010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2++ = w0 ^ *tk3++; \ + *tk2++ = w1 ^ *tk3++; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(uint32_t *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = roundKeys[4]; + w1 = roundKeys[5]; + + tk2 = &roundKeys[16]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[96]; +#else + tk3 = &roundKeys[128]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th,43th, ... 
,51th,53th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + w0 = roundKeys[6]; + w1 = roundKeys[7]; + + tk2 = &roundKeys[18]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[98]; +#else + tk3 = &roundKeys[130]; +#endif + + // 2nd,4th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else + +void RunEncryptionKeyScheduleTK2(uint32_t *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = roundKeys[4]; + w1 = roundKeys[5]; + + tk2 = &roundKeys[16]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[96]; +#else + tk3 = &roundKeys[128]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... 
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + w0 = roundKeys[6]; + w1 = roundKeys[7]; + + tk2 = &roundKeys[18]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[98]; +#else + tk3 = &roundKeys[130]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif diff --git a/romulus/Implementations/crypto_aead/romulusn1+/opt32_NEC/skinny_key_schedule3.c b/romulus/Implementations/crypto_aead/romulusn1+/opt32_NEC/skinny_key_schedule3.c new file mode 100644 index 0000000..39254a6 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1+/opt32_NEC/skinny_key_schedule3.c @@ -0,0 +1,228 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * AC(c0 c1) ^ TK3 -> store + * ART(TK3) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + w0 = ((w0 >> 1) & 0x7f7f7f7f) ^ \ + (((w0 << 7) ^ (w0 << 1)) & 0x80808080); \ + w1 = ((w1 >> 1) & 0x7f7f7f7f) ^ \ + (((w1 << 7) ^ (w1 << 1)) & 0x80808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + *tk3++ = w0 ^ c0Val; \ + *tk3++ = w1 ^ c1Val; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(uint32_t *roundKeys) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = roundKeys[8]; + w1 = roundKeys[9]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[96]; +#else + tk3 = &roundKeys[128]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK3(0x7, 0x000); + PERMUTATION_TK3(0xf, 0x100); + PERMUTATION_TK3(0xd, 0x300); + PERMUTATION_TK3(0x7, 0x300); + PERMUTATION_TK3(0xe, 0x100); + PERMUTATION_TK3(0x9, 0x300); + PERMUTATION_TK3(0x7, 0x200); + PERMUTATION_TK3(0xd, 0x100); + PERMUTATION_TK3(0x5, 0x300); + + PERMUTATION_TK3(0x6, 0x100); + PERMUTATION_TK3(0x8, 0x100); + PERMUTATION_TK3(0x1, 0x200); + PERMUTATION_TK3(0x5, 0x000); + PERMUTATION_TK3(0x7, 0x100); + PERMUTATION_TK3(0xc, 0x100); + PERMUTATION_TK3(0x1, 0x300); + PERMUTATION_TK3(0x6, 0x000); + PERMUTATION_TK3(0xb, 0x100); + PERMUTATION_TK3(0xd, 0x200); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41td,43th, ... 
,53th,55th round + PERMUTATION_TK3(0x4, 0x300); + PERMUTATION_TK3(0x2, 0x100); + PERMUTATION_TK3(0x8, 0x000); + PERMUTATION_TK3(0x2, 0x200); + PERMUTATION_TK3(0x9, 0x000); + PERMUTATION_TK3(0x6, 0x200); + PERMUTATION_TK3(0x9, 0x100); + PERMUTATION_TK3(0x5, 0x200); + +#endif + + // even + + // load master key + w0 = roundKeys[10]; + w1 = roundKeys[11]; + + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[98]; +#else + tk3 = &roundKeys[130]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x000); + PERMUTATION_TK3(0xf, 0x000); + PERMUTATION_TK3(0xe, 0x300); + PERMUTATION_TK3(0xb, 0x300); + PERMUTATION_TK3(0xf, 0x200); + PERMUTATION_TK3(0xc, 0x300); + PERMUTATION_TK3(0x3, 0x300); + PERMUTATION_TK3(0xe, 0x000); + PERMUTATION_TK3(0xa, 0x300); + PERMUTATION_TK3(0xb, 0x200); + + PERMUTATION_TK3(0xc, 0x200); + PERMUTATION_TK3(0x0, 0x300); + PERMUTATION_TK3(0x2, 0x000); + PERMUTATION_TK3(0xb, 0x000); + PERMUTATION_TK3(0xe, 0x200); + PERMUTATION_TK3(0x8, 0x300); + PERMUTATION_TK3(0x3, 0x200); + PERMUTATION_TK3(0xd, 0x000); + PERMUTATION_TK3(0x6, 0x300); + PERMUTATION_TK3(0xa, 0x100); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK3(0x9, 0x200); + PERMUTATION_TK3(0x4, 0x200); + PERMUTATION_TK3(0x1, 0x100); + PERMUTATION_TK3(0x4, 0x000); + PERMUTATION_TK3(0x3, 0x100); + PERMUTATION_TK3(0xc, 0x000); + PERMUTATION_TK3(0x2, 0x300); + PERMUTATION_TK3(0xa, 0x000); + +#endif + +} + +#else +/* Loop variant (built when ___SKINNY_LOOP is defined): same seeds and store positions as the unrolled variant, but the per-round constants are read from pRC instead of being literals. The first 4 bytes of pRC are skipped before the odd-round loop because the 1st-round store uses the literal 0x01. */ +void RunEncryptionKeyScheduleTK3(uint32_t *roundKeys, unsigned char *pRC) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + uint16_t c0; + uint16_t c1; + + // odd + + // load master key + w0 = roundKeys[8]; + w1 = roundKeys[9]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[96]; +#else + tk3 = &roundKeys[128]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + pRC += 4; + // 3rd,5th, ... 
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { /* each pass reads two constant bytes (the second is shifted into bits 8..15) and skips the next two */ + c0 = *pRC++; + c1 = *pRC++; + c1 <<= 8; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + w0 = roundKeys[10]; + w1 = roundKeys[11]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; /* 4 + 19*4 = 80 bytes consumed so far; rewind to byte offset 2 of the table */ + tk3 = &roundKeys[98]; +#else + pRC -= 110; /* 4 + 27*4 = 112 bytes consumed so far; rewind to byte offset 2 of the table */ + tk3 = &roundKeys[130]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + c1 <<= 8; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif diff --git a/romulus/Implementations/crypto_aead/romulusn1+/opt32_NEC/skinny_main.c b/romulus/Implementations/crypto_aead/romulusn1+/opt32_NEC/skinny_main.c new file mode 100644 index 0000000..74222ee --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1+/opt32_NEC/skinny_main.c @@ -0,0 +1,537 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * ART(TK1) -> store + * load AC(c0 c1) ^ TK3 ^ TK2 + * load TK1 + * calc AC(c0 c1) ^ TK3 ^ TK2 ^ TK1 -> use at (AC->ART) + * SC->SR->(AC->ART)->MC + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +/* + * S-BOX: 256-entry byte substitution table (16 rows of 16) + */ +unsigned char SBOX[] += { + // Original + 0x65, 0x4c, 0x6a, 0x42, 0x4b, 0x63, 0x43, 0x6b, 0x55, 0x75, 0x5a, 0x7a, 0x53, 0x73, 0x5b, 0x7b, + 0x35, 0x8c, 0x3a, 0x81, 0x89, 0x33, 0x80, 0x3b, 0x95, 0x25, 0x98, 0x2a, 0x90, 0x23, 0x99, 0x2b, + 0xe5, 0xcc, 0xe8, 0xc1, 0xc9, 0xe0, 0xc0, 0xe9, 0xd5, 0xf5, 0xd8, 0xf8, 0xd0, 0xf0, 0xd9, 0xf9, + 0xa5, 0x1c, 0xa8, 0x12, 0x1b, 0xa0, 0x13, 0xa9, 0x05, 0xb5, 0x0a, 0xb8, 0x03, 0xb0, 0x0b, 0xb9, + 0x32, 0x88, 0x3c, 0x85, 0x8d, 0x34, 0x84, 0x3d, 0x91, 0x22, 0x9c, 0x2c, 0x94, 0x24, 0x9d, 0x2d, + 0x62, 0x4a, 0x6c, 0x45, 0x4d, 0x64, 0x44, 0x6d, 0x52, 0x72, 0x5c, 0x7c, 0x54, 0x74, 0x5d, 0x7d, + 0xa1, 0x1a, 0xac, 0x15, 0x1d, 0xa4, 0x14, 0xad, 0x02, 0xb1, 0x0c, 0xbc, 0x04, 0xb4, 0x0d, 0xbd, + 0xe1, 0xc8, 0xec, 0xc5, 0xcd, 0xe4, 0xc4, 0xed, 0xd1, 0xf1, 0xdc, 0xfc, 0xd4, 0xf4, 0xdd, 0xfd, + 0x36, 0x8e, 0x38, 0x82, 0x8b, 0x30, 0x83, 0x39, 0x96, 0x26, 0x9a, 0x28, 0x93, 0x20, 0x9b, 0x29, + 0x66, 0x4e, 0x68, 0x41, 0x49, 0x60, 0x40, 0x69, 0x56, 0x76, 0x58, 0x78, 0x50, 0x70, 0x59, 0x79, + 0xa6, 0x1e, 0xaa, 0x11, 0x19, 0xa3, 0x10, 0xab, 0x06, 0xb6, 0x08, 0xba, 0x00, 0xb3, 0x09, 0xbb, + 0xe6, 0xce, 0xea, 0xc2, 0xcb, 0xe3, 0xc3, 0xeb, 0xd6, 0xf6, 0xda, 0xfa, 0xd3, 0xf3, 0xdb, 0xfb, + 0x31, 0x8a, 0x3e, 0x86, 0x8f, 0x37, 0x87, 0x3f, 0x92, 0x21, 0x9e, 0x2e, 0x97, 0x27, 0x9f, 0x2f, + 0x61, 0x48, 0x6e, 0x46, 0x4f, 0x67, 0x47, 0x6f, 0x51, 0x71, 0x5e, 0x7e, 0x57, 0x77, 0x5f, 0x7f, + 0xa2, 0x18, 0xae, 0x16, 0x1f, 0xa7, 0x17, 0xaf, 0x01, 0xb2, 0x0e, 0xbe, 0x07, 0xb7, 0x0f, 0xbf, + 0xe2, 0xca, 0xee, 0xc6, 0xcf, 0xe7, 0xc7, 0xef, 0xd2, 0xf2, 0xde, 0xfe, 0xd7, 0xf7, 0xdf, 0xff, +}; + + /* + * S-BOX ^ AC(c2) + */ 
+unsigned char SBOX2[] += { // Original ^ c2(0x02) + 0x67, 0x4e, 0x68, 0x40, 0x49, 0x61, 0x41, 0x69, 0x57, 0x77, 0x58, 0x78, 0x51, 0x71, 0x59, 0x79, + 0x37, 0x8e, 0x38, 0x83, 0x8b, 0x31, 0x82, 0x39, 0x97, 0x27, 0x9a, 0x28, 0x92, 0x21, 0x9b, 0x29, + 0xe7, 0xce, 0xea, 0xc3, 0xcb, 0xe2, 0xc2, 0xeb, 0xd7, 0xf7, 0xda, 0xfa, 0xd2, 0xf2, 0xdb, 0xfb, + 0xa7, 0x1e, 0xaa, 0x10, 0x19, 0xa2, 0x11, 0xab, 0x07, 0xb7, 0x08, 0xba, 0x01, 0xb2, 0x09, 0xbb, + 0x30, 0x8a, 0x3e, 0x87, 0x8f, 0x36, 0x86, 0x3f, 0x93, 0x20, 0x9e, 0x2e, 0x96, 0x26, 0x9f, 0x2f, + 0x60, 0x48, 0x6e, 0x47, 0x4f, 0x66, 0x46, 0x6f, 0x50, 0x70, 0x5e, 0x7e, 0x56, 0x76, 0x5f, 0x7f, + 0xa3, 0x18, 0xae, 0x17, 0x1f, 0xa6, 0x16, 0xaf, 0x00, 0xb3, 0x0e, 0xbe, 0x06, 0xb6, 0x0f, 0xbf, + 0xe3, 0xca, 0xee, 0xc7, 0xcf, 0xe6, 0xc6, 0xef, 0xd3, 0xf3, 0xde, 0xfe, 0xd6, 0xf6, 0xdf, 0xff, + 0x34, 0x8c, 0x3a, 0x80, 0x89, 0x32, 0x81, 0x3b, 0x94, 0x24, 0x98, 0x2a, 0x91, 0x22, 0x99, 0x2b, + 0x64, 0x4c, 0x6a, 0x43, 0x4b, 0x62, 0x42, 0x6b, 0x54, 0x74, 0x5a, 0x7a, 0x52, 0x72, 0x5b, 0x7b, + 0xa4, 0x1c, 0xa8, 0x13, 0x1b, 0xa1, 0x12, 0xa9, 0x04, 0xb4, 0x0a, 0xb8, 0x02, 0xb1, 0x0b, 0xb9, + 0xe4, 0xcc, 0xe8, 0xc0, 0xc9, 0xe1, 0xc1, 0xe9, 0xd4, 0xf4, 0xd8, 0xf8, 0xd1, 0xf1, 0xd9, 0xf9, + 0x33, 0x88, 0x3c, 0x84, 0x8d, 0x35, 0x85, 0x3d, 0x90, 0x23, 0x9c, 0x2c, 0x95, 0x25, 0x9d, 0x2d, + 0x63, 0x4a, 0x6c, 0x44, 0x4d, 0x65, 0x45, 0x6d, 0x53, 0x73, 0x5c, 0x7c, 0x55, 0x75, 0x5d, 0x7d, + 0xa0, 0x1a, 0xac, 0x14, 0x1d, 0xa5, 0x15, 0xad, 0x03, 0xb0, 0x0c, 0xbc, 0x05, 0xb5, 0x0d, 0xbd, + 0xe0, 0xc8, 0xec, 0xc4, 0xcd, 0xe5, 0xc5, 0xed, 0xd0, 0xf0, 0xdc, 0xfc, 0xd5, 0xf5, 0xdd, 0xfd, +}; + +#ifdef ___SKINNY_LOOP +/* + * Round Constants: two bytes per round, 40 rounds (56 with ___NUM_OF_ROUNDS_56) + */ +unsigned char RC[] += { + 0x01, 0x00, 0x03, 0x00, 0x07, 0x00, 0x0f, 0x00, 0x0f, 0x01, 0x0e, 0x03, 0x0d, 0x03, 0x0b, 0x03, + 0x07, 0x03, 0x0f, 0x02, 0x0e, 0x01, 0x0c, 0x03, 0x09, 0x03, 0x03, 0x03, 0x07, 0x02, 0x0e, 0x00, + 0x0d, 0x01, 0x0a, 0x03, 0x05, 0x03, 0x0b, 0x02, 0x06, 0x01, 0x0c, 0x02, 0x08, 0x01, 0x00, 0x03, + 
0x01, 0x02, 0x02, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x07, 0x01, 0x0e, 0x02, 0x0c, 0x01, 0x08, 0x03, + 0x01, 0x03, 0x03, 0x02, 0x06, 0x00, 0x0d, 0x00, 0x0b, 0x01, 0x06, 0x03, 0x0d, 0x02, 0x0a, 0x01, +#ifdef ___NUM_OF_ROUNDS_56 + 0x04, 0x03, 0x09, 0x02, 0x02, 0x01, 0x04, 0x02, 0x08, 0x00, 0x01, 0x01, 0x02, 0x02, 0x04, 0x00, + 0x09, 0x00, 0x03, 0x01, 0x06, 0x02, 0x0c, 0x00, 0x09, 0x01, 0x02, 0x03, 0x05, 0x02, 0x0a, 0x00, +#endif + }; +#endif + +extern void Encrypt(unsigned char *block, uint32_t *roundKeys, unsigned char *sbox, unsigned char *sbox2); +extern void RunEncryptionKeyScheduleTK2(uint32_t *roundKeys); +#ifdef ___SKINNY_LOOP +extern void RunEncryptionKeyScheduleTK3(uint32_t *roundKeys, unsigned char *pRC); +#else +extern void RunEncryptionKeyScheduleTK3(uint32_t *roundKeys); +#endif +/* Full set-up entry point: loads CNT into roundKeys[0..1], T into roundKeys[4..7] and K into roundKeys[8..11], runs the TK3 and then the TK2 key schedule, encrypts input in place, and finally repoints pskinny_ctrl->func_skinny_128_384_enc at skinny_128_384_enc12_12. */ +void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; +#ifndef ___ENABLE_WORD_CAST + pack_word(CNT[0], CNT[1], CNT[2], CNT[3], pt[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pack_word(T[0], T[1], T[2], T[3], pt[4]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pack_word(T[8], T[9], T[10], T[11], pt[6]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + pack_word(K[0], K[1], K[2], K[3], pt[8]); + pack_word(K[7], K[4], K[5], K[6], pt[9]); + pack_word(K[8], K[9], K[10], K[11], pt[10]); + pack_word(K[15], K[12], K[13], K[14], pt[11]); +#else + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + pt[8] = *(uint32_t*)(&K[0]); + pack_word(K[7], K[4], K[5], K[6], pt[9]); + pt[10] = *(uint32_t*)(&K[8]); + pack_word(K[15], K[12], K[13], K[14], pt[11]); +#endif + +#ifdef ___SKINNY_LOOP + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys, RC); +#else 
+ RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys); +#endif + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + + pskinny_ctrl->func_skinny_128_384_enc = skinny_128_384_enc12_12; + +} +/* Entry point that ignores K and does not rerun the TK3 schedule: only roundKeys[0..1] (from CNT) and roundKeys[4..7] (from T) are reloaded before the TK2 schedule and Encrypt(). */ +void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)K; + + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; +#ifndef ___ENABLE_WORD_CAST + pack_word(CNT[0], CNT[1], CNT[2], CNT[3], pt[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pack_word(T[0], T[1], T[2], T[3], pt[4]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pack_word(T[8], T[9], T[10], T[11], pt[6]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); +#else + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); +#endif + + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} +/* Entry point that ignores T and K: only roundKeys[0..1] are reloaded from CNT before Encrypt(); no key schedule is run. */ +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)T; + (void)K; + + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; +#ifndef ___ENABLE_WORD_CAST + pack_word(CNT[0], CNT[1], CNT[2], CNT[3], pt[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); +#else + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); +#endif + + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} +/* PERMUTATION_TK1(): runs PERMUTATION() (defined outside this file) and appends w0, w1 at tk1. */ +#define PERMUTATION_TK1() \ +/* permutation */ \ + \ + PERMUTATION(); \ + \ + /* store */ \ + \ + *tk1++ = w0; \ + *tk1++ = w1; +/* SBOX_0(w): replaces each of the four bytes of w by its sbox[] entry, keeping byte positions. Uses t0..t3 and sbox from the expanding scope. */ +#define SBOX_0(w) \ + \ + t0 = (w) & 0xff; \ + t1 = (w >> 8) & 0xff; \ + t2 = (w >> 16) & 0xff; \ + t3 = (w >> 24); \ + \ + t0 = sbox[t0]; \ + t1 = sbox[t1]; \ + t2 = sbox[t2]; \ + t3 = sbox[t3]; \ + 
\ + w = (t0) ^ \ + (t1 << 8) ^ \ + (t2 << 16) ^ \ + (t3 << 24); +/* SBOX_8(w): substitutes the four bytes of w through sbox[] and reassembles them rotated left by 8 bits. */ +#define SBOX_8(w) \ + \ + t0 = (w) & 0xff; \ + t1 = (w >> 8) & 0xff; \ + t2 = (w >> 16) & 0xff; \ + t3 = (w >> 24); \ + \ + t0 = sbox[t0]; \ + t1 = sbox[t1]; \ + t2 = sbox[t2]; \ + t3 = sbox[t3]; \ + \ + w = (t0 << 8) ^ \ + (t1 << 16) ^ \ + (t2 << 24) ^ \ + (t3); +/* SBOX_16(w): as SBOX_8 but rotated left by 16 bits; the lowest input byte goes through sbox2[] instead of sbox[]. */ +#define SBOX_16(w) \ + \ + t0 = (w) & 0xff; \ + t1 = (w >> 8) & 0xff; \ + t2 = (w >> 16) & 0xff; \ + t3 = (w >> 24); \ + \ + t0 = sbox2[t0]; /* AC(c2) */ \ + t1 = sbox[t1]; \ + t2 = sbox[t2]; \ + t3 = sbox[t3]; \ + \ + w = (t0 << 16) ^ \ + (t1 << 24) ^ \ + (t2) ^ \ + (t3 << 8); +/* SBOX_24(w): as SBOX_8 but rotated left by 24 bits. */ +#define SBOX_24(w) \ + \ + t0 = (w) & 0xff; \ + t1 = (w >> 8) & 0xff; \ + t2 = (w >> 16) & 0xff; \ + t3 = (w >> 24); \ + \ + t0 = sbox[t0]; \ + t1 = sbox[t1]; \ + t2 = sbox[t2]; \ + t3 = sbox[t3]; \ + \ + w = (t0 << 24) ^ \ + (t1) ^ \ + (t2 << 8) ^ \ + (t3 << 16); +/* SKINNY_MAIN(): two rounds on w0..w3. Each round substitutes through SBOX_0/8/16/24, XORs two words read from tk2 into w0/w1 (the first round also XORs two words read from tk1), then mixes the four words. */ +#define SKINNY_MAIN() \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows) */ \ + \ + SBOX_0(w0); \ + SBOX_8(w1); \ + SBOX_16(w2); \ + SBOX_24(w3); \ + \ + /* Load TK2^TK3^AC(c0 c1) */ \ + \ + w0 ^= *tk2++; \ + w1 ^= *tk2++; \ + \ + /* Load TK1 */ \ + \ + w0 ^= *tk1++; \ + w1 ^= *tk1++; \ + \ + /* MC */ \ + /* 0 2 3 */ \ + /* 0 */ \ + /* 1 2 */ \ + /* 0 2 */ \ + \ + /* 0^2 */ \ + t0 = w0 ^ w2; \ + \ + /* 1^2 */ \ + w2 = w1 ^ w2; \ + \ + /* 0 */ \ + w1 = w0; \ + \ + /* 0^2^3 */ \ + w0 = t0 ^ w3; \ + \ + /* 0^2 */ \ + w3 = t0; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2) */ \ + \ + SBOX_0(w0); \ + SBOX_8(w1); \ + SBOX_16(w2); \ + SBOX_24(w3); \ + \ + /* Load TK2^TK3^AC(c0 c1) */ \ + \ + w0 ^= *tk2++; \ + w1 ^= *tk2++; \ + \ + /* MC */ \ + /* 0 2 3 */ \ + /* 0 */ \ + /* 1 2 */ \ + /* 0 2 */ \ + \ + /* 0^2 */ \ + t0 = w0 ^ w2; \ + \ + /* 1^2 */ \ + w2 = w1 ^ w2; \ + \ + /* 0 */ \ + w1 = w0; \ + \ + /* 0^2^3 */ \ + w0 = t0 ^ w3; \ + \ + /* 0^2 */ \ + w3 = t0; + +#ifndef ___SKINNY_LOOP +/* Unrolled variant (built when ___SKINNY_LOOP is not defined). Expands TK1 from roundKeys[0..1] into roundKeys[2..15], then runs SKINNY_MAIN over block in place with tk2 walking forward from roundKeys[16] and tk1 rewound to roundKeys[0] after every eight SKINNY_MAIN expansions. */ +void Encrypt(unsigned char *block, uint32_t *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + 
uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t w0; + uint32_t w1; + uint32_t w2; + uint32_t w3; + +// TK1 + + // load master key + w0 = roundKeys[0]; + w1 = roundKeys[1]; + + // 1st round + // no need to store: roundKeys[0..1] already hold the 1st-round words + + tk1 = &roundKeys[2]; + + // 2nd, ... ,8th round + PERMUTATION_TK1(); + PERMUTATION_TK1(); + PERMUTATION_TK1(); + PERMUTATION_TK1(); + PERMUTATION_TK1(); + PERMUTATION_TK1(); + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + +#ifndef ___ENABLE_WORD_CAST + pack_word(block[0], block[1], block[2], block[3], w0); + pack_word(block[4], block[5], block[6], block[7], w1); + pack_word(block[8], block[9], block[10], block[11], w2); + pack_word(block[12], block[13], block[14], block[15], w3); +#else + w0 = *(uint32_t*)(&block[0]); + w1 = *(uint32_t*)(&block[4]); + w2 = *(uint32_t*)(&block[8]); + w3 = *(uint32_t*)(&block[12]); +#endif + + tk2 = &roundKeys[16]; + tk1 = &roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = &roundKeys[0]; + + // 17th, ...,32nd round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = &roundKeys[0]; + + // 33rd, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41st, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = &roundKeys[0]; + + // 49th, ... 
,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +#ifndef ___ENABLE_WORD_CAST + unpack_word(block[0], block[1], block[2], block[3], w0); + unpack_word(block[4], block[5], block[6], block[7], w1); + unpack_word(block[8], block[9], block[10], block[11], w2); + unpack_word(block[12], block[13], block[14], block[15], w3); +#else + *(uint32_t*)(&block[0]) = w0; + *(uint32_t*)(&block[4]) = w1; + *(uint32_t*)(&block[8]) = w2; + *(uint32_t*)(&block[12]) = w3; +#endif + +} + +#else +/* Loop variant (built when ___SKINNY_LOOP is defined): expands TK1 into roundKeys[2..15], then runs groups of eight SKINNY_MAIN expansions (two groups, or three with ___NUM_OF_ROUNDS_56) followed by a final group of four, rewinding tk1 to roundKeys[0] before each group. The result is written back to block. */ +void Encrypt(unsigned char *block, uint32_t *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t w0; + uint32_t w1; + uint32_t w2; + uint32_t w3; + +// TK1 + + // load master key + w0 = roundKeys[0]; + w1 = roundKeys[1]; + + // 1st round + // no need to store: roundKeys[0..1] already hold the 1st-round words + + tk1 = &roundKeys[2]; + + // 2nd, ... ,8th round + for(int i=0;i<7;i++) + { + PERMUTATION_TK1(); + } + +// SB+AC+ShR+MC + +#ifndef ___ENABLE_WORD_CAST + pack_word(block[0], block[1], block[2], block[3], w0); + pack_word(block[4], block[5], block[6], block[7], w1); + pack_word(block[8], block[9], block[10], block[11], w2); + pack_word(block[12], block[13], block[14], block[15], w3); +#else + w0 = *(uint32_t*)(&block[0]); + w1 = *(uint32_t*)(&block[4]); + w2 = *(uint32_t*)(&block[8]); + w3 = *(uint32_t*)(&block[12]); +#endif + + tk2 = &roundKeys[16]; + + // 1st, ... ,32nd or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = &roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33rd , ... ,40th or 49th, .... 
,56th round + { + tk1 = &roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +#ifndef ___ENABLE_WORD_CAST + unpack_word(block[0], block[1], block[2], block[3], w0); + unpack_word(block[4], block[5], block[6], block[7], w1); + unpack_word(block[8], block[9], block[10], block[11], w2); + unpack_word(block[12], block[13], block[14], block[15], w3); +#else + *(uint32_t*)(&block[0]) = w0; + *(uint32_t*)(&block[4]) = w1; + *(uint32_t*)(&block[8]) = w2; + *(uint32_t*)(&block[12]) = w3; +#endif + +} + +#endif diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32_NEC/api.h b/romulus/Implementations/crypto_aead/romulusn1/opt32_NEC/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32_NEC/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32_NEC/encrypt.c b/romulus/Implementations/crypto_aead/romulusn1/opt32_NEC/encrypt.c new file mode 100644 index 0000000..8668d91 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32_NEC/encrypt.c @@ -0,0 +1,1024 @@ +/* + * Date: 29 November 2018 + * Contact: Thomas Peyrin - thomas.peyrin@gmail.com + * Mustafa Khairallah - mustafam001@e.ntu.edu.sg + */ + +#include "crypto_aead.h" +#include "api.h" +#include "skinny.h" +#include +#include +/* pad: builds a 16-byte block in mp. mp is zeroed, mp[15] is set to (len8 & 0x0f), then the first len8 bytes of m are copied over it, so the length byte survives only when len8 < 16. No bounds check on len8. */ +void pad (const unsigned char* m, unsigned char* mp, int len8) { + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&mp[0]) = 0; + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = (len8 & 0x0f); + for (int i = 0; i < len8; i++) { + mp[i] = m[i]; + } + +#else + + mp[0] = 0; + mp[1] = 0; + mp[2] = 0; + mp[3] = 0; + mp[4] = 0; + mp[5] = 0; + mp[6] = 0; + mp[7] = 0; + mp[8] = 0; + mp[9] = 0; + mp[10] = 0; + mp[11] = 0; + mp[12] = 0; + mp[13] = 0; + mp[14] = 0; + mp[15] = (len8 & 0x0f); + for 
(int i = 0; i < len8; i++) { + mp[i] = m[i]; + } + +#endif + +} +/* g8A: for each of the 16 bytes of s, writes to c the byte shifted right by one with its top bit set to (bit 0 XOR bit 7) of the input byte. s is only read. */ +void g8A (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_WORD_CAST + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#else + + uint32_t s0, s1, s2, s3; + uint32_t c0, c1, c2, c3; + + pack_word(s[0], s[1], s[2], s[3], s0); + pack_word(s[4], s[5], s[6], s[7], s1); + pack_word(s[8], s[9], s[10], s[11], s2); + pack_word(s[12], s[13], s[14], s[15], s3); + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + unpack_word(c[0], c[1], c[2], c[3], c0); + unpack_word(c[4], c[5], c[6], c[7], c1); + unpack_word(c[8], c[9], c[10], c[11], c2); + unpack_word(c[12], c[13], c[14], c[15], c3); + +#endif + +} + +#ifdef ___ENABLE_WORD_CAST +/* g8A_for_Tag_Generation: same per-byte transform as g8A, with word loads from s but byte-by-byte stores to c. Only built when ___ENABLE_WORD_CAST is defined. */ +void g8A_for_Tag_Generation (unsigned char* s, unsigned char* c) { + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + // use byte access because of 
memory alignment. + // c is not always in word(4 byte) alignment. + c[0] = c0 &0xFF; + c[1] = (c0>>8) &0xFF; + c[2] = (c0>>16)&0xFF; + c[3] = c0>>24; + c[4] = c1 &0xFF; + c[5] = (c1>>8) &0xFF; + c[6] = (c1>>16)&0xFF; + c[7] = c1>>24; + c[8] = c2 &0xFF; + c[9] = (c2>>8) &0xFF; + c[10] = (c2>>16)&0xFF; + c[11] = c2>>24; + c[12] = c3 &0xFF; + c[13] = (c3>>8) &0xFF; + c[14] = (c3>>16)&0xFF; + c[15] = c3>>24; + +} + +#endif +/* rho_ad_eqov16: XORs the 16 bytes at m into the 16-byte state s. */ +#define rho_ad_eqov16_macro(i) \ + s[i] = s[i] ^ m[i]; + +void rho_ad_eqov16 ( + const unsigned char* m, + unsigned char* s) { + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&m[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&m[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&m[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&m[12]); + +#else + + rho_ad_eqov16_macro(0); + rho_ad_eqov16_macro(1); + rho_ad_eqov16_macro(2); + rho_ad_eqov16_macro(3); + rho_ad_eqov16_macro(4); + rho_ad_eqov16_macro(5); + rho_ad_eqov16_macro(6); + rho_ad_eqov16_macro(7); + rho_ad_eqov16_macro(8); + rho_ad_eqov16_macro(9); + rho_ad_eqov16_macro(10); + rho_ad_eqov16_macro(11); + rho_ad_eqov16_macro(12); + rho_ad_eqov16_macro(13); + rho_ad_eqov16_macro(14); + rho_ad_eqov16_macro(15); + +#endif + +} +/* rho_ad_ud16: pads the first len8 bytes of m to a 16-byte block with pad() and XORs that block into s. */ +#define rho_ad_ud16_macro(i) \ + s[i] = s[i] ^ mp[i]; + +void rho_ad_ud16 ( + const unsigned char* m, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + pad(m,mp,len8); + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&mp[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&mp[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&mp[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&mp[12]); + +#else + + rho_ad_ud16_macro(0); + rho_ad_ud16_macro(1); + rho_ad_ud16_macro(2); + rho_ad_ud16_macro(3); + rho_ad_ud16_macro(4); + rho_ad_ud16_macro(5); + rho_ad_ud16_macro(6); + rho_ad_ud16_macro(7); + rho_ad_ud16_macro(8); + rho_ad_ud16_macro(9); + rho_ad_ud16_macro(10); + rho_ad_ud16_macro(11); + rho_ad_ud16_macro(12); + rho_ad_ud16_macro(13); + 
rho_ad_ud16_macro(14); + rho_ad_ud16_macro(15); + +#endif + +} +/* rho_eqov16: full 16-byte block step. First c = g8A(s); then s ^= m and c ^= m, so c ends up as g8A(old s) ^ m. */ +void rho_eqov16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s) { + + g8A(s,c); + +#ifdef ___ENABLE_WORD_CAST + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= m0; + s1 ^= m1; + s2 ^= m2; + s3 ^= m3; + + c0 ^= m0; + c1 ^= m1; + c2 ^= m2; + c3 ^= m3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#else + + uint32_t c0, c1, c2, c3; + uint32_t s0, s1, s2, s3; + uint32_t m0, m1, m2, m3; + + pack_word(m[0], m[1], m[2], m[3], m0); + pack_word(m[4], m[5], m[6], m[7], m1); + pack_word(m[8], m[9], m[10], m[11], m2); + pack_word(m[12], m[13], m[14], m[15], m3); + + pack_word(s[0], s[1], s[2], s[3], s0); + pack_word(s[4], s[5], s[6], s[7], s1); + pack_word(s[8], s[9], s[10], s[11], s2); + pack_word(s[12], s[13], s[14], s[15], s3); + + pack_word(c[0], c[1], c[2], c[3], c0); + pack_word(c[4], c[5], c[6], c[7], c1); + pack_word(c[8], c[9], c[10], c[11], c2); + pack_word(c[12], c[13], c[14], c[15], c3); + + s0 ^= m0; + s1 ^= m1; + s2 ^= m2; + s3 ^= m3; + + c0 ^= m0; + c1 ^= m1; + c2 ^= m2; + c3 ^= m3; + + unpack_word(s[0], s[1], s[2], s[3], s0); + unpack_word(s[4], s[5], s[6], s[7], s1); + unpack_word(s[8], s[9], s[10], s[11], s2); + unpack_word(s[12], s[13], s[14], s[15], s3); + + unpack_word(c[0], c[1], c[2], c[3], c0); + unpack_word(c[4], c[5], c[6], c[7], c1); + unpack_word(c[8], c[9], c[10], c[11], c2); 
+ unpack_word(c[12], c[13], c[14], c[15], c3); + +#endif + +} +/* rho_ud16: partial-block step. c = g8A(s) is computed first, then s ^= pad(m, len8); finally c[i] ^= m[i] for i < len8 and c[i] = 0 for the rest. NOTE(review): all 16 bytes of c are written whatever len8 is - confirm every caller's output buffer has 16 bytes available at c. */ +#define rho_ud16_macro(i) \ + s[i] = s[i] ^ mp[i]; + +void rho_ud16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + + pad(m,mp,len8); + + g8A(s,c); +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&mp[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&mp[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&mp[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&mp[12]); + + for (int i = 0; i < 16; i++) { + if (i < len8) { + c[i] = c[i] ^ mp[i]; + } + else { + c[i] = 0; + } + } + +#else + + rho_ud16_macro(0); + rho_ud16_macro(1); + rho_ud16_macro(2); + rho_ud16_macro(3); + rho_ud16_macro(4); + rho_ud16_macro(5); + rho_ud16_macro(6); + rho_ud16_macro(7); + rho_ud16_macro(8); + rho_ud16_macro(9); + rho_ud16_macro(10); + rho_ud16_macro(11); + rho_ud16_macro(12); + rho_ud16_macro(13); + rho_ud16_macro(14); + rho_ud16_macro(15); + + for (int i = 0; i < 16; i++) { + if (i < len8) { + c[i] = c[i] ^ mp[i]; + } + else { + c[i] = 0; + } + } + +#endif + +} +/* irho_eqov16: inverse of the full-block step. m = g8A(s) first; then s ^= c ^ m and m ^= c, so m ends up as g8A(old s) ^ c and s has that same value XORed in. */ +void irho_eqov16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s) { + + g8A(s,m); + +#ifdef ___ENABLE_WORD_CAST + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + s2 ^= c2 ^ m2; + s3 ^= c3 ^ m3; + + m0 ^= c0; + m1 ^= c1; + m2 ^= c2; + m3 ^= c3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + 
*(uint32_t*)(&m[12]) = m3; + +#else + + uint32_t c0, c1, c2, c3; + uint32_t s0, s1, s2, s3; + uint32_t m0, m1, m2, m3; + + pack_word(m[0], m[1], m[2], m[3], m0); + pack_word(m[4], m[5], m[6], m[7], m1); + pack_word(m[8], m[9], m[10], m[11], m2); + pack_word(m[12], m[13], m[14], m[15], m3); + + pack_word(s[0], s[1], s[2], s[3], s0); + pack_word(s[4], s[5], s[6], s[7], s1); + pack_word(s[8], s[9], s[10], s[11], s2); + pack_word(s[12], s[13], s[14], s[15], s3); + + pack_word(c[0], c[1], c[2], c[3], c0); + pack_word(c[4], c[5], c[6], c[7], c1); + pack_word(c[8], c[9], c[10], c[11], c2); + pack_word(c[12], c[13], c[14], c[15], c3); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + s2 ^= c2 ^ m2; + s3 ^= c3 ^ m3; + + m0 ^= c0; + m1 ^= c1; + m2 ^= c2; + m3 ^= c3; + + unpack_word(s[0], s[1], s[2], s[3], s0); + unpack_word(s[4], s[5], s[6], s[7], s1); + unpack_word(s[8], s[9], s[10], s[11], s2); + unpack_word(s[12], s[13], s[14], s[15], s3); + + unpack_word(m[0], m[1], m[2], m[3], m0); + unpack_word(m[4], m[5], m[6], m[7], m1); + unpack_word(m[8], m[9], m[10], m[11], m2); + unpack_word(m[12], m[13], m[14], m[15], m3); + +#endif + +} + +#define irho_ud16_macro(i) \ + s[i] = s[i] ^ cp[i]; + +void irho_ud16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char cp [16]; + + pad(c,cp,len8); + + g8A(s,m); + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&cp[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&cp[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&cp[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&cp[12]); + + for (int i = 0; i < len8; i++) { + s[i] ^= m[i]; + } + + for (int i = 0; i < 16; i++) { + if (i < len8) { + m[i] = m[i] ^ cp[i]; + } + else { + m[i] = 0; + } + } + +#else + + irho_ud16_macro(0); + irho_ud16_macro(1); + irho_ud16_macro(2); + irho_ud16_macro(3); + irho_ud16_macro(4); + irho_ud16_macro(5); + irho_ud16_macro(6); + irho_ud16_macro(7); + irho_ud16_macro(8); + irho_ud16_macro(9); + irho_ud16_macro(10); + 
irho_ud16_macro(11); + irho_ud16_macro(12); + irho_ud16_macro(13); + irho_ud16_macro(14); + irho_ud16_macro(15); + + for (int i = 0; i < len8; i++) { + s[i] ^= m[i]; + } + + for (int i = 0; i < 16; i++) { + if (i < len8) { + m[i] = m[i] ^ cp[i]; + } + else { + m[i] = 0; + } + } + +#endif + +} +/* reset_lfsr_gf56: sets the counter to 1 (CNT[0] = 1, CNT[1..6] = 0). The word-cast path also clears CNT[7]; the byte path leaves CNT[7] untouched. */ +void reset_lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&CNT[0]) = 0x00000001; // CNT3 CNT2 CNT1 CNT0 + *(uint32_t*)(&CNT[4]) = 0x00000000; // CNT7 CNT6 CNT5 CNT4 + +#else + + CNT[0] = 0x01; + CNT[1] = 0x00; + CNT[2] = 0x00; + CNT[3] = 0x00; + CNT[4] = 0x00; + CNT[5] = 0x00; + CNT[6] = 0x00; + +#endif + +} +/* lfsr_gf56: shifts the counter held in CNT[0..6] left by one bit; when the top bit of CNT[6] was set beforehand, 0x95 is XORed into CNT[0]. The word-cast path shifts CNT[7] along with the rest; the byte path does not touch CNT[7]. */ +void lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_WORD_CAST + + uint32_t C0; + uint32_t C1; + uint32_t fb0; + + C0 = *(uint32_t*)(&CNT[0]); // CNT3 CNT2 CNT1 CNT0 + C1 = *(uint32_t*)(&CNT[4]); // CNT7 CNT6 CNT5 CNT4 + + fb0 = 0; + if (CNT[6] & 0x80) { + fb0 = 0x95; + } + + C1 = C1 << 1 | C0 >> 31; + C0 = C0 << 1 ^ fb0; + + *(uint32_t*)(&CNT[0]) = C0; + *(uint32_t*)(&CNT[4]) = C1; + +#else + + uint32_t fb0 = CNT[6] >> 7; + + CNT[6] = (CNT[6] << 1) | (CNT[5] >> 7); + CNT[5] = (CNT[5] << 1) | (CNT[4] >> 7); + CNT[4] = (CNT[4] << 1) | (CNT[3] >> 7); + CNT[3] = (CNT[3] << 1) | (CNT[2] >> 7); + CNT[2] = (CNT[2] << 1) | (CNT[1] >> 7); + CNT[1] = (CNT[1] << 1) | (CNT[0] >> 7); + if (fb0 == 1) { + CNT[0] = (CNT[0] << 1) ^ 0x95; + } + else { + CNT[0] = (CNT[0] << 1); + } + +#endif + +} +/* block_cipher: stores D in CNT[7] and calls the cipher routine currently installed in p_skinny_ctrl->func_skinny_128_384_enc on s. */ +void block_cipher( + unsigned char* s, + const unsigned char* k, unsigned char* T, + unsigned char* CNT, unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + CNT[7] = D; + p_skinny_ctrl->func_skinny_128_384_enc(s, p_skinny_ctrl, CNT, T, k); + +} +/* nonce_encryption: block_cipher with the nonce N passed as the T argument (const is cast away for the call). */ +void nonce_encryption ( + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + block_cipher(s,k,(unsigned char*)N,CNT,D,p_skinny_ctrl); + +} +/* generate_tag: writes the 16-byte g8A transform of s at *c, then sets *c to *c + 16 - *clen. */ +void generate_tag ( + unsigned char** c, unsigned char* s, + unsigned long 
long* clen) { + +#ifdef ___ENABLE_WORD_CAST + + g8A_for_Tag_Generation(s, *c); + +#else + + g8A(s, *c); + +#endif + *c = *c + 16; + *c = *c - *clen; + +} +/* msg_encryption_eqov16: encrypts one full 16-byte block with rho_eqov16, advances *M and *c by 16, steps the counter and runs nonce_encryption on s. Returns mlen - 16. */ +unsigned long long msg_encryption_eqov16 ( + const unsigned char** M, unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* p_skinny_ctrl) { + + rho_eqov16(*M, *c, s); + *c = *c + 16; + *M = *M + 16; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return mlen - 16; + +} +/* msg_encryption_ud16: encrypts a final block of mlen bytes with rho_ud16 (presumably mlen < 16 - TODO confirm against the caller), advances *M and *c by mlen, steps the counter and runs nonce_encryption on s. Returns 0. */ +unsigned long long msg_encryption_ud16 ( + const unsigned char** M, unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* p_skinny_ctrl) { + + rho_ud16(*M, *c, s, mlen); + *c = *c + mlen; + *M = *M + mlen; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return 0; + +} +/* msg_decryption_eqov16: decrypts one full 16-byte block with irho_eqov16, advances *M and *c by 16, steps the counter and runs nonce_encryption on s. Returns clen - 16. */ +unsigned long long msg_decryption_eqov16 ( + unsigned char** M, const unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long clen, + skinny_ctrl* p_skinny_ctrl) { + + irho_eqov16(*M, *c, s); + *c = *c + 16; + *M = *M + 16; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return clen - 16; + +} +/* msg_decryption_ud16: decrypts a final block of clen bytes with irho_ud16, advances *M and *c by clen, steps the counter and runs nonce_encryption on s. Returns 0. */ unsigned long long msg_decryption_ud16 ( + unsigned char** M, const unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long clen, + skinny_ctrl* p_skinny_ctrl) { + + irho_ud16(*M, *c, s, clen); + *c = *c + clen; + *M = *M + clen; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return 0; + +} +/* ad_encryption_eqov32: absorbs 32 bytes of associated data. The first 16 are XORed into s; the next 16 are copied into a local T and passed to block_cipher as the tweak block. The counter is stepped once before and once after the cipher call. Returns adlen - 32. */ +unsigned long long ad_encryption_eqov32 ( + const unsigned char** A, unsigned char* s, + const unsigned char* k, unsigned long long adlen, + unsigned char* CNT, + unsigned char D, + skinny_ctrl* 
p_skinny_ctrl) { + + unsigned char T [16]; + + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&T[0]) = *(uint32_t*)(&(*A)[0]); + *(uint32_t*)(&T[4]) = *(uint32_t*)(&(*A)[4]); + *(uint32_t*)(&T[8]) = *(uint32_t*)(&(*A)[8]); + *(uint32_t*)(&T[12]) = *(uint32_t*)(&(*A)[12]); + +#else + + T[0] = (*A)[0]; + T[1] = (*A)[1]; + T[2] = (*A)[2]; + T[3] = (*A)[3]; + T[4] = (*A)[4]; + T[5] = (*A)[5]; + T[6] = (*A)[6]; + T[7] = (*A)[7]; + T[8] = (*A)[8]; + T[9] = (*A)[9]; + T[10] = (*A)[10]; + T[11] = (*A)[11]; + T[12] = (*A)[12]; + T[13] = (*A)[13]; + T[14] = (*A)[14]; + T[15] = (*A)[15]; + +#endif + + *A = *A + 16; + block_cipher(s,k,T,CNT,D,p_skinny_ctrl); + lfsr_gf56(CNT); + + return adlen - 32; + +} + +unsigned long long ad_encryption_ov16 ( + const unsigned char** A, unsigned char* s, + const unsigned char* k, unsigned long long adlen, + unsigned char* CNT, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + unsigned char T [16]; + + adlen = adlen - 16; + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + + pad(*A, T, adlen); + *A = *A + adlen; + block_cipher(s,k,T,CNT,D,p_skinny_ctrl); + lfsr_gf56(CNT); + + return 0; + +} + +unsigned long long ad_encryption_eq16 ( + const unsigned char** A, unsigned char* s, + unsigned char* CNT) { + + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + + return 0; + +} + +unsigned long long ad_encryption_ud16( + const unsigned char** A, unsigned char* s, + unsigned long long adlen, + unsigned char* CNT) { + + rho_ad_ud16(*A, s, adlen); + *A = *A + adlen; + lfsr_gf56(CNT); + + return 0; + +} + +int crypto_aead_encrypt ( + unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, + const unsigned char* npub, + const unsigned char* k) { + + unsigned char s[16]; + unsigned char CNT[8]; + const unsigned char* A; + const unsigned char* M; + const unsigned 
char* N; + + skinny_ctrl ctrl; + ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void) nsec; + A = ad; + M = m; + N = npub; + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#else + + s[0] = 0; + s[1] = 0; + s[2] = 0; + s[3] = 0; + s[4] = 0; + s[5] = 0; + s[6] = 0; + s[7] = 0; + s[8] = 0; + s[9] = 0; + s[10] = 0; + s[11] = 0; + s[12] = 0; + s[13] = 0; + s[14] = 0; + s[15] = 0; + +#endif + + reset_lfsr_gf56(CNT); + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else while (adlen > 0) { + if (adlen < 16) { // The last block of AD is odd and incomplete + adlen = ad_encryption_ud16(&A,s,adlen,CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 16) { // The last block of AD is odd and complete + adlen = ad_encryption_eq16(&A,s,CNT); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else if (adlen < 32) { // The last block of AD is even and incomplete + adlen = ad_encryption_ov16(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 32) { // The last block of AD is even and complete + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else { // A normal full pair of blocks of AD + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + } + } + + ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + + *clen = mlen + 16; + + if (mlen == 0) { // M is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x15,&ctrl); + } + else while (mlen > 0) { + if (mlen < 16) { // The last block of M is incomplete + mlen = msg_encryption_ud16(&M,&c,N,CNT,s,k,0x15,mlen,&ctrl); + } + else if (mlen == 16) { // The last block of M is complete + mlen = msg_encryption_eqov16(&M,&c,N,CNT,s,k,0x14,mlen,&ctrl); + } + else { // A normal full message block + mlen = 
msg_encryption_eqov16(&M,&c,N,CNT,s,k,0x04,mlen,&ctrl); + } + } + + // Tag generation + generate_tag(&c,s,clen); + + return 0; + +} + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) { + + unsigned char s[16]; + unsigned char T[16]; + unsigned char CNT[8]; + const unsigned char* A; + unsigned char* M; + const unsigned char* N; + + skinny_ctrl ctrl; + ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void) nsec; + A = ad; + M = m; + N = npub; + +#ifdef ___ENABLE_WORD_CAST + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#else + + s[0] = 0; + s[1] = 0; + s[2] = 0; + s[3] = 0; + s[4] = 0; + s[5] = 0; + s[6] = 0; + s[7] = 0; + s[8] = 0; + s[9] = 0; + s[10] = 0; + s[11] = 0; + s[12] = 0; + s[13] = 0; + s[14] = 0; + s[15] = 0; + +#endif + + reset_lfsr_gf56(CNT); + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else while (adlen > 0) { + if (adlen < 16) { // The last block of AD is odd and incomplete + adlen = ad_encryption_ud16(&A,s,adlen,CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 16) { // The last block of AD is odd and complete + adlen = ad_encryption_eq16(&A,s,CNT); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else if (adlen < 32) { // The last block of AD is even and incomplete + adlen = ad_encryption_ov16(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 32) { // The last block of AD is even and complete + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else { // A normal full pair of blocks of AD + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + } + } + + 
ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + + clen = clen -16; + *mlen = clen; + + if (clen == 0) { // C is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x15,&ctrl); + } + else while (clen > 0) { + if (clen < 16) { // The last block of C is incomplete + clen = msg_decryption_ud16(&M,&c,N,CNT,s,k,0x15,clen,&ctrl); + } + else if (clen == 16) { // The last block of C is complete + clen = msg_decryption_eqov16(&M,&c,N,CNT,s,k,0x14,clen,&ctrl); + } + else { // A normal full message block + clen = msg_decryption_eqov16(&M,&c,N,CNT,s,k,0x04,clen,&ctrl); + } + } + + // Tag generation +#ifdef ___ENABLE_WORD_CAST + + g8A_for_Tag_Generation(s, T); + +#else + + g8A(s, T); + +#endif + for (int i = 0; i < 16; i++) { + if (T[i] != (*(c+i))) { + return -1; + } + } + + return 0; + +} diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32_NEC/skinny.h b/romulus/Implementations/crypto_aead/romulusn1/opt32_NEC/skinny.h new file mode 100644 index 0000000..5b36459 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32_NEC/skinny.h @@ -0,0 +1,69 @@ +#define ___SKINNY_LOOP +#define ___NUM_OF_ROUNDS_56 +#define ___ENABLE_WORD_CAST + +#include + +typedef struct ___skinny_ctrl { +#ifdef ___NUM_OF_ROUNDS_56 + uint32_t roundKeys[240]; // number of rounds : 56 +#else + uint32_t roundKeys[176]; // number of rounds : 40 +#endif + void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K); +} skinny_ctrl; + +extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned 
char* K); + +#define pack_word(x0, x1, x2, x3, w) \ + w = ((x3) << 24) ^ \ + ((x2) << 16) ^ \ + ((x1) << 8) ^ \ + (x0); + +#define unpack_word(x0, x1, x2, x3, w) \ + x0 = ((w) & 0xff); \ + x1 = (((w) >> 8) & 0xff); \ + x2 = (((w) >> 16) & 0xff); \ + x3 = ((w) >> 24); + +#define PERMUTATION() \ +/* permutation */ \ + \ + /* 7 6 5 4 3 2 1 0 */ \ + /* 5 7 2 3 6 0 4 1 */ \ + \ + /* w0 (3 2 1 0) */ \ + /* w1 (7 6 5 4) */ \ + \ + /* w0 (6 0 4 1) */ \ + /* w1 (5 7 2 3) */ \ + \ + t0 = w1 << 8; /* 6 5 4 - */ \ + t0 = t0 & 0xff00ff00; /* 6 - 4 - */ \ + \ + t1 = w1 << 16; /* 5 4 - - */ \ + t1 = t1 & 0xff000000; /* 5 - - - */ \ + \ + t2 = w1 & 0xff000000; /* 7 - - - */ \ + t2 = t2 >> 8; /* - 7 - - */ \ + t1 = t1 ^ t2; /* 5 7 - - */ \ + \ + t2 = w0 & 0xff000000; /* 3 - - - */ \ + t2 = t2 >> 24; /* - - - 3 */ \ + t1 = t1 ^ t2; /* 5 7 - 3 */ \ + \ + w1 = w0 >> 8; /* - 3 2 1 */ \ + w1 = w1 & 0x0000ff00; /* - - 2 - */ \ + w1 = w1 ^ t1; /* 5 7 2 3 */ \ + \ + t2 = w0 & 0x0000ff00; /* - - 1 - */ \ + t2 = t2 >> 8; /* - - - 1 */ \ + t0 = t0 ^ t2; /* 6 - 4 1 */ \ + \ + w0 = w0 << 16; /* 1 0 - - */ \ + w0 = w0 & 0x00ff0000; /* - 0 - - */ \ + w0 = w0 ^ t0; /* 6 0 4 1 */ + diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32_NEC/skinny_key_schedule2.c b/romulus/Implementations/crypto_aead/romulusn1/opt32_NEC/skinny_key_schedule2.c new file mode 100644 index 0000000..923d4b8 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32_NEC/skinny_key_schedule2.c @@ -0,0 +1,227 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * load * AC(c0 c1) ^ TK3 + * calc AC(c0 c1) ^ TK2 -> store + * ART(TK2) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + w0 = ((w0 << 1) & 0xfefefefe) ^ \ + (((w0 >> 7) ^ (w0 >> 5)) & 0x01010101); \ + w1 = ((w1 << 1) & 0xfefefefe) ^ \ + (((w1 >> 7) ^ (w1 >> 5)) & 0x01010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2++ = w0 ^ *tk3++; \ + *tk2++ = w1 ^ *tk3++; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(uint32_t *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = roundKeys[4]; + w1 = roundKeys[5]; + + tk2 = &roundKeys[16]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[96]; +#else + tk3 = &roundKeys[128]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th,43th, ... 
,51th,53th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + w0 = roundKeys[6]; + w1 = roundKeys[7]; + + tk2 = &roundKeys[18]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[98]; +#else + tk3 = &roundKeys[130]; +#endif + + // 2nd,4th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else + +void RunEncryptionKeyScheduleTK2(uint32_t *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = roundKeys[4]; + w1 = roundKeys[5]; + + tk2 = &roundKeys[16]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[96]; +#else + tk3 = &roundKeys[128]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... 
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + w0 = roundKeys[6]; + w1 = roundKeys[7]; + + tk2 = &roundKeys[18]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[98]; +#else + tk3 = &roundKeys[130]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32_NEC/skinny_key_schedule3.c b/romulus/Implementations/crypto_aead/romulusn1/opt32_NEC/skinny_key_schedule3.c new file mode 100644 index 0000000..39254a6 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32_NEC/skinny_key_schedule3.c @@ -0,0 +1,228 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * AC(c0 c1) ^ TK3 -> store + * ART(TK3) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + w0 = ((w0 >> 1) & 0x7f7f7f7f) ^ \ + (((w0 << 7) ^ (w0 << 1)) & 0x80808080); \ + w1 = ((w1 >> 1) & 0x7f7f7f7f) ^ \ + (((w1 << 7) ^ (w1 << 1)) & 0x80808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + *tk3++ = w0 ^ c0Val; \ + *tk3++ = w1 ^ c1Val; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(uint32_t *roundKeys) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = roundKeys[8]; + w1 = roundKeys[9]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[96]; +#else + tk3 = &roundKeys[128]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK3(0x7, 0x000); + PERMUTATION_TK3(0xf, 0x100); + PERMUTATION_TK3(0xd, 0x300); + PERMUTATION_TK3(0x7, 0x300); + PERMUTATION_TK3(0xe, 0x100); + PERMUTATION_TK3(0x9, 0x300); + PERMUTATION_TK3(0x7, 0x200); + PERMUTATION_TK3(0xd, 0x100); + PERMUTATION_TK3(0x5, 0x300); + + PERMUTATION_TK3(0x6, 0x100); + PERMUTATION_TK3(0x8, 0x100); + PERMUTATION_TK3(0x1, 0x200); + PERMUTATION_TK3(0x5, 0x000); + PERMUTATION_TK3(0x7, 0x100); + PERMUTATION_TK3(0xc, 0x100); + PERMUTATION_TK3(0x1, 0x300); + PERMUTATION_TK3(0x6, 0x000); + PERMUTATION_TK3(0xb, 0x100); + PERMUTATION_TK3(0xd, 0x200); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41td,43th, ... 
,53th,55th round + PERMUTATION_TK3(0x4, 0x300); + PERMUTATION_TK3(0x2, 0x100); + PERMUTATION_TK3(0x8, 0x000); + PERMUTATION_TK3(0x2, 0x200); + PERMUTATION_TK3(0x9, 0x000); + PERMUTATION_TK3(0x6, 0x200); + PERMUTATION_TK3(0x9, 0x100); + PERMUTATION_TK3(0x5, 0x200); + +#endif + + // even + + // load master key + w0 = roundKeys[10]; + w1 = roundKeys[11]; + + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[98]; +#else + tk3 = &roundKeys[130]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x000); + PERMUTATION_TK3(0xf, 0x000); + PERMUTATION_TK3(0xe, 0x300); + PERMUTATION_TK3(0xb, 0x300); + PERMUTATION_TK3(0xf, 0x200); + PERMUTATION_TK3(0xc, 0x300); + PERMUTATION_TK3(0x3, 0x300); + PERMUTATION_TK3(0xe, 0x000); + PERMUTATION_TK3(0xa, 0x300); + PERMUTATION_TK3(0xb, 0x200); + + PERMUTATION_TK3(0xc, 0x200); + PERMUTATION_TK3(0x0, 0x300); + PERMUTATION_TK3(0x2, 0x000); + PERMUTATION_TK3(0xb, 0x000); + PERMUTATION_TK3(0xe, 0x200); + PERMUTATION_TK3(0x8, 0x300); + PERMUTATION_TK3(0x3, 0x200); + PERMUTATION_TK3(0xd, 0x000); + PERMUTATION_TK3(0x6, 0x300); + PERMUTATION_TK3(0xa, 0x100); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK3(0x9, 0x200); + PERMUTATION_TK3(0x4, 0x200); + PERMUTATION_TK3(0x1, 0x100); + PERMUTATION_TK3(0x4, 0x000); + PERMUTATION_TK3(0x3, 0x100); + PERMUTATION_TK3(0xc, 0x000); + PERMUTATION_TK3(0x2, 0x300); + PERMUTATION_TK3(0xa, 0x000); + +#endif + +} + +#else + +void RunEncryptionKeyScheduleTK3(uint32_t *roundKeys, unsigned char *pRC) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + uint16_t c0; + uint16_t c1; + + // odd + + // load master key + w0 = roundKeys[8]; + w1 = roundKeys[9]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = &roundKeys[96]; +#else + tk3 = &roundKeys[128]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + pRC += 4; + // 3rd,5th, ... 
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + c1 <<= 8; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + w0 = roundKeys[10]; + w1 = roundKeys[11]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; + tk3 = &roundKeys[98]; +#else + pRC -= 110; + tk3 = &roundKeys[130]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + c1 <<= 8; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32_NEC/skinny_main.c b/romulus/Implementations/crypto_aead/romulusn1/opt32_NEC/skinny_main.c new file mode 100644 index 0000000..74222ee --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32_NEC/skinny_main.c @@ -0,0 +1,537 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * ART(TK1) -> store + * load AC(c0 c1) ^ TK3 ^ TK2 + * load TK1 + * calc AC(c0 c1) ^ TK3 ^ TK2 ^ TK1 -> use at (AC->ART) + * SC->SR->(AC->ART)->MC + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +/* + * S-BOX + */ +unsigned char SBOX[] += { + // Original + 0x65, 0x4c, 0x6a, 0x42, 0x4b, 0x63, 0x43, 0x6b, 0x55, 0x75, 0x5a, 0x7a, 0x53, 0x73, 0x5b, 0x7b, + 0x35, 0x8c, 0x3a, 0x81, 0x89, 0x33, 0x80, 0x3b, 0x95, 0x25, 0x98, 0x2a, 0x90, 0x23, 0x99, 0x2b, + 0xe5, 0xcc, 0xe8, 0xc1, 0xc9, 0xe0, 0xc0, 0xe9, 0xd5, 0xf5, 0xd8, 0xf8, 0xd0, 0xf0, 0xd9, 0xf9, + 0xa5, 0x1c, 0xa8, 0x12, 0x1b, 0xa0, 0x13, 0xa9, 0x05, 0xb5, 0x0a, 0xb8, 0x03, 0xb0, 0x0b, 0xb9, + 0x32, 0x88, 0x3c, 0x85, 0x8d, 0x34, 0x84, 0x3d, 0x91, 0x22, 0x9c, 0x2c, 0x94, 0x24, 0x9d, 0x2d, + 0x62, 0x4a, 0x6c, 0x45, 0x4d, 0x64, 0x44, 0x6d, 0x52, 0x72, 0x5c, 0x7c, 0x54, 0x74, 0x5d, 0x7d, + 0xa1, 0x1a, 0xac, 0x15, 0x1d, 0xa4, 0x14, 0xad, 0x02, 0xb1, 0x0c, 0xbc, 0x04, 0xb4, 0x0d, 0xbd, + 0xe1, 0xc8, 0xec, 0xc5, 0xcd, 0xe4, 0xc4, 0xed, 0xd1, 0xf1, 0xdc, 0xfc, 0xd4, 0xf4, 0xdd, 0xfd, + 0x36, 0x8e, 0x38, 0x82, 0x8b, 0x30, 0x83, 0x39, 0x96, 0x26, 0x9a, 0x28, 0x93, 0x20, 0x9b, 0x29, + 0x66, 0x4e, 0x68, 0x41, 0x49, 0x60, 0x40, 0x69, 0x56, 0x76, 0x58, 0x78, 0x50, 0x70, 0x59, 0x79, + 0xa6, 0x1e, 0xaa, 0x11, 0x19, 0xa3, 0x10, 0xab, 0x06, 0xb6, 0x08, 0xba, 0x00, 0xb3, 0x09, 0xbb, + 0xe6, 0xce, 0xea, 0xc2, 0xcb, 0xe3, 0xc3, 0xeb, 0xd6, 0xf6, 0xda, 0xfa, 0xd3, 0xf3, 0xdb, 0xfb, + 0x31, 0x8a, 0x3e, 0x86, 0x8f, 0x37, 0x87, 0x3f, 0x92, 0x21, 0x9e, 0x2e, 0x97, 0x27, 0x9f, 0x2f, + 0x61, 0x48, 0x6e, 0x46, 0x4f, 0x67, 0x47, 0x6f, 0x51, 0x71, 0x5e, 0x7e, 0x57, 0x77, 0x5f, 0x7f, + 0xa2, 0x18, 0xae, 0x16, 0x1f, 0xa7, 0x17, 0xaf, 0x01, 0xb2, 0x0e, 0xbe, 0x07, 0xb7, 0x0f, 0xbf, + 0xe2, 0xca, 0xee, 0xc6, 0xcf, 0xe7, 0xc7, 0xef, 0xd2, 0xf2, 0xde, 0xfe, 0xd7, 0xf7, 0xdf, 0xff, +}; + + /* + * S-BOX ^ AC(c2) + */ 
+unsigned char SBOX2[] += { // Original ^ c2(0x02) + 0x67, 0x4e, 0x68, 0x40, 0x49, 0x61, 0x41, 0x69, 0x57, 0x77, 0x58, 0x78, 0x51, 0x71, 0x59, 0x79, + 0x37, 0x8e, 0x38, 0x83, 0x8b, 0x31, 0x82, 0x39, 0x97, 0x27, 0x9a, 0x28, 0x92, 0x21, 0x9b, 0x29, + 0xe7, 0xce, 0xea, 0xc3, 0xcb, 0xe2, 0xc2, 0xeb, 0xd7, 0xf7, 0xda, 0xfa, 0xd2, 0xf2, 0xdb, 0xfb, + 0xa7, 0x1e, 0xaa, 0x10, 0x19, 0xa2, 0x11, 0xab, 0x07, 0xb7, 0x08, 0xba, 0x01, 0xb2, 0x09, 0xbb, + 0x30, 0x8a, 0x3e, 0x87, 0x8f, 0x36, 0x86, 0x3f, 0x93, 0x20, 0x9e, 0x2e, 0x96, 0x26, 0x9f, 0x2f, + 0x60, 0x48, 0x6e, 0x47, 0x4f, 0x66, 0x46, 0x6f, 0x50, 0x70, 0x5e, 0x7e, 0x56, 0x76, 0x5f, 0x7f, + 0xa3, 0x18, 0xae, 0x17, 0x1f, 0xa6, 0x16, 0xaf, 0x00, 0xb3, 0x0e, 0xbe, 0x06, 0xb6, 0x0f, 0xbf, + 0xe3, 0xca, 0xee, 0xc7, 0xcf, 0xe6, 0xc6, 0xef, 0xd3, 0xf3, 0xde, 0xfe, 0xd6, 0xf6, 0xdf, 0xff, + 0x34, 0x8c, 0x3a, 0x80, 0x89, 0x32, 0x81, 0x3b, 0x94, 0x24, 0x98, 0x2a, 0x91, 0x22, 0x99, 0x2b, + 0x64, 0x4c, 0x6a, 0x43, 0x4b, 0x62, 0x42, 0x6b, 0x54, 0x74, 0x5a, 0x7a, 0x52, 0x72, 0x5b, 0x7b, + 0xa4, 0x1c, 0xa8, 0x13, 0x1b, 0xa1, 0x12, 0xa9, 0x04, 0xb4, 0x0a, 0xb8, 0x02, 0xb1, 0x0b, 0xb9, + 0xe4, 0xcc, 0xe8, 0xc0, 0xc9, 0xe1, 0xc1, 0xe9, 0xd4, 0xf4, 0xd8, 0xf8, 0xd1, 0xf1, 0xd9, 0xf9, + 0x33, 0x88, 0x3c, 0x84, 0x8d, 0x35, 0x85, 0x3d, 0x90, 0x23, 0x9c, 0x2c, 0x95, 0x25, 0x9d, 0x2d, + 0x63, 0x4a, 0x6c, 0x44, 0x4d, 0x65, 0x45, 0x6d, 0x53, 0x73, 0x5c, 0x7c, 0x55, 0x75, 0x5d, 0x7d, + 0xa0, 0x1a, 0xac, 0x14, 0x1d, 0xa5, 0x15, 0xad, 0x03, 0xb0, 0x0c, 0xbc, 0x05, 0xb5, 0x0d, 0xbd, + 0xe0, 0xc8, 0xec, 0xc4, 0xcd, 0xe5, 0xc5, 0xed, 0xd0, 0xf0, 0xdc, 0xfc, 0xd5, 0xf5, 0xdd, 0xfd, +}; + +#ifdef ___SKINNY_LOOP +/* + * Round Constants + */ +unsigned char RC[] += { + 0x01, 0x00, 0x03, 0x00, 0x07, 0x00, 0x0f, 0x00, 0x0f, 0x01, 0x0e, 0x03, 0x0d, 0x03, 0x0b, 0x03, + 0x07, 0x03, 0x0f, 0x02, 0x0e, 0x01, 0x0c, 0x03, 0x09, 0x03, 0x03, 0x03, 0x07, 0x02, 0x0e, 0x00, + 0x0d, 0x01, 0x0a, 0x03, 0x05, 0x03, 0x0b, 0x02, 0x06, 0x01, 0x0c, 0x02, 0x08, 0x01, 0x00, 0x03, + 
0x01, 0x02, 0x02, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x07, 0x01, 0x0e, 0x02, 0x0c, 0x01, 0x08, 0x03, + 0x01, 0x03, 0x03, 0x02, 0x06, 0x00, 0x0d, 0x00, 0x0b, 0x01, 0x06, 0x03, 0x0d, 0x02, 0x0a, 0x01, +#ifdef ___NUM_OF_ROUNDS_56 + 0x04, 0x03, 0x09, 0x02, 0x02, 0x01, 0x04, 0x02, 0x08, 0x00, 0x01, 0x01, 0x02, 0x02, 0x04, 0x00, + 0x09, 0x00, 0x03, 0x01, 0x06, 0x02, 0x0c, 0x00, 0x09, 0x01, 0x02, 0x03, 0x05, 0x02, 0x0a, 0x00, +#endif + }; +#endif + +extern void Encrypt(unsigned char *block, uint32_t *roundKeys, unsigned char *sbox, unsigned char *sbox2); +extern void RunEncryptionKeyScheduleTK2(uint32_t *roundKeys); +#ifdef ___SKINNY_LOOP +extern void RunEncryptionKeyScheduleTK3(uint32_t *roundKeys, unsigned char *pRC); +#else +extern void RunEncryptionKeyScheduleTK3(uint32_t *roundKeys); +#endif + +void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; +#ifndef ___ENABLE_WORD_CAST + pack_word(CNT[0], CNT[1], CNT[2], CNT[3], pt[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pack_word(T[0], T[1], T[2], T[3], pt[4]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pack_word(T[8], T[9], T[10], T[11], pt[6]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + pack_word(K[0], K[1], K[2], K[3], pt[8]); + pack_word(K[7], K[4], K[5], K[6], pt[9]); + pack_word(K[8], K[9], K[10], K[11], pt[10]); + pack_word(K[15], K[12], K[13], K[14], pt[11]); +#else + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + pt[8] = *(uint32_t*)(&K[0]); + pack_word(K[7], K[4], K[5], K[6], pt[9]); + pt[10] = *(uint32_t*)(&K[8]); + pack_word(K[15], K[12], K[13], K[14], pt[11]); +#endif + +#ifdef ___SKINNY_LOOP + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys, RC); +#else 
+ RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys); +#endif + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + + pskinny_ctrl->func_skinny_128_384_enc = skinny_128_384_enc12_12; + +} + +void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)K; + + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; +#ifndef ___ENABLE_WORD_CAST + pack_word(CNT[0], CNT[1], CNT[2], CNT[3], pt[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pack_word(T[0], T[1], T[2], T[3], pt[4]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pack_word(T[8], T[9], T[10], T[11], pt[6]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); +#else + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); +#endif + + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)T; + (void)K; + + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; +#ifndef ___ENABLE_WORD_CAST + pack_word(CNT[0], CNT[1], CNT[2], CNT[3], pt[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); +#else + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); +#endif + + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +#define PERMUTATION_TK1() \ +/* permutation */ \ + \ + PERMUTATION(); \ + \ + /* store */ \ + \ + *tk1++ = w0; \ + *tk1++ = w1; + +#define SBOX_0(w) \ + \ + t0 = (w) & 0xff; \ + t1 = (w >> 8) & 0xff; \ + t2 = (w >> 16) & 0xff; \ + t3 = (w >> 24); \ + \ + t0 = sbox[t0]; \ + t1 = sbox[t1]; \ + t2 = sbox[t2]; \ + t3 = sbox[t3]; \ + 
\ + w = (t0) ^ \ + (t1 << 8) ^ \ + (t2 << 16) ^ \ + (t3 << 24); + +#define SBOX_8(w) \ + \ + t0 = (w) & 0xff; \ + t1 = (w >> 8) & 0xff; \ + t2 = (w >> 16) & 0xff; \ + t3 = (w >> 24); \ + \ + t0 = sbox[t0]; \ + t1 = sbox[t1]; \ + t2 = sbox[t2]; \ + t3 = sbox[t3]; \ + \ + w = (t0 << 8) ^ \ + (t1 << 16) ^ \ + (t2 << 24) ^ \ + (t3); + +#define SBOX_16(w) \ + \ + t0 = (w) & 0xff; \ + t1 = (w >> 8) & 0xff; \ + t2 = (w >> 16) & 0xff; \ + t3 = (w >> 24); \ + \ + t0 = sbox2[t0]; /* AC(c2) */ \ + t1 = sbox[t1]; \ + t2 = sbox[t2]; \ + t3 = sbox[t3]; \ + \ + w = (t0 << 16) ^ \ + (t1 << 24) ^ \ + (t2) ^ \ + (t3 << 8); + +#define SBOX_24(w) \ + \ + t0 = (w) & 0xff; \ + t1 = (w >> 8) & 0xff; \ + t2 = (w >> 16) & 0xff; \ + t3 = (w >> 24); \ + \ + t0 = sbox[t0]; \ + t1 = sbox[t1]; \ + t2 = sbox[t2]; \ + t3 = sbox[t3]; \ + \ + w = (t0 << 24) ^ \ + (t1) ^ \ + (t2 << 8) ^ \ + (t3 << 16); + +#define SKINNY_MAIN() \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows) */ \ + \ + SBOX_0(w0); \ + SBOX_8(w1); \ + SBOX_16(w2); \ + SBOX_24(w3); \ + \ + /* LUT(with ShiftRows & AC(c2) */ \ + \ + w0 ^= *tk2++; \ + w1 ^= *tk2++; \ + \ + /* Load TK1 */ \ + \ + w0 ^= *tk1++; \ + w1 ^= *tk1++; \ + \ + /* MC */ \ + /* 0 2 3 */ \ + /* 0 */ \ + /* 1 2 */ \ + /* 0 2 */ \ + \ + /* 0^2 */ \ + t0 = w0 ^ w2; \ + \ + /* 1^2 */ \ + w2 = w1 ^ w2; \ + \ + /* 0 */ \ + w1 = w0; \ + \ + /* 0^2^3 */ \ + w0 = t0 ^ w3; \ + \ + /* 0^2 */ \ + w3 = t0; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2) */ \ + \ + SBOX_0(w0); \ + SBOX_8(w1); \ + SBOX_16(w2); \ + SBOX_24(w3); \ + \ + /* Load TK2^TK3^AC(c0 c1) */ \ + \ + w0 ^= *tk2++; \ + w1 ^= *tk2++; \ + \ + /* MC */ \ + /* 0 2 3 */ \ + /* 0 */ \ + /* 1 2 */ \ + /* 0 2 */ \ + \ + /* 0^2 */ \ + t0 = w0 ^ w2; \ + \ + /* 1^2 */ \ + w2 = w1 ^ w2; \ + \ + /* 0 */ \ + w1 = w0; \ + \ + /* 0^2^3 */ \ + w0 = t0 ^ w3; \ + \ + /* 0^2 */ \ + w3 = t0; + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, uint32_t *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + 
uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t w0; + uint32_t w1; + uint32_t w2; + uint32_t w3; + +// TK1 + + // load master key + w0 = roundKeys[0]; + w1 = roundKeys[1]; + + // 1st round + // not need to store + + tk1 = &roundKeys[2]; + + // 2nd, ... ,8th round + PERMUTATION_TK1(); + PERMUTATION_TK1(); + PERMUTATION_TK1(); + PERMUTATION_TK1(); + PERMUTATION_TK1(); + PERMUTATION_TK1(); + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + +#ifndef ___ENABLE_WORD_CAST + pack_word(block[0], block[1], block[2], block[3], w0); + pack_word(block[4], block[5], block[6], block[7], w1); + pack_word(block[8], block[9], block[10], block[11], w2); + pack_word(block[12], block[13], block[14], block[15], w3); +#else + w0 = *(uint32_t*)(&block[0]); + w1 = *(uint32_t*)(&block[4]); + w2 = *(uint32_t*)(&block[8]); + w3 = *(uint32_t*)(&block[12]); +#endif + + tk2 = &roundKeys[16]; + tk1 = &roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = &roundKeys[0]; + + // 17th, ...,32th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = &roundKeys[0]; + + // 33th, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = &roundKeys[0]; + + // 49th, ... 
,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +#ifndef ___ENABLE_WORD_CAST + unpack_word(block[0], block[1], block[2], block[3], w0); + unpack_word(block[4], block[5], block[6], block[7], w1); + unpack_word(block[8], block[9], block[10], block[11], w2); + unpack_word(block[12], block[13], block[14], block[15], w3); +#else + *(uint32_t*)(&block[0]) = w0; + *(uint32_t*)(&block[4]) = w1; + *(uint32_t*)(&block[8]) = w2; + *(uint32_t*)(&block[12]) = w3; +#endif + +} + +#else + +void Encrypt(unsigned char *block, uint32_t *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t w0; + uint32_t w1; + uint32_t w2; + uint32_t w3; + +// TK1 + + // load master key + w0 = roundKeys[0]; + w1 = roundKeys[1]; + + // 1st round + // not need to store + + tk1 = &roundKeys[2]; + + // 2nd, ... ,8th round + for(int i=0;i<7;i++) + { + PERMUTATION_TK1(); + } + +// SB+AC+ShR+MC + +#ifndef ___ENABLE_WORD_CAST + pack_word(block[0], block[1], block[2], block[3], w0); + pack_word(block[4], block[5], block[6], block[7], w1); + pack_word(block[8], block[9], block[10], block[11], w2); + pack_word(block[12], block[13], block[14], block[15], w3); +#else + w0 = *(uint32_t*)(&block[0]); + w1 = *(uint32_t*)(&block[4]); + w2 = *(uint32_t*)(&block[8]); + w3 = *(uint32_t*)(&block[12]); +#endif + + tk2 = &roundKeys[16]; + + // 1st, ... ,32th or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = &roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33th , ... ,40th or 49th, .... 
,56th round + { + tk1 = &roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +#ifndef ___ENABLE_WORD_CAST + unpack_word(block[0], block[1], block[2], block[3], w0); + unpack_word(block[4], block[5], block[6], block[7], w1); + unpack_word(block[8], block[9], block[10], block[11], w2); + unpack_word(block[12], block[13], block[14], block[15], w3); +#else + *(uint32_t*)(&block[0]) = w0; + *(uint32_t*)(&block[4]) = w1; + *(uint32_t*)(&block[8]) = w2; + *(uint32_t*)(&block[12]) = w3; +#endif + +} + +#endif