/*
 * pad - build one 16-byte block from the first len8 bytes of m.
 *
 *   m    : source bytes; exactly min(len8, 16) bytes are read
 *   mp   : destination, 16 bytes, fully written
 *   len8 : number of valid bytes at m
 *
 * Layout of mp:
 *   len8 <= 0      : sixteen zero bytes
 *   0 < len8 < 16  : the len8 source bytes, zero fill, and the value len8
 *                    stored in mp[15]
 *   len8 >= 16     : the first sixteen source bytes, unchanged
 *
 * The block is assembled byte by byte so that no byte past m[len8-1] is
 * touched and no alignment is required of either pointer.
 */
void pad (const unsigned char* m, unsigned char* mp, int len8) {

  int n = len8;
  int i;

  if (n < 0) {
    n = 0;
  }
  if (n > 16) {
    n = 16;
  }

  for (i = 0; i < n; i++) {
    mp[i] = m[i];
  }
  for (; i < 16; i++) {
    mp[i] = 0;
  }

  /* A partial block carries its own length in the final byte. */
  if (n > 0 && n < 16) {
    mp[15] = (unsigned char)n;
  }

}
/*
 * g8A - apply the byte-wise map G to the 16-byte state s, writing c.
 *
 * For every byte x = (x7 x6 ... x0) the output byte is
 *   (x7^x0, x7, x6, x5, x4, x3, x2, x1)
 * i.e. a right shift by one with x0 ^ x7 fed into the top bit.
 *
 *   s : 16-byte input state (not modified)
 *   c : 16-byte output
 *
 * Works on single bytes, so neither pointer needs any particular
 * alignment and the result does not depend on host byte order.
 */
void g8A (unsigned char* s, unsigned char* c) {

  int i;

  for (i = 0; i < 16; i++) {
    unsigned char x = s[i];
    c[i] = (unsigned char)((x >> 1) ^ (x & 0x80) ^ ((x & 0x01) << 7));
  }

}
/*
 * g8A_for_Tag_Generation - same byte map as g8A, intended for writing the
 * tag straight into a caller buffer of arbitrary alignment.
 *
 * Each output byte is (x >> 1) with bit 7 set to x0 ^ x7 of the input byte.
 *
 *   s : 16-byte input state (not modified)
 *   c : 16-byte output, any alignment
 *
 * Both the loads from s and the stores to c are single-byte accesses.
 */
void g8A_for_Tag_Generation (unsigned char* s, unsigned char* c) {

  int i;

  for (i = 0; i < 16; i++) {
    unsigned char x = s[i];
    c[i] = (unsigned char)((x >> 1) ^ (x & 0x80) ^ ((x & 0x01) << 7));
  }

}
/*
 * rho_ad_eqov16 - absorb one full 16-byte block: s ^= m.
 *
 *   m : 16 input bytes
 *   s : 16-byte state, updated in place
 */
void rho_ad_eqov16 (
  const unsigned char* m,
  unsigned char* s) {

  int i;

  for (i = 0; i < 16; i++) {
    s[i] ^= m[i];
  }

}

/*
 * rho_ad_ud16 - absorb a block of len8 bytes (normally fewer than 16).
 *
 * Equivalent to XOR-ing the padded block into s: the len8 input bytes go
 * into s[0..len8-1] and, for 0 < len8 < 16, the length len8 is XOR-ed into
 * s[15].  Only min(len8, 16) bytes of m are read.
 *
 *   m    : input bytes
 *   s    : 16-byte state, updated in place
 *   len8 : number of valid bytes at m
 */
void rho_ad_ud16 (
  const unsigned char* m,
  unsigned char* s,
  int len8) {

  int n = len8;
  int i;

  if (n < 0) {
    n = 0;
  }
  if (n > 16) {
    n = 16;
  }

  for (i = 0; i < n; i++) {
    s[i] ^= m[i];
  }
  if (n > 0 && n < 16) {
    s[15] ^= (unsigned char)n;
  }

}

/*
 * rho_eqov16 - process one full 16-byte message block.
 *
 *   c = G(s) ^ m      (G is the byte map implemented by g8A)
 *   s = s ^ m
 *
 *   m : 16 input bytes
 *   c : 16 output bytes
 *   s : 16-byte state, updated in place
 */
void rho_eqov16 (
  const unsigned char* m,
  unsigned char* c,
  unsigned char* s) {

  int i;

  for (i = 0; i < 16; i++) {
    unsigned char x = s[i];
    unsigned char g = (unsigned char)((x >> 1) ^ (x & 0x80) ^ ((x & 0x01) << 7));
    unsigned char b = m[i];

    s[i] = (unsigned char)(x ^ b);
    c[i] = (unsigned char)(g ^ b);
  }

}
/*
 * rho_ud16 - process a message block of len8 bytes (normally the last,
 * possibly partial, block).
 *
 * For i < min(len8, 16):
 *   c[i] = G(s[i]) ^ m[i]
 *   s[i] = s[i] ^ m[i]
 * and, for 0 < len8 < 16, the block length is XOR-ed into s[15], which
 * matches absorbing the padded block.
 *
 * Exactly min(len8, 16) bytes are read from m and written to c; bytes of c
 * beyond that are left untouched.
 *
 *   m    : input bytes
 *   c    : output bytes
 *   s    : 16-byte state, updated in place
 *   len8 : number of valid bytes at m
 */
void rho_ud16 (
  const unsigned char* m,
  unsigned char* c,
  unsigned char* s,
  int len8) {

  int n = len8;
  int i;

  if (n < 0) {
    n = 0;
  }
  if (n > 16) {
    n = 16;
  }

  for (i = 0; i < n; i++) {
    unsigned char x = s[i];
    unsigned char g = (unsigned char)((x >> 1) ^ (x & 0x80) ^ ((x & 0x01) << 7));
    unsigned char b = m[i];

    s[i] = (unsigned char)(x ^ b);
    c[i] = (unsigned char)(g ^ b);
  }

  if (n > 0 && n < 16) {
    s[15] ^= (unsigned char)n;
  }

}
/*
 * irho_eqov16 - inverse of rho_eqov16 for one full 16-byte block.
 *
 *   m = G(s) ^ c      (G is the byte map implemented by g8A)
 *   s = s ^ m
 *
 *   m : 16 output bytes (recovered message)
 *   c : 16 input bytes
 *   s : 16-byte state, updated in place
 */
void irho_eqov16 (
  unsigned char* m,
  const unsigned char* c,
  unsigned char* s) {

  int i;

  for (i = 0; i < 16; i++) {
    unsigned char x = s[i];
    unsigned char g = (unsigned char)((x >> 1) ^ (x & 0x80) ^ ((x & 0x01) << 7));
    unsigned char p = (unsigned char)(g ^ c[i]);

    s[i] = (unsigned char)(x ^ p);
    m[i] = p;
  }

}
/*
 * irho_ud16 - inverse of rho_ud16 for a block of len8 bytes (normally the
 * last, possibly partial, block).
 *
 * For i < min(len8, 16):
 *   m[i] = G(s[i]) ^ c[i]
 *   s[i] = s[i] ^ m[i]
 * and, for 0 < len8 < 16, the block length is XOR-ed into s[15].
 *
 * Exactly min(len8, 16) bytes are read from c and written to m, so a
 * plaintext buffer sized to the message length is never overrun.
 *
 *   m    : output bytes (recovered message)
 *   c    : input bytes
 *   s    : 16-byte state, updated in place
 *   len8 : number of valid bytes at c
 */
void irho_ud16 (
  unsigned char* m,
  const unsigned char* c,
  unsigned char* s,
  int len8) {

  int n = len8;
  int i;

  if (n < 0) {
    n = 0;
  }
  if (n > 16) {
    n = 16;
  }

  for (i = 0; i < n; i++) {
    unsigned char x = s[i];
    unsigned char g = (unsigned char)((x >> 1) ^ (x & 0x80) ^ ((x & 0x01) << 7));
    unsigned char p = (unsigned char)(g ^ c[i]);

    s[i] = (unsigned char)(x ^ p);
    m[i] = p;
  }

  if (n > 0 && n < 16) {
    s[15] ^= (unsigned char)n;
  }

}
/*
 * reset_lfsr_gf56 - put the block counter into its initial state.
 *
 * Sets CNT[0] to 1 and CNT[1..7] to 0.
 *
 *   CNT : 8-byte counter buffer, any alignment
 */
void reset_lfsr_gf56 (unsigned char* CNT) {

  int i;

  CNT[0] = 0x01;
  for (i = 1; i < 8; i++) {
    CNT[i] = 0x00;
  }

}
CNT) { + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&CNT[0]) = 0x0000000000000001; // CNT7 CNT6 CNT5 CNT4 CNT3 CNT2 CNT1 CNT0 + +#else + + *(uint32_t*)(&CNT[0]) = 0x00000001; // CNT3 CNT2 CNT1 CNT0 + *(uint32_t*)(&CNT[4]) = 0x00000000; // CNT7 CNT6 CNT5 CNT4 + +#endif + +} + +void lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t C0; + uint64_t fb0; + + C0 = *(uint64_t*)(&CNT[0]); // CNT7 CNT6 CNT5 CNT4 CNT3 CNT2 CNT1 CNT0 + + fb0 = 0; + if (CNT[6] & 0x80) { + fb0 = 0x95; + } + + C0 = C0 << 1 ^ fb0; + + *(uint64_t*)(&CNT[0]) = C0; + +#else + + uint32_t C0; + uint32_t C1; + uint32_t fb0; + + C0 = *(uint32_t*)(&CNT[0]); // CNT3 CNT2 CNT1 CNT0 + C1 = *(uint32_t*)(&CNT[4]); // CNT7 CNT6 CNT5 CNT4 + + fb0 = 0; + if (CNT[6] & 0x80) { + fb0 = 0x95; + } + + C1 = C1 << 1 | C0 >> 31; + C0 = C0 << 1 ^ fb0; + + *(uint32_t*)(&CNT[0]) = C0; + *(uint32_t*)(&CNT[4]) = C1; + +#endif + +} + +void block_cipher( + unsigned char* s, + const unsigned char* k, unsigned char* T, + unsigned char* CNT, unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + CNT[7] = D; + p_skinny_ctrl->func_skinny_128_384_enc(s, p_skinny_ctrl, CNT, T, k); + +} + +void nonce_encryption ( + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + block_cipher(s,k,(unsigned char*)N,CNT,D,p_skinny_ctrl); + +} + +void generate_tag ( + unsigned char** c, unsigned char* s, + unsigned long long* clen) { + + g8A_for_Tag_Generation(s, *c); + + *c = *c + 16; + *c = *c - *clen; + +} + +unsigned long long msg_encryption ( + const unsigned char** M, unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* l_skinny_ctrl) { + + int len8; + + if (mlen >= 16) { + len8 = 16; + mlen = mlen - 16; + rho_eqov16(*M, *c, s); + } + else { + len8 = mlen; + mlen = 0; + rho_ud16(*M, *c, s, len8); + } + *c = *c + 
len8; + *M = *M + len8; + lfsr_gf56(CNT); + if (mlen != 0) { + nonce_encryption(N,CNT,s,k,D,l_skinny_ctrl); + } + return mlen; + +} + +unsigned long long msg_decryption ( + unsigned char** M, const unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long clen, + skinny_ctrl* l_skinny_ctrl) { + + int len8; + + if (clen >= 16) { + len8 = 16; + clen = clen - 16; + irho_eqov16(*M, *c, s); + } + else { + len8 = clen; + clen = 0; + irho_ud16(*M, *c, s, len8); + } + *c = *c + len8; + *M = *M + len8; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,l_skinny_ctrl); + return clen; + +} + +unsigned long long ad2msg_encryption ( + const unsigned char** M, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* l_skinny_ctrl) { + + unsigned char T [16]; + int len8; + + if (mlen <= 16) { + len8 = mlen; + mlen = 0; + } + else { + len8 = 16; + mlen = mlen - 16; + } + + pad (*M,T,len8); + block_cipher(s,k,T,CNT,D,l_skinny_ctrl); + lfsr_gf56(CNT); + *M = *M + len8; + + return mlen; + +} + +unsigned long long ad_encryption ( + const unsigned char** A, unsigned char* s, + const unsigned char* k, unsigned long long adlen, + unsigned char* CNT, + unsigned char D, + skinny_ctrl* l_skinny_ctrl) { + + unsigned char T [16]; + int len8; + + if (adlen >= 16) { + len8 = 16; + adlen = adlen - 16; + + rho_ad_eqov16(*A, s); + } + else { + len8 = adlen; + adlen = 0; + rho_ad_ud16(*A, s, len8); + } + *A = *A + len8; + lfsr_gf56(CNT); + if (adlen != 0) { + if (adlen >= 16) { + len8 = 16; + adlen = adlen - 16; + } + else { + len8 = adlen; + adlen = 0; + } + pad(*A, T, len8); + *A = *A + len8; + block_cipher(s,k,T,CNT,D,l_skinny_ctrl); + lfsr_gf56(CNT); + } + + return adlen; + +} + +int crypto_aead_encrypt ( + unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned 
long long adlen, + const unsigned char* nsec, + const unsigned char* npub, + const unsigned char* k) { + + unsigned char s[16]; + unsigned char CNT[8]; + unsigned char T[16]; + const unsigned char* N; + unsigned char w; + unsigned long long xlen; + + skinny_ctrl l_skinny_ctrl; + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void)nsec; + N = npub; + + xlen = mlen; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = 0; + *(uint64_t*)(&s[8]) = 0; + +#else + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#endif + + reset_lfsr_gf56(CNT); + + w = 48; + + if (adlen == 0) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 0) { + w = w ^ 8; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) < 16) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 16) { + w = w ^ 0; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else { + w = w ^ 10; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + } + else while (adlen > 0) { + adlen = 
ad_encryption(&ad,s,k,adlen,CNT,40,&l_skinny_ctrl); + } + + if ((w & 8) == 0) { + xlen = ad2msg_encryption (&m,CNT,s,k,44,xlen,&l_skinny_ctrl); + } + else if (mlen == 0) { + lfsr_gf56(CNT); + } + while (xlen > 0) { + xlen = ad_encryption(&m,s,k,xlen,CNT,44,&l_skinny_ctrl); + } + nonce_encryption(N,CNT,s,k,w,&l_skinny_ctrl); + + // Tag generation + g8A(s, T); + + m = m - mlen; + + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = *(uint64_t*)(&T[0]); + *(uint64_t*)(&s[8]) = *(uint64_t*)(&T[8]); + +#else + + *(uint32_t*)(&s[0]) = *(uint32_t*)(&T[0]); + *(uint32_t*)(&s[4]) = *(uint32_t*)(&T[4]); + *(uint32_t*)(&s[8]) = *(uint32_t*)(&T[8]); + *(uint32_t*)(&s[12]) = *(uint32_t*)(&T[12]); + +#endif + + *clen = mlen + 16; + + if (mlen > 0) { + nonce_encryption(N,CNT,s,k,36,&l_skinny_ctrl); + while (mlen > 16) { + mlen = msg_encryption(&m,&c,N,CNT,s,k,36,mlen,&l_skinny_ctrl); + } + rho_ud16(m, c, s, mlen); + c = c + mlen; + m = m + mlen; + } + + // Tag Concatenation + c[0] = T[0]; + c[1] = T[1]; + c[2] = T[2]; + c[3] = T[3]; + c[4] = T[4]; + c[5] = T[5]; + c[6] = T[6]; + c[7] = T[7]; + c[8] = T[8]; + c[9] = T[9]; + c[10] = T[10]; + c[11] = T[11]; + c[12] = T[12]; + c[13] = T[13]; + c[14] = T[14]; + c[15] = T[15]; + + c = c - *clen; + + return 0; + +} + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) { + + unsigned char s[16]; + unsigned char CNT[8]; + unsigned char T[16]; + const unsigned char* N; + unsigned char w; + unsigned long long xlen; + const unsigned char* mauth; + unsigned char* p1; + unsigned char* p2; + + skinny_ctrl l_skinny_ctrl; + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void)nsec; + mauth = m; + + N = npub; + + xlen = clen-16; + 
+#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = 0; + *(uint64_t*)(&s[8]) = 0; + +#else + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#endif + + reset_lfsr_gf56(CNT); + + w = 48; + + if (adlen == 0) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 0) { + w = w ^ 8; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) < 16) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 16) { + w = w ^ 0; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else { + w = w ^ 10; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + } + else while (adlen > 0) { + adlen = ad_encryption(&ad,s,k,adlen,CNT,40,&l_skinny_ctrl); + } + + if ((w & 8) == 0) { + xlen = ad2msg_encryption (&mauth,CNT,s,k,44,xlen,&l_skinny_ctrl); + } + else if (clen == 0) { + lfsr_gf56(CNT); + } + while (xlen > 0) { + xlen = ad_encryption(&mauth,s,k,xlen,CNT,44,&l_skinny_ctrl); + } + nonce_encryption(N,CNT,s,k,w,&l_skinny_ctrl); + + // Tag generation + g8A(s, T); + + l_skinny_ctrl.func_skinny_128_384_enc = 
/*
 * skinny.h - shared declarations for the SKINNY-128-384 core used by the
 * Romulus code: build switches, the control structure holding the round
 * keys, the cipher entry points and the byte-permutation macros used by
 * the key schedule.
 */
#ifndef SKINNY_H
#define SKINNY_H

/* Build switches. */
#define ___SKINNY_LOOP
//#define ___NUM_OF_ROUNDS_56
#if (defined(__riscv_xlen) && (__riscv_xlen == 64))
#define ___ENABLE_DWORD_CAST
#endif

#include <stdint.h>

/*
 * Cipher context: round-key storage plus the cipher variant to call next.
 */
typedef struct ___skinny_ctrl {
#ifdef ___NUM_OF_ROUNDS_56
  unsigned char roundKeys[960]; // number of rounds : 56
#else
  unsigned char roundKeys[704]; // number of rounds : 40
#endif
  void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K);
} skinny_ctrl;

extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);

/*
 * pack_word: w = x3:x2:x1:x0 (x0 in the low byte).
 * Each operand is widened to uint32_t before shifting so that a byte with
 * its top bit set is never shifted into the sign bit of an int.
 * The expansion ends in ';' (kept for existing call sites).
 */
#define pack_word(x0, x1, x2, x3, w)    \
  w = ((uint32_t)(x3) << 24) ^          \
      ((uint32_t)(x2) << 16) ^          \
      ((uint32_t)(x1) << 8)  ^          \
      (uint32_t)(x0);

/*
 * unpack_word: split w into its four bytes, x0 receiving the low byte.
 * The expansion ends in ';' (kept for existing call sites).
 */
#define unpack_word(x0, x1, x2, x3, w)  \
  x0 = ((w) & 0xff);                    \
  x1 = (((w) >> 8) & 0xff);             \
  x2 = (((w) >> 16) & 0xff);            \
  x3 = ((w) >> 24);

#ifdef ___ENABLE_DWORD_CAST

/*
 * PERMUTATION (64-bit form): permute the eight bytes held in dw.
 * Requires uint64_t locals named dw, dt0 and dt1 at the expansion site.
 * Byte positions are written most-significant first in the comments.
 */
#define PERMUTATION()                                                  \
  /* permutation */                                                    \
  /* 7 6 5 4 3 2 1 0 */                                                \
  /* 5 7 2 3 6 0 4 1 */                                                \
  /* dw (7 6 5 4 3 2 1 0) */                                           \
  /* dw (5 7 2 3 6 0 4 1) */                                           \
  dt0 = dw >> 24;                 /* - - - 7 6 5 4 3 */                \
  dt0 = dt0 & 0x00000000ff00ff00; /* - - - - 6 - 4 - */                \
  dt1 = dw << 16;                 /* 5 4 3 2 1 0 - - */                \
  dt1 = dt1 & 0xff00000000ff0000; /* 5 - - - - 0 - - */                \
  dt0 = dt0 ^ dt1;                /* 5 - - - 6 0 4 - */                \
  dt1 = dw >> 8;                  /* - 7 6 5 4 3 2 1 */                \
  dt1 = dt1 & 0x00ff0000000000ff; /* - 7 - - - - - 1 */                \
  dt0 = dt0 ^ dt1;                /* 5 7 - - 6 0 4 1 */                \
  dt1 = dw << 8;                  /* 6 5 4 3 2 1 0 - */                \
  dt1 = dt1 & 0x000000ff00000000; /* - - - 3 - - - - */                \
  dt0 = dt0 ^ dt1;                /* 5 7 - 3 6 0 4 1 */                \
  dt1 = dw << 24;                 /* 4 3 2 1 0 - - - */                \
  dw = dt1 & 0x0000ff0000000000;  /* - - 2 - - - - - */                \
  dw = dw ^ dt0;                  /* 5 7 2 3 6 0 4 1 */

#else

/*
 * PERMUTATION (32-bit form): the same byte permutation on the pair
 * (w1:w0).  Requires uint32_t locals named w0, w1, t0, t1 and t2 at the
 * expansion site.
 */
#define PERMUTATION()                                                  \
  /* permutation */                                                    \
  /* 7 6 5 4 3 2 1 0 */                                                \
  /* 5 7 2 3 6 0 4 1 */                                                \
  /* w0 (3 2 1 0) */                                                   \
  /* w1 (7 6 5 4) */                                                   \
  /* w0 (6 0 4 1) */                                                   \
  /* w1 (5 7 2 3) */                                                   \
  t0 = w1 << 8;         /* 6 5 4 - */                                  \
  t0 = t0 & 0xff00ff00; /* 6 - 4 - */                                  \
  t1 = w1 << 16;        /* 5 4 - - */                                  \
  t1 = t1 & 0xff000000; /* 5 - - - */                                  \
  t2 = w1 & 0xff000000; /* 7 - - - */                                  \
  t2 = t2 >> 8;         /* - 7 - - */                                  \
  t1 = t1 ^ t2;         /* 5 7 - - */                                  \
  t2 = w0 & 0xff000000; /* 3 - - - */                                  \
  t2 = t2 >> 24;        /* - - - 3 */                                  \
  t1 = t1 ^ t2;         /* 5 7 - 3 */                                  \
  w1 = w0 >> 8;         /* - 3 2 1 */                                  \
  w1 = w1 & 0x0000ff00; /* - - 2 - */                                  \
  w1 = w1 ^ t1;         /* 5 7 2 3 */                                  \
  t2 = w0 & 0x0000ff00; /* - - 1 - */                                  \
  t2 = t2 >> 8;         /* - - - 1 */                                  \
  t0 = t0 ^ t2;         /* 6 - 4 1 */                                  \
  w0 = w0 << 16;        /* 1 0 - - */                                  \
  w0 = w0 & 0x00ff0000; /* - 0 - - */                                  \
  w0 = w0 ^ t0;         /* 6 0 4 1 */

#endif

#endif /* SKINNY_H */
/*
 * RunEncryptionKeyScheduleTK2 (64-bit, fully unrolled variant).
 *
 * Combines the second tweakey word with the material already stored in the
 * TK3 area of roundKeys and writes the result into the TK2 area.
 *
 *   roundKeys : round-key buffer of the cipher context.  Bytes 16..23 and
 *               24..31 are read as the two starting words; results are
 *               written from offset 64 (odd rounds) and 72 (even rounds)
 *               in steps of 16 bytes; the TK3 words are read from offset
 *               384/392 (40 rounds) or 512/520 (56 rounds).
 *
 * Every PERMUTATION_TK2() expansion permutes dw, steps its per-byte LFSR,
 * stores dw ^ *tk3 through tk2 and advances both pointers by 16 bytes.
 * The statement order is significant: each expansion consumes the dw left
 * by the previous one.
 */
void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys)
{
  uint64_t* tk2; // used in MACRO
  uint64_t* tk3; // used in MACRO
  uint64_t dt0; // used in MACRO
  uint64_t dt1; // used in MACRO
  uint64_t dw;

  // odd rounds

  // load the starting word for the odd rounds
  dw = *(uint64_t*)&roundKeys[16];

  tk2 = (uint64_t*)&roundKeys[64];
#ifndef ___NUM_OF_ROUNDS_56
  tk3 = (uint64_t*)&roundKeys[384];
#else
  tk3 = (uint64_t*)&roundKeys[512];
#endif

  // 1st round: no permutation, just combine with TK3
  *tk2 = dw ^ *tk3;

  tk2 += 2;
  tk3 += 2;

  // 3rd, 5th, ..., 37th, 39th round (19 steps)
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();

  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();

#ifdef ___NUM_OF_ROUNDS_56

  // 41st, 43rd, ..., 53rd, 55th round (8 more steps)
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();

#endif

  // even rounds

  // load the starting word for the even rounds
  dw = *(uint64_t*)&roundKeys[24];

  tk2 = (uint64_t*)&roundKeys[72];
#ifndef ___NUM_OF_ROUNDS_56
  tk3 = (uint64_t*)&roundKeys[392];
#else
  tk3 = (uint64_t*)&roundKeys[520];
#endif

  // 2nd, 4th, ..., 38th, 40th round (20 steps)
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();

  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();

#ifdef ___NUM_OF_ROUNDS_56

  // 42nd, 44th, ..., 54th, 56th round (8 more steps)
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();
  PERMUTATION_TK2();

#endif

}
,51th,53th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even rounds: restart from the eight bytes at roundKeys[24], writing from roundKeys[72] + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round (20 expansions) + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + /* Loop-based 64-bit variant: same stores as the unrolled form, with the repeated PERMUTATION_TK2() expansions driven by for loops. */ +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint64_t* tk2; // used in MACRO + uint64_t* tk3; // used in MACRO + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[16]; + + tk2 = (uint64_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2 = dw ^ *tk3; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... 
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even rounds: restart from the eight bytes at roundKeys[24], writing from roundKeys[72] + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... (one expansion per even round: 20 or 28) +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + /* 32-bit build: w0 and w1 together carry eight TK2 bytes per step. PERMUTATION() is not defined in this file. */ +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + w0 = ((w0 << 1) & 0xfefefefe) ^ \ + (((w0 >> 7) ^ (w0 >> 5)) & 0x01010101); \ + w1 = ((w1 << 1) & 0xfefefefe) ^ \ + (((w1 >> 7) ^ (w1 >> 5)) & 0x01010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store two words, then skip two more so each expansion advances 16 bytes */ \ + *tk2++ = w0 ^ *tk3++; \ + *tk2++ = w1 ^ *tk3++; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + /* Unrolled 32-bit variant. For every round it stores (current TK2 words) ^ (words read through tk3) into the area that starts at roundKeys[64]; tk3 starts at roundKeys[384] (40 rounds) or roundKeys[512] (56 rounds). */ +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... 
,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41st,43rd, ... ,53rd,55th round (8 expansions) + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even rounds: restart from the bytes at roundKeys[24..31], writing from roundKeys[72] + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round (20 expansions) + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... 
,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + /* Loop-based 32-bit variant: same stores as the unrolled form, with the repeated PERMUTATION_TK2() expansions driven by for loops. */ +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... (one expansion per remaining odd round: 19 or 27) +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even rounds: restart from the bytes at roundKeys[24..31], writing from roundKeys[72] + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... (one expansion per even round: 20 or 28) +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_key_schedule3.c b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_key_schedule3.c new file mode 100644 index 0000000..5dcaf7f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_key_schedule3.c @@ -0,0 +1,428 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * AC(c0 c1) ^ TK3 -> store + * ART(TK3) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#ifdef ___ENABLE_DWORD_CAST + /* 64-bit build: dw carries eight TK3 bytes per step. c0Val is XORed into the low byte and c1Val into bits 40..47 of the stored word. PERMUTATION() is not defined in this file. */ +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + dw = ((dw >> 1) & 0x7f7f7f7f7f7f7f7f) ^ \ + (((dw << 7) ^ (dw << 1)) & 0x8080808080808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store, then advance by two 64-bit words (16 bytes) */ \ + dt0 = dw ^ c0Val; \ + *tk3 = dt0 ^ ((uint64_t)c1Val << 40); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + /* Unrolled 64-bit variant. Starting from the bytes at roundKeys[32] (odd rounds) and roundKeys[40] (even rounds), it writes one 8-byte TK3^constant word per round from roundKeys[384] (40 rounds) or roundKeys[512] (56 rounds); the constants are passed as literals. */ +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round (constant 0x01 in the low byte; net pointer advance is 16 bytes) + *tk3++ = dw ^ 0x01; + tk3 += 1; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41st,43rd, ... 
,53th,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even rounds: restart from the eight bytes at roundKeys[40] + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + /* Loop-based 64-bit variant: the per-round constants are read through pRC, two bytes (c0 then c1) per round, instead of being passed as literals. */ +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + uint64_t c0; + uint64_t c1; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = dw ^ 0x01; + tk3 += 1; + + pRC += 4; /* skip the entries of rounds 1 and 2 so pRC points at round 3 */ + // 3rd,5th, ... 
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; /* step over the following round's pair */ + PERMUTATION_TK3(c0, c1); + } + + // even rounds: restart from the eight bytes at roundKeys[40] + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; /* pointer stood 80 bytes past its start (4, then 19 steps of 4); this leaves it at offset 2, the round-2 pair */ + tk3 = (uint64_t*)&roundKeys[392]; +#else + pRC -= 110; /* pointer stood 112 bytes past its start (4, then 27 steps of 4); this leaves it at offset 2, the round-2 pair */ + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + /* 32-bit build: w0 and w1 together carry eight TK3 bytes per step. c0Val is XORed into the low byte of w0 and c1Val into bits 8..15 of w1. PERMUTATION() is not defined in this file. */ +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + w0 = ((w0 >> 1) & 0x7f7f7f7f) ^ \ + (((w0 << 7) ^ (w0 << 1)) & 0x80808080); \ + w1 = ((w1 >> 1) & 0x7f7f7f7f) ^ \ + (((w1 << 7) ^ (w1 << 1)) & 0x80808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store two words, then skip two more so each expansion advances 16 bytes */ \ + *tk3++ = w0 ^ c0Val; \ + *tk3++ = w1 ^ ((uint32_t)c1Val << 8); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + /* Unrolled 32-bit variant. Starting from the bytes at roundKeys[32..39] (odd rounds) and roundKeys[40..47] (even rounds), it writes two 32-bit TK3^constant words per round from roundKeys[384] (40 rounds) or roundKeys[512] (56 rounds). */ +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round (constant 0x01 in the low byte of the first word) + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + // 3rd,5th, ... 
,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41st,43rd, ... ,53rd,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even rounds: restart from the bytes at roundKeys[40..47] + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... 
,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + /* Loop-based 32-bit variant: the per-round constants are read through pRC, two bytes (c0 then c1) per round, instead of being passed as literals. */ +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + uint32_t c0; + uint32_t c1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + pRC += 4; /* skip the entries of rounds 1 and 2 so pRC points at round 3 */ + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; /* step over the following round's pair */ + PERMUTATION_TK3(c0, c1); + } + + // even rounds: restart from the bytes at roundKeys[40..47] + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; /* pointer stood 80 bytes past its start; this leaves it at offset 2, the round-2 pair */ + tk3 = (uint32_t*)&roundKeys[392]; +#else + pRC -= 110; /* pointer stood 112 bytes past its start; this leaves it at offset 2, the round-2 pair */ + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_main.c b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_main.c new file mode 100644 index 0000000..8a6e75f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_main.c @@ -0,0 +1,675 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. 
+ * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. + * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * ART(TK1) -> store + * load AC(c0 c1) ^ TK3 ^ TK2 + * load TK1 + * calc AC(c0 c1) ^ TK3 ^ TK2 ^ TK1 -> use at (AC->ART) + * SC->SR->(AC->ART)->MC + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +/* + * S-BOX: 256-entry byte substitution table, handed to Encrypt() as its sbox argument + */ +unsigned char SBOX[] += { + // Original + 0x65, 0x4c, 0x6a, 0x42, 0x4b, 0x63, 0x43, 0x6b, 0x55, 0x75, 0x5a, 0x7a, 0x53, 0x73, 0x5b, 0x7b, + 0x35, 0x8c, 0x3a, 0x81, 0x89, 0x33, 0x80, 0x3b, 0x95, 0x25, 0x98, 0x2a, 0x90, 0x23, 0x99, 0x2b, + 0xe5, 0xcc, 0xe8, 0xc1, 0xc9, 0xe0, 0xc0, 0xe9, 0xd5, 0xf5, 0xd8, 0xf8, 0xd0, 0xf0, 0xd9, 0xf9, + 0xa5, 0x1c, 0xa8, 0x12, 0x1b, 0xa0, 0x13, 0xa9, 0x05, 0xb5, 0x0a, 0xb8, 0x03, 0xb0, 0x0b, 0xb9, + 0x32, 0x88, 0x3c, 0x85, 0x8d, 0x34, 0x84, 0x3d, 0x91, 0x22, 0x9c, 0x2c, 0x94, 0x24, 0x9d, 0x2d, + 0x62, 0x4a, 0x6c, 0x45, 0x4d, 0x64, 0x44, 0x6d, 0x52, 0x72, 0x5c, 0x7c, 0x54, 0x74, 0x5d, 0x7d, + 0xa1, 0x1a, 0xac, 0x15, 0x1d, 0xa4, 0x14, 0xad, 0x02, 0xb1, 0x0c, 0xbc, 0x04, 0xb4, 0x0d, 0xbd, + 0xe1, 0xc8, 0xec, 0xc5, 0xcd, 0xe4, 0xc4, 0xed, 0xd1, 0xf1, 0xdc, 0xfc, 0xd4, 0xf4, 0xdd, 0xfd, + 0x36, 0x8e, 0x38, 0x82, 0x8b, 0x30, 0x83, 0x39, 0x96, 0x26, 0x9a, 0x28, 0x93, 0x20, 0x9b, 0x29, + 0x66, 0x4e, 0x68, 0x41, 0x49, 0x60, 0x40, 0x69, 0x56, 0x76, 0x58, 0x78, 0x50, 0x70, 0x59, 0x79, + 0xa6, 0x1e, 0xaa, 0x11, 0x19, 0xa3, 0x10, 0xab, 0x06, 0xb6, 0x08, 0xba, 0x00, 0xb3, 0x09, 0xbb, + 0xe6, 0xce, 0xea, 0xc2, 0xcb, 0xe3, 0xc3, 0xeb, 0xd6, 0xf6, 0xda, 0xfa, 0xd3, 0xf3, 0xdb, 0xfb, + 0x31, 0x8a, 0x3e, 0x86, 0x8f, 0x37, 0x87, 0x3f, 0x92, 0x21, 0x9e, 0x2e, 0x97, 0x27, 0x9f, 0x2f, + 0x61, 0x48, 0x6e, 0x46, 0x4f, 0x67, 0x47, 0x6f, 0x51, 0x71, 0x5e, 0x7e, 0x57, 0x77, 0x5f, 0x7f, + 0xa2, 0x18, 0xae, 0x16, 0x1f, 0xa7, 0x17, 0xaf, 0x01, 0xb2, 0x0e, 0xbe, 0x07, 0xb7, 0x0f, 0xbf, + 0xe2, 0xca, 0xee, 0xc6, 0xcf, 0xe7, 0xc7, 0xef, 0xd2, 0xf2, 
0xde, 0xfe, 0xd7, 0xf7, 0xdf, 0xff, +}; + + /* + * S-BOX ^ AC(c2): every entry is the matching SBOX entry XORed with 0x02; handed to Encrypt() as its sbox2 argument + */ +unsigned char SBOX2[] += { // Original ^ c2(0x02) + 0x67, 0x4e, 0x68, 0x40, 0x49, 0x61, 0x41, 0x69, 0x57, 0x77, 0x58, 0x78, 0x51, 0x71, 0x59, 0x79, + 0x37, 0x8e, 0x38, 0x83, 0x8b, 0x31, 0x82, 0x39, 0x97, 0x27, 0x9a, 0x28, 0x92, 0x21, 0x9b, 0x29, + 0xe7, 0xce, 0xea, 0xc3, 0xcb, 0xe2, 0xc2, 0xeb, 0xd7, 0xf7, 0xda, 0xfa, 0xd2, 0xf2, 0xdb, 0xfb, + 0xa7, 0x1e, 0xaa, 0x10, 0x19, 0xa2, 0x11, 0xab, 0x07, 0xb7, 0x08, 0xba, 0x01, 0xb2, 0x09, 0xbb, + 0x30, 0x8a, 0x3e, 0x87, 0x8f, 0x36, 0x86, 0x3f, 0x93, 0x20, 0x9e, 0x2e, 0x96, 0x26, 0x9f, 0x2f, + 0x60, 0x48, 0x6e, 0x47, 0x4f, 0x66, 0x46, 0x6f, 0x50, 0x70, 0x5e, 0x7e, 0x56, 0x76, 0x5f, 0x7f, + 0xa3, 0x18, 0xae, 0x17, 0x1f, 0xa6, 0x16, 0xaf, 0x00, 0xb3, 0x0e, 0xbe, 0x06, 0xb6, 0x0f, 0xbf, + 0xe3, 0xca, 0xee, 0xc7, 0xcf, 0xe6, 0xc6, 0xef, 0xd3, 0xf3, 0xde, 0xfe, 0xd6, 0xf6, 0xdf, 0xff, + 0x34, 0x8c, 0x3a, 0x80, 0x89, 0x32, 0x81, 0x3b, 0x94, 0x24, 0x98, 0x2a, 0x91, 0x22, 0x99, 0x2b, + 0x64, 0x4c, 0x6a, 0x43, 0x4b, 0x62, 0x42, 0x6b, 0x54, 0x74, 0x5a, 0x7a, 0x52, 0x72, 0x5b, 0x7b, + 0xa4, 0x1c, 0xa8, 0x13, 0x1b, 0xa1, 0x12, 0xa9, 0x04, 0xb4, 0x0a, 0xb8, 0x02, 0xb1, 0x0b, 0xb9, + 0xe4, 0xcc, 0xe8, 0xc0, 0xc9, 0xe1, 0xc1, 0xe9, 0xd4, 0xf4, 0xd8, 0xf8, 0xd1, 0xf1, 0xd9, 0xf9, + 0x33, 0x88, 0x3c, 0x84, 0x8d, 0x35, 0x85, 0x3d, 0x90, 0x23, 0x9c, 0x2c, 0x95, 0x25, 0x9d, 0x2d, + 0x63, 0x4a, 0x6c, 0x44, 0x4d, 0x65, 0x45, 0x6d, 0x53, 0x73, 0x5c, 0x7c, 0x55, 0x75, 0x5d, 0x7d, + 0xa0, 0x1a, 0xac, 0x14, 0x1d, 0xa5, 0x15, 0xad, 0x03, 0xb0, 0x0c, 0xbc, 0x05, 0xb5, 0x0d, 0xbd, + 0xe0, 0xc8, 0xec, 0xc4, 0xcd, 0xe5, 0xc5, 0xed, 0xd0, 0xf0, 0xdc, 0xfc, 0xd5, 0xf5, 0xdd, 0xfd, +}; + +#ifdef ___SKINNY_LOOP +/* + * Round Constants: two bytes per round in round order; the loop-based TK3 schedule reads them as c0 then c1 + */ +unsigned char RC[] += { + 0x01, 0x00, 0x03, 0x00, 0x07, 0x00, 0x0f, 0x00, 0x0f, 0x01, 0x0e, 0x03, 0x0d, 0x03, 0x0b, 0x03, + 0x07, 0x03, 0x0f, 0x02, 0x0e, 0x01, 0x0c, 0x03, 0x09, 0x03, 0x03, 0x03, 0x07, 0x02, 0x0e, 0x00, + 0x0d, 0x01, 0x0a, 0x03, 
0x05, 0x03, 0x0b, 0x02, 0x06, 0x01, 0x0c, 0x02, 0x08, 0x01, 0x00, 0x03, + 0x01, 0x02, 0x02, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x07, 0x01, 0x0e, 0x02, 0x0c, 0x01, 0x08, 0x03, + 0x01, 0x03, 0x03, 0x02, 0x06, 0x00, 0x0d, 0x00, 0x0b, 0x01, 0x06, 0x03, 0x0d, 0x02, 0x0a, 0x01, +#ifdef ___NUM_OF_ROUNDS_56 + 0x04, 0x03, 0x09, 0x02, 0x02, 0x01, 0x04, 0x02, 0x08, 0x00, 0x01, 0x01, 0x02, 0x02, 0x04, 0x00, + 0x09, 0x00, 0x03, 0x01, 0x06, 0x02, 0x0c, 0x00, 0x09, 0x01, 0x02, 0x03, 0x05, 0x02, 0x0a, 0x00, +#endif + }; +#endif + /* Prototypes of the routines used below; the TK3 schedule takes the RC table only in the loop build. */ +extern void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2); +extern void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys); +#ifdef ___SKINNY_LOOP +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC); +#else +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys); +#endif + /* Full setup path: writes CNT to roundKeys[0..7], T to roundKeys[16..31] and K to roundKeys[32..47] (second word of each 8-byte group reordered by pack_word), runs the TK3 schedule and then the TK2 schedule, passes input to Encrypt, and finally repoints pskinny_ctrl->func_skinny_128_384_enc at skinny_128_384_enc12_12. */ +void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + pt[8] = *(uint32_t*)(&K[0]); + pack_word(K[7], K[4], K[5], K[6], pt[9]); + pt[10] = *(uint32_t*)(&K[8]); + pack_word(K[15], K[12], K[13], K[14], pt[11]); + +#ifdef ___SKINNY_LOOP + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys, RC); +#else + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys); +#endif + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + + pskinny_ctrl->func_skinny_128_384_enc = skinny_128_384_enc12_12; + +} + /* Path that reloads CNT and T and reruns only the TK2 schedule before Encrypt; K is unused. */ +void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)K; + + 
uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0]; /* explicit cast, as in skinny_128_384_enc123_12: roundKeys is handed to unsigned char* parameters, so the uncast initialisation mixed incompatible pointer types */ + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + /* Path that reloads only CNT into roundKeys[0..7] and calls Encrypt without rerunning any key schedule; T and K are unused. */ +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)T; + (void)K; + + uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0]; /* explicit cast, matching skinny_128_384_enc123_12 */ + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + /* PERMUTATION_TK1: snapshots roundKeys[0..7] and writes seven reordered 8-byte copies to roundKeys[8..63]. */ +#define PERMUTATION_TK1() \ + \ +/* permutation */ \ +{ \ + unsigned char tmp0 = roundKeys[0]; \ + unsigned char tmp1 = roundKeys[1]; \ + unsigned char tmp2 = roundKeys[2]; \ + unsigned char tmp3 = roundKeys[3]; \ + unsigned char tmp4 = roundKeys[4]; \ + unsigned char tmp5 = roundKeys[5]; \ + unsigned char tmp6 = roundKeys[6]; \ + unsigned char tmp7 = roundKeys[7]; \ + \ + unsigned char* dst = &roundKeys[8]; \ + \ + /* 5 7 2 3 6 0 4 1 */ \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + \ + /* 2 5 0 6 7 1 3 4 */ \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + \ + /* 0 2 1 7 5 4 6 3 */ \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp5; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + \ + /* 1 0 4 5 2 3 7 6 */ \ + *dst++ = tmp6; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp1; \ + \ + /* 4 1 3 2 0 6 5 7 */ \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + *dst++ 
= tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp2; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + \ + /* 3 4 6 0 1 7 2 5 */ \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + \ + /* 6 3 7 1 4 5 0 2 */ \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ +} + /* SBOX_0: substitute four bytes through sbox and write them back in the same positions. */ +#define SBOX_0(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t0; \ + b1 = (uint8_t)t1; \ + b2 = (uint8_t)t2; \ + b3 = (uint8_t)t3; + /* SBOX_8: as SBOX_0, but each substituted byte is written one position later (the last wraps to b0). */ +#define SBOX_8(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t3; \ + b1 = (uint8_t)t0; \ + b2 = (uint8_t)t1; \ + b3 = (uint8_t)t2; + /* SBOX_16: b0 goes through sbox2, the others through sbox; results are written two positions later. */ +#define SBOX_16(b0, b1, b2, b3) \ + \ + t0 = sbox2[b0]; /* AC(c2) */ \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t2; \ + b1 = (uint8_t)t3; \ + b2 = (uint8_t)t0; \ + b3 = (uint8_t)t1; + /* SBOX_24: as SBOX_0, but each substituted byte is written three positions later. */ +#define SBOX_24(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t1; \ + b1 = (uint8_t)t2; \ + b2 = (uint8_t)t3; \ + b3 = (uint8_t)t0; + +#ifdef ___ENABLE_DWORD_CAST + /* SKINNY_MAIN (64-bit build): one expansion performs two rounds; the first XORs one word from tk1 and one from tk2 into block bytes 0..7, the second XORs only one word from tk2. */ +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; 
\ + *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; \ + *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ +} + +#ifndef ___SKINNY_LOOP + /* Unrolled 64-bit Encrypt: expands the TK1 copies into roundKeys[8..63], then runs SKINNY_MAIN (two rounds each) on block in place, reading round words from roundKeys[64] onward and rewinding tk1 to roundKeys[0] after every eight expansions. */ +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + tk1 = (uint64_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 17th, ...,32nd round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 33rd, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41st, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 49th, ... 
,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + /* Loop-based 64-bit Encrypt: same sequence as the unrolled form; tk1 is rewound to roundKeys[0] before every group of SKINNY_MAIN expansions while tk2 keeps advancing from roundKeys[64]. */ +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + + // 1st, ... ,32nd or 48th round (groups of eight expansions, two rounds each) +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33rd, ... ,40th or 49th, ... ,56th round + { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + /* SKINNY_MAIN (32-bit build): one expansion performs two rounds; the first XORs two words from tk1 and two from tk2 into block bytes 0..7, the second XORs only two words from tk2. */ +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + t0 ^= *tk1++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], 
block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk2++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ +} + +#ifndef ___SKINNY_LOOP + /* Unrolled 32-bit Encrypt: expands the TK1 copies into roundKeys[8..63], then runs SKINNY_MAIN (two rounds each) on block in place, reading round words from roundKeys[64] onward and rewinding tk1 to roundKeys[0] after every eight expansions. */ +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + tk1 = (uint32_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 17th, ...,32nd round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 33rd, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41st, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 49th, ... 
,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + /* Loop-based 32-bit Encrypt: same sequence as the unrolled form; tk1 is rewound to roundKeys[0] before every group of SKINNY_MAIN expansions while tk2 keeps advancing from roundKeys[64]. */ +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + + // 1st, ... ,32nd or 48th round (groups of eight expansions, two rounds each) +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33rd, ... ,40th or 49th, ... ,56th round + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/api.h b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/encrypt.c b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/encrypt.c new file mode 100644 index 0000000..495399b --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/encrypt.c @@ -0,0 +1,1337 @@ +/* + * Date: 29 November 2018 + * Contact: Thomas Peyrin - thomas.peyrin@gmail.com + * Mustafa Khairallah - mustafam001@e.ntu.edu.sg + */ + +#include "crypto_aead.h" +#include "api.h" +#include "skinny.h" +#include +#include + /* pad: builds the 16-byte block mp from the first len8 bytes of m. len8 == 0 gives an all-zero block; 0 < len8 < 16 copies len8 bytes, zero-fills the rest and stores len8 in mp[15]; otherwise 16 bytes are copied unchanged. */ +void pad (const unsigned char* m, unsigned char* mp, int len8) { + +#ifdef ___ENABLE_DWORD_CAST + + 
if (0 == len8) { + *(uint64_t*)(&mp[0]) = 0; + *(uint64_t*)(&mp[8]) = 0; + } else if (8 > len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]) & (0xffffffffffffffff >> (64 - len8*8)); + *(uint64_t*)(&mp[8]) = 0; + mp[15] = len8; + } else if (8 == len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = 0; + mp[15] = 8; + } else if (16 > len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = *(uint64_t*)(&m[8]) & (0xffffffffffffffff >> (128 - len8*8)); + mp[15] = len8; + } else { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = *(uint64_t*)(&m[8]); + } + +#else + + if (0 == len8) { + *(uint32_t*)(&mp[0]) = 0; + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + } else if (4 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]) & (0xffffffff >> (32 - len8*8)); + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (4 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 4; + } else if (8 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]) & (0xffffffff >> (64 - len8*8)); + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (8 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 8; + } else if (12 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]) & (0xffffffff >> (96 - len8*8)); + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (12 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = 0; + 
mp[15] = 12; + } else if (16 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = *(uint32_t*)(&m[12]) & (0xffffffff >> (128 - len8*8)); + mp[15] = len8; + } else { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = *(uint32_t*)(&m[12]); + } + +#endif + +} + +void g8A (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t c0, c1; + + c0 = ((s0 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x8080808080808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x8080808080808080); + + *(uint64_t*)(&c[0]) = c0; + *(uint64_t*)(&c[8]) = c1; + +#else + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#endif + +} + +void g8A_for_Tag_Generation (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t c0, c1; + + c0 = ((s0 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x8080808080808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x8080808080808080); + + // use byte access because of memory alignment. + // c is not always in word(4 byte) alignment. 
/*
 * rho_ad_eqov16 - absorb one full 16-byte block into the state.
 *
 *   m : 16 input bytes (read only).
 *   s : 16-byte state, updated in place as s[i] ^= m[i].
 *
 * The XOR is done per byte.  The earlier word-wide version cast m and s
 * to uint32_t / uint64_t pointers; m comes straight from the caller and
 * has no alignment guarantee, so those loads were undefined behaviour
 * and can trap on targets without unaligned access.
 */
void rho_ad_eqov16 (
	const unsigned char* m,
	unsigned char* s) {

  for (int i = 0; i < 16; i++) {
    s[i] ^= m[i];
  }

}
/*
 * rho_eqov16 - process one full 16-byte message block (encryption side).
 *
 *   m : 16 plaintext bytes (read only).
 *   c : 16 ciphertext bytes written: c[i] = G(s[i]) ^ m[i].
 *   s : 16-byte state, updated in place: s[i] ^= m[i].
 *
 * G is the byte map also implemented by g8A(): the byte is shifted right
 * by one and its new top bit is (old bit 7) XOR (old bit 0).
 *
 * Everything is done per byte.  The earlier version reached m and c
 * through uint32_t / uint64_t pointer casts, which is undefined when the
 * caller's buffers are not word aligned.  Each m[i] is read before c[i]
 * is stored, so the result no longer depends on c being written first.
 */
void rho_eqov16 (
	const unsigned char* m,
	unsigned char* c,
	unsigned char* s) {

  for (int i = 0; i < 16; i++) {
    const unsigned char si = s[i];
    const unsigned char mi = m[i];
    // G(si): bits 6..0 <- si >> 1, bit 7 <- si.7 ^ si.0
    const unsigned char g =
        (unsigned char)((si >> 1) ^ ((si ^ (si << 7)) & 0x80));

    s[i] = (unsigned char)(si ^ mi);
    c[i] = (unsigned char)(g ^ mi);
  }

}
& 0xffffffffffffffff >> (64 - (len8*8))); + c0 = c0 ^ (c0 & 0xffffffffffffffff << ( (len8*8))); + c1 = 0; + } else if (8 == len8) { + c0 = c0 ^ mp0; + c1 = 0; + } else if (16 > len8) { + len8 -= 8; + c0 = c0 ^ mp0; + c1 = c1 ^ (mp1 & 0xffffffffffffffff >> (64 - (len8*8))); + c1 = c1 ^ (c1 & 0xffffffffffffffff << ( (len8*8))); + } else { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + } + + *(uint64_t*)&c[0] = c0; + *(uint64_t*)&c[8] = c1; + +#else + + uint32_t mp0 = *(uint32_t*)&mp[0]; + uint32_t mp1 = *(uint32_t*)&mp[4]; + uint32_t mp2 = *(uint32_t*)&mp[8]; + uint32_t mp3 = *(uint32_t*)&mp[12]; + uint32_t c0 = *(uint32_t*)&c[0]; + uint32_t c1 = *(uint32_t*)&c[4]; + uint32_t c2 = *(uint32_t*)&c[8]; + uint32_t c3 = *(uint32_t*)&c[12]; + + *(uint32_t*)(&s[0]) ^= mp0; + *(uint32_t*)(&s[4]) ^= mp1; + *(uint32_t*)(&s[8]) ^= mp2; + *(uint32_t*)(&s[12]) ^= mp3; + + if (0 == len8) { + c0 = 0; + c1 = 0; + c2 = 0; + c3 = 0; + } else if (4 > len8) { + c0 = c0 ^ (mp0 & 0xffffffff >> (32 - (len8*8))); + c0 = c0 ^ (c0 & 0xffffffff << ( (len8*8))); + c1 = 0; + c2 = 0; + c3 = 0; + } else if (4 == len8) { + c0 = c0 ^ mp0; + c1 = 0; + c2 = 0; + c3 = 0; + } else if (8 > len8) { + len8 -= 4; + c0 = c0 ^ mp0; + c1 = c1 ^ (mp1 & 0xffffffff >> (32 - (len8*8))); + c1 = c1 ^ (c1 & 0xffffffff << ( (len8*8))); + c2 = 0; + c3 = 0; + } else if (8 == len8) { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = 0; + c3 = 0; + } else if (12 > len8) { + len8 -= 8; + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ (mp2 & 0xffffffff >> (32 - (len8*8))); + c2 = c2 ^ (c2 & 0xffffffff << ( (len8*8))); + c3 = 0; + } else if (12 == len8) { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = 0; + } else if (16 > len8) { + len8 -= 12; + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = c3 ^ (mp3 & 0xffffffff >> (32 - (len8*8))); + c3 = c3 ^ (c3 & 0xffffffff << ( (len8*8))); + } else { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = c3 ^ mp3; + } + + *(uint32_t*)&c[0] = c0; + *(uint32_t*)&c[4] = c1; + 
/*
 * irho_ud16 - process the last (possibly partial) block on the
 * decryption side.
 *
 *   m    : output; exactly len8 plaintext bytes are written,
 *          m[i] = G(s[i]) ^ c[i].
 *   c    : input; only c[0 .. len8-1] are read.
 *   s    : 16-byte state, updated in place.
 *   len8 : number of valid bytes in this block (0..16).
 *
 * State update, matching what the padded word-wise version computed:
 *   i <  len8 : s[i] ^= m[i]            (the recovered plaintext byte)
 *   partial   : s[15] ^= len8           (length byte that pad() appends)
 *   other bytes of s are left unchanged.
 *
 * G is the byte map also implemented by g8A(): shift right by one, new
 * top bit = (old bit 7) XOR (old bit 0).
 *
 * The earlier version first stored a full 16-byte G(s) into m and then
 * wrote 16 bytes back, so a partial final block overran the caller's
 * plaintext buffer by up to 15 bytes.  It also loaded c through word
 * pointers.  This version touches only the len8 valid bytes of m and c.
 * Bytes m[len8 .. 15], which used to be zeroed, are no longer written.
 */
void irho_ud16 (
	unsigned char* m,
	const unsigned char* c,
	unsigned char* s,
	int len8) {

  int n = len8;

  // Clamp to one block.
  if (n > 16) {
    n = 16;
  }
  if (n < 0) {
    n = 0;
  }

  for (int i = 0; i < n; i++) {
    const unsigned char si = s[i];
    // G(si): bits 6..0 <- si >> 1, bit 7 <- si.7 ^ si.0
    const unsigned char g =
        (unsigned char)((si >> 1) ^ ((si ^ (si << 7)) & 0x80));
    const unsigned char p = (unsigned char)(g ^ c[i]);

    s[i] = (unsigned char)(si ^ p);
    m[i] = p;
  }

  // A partial block is padded with its length in byte 15.
  if (n > 0 && n < 16) {
    s[15] ^= (unsigned char)n;
  }

}
p_skinny_ctrl) { + + CNT[7] = D; + p_skinny_ctrl->func_skinny_128_384_enc(s, p_skinny_ctrl, CNT, T, k); + +} + +void nonce_encryption ( + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + block_cipher(s,k,(unsigned char*)N,CNT,D,p_skinny_ctrl); + +} + +void generate_tag ( + unsigned char** c, unsigned char* s, + unsigned long long* clen) { + + g8A_for_Tag_Generation(s, *c); + + *c = *c + 16; + *c = *c - *clen; + +} + +unsigned long long msg_encryption ( + const unsigned char** M, unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* l_skinny_ctrl) { + + int len8; + + if (mlen >= 16) { + len8 = 16; + mlen = mlen - 16; + rho_eqov16(*M, *c, s); + } + else { + len8 = mlen; + mlen = 0; + rho_ud16(*M, *c, s, len8); + } + *c = *c + len8; + *M = *M + len8; + lfsr_gf56(CNT); + if (mlen != 0) { + nonce_encryption(N,CNT,s,k,D,l_skinny_ctrl); + } + return mlen; + +} + +unsigned long long msg_decryption ( + unsigned char** M, const unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long clen, + skinny_ctrl* l_skinny_ctrl) { + + int len8; + + if (clen >= 16) { + len8 = 16; + clen = clen - 16; + irho_eqov16(*M, *c, s); + } + else { + len8 = clen; + clen = 0; + irho_ud16(*M, *c, s, len8); + } + *c = *c + len8; + *M = *M + len8; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,l_skinny_ctrl); + return clen; + +} + +unsigned long long ad2msg_encryption ( + const unsigned char** M, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* l_skinny_ctrl) { + + unsigned char T [16]; + int len8; + + if (mlen <= 16) { + len8 = mlen; + mlen = 0; + } + else { + len8 = 16; + mlen = mlen - 16; + } + + pad (*M,T,len8); + 
block_cipher(s,k,T,CNT,D,l_skinny_ctrl); + lfsr_gf56(CNT); + *M = *M + len8; + + return mlen; + +} + +unsigned long long ad_encryption ( + const unsigned char** A, unsigned char* s, + const unsigned char* k, unsigned long long adlen, + unsigned char* CNT, + unsigned char D, + skinny_ctrl* l_skinny_ctrl) { + + unsigned char T [16]; + int len8; + + if (adlen >= 16) { + len8 = 16; + adlen = adlen - 16; + + rho_ad_eqov16(*A, s); + } + else { + len8 = adlen; + adlen = 0; + rho_ad_ud16(*A, s, len8); + } + *A = *A + len8; + lfsr_gf56(CNT); + if (adlen != 0) { + if (adlen >= 16) { + len8 = 16; + adlen = adlen - 16; + } + else { + len8 = adlen; + adlen = 0; + } + pad(*A, T, len8); + *A = *A + len8; + block_cipher(s,k,T,CNT,D,l_skinny_ctrl); + lfsr_gf56(CNT); + } + + return adlen; + +} + +int crypto_aead_encrypt ( + unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, + const unsigned char* npub, + const unsigned char* k) { + + unsigned char s[16]; + unsigned char CNT[8]; + unsigned char T[16]; + const unsigned char* N; + unsigned char w; + unsigned long long xlen; + + skinny_ctrl l_skinny_ctrl; + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void)nsec; + N = npub; + + xlen = mlen; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = 0; + *(uint64_t*)(&s[8]) = 0; + +#else + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#endif + + reset_lfsr_gf56(CNT); + + w = 48; + + if (adlen == 0) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 0) { + w = w ^ 8; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; 
+ } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) < 16) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 16) { + w = w ^ 0; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else { + w = w ^ 10; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + } + else while (adlen > 0) { + adlen = ad_encryption(&ad,s,k,adlen,CNT,40,&l_skinny_ctrl); + } + + if ((w & 8) == 0) { + xlen = ad2msg_encryption (&m,CNT,s,k,44,xlen,&l_skinny_ctrl); + } + else if (mlen == 0) { + lfsr_gf56(CNT); + } + while (xlen > 0) { + xlen = ad_encryption(&m,s,k,xlen,CNT,44,&l_skinny_ctrl); + } + nonce_encryption(N,CNT,s,k,w,&l_skinny_ctrl); + + // Tag generation + g8A(s, T); + + m = m - mlen; + + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = *(uint64_t*)(&T[0]); + *(uint64_t*)(&s[8]) = *(uint64_t*)(&T[8]); + +#else + + *(uint32_t*)(&s[0]) = *(uint32_t*)(&T[0]); + *(uint32_t*)(&s[4]) = *(uint32_t*)(&T[4]); + *(uint32_t*)(&s[8]) = *(uint32_t*)(&T[8]); + *(uint32_t*)(&s[12]) = *(uint32_t*)(&T[12]); + +#endif + + *clen = mlen + 16; + + if (mlen > 0) { + nonce_encryption(N,CNT,s,k,36,&l_skinny_ctrl); + while (mlen > 16) { + mlen = msg_encryption(&m,&c,N,CNT,s,k,36,mlen,&l_skinny_ctrl); + } + rho_ud16(m, c, s, mlen); + c = c + mlen; + m = m + mlen; + } + + // Tag Concatenation + c[0] = T[0]; + c[1] 
= T[1]; + c[2] = T[2]; + c[3] = T[3]; + c[4] = T[4]; + c[5] = T[5]; + c[6] = T[6]; + c[7] = T[7]; + c[8] = T[8]; + c[9] = T[9]; + c[10] = T[10]; + c[11] = T[11]; + c[12] = T[12]; + c[13] = T[13]; + c[14] = T[14]; + c[15] = T[15]; + + c = c - *clen; + + return 0; + +} + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) { + + unsigned char s[16]; + unsigned char CNT[8]; + unsigned char T[16]; + const unsigned char* N; + unsigned char w; + unsigned long long xlen; + const unsigned char* mauth; + unsigned char* p1; + unsigned char* p2; + + skinny_ctrl l_skinny_ctrl; + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void)nsec; + mauth = m; + + N = npub; + + xlen = clen-16; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = 0; + *(uint64_t*)(&s[8]) = 0; + +#else + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#endif + + reset_lfsr_gf56(CNT); + + w = 48; + + if (adlen == 0) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 0) { + w = w ^ 8; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) < 16) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 16) { + w = w ^ 0; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + 
w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else { + w = w ^ 10; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + } + else while (adlen > 0) { + adlen = ad_encryption(&ad,s,k,adlen,CNT,40,&l_skinny_ctrl); + } + + if ((w & 8) == 0) { + xlen = ad2msg_encryption (&mauth,CNT,s,k,44,xlen,&l_skinny_ctrl); + } + else if (clen == 0) { + lfsr_gf56(CNT); + } + while (xlen > 0) { + xlen = ad_encryption(&mauth,s,k,xlen,CNT,44,&l_skinny_ctrl); + } + nonce_encryption(N,CNT,s,k,w,&l_skinny_ctrl); + + // Tag generation + g8A(s, T); + + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + + p1 = T; + p2 = (unsigned char*)&c[clen - 16]; + + p1[0] = p2[0]; + p1[1] = p2[1]; + p1[2] = p2[2]; + p1[3] = p2[3]; + p1[4] = p2[4]; + p1[5] = p2[5]; + p1[6] = p2[6]; + p1[7] = p2[7]; + p1[8] = p2[8]; + p1[9] = p2[9]; + p1[10] = p2[10]; + p1[11] = p2[11]; + p1[12] = p2[12]; + p1[13] = p2[13]; + p1[14] = p2[14]; + p1[15] = p2[15]; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = *(uint64_t*)(&T[0]); + *(uint64_t*)(&s[8]) = *(uint64_t*)(&T[8]); + +#else + + *(uint32_t*)(&s[0]) = *(uint32_t*)(&T[0]); + *(uint32_t*)(&s[4]) = *(uint32_t*)(&T[4]); + *(uint32_t*)(&s[8]) = *(uint32_t*)(&T[8]); + *(uint32_t*)(&s[12]) = *(uint32_t*)(&T[12]); + +#endif + + clen = clen - 16; + *mlen = clen; + + if (clen > 0) { + nonce_encryption(N,CNT,s,k,36,&l_skinny_ctrl); + + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + while (clen > 16) { + clen = msg_decryption(&m,&c,N,CNT,s,k,36,clen,&l_skinny_ctrl); + } + irho_ud16(m, c, s, clen); + c = c + clen; + m = m + clen; + } + + for (int i = 0; i < 16; i++) { + if (T[i] != (*(c+i))) { + return -1; 
+ } + } + + return 0; + +} diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny.h b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny.h new file mode 100644 index 0000000..826f2f8 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny.h @@ -0,0 +1,106 @@ +#define ___SKINNY_LOOP +#define ___NUM_OF_ROUNDS_56 +#if (defined(__riscv_xlen) && (__riscv_xlen == 64)) +#define ___ENABLE_DWORD_CAST +#endif + +#include + +typedef struct ___skinny_ctrl { +#ifdef ___NUM_OF_ROUNDS_56 + unsigned char roundKeys[960]; // number of rounds : 56 +#else + unsigned char roundKeys[704]; // number of rounds : 40 +#endif + void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K); +} skinny_ctrl; + +extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); + +#define pack_word(x0, x1, x2, x3, w) \ + w = ((x3) << 24) ^ \ + ((x2) << 16) ^ \ + ((x1) << 8) ^ \ + (x0); + +#define unpack_word(x0, x1, x2, x3, w) \ + x0 = ((w) & 0xff); \ + x1 = (((w) >> 8) & 0xff); \ + x2 = (((w) >> 16) & 0xff); \ + x3 = ((w) >> 24); + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION() \ +/* permutation */ \ + \ + /* 7 6 5 4 3 2 1 0 */ \ + /* 5 7 2 3 6 0 4 1 */ \ + \ + /* dw (7 6 5 4 3 2 1 0) */ \ + \ + /* dw (5 7 2 3 6 0 4 1) */ \ + \ + dt0 = dw >> 24; /* - - - 7 6 5 4 3 */ \ + dt0 = dt0 & 0x00000000ff00ff00; /* - - - - 6 - 4 - */ \ + \ + dt1 = dw << 16; /* 5 4 3 2 1 0 - - */ \ + dt1 = dt1 & 0xff00000000ff0000; /* 5 - - - - 0 - - */ \ + dt0 = dt0 ^ dt1; /* 5 - - - 6 0 4 - */ \ + \ + dt1 = 
dw >> 8; /* - 7 6 5 4 3 2 1 */ \ + dt1 = dt1 & 0x00ff0000000000ff; /* - 7 - - - - - 1 */ \ + dt0 = dt0 ^ dt1; /* 5 7 - - 6 0 4 1 */ \ + \ + dt1 = dw << 8; /* 6 5 4 3 2 1 0 - */ \ + dt1 = dt1 & 0x000000ff00000000; /* - - - 3 - - - - */ \ + dt0 = dt0 ^ dt1; /* 5 7 - 3 6 0 4 1 */ \ + \ + dt1 = dw << 24; /* 4 3 2 1 0 - - - */ \ + dw = dt1 & 0x0000ff0000000000; /* - - 2 - - - - - */ \ + dw = dw ^ dt0; /* 5 7 2 3 6 0 4 1 */ + +#else + +#define PERMUTATION() \ +/* permutation */ \ + \ + /* 7 6 5 4 3 2 1 0 */ \ + /* 5 7 2 3 6 0 4 1 */ \ + \ + /* w0 (3 2 1 0) */ \ + /* w1 (7 6 5 4) */ \ + \ + /* w0 (6 0 4 1) */ \ + /* w1 (5 7 2 3) */ \ + \ + t0 = w1 << 8; /* 6 5 4 - */ \ + t0 = t0 & 0xff00ff00; /* 6 - 4 - */ \ + \ + t1 = w1 << 16; /* 5 4 - - */ \ + t1 = t1 & 0xff000000; /* 5 - - - */ \ + \ + t2 = w1 & 0xff000000; /* 7 - - - */ \ + t2 = t2 >> 8; /* - 7 - - */ \ + t1 = t1 ^ t2; /* 5 7 - - */ \ + \ + t2 = w0 & 0xff000000; /* 3 - - - */ \ + t2 = t2 >> 24; /* - - - 3 */ \ + t1 = t1 ^ t2; /* 5 7 - 3 */ \ + \ + w1 = w0 >> 8; /* - 3 2 1 */ \ + w1 = w1 & 0x0000ff00; /* - - 2 - */ \ + w1 = w1 ^ t1; /* 5 7 2 3 */ \ + \ + t2 = w0 & 0x0000ff00; /* - - 1 - */ \ + t2 = t2 >> 8; /* - - - 1 */ \ + t0 = t0 ^ t2; /* 6 - 4 1 */ \ + \ + w0 = w0 << 16; /* 1 0 - - */ \ + w0 = w0 & 0x00ff0000; /* - 0 - - */ \ + w0 = w0 ^ t0; /* 6 0 4 1 */ + +#endif + diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_key_schedule2.c b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_key_schedule2.c new file mode 100644 index 0000000..c2f30de --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_key_schedule2.c @@ -0,0 +1,431 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * load * AC(c0 c1) ^ TK3 + * calc AC(c0 c1) ^ TK2 -> store + * ART(TK2) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + dw = ((dw << 1) & 0xfefefefefefefefe) ^ \ + (((dw >> 7) ^ (dw >> 5)) & 0x0101010101010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2 = dw ^ *tk3; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint64_t* tk2; // used in MACRO + uint64_t* tk3; // used in MACRO + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + // load master key + dw = *(uint64_t*)&roundKeys[16]; + + tk2 = (uint64_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2 = dw ^ *tk3; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th,43th, ... 
,51th,53th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint64_t* tk2; // used in MACRO + uint64_t* tk3; // used in MACRO + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[16]; + + tk2 = (uint64_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2 = dw ^ *tk3; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... 
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + w0 = ((w0 << 1) & 0xfefefefe) ^ \ + (((w0 >> 7) ^ (w0 >> 5)) & 0x01010101); \ + w1 = ((w1 << 1) & 0xfefefefe) ^ \ + (((w1 >> 7) ^ (w1 >> 5)) & 0x01010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2++ = w0 ^ *tk3++; \ + *tk2++ = w1 ^ *tk3++; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... 
,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41st,43rd, ... ,53rd,55th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... 
,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_key_schedule3.c b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_key_schedule3.c new file mode 100644 index 0000000..5dcaf7f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_key_schedule3.c @@ -0,0 +1,428 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * AC(c0 c1) ^ TK3 -> store + * ART(TK3) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + dw = ((dw >> 1) & 0x7f7f7f7f7f7f7f7f) ^ \ + (((dw << 7) ^ (dw << 1)) & 0x8080808080808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + dt0 = dw ^ c0Val; \ + *tk3 = dt0 ^ ((uint64_t)c1Val << 40); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = dw ^ 0x01; + tk3 += 1; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41td,43th, ... 
,53th,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + uint64_t c0; + uint64_t c1; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = dw ^ 0x01; + tk3 += 1; + + pRC += 4; + // 3rd,5th, ... 
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; /* step over the even-round (c0,c1) pair that follows in the table */ + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; /* odd pass moved pRC by 4 + 19*4 = 80 bytes; rewind to offset 2, the 2nd-round pair */ + tk3 = (uint64_t*)&roundKeys[392]; +#else + pRC -= 110; /* odd pass moved pRC by 4 + 27*4 = 112 bytes; rewind to offset 2, the 2nd-round pair */ + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; /* step over the odd-round (c0,c1) pair that follows in the table */ + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + w0 = ((w0 >> 1) & 0x7f7f7f7f) ^ \ + (((w0 << 7) ^ (w0 << 1)) & 0x80808080); \ + w1 = ((w1 >> 1) & 0x7f7f7f7f) ^ \ + (((w1 << 7) ^ (w1 << 1)) & 0x80808080); \ + \ + /* K3^AC(c0 c1): c0 goes into the low bits of the first word, c1 into bits 8..15 of the second word */ \ + /* store */ \ + *tk3++ = w0 ^ c0Val; \ + *tk3++ = w1 ^ ((uint32_t)c1Val << 8); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + // 3rd,5th, ... 
,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41td,43th, ... ,53th,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... 
,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + uint32_t c0; + uint32_t c1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + pRC += 4; + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; + tk3 = (uint32_t*)&roundKeys[392]; +#else + pRC -= 110; + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_main.c b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_main.c new file mode 100644 index 0000000..8a6e75f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_main.c @@ -0,0 +1,675 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. 
+ * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. + * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * ART(TK1) -> store + * load AC(c0 c1) ^ TK3 ^ TK2 + * load TK1 + * calc AC(c0 c1) ^ TK3 ^ TK2 ^ TK1 -> use at (AC->ART) + * SC->SR->(AC->ART)->MC + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +/* + * S-BOX + */ +unsigned char SBOX[] += { + // Original + 0x65, 0x4c, 0x6a, 0x42, 0x4b, 0x63, 0x43, 0x6b, 0x55, 0x75, 0x5a, 0x7a, 0x53, 0x73, 0x5b, 0x7b, + 0x35, 0x8c, 0x3a, 0x81, 0x89, 0x33, 0x80, 0x3b, 0x95, 0x25, 0x98, 0x2a, 0x90, 0x23, 0x99, 0x2b, + 0xe5, 0xcc, 0xe8, 0xc1, 0xc9, 0xe0, 0xc0, 0xe9, 0xd5, 0xf5, 0xd8, 0xf8, 0xd0, 0xf0, 0xd9, 0xf9, + 0xa5, 0x1c, 0xa8, 0x12, 0x1b, 0xa0, 0x13, 0xa9, 0x05, 0xb5, 0x0a, 0xb8, 0x03, 0xb0, 0x0b, 0xb9, + 0x32, 0x88, 0x3c, 0x85, 0x8d, 0x34, 0x84, 0x3d, 0x91, 0x22, 0x9c, 0x2c, 0x94, 0x24, 0x9d, 0x2d, + 0x62, 0x4a, 0x6c, 0x45, 0x4d, 0x64, 0x44, 0x6d, 0x52, 0x72, 0x5c, 0x7c, 0x54, 0x74, 0x5d, 0x7d, + 0xa1, 0x1a, 0xac, 0x15, 0x1d, 0xa4, 0x14, 0xad, 0x02, 0xb1, 0x0c, 0xbc, 0x04, 0xb4, 0x0d, 0xbd, + 0xe1, 0xc8, 0xec, 0xc5, 0xcd, 0xe4, 0xc4, 0xed, 0xd1, 0xf1, 0xdc, 0xfc, 0xd4, 0xf4, 0xdd, 0xfd, + 0x36, 0x8e, 0x38, 0x82, 0x8b, 0x30, 0x83, 0x39, 0x96, 0x26, 0x9a, 0x28, 0x93, 0x20, 0x9b, 0x29, + 0x66, 0x4e, 0x68, 0x41, 0x49, 0x60, 0x40, 0x69, 0x56, 0x76, 0x58, 0x78, 0x50, 0x70, 0x59, 0x79, + 0xa6, 0x1e, 0xaa, 0x11, 0x19, 0xa3, 0x10, 0xab, 0x06, 0xb6, 0x08, 0xba, 0x00, 0xb3, 0x09, 0xbb, + 0xe6, 0xce, 0xea, 0xc2, 0xcb, 0xe3, 0xc3, 0xeb, 0xd6, 0xf6, 0xda, 0xfa, 0xd3, 0xf3, 0xdb, 0xfb, + 0x31, 0x8a, 0x3e, 0x86, 0x8f, 0x37, 0x87, 0x3f, 0x92, 0x21, 0x9e, 0x2e, 0x97, 0x27, 0x9f, 0x2f, + 0x61, 0x48, 0x6e, 0x46, 0x4f, 0x67, 0x47, 0x6f, 0x51, 0x71, 0x5e, 0x7e, 0x57, 0x77, 0x5f, 0x7f, + 0xa2, 0x18, 0xae, 0x16, 0x1f, 0xa7, 0x17, 0xaf, 0x01, 0xb2, 0x0e, 0xbe, 0x07, 0xb7, 0x0f, 0xbf, + 0xe2, 0xca, 0xee, 0xc6, 0xcf, 0xe7, 0xc7, 0xef, 0xd2, 0xf2, 
0xde, 0xfe, 0xd7, 0xf7, 0xdf, 0xff, +}; + + /* + * S-BOX ^ AC(c2) + */ +unsigned char SBOX2[] += { // Original ^ c2(0x02) + 0x67, 0x4e, 0x68, 0x40, 0x49, 0x61, 0x41, 0x69, 0x57, 0x77, 0x58, 0x78, 0x51, 0x71, 0x59, 0x79, + 0x37, 0x8e, 0x38, 0x83, 0x8b, 0x31, 0x82, 0x39, 0x97, 0x27, 0x9a, 0x28, 0x92, 0x21, 0x9b, 0x29, + 0xe7, 0xce, 0xea, 0xc3, 0xcb, 0xe2, 0xc2, 0xeb, 0xd7, 0xf7, 0xda, 0xfa, 0xd2, 0xf2, 0xdb, 0xfb, + 0xa7, 0x1e, 0xaa, 0x10, 0x19, 0xa2, 0x11, 0xab, 0x07, 0xb7, 0x08, 0xba, 0x01, 0xb2, 0x09, 0xbb, + 0x30, 0x8a, 0x3e, 0x87, 0x8f, 0x36, 0x86, 0x3f, 0x93, 0x20, 0x9e, 0x2e, 0x96, 0x26, 0x9f, 0x2f, + 0x60, 0x48, 0x6e, 0x47, 0x4f, 0x66, 0x46, 0x6f, 0x50, 0x70, 0x5e, 0x7e, 0x56, 0x76, 0x5f, 0x7f, + 0xa3, 0x18, 0xae, 0x17, 0x1f, 0xa6, 0x16, 0xaf, 0x00, 0xb3, 0x0e, 0xbe, 0x06, 0xb6, 0x0f, 0xbf, + 0xe3, 0xca, 0xee, 0xc7, 0xcf, 0xe6, 0xc6, 0xef, 0xd3, 0xf3, 0xde, 0xfe, 0xd6, 0xf6, 0xdf, 0xff, + 0x34, 0x8c, 0x3a, 0x80, 0x89, 0x32, 0x81, 0x3b, 0x94, 0x24, 0x98, 0x2a, 0x91, 0x22, 0x99, 0x2b, + 0x64, 0x4c, 0x6a, 0x43, 0x4b, 0x62, 0x42, 0x6b, 0x54, 0x74, 0x5a, 0x7a, 0x52, 0x72, 0x5b, 0x7b, + 0xa4, 0x1c, 0xa8, 0x13, 0x1b, 0xa1, 0x12, 0xa9, 0x04, 0xb4, 0x0a, 0xb8, 0x02, 0xb1, 0x0b, 0xb9, + 0xe4, 0xcc, 0xe8, 0xc0, 0xc9, 0xe1, 0xc1, 0xe9, 0xd4, 0xf4, 0xd8, 0xf8, 0xd1, 0xf1, 0xd9, 0xf9, + 0x33, 0x88, 0x3c, 0x84, 0x8d, 0x35, 0x85, 0x3d, 0x90, 0x23, 0x9c, 0x2c, 0x95, 0x25, 0x9d, 0x2d, + 0x63, 0x4a, 0x6c, 0x44, 0x4d, 0x65, 0x45, 0x6d, 0x53, 0x73, 0x5c, 0x7c, 0x55, 0x75, 0x5d, 0x7d, + 0xa0, 0x1a, 0xac, 0x14, 0x1d, 0xa5, 0x15, 0xad, 0x03, 0xb0, 0x0c, 0xbc, 0x05, 0xb5, 0x0d, 0xbd, + 0xe0, 0xc8, 0xec, 0xc4, 0xcd, 0xe5, 0xc5, 0xed, 0xd0, 0xf0, 0xdc, 0xfc, 0xd5, 0xf5, 0xdd, 0xfd, +}; + +#ifdef ___SKINNY_LOOP +/* + * Round Constants + */ +unsigned char RC[] += { + 0x01, 0x00, 0x03, 0x00, 0x07, 0x00, 0x0f, 0x00, 0x0f, 0x01, 0x0e, 0x03, 0x0d, 0x03, 0x0b, 0x03, + 0x07, 0x03, 0x0f, 0x02, 0x0e, 0x01, 0x0c, 0x03, 0x09, 0x03, 0x03, 0x03, 0x07, 0x02, 0x0e, 0x00, + 0x0d, 0x01, 0x0a, 0x03, 
0x05, 0x03, 0x0b, 0x02, 0x06, 0x01, 0x0c, 0x02, 0x08, 0x01, 0x00, 0x03, + 0x01, 0x02, 0x02, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x07, 0x01, 0x0e, 0x02, 0x0c, 0x01, 0x08, 0x03, + 0x01, 0x03, 0x03, 0x02, 0x06, 0x00, 0x0d, 0x00, 0x0b, 0x01, 0x06, 0x03, 0x0d, 0x02, 0x0a, 0x01, +#ifdef ___NUM_OF_ROUNDS_56 + 0x04, 0x03, 0x09, 0x02, 0x02, 0x01, 0x04, 0x02, 0x08, 0x00, 0x01, 0x01, 0x02, 0x02, 0x04, 0x00, + 0x09, 0x00, 0x03, 0x01, 0x06, 0x02, 0x0c, 0x00, 0x09, 0x01, 0x02, 0x03, 0x05, 0x02, 0x0a, 0x00, +#endif + }; +#endif + +extern void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2); +extern void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys); +#ifdef ___SKINNY_LOOP +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC); +#else +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys); +#endif + +void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + pt[8] = *(uint32_t*)(&K[0]); + pack_word(K[7], K[4], K[5], K[6], pt[9]); + pt[10] = *(uint32_t*)(&K[8]); + pack_word(K[15], K[12], K[13], K[14], pt[11]); + +#ifdef ___SKINNY_LOOP + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys, RC); +#else + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys); +#endif + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + + pskinny_ctrl->func_skinny_128_384_enc = skinny_128_384_enc12_12; + +} + +void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)K; + + 
uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0]; /* explicit cast, as in skinny_128_384_enc123_12: roundKeys is handed to the schedule routines as unsigned char* */ + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} +/* Counter-only variant: rewrites just pt[0] and pt[1] from CNT and encrypts with whatever round keys are already stored in roundKeys (no key schedule is run here); T and K are accepted for signature compatibility and ignored. */ +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)T; + (void)K; + + uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0]; /* explicit cast: the uncast initializer was an incompatible-pointer initialization */ + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +#define PERMUTATION_TK1() \ + \ +/* permutation */ \ +{ \ + unsigned char tmp0 = roundKeys[0]; \ + unsigned char tmp1 = roundKeys[1]; \ + unsigned char tmp2 = roundKeys[2]; \ + unsigned char tmp3 = roundKeys[3]; \ + unsigned char tmp4 = roundKeys[4]; \ + unsigned char tmp5 = roundKeys[5]; \ + unsigned char tmp6 = roundKeys[6]; \ + unsigned char tmp7 = roundKeys[7]; \ + \ + unsigned char* dst = &roundKeys[8]; \ + \ + /* 5 7 2 3 6 0 4 1 */ \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + \ + /* 2 5 0 6 7 1 3 4 */ \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + \ + /* 0 2 1 7 5 4 6 3 */ \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp5; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + \ + /* 1 0 4 5 2 3 7 6 */ \ + *dst++ = tmp6; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp1; \ + \ + /* 4 1 3 2 0 6 5 7 */ \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + *dst++ 
= tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp2; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + \ + /* 3 4 6 0 1 7 2 5 */ \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + \ + /* 6 3 7 1 4 5 0 2 */ \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ +} + +#define SBOX_0(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t0; \ + b1 = (uint8_t)t1; \ + b2 = (uint8_t)t2; \ + b3 = (uint8_t)t3; + +#define SBOX_8(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t3; \ + b1 = (uint8_t)t0; \ + b2 = (uint8_t)t1; \ + b3 = (uint8_t)t2; + +#define SBOX_16(b0, b1, b2, b3) \ + \ + t0 = sbox2[b0]; /* AC(c2) */ \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t2; \ + b1 = (uint8_t)t3; \ + b2 = (uint8_t)t0; \ + b3 = (uint8_t)t1; + +#define SBOX_24(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t1; \ + b1 = (uint8_t)t2; \ + b2 = (uint8_t)t3; \ + b3 = (uint8_t)t0; + +#ifdef ___ENABLE_DWORD_CAST + +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; 
\ + *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; \ + *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ +} + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + tk1 = (uint64_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 17th, ...,32th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 33th, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 49th, ... 
,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + + // 1st, ... ,32th or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33th , ... ,40th or 49th, .... ,56th round + { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + t0 ^= *tk1++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], 
block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk2++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ +} + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + tk1 = (uint32_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 17th, ...,32th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 33th, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 49th, ... 
,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + + // 1st, ... ,32th or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33th , ... ,40th or 49th, .... ,56th round + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/api.h b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/encrypt.c b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/encrypt.c new file mode 100644 index 0000000..f329721 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/encrypt.c @@ -0,0 +1,1136 @@ +/* + * Date: 29 November 2018 + * Contact: Thomas Peyrin - thomas.peyrin@gmail.com + * Mustafa Khairallah - mustafam001@e.ntu.edu.sg + */ + +#include "crypto_aead.h" +#include "api.h" +#include "skinny.h" +#include +#include + +void pad (const unsigned char* m, unsigned char* mp, int len8) { + +#ifdef ___ENABLE_DWORD_CAST 
+ + if (0 == len8) { + *(uint64_t*)(&mp[0]) = 0; + *(uint64_t*)(&mp[8]) = 0; + } else if (8 > len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]) & (0xffffffffffffffff >> (64 - len8*8)); + *(uint64_t*)(&mp[8]) = 0; + mp[15] = len8; + } else if (8 == len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = 0; + mp[15] = 8; + } else if (16 > len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = *(uint64_t*)(&m[8]) & (0xffffffffffffffff >> (128 - len8*8)); + mp[15] = len8; + } else { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = *(uint64_t*)(&m[8]); + } + +#else + + if (0 == len8) { + *(uint32_t*)(&mp[0]) = 0; + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + } else if (4 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]) & (0xffffffff >> (32 - len8*8)); + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (4 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 4; + } else if (8 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]) & (0xffffffff >> (64 - len8*8)); + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (8 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 8; + } else if (12 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]) & (0xffffffff >> (96 - len8*8)); + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (12 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = 0; + 
/*
 * State-update primitives, written byte by byte.
 *
 * None of the routines below casts a byte buffer to a wider integer type, so
 * they do not depend on pointer alignment or on the host byte order. The
 * 8-byte counter is treated as a little-endian integer (CNT[0] is the least
 * significant byte).
 */

/* Per-byte map: result bits 6..0 are x >> 1, result bit 7 is (x7 ^ x0). */
static unsigned char g8_byte (unsigned char x) {

  return (unsigned char)((x >> 1) ^ ((x ^ (x << 7)) & 0x80));

}

/*
 * Build the 16-byte block mp from the first len8 bytes of m and return the
 * number of bytes taken from m:
 *   len8 <= 0     : mp is all zero
 *   0 < len8 < 16 : m[0..len8-1], zero fill, mp[15] = len8
 *   len8 >= 16    : m[0..15]
 * Never reads m beyond min(len8, 16) bytes.
 */
static int pad_bytewise (const unsigned char* m, unsigned char* mp, int len8) {

  int n = len8;
  int i;

  if (n < 0) {
    n = 0;
  } else if (n > 16) {
    n = 16;
  }

  for (i = 0; i < n; i++) {
    mp[i] = m[i];
  }
  for (; i < 16; i++) {
    mp[i] = 0;
  }
  if (n > 0 && n < 16) {
    mp[15] = (unsigned char)n;
  }

  return n;

}

/* c[i] = g8_byte(s[i]) for all 16 bytes; s is left unchanged. */
void g8A (unsigned char* s, unsigned char* c) {

  for (int i = 0; i < 16; i++) {
    c[i] = g8_byte(s[i]);
  }

}

/*
 * Same transformation as g8A. Kept as a separate entry point for callers that
 * write the result to a destination that is not word aligned.
 */
void g8A_for_Tag_Generation (unsigned char* s, unsigned char* c) {

  g8A(s, c);

}

/* XOR one full 16-byte block m into the state s. */
void rho_ad_eqov16 (
  const unsigned char* m,
  unsigned char* s) {

  for (int i = 0; i < 16; i++) {
    s[i] ^= m[i];
  }

}

/* XOR a padded partial block (first len8 bytes of m) into the state s. */
void rho_ad_ud16 (
  const unsigned char* m,
  unsigned char* s,
  int len8) {

  unsigned char mp [16];

  pad_bytewise(m, mp, len8);

  for (int i = 0; i < 16; i++) {
    s[i] ^= mp[i];
  }

}

/*
 * Full-block update in the encryption direction:
 *   c = g8A(s) ^ m,  s = s ^ m
 */
void rho_eqov16 (
  const unsigned char* m,
  unsigned char* c,
  unsigned char* s) {

  g8A(s,c);

  for (int i = 0; i < 16; i++) {
    unsigned char mi = m[i];
    s[i] ^= mi;
    c[i] ^= mi;
  }

}

/*
 * Partial-block update in the encryption direction.
 * The state absorbs the padded block. c[0..len8-1] receives g8A(s) ^ m and
 * c[len8..15] is set to zero, so 16 bytes of c are always written.
 */
void rho_ud16 (
  const unsigned char* m,
  unsigned char* c,
  unsigned char* s,
  int len8) {

  unsigned char mp [16];
  int n = pad_bytewise(m, mp, len8);

  g8A(s,c);

  for (int i = 0; i < 16; i++) {
    s[i] ^= mp[i];
    /* mp[i] == m[i] for every i < n; the length marker lives at index 15 >= n */
    c[i] = (i < n) ? (unsigned char)(c[i] ^ mp[i]) : 0;
  }

}

/*
 * Full-block update in the decryption direction:
 *   m = g8A(s) ^ c,  s = s ^ m
 */
void irho_eqov16 (
  unsigned char* m,
  const unsigned char* c,
  unsigned char* s) {

  g8A(s,m);

  for (int i = 0; i < 16; i++) {
    unsigned char p = (unsigned char)(m[i] ^ c[i]);
    s[i] ^= p;
    m[i] = p;
  }

}

/*
 * Partial-block update in the decryption direction.
 * Only m[0..len8-1] is written: the output buffer is not assumed to have room
 * for a full 16-byte block.
 */
void irho_ud16 (
  unsigned char* m,
  const unsigned char* c,
  unsigned char* s,
  int len8) {

  unsigned char cp [16];
  int n = pad_bytewise(c, cp, len8);

  for (int i = 0; i < 16; i++) {
    unsigned char g = g8_byte(s[i]);
    s[i] ^= cp[i];
    if (i < n) {
      s[i] ^= g;
      m[i] = (unsigned char)(g ^ cp[i]);
    }
  }

}

/* Set the counter to 1: CNT[0] = 1, CNT[1..7] = 0. */
void reset_lfsr_gf56 (unsigned char* CNT) {

  CNT[0] = 0x01;
  for (int i = 1; i < 8; i++) {
    CNT[i] = 0x00;
  }

}

/*
 * Advance the counter by one step: shift the 8 bytes left by one bit as a
 * little-endian integer and, when the top bit of CNT[6] was set before the
 * shift, XOR 0x95 into CNT[0]. CNT[7] takes part in the shift as well.
 */
void lfsr_gf56 (unsigned char* CNT) {

  unsigned char fb0 = (CNT[6] & 0x80) ? 0x95 : 0x00;

  for (int i = 7; i > 0; i--) {
    CNT[i] = (unsigned char)((CNT[i] << 1) | (CNT[i - 1] >> 7));
  }
  CNT[0] = (unsigned char)((CNT[0] << 1) ^ fb0);

}
p_skinny_ctrl) { + + CNT[7] = D; + p_skinny_ctrl->func_skinny_128_384_enc(s, p_skinny_ctrl, CNT, T, k); + +} + +void nonce_encryption ( + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + block_cipher(s,k,(unsigned char*)N,CNT,D,p_skinny_ctrl); + +} + +void generate_tag ( + unsigned char** c, unsigned char* s, + unsigned long long* clen) { + + g8A_for_Tag_Generation(s, *c); + + *c = *c + 16; + *c = *c - *clen; + +} + +unsigned long long msg_encryption_eqov16 ( + const unsigned char** M, unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* p_skinny_ctrl) { + + rho_eqov16(*M, *c, s); + *c = *c + 16; + *M = *M + 16; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return mlen - 16; + +} + +unsigned long long msg_encryption_ud16 ( + const unsigned char** M, unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* p_skinny_ctrl) { + +// char msg[64]; +// +// unsigned int st = (unsigned int )read_cycle(); + + rho_ud16(*M, *c, s, mlen); + +// unsigned int ed = (unsigned int )read_cycle(); +// sprintf(msg, "rho_ud16 %d\n", ed-st); +// SerialPuts(msg); +// +// fprint_bstr(NULL, "c = ", *c, 16); + + *c = *c + mlen; + *M = *M + mlen; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return 0; + +} + +unsigned long long msg_decryption_eqov16 ( + unsigned char** M, const unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long clen, + skinny_ctrl* p_skinny_ctrl) { + + irho_eqov16(*M, *c, s); + *c = *c + 16; + *M = *M + 16; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + + return clen - 16; + +} + +unsigned long 
long msg_decryption_ud16 ( + unsigned char** M, const unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long clen, + skinny_ctrl* p_skinny_ctrl) { + + irho_ud16(*M, *c, s, clen); + *c = *c + clen; + *M = *M + clen; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return 0; + +} + +unsigned long long ad_encryption_eqov32 ( + const unsigned char** A, unsigned char* s, + const unsigned char* k, unsigned long long adlen, + unsigned char* CNT, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + unsigned char T [16]; + + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&T[0]) = *(uint64_t*)(&(*A)[0]); + *(uint64_t*)(&T[8]) = *(uint64_t*)(&(*A)[8]); + +#else + + *(uint32_t*)(&T[0]) = *(uint32_t*)(&(*A)[0]); + *(uint32_t*)(&T[4]) = *(uint32_t*)(&(*A)[4]); + *(uint32_t*)(&T[8]) = *(uint32_t*)(&(*A)[8]); + *(uint32_t*)(&T[12]) = *(uint32_t*)(&(*A)[12]); + +#endif + + *A = *A + 16; + block_cipher(s,k,T,CNT,D,p_skinny_ctrl); + lfsr_gf56(CNT); + + return adlen - 32; + +} + +unsigned long long ad_encryption_ov16 ( + const unsigned char** A, unsigned char* s, + const unsigned char* k, unsigned long long adlen, + unsigned char* CNT, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + unsigned char T [16]; + + adlen = adlen - 16; + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + + pad(*A, T, adlen); + *A = *A + adlen; + block_cipher(s,k,T,CNT,D,p_skinny_ctrl); + lfsr_gf56(CNT); + + return 0; + +} + +unsigned long long ad_encryption_eq16 ( + const unsigned char** A, unsigned char* s, + unsigned char* CNT) { + + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + + return 0; + +} + +unsigned long long ad_encryption_ud16( + const unsigned char** A, unsigned char* s, + unsigned long long adlen, + unsigned char* CNT) { + + rho_ad_ud16(*A, s, adlen); + *A = *A + adlen; + lfsr_gf56(CNT); + + return 0; 
+ +} + +int crypto_aead_encrypt ( + unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, + const unsigned char* npub, + const unsigned char* k) { + + unsigned char s[16]; + unsigned char CNT[8]; + const unsigned char* A; + const unsigned char* M; + const unsigned char* N; + + skinny_ctrl ctrl; + ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void) nsec; + A = ad; + M = m; + N = npub; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = 0; + *(uint64_t*)(&s[8]) = 0; + +#else + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#endif + + reset_lfsr_gf56(CNT); + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else while (adlen > 0) { + if (adlen < 16) { // The last block of AD is odd and incomplete + adlen = ad_encryption_ud16(&A,s,adlen,CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 16) { // The last block of AD is odd and complete + adlen = ad_encryption_eq16(&A,s,CNT); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else if (adlen < 32) { // The last block of AD is even and incomplete + adlen = ad_encryption_ov16(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 32) { // The last block of AD is even and complete + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else { // A normal full pair of blocks of AD + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + } + } + + ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + + *clen = mlen + 16; + + if (mlen == 0) { // M is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x15,&ctrl); + } + else while (mlen > 0) { + if (mlen < 16) { // The last block of M is incomplete + mlen = 
msg_encryption_ud16(&M,&c,N,CNT,s,k,0x15,mlen,&ctrl); + } + else if (mlen == 16) { // The last block of M is complete + mlen = msg_encryption_eqov16(&M,&c,N,CNT,s,k,0x14,mlen,&ctrl); + } + else { // A normal full message block + mlen = msg_encryption_eqov16(&M,&c,N,CNT,s,k,0x04,mlen,&ctrl); + } + } + + // Tag generation + generate_tag(&c,s,clen); + + return 0; + +} + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) { + + unsigned char s[16]; + unsigned char T[16]; + unsigned char CNT[8]; + const unsigned char* A; + unsigned char* M; + const unsigned char* N; + + skinny_ctrl ctrl; + ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void) nsec; + A = ad; + M = m; + N = npub; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = 0; + *(uint64_t*)(&s[8]) = 0; + +#else + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#endif + + reset_lfsr_gf56(CNT); + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else while (adlen > 0) { + if (adlen < 16) { // The last block of AD is odd and incomplete + adlen = ad_encryption_ud16(&A,s,adlen,CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 16) { // The last block of AD is odd and complete + adlen = ad_encryption_eq16(&A,s,CNT); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else if (adlen < 32) { // The last block of AD is even and incomplete + adlen = ad_encryption_ov16(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 32) { // The last block of AD is even and complete + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else { // A normal full pair of blocks of AD + 
adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + } + } + + ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + + clen = clen -16; + *mlen = clen; + + if (clen == 0) { // C is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x15,&ctrl); + } + else while (clen > 0) { + if (clen < 16) { // The last block of C is incomplete + clen = msg_decryption_ud16(&M,&c,N,CNT,s,k,0x15,clen,&ctrl); + } + else if (clen == 16) { // The last block of C is complete + clen = msg_decryption_eqov16(&M,&c,N,CNT,s,k,0x14,clen,&ctrl); + } + else { // A normal full message block + clen = msg_decryption_eqov16(&M,&c,N,CNT,s,k,0x04,clen,&ctrl); + } + } + + // Tag generation + g8A_for_Tag_Generation(s, T); + + for (int i = 0; i < 16; i++) { + if (T[i] != (*(c+i))) { + return -1; + } + } + + return 0; + +} diff --git a/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny.h b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny.h new file mode 100644 index 0000000..c8e7b56 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny.h @@ -0,0 +1,106 @@ +#define ___SKINNY_LOOP +//#define ___NUM_OF_ROUNDS_56 +#if (defined(__riscv_xlen) && (__riscv_xlen == 64)) +#define ___ENABLE_DWORD_CAST +#endif + +#include + +typedef struct ___skinny_ctrl { +#ifdef ___NUM_OF_ROUNDS_56 + unsigned char roundKeys[960]; // number of rounds : 56 +#else + unsigned char roundKeys[704]; // number of rounds : 40 +#endif + void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K); +} skinny_ctrl; + +extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc1_1 (unsigned char* 
/*
 * Shared declarations for the SKINNY-128-384 routines: build-time switches,
 * the control structure holding the round keys, the cipher entry points and
 * the byte-permutation helper macros.
 */
#ifndef SKINNY_H
#define SKINNY_H

/* Select the loop-based (instead of the fully unrolled) code variants. */
#define ___SKINNY_LOOP
//#define ___NUM_OF_ROUNDS_56

/* On 64-bit RISC-V the code works on 64-bit words instead of 32-bit words. */
#if (defined(__riscv_xlen) && (__riscv_xlen == 64))
#define ___ENABLE_DWORD_CAST
#endif

#include <stdint.h>

/*
 * roundKeys               : round-key storage, sized by the number of rounds
 * func_skinny_128_384_enc : cipher routine to call; callers switch it between
 *                           the entry points declared below
 */
typedef struct ___skinny_ctrl {
#ifdef ___NUM_OF_ROUNDS_56
  unsigned char roundKeys[960]; // number of rounds : 56
#else
  unsigned char roundKeys[704]; // number of rounds : 40
#endif
  void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K);
} skinny_ctrl;

extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);

/* w = x3:x2:x1:x0 with x0 in the least significant byte. Expands to a full statement. */
#define pack_word(x0, x1, x2, x3, w)    \
  w  = ((x3) << 24) ^                   \
       ((x2) << 16) ^                   \
       ((x1) << 8) ^                    \
       (x0);

/* Inverse of pack_word: split w into its four bytes, x0 being the least significant. */
#define unpack_word(x0, x1, x2, x3, w)  \
  x0 = ((w) & 0xff);                    \
  x1 = (((w) >> 8) & 0xff);             \
  x2 = (((w) >> 16) & 0xff);            \
  x3 = ((w) >> 24);

/*
 * PERMUTATION() reorders the eight bytes (7 6 5 4 3 2 1 0) into
 * (5 7 2 3 6 0 4 1). It works on variables of the expanding scope:
 *   64-bit variant : dw (in/out), dt0 and dt1 (scratch)
 *   32-bit variant : w0 and w1 (in/out), t0, t1 and t2 (scratch)
 */
#ifdef ___ENABLE_DWORD_CAST

#define PERMUTATION()                                                  \
/* permutation */                                                      \
                                                                       \
  /* 7 6 5 4 3 2 1 0 */                                                \
  /* 5 7 2 3 6 0 4 1 */                                                \
                                                                       \
  /* dw (7 6 5 4 3 2 1 0) */                                           \
                                                                       \
  /* dw (5 7 2 3 6 0 4 1) */                                           \
                                                                       \
  dt0 = dw >> 24;                  /* - - - 7 6 5 4 3 */               \
  dt0 = dt0 & 0x00000000ff00ff00;  /* - - - - 6 - 4 - */               \
                                                                       \
  dt1 = dw << 16;                  /* 5 4 3 2 1 0 - - */               \
  dt1 = dt1 & 0xff00000000ff0000;  /* 5 - - - - 0 - - */               \
  dt0 = dt0 ^ dt1;                 /* 5 - - - 6 0 4 - */               \
                                                                       \
  dt1 = dw >> 8;                   /* - 7 6 5 4 3 2 1 */               \
  dt1 = dt1 & 0x00ff0000000000ff;  /* - 7 - - - - - 1 */               \
  dt0 = dt0 ^ dt1;                 /* 5 7 - - 6 0 4 1 */               \
                                                                       \
  dt1 = dw << 8;                   /* 6 5 4 3 2 1 0 - */               \
  dt1 = dt1 & 0x000000ff00000000;  /* - - - 3 - - - - */               \
  dt0 = dt0 ^ dt1;                 /* 5 7 - 3 6 0 4 1 */               \
                                                                       \
  dt1 = dw << 24;                  /* 4 3 2 1 0 - - - */               \
  dw = dt1 & 0x0000ff0000000000;   /* - - 2 - - - - - */               \
  dw = dw ^ dt0;                   /* 5 7 2 3 6 0 4 1 */

#else

#define PERMUTATION()                                                  \
/* permutation */                                                      \
                                                                       \
  /* 7 6 5 4 3 2 1 0 */                                                \
  /* 5 7 2 3 6 0 4 1 */                                                \
                                                                       \
  /* w0 (3 2 1 0) */                                                   \
  /* w1 (7 6 5 4) */                                                   \
                                                                       \
  /* w0 (6 0 4 1) */                                                   \
  /* w1 (5 7 2 3) */                                                   \
                                                                       \
  t0 = w1 << 8;         /* 6 5 4 - */                                  \
  t0 = t0 & 0xff00ff00; /* 6 - 4 - */                                  \
                                                                       \
  t1 = w1 << 16;        /* 5 4 - - */                                  \
  t1 = t1 & 0xff000000; /* 5 - - - */                                  \
                                                                       \
  t2 = w1 & 0xff000000; /* 7 - - - */                                  \
  t2 = t2 >> 8;         /* - 7 - - */                                  \
  t1 = t1 ^ t2;         /* 5 7 - - */                                  \
                                                                       \
  t2 = w0 & 0xff000000; /* 3 - - - */                                  \
  t2 = t2 >> 24;        /* - - - 3 */                                  \
  t1 = t1 ^ t2;         /* 5 7 - 3 */                                  \
                                                                       \
  w1 = w0 >> 8;         /* - 3 2 1 */                                  \
  w1 = w1 & 0x0000ff00; /* - - 2 - */                                  \
  w1 = w1 ^ t1;         /* 5 7 2 3 */                                  \
                                                                       \
  t2 = w0 & 0x0000ff00; /* - - 1 - */                                  \
  t2 = t2 >> 8;         /* - - - 1 */                                  \
  t0 = t0 ^ t2;         /* 6 - 4 1 */                                  \
                                                                       \
  w0 = w0 << 16;        /* 1 0 - - */                                  \
  w0 = w0 & 0x00ff0000; /* - 0 - - */                                  \
  w0 = w0 ^ t0;         /* 6 0 4 1 */

#endif

#endif /* SKINNY_H */
,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th,43th, ... ,51th,53th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... 
,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint64_t* tk2; // used in MACRO + uint64_t* tk3; // used in MACRO + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[16]; + + tk2 = (uint64_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2 = dw ^ *tk3; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... 
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + w0 = ((w0 << 1) & 0xfefefefe) ^ \ + (((w0 >> 7) ^ (w0 >> 5)) & 0x01010101); \ + w1 = ((w1 << 1) & 0xfefefefe) ^ \ + (((w1 >> 7) ^ (w1 >> 5)) & 0x01010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2++ = w0 ^ *tk3++; \ + *tk2++ = w1 ^ *tk3++; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th,43th, ... 
,53rd,55th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ...
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_key_schedule3.c b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_key_schedule3.c new file mode 100644 index 0000000..5dcaf7f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_key_schedule3.c @@ -0,0 +1,428 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * AC(c0 c1) ^ TK3 -> store + * ART(TK3) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + dw = ((dw >> 1) & 0x7f7f7f7f7f7f7f7f) ^ \ + (((dw << 7) ^ (dw << 1)) & 0x8080808080808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + dt0 = dw ^ c0Val; \ + *tk3 = dt0 ^ ((uint64_t)c1Val << 40); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = dw ^ 0x01; + tk3 += 1; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41td,43th, ... 
,53th,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + uint64_t c0; + uint64_t c1; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = dw ^ 0x01; + tk3 += 1; + + pRC += 4; + // 3rd,5th, ... 
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; + tk3 = (uint64_t*)&roundKeys[392]; +#else + pRC -= 110; + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + w0 = ((w0 >> 1) & 0x7f7f7f7f) ^ \ + (((w0 << 7) ^ (w0 << 1)) & 0x80808080); \ + w1 = ((w1 >> 1) & 0x7f7f7f7f) ^ \ + (((w1 << 7) ^ (w1 << 1)) & 0x80808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + *tk3++ = w0 ^ c0Val; \ + *tk3++ = w1 ^ ((uint32_t)c1Val << 8); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + // 3rd,5th, ... 
,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41td,43th, ... ,53th,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... 
,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + uint32_t c0; + uint32_t c1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + pRC += 4; + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; + tk3 = (uint32_t*)&roundKeys[392]; +#else + pRC -= 110; + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_main.c b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_main.c new file mode 100644 index 0000000..8a6e75f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_main.c @@ -0,0 +1,675 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. 
+ * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. + * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * ART(TK1) -> store + * load AC(c0 c1) ^ TK3 ^ TK2 + * load TK1 + * calc AC(c0 c1) ^ TK3 ^ TK2 ^ TK1 -> use at (AC->ART) + * SC->SR->(AC->ART)->MC + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +/* + * S-BOX + */ +unsigned char SBOX[] += { + // Original + 0x65, 0x4c, 0x6a, 0x42, 0x4b, 0x63, 0x43, 0x6b, 0x55, 0x75, 0x5a, 0x7a, 0x53, 0x73, 0x5b, 0x7b, + 0x35, 0x8c, 0x3a, 0x81, 0x89, 0x33, 0x80, 0x3b, 0x95, 0x25, 0x98, 0x2a, 0x90, 0x23, 0x99, 0x2b, + 0xe5, 0xcc, 0xe8, 0xc1, 0xc9, 0xe0, 0xc0, 0xe9, 0xd5, 0xf5, 0xd8, 0xf8, 0xd0, 0xf0, 0xd9, 0xf9, + 0xa5, 0x1c, 0xa8, 0x12, 0x1b, 0xa0, 0x13, 0xa9, 0x05, 0xb5, 0x0a, 0xb8, 0x03, 0xb0, 0x0b, 0xb9, + 0x32, 0x88, 0x3c, 0x85, 0x8d, 0x34, 0x84, 0x3d, 0x91, 0x22, 0x9c, 0x2c, 0x94, 0x24, 0x9d, 0x2d, + 0x62, 0x4a, 0x6c, 0x45, 0x4d, 0x64, 0x44, 0x6d, 0x52, 0x72, 0x5c, 0x7c, 0x54, 0x74, 0x5d, 0x7d, + 0xa1, 0x1a, 0xac, 0x15, 0x1d, 0xa4, 0x14, 0xad, 0x02, 0xb1, 0x0c, 0xbc, 0x04, 0xb4, 0x0d, 0xbd, + 0xe1, 0xc8, 0xec, 0xc5, 0xcd, 0xe4, 0xc4, 0xed, 0xd1, 0xf1, 0xdc, 0xfc, 0xd4, 0xf4, 0xdd, 0xfd, + 0x36, 0x8e, 0x38, 0x82, 0x8b, 0x30, 0x83, 0x39, 0x96, 0x26, 0x9a, 0x28, 0x93, 0x20, 0x9b, 0x29, + 0x66, 0x4e, 0x68, 0x41, 0x49, 0x60, 0x40, 0x69, 0x56, 0x76, 0x58, 0x78, 0x50, 0x70, 0x59, 0x79, + 0xa6, 0x1e, 0xaa, 0x11, 0x19, 0xa3, 0x10, 0xab, 0x06, 0xb6, 0x08, 0xba, 0x00, 0xb3, 0x09, 0xbb, + 0xe6, 0xce, 0xea, 0xc2, 0xcb, 0xe3, 0xc3, 0xeb, 0xd6, 0xf6, 0xda, 0xfa, 0xd3, 0xf3, 0xdb, 0xfb, + 0x31, 0x8a, 0x3e, 0x86, 0x8f, 0x37, 0x87, 0x3f, 0x92, 0x21, 0x9e, 0x2e, 0x97, 0x27, 0x9f, 0x2f, + 0x61, 0x48, 0x6e, 0x46, 0x4f, 0x67, 0x47, 0x6f, 0x51, 0x71, 0x5e, 0x7e, 0x57, 0x77, 0x5f, 0x7f, + 0xa2, 0x18, 0xae, 0x16, 0x1f, 0xa7, 0x17, 0xaf, 0x01, 0xb2, 0x0e, 0xbe, 0x07, 0xb7, 0x0f, 0xbf, + 0xe2, 0xca, 0xee, 0xc6, 0xcf, 0xe7, 0xc7, 0xef, 0xd2, 0xf2, 
0xde, 0xfe, 0xd7, 0xf7, 0xdf, 0xff, +}; + + /* + * S-BOX ^ AC(c2) + */ +unsigned char SBOX2[] += { // Original ^ c2(0x02) + 0x67, 0x4e, 0x68, 0x40, 0x49, 0x61, 0x41, 0x69, 0x57, 0x77, 0x58, 0x78, 0x51, 0x71, 0x59, 0x79, + 0x37, 0x8e, 0x38, 0x83, 0x8b, 0x31, 0x82, 0x39, 0x97, 0x27, 0x9a, 0x28, 0x92, 0x21, 0x9b, 0x29, + 0xe7, 0xce, 0xea, 0xc3, 0xcb, 0xe2, 0xc2, 0xeb, 0xd7, 0xf7, 0xda, 0xfa, 0xd2, 0xf2, 0xdb, 0xfb, + 0xa7, 0x1e, 0xaa, 0x10, 0x19, 0xa2, 0x11, 0xab, 0x07, 0xb7, 0x08, 0xba, 0x01, 0xb2, 0x09, 0xbb, + 0x30, 0x8a, 0x3e, 0x87, 0x8f, 0x36, 0x86, 0x3f, 0x93, 0x20, 0x9e, 0x2e, 0x96, 0x26, 0x9f, 0x2f, + 0x60, 0x48, 0x6e, 0x47, 0x4f, 0x66, 0x46, 0x6f, 0x50, 0x70, 0x5e, 0x7e, 0x56, 0x76, 0x5f, 0x7f, + 0xa3, 0x18, 0xae, 0x17, 0x1f, 0xa6, 0x16, 0xaf, 0x00, 0xb3, 0x0e, 0xbe, 0x06, 0xb6, 0x0f, 0xbf, + 0xe3, 0xca, 0xee, 0xc7, 0xcf, 0xe6, 0xc6, 0xef, 0xd3, 0xf3, 0xde, 0xfe, 0xd6, 0xf6, 0xdf, 0xff, + 0x34, 0x8c, 0x3a, 0x80, 0x89, 0x32, 0x81, 0x3b, 0x94, 0x24, 0x98, 0x2a, 0x91, 0x22, 0x99, 0x2b, + 0x64, 0x4c, 0x6a, 0x43, 0x4b, 0x62, 0x42, 0x6b, 0x54, 0x74, 0x5a, 0x7a, 0x52, 0x72, 0x5b, 0x7b, + 0xa4, 0x1c, 0xa8, 0x13, 0x1b, 0xa1, 0x12, 0xa9, 0x04, 0xb4, 0x0a, 0xb8, 0x02, 0xb1, 0x0b, 0xb9, + 0xe4, 0xcc, 0xe8, 0xc0, 0xc9, 0xe1, 0xc1, 0xe9, 0xd4, 0xf4, 0xd8, 0xf8, 0xd1, 0xf1, 0xd9, 0xf9, + 0x33, 0x88, 0x3c, 0x84, 0x8d, 0x35, 0x85, 0x3d, 0x90, 0x23, 0x9c, 0x2c, 0x95, 0x25, 0x9d, 0x2d, + 0x63, 0x4a, 0x6c, 0x44, 0x4d, 0x65, 0x45, 0x6d, 0x53, 0x73, 0x5c, 0x7c, 0x55, 0x75, 0x5d, 0x7d, + 0xa0, 0x1a, 0xac, 0x14, 0x1d, 0xa5, 0x15, 0xad, 0x03, 0xb0, 0x0c, 0xbc, 0x05, 0xb5, 0x0d, 0xbd, + 0xe0, 0xc8, 0xec, 0xc4, 0xcd, 0xe5, 0xc5, 0xed, 0xd0, 0xf0, 0xdc, 0xfc, 0xd5, 0xf5, 0xdd, 0xfd, +}; + +#ifdef ___SKINNY_LOOP +/* + * Round Constants + */ +unsigned char RC[] += { + 0x01, 0x00, 0x03, 0x00, 0x07, 0x00, 0x0f, 0x00, 0x0f, 0x01, 0x0e, 0x03, 0x0d, 0x03, 0x0b, 0x03, + 0x07, 0x03, 0x0f, 0x02, 0x0e, 0x01, 0x0c, 0x03, 0x09, 0x03, 0x03, 0x03, 0x07, 0x02, 0x0e, 0x00, + 0x0d, 0x01, 0x0a, 0x03, 
0x05, 0x03, 0x0b, 0x02, 0x06, 0x01, 0x0c, 0x02, 0x08, 0x01, 0x00, 0x03, + 0x01, 0x02, 0x02, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x07, 0x01, 0x0e, 0x02, 0x0c, 0x01, 0x08, 0x03, + 0x01, 0x03, 0x03, 0x02, 0x06, 0x00, 0x0d, 0x00, 0x0b, 0x01, 0x06, 0x03, 0x0d, 0x02, 0x0a, 0x01, +#ifdef ___NUM_OF_ROUNDS_56 + 0x04, 0x03, 0x09, 0x02, 0x02, 0x01, 0x04, 0x02, 0x08, 0x00, 0x01, 0x01, 0x02, 0x02, 0x04, 0x00, + 0x09, 0x00, 0x03, 0x01, 0x06, 0x02, 0x0c, 0x00, 0x09, 0x01, 0x02, 0x03, 0x05, 0x02, 0x0a, 0x00, +#endif + }; +#endif + +extern void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2); +extern void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys); +#ifdef ___SKINNY_LOOP +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC); +#else +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys); +#endif + +void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + pt[8] = *(uint32_t*)(&K[0]); + pack_word(K[7], K[4], K[5], K[6], pt[9]); + pt[10] = *(uint32_t*)(&K[8]); + pack_word(K[15], K[12], K[13], K[14], pt[11]); + +#ifdef ___SKINNY_LOOP + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys, RC); +#else + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys); +#endif + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + + pskinny_ctrl->func_skinny_128_384_enc = skinny_128_384_enc12_12; + +} + +void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)K; + + 
uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0]; /* roundKeys is handed around as unsigned char*, so the word view needs an explicit cast */ + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)T; + (void)K; + + uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0]; /* explicit cast, same as in skinny_128_384_enc123_12 */ + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +#define PERMUTATION_TK1() \ + \ +/* permutation */ \ +{ \ + unsigned char tmp0 = roundKeys[0]; \ + unsigned char tmp1 = roundKeys[1]; \ + unsigned char tmp2 = roundKeys[2]; \ + unsigned char tmp3 = roundKeys[3]; \ + unsigned char tmp4 = roundKeys[4]; \ + unsigned char tmp5 = roundKeys[5]; \ + unsigned char tmp6 = roundKeys[6]; \ + unsigned char tmp7 = roundKeys[7]; \ + \ + unsigned char* dst = &roundKeys[8]; \ + \ + /* 5 7 2 3 6 0 4 1 */ \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + \ + /* 2 5 0 6 7 1 3 4 */ \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + \ + /* 0 2 1 7 5 4 6 3 */ \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp5; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + \ + /* 1 0 4 5 2 3 7 6 */ \ + *dst++ = tmp6; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp1; \ + \ + /* 4 1 3 2 0 6 5 7 */ \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + *dst++
= tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp2; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + \ + /* 3 4 6 0 1 7 2 5 */ \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + \ + /* 6 3 7 1 4 5 0 2 */ \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ +} + +#define SBOX_0(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t0; \ + b1 = (uint8_t)t1; \ + b2 = (uint8_t)t2; \ + b3 = (uint8_t)t3; + +#define SBOX_8(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t3; \ + b1 = (uint8_t)t0; \ + b2 = (uint8_t)t1; \ + b3 = (uint8_t)t2; + +#define SBOX_16(b0, b1, b2, b3) \ + \ + t0 = sbox2[b0]; /* AC(c2) */ \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t2; \ + b1 = (uint8_t)t3; \ + b2 = (uint8_t)t0; \ + b3 = (uint8_t)t1; + +#define SBOX_24(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t1; \ + b1 = (uint8_t)t2; \ + b2 = (uint8_t)t3; \ + b3 = (uint8_t)t0; + +#ifdef ___ENABLE_DWORD_CAST + +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; 
\ + *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; \ + *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ +} + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + tk1 = (uint64_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 17th, ...,32th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 33th, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 49th, ... 
,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + + // 1st, ... ,32th or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33th , ... ,40th or 49th, .... ,56th round + { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + t0 ^= *tk1++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], 
block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk2++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ +} + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + tk1 = (uint32_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 17th, ...,32th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 33th, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 49th, ... 
,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + + // 1st, ... ,32th or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33th , ... ,40th or 49th, .... ,56th round + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/api.h b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/encrypt.c b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/encrypt.c new file mode 100644 index 0000000..f329721 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/encrypt.c @@ -0,0 +1,1136 @@ +/* + * Date: 29 November 2018 + * Contact: Thomas Peyrin - thomas.peyrin@gmail.com + * Mustafa Khairallah - mustafam001@e.ntu.edu.sg + */ + +#include "crypto_aead.h" +#include "api.h" +#include "skinny.h" +#include +#include + +void pad (const unsigned char* m, unsigned char* mp, int len8) { + +#ifdef ___ENABLE_DWORD_CAST + + 
if (0 == len8) { + *(uint64_t*)(&mp[0]) = 0; + *(uint64_t*)(&mp[8]) = 0; + } else if (8 > len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]) & (0xffffffffffffffff >> (64 - len8*8)); + *(uint64_t*)(&mp[8]) = 0; + mp[15] = len8; + } else if (8 == len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = 0; + mp[15] = 8; + } else if (16 > len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = *(uint64_t*)(&m[8]) & (0xffffffffffffffff >> (128 - len8*8)); + mp[15] = len8; + } else { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = *(uint64_t*)(&m[8]); + } + +#else + + if (0 == len8) { + *(uint32_t*)(&mp[0]) = 0; + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + } else if (4 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]) & (0xffffffff >> (32 - len8*8)); + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (4 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 4; + } else if (8 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]) & (0xffffffff >> (64 - len8*8)); + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (8 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 8; + } else if (12 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]) & (0xffffffff >> (96 - len8*8)); + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (12 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = 0; + 
mp[15] = 12; + } else if (16 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = *(uint32_t*)(&m[12]) & (0xffffffff >> (128 - len8*8)); + mp[15] = len8; + } else { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = *(uint32_t*)(&m[12]); + } + +#endif + +} + +void g8A (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t c0, c1; + + c0 = ((s0 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x8080808080808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x8080808080808080); + + *(uint64_t*)(&c[0]) = c0; + *(uint64_t*)(&c[8]) = c1; + +#else + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#endif + +} + +void g8A_for_Tag_Generation (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t c0, c1; + + c0 = ((s0 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x8080808080808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x8080808080808080); + + // use byte access because of memory alignment. + // c is not always in word(4 byte) alignment. 
+ c[0] = c0 &0xFF; + c[1] = (c0>>8) &0xFF; + c[2] = (c0>>16)&0xFF; + c[3] = (c0>>24)&0xFF; + c[4] = (c0>>32)&0xFF; + c[5] = (c0>>40)&0xFF; + c[6] = (c0>>48)&0xFF; + c[7] = c0>>56; + c[8] = c1 &0xFF; + c[9] = (c1>>8) &0xFF; + c[10] = (c1>>16)&0xFF; + c[11] = (c1>>24)&0xFF; + c[12] = (c1>>32)&0xFF; + c[13] = (c1>>40)&0xFF; + c[14] = (c1>>48)&0xFF; + c[15] = c1>>56; + +#else + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + // use byte access because of memory alignment. + // c is not always in word(4 byte) alignment. + c[0] = c0 &0xFF; + c[1] = (c0>>8) &0xFF; + c[2] = (c0>>16)&0xFF; + c[3] = c0>>24; + c[4] = c1 &0xFF; + c[5] = (c1>>8) &0xFF; + c[6] = (c1>>16)&0xFF; + c[7] = c1>>24; + c[8] = c2 &0xFF; + c[9] = (c2>>8) &0xFF; + c[10] = (c2>>16)&0xFF; + c[11] = c2>>24; + c[12] = c3 &0xFF; + c[13] = (c3>>8) &0xFF; + c[14] = (c3>>16)&0xFF; + c[15] = c3>>24; + +#endif + +} + +void rho_ad_eqov16 ( + const unsigned char* m, + unsigned char* s) { + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) ^= *(uint64_t*)(&m[0]); + *(uint64_t*)(&s[8]) ^= *(uint64_t*)(&m[8]); + +#else + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&m[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&m[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&m[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&m[12]); + +#endif + +} + +void rho_ad_ud16 ( + const unsigned char* m, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + pad(m,mp,len8); + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) ^= *(uint64_t*)(&mp[0]); + *(uint64_t*)(&s[8]) ^= *(uint64_t*)(&mp[8]); + +#else + + *(uint32_t*)(&s[0]) ^= 
*(uint32_t*)(&mp[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&mp[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&mp[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&mp[12]); + +#endif + +} + +void rho_eqov16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s) { + + g8A(s,c); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t c0 = *(uint64_t*)(&c[0]); + uint64_t c1 = *(uint64_t*)(&c[8]); + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t m0 = *(uint64_t*)(&m[0]); + uint64_t m1 = *(uint64_t*)(&m[8]); + + s0 ^= m0; + s1 ^= m1; + + c0 ^= m0; + c1 ^= m1; + + *(uint64_t*)(&s[0]) = s0; + *(uint64_t*)(&s[8]) = s1; + + *(uint64_t*)(&c[0]) = c0; + *(uint64_t*)(&c[8]) = c1; + +#else + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= m0; + s1 ^= m1; + s2 ^= m2; + s3 ^= m3; + + c0 ^= m0; + c1 ^= m1; + c2 ^= m2; + c3 ^= m3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#endif + +} + +void rho_ud16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + + pad(m,mp,len8); + + g8A(s,c); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t mp0 = *(uint64_t*)&mp[0]; + uint64_t mp1 = *(uint64_t*)&mp[8]; + uint64_t c0 = *(uint64_t*)&c[0]; + uint64_t c1 = *(uint64_t*)&c[8]; + + *(uint64_t*)(&s[0]) ^= mp0; + *(uint64_t*)(&s[8]) ^= mp1; + + if (0 == len8) { + c0 = 0; + c1 = 0; + } else if (8 > len8) { + c0 = c0 ^ (mp0 
& 0xffffffffffffffff >> (64 - (len8*8))); + c0 = c0 ^ (c0 & 0xffffffffffffffff << ( (len8*8))); + c1 = 0; + } else if (8 == len8) { + c0 = c0 ^ mp0; + c1 = 0; + } else if (16 > len8) { + len8 -= 8; + c0 = c0 ^ mp0; + c1 = c1 ^ (mp1 & 0xffffffffffffffff >> (64 - (len8*8))); + c1 = c1 ^ (c1 & 0xffffffffffffffff << ( (len8*8))); + } else { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + } + + *(uint64_t*)&c[0] = c0; + *(uint64_t*)&c[8] = c1; + +#else + + uint32_t mp0 = *(uint32_t*)&mp[0]; + uint32_t mp1 = *(uint32_t*)&mp[4]; + uint32_t mp2 = *(uint32_t*)&mp[8]; + uint32_t mp3 = *(uint32_t*)&mp[12]; + uint32_t c0 = *(uint32_t*)&c[0]; + uint32_t c1 = *(uint32_t*)&c[4]; + uint32_t c2 = *(uint32_t*)&c[8]; + uint32_t c3 = *(uint32_t*)&c[12]; + + *(uint32_t*)(&s[0]) ^= mp0; + *(uint32_t*)(&s[4]) ^= mp1; + *(uint32_t*)(&s[8]) ^= mp2; + *(uint32_t*)(&s[12]) ^= mp3; + + if (0 == len8) { + c0 = 0; + c1 = 0; + c2 = 0; + c3 = 0; + } else if (4 > len8) { + c0 = c0 ^ (mp0 & 0xffffffff >> (32 - (len8*8))); + c0 = c0 ^ (c0 & 0xffffffff << ( (len8*8))); + c1 = 0; + c2 = 0; + c3 = 0; + } else if (4 == len8) { + c0 = c0 ^ mp0; + c1 = 0; + c2 = 0; + c3 = 0; + } else if (8 > len8) { + len8 -= 4; + c0 = c0 ^ mp0; + c1 = c1 ^ (mp1 & 0xffffffff >> (32 - (len8*8))); + c1 = c1 ^ (c1 & 0xffffffff << ( (len8*8))); + c2 = 0; + c3 = 0; + } else if (8 == len8) { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = 0; + c3 = 0; + } else if (12 > len8) { + len8 -= 8; + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ (mp2 & 0xffffffff >> (32 - (len8*8))); + c2 = c2 ^ (c2 & 0xffffffff << ( (len8*8))); + c3 = 0; + } else if (12 == len8) { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = 0; + } else if (16 > len8) { + len8 -= 12; + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = c3 ^ (mp3 & 0xffffffff >> (32 - (len8*8))); + c3 = c3 ^ (c3 & 0xffffffff << ( (len8*8))); + } else { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = c3 ^ mp3; + } + + *(uint32_t*)&c[0] = c0; + *(uint32_t*)&c[4] = c1; + 
*(uint32_t*)&c[8] = c2; + *(uint32_t*)&c[12] = c3; + +#endif + +} + +void irho_eqov16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s) { + + g8A(s,m); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t c0 = *(uint64_t*)(&c[0]); + uint64_t c1 = *(uint64_t*)(&c[8]); + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t m0 = *(uint64_t*)(&m[0]); + uint64_t m1 = *(uint64_t*)(&m[8]); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + + m0 ^= c0; + m1 ^= c1; + + *(uint64_t*)(&s[0]) = s0; + *(uint64_t*)(&s[8]) = s1; + + *(uint64_t*)(&m[0]) = m0; + *(uint64_t*)(&m[8]) = m1; + +#else + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + s2 ^= c2 ^ m2; + s3 ^= c3 ^ m3; + + m0 ^= c0; + m1 ^= c1; + m2 ^= c2; + m3 ^= c3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&m[0]) = m0; + *(uint32_t*)(&m[4]) = m1; + *(uint32_t*)(&m[8]) = m2; + *(uint32_t*)(&m[12]) = m3; + +#endif + +} + +void irho_ud16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char cp [16]; + + pad(c,cp,len8); + + g8A(s,m); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t cp0 = *(uint64_t*)&cp[0]; + uint64_t cp1 = *(uint64_t*)&cp[8]; + uint64_t m0 = *(uint64_t*)&m[0]; + uint64_t m1 = *(uint64_t*)&m[8]; + uint64_t s0 = *(uint64_t*)&s[0]; + uint64_t s1 = *(uint64_t*)&s[8]; + + s0 ^= cp0; + s1 ^= cp1; + + if (0 == len8) { + m0 = 0; + m1 = 0; + } else if (8 > len8) { + s0 = s0 ^ (m0 & 0xffffffffffffffff >> (64 - (len8*8))); + + 
m0 = m0 ^ (cp0 & 0xffffffffffffffff >> (64 - (len8*8))); + m0 = m0 ^ (m0 & 0xffffffffffffffff << ( (len8*8))); + m1 = 0; + } else if (8 == len8) { + s0 = s0 ^ m0; + + m0 = m0 ^ cp0; + m1 = 0; + } else if (16 > len8) { + len8 -= 8; + s0 = s0 ^ m0; + s1 = s1 ^ (m1 & 0xffffffffffffffff >> (64 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ (cp1 & 0xffffffffffffffff >> (64 - (len8*8))); + m1 = m1 ^ (m1 & 0xffffffffffffffff << ( (len8*8))); + } else { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + } + + *(uint64_t*)&s[0] = s0; + *(uint64_t*)&s[8] = s1; + *(uint64_t*)&m[0] = m0; + *(uint64_t*)&m[8] = m1; + +#else + + uint32_t cp0 = *(uint32_t*)&cp[0]; + uint32_t cp1 = *(uint32_t*)&cp[4]; + uint32_t cp2 = *(uint32_t*)&cp[8]; + uint32_t cp3 = *(uint32_t*)&cp[12]; + uint32_t m0 = *(uint32_t*)&m[0]; + uint32_t m1 = *(uint32_t*)&m[4]; + uint32_t m2 = *(uint32_t*)&m[8]; + uint32_t m3 = *(uint32_t*)&m[12]; + uint32_t s0 = *(uint32_t*)&s[0]; + uint32_t s1 = *(uint32_t*)&s[4]; + uint32_t s2 = *(uint32_t*)&s[8]; + uint32_t s3 = *(uint32_t*)&s[12]; + + s0 ^= cp0; + s1 ^= cp1; + s2 ^= cp2; + s3 ^= cp3; + + if (0 == len8) { + m0 = 0; + m1 = 0; + m2 = 0; + m3 = 0; + } else if (4 > len8) { + s0 = s0 ^ (m0 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ (cp0 & 0xffffffff >> (32 - (len8*8))); + m0 = m0 ^ (m0 & 0xffffffff << ( (len8*8))); + m1 = 0; + m2 = 0; + m3 = 0; + } else if (4 == len8) { + s0 = s0 ^ m0; + + m0 = m0 ^ cp0; + m1 = 0; + m2 = 0; + m3 = 0; + } else if (8 > len8) { + len8 -= 4; + s0 = s0 ^ m0; + s1 = s1 ^ (m1 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ (cp1 & 0xffffffff >> (32 - (len8*8))); + m1 = m1 ^ (m1 & 0xffffffff << ( (len8*8))); + m2 = 0; + m3 = 0; + } else if (8 == len8) { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = 0; + m3 = 0; + } else if (12 > len8) { + len8 -= 8; + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ (m2 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 
^ (cp2 & 0xffffffff >> (32 - (len8*8))); + m2 = m2 ^ (m2 & 0xffffffff << ( (len8*8))); + m3 = 0; + } else if (12 == len8) { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = 0; + } else if (16 > len8) { + len8 -= 12; + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + s3 = s3 ^ (m3 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = m3 ^ (cp3 & 0xffffffff >> (32 - (len8*8))); + m3 = m3 ^ (m3 & 0xffffffff << ( (len8*8))); + } else { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + s3 = s3 ^ m3; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = m3 ^ cp3; + } + + *(uint32_t*)&s[0] = s0; + *(uint32_t*)&s[4] = s1; + *(uint32_t*)&s[8] = s2; + *(uint32_t*)&s[12] = s3; + *(uint32_t*)&m[0] = m0; + *(uint32_t*)&m[4] = m1; + *(uint32_t*)&m[8] = m2; + *(uint32_t*)&m[12] = m3; + +#endif + +} + +void reset_lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&CNT[0]) = 0x0000000000000001; // CNT7 CNT6 CNT5 CNT4 CNT3 CNT2 CNT1 CNT0 + +#else + + *(uint32_t*)(&CNT[0]) = 0x00000001; // CNT3 CNT2 CNT1 CNT0 + *(uint32_t*)(&CNT[4]) = 0x00000000; // CNT7 CNT6 CNT5 CNT4 + +#endif + +} + +void lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t C0; + uint64_t fb0; + + C0 = *(uint64_t*)(&CNT[0]); // CNT7 CNT6 CNT5 CNT4 CNT3 CNT2 CNT1 CNT0 + + fb0 = 0; + if (CNT[6] & 0x80) { + fb0 = 0x95; + } + + C0 = C0 << 1 ^ fb0; + + *(uint64_t*)(&CNT[0]) = C0; + +#else + + uint32_t C0; + uint32_t C1; + uint32_t fb0; + + C0 = *(uint32_t*)(&CNT[0]); // CNT3 CNT2 CNT1 CNT0 + C1 = *(uint32_t*)(&CNT[4]); // CNT7 CNT6 CNT5 CNT4 + + fb0 = 0; + if (CNT[6] & 0x80) { + fb0 = 0x95; + } + + C1 = C1 << 1 | C0 >> 31; + C0 = C0 << 1 ^ fb0; + + *(uint32_t*)(&CNT[0]) = C0; + *(uint32_t*)(&CNT[4]) = C1; + +#endif + +} + +void block_cipher( + unsigned char* s, + const unsigned char* k, unsigned char* T, + unsigned char* CNT, unsigned char D, + skinny_ctrl* 
p_skinny_ctrl) { + + CNT[7] = D; + p_skinny_ctrl->func_skinny_128_384_enc(s, p_skinny_ctrl, CNT, T, k); + +} + +void nonce_encryption ( + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + block_cipher(s,k,(unsigned char*)N,CNT,D,p_skinny_ctrl); + +} + +void generate_tag ( + unsigned char** c, unsigned char* s, + unsigned long long* clen) { + + g8A_for_Tag_Generation(s, *c); + + *c = *c + 16; + *c = *c - *clen; + +} + +unsigned long long msg_encryption_eqov16 ( + const unsigned char** M, unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* p_skinny_ctrl) { + + rho_eqov16(*M, *c, s); + *c = *c + 16; + *M = *M + 16; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return mlen - 16; + +} + +unsigned long long msg_encryption_ud16 ( + const unsigned char** M, unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* p_skinny_ctrl) { + +// char msg[64]; +// +// unsigned int st = (unsigned int )read_cycle(); + + rho_ud16(*M, *c, s, mlen); + +// unsigned int ed = (unsigned int )read_cycle(); +// sprintf(msg, "rho_ud16 %d\n", ed-st); +// SerialPuts(msg); +// +// fprint_bstr(NULL, "c = ", *c, 16); + + *c = *c + mlen; + *M = *M + mlen; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return 0; + +} + +unsigned long long msg_decryption_eqov16 ( + unsigned char** M, const unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long clen, + skinny_ctrl* p_skinny_ctrl) { + + irho_eqov16(*M, *c, s); + *c = *c + 16; + *M = *M + 16; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + + return clen - 16; + +} + +unsigned long 
long msg_decryption_ud16 ( + unsigned char** M, const unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long clen, + skinny_ctrl* p_skinny_ctrl) { + + irho_ud16(*M, *c, s, clen); + *c = *c + clen; + *M = *M + clen; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return 0; + +} + +unsigned long long ad_encryption_eqov32 ( + const unsigned char** A, unsigned char* s, + const unsigned char* k, unsigned long long adlen, + unsigned char* CNT, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + unsigned char T [16]; + + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&T[0]) = *(uint64_t*)(&(*A)[0]); + *(uint64_t*)(&T[8]) = *(uint64_t*)(&(*A)[8]); + +#else + + *(uint32_t*)(&T[0]) = *(uint32_t*)(&(*A)[0]); + *(uint32_t*)(&T[4]) = *(uint32_t*)(&(*A)[4]); + *(uint32_t*)(&T[8]) = *(uint32_t*)(&(*A)[8]); + *(uint32_t*)(&T[12]) = *(uint32_t*)(&(*A)[12]); + +#endif + + *A = *A + 16; + block_cipher(s,k,T,CNT,D,p_skinny_ctrl); + lfsr_gf56(CNT); + + return adlen - 32; + +} + +unsigned long long ad_encryption_ov16 ( + const unsigned char** A, unsigned char* s, + const unsigned char* k, unsigned long long adlen, + unsigned char* CNT, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + unsigned char T [16]; + + adlen = adlen - 16; + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + + pad(*A, T, adlen); + *A = *A + adlen; + block_cipher(s,k,T,CNT,D,p_skinny_ctrl); + lfsr_gf56(CNT); + + return 0; + +} + +unsigned long long ad_encryption_eq16 ( + const unsigned char** A, unsigned char* s, + unsigned char* CNT) { + + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + + return 0; + +} + +unsigned long long ad_encryption_ud16( + const unsigned char** A, unsigned char* s, + unsigned long long adlen, + unsigned char* CNT) { + + rho_ad_ud16(*A, s, adlen); + *A = *A + adlen; + lfsr_gf56(CNT); + + return 0; 
+ +} + +int crypto_aead_encrypt ( + unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, + const unsigned char* npub, + const unsigned char* k) { + + unsigned char s[16]; + unsigned char CNT[8]; + const unsigned char* A; + const unsigned char* M; + const unsigned char* N; + + skinny_ctrl ctrl; + ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void) nsec; + A = ad; + M = m; + N = npub; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = 0; + *(uint64_t*)(&s[8]) = 0; + +#else + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#endif + + reset_lfsr_gf56(CNT); + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else while (adlen > 0) { + if (adlen < 16) { // The last block of AD is odd and incomplete + adlen = ad_encryption_ud16(&A,s,adlen,CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 16) { // The last block of AD is odd and complete + adlen = ad_encryption_eq16(&A,s,CNT); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else if (adlen < 32) { // The last block of AD is even and incomplete + adlen = ad_encryption_ov16(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 32) { // The last block of AD is even and complete + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else { // A normal full pair of blocks of AD + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + } + } + + ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + + *clen = mlen + 16; + + if (mlen == 0) { // M is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x15,&ctrl); + } + else while (mlen > 0) { + if (mlen < 16) { // The last block of M is incomplete + mlen = 
msg_encryption_ud16(&M,&c,N,CNT,s,k,0x15,mlen,&ctrl); + } + else if (mlen == 16) { // The last block of M is complete + mlen = msg_encryption_eqov16(&M,&c,N,CNT,s,k,0x14,mlen,&ctrl); + } + else { // A normal full message block + mlen = msg_encryption_eqov16(&M,&c,N,CNT,s,k,0x04,mlen,&ctrl); + } + } + + // Tag generation + generate_tag(&c,s,clen); + + return 0; + +} + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) { + + unsigned char s[16]; + unsigned char T[16]; + unsigned char CNT[8]; + const unsigned char* A; + unsigned char* M; + const unsigned char* N; + + skinny_ctrl ctrl; + ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void) nsec; + A = ad; + M = m; + N = npub; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = 0; + *(uint64_t*)(&s[8]) = 0; + +#else + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#endif + + reset_lfsr_gf56(CNT); + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else while (adlen > 0) { + if (adlen < 16) { // The last block of AD is odd and incomplete + adlen = ad_encryption_ud16(&A,s,adlen,CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 16) { // The last block of AD is odd and complete + adlen = ad_encryption_eq16(&A,s,CNT); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else if (adlen < 32) { // The last block of AD is even and incomplete + adlen = ad_encryption_ov16(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 32) { // The last block of AD is even and complete + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else { // A normal full pair of blocks of AD + 
adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + } + } + + ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + + clen = clen -16; + *mlen = clen; + + if (clen == 0) { // C is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x15,&ctrl); + } + else while (clen > 0) { + if (clen < 16) { // The last block of C is incomplete + clen = msg_decryption_ud16(&M,&c,N,CNT,s,k,0x15,clen,&ctrl); + } + else if (clen == 16) { // The last block of C is complete + clen = msg_decryption_eqov16(&M,&c,N,CNT,s,k,0x14,clen,&ctrl); + } + else { // A normal full message block + clen = msg_decryption_eqov16(&M,&c,N,CNT,s,k,0x04,clen,&ctrl); + } + } + + // Tag generation + g8A_for_Tag_Generation(s, T); + + for (int i = 0; i < 16; i++) { + if (T[i] != (*(c+i))) { + return -1; + } + } + + return 0; + +} diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny.h b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny.h new file mode 100644 index 0000000..826f2f8 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny.h @@ -0,0 +1,106 @@ +#define ___SKINNY_LOOP +#define ___NUM_OF_ROUNDS_56 +#if (defined(__riscv_xlen) && (__riscv_xlen == 64)) +#define ___ENABLE_DWORD_CAST +#endif + +#include + +typedef struct ___skinny_ctrl { +#ifdef ___NUM_OF_ROUNDS_56 + unsigned char roundKeys[960]; // number of rounds : 56 +#else + unsigned char roundKeys[704]; // number of rounds : 40 +#endif + void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K); +} skinny_ctrl; + +extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc1_1 (unsigned char* input, 
skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); + +#define pack_word(x0, x1, x2, x3, w) \ + w = ((x3) << 24) ^ \ + ((x2) << 16) ^ \ + ((x1) << 8) ^ \ + (x0); + +#define unpack_word(x0, x1, x2, x3, w) \ + x0 = ((w) & 0xff); \ + x1 = (((w) >> 8) & 0xff); \ + x2 = (((w) >> 16) & 0xff); \ + x3 = ((w) >> 24); + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION() \ +/* permutation */ \ + \ + /* 7 6 5 4 3 2 1 0 */ \ + /* 5 7 2 3 6 0 4 1 */ \ + \ + /* dw (7 6 5 4 3 2 1 0) */ \ + \ + /* dw (5 7 2 3 6 0 4 1) */ \ + \ + dt0 = dw >> 24; /* - - - 7 6 5 4 3 */ \ + dt0 = dt0 & 0x00000000ff00ff00; /* - - - - 6 - 4 - */ \ + \ + dt1 = dw << 16; /* 5 4 3 2 1 0 - - */ \ + dt1 = dt1 & 0xff00000000ff0000; /* 5 - - - - 0 - - */ \ + dt0 = dt0 ^ dt1; /* 5 - - - 6 0 4 - */ \ + \ + dt1 = dw >> 8; /* - 7 6 5 4 3 2 1 */ \ + dt1 = dt1 & 0x00ff0000000000ff; /* - 7 - - - - - 1 */ \ + dt0 = dt0 ^ dt1; /* 5 7 - - 6 0 4 1 */ \ + \ + dt1 = dw << 8; /* 6 5 4 3 2 1 0 - */ \ + dt1 = dt1 & 0x000000ff00000000; /* - - - 3 - - - - */ \ + dt0 = dt0 ^ dt1; /* 5 7 - 3 6 0 4 1 */ \ + \ + dt1 = dw << 24; /* 4 3 2 1 0 - - - */ \ + dw = dt1 & 0x0000ff0000000000; /* - - 2 - - - - - */ \ + dw = dw ^ dt0; /* 5 7 2 3 6 0 4 1 */ + +#else + +#define PERMUTATION() \ +/* permutation */ \ + \ + /* 7 6 5 4 3 2 1 0 */ \ + /* 5 7 2 3 6 0 4 1 */ \ + \ + /* w0 (3 2 1 0) */ \ + /* w1 (7 6 5 4) */ \ + \ + /* w0 (6 0 4 1) */ \ + /* w1 (5 7 2 3) */ \ + \ + t0 = w1 << 8; /* 6 5 4 - */ \ + t0 = t0 & 0xff00ff00; /* 6 - 4 - */ \ + \ + t1 = w1 << 16; /* 5 4 - - */ \ + t1 = t1 & 0xff000000; /* 5 - - - */ \ + \ + t2 = w1 & 0xff000000; /* 7 - - - */ \ + t2 = t2 >> 8; /* - 7 - - */ \ + t1 = t1 ^ t2; /* 5 7 - - */ \ + \ + t2 = w0 & 0xff000000; /* 3 - - - */ \ + t2 = t2 >> 24; /* - - - 3 */ \ + t1 = t1 ^ t2; /* 5 7 - 3 */ \ + \ + w1 = w0 >> 8; /* - 3 2 1 */ \ + w1 = w1 & 0x0000ff00; /* - - 2 - */ \ + w1 = w1 ^ t1; /* 5 7 2 3 */ \ + \ + t2 = w0 & 0x0000ff00; /* - - 1 - */ \ + t2 = t2 >> 8; /* - - - 1 
*/ \ + t0 = t0 ^ t2; /* 6 - 4 1 */ \ + \ + w0 = w0 << 16; /* 1 0 - - */ \ + w0 = w0 & 0x00ff0000; /* - 0 - - */ \ + w0 = w0 ^ t0; /* 6 0 4 1 */ + +#endif + diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_key_schedule2.c b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_key_schedule2.c new file mode 100644 index 0000000..c2f30de --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_key_schedule2.c @@ -0,0 +1,431 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. + * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * load * AC(c0 c1) ^ TK3 + * calc AC(c0 c1) ^ TK2 -> store + * ART(TK2) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + dw = ((dw << 1) & 0xfefefefefefefefe) ^ \ + (((dw >> 7) ^ (dw >> 5)) & 0x0101010101010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2 = dw ^ *tk3; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint64_t* tk2; // used in MACRO + uint64_t* tk3; // used in MACRO + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + // load master key + dw = *(uint64_t*)&roundKeys[16]; + + tk2 = (uint64_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2 = dw ^ *tk3; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... 
,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th,43th, ... ,51th,53th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... 
,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint64_t* tk2; // used in MACRO + uint64_t* tk3; // used in MACRO + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[16]; + + tk2 = (uint64_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2 = dw ^ *tk3; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... 
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + w0 = ((w0 << 1) & 0xfefefefe) ^ \ + (((w0 >> 7) ^ (w0 >> 5)) & 0x01010101); \ + w1 = ((w1 << 1) & 0xfefefefe) ^ \ + (((w1 >> 7) ^ (w1 >> 5)) & 0x01010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2++ = w0 ^ *tk3++; \ + *tk2++ = w1 ^ *tk3++; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th,43th, ... 
,51th,53th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... 
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_key_schedule3.c b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_key_schedule3.c new file mode 100644 index 0000000..5dcaf7f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_key_schedule3.c @@ -0,0 +1,428 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * AC(c0 c1) ^ TK3 -> store + * ART(TK3) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + dw = ((dw >> 1) & 0x7f7f7f7f7f7f7f7f) ^ \ + (((dw << 7) ^ (dw << 1)) & 0x8080808080808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + dt0 = dw ^ c0Val; \ + *tk3 = dt0 ^ ((uint64_t)c1Val << 40); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = dw ^ 0x01; + tk3 += 1; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41td,43th, ... 
,53th,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + uint64_t c0; + uint64_t c1; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = dw ^ 0x01; + tk3 += 1; + + pRC += 4; + // 3rd,5th, ... 
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; + tk3 = (uint64_t*)&roundKeys[392]; +#else + pRC -= 110; + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + w0 = ((w0 >> 1) & 0x7f7f7f7f) ^ \ + (((w0 << 7) ^ (w0 << 1)) & 0x80808080); \ + w1 = ((w1 >> 1) & 0x7f7f7f7f) ^ \ + (((w1 << 7) ^ (w1 << 1)) & 0x80808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + *tk3++ = w0 ^ c0Val; \ + *tk3++ = w1 ^ ((uint32_t)c1Val << 8); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + // 3rd,5th, ... 
,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41td,43th, ... ,53th,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... 
,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + uint32_t c0; + uint32_t c1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + pRC += 4; + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; + tk3 = (uint32_t*)&roundKeys[392]; +#else + pRC -= 110; + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_main.c b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_main.c new file mode 100644 index 0000000..8a6e75f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_main.c @@ -0,0 +1,675 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. 
+ * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. + * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * ART(TK1) -> store + * load AC(c0 c1) ^ TK3 ^ TK2 + * load TK1 + * calc AC(c0 c1) ^ TK3 ^ TK2 ^ TK1 -> use at (AC->ART) + * SC->SR->(AC->ART)->MC + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +/* + * S-BOX + */ +unsigned char SBOX[] += { + // Original + 0x65, 0x4c, 0x6a, 0x42, 0x4b, 0x63, 0x43, 0x6b, 0x55, 0x75, 0x5a, 0x7a, 0x53, 0x73, 0x5b, 0x7b, + 0x35, 0x8c, 0x3a, 0x81, 0x89, 0x33, 0x80, 0x3b, 0x95, 0x25, 0x98, 0x2a, 0x90, 0x23, 0x99, 0x2b, + 0xe5, 0xcc, 0xe8, 0xc1, 0xc9, 0xe0, 0xc0, 0xe9, 0xd5, 0xf5, 0xd8, 0xf8, 0xd0, 0xf0, 0xd9, 0xf9, + 0xa5, 0x1c, 0xa8, 0x12, 0x1b, 0xa0, 0x13, 0xa9, 0x05, 0xb5, 0x0a, 0xb8, 0x03, 0xb0, 0x0b, 0xb9, + 0x32, 0x88, 0x3c, 0x85, 0x8d, 0x34, 0x84, 0x3d, 0x91, 0x22, 0x9c, 0x2c, 0x94, 0x24, 0x9d, 0x2d, + 0x62, 0x4a, 0x6c, 0x45, 0x4d, 0x64, 0x44, 0x6d, 0x52, 0x72, 0x5c, 0x7c, 0x54, 0x74, 0x5d, 0x7d, + 0xa1, 0x1a, 0xac, 0x15, 0x1d, 0xa4, 0x14, 0xad, 0x02, 0xb1, 0x0c, 0xbc, 0x04, 0xb4, 0x0d, 0xbd, + 0xe1, 0xc8, 0xec, 0xc5, 0xcd, 0xe4, 0xc4, 0xed, 0xd1, 0xf1, 0xdc, 0xfc, 0xd4, 0xf4, 0xdd, 0xfd, + 0x36, 0x8e, 0x38, 0x82, 0x8b, 0x30, 0x83, 0x39, 0x96, 0x26, 0x9a, 0x28, 0x93, 0x20, 0x9b, 0x29, + 0x66, 0x4e, 0x68, 0x41, 0x49, 0x60, 0x40, 0x69, 0x56, 0x76, 0x58, 0x78, 0x50, 0x70, 0x59, 0x79, + 0xa6, 0x1e, 0xaa, 0x11, 0x19, 0xa3, 0x10, 0xab, 0x06, 0xb6, 0x08, 0xba, 0x00, 0xb3, 0x09, 0xbb, + 0xe6, 0xce, 0xea, 0xc2, 0xcb, 0xe3, 0xc3, 0xeb, 0xd6, 0xf6, 0xda, 0xfa, 0xd3, 0xf3, 0xdb, 0xfb, + 0x31, 0x8a, 0x3e, 0x86, 0x8f, 0x37, 0x87, 0x3f, 0x92, 0x21, 0x9e, 0x2e, 0x97, 0x27, 0x9f, 0x2f, + 0x61, 0x48, 0x6e, 0x46, 0x4f, 0x67, 0x47, 0x6f, 0x51, 0x71, 0x5e, 0x7e, 0x57, 0x77, 0x5f, 0x7f, + 0xa2, 0x18, 0xae, 0x16, 0x1f, 0xa7, 0x17, 0xaf, 0x01, 0xb2, 0x0e, 0xbe, 0x07, 0xb7, 0x0f, 0xbf, + 0xe2, 0xca, 0xee, 0xc6, 0xcf, 0xe7, 0xc7, 0xef, 0xd2, 0xf2, 
0xde, 0xfe, 0xd7, 0xf7, 0xdf, 0xff, +}; + + /* + * S-BOX ^ AC(c2) + */ +unsigned char SBOX2[] += { // Original ^ c2(0x02) + 0x67, 0x4e, 0x68, 0x40, 0x49, 0x61, 0x41, 0x69, 0x57, 0x77, 0x58, 0x78, 0x51, 0x71, 0x59, 0x79, + 0x37, 0x8e, 0x38, 0x83, 0x8b, 0x31, 0x82, 0x39, 0x97, 0x27, 0x9a, 0x28, 0x92, 0x21, 0x9b, 0x29, + 0xe7, 0xce, 0xea, 0xc3, 0xcb, 0xe2, 0xc2, 0xeb, 0xd7, 0xf7, 0xda, 0xfa, 0xd2, 0xf2, 0xdb, 0xfb, + 0xa7, 0x1e, 0xaa, 0x10, 0x19, 0xa2, 0x11, 0xab, 0x07, 0xb7, 0x08, 0xba, 0x01, 0xb2, 0x09, 0xbb, + 0x30, 0x8a, 0x3e, 0x87, 0x8f, 0x36, 0x86, 0x3f, 0x93, 0x20, 0x9e, 0x2e, 0x96, 0x26, 0x9f, 0x2f, + 0x60, 0x48, 0x6e, 0x47, 0x4f, 0x66, 0x46, 0x6f, 0x50, 0x70, 0x5e, 0x7e, 0x56, 0x76, 0x5f, 0x7f, + 0xa3, 0x18, 0xae, 0x17, 0x1f, 0xa6, 0x16, 0xaf, 0x00, 0xb3, 0x0e, 0xbe, 0x06, 0xb6, 0x0f, 0xbf, + 0xe3, 0xca, 0xee, 0xc7, 0xcf, 0xe6, 0xc6, 0xef, 0xd3, 0xf3, 0xde, 0xfe, 0xd6, 0xf6, 0xdf, 0xff, + 0x34, 0x8c, 0x3a, 0x80, 0x89, 0x32, 0x81, 0x3b, 0x94, 0x24, 0x98, 0x2a, 0x91, 0x22, 0x99, 0x2b, + 0x64, 0x4c, 0x6a, 0x43, 0x4b, 0x62, 0x42, 0x6b, 0x54, 0x74, 0x5a, 0x7a, 0x52, 0x72, 0x5b, 0x7b, + 0xa4, 0x1c, 0xa8, 0x13, 0x1b, 0xa1, 0x12, 0xa9, 0x04, 0xb4, 0x0a, 0xb8, 0x02, 0xb1, 0x0b, 0xb9, + 0xe4, 0xcc, 0xe8, 0xc0, 0xc9, 0xe1, 0xc1, 0xe9, 0xd4, 0xf4, 0xd8, 0xf8, 0xd1, 0xf1, 0xd9, 0xf9, + 0x33, 0x88, 0x3c, 0x84, 0x8d, 0x35, 0x85, 0x3d, 0x90, 0x23, 0x9c, 0x2c, 0x95, 0x25, 0x9d, 0x2d, + 0x63, 0x4a, 0x6c, 0x44, 0x4d, 0x65, 0x45, 0x6d, 0x53, 0x73, 0x5c, 0x7c, 0x55, 0x75, 0x5d, 0x7d, + 0xa0, 0x1a, 0xac, 0x14, 0x1d, 0xa5, 0x15, 0xad, 0x03, 0xb0, 0x0c, 0xbc, 0x05, 0xb5, 0x0d, 0xbd, + 0xe0, 0xc8, 0xec, 0xc4, 0xcd, 0xe5, 0xc5, 0xed, 0xd0, 0xf0, 0xdc, 0xfc, 0xd5, 0xf5, 0xdd, 0xfd, +}; + +#ifdef ___SKINNY_LOOP +/* + * Round Constants + */ +unsigned char RC[] += { + 0x01, 0x00, 0x03, 0x00, 0x07, 0x00, 0x0f, 0x00, 0x0f, 0x01, 0x0e, 0x03, 0x0d, 0x03, 0x0b, 0x03, + 0x07, 0x03, 0x0f, 0x02, 0x0e, 0x01, 0x0c, 0x03, 0x09, 0x03, 0x03, 0x03, 0x07, 0x02, 0x0e, 0x00, + 0x0d, 0x01, 0x0a, 0x03, 
0x05, 0x03, 0x0b, 0x02, 0x06, 0x01, 0x0c, 0x02, 0x08, 0x01, 0x00, 0x03, + 0x01, 0x02, 0x02, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x07, 0x01, 0x0e, 0x02, 0x0c, 0x01, 0x08, 0x03, + 0x01, 0x03, 0x03, 0x02, 0x06, 0x00, 0x0d, 0x00, 0x0b, 0x01, 0x06, 0x03, 0x0d, 0x02, 0x0a, 0x01, +#ifdef ___NUM_OF_ROUNDS_56 + 0x04, 0x03, 0x09, 0x02, 0x02, 0x01, 0x04, 0x02, 0x08, 0x00, 0x01, 0x01, 0x02, 0x02, 0x04, 0x00, + 0x09, 0x00, 0x03, 0x01, 0x06, 0x02, 0x0c, 0x00, 0x09, 0x01, 0x02, 0x03, 0x05, 0x02, 0x0a, 0x00, +#endif + }; +#endif + +extern void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2); +extern void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys); +#ifdef ___SKINNY_LOOP +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC); +#else +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys); +#endif + +void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + pt[8] = *(uint32_t*)(&K[0]); + pack_word(K[7], K[4], K[5], K[6], pt[9]); + pt[10] = *(uint32_t*)(&K[8]); + pack_word(K[15], K[12], K[13], K[14], pt[11]); + +#ifdef ___SKINNY_LOOP + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys, RC); +#else + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys); +#endif + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + + pskinny_ctrl->func_skinny_128_384_enc = skinny_128_384_enc12_12; + +} + +void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)K; + + 
uint32_t *pt = &pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)T; + (void)K; + + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +#define PERMUTATION_TK1() \ + \ +/* permutation */ \ +{ \ + unsigned char tmp0 = roundKeys[0]; \ + unsigned char tmp1 = roundKeys[1]; \ + unsigned char tmp2 = roundKeys[2]; \ + unsigned char tmp3 = roundKeys[3]; \ + unsigned char tmp4 = roundKeys[4]; \ + unsigned char tmp5 = roundKeys[5]; \ + unsigned char tmp6 = roundKeys[6]; \ + unsigned char tmp7 = roundKeys[7]; \ + \ + unsigned char* dst = &roundKeys[8]; \ + \ + /* 5 7 2 3 6 0 4 1 */ \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + \ + /* 2 5 0 6 7 1 3 4 */ \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + \ + /* 0 2 1 7 5 4 6 3 */ \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp5; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + \ + /* 1 0 4 5 2 3 7 6 */ \ + *dst++ = tmp6; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp1; \ + \ + /* 4 1 3 2 0 6 5 7 */ \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + *dst++ 
= tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp2; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + \ + /* 3 4 6 0 1 7 2 5 */ \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + \ + /* 6 3 7 1 4 5 0 2 */ \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ +} + +#define SBOX_0(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t0; \ + b1 = (uint8_t)t1; \ + b2 = (uint8_t)t2; \ + b3 = (uint8_t)t3; + +#define SBOX_8(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t3; \ + b1 = (uint8_t)t0; \ + b2 = (uint8_t)t1; \ + b3 = (uint8_t)t2; + +#define SBOX_16(b0, b1, b2, b3) \ + \ + t0 = sbox2[b0]; /* AC(c2) */ \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t2; \ + b1 = (uint8_t)t3; \ + b2 = (uint8_t)t0; \ + b3 = (uint8_t)t1; + +#define SBOX_24(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t1; \ + b1 = (uint8_t)t2; \ + b2 = (uint8_t)t3; \ + b3 = (uint8_t)t0; + +#ifdef ___ENABLE_DWORD_CAST + +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; 
\ + *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; \ + *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ +} + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + tk1 = (uint64_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 17th, ...,32th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 33th, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 49th, ... 
,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + + // 1st, ... ,32th or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33th , ... ,40th or 49th, .... ,56th round + { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + t0 ^= *tk1++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], 
block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk2++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ +} + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + tk1 = (uint32_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 17th, ...,32th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 33th, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 49th, ... 
,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + + // 1st, ... ,32th or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33th , ... ,40th or 49th, .... ,56th round + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ +