/*
 * Copyright (C) 2020 Southern Storm Software, Pty Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "internal-photon256.h"
#include "internal-util.h"

/**
 * \brief Number of rounds in the PHOTON-256 permutation in bit-sliced form.
 */
#define PHOTON256_ROUNDS 12

/**
 * \brief Round constants for PHOTON-256, one 32-bit word per round.
 *
 * Indexed by the round number, 0 .. PHOTON256_ROUNDS - 1.
 */
static const uint32_t photon256_rc[PHOTON256_ROUNDS] = {
    0x96D2F0E1U, 0xB4F0D2C3U, 0xF0B49687U, 0x692D0F1EU,
    0x5A1E3C2DU, 0x3C785A4BU, 0xE1A58796U, 0x4B0F2D3CU,
    0x1E5A7869U, 0xA5E1C3D2U, 0xD296B4A5U, 0x2D694B5AU
};

/**
 * \brief Evaluates the PHOTON-256 S-box in bit-sliced form.
 *
 * \param x0 Slice with bit 0 of all nibbles.
 * \param x1 Slice with bit 1 of all nibbles.
 * \param x2 Slice with bit 2 of all nibbles.
 * \param x3 Slice with bit 3 of all nibbles.
 *
 * This bit-sliced S-box implementation is based on the AVR version
 * "add_avr8_bitslice_asm" from the PHOTON-Beetle reference code.
*/ #define photon256_sbox(x0, x1, x2, x3) \ do { \ x1 ^= x2; \ x3 ^= (x2 & x1); \ t1 = x3; \ x3 = (x3 & x1) ^ x2; \ t2 = x3; \ x3 ^= x0; \ x3 = ~(x3); \ x2 = x3; \ t2 |= x0; \ x0 ^= t1; \ x1 ^= x0; \ x2 |= x1; \ x2 ^= t1; \ x1 ^= t2; \ x3 ^= x1; \ } while (0) /** * \brief Performs a field multiplication on the 8 nibbles in a row. * * \param a Field constant to multiply by. * \param x Bit-sliced form of the row, with bits 0..3 of each nibble * in bytes 0..3 of the word. * * \return a * x packed into the bytes of a word. */ static uint32_t photon256_field_multiply(uint8_t a, uint32_t x) { /* For each 4-bit nibble we need to do this: * * result = 0; * for (bit = 0; bit < 4; ++ bit) { * if ((a & (1 << bit)) != 0) * result ^= x; * if ((x & 0x08) != 0) { * x = (x << 1) ^ 3; * } else { * x = (x << 1); * } * } * * We don't need to worry about constant time for "a" because it is a * known constant that isn't data-dependent. But we do need to worry * about constant time for "x" as it is data. */ uint32_t result = 0; uint32_t t; #define PARALLEL_CONDITIONAL_ADD(bit) \ do { \ if ((a) & (1 << (bit))) \ result ^= x; \ } while (0) #define PARALELL_ROTATE() \ do { \ t = x >> 24; \ x = (x << 8) ^ t ^ (t << 8); \ } while (0) PARALLEL_CONDITIONAL_ADD(0); PARALELL_ROTATE(); PARALLEL_CONDITIONAL_ADD(1); PARALELL_ROTATE(); PARALLEL_CONDITIONAL_ADD(2); PARALELL_ROTATE(); PARALLEL_CONDITIONAL_ADD(3); return result; } /* http://programming.sirrida.de/perm_fn.html#bit_permute_step */ #define bit_permute_step(_y, mask, shift) \ do { \ uint32_t y = (_y); \ uint32_t t = ((y >> (shift)) ^ y) & (mask); \ (_y) = (y ^ t) ^ (t << (shift)); \ } while (0) /** * \brief Converts a PHOTON-256 state into bit-sliced form. * * \param out Points to the converted output. * \param in Points to the PHOTON-256 state to convert. 
*/ static void photon256_to_sliced (uint32_t out[PHOTON256_STATE_SIZE / 4], const unsigned char in[PHOTON256_STATE_SIZE]) { /* We first scatter bits 0..3 of the nibbles to bytes 0..3 of the words. * Then we rearrange the bytes to group all bits N into word N. * * Permutation generated with "http://programming.sirrida.de/calcperm.php". * * P = [0 8 16 24 1 9 17 25 2 10 18 26 3 11 19 27 * 4 12 20 28 5 13 21 29 6 14 22 30 7 15 23 31] */ uint32_t t0, t1, t2, t3; #define TO_BITSLICED_PERM(x) \ do { \ bit_permute_step(x, 0x0a0a0a0a, 3); \ bit_permute_step(x, 0x00cc00cc, 6); \ bit_permute_step(x, 0x0000f0f0, 12); \ bit_permute_step(x, 0x0000ff00, 8); \ } while (0) #define FROM_BITSLICED_PERM(x) \ do { \ bit_permute_step(x, 0x00aa00aa, 7); \ bit_permute_step(x, 0x0000cccc, 14); \ bit_permute_step(x, 0x00f000f0, 4); \ bit_permute_step(x, 0x0000ff00, 8); \ } while (0) t0 = le_load_word32(in); t1 = le_load_word32(in + 4); t2 = le_load_word32(in + 8); t3 = le_load_word32(in + 12); TO_BITSLICED_PERM(t0); TO_BITSLICED_PERM(t1); TO_BITSLICED_PERM(t2); TO_BITSLICED_PERM(t3); out[0] = (t0 & 0x000000FFU) | ((t1 << 8) & 0x0000FF00U) | ((t2 << 16) & 0x00FF0000U) | ((t3 << 24) & 0xFF000000U); out[1] = ((t0 >> 8) & 0x000000FFU) | (t1 & 0x0000FF00U) | ((t2 << 8) & 0x00FF0000U) | ((t3 << 16) & 0xFF000000U); out[2] = ((t0 >> 16) & 0x000000FFU) | ((t1 >> 8) & 0x0000FF00U) | (t2 & 0x00FF0000U) | ((t3 << 8) & 0xFF000000U); out[3] = ((t0 >> 24) & 0x000000FFU) | ((t1 >> 16) & 0x0000FF00U) | ((t2 >> 8) & 0x00FF0000U) | (t3 & 0xFF000000U); t0 = le_load_word32(in + 16); t1 = le_load_word32(in + 20); t2 = le_load_word32(in + 24); t3 = le_load_word32(in + 28); TO_BITSLICED_PERM(t0); TO_BITSLICED_PERM(t1); TO_BITSLICED_PERM(t2); TO_BITSLICED_PERM(t3); out[4] = (t0 & 0x000000FFU) | ((t1 << 8) & 0x0000FF00U) | ((t2 << 16) & 0x00FF0000U) | ((t3 << 24) & 0xFF000000U); out[5] = ((t0 >> 8) & 0x000000FFU) | (t1 & 0x0000FF00U) | ((t2 << 8) & 0x00FF0000U) | ((t3 << 16) & 0xFF000000U); out[6] = ((t0 >> 16) & 
0x000000FFU) | ((t1 >> 8) & 0x0000FF00U) | (t2 & 0x00FF0000U) | ((t3 << 8) & 0xFF000000U); out[7] = ((t0 >> 24) & 0x000000FFU) | ((t1 >> 16) & 0x0000FF00U) | ((t2 >> 8) & 0x00FF0000U) | (t3 & 0xFF000000U); } /** * \brief Converts a PHOTON-256 state from bit-sliced form. * * \param out Points to the converted output. * \param in Points to the PHOTON-256 state to convert. */ static void photon256_from_sliced (unsigned char out[PHOTON256_STATE_SIZE], const unsigned char in[PHOTON256_STATE_SIZE]) { /* Do the reverse of photon256_to_sliced() */ uint32_t x0, x1, x2, x3; x0 = ((uint32_t)(in[0])) | (((uint32_t)(in[4])) << 8) | (((uint32_t)(in[8])) << 16) | (((uint32_t)(in[12])) << 24); x1 = ((uint32_t)(in[1])) | (((uint32_t)(in[5])) << 8) | (((uint32_t)(in[9])) << 16) | (((uint32_t)(in[13])) << 24); x2 = ((uint32_t)(in[2])) | (((uint32_t)(in[6])) << 8) | (((uint32_t)(in[10])) << 16) | (((uint32_t)(in[14])) << 24); x3 = ((uint32_t)(in[3])) | (((uint32_t)(in[7])) << 8) | (((uint32_t)(in[11])) << 16) | (((uint32_t)(in[15])) << 24); FROM_BITSLICED_PERM(x0); FROM_BITSLICED_PERM(x1); FROM_BITSLICED_PERM(x2); FROM_BITSLICED_PERM(x3); le_store_word32(out, x0); le_store_word32(out + 4, x1); le_store_word32(out + 8, x2); le_store_word32(out + 12, x3); x0 = ((uint32_t)(in[16])) | (((uint32_t)(in[20])) << 8) | (((uint32_t)(in[24])) << 16) | (((uint32_t)(in[28])) << 24); x1 = ((uint32_t)(in[17])) | (((uint32_t)(in[21])) << 8) | (((uint32_t)(in[25])) << 16) | (((uint32_t)(in[29])) << 24); x2 = ((uint32_t)(in[18])) | (((uint32_t)(in[22])) << 8) | (((uint32_t)(in[26])) << 16) | (((uint32_t)(in[30])) << 24); x3 = ((uint32_t)(in[19])) | (((uint32_t)(in[23])) << 8) | (((uint32_t)(in[27])) << 16) | (((uint32_t)(in[31])) << 24); FROM_BITSLICED_PERM(x0); FROM_BITSLICED_PERM(x1); FROM_BITSLICED_PERM(x2); FROM_BITSLICED_PERM(x3); le_store_word32(out + 16, x0); le_store_word32(out + 20, x1); le_store_word32(out + 24, x2); le_store_word32(out + 28, x3); } #if defined(LW_UTIL_LITTLE_ENDIAN) /* Index 
the bit-sliced state bytes in little-endian byte order */ #define READ_ROW0() \ (((uint32_t)(S.bytes[0])) | \ (((uint32_t)(S.bytes[4])) << 8) | \ (((uint32_t)(S.bytes[8])) << 16) | \ (((uint32_t)(S.bytes[12])) << 24)) #define READ_ROW1() \ (((uint32_t)(S.bytes[1])) | \ (((uint32_t)(S.bytes[5])) << 8) | \ (((uint32_t)(S.bytes[9])) << 16) | \ (((uint32_t)(S.bytes[13])) << 24)) #define READ_ROW2() \ (((uint32_t)(S.bytes[2])) | \ (((uint32_t)(S.bytes[6])) << 8) | \ (((uint32_t)(S.bytes[10])) << 16) | \ (((uint32_t)(S.bytes[14])) << 24)) #define READ_ROW3() \ (((uint32_t)(S.bytes[3])) | \ (((uint32_t)(S.bytes[7])) << 8) | \ (((uint32_t)(S.bytes[11])) << 16) | \ (((uint32_t)(S.bytes[15])) << 24)) #define READ_ROW4() \ (((uint32_t)(S.bytes[16])) | \ (((uint32_t)(S.bytes[20])) << 8) | \ (((uint32_t)(S.bytes[24])) << 16) | \ (((uint32_t)(S.bytes[28])) << 24)) #define READ_ROW5() \ (((uint32_t)(S.bytes[17])) | \ (((uint32_t)(S.bytes[21])) << 8) | \ (((uint32_t)(S.bytes[25])) << 16) | \ (((uint32_t)(S.bytes[29])) << 24)) #define READ_ROW6() \ (((uint32_t)(S.bytes[18])) | \ (((uint32_t)(S.bytes[22])) << 8) | \ (((uint32_t)(S.bytes[26])) << 16) | \ (((uint32_t)(S.bytes[30])) << 24)) #define READ_ROW7() \ (((uint32_t)(S.bytes[19])) | \ (((uint32_t)(S.bytes[23])) << 8) | \ (((uint32_t)(S.bytes[27])) << 16) | \ (((uint32_t)(S.bytes[31])) << 24)) #define WRITE_ROW(row, value) \ do { \ if ((row) < 4) { \ S.bytes[(row)] = (uint8_t)(value); \ S.bytes[(row) + 4] = (uint8_t)((value) >> 8); \ S.bytes[(row) + 8] = (uint8_t)((value) >> 16); \ S.bytes[(row) + 12] = (uint8_t)((value) >> 24); \ } else { \ S.bytes[(row) + 12] = (uint8_t)(value); \ S.bytes[(row) + 16] = (uint8_t)((value) >> 8); \ S.bytes[(row) + 20] = (uint8_t)((value) >> 16); \ S.bytes[(row) + 24] = (uint8_t)((value) >> 24); \ } \ } while (0) #else /* Index the bit-sliced state bytes in big-endian byte order */ #define READ_ROW0() \ (((uint32_t)(S.bytes[3])) | \ (((uint32_t)(S.bytes[7])) << 8) | \ (((uint32_t)(S.bytes[11])) << 
16) | \ (((uint32_t)(S.bytes[15])) << 24)) #define READ_ROW1() \ (((uint32_t)(S.bytes[2])) | \ (((uint32_t)(S.bytes[6])) << 8) | \ (((uint32_t)(S.bytes[10])) << 16) | \ (((uint32_t)(S.bytes[14])) << 24)) #define READ_ROW2() \ (((uint32_t)(S.bytes[1])) | \ (((uint32_t)(S.bytes[5])) << 8) | \ (((uint32_t)(S.bytes[9])) << 16) | \ (((uint32_t)(S.bytes[13])) << 24)) #define READ_ROW3() \ (((uint32_t)(S.bytes[0])) | \ (((uint32_t)(S.bytes[4])) << 8) | \ (((uint32_t)(S.bytes[8])) << 16) | \ (((uint32_t)(S.bytes[12])) << 24)) #define READ_ROW4() \ (((uint32_t)(S.bytes[19])) | \ (((uint32_t)(S.bytes[23])) << 8) | \ (((uint32_t)(S.bytes[27])) << 16) | \ (((uint32_t)(S.bytes[31])) << 24)) #define READ_ROW5() \ (((uint32_t)(S.bytes[18])) | \ (((uint32_t)(S.bytes[22])) << 8) | \ (((uint32_t)(S.bytes[26])) << 16) | \ (((uint32_t)(S.bytes[30])) << 24)) #define READ_ROW6() \ (((uint32_t)(S.bytes[17])) | \ (((uint32_t)(S.bytes[21])) << 8) | \ (((uint32_t)(S.bytes[25])) << 16) | \ (((uint32_t)(S.bytes[29])) << 24)) #define READ_ROW7() \ (((uint32_t)(S.bytes[16])) | \ (((uint32_t)(S.bytes[20])) << 8) | \ (((uint32_t)(S.bytes[24])) << 16) | \ (((uint32_t)(S.bytes[28])) << 24)) #define WRITE_ROW(row, value) \ do { \ if ((row) < 4) { \ S.bytes[3 - (row)] = (uint8_t)(value); \ S.bytes[7 - (row)] = (uint8_t)((value) >> 8); \ S.bytes[11 - (row)] = (uint8_t)((value) >> 16); \ S.bytes[15 - (row)] = (uint8_t)((value) >> 24); \ } else { \ S.bytes[20 - (row)] = (uint8_t)(value); \ S.bytes[24 - (row)] = (uint8_t)((value) >> 8); \ S.bytes[28 - (row)] = (uint8_t)((value) >> 16); \ S.bytes[32 - (row)] = (uint8_t)((value) >> 24); \ } \ } while (0) #endif void photon256_permute(unsigned char state[PHOTON256_STATE_SIZE]) { union { uint32_t words[PHOTON256_STATE_SIZE / 4]; uint8_t bytes[PHOTON256_STATE_SIZE]; } S; uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8; uint8_t round; /* Convert the state into bit-sliced form */ photon256_to_sliced(S.words, state); /* Perform all 12 permutation rounds */ for 
(round = 0; round < PHOTON256_ROUNDS; ++round) { /* Add the constants for this round */ t0 = photon256_rc[round]; S.words[0] ^= t0 & 0x01010101U; t0 >>= 1; S.words[1] ^= t0 & 0x01010101U; t0 >>= 1; S.words[2] ^= t0 & 0x01010101U; t0 >>= 1; S.words[3] ^= t0 & 0x01010101U; t0 >>= 1; S.words[4] ^= t0 & 0x01010101U; t0 >>= 1; S.words[5] ^= t0 & 0x01010101U; t0 >>= 1; S.words[6] ^= t0 & 0x01010101U; t0 >>= 1; S.words[7] ^= t0 & 0x01010101U; /* Apply the sbox to all nibbles in the state */ photon256_sbox(S.words[0], S.words[1], S.words[2], S.words[3]); photon256_sbox(S.words[4], S.words[5], S.words[6], S.words[7]); /* Rotate all rows left by the row number. * * We do this by applying permutations to the top and bottom words * to rearrange the bits into the rotated form. Permutations * generated with "http://programming.sirrida.de/calcperm.php". * * P_top = [0 1 2 3 4 5 6 7 15 8 9 10 11 12 13 14 22 23 * 16 17 18 19 20 21 29 30 31 24 25 26 27 28] * P_bot = [4 5 6 7 0 1 2 3 11 12 13 14 15 8 9 10 18 19 * 20 21 22 23 16 17 25 26 27 28 29 30 31 24 */ #define TOP_ROTATE_PERM(x) \ do { \ t1 = (x); \ bit_permute_step(t1, 0x07030100, 4); \ bit_permute_step(t1, 0x22331100, 2); \ bit_permute_step(t1, 0x55005500, 1); \ (x) = t1; \ } while (0) #define BOTTOM_ROTATE_PERM(x) \ do { \ t1 = (x); \ bit_permute_step(t1, 0x080c0e0f, 4); \ bit_permute_step(t1, 0x22331100, 2); \ bit_permute_step(t1, 0x55005500, 1); \ (x) = t1; \ } while (0) TOP_ROTATE_PERM(S.words[0]); TOP_ROTATE_PERM(S.words[1]); TOP_ROTATE_PERM(S.words[2]); TOP_ROTATE_PERM(S.words[3]); BOTTOM_ROTATE_PERM(S.words[4]); BOTTOM_ROTATE_PERM(S.words[5]); BOTTOM_ROTATE_PERM(S.words[6]); BOTTOM_ROTATE_PERM(S.words[7]); /* Mix the columns */ #define MUL(a, x) (photon256_field_multiply((a), (x))) t0 = READ_ROW0(); t1 = READ_ROW1(); t2 = READ_ROW2(); t3 = READ_ROW3(); t4 = READ_ROW4(); t5 = READ_ROW5(); t6 = READ_ROW6(); t7 = READ_ROW7(); t8 = MUL(0x02, t0) ^ MUL(0x04, t1) ^ MUL(0x02, t2) ^ MUL(0x0b, t3) ^ MUL(0x02, t4) ^ MUL(0x08, t5) 
^ MUL(0x05, t6) ^ MUL(0x06, t7); WRITE_ROW(0, t8); t8 = MUL(0x0c, t0) ^ MUL(0x09, t1) ^ MUL(0x08, t2) ^ MUL(0x0d, t3) ^ MUL(0x07, t4) ^ MUL(0x07, t5) ^ MUL(0x05, t6) ^ MUL(0x02, t7); WRITE_ROW(1, t8); t8 = MUL(0x04, t0) ^ MUL(0x04, t1) ^ MUL(0x0d, t2) ^ MUL(0x0d, t3) ^ MUL(0x09, t4) ^ MUL(0x04, t5) ^ MUL(0x0d, t6) ^ MUL(0x09, t7); WRITE_ROW(2, t8); t8 = MUL(0x01, t0) ^ MUL(0x06, t1) ^ MUL(0x05, t2) ^ MUL(0x01, t3) ^ MUL(0x0c, t4) ^ MUL(0x0d, t5) ^ MUL(0x0f, t6) ^ MUL(0x0e, t7); WRITE_ROW(3, t8); t8 = MUL(0x0f, t0) ^ MUL(0x0c, t1) ^ MUL(0x09, t2) ^ MUL(0x0d, t3) ^ MUL(0x0e, t4) ^ MUL(0x05, t5) ^ MUL(0x0e, t6) ^ MUL(0x0d, t7); WRITE_ROW(4, t8); t8 = MUL(0x09, t0) ^ MUL(0x0e, t1) ^ MUL(0x05, t2) ^ MUL(0x0f, t3) ^ MUL(0x04, t4) ^ MUL(0x0c, t5) ^ MUL(0x09, t6) ^ MUL(0x06, t7); WRITE_ROW(5, t8); t8 = MUL(0x0c, t0) ^ MUL(0x02, t1) ^ MUL(0x02, t2) ^ MUL(0x0a, t3) ^ MUL(0x03, t4) ^ MUL(0x01, t5) ^ MUL(0x01, t6) ^ MUL(0x0e, t7); WRITE_ROW(6, t8); t8 = MUL(0x0f, t0) ^ MUL(0x01, t1) ^ MUL(0x0d, t2) ^ MUL(0x0a, t3) ^ MUL(0x05, t4) ^ MUL(0x0a, t5) ^ MUL(0x02, t6) ^ MUL(0x03, t7); WRITE_ROW(7, t8); } /* Convert back from bit-sliced form to regular form */ photon256_from_sliced(state, S.bytes); }