/* Implementation of the Lilliput-AE tweakable block cipher. Authors, hereby denoted as "the implementer": Alexandre Adomnicai, Kévin Le Gouguec, Léo Reynaud, 2019. For more information, feedback or questions, refer to our website: https://paclido.fr/lilliput-ae To the extent possible under law, the implementer has waived all copyright and related or neighboring rights to the source code in this file. http://creativecommons.org/publicdomain/zero/1.0/ --- This file provides a first-order threshold implementation of Lilliput-TBC's tweakey schedule, where the tweak and the key are split into two shares. */ #include #include #include "constants.h" #include "random.h" #include "tweakey.h" #define LANE_BITS 64 #define LANE_BYTES (LANE_BITS/8) #define LANES_NB (TWEAKEY_BYTES/LANE_BYTES) void tweakey_state_init( uint8_t TK_X[TWEAKEY_BYTES], uint8_t TK_Y[KEY_BYTES], const uint8_t key[KEY_BYTES], const uint8_t tweak[TWEAK_BYTES] ) { uint8_t SHARES_0[KEY_BYTES]; randombytes(sizeof(SHARES_0), SHARES_0); memcpy(TK_Y, SHARES_0, KEY_BYTES); memcpy(TK_X, tweak, TWEAK_BYTES); for (size_t i=0; i>3 ^ x[3]; y[3] = x[2]; y[2] = x[6]<<2 ^ x[1]; y[1] = x[0]; y[0] = x[7]; } static void _multiply_M2(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES]) { uint8_t x_M_5 = x[5]<<3 ^ x[4]; uint8_t x_M_4 = x[4]>>3 ^ x[3]; y[7] = x[5]; y[6] = x_M_5; y[5] = x_M_5<<3 ^ x_M_4; y[4] = x_M_4>>3 ^ x[2]; y[3] = x[6]<<2 ^ x[1]; y[2] = x[5]<<2 ^ x[0]; y[1] = x[7]; y[0] = x[6]; } static void _multiply_M3(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES]) { uint8_t x_M_5 = x[5]<<3 ^ x[4]; uint8_t x_M_4 = x[4]>>3 ^ x[3]; uint8_t x_M2_5 = x_M_5<<3 ^ x_M_4; uint8_t x_M2_4 = x_M_4>>3 ^ x[2]; y[7] = x_M_5; y[6] = x_M2_5; y[5] = x_M2_5<<3 ^ x_M2_4; y[4] = x_M2_4>>3 ^ x[6]<<2 ^ x[1]; y[3] = x[5]<<2 ^ x[0]; y[2] = x_M_5<<2 ^ x[7]; y[1] = x[6]; y[0] = x[5]; } static void _multiply_MR(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES]) { y[0] = x[1]; y[1] = x[2]; y[2] = x[3] ^ x[4]>>3; y[3] = x[4]; y[4] = x[5] ^ x[6]<<3; y[5] = x[3]<<2 ^ x[6]; y[6] = x[7]; y[7] = x[0]; } static void _multiply_MR2(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES]) { uint8_t x_MR_4 = x[5] ^ x[6]<<3; y[0] = x[2]; y[1] = x[3] ^ x[4]>>3; y[2] = x[4] ^ x_MR_4>>3; y[3] = x_MR_4; y[4] = x[3]<<2 ^ x[6] ^ x[7]<<3; y[5] = x[4]<<2 ^ x[7]; y[6] = x[0]; y[7] = x[1]; } static void _multiply_MR3(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES]) { uint8_t x_MR_4 = x[5] ^ x[6]<<3; uint8_t x_MR2_4 = x[3]<<2 ^ x[6] ^ x[7]<<3; y[0] = x[3] ^ x[4]>>3; y[1] = x[4] ^ x_MR_4>>3; y[2] = x_MR_4 ^ x_MR2_4>>3; y[3] = x_MR2_4; y[4] = x[0]<<3 ^ x[4]<<2 ^ x[7]; y[5] = x_MR_4<<2 ^ x[0]; y[6] = x[1]; y[7] = x[2]; } typedef void (*matrix_multiplication)(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES]); static const matrix_multiplication ALPHAS[6] = { _multiply_M, _multiply_M2, _multiply_M3, _multiply_MR, _multiply_MR2, _multiply_MR3 }; void tweakey_state_update(uint8_t TK_X[TWEAKEY_BYTES], uint8_t TK_Y[KEY_BYTES]) { /* Skip lane 0, as it is multiplied by the identity matrix. */ for (size_t j=1; j<(TWEAK_BYTES/LANE_BYTES); j++) { uint8_t *TKj_X = TK_X + j*LANE_BYTES; uint8_t TKj_old_X[LANE_BYTES]; memcpy(TKj_old_X, TKj_X, LANE_BYTES); ALPHAS[j-1](TKj_old_X, TKj_X); } for (size_t j=0; j<(KEY_BYTES/LANE_BYTES); j++) { uint8_t *TKj_X = TK_X + (j + (TWEAK_BYTES/LANE_BYTES))*LANE_BYTES; uint8_t *TKj_Y = TK_Y + j*LANE_BYTES; uint8_t TKj_X_old[LANE_BYTES]; uint8_t TKj_Y_old[LANE_BYTES]; memcpy(TKj_X_old, TKj_X, LANE_BYTES); memcpy(TKj_Y_old, TKj_Y, LANE_BYTES); ALPHAS[j-1 + (TWEAK_BYTES/LANE_BYTES)](TKj_X_old, TKj_X); ALPHAS[j-1 + (TWEAK_BYTES/LANE_BYTES)](TKj_Y_old, TKj_Y); } }