#include <stdint.h>

#include "api.h"

#define PA_ROUNDS 12
#define PB_ROUNDS 8

#define ROR8(x, n) (((x) >> (n)) | ((x) << (8 - (n))))

#define COMPRESS_BYTE_ARRAY_8(a, var_7, var_6, var_5, var_4, var_3, var_2, var_1, var_0) \
  do { \
    var_0 = a[7]; var_1 = var_0 >> 1; var_2 = var_1 >> 1; var_3 = var_2 >> 1; \
    var_4 = var_3 >> 1; var_5 = var_4 >> 1; var_6 = var_5 >> 1; var_7 = var_6 >> 1; \
    var_0 &= 1; var_1 &= 1; var_2 &= 1; var_3 &= 1; \
    var_4 &= 1; var_5 &= 1; var_6 &= 1; var_7 &= 1; \
    t1_0 = a[6] << 1; t1_1 = a[6]; t1_2 = t1_1 >> 1; t1_3 = t1_2 >> 1; \
    t1_4 = t1_3 >> 1; t1_5 = t1_4 >> 1; t1_6 = t1_5 >> 1; t1_7 = t1_6 >> 1; \
    var_0 |= t1_0 & 2; var_1 |= t1_1 & 2; var_2 |= t1_2 & 2; var_3 |= t1_3 & 2; \
    var_4 |= t1_4 & 2; var_5 |= t1_5 & 2; var_6 |= t1_6 & 2; var_7 |= t1_7 & 2; \
    t1_1 = a[5] << 1; t1_0 = t1_1 << 1; t1_2 = a[5]; t1_3 = t1_2 >> 1; \
    t1_4 = t1_3 >> 1; t1_5 = t1_4 >> 1; t1_6 = t1_5 >> 1; t1_7 = t1_6 >> 1; \
    var_0 |= t1_0 & 4; var_1 |= t1_1 & 4; var_2 |= t1_2 & 4; var_3 |= t1_3 & 4; \
    var_4 |= t1_4 & 4; var_5 |= t1_5 & 4; var_6 |= t1_6 & 4; var_7 |= t1_7 & 4; \
    t1_2 = a[4] << 1; t1_1 = t1_2 << 1; t1_0 = t1_1 << 1; t1_3 = a[4]; \
    t1_4 = t1_3 >> 1; t1_5 = t1_4 >> 1; t1_6 = t1_5 >> 1; t1_7 = t1_6 >> 1; \
    var_0 |= t1_0 & 8; var_1 |= t1_1 & 8; var_2 |= t1_2 & 8; var_3 |= t1_3 & 8; \
    var_4 |= t1_4 & 8; var_5 |= t1_5 & 8; var_6 |= t1_6 & 8; var_7 |= t1_7 & 8; \
    t1_3 = a[3] << 1; t1_2 = t1_3 << 1; t1_1 = t1_2 << 1; t1_0 = t1_1 << 1; \
    t1_4 = a[3]; t1_5 = t1_4 >> 1; t1_6 = t1_5 >> 1; t1_7 = t1_6 >> 1; \
    var_0 |= t1_0 & 16; var_1 |= t1_1 & 16; var_2 |= t1_2 & 16; var_3 |= t1_3 & 16; \
    var_4 |= t1_4 & 16; var_5 |= t1_5 & 16; var_6 |= t1_6 & 16; var_7 |= t1_7 & 16; \
    t1_4 = a[2] << 1; t1_3 = t1_4 << 1; t1_2 = t1_3 << 1; t1_1 = t1_2 << 1; \
    t1_0 = t1_1 << 1; t1_5 = a[2]; t1_6 = t1_5 >> 1; t1_7 = t1_6 >> 1; \
    var_0 |= t1_0 & 32; var_1 |= t1_1 & 32; var_2 |= t1_2 & 32; var_3 |= t1_3 & 32; \
    var_4 |= t1_4 & 32; var_5 |= t1_5 & 32; var_6 |= t1_6 & 32; var_7 |= t1_7 & 32; \
    t1_5 = a[1] << 1; t1_4 = t1_5 << 1; t1_3 = t1_4 << 1; t1_2 = t1_3 << 1; \
    t1_1 = t1_2 << 1; t1_0 = t1_1 << 1; t1_6 = a[1]; t1_7 = t1_6 >> 1; \
    var_0 |= t1_0 & 64; var_1 |= t1_1 & 64; var_2 |= t1_2 & 64; var_3 |= t1_3 & 64; \
    var_4 |= t1_4 & 64; var_5 |= t1_5 & 64; var_6 |= t1_6 & 64; var_7 |= t1_7 & 64; \
    t1_6 = a[0] << 1; t1_5 = t1_6 << 1; t1_4 = t1_5 << 1; t1_3 = t1_4 << 1; \
    t1_2 = t1_3 << 1; t1_1 = t1_2 << 1; t1_0 = t1_1 << 1; t1_7 = a[0]; \
    var_0 |= t1_0 & 128; var_1 |= t1_1 & 128; var_2 |= t1_2 & 128; var_3 |= t1_3 & 128; \
    var_4 |= t1_4 & 128; var_5 |= t1_5 & 128; var_6 |= t1_6 & 128; var_7 |= t1_7 & 128; \
  } while (0)

#define EXPAND_BYTE_ARRAY_8(a, var_7, var_6, var_5, var_4, var_3, var_2, var_1, var_0) \
  do { \
    a[7] = var_0 & 1; t1_0 = var_0 >> 1; a[6] = t1_0 & 1; t1_0 >>= 1; \
    a[5] = t1_0 & 1; t1_0 >>= 1; a[4] = t1_0 & 1; t1_0 >>= 1; \
    a[3] = t1_0 & 1; t1_0 >>= 1; a[2] = t1_0 & 1; t1_0 >>= 1; \
    a[1] = t1_0 & 1; t1_0 >>= 1; a[0] = t1_0 & 1; \
    a[6] |= var_1 & 2; t1_1 = var_1 << 1; a[7] |= t1_1 & 2; t1_1 = var_1 >> 1; \
    a[5] |= t1_1 & 2; t1_1 >>= 1; a[4] |= t1_1 & 2; t1_1 >>= 1; \
    a[3] |= t1_1 & 2; t1_1 >>= 1; a[2] |= t1_1 & 2; t1_1 >>= 1; \
    a[1] |= t1_1 & 2; t1_1 >>= 1; a[0] |= t1_1 & 2; \
    a[5] |= var_2 & 4; t1_2 = var_2 << 1; a[6] |= t1_2 & 4; t1_2 <<= 1; \
    a[7] |= t1_2 & 4; t1_2 = var_2 >> 1; a[4] |= t1_2 & 4; t1_2 >>= 1; \
    a[3] |= t1_2 & 4; t1_2 >>= 1; a[2] |= t1_2 & 4; t1_2 >>= 1; \
    a[1] |= t1_2 & 4; t1_2 >>= 1; a[0] |= t1_2 & 4; \
    a[4] |= var_3 & 8; t1_3 = var_3 << 1; a[5] |= t1_3 & 8; t1_3 <<= 1; \
    a[6] |= t1_3 & 8; t1_3 <<= 1; a[7] |= t1_3 & 8; t1_3 = var_3 >> 1; \
    a[3] |= t1_3 & 8; t1_3 >>= 1; a[2] |= t1_3 & 8; t1_3 >>= 1; \
    a[1] |= t1_3 & 8; t1_3 >>= 1; a[0] |= t1_3 & 8; \
    a[3] |= var_4 & 16; t1_4 = var_4 << 1; a[4] |= t1_4 & 16; t1_4 <<= 1; \
    a[5] |= t1_4 & 16; t1_4 <<= 1; a[6] |= t1_4 & 16; t1_4 <<= 1; \
    a[7] |= t1_4 & 16; t1_4 = var_4 >> 1; a[2] |= t1_4 & 16; t1_4 >>= 1; \
    a[1] |= t1_4 & 16; t1_4 >>= 1; a[0] |= t1_4 & 16; \
    a[2] |= var_5 & 32; t1_5 = var_5 << 1; a[3] |= t1_5 & 32; t1_5 <<= 1; \
    a[4] |= t1_5 & 32; t1_5 <<= 1; a[5] |= t1_5 & 32; t1_5 <<= 1; \
    a[6] |= t1_5 & 32; t1_5 <<= 1; a[7] |= t1_5 & 32; t1_5 = var_5 >> 1; \
    a[1] |= t1_5 & 32; t1_5 >>= 1; a[0] |= t1_5 & 32; \
    a[1] |= var_6 & 64; t1_6 = var_6 << 1; a[2] |= t1_6 & 64; t1_6 <<= 1; \
    a[3] |= t1_6 & 64; t1_6 <<= 1; a[4] |= t1_6 & 64; t1_6 <<= 1; \
    a[5] |= t1_6 & 64; t1_6 <<= 1; a[6] |= t1_6 & 64; t1_6 <<= 1; \
    a[7] |= t1_6 & 64; t1_6 = var_6 >> 1; a[0] |= t1_6 & 64; \
    a[0] |= var_7 & 128; t1_7 = var_7 << 1; a[1] |= t1_7 & 128; t1_7 <<= 1; \
    a[2] |= t1_7 & 128; t1_7 <<= 1; a[3] |= t1_7 & 128; t1_7 <<= 1; \
    a[4] |= t1_7 & 128; t1_7 <<= 1; a[5] |= t1_7 & 128; t1_7 <<= 1; \
    a[6] |= t1_7 & 128; t1_7 <<= 1; a[7] |= t1_7 & 128; \
  } while (0)
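/*
 * COMPRESS_BYTE_ARRAY_8 transposes eight state bytes into eight
 * bit-interleaved slices: bit j of byte a[7 - k] ends up in bit k of slice
 * var_j, i.e. slice j collects bits j, j + 8, ..., j + 56 of the big-endian
 * 64-bit word a[0..7]. A 64-bit rotation of the word then turns into 8-bit
 * rotations (ROR8) of the slices plus a renaming of slice indices, which is
 * what the linear layer below exploits. EXPAND_BYTE_ARRAY_8 is the inverse.
 *
 * Loop-based sketch of the same transposition (illustration only, not part
 * of the build; the helper name is hypothetical):
 */
#if 0
static void compress_byte_array_8(const uint8_t a[8], uint8_t slice[8]) {
  int j, k;
  for (j = 0; j < 8; ++j) {
    slice[j] = 0;
    for (k = 0; k < 8; ++k) /* bit j of a[7 - k] -> bit k of slice[j] */
      slice[j] |= (uint8_t)(((a[7 - k] >> j) & 1u) << k);
  }
}
#endif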
/* This way of implementing Ascon's S-box was inspired by personal
   communication with Joan Daemen about implementing the 3-bit chi layer. */
#define ROUND_16(C_7, C_6, C_5, C_4, C_3, C_2, C_1, C_0) \
  do { \
    /* round constant */ \
    x2_0 ^= C_0; x2_1 ^= C_1; x2_2 ^= C_2; x2_3 ^= C_3; \
    x2_4 ^= C_4; x2_5 ^= C_5; x2_6 ^= C_6; x2_7 ^= C_7; \
    /* s-box layer */ \
    x0_0 ^= x4_0; x4_0 ^= x3_0; x2_0 ^= x1_0; \
    t0_0 = x0_0 & (~x4_0); t1_0 = x2_0 & (~x1_0); x0_0 ^= t1_0; \
    t1_0 = x4_0 & (~x3_0); x2_0 ^= t1_0; \
    t1_0 = x1_0 & (~x0_0); x4_0 ^= t1_0; \
    t1_0 = x3_0 & (~x2_0); x1_0 ^= t1_0; \
    x3_0 ^= t0_0; x1_0 ^= x0_0; x3_0 ^= x2_0; x0_0 ^= x4_0; x2_0 = ~x2_0; \
    x0_1 ^= x4_1; x4_1 ^= x3_1; x2_1 ^= x1_1; \
    t0_0 = x0_1 & (~x4_1); t1_0 = x2_1 & (~x1_1); x0_1 ^= t1_0; \
    t1_0 = x4_1 & (~x3_1); x2_1 ^= t1_0; \
    t1_0 = x1_1 & (~x0_1); x4_1 ^= t1_0; \
    t1_0 = x3_1 & (~x2_1); x1_1 ^= t1_0; \
    x3_1 ^= t0_0; x1_1 ^= x0_1; x3_1 ^= x2_1; x0_1 ^= x4_1; x2_1 = ~x2_1; \
    x0_2 ^= x4_2; x4_2 ^= x3_2; x2_2 ^= x1_2; \
    t0_0 = x0_2 & (~x4_2); t1_0 = x2_2 & (~x1_2); x0_2 ^= t1_0; \
    t1_0 = x4_2 & (~x3_2); x2_2 ^= t1_0; \
    t1_0 = x1_2 & (~x0_2); x4_2 ^= t1_0; \
    t1_0 = x3_2 & (~x2_2); x1_2 ^= t1_0; \
    x3_2 ^= t0_0; x1_2 ^= x0_2; x3_2 ^= x2_2; x0_2 ^= x4_2; x2_2 = ~x2_2; \
    x0_3 ^= x4_3; x4_3 ^= x3_3; x2_3 ^= x1_3; \
    t0_0 = x0_3 & (~x4_3); t1_0 = x2_3 & (~x1_3); x0_3 ^= t1_0; \
    t1_0 = x4_3 & (~x3_3); x2_3 ^= t1_0; \
    t1_0 = x1_3 & (~x0_3); x4_3 ^= t1_0; \
    t1_0 = x3_3 & (~x2_3); x1_3 ^= t1_0; \
    x3_3 ^= t0_0; x1_3 ^= x0_3; x3_3 ^= x2_3; x0_3 ^= x4_3; x2_3 = ~x2_3; \
    x0_4 ^= x4_4; x4_4 ^= x3_4; x2_4 ^= x1_4; \
    t0_0 = x0_4 & (~x4_4); t1_0 = x2_4 & (~x1_4); x0_4 ^= t1_0; \
    t1_0 = x4_4 & (~x3_4); x2_4 ^= t1_0; \
    t1_0 = x1_4 & (~x0_4); x4_4 ^= t1_0; \
    t1_0 = x3_4 & (~x2_4); x1_4 ^= t1_0; \
    x3_4 ^= t0_0; x1_4 ^= x0_4; x3_4 ^= x2_4; x0_4 ^= x4_4; x2_4 = ~x2_4; \
    x0_5 ^= x4_5; x4_5 ^= x3_5; x2_5 ^= x1_5; \
    t0_0 = x0_5 & (~x4_5); t1_0 = x2_5 & (~x1_5); x0_5 ^= t1_0; \
    t1_0 = x4_5 & (~x3_5); x2_5 ^= t1_0; \
    t1_0 = x1_5 & (~x0_5); x4_5 ^= t1_0; \
    t1_0 = x3_5 & (~x2_5); x1_5 ^= t1_0; \
    x3_5 ^= t0_0; x1_5 ^= x0_5; x3_5 ^= x2_5; x0_5 ^= x4_5; x2_5 = ~x2_5; \
    x0_6 ^= x4_6; x4_6 ^= x3_6; x2_6 ^= x1_6; \
    t0_0 = x0_6 & (~x4_6); t1_0 = x2_6 & (~x1_6); x0_6 ^= t1_0; \
    t1_0 = x4_6 & (~x3_6); x2_6 ^= t1_0; \
    t1_0 = x1_6 & (~x0_6); x4_6 ^= t1_0; \
    t1_0 = x3_6 & (~x2_6); x1_6 ^= t1_0; \
    x3_6 ^= t0_0; x1_6 ^= x0_6; x3_6 ^= x2_6; x0_6 ^= x4_6; x2_6 = ~x2_6; \
    x0_7 ^= x4_7; x4_7 ^= x3_7; x2_7 ^= x1_7; \
    t0_0 = x0_7 & (~x4_7); t1_0 = x2_7 & (~x1_7); x0_7 ^= t1_0; \
    t1_0 = x4_7 & (~x3_7); x2_7 ^= t1_0; \
    t1_0 = x1_7 & (~x0_7); x4_7 ^= t1_0; \
    t1_0 = x3_7 & (~x2_7); x1_7 ^= t1_0; \
    x3_7 ^= t0_0; x1_7 ^= x0_7; x3_7 ^= x2_7; x0_7 ^= x4_7; x2_7 = ~x2_7; \
    /* linear layer */ \
    t0_0 = x0_0; t0_1 = x0_1; t0_2 = x0_2; t0_3 = x0_3; \
    t0_4 = x0_4; t0_5 = x0_5; t0_6 = x0_6; t0_7 = x0_7; \
    x0_5 ^= ROR8(t0_0, 3); x0_6 ^= ROR8(t0_1, 3); x0_7 ^= ROR8(t0_2, 3); x0_0 ^= ROR8(t0_3, 2); \
    x0_1 ^= ROR8(t0_4, 2); x0_2 ^= ROR8(t0_5, 2); x0_3 ^= ROR8(t0_6, 2); x0_4 ^= ROR8(t0_7, 2); \
    x0_4 ^= ROR8(t0_0, 4); x0_5 ^= ROR8(t0_1, 4); x0_6 ^= ROR8(t0_2, 4); x0_7 ^= ROR8(t0_3, 4); \
    x0_0 ^= ROR8(t0_4, 3); x0_1 ^= ROR8(t0_5, 3); x0_2 ^= ROR8(t0_6, 3); x0_3 ^= ROR8(t0_7, 3); \
    t0_0 = x1_0; t0_1 = x1_1; t0_2 = x1_2; t0_3 = x1_3; \
    t0_4 = x1_4; t0_5 = x1_5; t0_6 = x1_6; t0_7 = x1_7; \
    x1_3 ^= t0_0; x1_4 ^= t0_1; x1_5 ^= t0_2; x1_6 ^= t0_3; x1_7 ^= t0_4; \
    x1_0 ^= ROR8(t0_5, 7); x1_1 ^= ROR8(t0_6, 7); x1_2 ^= ROR8(t0_7, 7); \
    x1_1 ^= ROR8(t0_0, 5); x1_2 ^= ROR8(t0_1, 5); x1_3 ^= ROR8(t0_2, 5); x1_4 ^= ROR8(t0_3, 5); \
    x1_5 ^= ROR8(t0_4, 5); x1_6 ^= ROR8(t0_5, 5); x1_7 ^= ROR8(t0_6, 5); x1_0 ^= ROR8(t0_7, 4); \
    t0_0 = x2_0; t0_1 = x2_1; t0_2 = x2_2; t0_3 = x2_3; \
    t0_4 = x2_4; t0_5 = x2_5; t0_6 = x2_6; t0_7 = x2_7; \
    x2_7 ^= ROR8(t0_0, 1); x2_0 ^= t0_1; x2_1 ^= t0_2; x2_2 ^= t0_3; \
    x2_3 ^= t0_4; x2_4 ^= t0_5; x2_5 ^= t0_6; x2_6 ^= t0_7; \
    x2_2 ^= ROR8(t0_0, 1); x2_3 ^= ROR8(t0_1, 1); x2_4 ^= ROR8(t0_2, 1); x2_5 ^= ROR8(t0_3, 1); \
    x2_6 ^= ROR8(t0_4, 1); x2_7 ^= ROR8(t0_5, 1); x2_0 ^= t0_6; x2_1 ^= t0_7; \
    t0_0 = x3_0; t0_1 = x3_1; t0_2 = x3_2; t0_3 = x3_3; \
    t0_4 = x3_4; t0_5 = x3_5; t0_6 = x3_6; t0_7 = x3_7; \
    x3_6 ^= ROR8(t0_0, 2); x3_7 ^= ROR8(t0_1, 2); x3_0 ^= ROR8(t0_2, 1); x3_1 ^= ROR8(t0_3, 1); \
    x3_2 ^= ROR8(t0_4, 1); x3_3 ^= ROR8(t0_5, 1); x3_4 ^= ROR8(t0_6, 1); x3_5 ^= ROR8(t0_7, 1); \
    x3_7 ^= ROR8(t0_0, 3); x3_0 ^= ROR8(t0_1, 2); x3_1 ^= ROR8(t0_2, 2); x3_2 ^= ROR8(t0_3, 2); \
    x3_3 ^= ROR8(t0_4, 2); x3_4 ^= ROR8(t0_5, 2); x3_5 ^= ROR8(t0_6, 2); x3_6 ^= ROR8(t0_7, 2); \
    t0_0 = x4_0; t0_1 = x4_1; t0_2 = x4_2; t0_3 = x4_3; \
    t0_4 = x4_4; t0_5 = x4_5; t0_6 = x4_6; t0_7 = x4_7; \
    x4_1 ^= ROR8(t0_0, 1); x4_2 ^= ROR8(t0_1, 1); x4_3 ^= ROR8(t0_2, 1); x4_4 ^= ROR8(t0_3, 1); \
    x4_5 ^= ROR8(t0_4, 1); x4_6 ^= ROR8(t0_5, 1); x4_7 ^= ROR8(t0_6, 1); x4_0 ^= t0_7; \
    x4_7 ^= ROR8(t0_0, 6); x4_0 ^= ROR8(t0_1, 5); x4_1 ^= ROR8(t0_2, 5); x4_2 ^= ROR8(t0_3, 5); \
    x4_3 ^= ROR8(t0_4, 5); x4_4 ^= ROR8(t0_5, 5); x4_5 ^= ROR8(t0_6, 5); x4_6 ^= ROR8(t0_7, 5); \
  } while (0)

#define P12_8 \
  do { \
    ROUND_16(1, 1, 1, 1, 0, 0, 0, 0); \
    ROUND_16(1, 1, 1, 0, 0, 0, 0, 1); \
    ROUND_16(1, 1, 0, 1, 0, 0, 1, 0); \
    ROUND_16(1, 1, 0, 0, 0, 0, 1, 1); \
    ROUND_16(1, 0, 1, 1, 0, 1, 0, 0); \
    ROUND_16(1, 0, 1, 0, 0, 1, 0, 1); \
    ROUND_16(1, 0, 0, 1, 0, 1, 1, 0); \
    ROUND_16(1, 0, 0, 0, 0, 1, 1, 1); \
    ROUND_16(0, 1, 1, 1, 1, 0, 0, 0); \
    ROUND_16(0, 1, 1, 0, 1, 0, 0, 1); \
    ROUND_16(0, 1, 0, 1, 1, 0, 1, 0); \
    ROUND_16(0, 1, 0, 0, 1, 0, 1, 1); \
  } while (0)

#define P8_8 \
  do { \
    ROUND_16(1, 0, 1, 1, 0, 1, 0, 0); \
    ROUND_16(1, 0, 1, 0, 0, 1, 0, 1); \
    ROUND_16(1, 0, 0, 1, 0, 1, 1, 0); \
    ROUND_16(1, 0, 0, 0, 0, 1, 1, 1); \
    ROUND_16(0, 1, 1, 1, 1, 0, 0, 0); \
    ROUND_16(0, 1, 1, 0, 1, 0, 0, 1); \
    ROUND_16(0, 1, 0, 1, 1, 0, 1, 0); \
    ROUND_16(0, 1, 0, 0, 1, 0, 1, 1); \
  } while (0)
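/*
 * The ROUND_16 argument lists above are the bits of Ascon's usual 8-bit round
 * constants: P12_8 walks through 0xf0, 0xe1, 0xd2, 0xc3, 0xb4, 0xa5, 0x96,
 * 0x87, 0x78, 0x69, 0x5a, 0x4b, and P8_8 uses the last eight of those. In the
 * bit-interleaved representation the constant byte only touches bit 0 of each
 * x2 slice, so bit j of the constant is passed as C_j. Sketch of the
 * correspondence (illustration only, not part of the build; the helper name
 * is hypothetical):
 */
#if 0
static void round_constant_bits(uint8_t rc, uint8_t bits[8]) {
  int j;
  for (j = 0; j < 8; ++j) bits[j] = (uint8_t)((rc >> j) & 1u); /* bits[j] = C_j */
}
#endif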
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
                        uint64_t mlen, const uint8_t* ad, uint64_t adlen,
                        const uint8_t* nsec, const uint8_t* npub,
                        const uint8_t* k) {
  uint64_t rlen;
  uint64_t i;
  uint8_t buffer[ASCON_RATE];
  uint8_t K0_0, K1_0, N0_0, N1_0, x0_0, x1_0, x2_0, x3_0, x4_0, t0_0, t1_0;
  uint8_t K0_1, K1_1, N0_1, N1_1, x0_1, x1_1, x2_1, x3_1, x4_1, t0_1, t1_1;
  uint8_t K0_2, K1_2, N0_2, N1_2, x0_2, x1_2, x2_2, x3_2, x4_2, t0_2, t1_2;
  uint8_t K0_3, K1_3, N0_3, N1_3, x0_3, x1_3, x2_3, x3_3, x4_3, t0_3, t1_3;
  uint8_t K0_4, K1_4, N0_4, N1_4, x0_4, x1_4, x2_4, x3_4, x4_4, t0_4, t1_4;
  uint8_t K0_5, K1_5, N0_5, N1_5, x0_5, x1_5, x2_5, x3_5, x4_5, t0_5, t1_5;
  uint8_t K0_6, K1_6, N0_6, N1_6, x0_6, x1_6, x2_6, x3_6, x4_6, t0_6, t1_6;
  uint8_t K0_7, K1_7, N0_7, N1_7, x0_7, x1_7, x2_7, x3_7, x4_7, t0_7, t1_7;
  uint8_t in_0, in_1, in_2, in_3, in_4, in_5, in_6, in_7;
  (void)nsec;

  COMPRESS_BYTE_ARRAY_8(k, K0_7, K0_6, K0_5, K0_4, K0_3, K0_2, K0_1, K0_0);
  COMPRESS_BYTE_ARRAY_8((k + 8), K1_7, K1_6, K1_5, K1_4, K1_3, K1_2, K1_1, K1_0);
  COMPRESS_BYTE_ARRAY_8(npub, N0_7, N0_6, N0_5, N0_4, N0_3, N0_2, N0_1, N0_0);
  COMPRESS_BYTE_ARRAY_8((npub + 8), N1_7, N1_6, N1_5, N1_4, N1_3, N1_2, N1_1, N1_0);

  /* initialization */
  buffer[0] = (uint8_t)(CRYPTO_KEYBYTES * 8);
  buffer[1] = (uint8_t)(ASCON_RATE * 8);
  buffer[2] = (uint8_t)PA_ROUNDS;
  buffer[3] = (uint8_t)PB_ROUNDS;
  buffer[4] = 0;
  buffer[5] = 0;
  buffer[6] = 0;
  buffer[7] = 0;
  COMPRESS_BYTE_ARRAY_8(buffer, x0_7, x0_6, x0_5, x0_4, x0_3, x0_2, x0_1, x0_0);
  x1_0 = K0_0; x1_1 = K0_1; x1_2 = K0_2; x1_3 = K0_3;
  x1_4 = K0_4; x1_5 = K0_5; x1_6 = K0_6; x1_7 = K0_7;
  x2_0 = K1_0; x2_1 = K1_1; x2_2 = K1_2; x2_3 = K1_3;
  x2_4 = K1_4; x2_5 = K1_5; x2_6 = K1_6; x2_7 = K1_7;
  x3_0 = N0_0; x3_1 = N0_1; x3_2 = N0_2; x3_3 = N0_3;
  x3_4 = N0_4; x3_5 = N0_5; x3_6 = N0_6; x3_7 = N0_7;
  x4_0 = N1_0; x4_1 = N1_1; x4_2 = N1_2; x4_3 = N1_3;
  x4_4 = N1_4; x4_5 = N1_5; x4_6 = N1_6; x4_7 = N1_7;
  P12_8;
  x3_0 ^= K0_0; x3_1 ^= K0_1; x3_2 ^= K0_2; x3_3 ^= K0_3;
  x3_4 ^= K0_4; x3_5 ^= K0_5; x3_6 ^= K0_6; x3_7 ^= K0_7;
  x4_0 ^= K1_0; x4_1 ^= K1_1; x4_2 ^= K1_2; x4_3 ^= K1_3;
  x4_4 ^= K1_4; x4_5 ^= K1_5; x4_6 ^= K1_6; x4_7 ^= K1_7;

  /* process associated data */
  if (adlen) {
    rlen = adlen;
    while (rlen >= ASCON_RATE) {
      COMPRESS_BYTE_ARRAY_8(ad, in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0);
      x0_0 ^= in_0; x0_1 ^= in_1; x0_2 ^= in_2; x0_3 ^= in_3;
      x0_4 ^= in_4; x0_5 ^= in_5; x0_6 ^= in_6; x0_7 ^= in_7;
      COMPRESS_BYTE_ARRAY_8((ad + 8), in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0);
      x1_0 ^= in_0; x1_1 ^= in_1; x1_2 ^= in_2; x1_3 ^= in_3;
      x1_4 ^= in_4; x1_5 ^= in_5; x1_6 ^= in_6; x1_7 ^= in_7;
      P8_8;
      rlen -= ASCON_RATE;
      ad += ASCON_RATE;
    }
    for (i = 0; i < rlen; ++i, ++ad) buffer[i] = *ad;
    buffer[rlen] = 0x80;
    for (i = rlen + 1; i < ASCON_RATE; ++i) buffer[i] = 0;
    COMPRESS_BYTE_ARRAY_8(buffer, in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0);
    x0_0 ^= in_0; x0_1 ^= in_1; x0_2 ^= in_2; x0_3 ^= in_3;
    x0_4 ^= in_4; x0_5 ^= in_5; x0_6 ^= in_6; x0_7 ^= in_7;
    COMPRESS_BYTE_ARRAY_8((buffer + 8), in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0);
    x1_0 ^= in_0; x1_1 ^= in_1; x1_2 ^= in_2; x1_3 ^= in_3;
    x1_4 ^= in_4; x1_5 ^= in_5; x1_6 ^= in_6; x1_7 ^= in_7;
    P8_8;
  }
  x4_0 ^= 1;

  /* process plaintext */
  rlen = mlen;
  while (rlen >= ASCON_RATE) {
    COMPRESS_BYTE_ARRAY_8(m, in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0);
    x0_0 ^= in_0; x0_1 ^= in_1; x0_2 ^= in_2; x0_3 ^= in_3;
    x0_4 ^= in_4; x0_5 ^= in_5; x0_6 ^= in_6; x0_7 ^= in_7;
    EXPAND_BYTE_ARRAY_8(c, x0_7, x0_6, x0_5, x0_4, x0_3, x0_2, x0_1, x0_0);
    COMPRESS_BYTE_ARRAY_8((m + 8), in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0);
    x1_0 ^= in_0; x1_1 ^= in_1; x1_2 ^= in_2; x1_3 ^= in_3;
    x1_4 ^= in_4; x1_5 ^= in_5; x1_6 ^= in_6; x1_7 ^= in_7;
    EXPAND_BYTE_ARRAY_8((c + 8), x1_7, x1_6, x1_5, x1_4, x1_3, x1_2, x1_1, x1_0);
    P8_8;
    rlen -= ASCON_RATE;
    m += ASCON_RATE;
    c += ASCON_RATE;
  }
  for (i = 0; i < rlen; ++i, ++m) buffer[i] = *m;
  buffer[rlen] = 0x80;
  for (i = rlen + 1; i < ASCON_RATE; ++i) buffer[i] = 0;
  COMPRESS_BYTE_ARRAY_8(buffer, in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0);
  x0_0 ^= in_0; x0_1 ^= in_1; x0_2 ^= in_2; x0_3 ^= in_3;
  x0_4 ^= in_4; x0_5 ^= in_5; x0_6 ^= in_6; x0_7 ^= in_7;
  EXPAND_BYTE_ARRAY_8(buffer, x0_7, x0_6, x0_5, x0_4, x0_3, x0_2, x0_1, x0_0);
  COMPRESS_BYTE_ARRAY_8((buffer + 8), in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0);
  x1_0 ^= in_0; x1_1 ^= in_1; x1_2 ^= in_2; x1_3 ^= in_3;
  x1_4 ^= in_4; x1_5 ^= in_5; x1_6 ^= in_6; x1_7 ^= in_7;
  EXPAND_BYTE_ARRAY_8((buffer + 8), x1_7, x1_6, x1_5, x1_4, x1_3, x1_2, x1_1, x1_0);
  for (i = 0; i < rlen; ++i, ++c) *c = buffer[i];

  /* finalization */
  x2_0 ^= K0_0; x2_1 ^= K0_1; x2_2 ^= K0_2; x2_3 ^= K0_3;
  x2_4 ^= K0_4; x2_5 ^= K0_5; x2_6 ^= K0_6; x2_7 ^= K0_7;
  x3_0 ^= K1_0; x3_1 ^= K1_1; x3_2 ^= K1_2; x3_3 ^= K1_3;
  x3_4 ^= K1_4; x3_5 ^= K1_5; x3_6 ^= K1_6; x3_7 ^= K1_7;
  P12_8;
  x3_0 ^= K0_0; x3_1 ^= K0_1; x3_2 ^= K0_2; x3_3 ^= K0_3;
  x3_4 ^= K0_4; x3_5 ^= K0_5; x3_6 ^= K0_6; x3_7 ^= K0_7;
  x4_0 ^= K1_0; x4_1 ^= K1_1; x4_2 ^= K1_2; x4_3 ^= K1_3;
  x4_4 ^= K1_4; x4_5 ^= K1_5; x4_6 ^= K1_6; x4_7 ^= K1_7;

  /* return tag */
  EXPAND_BYTE_ARRAY_8(c, x3_7, x3_6, x3_5, x3_4, x3_3, x3_2, x3_1, x3_0);
  c += 8;
  EXPAND_BYTE_ARRAY_8(c, x4_7, x4_6, x4_5, x4_4, x4_3, x4_2, x4_1, x4_0);
  *clen = mlen + CRYPTO_ABYTES;

  return 0;
}

int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
                        const uint8_t* c, uint64_t clen, const uint8_t* ad,
                        uint64_t adlen, const uint8_t* npub,
                        const uint8_t* k) {
  *mlen = 0;
  if (clen < CRYPTO_ABYTES) return -1;

  uint64_t rlen;
  uint64_t i;
  uint16_t ret_val;
  uint8_t buffer[ASCON_RATE];
  uint8_t K0_0, K1_0, N0_0, N1_0, x0_0, x1_0, x2_0, x3_0, x4_0, t0_0, t1_0;
  uint8_t K0_1, K1_1, N0_1, N1_1, x0_1, x1_1, x2_1, x3_1, x4_1, t0_1, t1_1;
  uint8_t K0_2, K1_2, N0_2, N1_2, x0_2, x1_2, x2_2, x3_2, x4_2, t0_2, t1_2;
  uint8_t K0_3, K1_3, N0_3, N1_3, x0_3, x1_3, x2_3, x3_3, x4_3, t0_3, t1_3;
  uint8_t K0_4, K1_4, N0_4, N1_4, x0_4, x1_4, x2_4, x3_4, x4_4, t0_4, t1_4;
  uint8_t K0_5, K1_5, N0_5, N1_5, x0_5, x1_5, x2_5, x3_5, x4_5, t0_5, t1_5;
  uint8_t K0_6, K1_6, N0_6, N1_6, x0_6, x1_6, x2_6, x3_6, x4_6, t0_6, t1_6;
  uint8_t K0_7, K1_7, N0_7, N1_7, x0_7, x1_7, x2_7, x3_7, x4_7, t0_7, t1_7;
  uint8_t in_0, in_1, in_2, in_3, in_4, in_5, in_6, in_7;
  (void)nsec;

  COMPRESS_BYTE_ARRAY_8(k, K0_7, K0_6, K0_5, K0_4, K0_3, K0_2, K0_1, K0_0);
  COMPRESS_BYTE_ARRAY_8((k + 8), K1_7, K1_6, K1_5, K1_4, K1_3, K1_2, K1_1, K1_0);
  COMPRESS_BYTE_ARRAY_8(npub, N0_7, N0_6, N0_5, N0_4, N0_3, N0_2, N0_1, N0_0);
  COMPRESS_BYTE_ARRAY_8((npub + 8), N1_7, N1_6, N1_5, N1_4, N1_3, N1_2, N1_1, N1_0);

  /* initialization */
  buffer[0] = (uint8_t)(CRYPTO_KEYBYTES * 8);
  buffer[1] = (uint8_t)(ASCON_RATE * 8);
  buffer[2] = (uint8_t)PA_ROUNDS;
  buffer[3] = (uint8_t)PB_ROUNDS;
  buffer[4] = 0;
  buffer[5] = 0;
  buffer[6] = 0;
  buffer[7] = 0;
  COMPRESS_BYTE_ARRAY_8(buffer, x0_7, x0_6, x0_5, x0_4, x0_3, x0_2, x0_1, x0_0);
  x1_0 = K0_0; x1_1 = K0_1; x1_2 = K0_2; x1_3 = K0_3;
  x1_4 = K0_4; x1_5 = K0_5; x1_6 = K0_6; x1_7 = K0_7;
  x2_0 = K1_0; x2_1 = K1_1; x2_2 = K1_2; x2_3 = K1_3;
  x2_4 = K1_4; x2_5 = K1_5; x2_6 = K1_6; x2_7 = K1_7;
  x3_0 = N0_0; x3_1 = N0_1; x3_2 = N0_2; x3_3 = N0_3;
  x3_4 = N0_4; x3_5 = N0_5; x3_6 = N0_6; x3_7 = N0_7;
  x4_0 = N1_0; x4_1 = N1_1; x4_2 = N1_2; x4_3 = N1_3;
  x4_4 = N1_4; x4_5 = N1_5; x4_6 = N1_6; x4_7 = N1_7;
  P12_8;
  x3_0 ^= K0_0; x3_1 ^= K0_1; x3_2 ^= K0_2; x3_3 ^= K0_3;
  x3_4 ^= K0_4; x3_5 ^= K0_5; x3_6 ^= K0_6; x3_7 ^= K0_7;
  x4_0 ^= K1_0; x4_1 ^= K1_1; x4_2 ^= K1_2; x4_3 ^= K1_3;
  x4_4 ^= K1_4; x4_5 ^= K1_5; x4_6 ^= K1_6; x4_7 ^= K1_7;

  /* process associated data */
  if (adlen) {
    rlen = adlen;
    while (rlen >= ASCON_RATE) {
      COMPRESS_BYTE_ARRAY_8(ad, in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0);
      x0_0 ^= in_0; x0_1 ^= in_1; x0_2 ^= in_2; x0_3 ^= in_3;
      x0_4 ^= in_4; x0_5 ^= in_5; x0_6 ^= in_6; x0_7 ^= in_7;
      COMPRESS_BYTE_ARRAY_8((ad + 8), in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0);
      x1_0 ^= in_0; x1_1 ^= in_1; x1_2 ^= in_2; x1_3 ^= in_3;
      x1_4 ^= in_4; x1_5 ^= in_5; x1_6 ^= in_6; x1_7 ^= in_7;
      P8_8;
      rlen -= ASCON_RATE;
      ad += ASCON_RATE;
    }
    for (i = 0; i < rlen; ++i, ++ad) buffer[i] = *ad;
    buffer[rlen] = 0x80;
    for (i = rlen + 1; i < ASCON_RATE; ++i) buffer[i] = 0;
    COMPRESS_BYTE_ARRAY_8(buffer, in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0);
    x0_0 ^= in_0; x0_1 ^= in_1; x0_2 ^= in_2; x0_3 ^= in_3;
    x0_4 ^= in_4; x0_5 ^= in_5; x0_6 ^= in_6; x0_7 ^= in_7;
    COMPRESS_BYTE_ARRAY_8((buffer + 8), in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0);
    x1_0 ^= in_0; x1_1 ^= in_1; x1_2 ^= in_2; x1_3 ^= in_3;
    x1_4 ^= in_4; x1_5 ^= in_5; x1_6 ^= in_6; x1_7 ^= in_7;
    P8_8;
  }
  x4_0 ^= 1;

  /* process ciphertext */
  rlen = clen - CRYPTO_KEYBYTES;
  while (rlen >= ASCON_RATE) {
    EXPAND_BYTE_ARRAY_8(m, x0_7, x0_6, x0_5, x0_4, x0_3, x0_2, x0_1, x0_0);
    EXPAND_BYTE_ARRAY_8((m + 8), x1_7, x1_6, x1_5, x1_4, x1_3, x1_2, x1_1, x1_0);
    for (i = 0; i < ASCON_RATE; ++i) m[i] ^= c[i];
    COMPRESS_BYTE_ARRAY_8(c, x0_7, x0_6, x0_5, x0_4, x0_3, x0_2, x0_1, x0_0);
    COMPRESS_BYTE_ARRAY_8((c + 8), x1_7, x1_6, x1_5, x1_4, x1_3, x1_2, x1_1, x1_0);
    P8_8;
    rlen -= ASCON_RATE;
    m += ASCON_RATE;
    c += ASCON_RATE;
  }
  EXPAND_BYTE_ARRAY_8(buffer, x0_7, x0_6, x0_5, x0_4, x0_3, x0_2, x0_1, x0_0);
  EXPAND_BYTE_ARRAY_8((buffer + 8), x1_7, x1_6, x1_5, x1_4, x1_3, x1_2, x1_1, x1_0);
  for (i = 0; i < rlen; ++i, ++m, ++c) {
    *m = buffer[i] ^ *c;
    buffer[i] = *c;
  }
  buffer[rlen] ^= 0x80;
  COMPRESS_BYTE_ARRAY_8(buffer, x0_7, x0_6, x0_5, x0_4, x0_3, x0_2, x0_1, x0_0);
  COMPRESS_BYTE_ARRAY_8((buffer + 8), x1_7, x1_6, x1_5, x1_4, x1_3, x1_2, x1_1, x1_0);

  /* finalization */
  x2_0 ^= K0_0; x2_1 ^= K0_1; x2_2 ^= K0_2; x2_3 ^= K0_3;
  x2_4 ^= K0_4; x2_5 ^= K0_5; x2_6 ^= K0_6; x2_7 ^= K0_7;
  x3_0 ^= K1_0; x3_1 ^= K1_1; x3_2 ^= K1_2; x3_3 ^= K1_3;
  x3_4 ^= K1_4; x3_5 ^= K1_5; x3_6 ^= K1_6; x3_7 ^= K1_7;
  P12_8;
  x3_0 ^= K0_0; x3_1 ^= K0_1; x3_2 ^= K0_2; x3_3 ^= K0_3;
  x3_4 ^= K0_4; x3_5 ^= K0_5; x3_6 ^= K0_6; x3_7 ^= K0_7;
  x4_0 ^= K1_0; x4_1 ^= K1_1; x4_2 ^= K1_2; x4_3 ^= K1_3;
  x4_4 ^= K1_4; x4_5 ^= K1_5; x4_6 ^= K1_6; x4_7 ^= K1_7;

  /* return -1 if verification fails */
  ret_val = 0;
  EXPAND_BYTE_ARRAY_8(buffer, x3_7, x3_6, x3_5, x3_4, x3_3, x3_2, x3_1, x3_0);
  for (i = 0; i < 8; ++i, ++c) ret_val |= *c ^ buffer[i];
  EXPAND_BYTE_ARRAY_8(buffer, x4_7, x4_6, x4_5, x4_4, x4_3, x4_2, x4_1, x4_0);
  for (i = 0; i < 8; ++i, ++c) ret_val |= *c ^ buffer[i];
  if (ret_val != 0) return -1;

  /* return plaintext */
  *mlen = clen - CRYPTO_ABYTES;

  return 0;
}
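
/*
 * Illustrative round-trip sketch, not part of the original implementation:
 * encrypts a short message and checks that decryption recovers it. It assumes
 * the usual api.h constants for this rate-16 variant (CRYPTO_KEYBYTES = 16,
 * CRYPTO_NPUBBYTES = 16, CRYPTO_ABYTES = 16, ASCON_RATE = 16); the
 * ASCON_BI8_EXAMPLE guard is hypothetical and keeps it out of normal builds.
 */
#ifdef ASCON_BI8_EXAMPLE
#include <stdio.h>
#include <string.h>

int main(void) {
  uint8_t k[CRYPTO_KEYBYTES] = {0};
  uint8_t npub[CRYPTO_NPUBBYTES] = {0};
  uint8_t m[4] = {'t', 'e', 's', 't'};
  uint8_t ad[3] = {'a', 'd', '0'};
  uint8_t c[sizeof m + CRYPTO_ABYTES];
  uint8_t out[sizeof m];
  uint64_t clen = 0, outlen = 0;

  /* encrypt, then decrypt and verify the tag and the recovered plaintext */
  crypto_aead_encrypt(c, &clen, m, sizeof m, ad, sizeof ad, NULL, npub, k);
  if (crypto_aead_decrypt(out, &outlen, NULL, c, clen, ad, sizeof ad, npub, k) != 0 ||
      outlen != sizeof m || memcmp(out, m, sizeof m) != 0) {
    puts("round-trip failed");
    return 1;
  }
  puts("round-trip ok");
  return 0;
}
#endif /* ASCON_BI8_EXAMPLE */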