#include #include "api.h" #include "endian.h" #define PA_ROUNDS 12 #define PB_ROUNDS 8 #define ROR16(x, n) (((x) >> (n)) | ((x) << (16 - (n)))) #define ROL16(x, n) (((x) << (n)) | ((x) >> (16 - (n)))) #define COMPRESS_LONG_16(x) \ do { \ x &= 0x1111; \ x = (x | (x >> 3)) & 0x0303; \ x = (x | (x >> 6)) & 0x000f; \ } while (0) #define COMPRESS_U16(var, var_3, var_2, var_1, var_0) \ do { \ /* var 16-bit, and var_0/1/2/3 4-bit */ \ var_0 = var; \ var_1 = var_0 >> 1; \ var_2 = var_1 >> 1; \ var_3 = var_2 >> 1; \ COMPRESS_LONG_16(var_0); \ COMPRESS_LONG_16(var_1); \ COMPRESS_LONG_16(var_2); \ COMPRESS_LONG_16(var_3); \ } while (0) #define COMPRESS_BYTE_ARRAY_16(a, var_3, var_2, var_1, var_0) \ do { \ COMPRESS_U16(U16BIG(((uint16_t*)(a))[3]), var_3, var_2, var_1, var_0); \ COMPRESS_U16(U16BIG(((uint16_t*)(a))[2]), t1_3, t1_2, t1_1, t1_0); \ var_0 |= t1_0 << 4; \ var_1 |= t1_1 << 4; \ var_2 |= t1_2 << 4; \ var_3 |= t1_3 << 4; \ COMPRESS_U16(U16BIG(((uint16_t*)(a))[1]), t1_3, t1_2, t1_1, t1_0); \ var_0 |= t1_0 << 8; \ var_1 |= t1_1 << 8; \ var_2 |= t1_2 << 8; \ var_3 |= t1_3 << 8; \ COMPRESS_U16(U16BIG(((uint16_t*)(a))[0]), t1_3, t1_2, t1_1, t1_0); \ var_0 |= t1_0 << 12; \ var_1 |= t1_1 << 12; \ var_2 |= t1_2 << 12; \ var_3 |= t1_3 << 12; \ } while (0) #define EXPAND_SHORT_16(x) \ do { \ x &= 0x000f; \ x = (x | (x << 6)) & 0x0303; \ x = (x | (x << 3)) & 0x1111; \ } while (0) #define EXPAND_U16(var, var_3, var_2, var_1, var_0) \ do { \ /* var 16-bit, and var_0/1/2/3 4-bit */ \ t0_0 = var_0; \ t0_1 = var_1; \ t0_2 = var_2; \ t0_3 = var_3; \ EXPAND_SHORT_16(t0_0); \ EXPAND_SHORT_16(t0_1); \ EXPAND_SHORT_16(t0_2); \ EXPAND_SHORT_16(t0_3); \ *var = U16BIG(t0_0 | (t0_1 << 1) | (t0_2 << 2) | (t0_3 << 3)); \ } while (0) #define EXPAND_BYTE_ARRAY_16(a, var_3, var_2, var_1, var_0) \ do { \ EXPAND_U16((((uint16_t*)(a)) + 3), var_3, var_2, var_1, var_0); \ t1_3 = var_3 >> 4; \ t1_2 = var_2 >> 4; \ t1_1 = var_1 >> 4; \ t1_0 = var_0 >> 4; \ EXPAND_U16((((uint16_t*)(a)) + 2), t1_3, t1_2, t1_1, t1_0); \ t1_3 >>= 4; \ t1_2 >>= 4; \ t1_1 >>= 4; \ t1_0 >>= 4; \ EXPAND_U16((((uint16_t*)(a)) + 1), t1_3, t1_2, t1_1, t1_0); \ t1_3 >>= 4; \ t1_2 >>= 4; \ t1_1 >>= 4; \ t1_0 >>= 4; \ EXPAND_U16((((uint16_t*)(a)) + 0), t1_3, t1_2, t1_1, t1_0); \ } while (0) /* This way of implementing Ascon's S-box was inpired by personal communication with Joan Daemen about implementing the 3-bit chi layer. */ #define ROUND_16(C_3, C_2, C_1, C_0) \ do { \ /* round constant */ \ x2_0 ^= C_0; \ x2_1 ^= C_1; \ x2_2 ^= C_2; \ x2_3 ^= C_3; \ /* s-box layer */ \ x0_0 ^= x4_0; \ x4_0 ^= x3_0; \ x2_0 ^= x1_0; \ t0_0 = x0_0 & (~x4_0); \ t1_0 = x2_0 & (~x1_0); \ x0_0 ^= t1_0; \ t1_0 = x4_0 & (~x3_0); \ x2_0 ^= t1_0; \ t1_0 = x1_0 & (~x0_0); \ x4_0 ^= t1_0; \ t1_0 = x3_0 & (~x2_0); \ x1_0 ^= t1_0; \ x3_0 ^= t0_0; \ x1_0 ^= x0_0; \ x3_0 ^= x2_0; \ x0_0 ^= x4_0; \ x2_0 = ~x2_0; \ x0_1 ^= x4_1; \ x4_1 ^= x3_1; \ x2_1 ^= x1_1; \ t0_0 = x0_1 & (~x4_1); \ t1_0 = x2_1 & (~x1_1); \ x0_1 ^= t1_0; \ t1_0 = x4_1 & (~x3_1); \ x2_1 ^= t1_0; \ t1_0 = x1_1 & (~x0_1); \ x4_1 ^= t1_0; \ t1_0 = x3_1 & (~x2_1); \ x1_1 ^= t1_0; \ x3_1 ^= t0_0; \ x1_1 ^= x0_1; \ x3_1 ^= x2_1; \ x0_1 ^= x4_1; \ x2_1 = ~x2_1; \ x0_2 ^= x4_2; \ x4_2 ^= x3_2; \ x2_2 ^= x1_2; \ t0_0 = x0_2 & (~x4_2); \ t1_0 = x2_2 & (~x1_2); \ x0_2 ^= t1_0; \ t1_0 = x4_2 & (~x3_2); \ x2_2 ^= t1_0; \ t1_0 = x1_2 & (~x0_2); \ x4_2 ^= t1_0; \ t1_0 = x3_2 & (~x2_2); \ x1_2 ^= t1_0; \ x3_2 ^= t0_0; \ x1_2 ^= x0_2; \ x3_2 ^= x2_2; \ x0_2 ^= x4_2; \ x2_2 = ~x2_2; \ x0_3 ^= x4_3; \ x4_3 ^= x3_3; \ x2_3 ^= x1_3; \ t0_0 = x0_3 & (~x4_3); \ t1_0 = x2_3 & (~x1_3); \ x0_3 ^= t1_0; \ t1_0 = x4_3 & (~x3_3); \ x2_3 ^= t1_0; \ t1_0 = x1_3 & (~x0_3); \ x4_3 ^= t1_0; \ t1_0 = x3_3 & (~x2_3); \ x1_3 ^= t1_0; \ x3_3 ^= t0_0; \ x1_3 ^= x0_3; \ x3_3 ^= x2_3; \ x0_3 ^= x4_3; \ x2_3 = ~x2_3; \ /* linear layer */ \ t0_0 = x0_0; \ t0_1 = x0_1; \ t0_2 = x0_2; \ t0_3 = x0_3; \ x0_1 ^= ROR16(t0_0, 5); \ x0_2 ^= ROR16(t0_1, 5); \ x0_3 ^= ROR16(t0_2, 5); \ x0_0 ^= ROR16(t0_3, 4); \ x0_0 ^= ROR16(t0_0, 7); \ x0_1 ^= ROR16(t0_1, 7); \ x0_2 ^= ROR16(t0_2, 7); \ x0_3 ^= ROR16(t0_3, 7); \ t0_0 = x1_0; \ t0_1 = x1_1; \ t0_2 = x1_2; \ t0_3 = x1_3; \ x1_3 ^= t0_0; \ x1_0 ^= ROL16(t0_1, 1); \ x1_1 ^= ROL16(t0_2, 1); \ x1_2 ^= ROL16(t0_3, 1); \ x1_1 ^= ROL16(t0_0, 6); \ x1_2 ^= ROL16(t0_1, 6); \ x1_3 ^= ROL16(t0_2, 6); \ x1_0 ^= ROL16(t0_3, 7); \ t0_0 = x2_0; \ t0_1 = x2_1; \ t0_2 = x2_2; \ t0_3 = x2_3; \ x2_3 ^= ROR16(t0_0, 1); \ x2_0 ^= t0_1; \ x2_1 ^= t0_2; \ x2_2 ^= t0_3; \ x2_2 ^= ROR16(t0_0, 2); \ x2_3 ^= ROR16(t0_1, 2); \ x2_0 ^= ROR16(t0_2, 1); \ x2_1 ^= ROR16(t0_3, 1); \ t0_0 = x3_0; \ t0_1 = x3_1; \ t0_2 = x3_2; \ t0_3 = x3_3; \ x3_2 ^= ROR16(t0_0, 3); \ x3_3 ^= ROR16(t0_1, 3); \ x3_0 ^= ROR16(t0_2, 2); \ x3_1 ^= ROR16(t0_3, 2); \ x3_3 ^= ROR16(t0_0, 5); \ x3_0 ^= ROR16(t0_1, 4); \ x3_1 ^= ROR16(t0_2, 4); \ x3_2 ^= ROR16(t0_3, 4); \ t0_0 = x4_0; \ t0_1 = x4_1; \ t0_2 = x4_2; \ t0_3 = x4_3; \ x4_1 ^= ROR16(t0_0, 2); \ x4_2 ^= ROR16(t0_1, 2); \ x4_3 ^= ROR16(t0_2, 2); \ x4_0 ^= ROR16(t0_3, 1); \ x4_3 ^= ROL16(t0_0, 5); \ x4_0 ^= ROL16(t0_1, 6); \ x4_1 ^= ROL16(t0_2, 6); \ x4_2 ^= ROL16(t0_3, 6); \ } while (0) #define P12_16 \ do { \ ROUND_16(2, 2, 2, 2); \ ROUND_16(2, 2, 2, 1); \ ROUND_16(2, 2, 1, 2); \ ROUND_16(2, 2, 1, 1); \ ROUND_16(2, 1, 2, 2); \ ROUND_16(2, 1, 2, 1); \ ROUND_16(2, 1, 1, 2); \ ROUND_16(2, 1, 1, 1); \ ROUND_16(1, 2, 2, 2); \ ROUND_16(1, 2, 2, 1); \ ROUND_16(1, 2, 1, 2); \ ROUND_16(1, 2, 1, 1); \ } while (0) #define P8_16 \ do { \ ROUND_16(2, 1, 2, 2); \ ROUND_16(2, 1, 2, 1); \ ROUND_16(2, 1, 1, 2); \ ROUND_16(2, 1, 1, 1); \ ROUND_16(1, 2, 2, 2); \ ROUND_16(1, 2, 2, 1); \ ROUND_16(1, 2, 1, 2); \ ROUND_16(1, 2, 1, 1); \ } while (0) int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, uint64_t mlen, const uint8_t* ad, uint64_t adlen, const uint8_t* nsec, const uint8_t* npub, const uint8_t* k) { uint64_t rlen; uint64_t i; uint8_t buffer[16]; uint16_t K0_0; uint16_t K1_0; uint16_t N0_0; uint16_t N1_0; uint16_t x0_0, x1_0, x2_0, x3_0, x4_0; uint16_t t0_0, t1_0; uint16_t K0_1; uint16_t K1_1; uint16_t N0_1; uint16_t N1_1; uint16_t x0_1, x1_1, x2_1, x3_1, x4_1; uint16_t t0_1, t1_1; uint16_t K0_2; uint16_t K1_2; uint16_t N0_2; uint16_t N1_2; uint16_t x0_2, x1_2, x2_2, x3_2, x4_2; uint16_t t0_2, t1_2; uint16_t K0_3; uint16_t K1_3; uint16_t N0_3; uint16_t N1_3; uint16_t x0_3, x1_3, x2_3, x3_3, x4_3; uint16_t t0_3, t1_3; uint16_t in_0, in_1, in_2, in_3; (void)nsec; COMPRESS_BYTE_ARRAY_16(k, K0_3, K0_2, K0_1, K0_0); COMPRESS_BYTE_ARRAY_16(k + 8, K1_3, K1_2, K1_1, K1_0); COMPRESS_BYTE_ARRAY_16(npub, N0_3, N0_2, N0_1, N0_0); COMPRESS_BYTE_ARRAY_16(npub + 8, N1_3, N1_2, N1_1, N1_0); /* initialization */ t1_0 = (uint16_t)((CRYPTO_KEYBYTES * 8) << 8 | (ASCON_RATE * 8) << 0); t1_1 = t1_0 >> 1; t1_2 = t1_1 >> 1; t1_3 = t1_2 >> 1; COMPRESS_LONG_16(t1_0); COMPRESS_LONG_16(t1_1); COMPRESS_LONG_16(t1_2); COMPRESS_LONG_16(t1_3); x0_0 = t1_0 << 12; x0_1 = t1_1 << 12; x0_2 = t1_2 << 12; x0_3 = t1_3 << 12; t1_0 = (uint16_t)(PA_ROUNDS << 8 | PB_ROUNDS << 0); t1_1 = t1_0 >> 1; t1_2 = t1_1 >> 1; t1_3 = t1_2 >> 1; COMPRESS_LONG_16(t1_0); COMPRESS_LONG_16(t1_1); COMPRESS_LONG_16(t1_2); COMPRESS_LONG_16(t1_3); x0_0 |= t1_0 << 8; x0_1 |= t1_1 << 8; x0_2 |= t1_2 << 8; x0_3 |= t1_3 << 8; x1_0 = K0_0; x1_1 = K0_1; x1_2 = K0_2; x1_3 = K0_3; x2_0 = K1_0; x2_1 = K1_1; x2_2 = K1_2; x2_3 = K1_3; x3_0 = N0_0; x3_1 = N0_1; x3_2 = N0_2; x3_3 = N0_3; x4_0 = N1_0; x4_1 = N1_1; x4_2 = N1_2; x4_3 = N1_3; P12_16; x3_0 ^= K0_0; x3_1 ^= K0_1; x3_2 ^= K0_2; x3_3 ^= K0_3; x4_0 ^= K1_0; x4_1 ^= K1_1; x4_2 ^= K1_2; x4_3 ^= K1_3; /* process associated data */ if (adlen) { rlen = adlen; while (rlen >= ASCON_RATE) { COMPRESS_BYTE_ARRAY_16(ad, in_3, in_2, in_1, in_0); x0_0 ^= in_0; x0_1 ^= in_1; x0_2 ^= in_2; x0_3 ^= in_3; COMPRESS_BYTE_ARRAY_16(ad + 8, in_3, in_2, in_1, in_0); x1_0 ^= in_0; x1_1 ^= in_1; x1_2 ^= in_2; x1_3 ^= in_3; P8_16; rlen -= ASCON_RATE; ad += ASCON_RATE; } for (i = 0; i < rlen; ++i, ++ad) buffer[i] = *ad; buffer[rlen] = 0x80; for (i = rlen + 1; i < 16; ++i) buffer[i] = 0; COMPRESS_BYTE_ARRAY_16(buffer, in_3, in_2, in_1, in_0); x0_0 ^= in_0; x0_1 ^= in_1; x0_2 ^= in_2; x0_3 ^= in_3; COMPRESS_BYTE_ARRAY_16(buffer + 8, in_3, in_2, in_1, in_0); x1_0 ^= in_0; x1_1 ^= in_1; x1_2 ^= in_2; x1_3 ^= in_3; P8_16; } x4_0 ^= 1; /* process plaintext */ rlen = mlen; while (rlen >= ASCON_RATE) { COMPRESS_BYTE_ARRAY_16(m, in_3, in_2, in_1, in_0); x0_0 ^= in_0; x0_1 ^= in_1; x0_2 ^= in_2; x0_3 ^= in_3; COMPRESS_BYTE_ARRAY_16(m + 8, in_3, in_2, in_1, in_0); x1_0 ^= in_0; x1_1 ^= in_1; x1_2 ^= in_2; x1_3 ^= in_3; EXPAND_BYTE_ARRAY_16(c, x0_3, x0_2, x0_1, x0_0); EXPAND_BYTE_ARRAY_16(c + 8, x1_3, x1_2, x1_1, x1_0); P8_16; rlen -= ASCON_RATE; m += ASCON_RATE; c += ASCON_RATE; } for (i = 0; i < rlen; ++i, ++m) buffer[i] = *m; buffer[rlen] = 0x80; for (i = rlen + 1; i < 16; ++i) buffer[i] = 0; COMPRESS_BYTE_ARRAY_16(buffer, in_3, in_2, in_1, in_0); x0_0 ^= in_0; x0_1 ^= in_1; x0_2 ^= in_2; x0_3 ^= in_3; COMPRESS_BYTE_ARRAY_16(buffer + 8, in_3, in_2, in_1, in_0); x1_0 ^= in_0; x1_1 ^= in_1; x1_2 ^= in_2; x1_3 ^= in_3; EXPAND_BYTE_ARRAY_16(buffer, x0_3, x0_2, x0_1, x0_0); EXPAND_BYTE_ARRAY_16(buffer + 8, x1_3, x1_2, x1_1, x1_0); for (i = 0; i < rlen; ++i, ++c) *c = buffer[i]; /* finalization */ x2_0 ^= K0_0; x2_1 ^= K0_1; x2_2 ^= K0_2; x2_3 ^= K0_3; x3_0 ^= K1_0; x3_1 ^= K1_1; x3_2 ^= K1_2; x3_3 ^= K1_3; P12_16; x3_0 ^= K0_0; x3_1 ^= K0_1; x3_2 ^= K0_2; x3_3 ^= K0_3; x4_0 ^= K1_0; x4_1 ^= K1_1; x4_2 ^= K1_2; x4_3 ^= K1_3; /* return tag */ EXPAND_BYTE_ARRAY_16(c, x3_3, x3_2, x3_1, x3_0); c += 8; EXPAND_BYTE_ARRAY_16(c, x4_3, x4_2, x4_1, x4_0); *clen = mlen + CRYPTO_ABYTES; return 0; } int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, const uint8_t* c, uint64_t clen, const uint8_t* ad, uint64_t adlen, const uint8_t* npub, const uint8_t* k) { *mlen = 0; if (clen < CRYPTO_ABYTES) return -1; uint16_t ret_val; uint64_t rlen; uint64_t i; uint8_t buffer[16]; uint16_t K0_0; uint16_t K1_0; uint16_t N0_0; uint16_t N1_0; uint16_t x0_0, x1_0, x2_0, x3_0, x4_0; uint16_t t0_0, t1_0; uint16_t K0_1; uint16_t K1_1; uint16_t N0_1; uint16_t N1_1; uint16_t x0_1, x1_1, x2_1, x3_1, x4_1; uint16_t t0_1, t1_1; uint16_t K0_2; uint16_t K1_2; uint16_t N0_2; uint16_t N1_2; uint16_t x0_2, x1_2, x2_2, x3_2, x4_2; uint16_t t0_2, t1_2; uint16_t K0_3; uint16_t K1_3; uint16_t N0_3; uint16_t N1_3; uint16_t x0_3, x1_3, x2_3, x3_3, x4_3; uint16_t t0_3, t1_3; uint16_t in_0, in_1, in_2, in_3; (void)nsec; COMPRESS_BYTE_ARRAY_16(k, K0_3, K0_2, K0_1, K0_0); COMPRESS_BYTE_ARRAY_16(k + 8, K1_3, K1_2, K1_1, K1_0); COMPRESS_BYTE_ARRAY_16(npub, N0_3, N0_2, N0_1, N0_0); COMPRESS_BYTE_ARRAY_16(npub + 8, N1_3, N1_2, N1_1, N1_0); /* initialization */ t1_0 = (uint16_t)((CRYPTO_KEYBYTES * 8) << 8 | (ASCON_RATE * 8) << 0); t1_1 = t1_0 >> 1; t1_2 = t1_1 >> 1; t1_3 = t1_2 >> 1; COMPRESS_LONG_16(t1_0); COMPRESS_LONG_16(t1_1); COMPRESS_LONG_16(t1_2); COMPRESS_LONG_16(t1_3); x0_0 = t1_0 << 12; x0_1 = t1_1 << 12; x0_2 = t1_2 << 12; x0_3 = t1_3 << 12; t1_0 = (uint16_t)(PA_ROUNDS << 8 | PB_ROUNDS << 0); t1_1 = t1_0 >> 1; t1_2 = t1_1 >> 1; t1_3 = t1_2 >> 1; COMPRESS_LONG_16(t1_0); COMPRESS_LONG_16(t1_1); COMPRESS_LONG_16(t1_2); COMPRESS_LONG_16(t1_3); x0_0 |= t1_0 << 8; x0_1 |= t1_1 << 8; x0_2 |= t1_2 << 8; x0_3 |= t1_3 << 8; x1_0 = K0_0; x1_1 = K0_1; x1_2 = K0_2; x1_3 = K0_3; x2_0 = K1_0; x2_1 = K1_1; x2_2 = K1_2; x2_3 = K1_3; x3_0 = N0_0; x3_1 = N0_1; x3_2 = N0_2; x3_3 = N0_3; x4_0 = N1_0; x4_1 = N1_1; x4_2 = N1_2; x4_3 = N1_3; P12_16; x3_0 ^= K0_0; x3_1 ^= K0_1; x3_2 ^= K0_2; x3_3 ^= K0_3; x4_0 ^= K1_0; x4_1 ^= K1_1; x4_2 ^= K1_2; x4_3 ^= K1_3; /* process associated data */ if (adlen) { rlen = adlen; while (rlen >= ASCON_RATE) { COMPRESS_BYTE_ARRAY_16(ad, in_3, in_2, in_1, in_0); x0_0 ^= in_0; x0_1 ^= in_1; x0_2 ^= in_2; x0_3 ^= in_3; COMPRESS_BYTE_ARRAY_16(ad + 8, in_3, in_2, in_1, in_0); x1_0 ^= in_0; x1_1 ^= in_1; x1_2 ^= in_2; x1_3 ^= in_3; P8_16; rlen -= ASCON_RATE; ad += ASCON_RATE; } for (i = 0; i < rlen; ++i, ++ad) buffer[i] = *ad; buffer[rlen] = 0x80; for (i = rlen + 1; i < 16; ++i) buffer[i] = 0; COMPRESS_BYTE_ARRAY_16(buffer, in_3, in_2, in_1, in_0); x0_0 ^= in_0; x0_1 ^= in_1; x0_2 ^= in_2; x0_3 ^= in_3; COMPRESS_BYTE_ARRAY_16(buffer + 8, in_3, in_2, in_1, in_0); x1_0 ^= in_0; x1_1 ^= in_1; x1_2 ^= in_2; x1_3 ^= in_3; P8_16; } x4_0 ^= 1; /* process plaintext */ rlen = clen - CRYPTO_KEYBYTES; while (rlen >= ASCON_RATE) { EXPAND_U16(&t1_0, x0_3, x0_2, x0_1, x0_0); EXPAND_U16(&t1_1, x0_3 >> 4, x0_2 >> 4, x0_1 >> 4, x0_0 >> 4); EXPAND_U16(&t1_2, x0_3 >> 8, x0_2 >> 8, x0_1 >> 8, x0_0 >> 8); EXPAND_U16(&t1_3, x0_3 >> 12, x0_2 >> 12, x0_1 >> 12, x0_0 >> 12); ((uint16_t*)m)[0] = (t1_3) ^ ((uint16_t*)c)[0]; ((uint16_t*)m)[1] = (t1_2) ^ ((uint16_t*)c)[1]; ((uint16_t*)m)[2] = (t1_1) ^ ((uint16_t*)c)[2]; ((uint16_t*)m)[3] = (t1_0) ^ ((uint16_t*)c)[3]; EXPAND_U16(&t1_0, x1_3, x1_2, x1_1, x1_0); EXPAND_U16(&t1_1, x1_3 >> 4, x1_2 >> 4, x1_1 >> 4, x1_0 >> 4); EXPAND_U16(&t1_2, x1_3 >> 8, x1_2 >> 8, x1_1 >> 8, x1_0 >> 8); EXPAND_U16(&t1_3, x1_3 >> 12, x1_2 >> 12, x1_1 >> 12, x1_0 >> 12); ((uint16_t*)m)[4] = (t1_3) ^ ((uint16_t*)c)[4]; ((uint16_t*)m)[5] = (t1_2) ^ ((uint16_t*)c)[5]; ((uint16_t*)m)[6] = (t1_1) ^ ((uint16_t*)c)[6]; ((uint16_t*)m)[7] = (t1_0) ^ ((uint16_t*)c)[7]; COMPRESS_BYTE_ARRAY_16(c, x0_3, x0_2, x0_1, x0_0); COMPRESS_BYTE_ARRAY_16(c + 8, x1_3, x1_2, x1_1, x1_0); P8_16; rlen -= ASCON_RATE; m += ASCON_RATE; c += ASCON_RATE; } EXPAND_BYTE_ARRAY_16(buffer, x0_3, x0_2, x0_1, x0_0); EXPAND_BYTE_ARRAY_16(buffer + 8, x1_3, x1_2, x1_1, x1_0); for (i = 0; i < rlen; ++i, ++m, ++c) { *m = buffer[i] ^ *c; buffer[i] = *c; } buffer[rlen] ^= 0x80; COMPRESS_BYTE_ARRAY_16(buffer, x0_3, x0_2, x0_1, x0_0); COMPRESS_BYTE_ARRAY_16(buffer + 8, x1_3, x1_2, x1_1, x1_0); /* finalization */ x2_0 ^= K0_0; x2_1 ^= K0_1; x2_2 ^= K0_2; x2_3 ^= K0_3; x3_0 ^= K1_0; x3_1 ^= K1_1; x3_2 ^= K1_2; x3_3 ^= K1_3; P12_16; x3_0 ^= K0_0; x3_1 ^= K0_1; x3_2 ^= K0_2; x3_3 ^= K0_3; x4_0 ^= K1_0; x4_1 ^= K1_1; x4_2 ^= K1_2; x4_3 ^= K1_3; /* return -1 if verification fails */ ret_val = 0; EXPAND_U16(&t1_0, x3_3, x3_2, x3_1, x3_0); EXPAND_U16(&t1_1, x3_3 >> 4, x3_2 >> 4, x3_1 >> 4, x3_0 >> 4); EXPAND_U16(&t1_2, x3_3 >> 8, x3_2 >> 8, x3_1 >> 8, x3_0 >> 8); EXPAND_U16(&t1_3, x3_3 >> 12, x3_2 >> 12, x3_1 >> 12, x3_0 >> 12); ret_val |= ((uint16_t*)c)[0] ^ (t1_3); ret_val |= ((uint16_t*)c)[1] ^ (t1_2); ret_val |= ((uint16_t*)c)[2] ^ (t1_1); ret_val |= ((uint16_t*)c)[3] ^ (t1_0); EXPAND_U16(&t1_0, x4_3, x4_2, x4_1, x4_0); EXPAND_U16(&t1_1, x4_3 >> 4, x4_2 >> 4, x4_1 >> 4, x4_0 >> 4); EXPAND_U16(&t1_2, x4_3 >> 8, x4_2 >> 8, x4_1 >> 8, x4_0 >> 8); EXPAND_U16(&t1_3, x4_3 >> 12, x4_2 >> 12, x4_1 >> 12, x4_0 >> 12); ret_val |= ((uint16_t*)c)[4] ^ (t1_3); ret_val |= ((uint16_t*)c)[5] ^ (t1_2); ret_val |= ((uint16_t*)c)[6] ^ (t1_1); ret_val |= ((uint16_t*)c)[7] ^ (t1_0); if (ret_val != 0) return -1; /* return plaintext */ *mlen = clen - CRYPTO_ABYTES; return 0; }