Commit 2f8aa5e2 by Robert Primas Committed by Sebastian Renner

isap 3rd round optimizations

parent 07f5a6f4
### Folder Structure:
**Folder Structure:**
* **isapa128av20**
* Code for ISAP128a using Ascon-p.
* **isapa128v20**
* Code for ISAP128 using Ascon-p.
* **isapk128av20**
* Code for ISAP128a using Keccak-p[400].
* **isapk128v20**
* Code for ISAP128 using Keccak-p[400].
* **isapxv20**
* A common codebase that can be used to build reference code for all four instances of ISAP.
- `isapa128av20`: Code for ISAP128a using Ascon-p (primary recommendation).
- `isapa128v20`: Code for ISAP128 using Ascon-p.
- `isapk128av20`: Code for ISAP128a using Keccak-p[400].
- `isapk128v20`: Code for ISAP128 using Keccak-p[400].
- `isapxv20`: A generic codebase that can be used to build reference code for all four instances of ISAP.
/* Build-time parameters of the AEAD interface exposed by this implementation. */
#ifndef API_H
#define API_H
// Version string of this implementation
#define CRYPTO_VERSION "2.0.1"
// Key length in bytes
#define CRYPTO_KEYBYTES 16
// Length of the nsec argument in bytes; 0 because nsec is unused by encrypt/decrypt
#define CRYPTO_NSECBYTES 0
// Length of the public nonce (npub) in bytes
#define CRYPTO_NPUBBYTES 16
// Ciphertext expansion in bytes (matches the 16-byte tag appended to the ciphertext)
#define CRYPTO_ABYTES 16
// NOTE(review): presumably tells the benchmarking framework that input and
// output buffers must not overlap -- confirm against the framework's docs.
#define CRYPTO_NOOVERLAP 1
#endif
#include "api.h"
#include "isap.h"
#include "crypto_aead.h"
/*
 * AEAD encryption entry point.
 *
 * Writes mlen bytes of ciphertext followed by an ISAP_TAG_SZ-byte tag to c
 * and stores the total output length in *clen. nsec is ignored.
 * Always returns 0.
 */
int crypto_aead_encrypt(
    unsigned char *c, unsigned long long *clen,
    const unsigned char *m, unsigned long long mlen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *nsec,
    const unsigned char *npub,
    const unsigned char *k)
{
    (void)nsec;

    /* Output is the ciphertext immediately followed by the tag. */
    *clen = mlen + ISAP_TAG_SZ;

    /* Encryption and authentication happen in one combined call; the tag
     * is written directly behind the ciphertext. */
    isap_mac_enc(k, npub, ad, adlen, c, mlen, m, mlen, c + mlen);

    return 0;
}
/*
 * AEAD decryption entry point.
 *
 * c holds clen bytes: the ciphertext followed by an ISAP_TAG_SZ-byte tag.
 * The tag is recomputed over (npub, ad, ciphertext) and compared with the
 * received tag; the plaintext is written to m only if they match.
 *
 * *mlen receives the plaintext length (clen - ISAP_TAG_SZ), or 0 when the
 * input is too short to contain a tag. nsec is ignored.
 *
 * Returns 0 on success, -1 if the input is shorter than a tag or the tag
 * does not verify.
 */
int crypto_aead_decrypt(
    unsigned char *m, unsigned long long *mlen,
    unsigned char *nsec,
    const unsigned char *c, unsigned long long clen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *npub,
    const unsigned char *k)
{
    (void)nsec;

    /* Reject inputs that cannot even hold a tag. Without this check the
     * subtraction below wraps around and every later access to c is far
     * out of bounds. */
    if (clen < ISAP_TAG_SZ) {
        *mlen = 0;
        return -1;
    }

    /* Plaintext length is clen - tag length */
    *mlen = clen - ISAP_TAG_SZ;

    /* Recompute the tag over the received ciphertext */
    unsigned char tag[ISAP_TAG_SZ];
    isap_mac(k, npub, ad, adlen, c, *mlen, tag);

    /* Compare tags by accumulating the XOR of every byte pair so that the
     * comparison has no data-dependent branch or early exit. */
    unsigned char diff = 0;
    for (unsigned int i = 0; i < ISAP_TAG_SZ; i++) {
        diff |= (unsigned char)(tag[i] ^ c[(*mlen) + i]);
    }
    if (diff != 0) {
        return -1;
    }

    /* Tag is valid: decrypt (the keystream XOR is its own inverse) */
    if (*mlen > 0) {
        isap_enc(k, npub, c, *mlen, m);
    }
    return 0;
}
/* Parameters and function prototypes of this ISAP instance. */
#ifndef ISAP_H
#define ISAP_H
// Rate in bits
// (rH is used when absorbing data for the tag, rB when absorbing the
// re-keying input one bit at a time)
#define ISAP_rH 64
#define ISAP_rB 1
// Number of rounds
// (sH: hashing, sB: per-bit re-keying, sE: encryption, sK: key setup)
#define ISAP_sH 12
#define ISAP_sB 1
#define ISAP_sE 6
#define ISAP_sK 12
// State size in bytes
#define ISAP_STATE_SZ 40
// Size of rate in bytes
#define ISAP_rH_SZ ((ISAP_rH+7)/8)
// Size of zero truncated IV in bytes
#define ISAP_IV_SZ 8
// Size of tag in bytes
#define ISAP_TAG_SZ 16
// Security level
#define ISAP_K 128
// Computes the ISAP_TAG_SZ-byte authentication tag over the nonce npub,
// the associated data ad (adlen bytes) and the ciphertext c (clen bytes)
// under key k, and writes it to tag.
void isap_mac(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *ad, const unsigned long long adlen,
const unsigned char *c, const unsigned long long clen,
unsigned char *tag
);
// Combined encrypt-and-authenticate call used by crypto_aead_encrypt:
// takes plaintext m (mlen bytes), writes ciphertext to c and the tag to tag.
// NOTE(review): the definition is not visible here -- confirm the exact
// contract (in particular the relation between clen and mlen) there.
void isap_mac_enc(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *ad, const unsigned long long adlen,
unsigned char *c, const unsigned long long clen,
const unsigned char *m, const unsigned long long mlen,
unsigned char *tag
);
// Re-keying function: derives outlen bytes of session key material into
// out from key k, an IV and inlen bytes of input.
// NOTE(review): presumably described correctly by the parameter names;
// verify that the implementation paired with this header has this
// signature.
void isap_rk(
const unsigned char *k,
const unsigned char *iv,
const unsigned char *in,
const unsigned long long inlen,
unsigned char *out,
const unsigned long long outlen
);
// Encrypts mlen bytes of m into c under key k and nonce npub. The callers
// in this code base also use it for decryption by passing the ciphertext
// as m.
void isap_enc(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *m, const unsigned long long mlen,
unsigned char *c
);
#endif
/* Build-time parameters of the AEAD and hash interfaces of this implementation. */
#ifndef API_H
#define API_H
// isapa128a
// Key length in bytes
#define CRYPTO_KEYBYTES 16
// Length of the nsec argument in bytes; 0 because nsec is unused by encrypt/decrypt
#define CRYPTO_NSECBYTES 0
// Length of the public nonce (npub) in bytes
#define CRYPTO_NPUBBYTES 16
// Ciphertext expansion in bytes (matches the 16-byte tag appended to the ciphertext)
#define CRYPTO_ABYTES 16
// NOTE(review): presumably tells the benchmarking framework that input and
// output buffers must not overlap -- confirm against the framework's docs.
#define CRYPTO_NOOVERLAP 1
// asconhashv12
// Hash output length in bytes
#define CRYPTO_BYTES 32
#define ASCON_HASH_OUTLEN 32
// Number of permutation rounds used by the hash
#define ASCON_HASH_ROUNDS 12
#endif
#ifndef ASCONP_H_
#define ASCONP_H_
#include <inttypes.h>
#include "forceinline.h"
/*
 * One 64-bit lane of the permutation state, accessible as a single 64-bit
 * value, as two 32-bit words, or as eight bytes. Which byte/word aliases
 * which part of x depends on the host byte order.
 */
typedef union
{
uint64_t x;    /* whole lane */
uint32_t w[2]; /* the lane as two 32-bit words */
uint8_t b[8];  /* the lane as individual bytes */
} lane_t;
/*
 * The full permutation state: five 64-bit lanes (40 bytes), with the same
 * lane / 64-bit / 32-bit / byte views that lane_t offers for a single lane.
 */
typedef union
{
lane_t l[5];      /* lanes as lane_t, for use with the conversion helpers */
uint64_t x[5];    /* lanes as 64-bit values */
uint32_t w[5][2]; /* each lane as two 32-bit words */
uint8_t b[5][8];  /* each lane as eight bytes */
} state_t;
/* ---------------------------------------------------------------- */
/*
 * Permutation shorthands, one per ISAP round-count parameter. They expand
 * to a call on a variable named `s`, so a `state_t *s` must be in scope
 * wherever they are used.
 */
#define P_sH P12ROUNDS(s)
#define P_sB P1ROUNDS(s)
#define P_sE P6ROUNDS(s)
#define P_sK P12ROUNDS(s)
/* ---------------------------------------------------------------- */
/*
 * Conversion between an 8-byte lane as loaded from memory and the
 * bit-interleaved lane used inside the state: byte-swap, then interleave
 * (U64TOWORD), and the inverse (WORDTOU64).
 */
#define U64TOWORD(x) to_bit_interleaving(U64BIG(x))
#define WORDTOU64(x) U64BIG(from_bit_interleaving(x))
/* ---------------------------------------------------------------- */
/* Bit-interleaving only, without the byte swap. */
#define TOBI(x) to_bit_interleaving(x)
#define FROMBI(x) from_bit_interleaving(x)
/* ---------------------------------------------------------------- */
/*
 * Returns the lane with the byte order of its 64-bit value reversed
 * (byte 0 becomes byte 7, byte 1 becomes byte 6, and so on).
 */
forceinline lane_t U64BIG(lane_t x)
{
    uint64_t remaining = x.x;
    uint64_t swapped = 0;

    /* Move the bytes across one at a time, lowest byte first; the first
     * byte taken ends up in the most significant position. */
    for (int byte = 0; byte < 8; byte++)
    {
        swapped = (swapped << 8) | (remaining & 0xFFULL);
        remaining >>= 8;
    }

    x.x = swapped;
    return x;
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/*
 * Converts a lane to the bit-interleaved layout. Each 32-bit word is first
 * un-shuffled so that its even-indexed bits sit in the lower 16 bits and
 * its odd-indexed bits in the upper 16 bits; the halves are then regrouped
 * so that w[0] collects the even bits and w[1] the odd bits of both words.
 */
forceinline lane_t to_bit_interleaving(lane_t in)
{
    uint32_t first = in.w[0];
    uint32_t second = in.w[1];
    uint32_t swap;

    /* Un-shuffle the first word: four delta-swap steps with doubling
     * distances 1, 2, 4, 8. */
    swap = (first ^ (first >> 1)) & 0x22222222;
    first ^= swap ^ (swap << 1);
    swap = (first ^ (first >> 2)) & 0x0C0C0C0C;
    first ^= swap ^ (swap << 2);
    swap = (first ^ (first >> 4)) & 0x00F000F0;
    first ^= swap ^ (swap << 4);
    swap = (first ^ (first >> 8)) & 0x0000FF00;
    first ^= swap ^ (swap << 8);

    /* Same four steps for the second word. */
    swap = (second ^ (second >> 1)) & 0x22222222;
    second ^= swap ^ (swap << 1);
    swap = (second ^ (second >> 2)) & 0x0C0C0C0C;
    second ^= swap ^ (swap << 2);
    swap = (second ^ (second >> 4)) & 0x00F000F0;
    second ^= swap ^ (swap << 4);
    swap = (second ^ (second >> 8)) & 0x0000FF00;
    second ^= swap ^ (swap << 8);

    /* Regroup: lower halves into w[0], upper halves into w[1]. */
    lane_t result;
    result.w[0] = (first & 0x0000FFFF) | (second << 16);
    result.w[1] = (first >> 16) | (second & 0xFFFF0000);
    return result;
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/*
 * Inverse of to_bit_interleaving(): splits the two interleaved words back
 * into per-word halves, then shuffles each word so that the bits return
 * to their natural positions.
 */
forceinline lane_t from_bit_interleaving(lane_t in)
{
    /* Undo the regrouping: gather the lower halves of both words, then
     * the upper halves. */
    uint32_t first = (in.w[0] & 0x0000FFFF) | (in.w[1] << 16);
    uint32_t second = (in.w[0] >> 16) | (in.w[1] & 0xFFFF0000);
    uint32_t swap;

    /* Shuffle the first word: the delta-swap steps of the forward
     * transform, applied in reverse order (distances 8, 4, 2, 1). */
    swap = (first ^ (first >> 8)) & 0x0000FF00;
    first ^= swap ^ (swap << 8);
    swap = (first ^ (first >> 4)) & 0x00F000F0;
    first ^= swap ^ (swap << 4);
    swap = (first ^ (first >> 2)) & 0x0C0C0C0C;
    first ^= swap ^ (swap << 2);
    swap = (first ^ (first >> 1)) & 0x22222222;
    first ^= swap ^ (swap << 1);

    /* Same steps for the second word. */
    swap = (second ^ (second >> 8)) & 0x0000FF00;
    second ^= swap ^ (swap << 8);
    swap = (second ^ (second >> 4)) & 0x00F000F0;
    second ^= swap ^ (swap << 4);
    swap = (second ^ (second >> 2)) & 0x0C0C0C0C;
    second ^= swap ^ (swap << 2);
    swap = (second ^ (second >> 1)) & 0x22222222;
    second ^= swap ^ (swap << 1);

    /* Reassemble as a 64-bit value: second word on top, first word below. */
    lane_t result;
    result.x = ((uint64_t)second << 32) | first;
    return result;
}
/* ---------------------------------------------------------------- */
/*
 * Rotates the 32-bit value x right by n bit positions.
 * The left-shift count is reduced modulo 32 so that n == 0 does not
 * produce a shift by the full word width.
 */
forceinline uint32_t ROR32(uint32_t x, int n)
{
    const uint32_t shifted_down = x >> n;
    const uint32_t wrapped_around = x << (-n & 31);
    return shifted_down | wrapped_around;
}
/* ---------------------------------------------------------------- */
/*
 * Rotates a bit-interleaved 64-bit lane right by n positions, using two
 * 32-bit rotations.
 *
 * For even n both words are rotated by n/2 and stay in place. For odd n
 * the words swap roles: the new w[0] is the old w[1] rotated by (n-1)/2
 * and the new w[1] is the old w[0] rotated by (n+1)/2.
 */
forceinline uint64_t ROR(uint64_t x, int n)
{
    lane_t src = {.x = x};
    lane_t dst;

    if (n % 2)
    {
        dst.w[0] = ROR32(src.w[1], (n - 1) / 2);
        dst.w[1] = ROR32(src.w[0], (n + 1) / 2);
    }
    else
    {
        dst.w[0] = ROR32(src.w[0], n / 2);
        dst.w[1] = ROR32(src.w[1], n / 2);
    }

    return dst.x;
}
/* ---------------------------------------------------------------- */
/*
 * Applies one round of the permutation to the state *s in place.
 *
 * s : state whose lanes are held in the bit-interleaved layout expected by
 *     ROR().
 * C : round constant, XORed into lane 2 before the s-box layer.
 *
 * The round consists of constant addition, a bit-sliced s-box layer over
 * the five lanes, and a linear layer that XORs each lane with two rotated
 * copies of itself.
 */
forceinline void ROUND(state_t *s, uint64_t C)
{
state_t t;
/* round constant */
s->x[2] ^= C;
/* s-box layer */
/* input mixing; must happen before the t.x[] values are computed */
s->x[0] ^= s->x[4];
s->x[4] ^= s->x[3];
s->x[2] ^= s->x[1];
/* non-linear step: every output lane is written to t so that all five
 * expressions read the same, unmodified s values */
t.x[0] = s->x[0] ^ (~s->x[1] & s->x[2]);
t.x[2] = s->x[2] ^ (~s->x[3] & s->x[4]);
t.x[4] = s->x[4] ^ (~s->x[0] & s->x[1]);
t.x[1] = s->x[1] ^ (~s->x[2] & s->x[3]);
t.x[3] = s->x[3] ^ (~s->x[4] & s->x[0]);
/* output mixing; the inversion of lane 2 is deferred to the last line */
t.x[1] ^= t.x[0];
t.x[3] ^= t.x[2];
t.x[0] ^= t.x[4];
/* linear layer */
/* Two passes per lane. The first computes t ^ ROR(t, a - b); the second
 * computes t ^ ROR(that, b), which equals t ^ ROR(t, b) ^ ROR(t, a).
 * The (a, b) pairs are written out literally in the first pass. */
s->x[2] = t.x[2] ^ ROR(t.x[2], 6 - 1);
s->x[3] = t.x[3] ^ ROR(t.x[3], 17 - 10);
s->x[4] = t.x[4] ^ ROR(t.x[4], 41 - 7);
s->x[0] = t.x[0] ^ ROR(t.x[0], 28 - 19);
s->x[1] = t.x[1] ^ ROR(t.x[1], 61 - 39);
s->x[2] = t.x[2] ^ ROR(s->x[2], 1);
s->x[3] = t.x[3] ^ ROR(s->x[3], 10);
s->x[4] = t.x[4] ^ ROR(s->x[4], 7);
s->x[0] = t.x[0] ^ ROR(s->x[0], 19);
s->x[1] = t.x[1] ^ ROR(s->x[1], 39);
/* deferred inversion of lane 2 from the s-box layer */
s->x[2] = ~s->x[2];
}
/* ---------------------------------------------------------------- */
/*
 * Applies the 12-round permutation to *s in place.
 * Each round receives its own constant; the constants are listed in the
 * order in which the rounds are executed.
 */
forceinline void P12ROUNDS(state_t *s)
{
    const uint64_t round_constants[12] = {
        0xc0000000cULL, 0xc00000009ULL, 0x90000000cULL, 0x900000009ULL,
        0xc00000006ULL, 0xc00000003ULL, 0x900000006ULL, 0x900000003ULL,
        0x60000000cULL, 0x600000009ULL, 0x30000000cULL, 0x300000009ULL};

    for (int r = 0; r < 12; r++)
    {
        ROUND(s, round_constants[r]);
    }
}
/* ---------------------------------------------------------------- */
/*
 * Applies the 6-round permutation to *s in place.
 * The six constants are the same as those of the last six rounds of the
 * 12-round permutation.
 */
forceinline void P6ROUNDS(state_t *s)
{
    const uint64_t round_constants[6] = {
        0x900000006ULL, 0x900000003ULL, 0x60000000cULL,
        0x600000009ULL, 0x30000000cULL, 0x300000009ULL};

    for (int r = 0; r < 6; r++)
    {
        ROUND(s, round_constants[r]);
    }
}
/* ---------------------------------------------------------------- */
/*
 * Applies the 1-round permutation to *s in place, using the same constant
 * as the final round of the 6- and 12-round permutations.
 */
forceinline void P1ROUNDS(state_t *s)
{
    const uint64_t final_round_constant = 0x300000009ULL;
    ROUND(s, final_round_constant);
}
/* ---------------------------------------------------------------- */
#endif // ASCONP_H_
/*
 * Build configuration and constant tables for this ISAP instance.
 *
 * The tables are `static const` so that including this header from more
 * than one translation unit does not produce duplicate external
 * definitions at link time.
 */
#ifndef CONFIG_H
#define CONFIG_H

#include <stdint.h> /* uint8_t; keeps this header self-contained */

/* Set to 1 to also build the Ascon-Hash entry point. */
#define ENABLE_HASH 0

#include "isap.h"

// ISAP-A-128a
/* 8-byte IVs; the first byte distinguishes the three uses (authentication,
 * derivation of the authentication key, derivation of the encryption key),
 * the remaining bytes encode the instance parameters from isap.h. */
static const uint8_t ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
static const uint8_t ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
static const uint8_t ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};

// Ascon-Hash
/* 8-byte IV loaded into the first state lane by the hash. */
static const uint8_t ASCON_HASH_IV[] = {0x00, 0x40, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00};

#endif // CONFIG_H
#include "api.h"
#include "isap.h"
#include "crypto_aead.h"
/*
 * AEAD encryption entry point (encrypt-then-MAC).
 *
 * Writes mlen bytes of ciphertext followed by an ISAP_TAG_SZ-byte tag to c
 * and stores the total output length in *clen. nsec is ignored.
 * Always returns 0.
 */
int crypto_aead_encrypt(
    unsigned char *c, unsigned long long *clen,
    const unsigned char *m, unsigned long long mlen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *nsec,
    const unsigned char *npub,
    const unsigned char *k)
{
    (void)nsec;

    /* Output is the ciphertext immediately followed by the tag. */
    *clen = mlen + ISAP_TAG_SZ;

    /* An empty message needs no keystream at all. */
    if (mlen != 0) {
        isap_enc(k, npub, m, mlen, c);
    }

    /* Authenticate nonce, associated data and the ciphertext just written;
     * the tag lands directly behind the ciphertext. */
    isap_mac(k, npub, ad, adlen, c, mlen, c + mlen);

    return 0;
}
/*
 * AEAD decryption entry point.
 *
 * c holds clen bytes: the ciphertext followed by an ISAP_TAG_SZ-byte tag.
 * The tag is recomputed over (npub, ad, ciphertext) and compared with the
 * received tag; the plaintext is written to m only if they match.
 *
 * *mlen receives the plaintext length (clen - ISAP_TAG_SZ), or 0 when the
 * input is too short to contain a tag. nsec is ignored.
 *
 * Returns 0 on success, -1 if the input is shorter than a tag or the tag
 * does not verify.
 */
int crypto_aead_decrypt(
    unsigned char *m, unsigned long long *mlen,
    unsigned char *nsec,
    const unsigned char *c, unsigned long long clen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *npub,
    const unsigned char *k)
{
    (void)nsec;

    /* Reject inputs that cannot even hold a tag. Without this check the
     * subtraction below wraps around and every later access to c is far
     * out of bounds. */
    if (clen < ISAP_TAG_SZ) {
        *mlen = 0;
        return -1;
    }

    /* Plaintext length is clen - tag length */
    *mlen = clen - ISAP_TAG_SZ;

    /* Recompute the tag over the received ciphertext */
    unsigned char tag[ISAP_TAG_SZ];
    isap_mac(k, npub, ad, adlen, c, *mlen, tag);

    /* Compare tags by accumulating the XOR of every byte pair so that the
     * comparison has no data-dependent branch or early exit. */
    unsigned char diff = 0;
    for (unsigned int i = 0; i < ISAP_TAG_SZ; i++) {
        diff |= (unsigned char)(tag[i] ^ c[(*mlen) + i]);
    }
    if (diff != 0) {
        return -1;
    }

    /* Tag is valid: decrypt (the keystream XOR is its own inverse) */
    if (*mlen > 0) {
        isap_enc(k, npub, c, *mlen, m);
    }
    return 0;
}
#ifndef FORCEINLINE_H_
#define FORCEINLINE_H_

/*
 * forceinline: function specifier that asks the compiler to always inline.
 *
 *  - MSVC (and compatible front ends that define _MSC_VER): __forceinline
 *  - GCC and Clang: inline plus the always_inline attribute
 *  - anything else: plain inline
 *
 * Clang identifies itself with the lowercase macro __clang__, so that is
 * the spelling tested here. Both GCC and Clang support the always_inline
 * attribute, so no __has_attribute probe is needed.
 */
#if defined(_MSC_VER)
#define forceinline __forceinline
#elif defined(__GNUC__) || defined(__clang__)
#define forceinline inline __attribute__((__always_inline__))
#else
#define forceinline inline
#endif

#endif /* FORCEINLINE_H_ */
#include <string.h>
#include <inttypes.h>
#include "api.h"
#include "isap.h"
#include "asconp.h"
#include "config.h"
/*
 * Absorbs len bytes from src into lane 0 of the state, one 8-byte block
 * per permutation call (P_sH), and finishes with a padded block.
 *
 * s   : permutation state in bit-interleaved layout (updated in place)
 * src : input bytes; no alignment requirement
 * len : number of input bytes
 *
 * Padding: a single 0x80 byte follows the last input byte. If the input
 * length is a multiple of 8 (including 0), the padding occupies a block of
 * its own.
 */
forceinline void ABSORB_LANES(state_t *s, const uint8_t *src, uint64_t len)
{
    while (len >= ISAP_rH / 8)
    {
        // Absorb full lanes
        // Load through memcpy: src is an arbitrary byte pointer, so
        // dereferencing it as a lane_t would be a misaligned access.
        lane_t t0;
        memcpy(&t0, src, sizeof(t0));
        t0 = U64TOWORD(t0);
        s->x[0] ^= t0.x;
        len -= ISAP_rH / 8;
        src += ISAP_rH / 8;
        P_sH;
    }
    if (len > 0)
    {
        // Absorb partial lane and padding
        // Bytes are placed from index 7 downwards, i.e. already in the
        // byte-swapped order that U64BIG would produce, so only the
        // interleaving step (TOBI) remains.
        size_t i;
        lane_t t0 = {0};
        for (i = 0; i < len; i++)
        {
            t0.b[7 - i] ^= *src;
            src++;
        }
        t0.b[7 - i] ^= 0x80;
        t0 = TOBI(t0);
        s->x[0] ^= t0.x;
        P_sH;
    }
    else
    {
        // Absorb padded empty lane
        s->b[0][7] ^= 0x80;
        P_sH;
    }
}
/******************************************************************************/
/* ISAP_RK */
/******************************************************************************/
/*
 * Re-keying function: derives session key material from the long-term key.
 *
 * k      : 16-byte key; no alignment requirement
 * iv     : 8-byte IV selecting the purpose of the derived key
 * y      : 16 bytes (128 bits) absorbed one bit at a time, most significant
 *          bit of each byte first
 * out    : state whose lanes 0 and 1 (and lane 2 when outlen > 16) receive
 *          the result, in the state's internal bit-interleaved layout
 * outlen : requested output length in bytes; only compared against 16
 *
 * y may point into *out: the input is consumed completely before anything
 * is written to out.
 */
void isap_rk(
    const uint8_t *k,
    const uint8_t *iv,
    const uint8_t *y,
    state_t *out,
    const size_t outlen)
{
    state_t state;
    state_t *s = &state;
    lane_t load;

    // Initialize
    // Loads go through memcpy because k and iv are plain byte pointers
    // with no alignment guarantee.
    memcpy(&load, k + 0, sizeof(load));
    s->l[0] = U64TOWORD(load);
    memcpy(&load, k + 8, sizeof(load));
    s->l[1] = U64TOWORD(load);
    memcpy(&load, iv + 0, sizeof(load));
    s->l[2] = U64TOWORD(load);
    s->x[3] = 0;
    s->x[4] = 0;
    P_sK;

    // Absorb Y, bit by bit
    // Each bit is moved to bit 7 of the byte before it is XORed into
    // s->b[0][7]. Kept fully unrolled on purpose.
    for (size_t i = 0; i < 16; i++)
    {
        uint8_t y_byte = *y;
        s->b[0][7] ^= (y_byte & 0x80) << 0;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x40) << 1;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x20) << 2;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x10) << 3;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x08) << 4;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x04) << 5;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x02) << 6;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x01) << 7;
        // The very last bit is followed by P_sK below instead of P_sB.
        if (i != 15)
        {
            P_sB;
            y += 1;
        }
    }

    // Squeeze K*
    P_sK;
    out->x[0] = s->x[0];
    out->x[1] = s->x[1];
    if (outlen > 16)
    {
        out->x[2] = s->x[2];
    }
}
/******************************************************************************/
/* ISAP_MAC */
/******************************************************************************/
void isap_mac(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *ad, uint64_t adlen,
const uint8_t *c, uint64_t clen,
uint8_t *tag)
{
state_t state;
state_t *s = &state;
// Initialize
s->l[0] = U64TOWORD(*(lane_t *)(npub + 0));
s->l[1] = U64TOWORD(*(lane_t *)(npub + 8));
s->l[2] = U64TOWORD(*(lane_t *)(ISAP_IV_A + 0));
s->x[3] = 0;
s->x[4] = 0;
P_sH;
// Absorb associated data
ABSORB_LANES(s, ad, adlen);
// Domain seperation
s->w[4][0] ^= 0x1UL;
// Absorb ciphertext
ABSORB_LANES(s, c, clen);
// Derive KA*
s->l[0] = WORDTOU64(s->l[0]);
s->l[1] = WORDTOU64(s->l[1]);
isap_rk(k, ISAP_IV_KA, (const uint8_t *)(s->b), s, CRYPTO_KEYBYTES);
// Squeeze tag
P_sH;
lane_t t0 = WORDTOU64(s->l[0]);
memcpy(tag + 0, t0.b, 8);
t0 = WORDTOU64(s->l[1]);
memcpy(tag + 8, t0.b, 8);
}
/******************************************************************************/
/* ISAP_ENC */
/******************************************************************************/
void isap_enc(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *m, uint64_t mlen,
uint8_t *c)
{
state_t state;
state_t *s = &state;
// Init state
isap_rk(k, ISAP_IV_KE, npub, s, ISAP_STATE_SZ - CRYPTO_NPUBBYTES);
s->l[3] = U64TOWORD(*(lane_t *)(npub + 0));
s->l[4] = U64TOWORD(*(lane_t *)(npub + 8));
while (mlen >= ISAP_rH / 8)
{
// Encrypt full lanes
P_sE;
lane_t t0 = WORDTOU64(s->l[0]);
*(uint64_t *)c = *(uint64_t *)m ^ t0.x;
mlen -= ISAP_rH / 8;
m += ISAP_rH / 8;
c += ISAP_rH / 8;
}
if (mlen > 0)
{
// Encrypt partial lanes
P_sE;
lane_t t0 = WORDTOU64(s->l[0]);
for (uint8_t i = 0; i < mlen; i++)
{
*c = *m ^ t0.b[i];
m += 1;
c += 1;
}
}
}
/******************************************************************************/
/* Ascon-Hash */
/******************************************************************************/
#if ENABLE_HASH == 1
int crypto_hash(uint8_t *out, const uint8_t *in, unsigned long long inlen)
{
state_t state;
state_t *s = &state;
// Initialize
s->l[0] = U64TOWORD(*(lane_t *)(ASCON_HASH_IV + 0));
s->x[1] = 0;
s->x[2] = 0;
s->x[3] = 0;
s->x[4] = 0;
P_sH;
// Absorb input
ABSORB_LANES(s, in, inlen);
for (size_t i = 0; i < 4; i++)
{
// Squeeze full lanes
lane_t t0 = WORDTOU64(s->l[0]);
*(uint64_t *)(out + 8 * i) = t0.x;
if (i < 3)
{
P_sH;
}
}