Commit 2f8aa5e2 by Robert Primas Committed by Sebastian Renner

isap 3rd round optimizations

parent 07f5a6f4
### Folder Structure: **Folder Structure:**
* **isapa128av20** - `isapa128av20`: Code for ISAP128a using Ascon-p (primary recommendation).
* Code for ISAP128a using Ascon-p. - `isapa128v20`: Code for ISAP128 using Ascon-p.
* **isapa128v20** - `isapk128av20`: Code for ISAP128a using Keccak-p[400].
* Code for ISAP128 using Ascon-p. - `isapk128v20`: Code for ISAP128 using Keccak-p[400].
* **isapk128av20** - `isapxv20`: A generic codebase that can be used to build reference code for all four instances of ISAP.
* Code for ISAP128a using Keccak-p[400].
* **isapk128v20**
* Code for ISAP128 using Keccak-p[400].
* **isapxv20**
* A common codebase that can be used to build reference code for all four instances of ISAP.
/* Size and version constants consumed by the crypto_aead_* entry points. */
#ifndef API_H
#define API_H
/* Version string of this implementation. */
#define CRYPTO_VERSION "2.0.1"
/* Key length in bytes. */
#define CRYPTO_KEYBYTES 16
/* Secret-nonce length in bytes; 0 because the nsec argument is ignored. */
#define CRYPTO_NSECBYTES 0
/* Public-nonce length in bytes. */
#define CRYPTO_NPUBBYTES 16
/* Ciphertext expansion in bytes; presumably equal to the tag size (ISAP_TAG_SZ) - confirm. */
#define CRYPTO_ABYTES 16
/* NOTE(review): presumably tells the test framework that input and output buffers must not overlap - confirm. */
#define CRYPTO_NOOVERLAP 1
#endif
#include "api.h"
#include "isap.h"
#include "crypto_aead.h"
/*
 * Authenticated encryption entry point.
 *
 * Writes mlen ciphertext bytes to c, immediately followed by an
 * ISAP_TAG_SZ-byte tag, and stores the total output length in *clen.
 * Encryption and tag generation are done by one combined call to
 * isap_mac_enc(). nsec is ignored. Always returns 0.
 */
int crypto_aead_encrypt(
    unsigned char *c, unsigned long long *clen,
    const unsigned char *m, unsigned long long mlen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *nsec,
    const unsigned char *npub,
    const unsigned char *k)
{
    (void)nsec;

    /* Output is the ciphertext plus the trailing tag. */
    *clen = mlen + ISAP_TAG_SZ;

    /* The tag is placed right behind the last ciphertext byte. */
    unsigned char *const tag_out = c + mlen;

    /* The ciphertext has the same length as the plaintext, hence mlen twice. */
    isap_mac_enc(k, npub, ad, adlen, c, mlen, m, mlen, tag_out);

    return 0;
}
/*
 * Authenticated decryption entry point.
 *
 * c holds clen bytes: the ciphertext followed by an ISAP_TAG_SZ-byte tag.
 * The tag is recomputed over (ad, ciphertext) and compared with the received
 * one; only on a match is the plaintext written to m.
 *
 * Returns 0 on success and -1 if the input is shorter than a tag or the tag
 * does not verify. *mlen is set to the plaintext length (0 for inputs that
 * are too short). nsec is ignored.
 */
int crypto_aead_decrypt(
    unsigned char *m, unsigned long long *mlen,
    unsigned char *nsec,
    const unsigned char *c, unsigned long long clen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *npub,
    const unsigned char *k)
{
    (void)nsec;

    /*
     * Reject inputs that cannot even contain a tag. Without this check the
     * unsigned subtraction below wraps around and the MAC would read far
     * beyond the end of c.
     */
    if (clen < ISAP_TAG_SZ) {
        *mlen = 0;
        return -1;
    }

    /* Plaintext length is clen - tag length */
    *mlen = clen - ISAP_TAG_SZ;

    /* Recompute the tag over the associated data and the ciphertext */
    unsigned char tag[ISAP_TAG_SZ];
    isap_mac(k, npub, ad, adlen, c, *mlen, tag);

    /*
     * Compare all tag bytes by accumulating their differences, so the work
     * done does not depend on the position of the first mismatch.
     */
    const unsigned char *received_tag = c + *mlen;
    unsigned char diff = 0;
    for (unsigned int i = 0; i < ISAP_TAG_SZ; i++) {
        diff |= (unsigned char)(tag[i] ^ received_tag[i]);
    }
    if (diff != 0) {
        return -1;
    }

    /* Tag is valid: decrypt (the stream cipher is its own inverse) */
    if (*mlen > 0) {
        isap_enc(k, npub, c, *mlen, m);
    }
    return 0;
}
#include <stdio.h>
#include <string.h>
#include <immintrin.h>
#include "api.h"
#include "isap.h"
/* Short integer aliases used throughout this file. */
typedef unsigned char u8;
typedef unsigned long long u64;
/* NOTE(review): despite the name, u32 is `unsigned long`, whose width depends on the platform (64 bits on LP64) - confirm intent. */
typedef unsigned long u32;
typedef long long i64;
/*
 * 8-byte initialization vectors; they differ only in their first byte.
 * In this file ISAP_IV1 initializes the MAC state, ISAP_IV2 is handed to
 * isap_rk() when deriving the tag key and ISAP_IV3 when deriving the
 * encryption key state.
 */
const u8 ISAP_IV1[] = {0x01,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK};
const u8 ISAP_IV2[] = {0x02,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK};
const u8 ISAP_IV3[] = {0x03,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK};
/* Rate in bytes and round counts; not referenced in the part of the file visible here. */
#define RATE (64 / 8)
#define PA_ROUNDS 12
#define PB_ROUNDS 6
/* Rotate the 64-bit value x right by n bits. n must be in 1..63 (n == 0 would shift by 64). */
#define ROTR(x,n) (((x)>>(n))|((x)<<(64-(n))))
/* Extract byte n of x, counting from the most significant byte (n == 0). */
#define EXT_BYTE(x,n) ((u8)((u64)(x)>>(8*(7-(n)))))
/* Place the value x at byte position n, counting from the most significant byte. */
#define INS_BYTE(x,n) ((u64)(x)<<(8*(7-(n))))
/*
 * Reverse the byte order of a 64-bit value, built from four masked rotations.
 * Note that x is evaluated several times.
 */
#define U64BIG(x) \
((ROTR(x, 8) & (0xFF000000FF000000ULL)) | \
(ROTR(x,24) & (0x00FF000000FF0000ULL)) | \
(ROTR(x,40) & (0x0000FF000000FF00ULL)) | \
(ROTR(x,56) & (0x000000FF000000FFULL)))
/*
 * One permutation round (presumably Ascon-p) on the five 64-bit words
 * x0..x4 with round constant C.
 *
 * The macro works on variables of the expansion site: x0..x4 (state),
 * t0..t4 (scratch, overwritten) and the rotation table R must all be in
 * scope. It is a GNU statement expression, so it needs a GCC-compatible
 * compiler.
 *
 * Steps: add C to x2, apply the non-linear layer through the t* copies, then
 * compute x_i ^= ROTR(x_i, R[i][0]) ^ ROTR(x_i, R[i][1]) for every word. The
 * second rotation is obtained by rotating the already rotated value by the
 * difference R[i][1] - R[i][0]. The complement of x2 is folded into the
 * linear layer.
 */
#define ROUND(C) ({\
x2 ^= C;\
x0 ^= x4;\
x4 ^= x3;\
x2 ^= x1;\
t0 = x0;\
t4 = x4;\
t3 = x3;\
t1 = x1;\
t2 = x2;\
x0 = t0 ^ ((~t1) & t2);\
x2 = t2 ^ ((~t3) & t4);\
x4 = t4 ^ ((~t0) & t1);\
x1 = t1 ^ ((~t2) & t3);\
x3 = t3 ^ ((~t4) & t0);\
x1 ^= x0;\
t1 = x1;\
x1 = ROTR(x1, R[1][0]);\
x3 ^= x2;\
t2 = x2;\
x2 = ROTR(x2, R[2][0]);\
t4 = x4;\
t2 ^= x2;\
x2 = ROTR(x2, R[2][1] - R[2][0]);\
t3 = x3;\
t1 ^= x1;\
x3 = ROTR(x3, R[3][0]);\
x0 ^= x4;\
x4 = ROTR(x4, R[4][0]);\
t3 ^= x3;\
x2 ^= t2;\
x1 = ROTR(x1, R[1][1] - R[1][0]);\
t0 = x0;\
x2 = ~x2;\
x3 = ROTR(x3, R[3][1] - R[3][0]);\
t4 ^= x4;\
x4 = ROTR(x4, R[4][1] - R[4][0]);\
x3 ^= t3;\
x1 ^= t1;\
x0 = ROTR(x0, R[0][0]);\
x4 ^= t4;\
t0 ^= x0;\
x0 = ROTR(x0, R[0][1] - R[0][0]);\
x0 ^= t0;\
})
/*
 * The same round as ROUND, applied to two independent states at once.
 *
 * Each __m128i xNa holds word N of both states, one per 64-bit lane.
 * _mm_set_epi64x takes the high element first, so C1 is the round constant
 * for the state in the high lane and C2 for the state in the low lane.
 *
 * Operates on x0a..x4a and the scratch registers t0a..t4a of the expansion
 * site and reads the rotation table R.
 *
 * _mm_ternarylogic_epi64 immediates used here:
 *   0x96 = a ^ b ^ c
 *   0xd2 = a ^ (~b & c)
 *   0x69 = ~(a ^ b ^ c)   (folds the complement of x2 into the linear layer)
 *
 * The ternary-logic and 64-bit rotate intrinsics require AVX-512 (F + VL).
 */
#define ROUNDAVX(C1,C2) ({\
x2a = _mm_ternarylogic_epi64(x2a, _mm_set_epi64x (C1, C2), x1a, 0x96);\
x0a =_mm_xor_si128 (x0a, x4a);\
x4a =_mm_xor_si128 (x4a, x3a);\
t0a = x0a;\
t4a = x4a;\
t3a = x3a;\
t1a = x1a;\
t2a = x2a;\
x0a = _mm_ternarylogic_epi64(t0a, t1a, t2a, 0xd2);\
x2a = _mm_ternarylogic_epi64(t2a, t3a, t4a, 0xd2);\
x4a = _mm_ternarylogic_epi64(t4a, t0a, t1a, 0xd2);\
x1a = _mm_ternarylogic_epi64(t1a, t2a, t3a, 0xd2);\
x3a = _mm_ternarylogic_epi64(t3a, t4a, t0a, 0xd2);\
x1a =_mm_xor_si128 (x1a, x0a);\
x3a =_mm_xor_si128 (x3a, x2a);\
x0a =_mm_xor_si128 (x0a, x4a);\
t0a = _mm_ror_epi64(x0a, R[0][0]);\
t1a = _mm_ror_epi64(x0a, R[0][1]);\
t2a = _mm_ror_epi64(x1a, R[1][0]);\
t3a = _mm_ror_epi64(x1a, R[1][1]);\
x0a = _mm_ternarylogic_epi64(x0a, t0a, t1a, 0x96);\
x1a = _mm_ternarylogic_epi64(x1a, t2a, t3a, 0x96);\
t0a = _mm_ror_epi64(x2a, R[2][0]);\
t1a = _mm_ror_epi64(x2a, R[2][1]);\
t2a = _mm_ror_epi64(x3a, R[3][0]);\
t3a = _mm_ror_epi64(x3a, R[3][1]);\
x2a = _mm_ternarylogic_epi64(x2a, t0a, t1a, 0x69);\
x3a = _mm_ternarylogic_epi64(x3a, t2a, t3a, 0x96);\
t0a = _mm_ror_epi64(x4a, R[4][0]);\
t1a = _mm_ror_epi64(x4a, R[4][1]);\
x4a = _mm_ternarylogic_epi64(x4a, t0a, t1a, 0x96);\
})
/*
 * Fixed-length permutations built from ROUND / ROUNDAVX.
 * All of them operate on the state variables of the expansion site.
 */
/* 12 rounds on the scalar state x0..x4. */
#define P12 ({\
ROUND(0xf0);\
ROUND(0xe1);\
ROUND(0xd2);\
ROUND(0xc3);\
ROUND(0xb4);\
ROUND(0xa5);\
ROUND(0x96);\
ROUND(0x87);\
ROUND(0x78);\
ROUND(0x69);\
ROUND(0x5a);\
ROUND(0x4b);\
})
/* 6 rounds on the scalar state; uses the last six round constants of P12. */
#define P6 ({\
ROUND(0x96);\
ROUND(0x87);\
ROUND(0x78);\
ROUND(0x69);\
ROUND(0x5a);\
ROUND(0x4b);\
})
/*
 * Six two-lane rounds: the high lane receives the first six constants of P12
 * while the low lane receives the six constants of P6.
 */
#define P6_avx_first ({\
ROUNDAVX(0xf0,0x96);\
ROUNDAVX(0xe1,0x87);\
ROUNDAVX(0xd2,0x78);\
ROUNDAVX(0xc3,0x69);\
ROUNDAVX(0xb4,0x5a);\
ROUNDAVX(0xa5,0x4b);\
})
/*
 * Six two-lane rounds with the P6 constants in both lanes. Run after
 * P6_avx_first, the high lane has completed a full P12 and the low lane two
 * P6 permutations.
 */
#define P6_avx_second ({\
ROUNDAVX(0x96,0x96);\
ROUNDAVX(0x87,0x87);\
ROUNDAVX(0x78,0x78);\
ROUNDAVX(0x69,0x69);\
ROUNDAVX(0x5a,0x5a);\
ROUNDAVX(0x4b,0x4b);\
})
/* A single round on the scalar state; uses the last round constant of P12. */
#define P1 ({\
ROUND(0x4b);\
})
/*
 * Rotation amounts of the linear layer: word x_i is combined with copies of
 * itself rotated right by R[i][0] and R[i][1] bits (see ROUND / ROUNDAVX).
 */
static const int R[5][2] = {
{19, 28}, {39, 61}, {1, 6}, {10, 17}, {7, 41}
};
/*
 * Absorb len bytes starting at src into state word x0, 8 bytes at a time.
 *
 * - While more than ISAP_rH_SZ bytes remain, a full lane is XORed into x0
 *   and followed by P12.
 * - If exactly ISAP_rH_SZ bytes remain, that lane is absorbed, P12 is run
 *   and the padding bit 0x80 is XORed into the most significant byte of x0.
 * - Otherwise the remaining bytes (possibly none) are copied into a lane,
 *   followed by the padding byte 0x80 and zeros, and XORed into x0.
 *
 * The permutation after the padded lane is NOT part of this macro; the
 * caller has to run it.
 *
 * Expects x0..x4, t0..t4 and R in scope (through P12). src is evaluated more
 * than once and full lanes are read through a u64 pointer cast.
 */
#define ABSORB_LANES(src, len) ({ \
u32 rem_bytes = len; \
u64 *src64 = (u64 *)src; \
u32 idx64 = 0; \
while(1){ \
if(rem_bytes>ISAP_rH_SZ){ \
x0 ^= U64BIG(src64[idx64]); \
idx64++; \
P12; \
rem_bytes -= ISAP_rH_SZ; \
} else if(rem_bytes==ISAP_rH_SZ){ \
x0 ^= U64BIG(src64[idx64]); \
P12; \
x0 ^= 0x8000000000000000ULL; \
break; \
} else { \
u64 lane64; \
u8 *lane8 = (u8 *)&lane64; \
u32 idx8 = idx64*8; \
for (u32 i = 0; i < 8; i++) { \
if(i<(rem_bytes)){ \
lane8[i] = src[idx8]; \
idx8++; \
} else if(i==rem_bytes){ \
lane8[i] = 0x80; \
} else { \
lane8[i] = 0x00; \
} \
} \
x0 ^= U64BIG(lane64); \
break; \
} \
} \
})
/******************************************************************************/
/* IsapRk */
/******************************************************************************/
/*
 * Re-keying function: derives a session key from the long-term key k and the
 * string y by absorbing y one bit per permutation call.
 *
 * k      - key, 16 bytes are read
 * iv     - 8-byte initialization vector
 * y      - input string of ylen bytes
 * out    - receives 16 bytes, or 24 bytes when outlen == 24
 * outlen - requested output length in bytes
 *
 * out is written only after y has been consumed completely, so the caller
 * may pass the same buffer for both.
 */
void isap_rk(
    const u8 *k,
    const u8 *iv,
    const u8 *y,
    const u64 ylen,
    u8 *out,
    const u64 outlen
){
    const u64 *key_lanes = (const u64 *)k;
    const u64 *iv_lanes = (const u64 *)iv;
    u64 *out_lanes = (u64 *)out;
    u64 x0, x1, x2, x3, x4;
    u64 t0 = 0, t1 = 0, t2 = 0, t3 = 0, t4 = 0;

    /* State = key || iv || zeros */
    x0 = U64BIG(key_lanes[0]);
    x1 = U64BIG(key_lanes[1]);
    x2 = U64BIG(iv_lanes[0]);
    x3 = 0;
    x4 = 0;
    P12;

    /* Every bit of y except the last: inject into the top bit of x0, then one round */
    const u64 final_bit = ylen * 8 - 1;
    for (size_t bit = 0; bit < final_bit; bit++) {
        const u8 byte = y[bit >> 3];
        const unsigned shift = 7 - (unsigned)(bit & 7);
        x0 ^= (u64)((byte >> shift) & 0x01) << 63;
        P1;
    }

    /* The last bit of y is followed by the full 12-round permutation */
    x0 ^= (u64)(y[ylen - 1] & 0x01) << 63;
    P12;

    /* Output the session key */
    out_lanes[0] = U64BIG(x0);
    out_lanes[1] = U64BIG(x1);
    if (outlen == 24) {
        out_lanes[2] = U64BIG(x2);
    }
}
/******************************************************************************/
/* IsapMac */
/******************************************************************************/
/*
 * Computes the 16-byte authentication tag over the nonce, the associated
 * data and the ciphertext.
 *
 * k        - key, forwarded to isap_rk()
 * npub     - nonce, 16 bytes are read
 * ad/adlen - associated data
 * c/clen   - ciphertext
 * tag      - receives 16 bytes
 */
void isap_mac(
    const u8 *k,
    const u8 *npub,
    const u8 *ad, const u64 adlen,
    const u8 *c, const u64 clen,
    u8 *tag
){
    u8 state[ISAP_STATE_SZ];
    u64 *lanes = (u64 *)state;
    const u64 *nonce = (const u64 *)npub;
    const u64 *iv = (const u64 *)ISAP_IV1;
    u64 *tag_lanes = (u64 *)tag;
    u64 x0, x1, x2, x3, x4;
    u64 t0 = 0, t1 = 0, t2 = 0, t3 = 0, t4 = 0;

    /* State = nonce || IV1 || zeros */
    x0 = U64BIG(nonce[0]);
    x1 = U64BIG(nonce[1]);
    x2 = U64BIG(iv[0]);
    x3 = 0;
    x4 = 0;
    P12;

    /* Associated data; the permutation after its padded lane is run here */
    ABSORB_LANES(ad, adlen);
    P12;

    /* Domain separation between associated data and ciphertext */
    x4 ^= 0x0000000000000001ULL;

    /* Ciphertext, again followed by the permutation for the padded lane */
    ABSORB_LANES(c, clen);
    P12;

    /* Replace the first 16 state bytes by the session key derived from them */
    lanes[0] = U64BIG(x0);
    lanes[1] = U64BIG(x1);
    lanes[2] = U64BIG(x2);
    lanes[3] = U64BIG(x3);
    lanes[4] = U64BIG(x4);
    isap_rk(k, ISAP_IV2, state, CRYPTO_KEYBYTES, state, CRYPTO_KEYBYTES);
    x0 = U64BIG(lanes[0]);
    x1 = U64BIG(lanes[1]);
    x2 = U64BIG(lanes[2]);
    x3 = U64BIG(lanes[3]);
    x4 = U64BIG(lanes[4]);

    /* Final permutation, then the first two words form the tag */
    P12;
    tag_lanes[0] = U64BIG(x0);
    tag_lanes[1] = U64BIG(x1);
}
/******************************************************************************/
/* IsapEnc */
/******************************************************************************/
/*
 * Stream encryption / decryption: XORs mlen bytes of m with a key stream and
 * writes the result to c.
 *
 * The state is built from a 24-byte session key obtained from isap_rk() and
 * the 16-byte nonce. One 8-byte key-stream block is taken from x0 after
 * every P6.
 */
void isap_enc(
    const u8 *k,
    const u8 *npub,
    const u8 *m,
    const u64 mlen,
    u8 *c
){
    u8 state[ISAP_STATE_SZ];
    u64 *key_lanes = (u64 *)state;
    u64 *nonce = (u64 *)npub;
    u64 x0, x1, x2, x3, x4;
    u64 t0 = 0, t1 = 0, t2 = 0, t3 = 0, t4 = 0;

    /* State = session key (3 words) || nonce (2 words) */
    isap_rk(k, ISAP_IV3, npub, CRYPTO_NPUBBYTES, state, ISAP_STATE_SZ - CRYPTO_NPUBBYTES);
    x0 = U64BIG(key_lanes[0]);
    x1 = U64BIG(key_lanes[1]);
    x2 = U64BIG(key_lanes[2]);
    x3 = U64BIG(nonce[0]);
    x4 = U64BIG(nonce[1]);
    P6;

    u64 *src = (u64 *)m;
    u64 *dst = (u64 *)c;
    u64 remaining = mlen;
    u32 lane = 0;

    /* Full lanes that are followed by more data */
    for (; remaining > ISAP_rH_SZ; remaining -= ISAP_rH_SZ) {
        dst[lane] = U64BIG(x0) ^ src[lane];
        lane++;
        P6;
    }

    /* Exactly one full lane left: process it without a further permutation */
    if (remaining == ISAP_rH_SZ) {
        dst[lane] = U64BIG(x0) ^ src[lane];
        return;
    }

    /* Trailing partial lane (possibly empty), handled byte by byte */
    u64 keystream = U64BIG(x0);
    const u8 *ks_bytes = (const u8 *)&keystream;
    u32 pos = lane * 8;
    for (u32 i = 0; i < remaining; i++, pos++) {
        c[pos] = ks_bytes[i] ^ m[pos];
    }
}
/******************************************************************************/
/* IsapMac and Enc */
/******************************************************************************/
/*
 * Combined encryption and tag generation using two interleaved states.
 *
 * The 128-bit registers x0a..x4a each hold one word of two states: the low
 * 64-bit lane carries the encryption (key-stream) state and the high lane
 * the MAC state, so a single ROUNDAVX advances both.
 *
 * k        - key, forwarded to isap_rk()
 * npub     - nonce, 16 bytes are read
 * ad/adlen - associated data
 * c/clen   - ciphertext buffer / number of ciphertext bytes to authenticate
 * m/mlen   - plaintext
 * tag      - receives 16 bytes
 *
 * Every iteration of the main loop produces up to 16 ciphertext bytes and,
 * from the second iteration on, absorbs up to 8 previously written
 * ciphertext bytes into the MAC lane. Ciphertext that has not been absorbed
 * when encryption finishes is handled by the scalar loop at the end.
 *
 * NOTE(review): the only caller visible here passes clen == mlen; the
 * interleaving appears to rely on that - confirm before using other values.
 */
void isap_mac_enc(
const u8 *k,
const u8 *npub,
const u8 *ad, const u64 adlen,
u8 *c, const u64 clen,
const u8 *m, const u64 mlen,
u8 *tag
){
u8 state_mac[ISAP_STATE_SZ];
const u64 *npub64 = (u64 *)npub;
u64 *state_mac64 = (u64 *)state_mac;
u64 x0, x1, x2, x3, x4;
u64 t0, t1, t2, t3, t4;
__m128i x0a, x1a, x2a, x3a, x4a;
__m128i t0a, t1a, t2a, t3a, t4a;
t0 = t1 = t2 = t3 = t4 = 0;
u8 state_enc[ISAP_STATE_SZ];
// Init state_enc
// isap_rk fills the first 24 bytes with the session key; the nonce is
// copied into the remaining two lanes.
u64 *state_enc64 = (u64 *)state_enc;
isap_rk(k,ISAP_IV3,npub,CRYPTO_NPUBBYTES,state_enc,ISAP_STATE_SZ-CRYPTO_NPUBBYTES);
t0 = t1 = t2 = t3 = t4 = 0;
state_enc64[3] = npub64[0];
state_enc64[4] = npub64[1];
// Init state_mac
x0 = U64BIG(npub64[0]);
x1 = U64BIG(npub64[1]);
x2 = U64BIG(((u64 *)ISAP_IV1)[0]);
x3 = x4 = 0;
P12;
// Absorb AD
// The permutation that follows the padded AD lane is not run here; it
// happens on the MAC lane during the first iteration of the loop below.
ABSORB_LANES(ad,adlen);
//Initialize AVX registers
// t0..t4 temporarily hold the encryption state. In every register the low
// element (tmp[0]) is the encryption word, the high element (tmp[1]) the
// MAC word.
t0 = U64BIG(state_enc64[0]);
t1 = U64BIG(state_enc64[1]);
t2 = U64BIG(state_enc64[2]);
t3 = U64BIG(state_enc64[3]);
t4 = U64BIG(state_enc64[4]);
u64 tmp[2];
tmp[1] = x0;
tmp[0] = t0;
x0a = _mm_maskz_loadu_epi64 (3, tmp);
tmp[1] = x1;
tmp[0] = t1;
x1a = _mm_maskz_loadu_epi64 (3, tmp);
tmp[1] = x2;
tmp[0] = t2;
x2a = _mm_maskz_loadu_epi64 (3, tmp);
tmp[1] = x3;
tmp[0] = t3;
x3a = _mm_maskz_loadu_epi64 (3, tmp);
tmp[1] = x4;
tmp[0] = t4;
x4a = _mm_maskz_loadu_epi64 (3, tmp);
// Squeeze key stream while absorbing ciphertext
long long rem_enc_bytes = mlen;
u64 tmpc1,tmpc2;
u64 tmpm1,tmpm2;
u64 encbytes1,encbytes2;
u32 idx8_enc = 0;
u32 idx8_mac = 0;
u64 tmpc_mac[2];
// Only the high (MAC) lane carries the domain-separation bit.
__m128i domain_separation = _mm_set_epi64x (0x0000000000000001ULL, 0);
long long rem_mac_bytes = clen;
do{
//prepare plaintext to encrypt
// encbytes1 / encbytes2: number of plaintext bytes (0..8) that go into the
// first and the second 8-byte block of this iteration.
tmpm1 = 0;
tmpm2 = 0;
if(rem_enc_bytes>=ISAP_rH_SZ)
encbytes1 = 8;
else
encbytes1 = rem_enc_bytes;
if((rem_enc_bytes-encbytes1)>=ISAP_rH_SZ)
encbytes2 = 8;
else
if(encbytes1 < 8)
encbytes2 = 0;
else
encbytes2 = rem_enc_bytes - encbytes1;
// Assemble the plaintext bytes so that the first byte ends up in the least
// significant byte of tmpm1 / tmpm2.
for (u32 i = 0; i < encbytes1; i++)
tmpm1 = (tmpm1 << 8) | ((u64)m[idx8_enc+(encbytes1-i-1)]);
for (u32 i = 0; i < encbytes2; i++)
tmpm2 = (tmpm2 << 8) | ((u64)m[idx8_enc+encbytes1+(encbytes2-i-1)]);
//prepare ciphertext to authenticate
// Nothing is absorbed in the first iteration because no ciphertext has been
// written yet; the MAC lane then only runs the permutation.
tmpc_mac[1] = 0;
if(rem_enc_bytes < mlen){
u8 *lane8 = (u8 *)&tmpc_mac[1];
for (u32 i = 0; i < 8; i++) {
if(i<(rem_mac_bytes)){
lane8[i] = c[idx8_mac];
idx8_mac++;
} else if(i==rem_mac_bytes){
lane8[i] = 0x80;
} else {
lane8[i] = 0x00;
}
}
tmpc_mac[1] = U64BIG(tmpc_mac[1]);
rem_mac_bytes -= ISAP_rH_SZ;
}
// Mask 2 selects element 1 only, so just the MAC lane receives the block.
x0a = _mm_xor_si128 (x0a, _mm_maskz_loadu_epi64 (2, tmpc_mac));
// After each half the low lane has completed one P6 and yields 8 bytes of
// key stream; both halves together are one P12 for the MAC lane.
P6_avx_first;
_mm_mask_storeu_epi64 (tmp, 1, x0a);
tmpc1 = U64BIG(tmp[0]) ^ tmpm1;
P6_avx_second;
_mm_mask_storeu_epi64 (tmp, 1, x0a);
tmpc2 = U64BIG(tmp[0]) ^ tmpm2;
// Domain separation is applied once, after the first MAC permutation;
// andnot(v, v) is zero, so later iterations XOR nothing.
x4a = _mm_xor_si128 (x4a, domain_separation);
domain_separation = _mm_andnot_si128 (domain_separation,domain_separation);
// Squeeze lane
u8 *lane8 = (u8 *)&tmpc1;
for (u32 i = 0; i < encbytes1; i++) {
c[idx8_enc] = lane8[i];
idx8_enc++;
}
lane8 = (u8 *)&tmpc2;
for (u32 i = 0; i < encbytes2; i++) {
c[idx8_enc] = lane8[i];
idx8_enc++;
}
rem_enc_bytes -= 2*ISAP_rH_SZ;
}while(rem_enc_bytes>0);
//end encrypting
//Restore MAC state
// Mask 2 stores element 1 (the MAC lane) into tmp[1].
_mm_mask_storeu_epi64 (tmp, 2, x0a);
x0 = tmp[1];
_mm_mask_storeu_epi64 (tmp, 2, x1a);
x1 = tmp[1];
_mm_mask_storeu_epi64 (tmp, 2, x2a);
x2 = tmp[1];
_mm_mask_storeu_epi64 (tmp, 2, x3a);
x3 = tmp[1];
_mm_mask_storeu_epi64 (tmp, 2, x4a);
x4 = tmp[1];
// Absorb rest of C
// Runs until the lane containing the 0x80 padding byte has been absorbed;
// rem_mac_bytes == 0 still produces one padding-only lane.
while(rem_mac_bytes>=0){
tmpc_mac[1] = 0;
u8 *lane8 = (u8 *)&tmpc_mac[1];
for (u32 i = 0; i < 8; i++) {
if(i<(rem_mac_bytes)){
lane8[i] = c[idx8_mac];
idx8_mac++;
} else if(i==rem_mac_bytes){
lane8[i] = 0x80;
} else {
lane8[i] = 0x00;
}
}
x0 ^= U64BIG(tmpc_mac[1]);
P12;
rem_mac_bytes -= ISAP_rH_SZ;
}
// Derive K*
// The first 16 state bytes serve as input to isap_rk and are replaced by
// its 16-byte output.
state_mac64[0] = U64BIG(x0);
state_mac64[1] = U64BIG(x1);
state_mac64[2] = U64BIG(x2);
state_mac64[3] = U64BIG(x3);
state_mac64[4] = U64BIG(x4);
isap_rk(k,ISAP_IV2,(u8 *)state_mac64,CRYPTO_KEYBYTES,(u8 *)state_mac64,CRYPTO_KEYBYTES);
x0 = U64BIG(state_mac64[0]);
x1 = U64BIG(state_mac64[1]);
x2 = U64BIG(state_mac64[2]);
x3 = U64BIG(state_mac64[3]);
x4 = U64BIG(state_mac64[4]);
// Squeeze tag
P12;
unsigned long long *tag64 = (u64 *)tag;
tag64[0] = U64BIG(x0);
tag64[1] = U64BIG(x1);
}
/* Parameters and function prototypes of this ISAP implementation. */
#ifndef ISAP_H
#define ISAP_H
// Rate in bits
#define ISAP_rH 64
#define ISAP_rB 1
// Number of rounds
#define ISAP_sH 12
#define ISAP_sB 1
#define ISAP_sE 6
#define ISAP_sK 12
// State size in bytes
#define ISAP_STATE_SZ 40
// Size of rate in bytes
#define ISAP_rH_SZ ((ISAP_rH+7)/8)
// Size of zero truncated IV in bytes
#define ISAP_IV_SZ 8
// Size of tag in bytes
#define ISAP_TAG_SZ 16
// Security level
#define ISAP_K 128
// Computes the ISAP_TAG_SZ-byte tag over npub, ad and the ciphertext c.
void isap_mac(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *ad, const unsigned long long adlen,
const unsigned char *c, const unsigned long long clen,
unsigned char *tag
);
// Encrypts m into c and computes the tag over ad and c in one pass.
void isap_mac_enc(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *ad, const unsigned long long adlen,
unsigned char *c, const unsigned long long clen,
const unsigned char *m, const unsigned long long mlen,
unsigned char *tag
);
// Re-keying: derives outlen bytes of session key from k, iv and the input
// string (called y / ylen in the definition).
void isap_rk(
const unsigned char *k,
const unsigned char *iv,
const unsigned char *in,
const unsigned long long inlen,
unsigned char *out,
const unsigned long long outlen
);
// XORs m with the key stream derived from k and npub and writes the result
// to c; the same call is used for encryption and decryption.
void isap_enc(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *m, const unsigned long long mlen,
unsigned char *c
);
#endif
/* Size constants for the AEAD interface and for the optional hash function. */
#ifndef API_H
#define API_H
// isapa128a
// Key, secret-nonce, public-nonce lengths in bytes. CRYPTO_ABYTES is
// presumably the ciphertext expansion (tag size) - confirm.
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
// NOTE(review): presumably signals that input and output buffers must not overlap - confirm.
#define CRYPTO_NOOVERLAP 1
// asconhashv12
// Hash output length in bytes and number of permutation rounds.
#define CRYPTO_BYTES 32
#define ASCON_HASH_OUTLEN 32
#define ASCON_HASH_ROUNDS 12
#endif
#ifndef ASCONP_H_
#define ASCONP_H_
#include <inttypes.h>
#include "forceinline.h"
/* One 64-bit state word, also accessible as two 32-bit halves or eight bytes. */
typedef union
{
uint64_t x;
uint32_t w[2];
uint8_t b[8];
} lane_t;
/*
 * The permutation state of five 64-bit words, with overlapping views as
 * lanes, 64-bit words, 32-bit halves and bytes. Which bits the w[] and b[]
 * views address depends on the byte order of the host.
 */
typedef union
{
lane_t l[5];
uint64_t x[5];
uint32_t w[5][2];
uint8_t b[5][8];
} state_t;
/* ---------------------------------------------------------------- */
/*
 * Permutation shorthands named after the ISAP round parameters
 * (sH, sB, sE, sK). They expand to a call on a variable named `s`, which
 * must be a state_t pointer in scope at the point of use.
 */
#define P_sH P12ROUNDS(s)
#define P_sB P1ROUNDS(s)
#define P_sE P6ROUNDS(s)
#define P_sK P12ROUNDS(s)
/* ---------------------------------------------------------------- */
/* Byte-swap a loaded lane and convert it to bit-interleaved form, and back. */
#define U64TOWORD(x) to_bit_interleaving(U64BIG(x))
#define WORDTOU64(x) U64BIG(from_bit_interleaving(x))
/* ---------------------------------------------------------------- */
/* Conversion to / from bit-interleaved form without the byte swap. */
#define TOBI(x) to_bit_interleaving(x)
#define FROMBI(x) from_bit_interleaving(x)
/* ---------------------------------------------------------------- */
/* Returns the lane with the byte order of its 64-bit value reversed. */
forceinline lane_t U64BIG(lane_t x)
{
    uint64_t v = x.x;

    /* Swap progressively smaller halves: 32-bit, 16-bit, then 8-bit groups. */
    v = (v << 32) | (v >> 32);
    v = ((v & 0x0000FFFF0000FFFFULL) << 16) | ((v >> 16) & 0x0000FFFF0000FFFFULL);
    v = ((v & 0x00FF00FF00FF00FFULL) << 8) | ((v >> 8) & 0x00FF00FF00FF00FFULL);

    x.x = v;
    return x;
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/*
 * Converts a lane to bit-interleaved form: inside each 32-bit half the even
 * bits are gathered in the lower 16 bits and the odd bits in the upper 16
 * bits; the halves are then recombined so that w[0] holds all even-indexed
 * bits and w[1] all odd-indexed bits of the 64-bit value.
 */
forceinline lane_t to_bit_interleaving(lane_t in)
{
    /* Masks for the swap stages with distances 1, 2, 4 and 8 */
    const uint32_t mask[4] = {0x22222222, 0x0C0C0C0C, 0x00F000F0, 0x0000FF00};
    uint32_t lo = in.w[0];
    uint32_t hi = in.w[1];

    for (int stage = 0; stage < 4; stage++)
    {
        const int dist = 1 << stage;
        uint32_t swap_lo = (lo ^ (lo >> dist)) & mask[stage];
        uint32_t swap_hi = (hi ^ (hi >> dist)) & mask[stage];
        lo ^= swap_lo ^ (swap_lo << dist);
        hi ^= swap_hi ^ (swap_hi << dist);
    }

    lane_t out;
    out.w[0] = (lo & 0x0000FFFF) | (hi << 16);
    out.w[1] = (lo >> 16) | (hi & 0xFFFF0000);
    return out;
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/*
 * Inverse of to_bit_interleaving(): regroups the even-bit word w[0] and the
 * odd-bit word w[1] into two 32-bit halves and undoes the bit shuffle with
 * the same swap stages in reverse order.
 */
forceinline lane_t from_bit_interleaving(lane_t in)
{
    /* Masks for the swap stages with distances 1, 2, 4 and 8 */
    const uint32_t mask[4] = {0x22222222, 0x0C0C0C0C, 0x00F000F0, 0x0000FF00};
    uint32_t lo = (in.w[0] & 0x0000FFFF) | (in.w[1] << 16);
    uint32_t hi = (in.w[0] >> 16) | (in.w[1] & 0xFFFF0000);

    for (int stage = 3; stage >= 0; stage--)
    {
        const int dist = 1 << stage;
        uint32_t swap_lo = (lo ^ (lo >> dist)) & mask[stage];
        uint32_t swap_hi = (hi ^ (hi >> dist)) & mask[stage];
        lo ^= swap_lo ^ (swap_lo << dist);
        hi ^= swap_hi ^ (swap_hi << dist);
    }

    lane_t out;
    out.x = (uint64_t)hi << 32 | lo;
    return out;
}
/* ---------------------------------------------------------------- */
/* Rotates the 32-bit value x right by n bits (n in 0..31). */
forceinline uint32_t ROR32(uint32_t x, int n)
{
    /* Masking the negated count keeps the left shift defined for n == 0. */
    const unsigned left = (unsigned)(-n) & 31u;
    return (x >> n) | (x << left);
}
/* ---------------------------------------------------------------- */
/*
 * Rotates a bit-interleaved 64-bit word right by n bits using two 32-bit
 * rotations. For an even n both halves rotate by n/2; for an odd n the
 * halves additionally swap places.
 */
forceinline uint64_t ROR(uint64_t x, int n)
{
    lane_t src = {.x = x};
    lane_t dst;

    if (n % 2)
    {
        dst.w[0] = ROR32(src.w[1], (n - 1) / 2);
        dst.w[1] = ROR32(src.w[0], (n + 1) / 2);
    }
    else
    {
        dst.w[0] = ROR32(src.w[0], n / 2);
        dst.w[1] = ROR32(src.w[1], n / 2);
    }
    return dst.x;
}
/* ---------------------------------------------------------------- */
/*
 * One permutation round on the bit-interleaved state *s.
 * C is the round constant; the callers in this file pass it already in
 * bit-interleaved form (for example 0xc0000000c).
 */
forceinline void ROUND(state_t *s, uint64_t C)
{
    uint64_t a0 = s->x[0];
    uint64_t a1 = s->x[1];
    uint64_t a2 = s->x[2];
    uint64_t a3 = s->x[3];
    uint64_t a4 = s->x[4];

    /* round constant */
    a2 ^= C;

    /* s-box layer */
    a0 ^= a4;
    a4 ^= a3;
    a2 ^= a1;
    uint64_t b0 = a0 ^ (~a1 & a2);
    uint64_t b2 = a2 ^ (~a3 & a4);
    uint64_t b4 = a4 ^ (~a0 & a1);
    uint64_t b1 = a1 ^ (~a2 & a3);
    uint64_t b3 = a3 ^ (~a4 & a0);
    b1 ^= b0;
    b3 ^= b2;
    b0 ^= b4;

    /*
     * linear layer: b ^ ROR(b, r0) ^ ROR(b, r1), evaluated as
     * b ^ ROR(b ^ ROR(b, r1 - r0), r0); word 2 is complemented afterwards
     */
    s->x[2] = ~(b2 ^ ROR(b2 ^ ROR(b2, 6 - 1), 1));
    s->x[3] = b3 ^ ROR(b3 ^ ROR(b3, 17 - 10), 10);
    s->x[4] = b4 ^ ROR(b4 ^ ROR(b4, 41 - 7), 7);
    s->x[0] = b0 ^ ROR(b0 ^ ROR(b0, 28 - 19), 19);
    s->x[1] = b1 ^ ROR(b1 ^ ROR(b1, 61 - 39), 39);
}
/* ---------------------------------------------------------------- */
/* Applies the 12-round permutation to *s. */
forceinline void P12ROUNDS(state_t *s)
{
    /* Round constants in bit-interleaved form, in order of application */
    const uint64_t round_constants[12] = {
        0xc0000000c, 0xc00000009, 0x90000000c, 0x900000009,
        0xc00000006, 0xc00000003, 0x900000006, 0x900000003,
        0x60000000c, 0x600000009, 0x30000000c, 0x300000009};

    for (int r = 0; r < 12; r++)
    {
        ROUND(s, round_constants[r]);
    }
}
/* ---------------------------------------------------------------- */
/* Applies the 6-round permutation to *s (the last six rounds of P12ROUNDS). */
forceinline void P6ROUNDS(state_t *s)
{
    /* Round constants in bit-interleaved form, in order of application */
    const uint64_t round_constants[6] = {
        0x900000006, 0x900000003, 0x60000000c,
        0x600000009, 0x30000000c, 0x300000009};

    for (int r = 0; r < 6; r++)
    {
        ROUND(s, round_constants[r]);
    }
}
/* ---------------------------------------------------------------- */
/* Applies a single round to *s, using the last round constant of P12ROUNDS. */
forceinline void P1ROUNDS(state_t *s)
{
    const uint64_t last_round_constant = 0x300000009;
    ROUND(s, last_round_constant);
}
/* ---------------------------------------------------------------- */
#endif // ASCONP_H
/*
 * Build configuration and initialization vectors.
 * NOTE(review): the arrays below are defined (not merely declared) in this
 * header, so including it from more than one translation unit would create
 * duplicate definitions - confirm it is only included once.
 */
#ifndef CONFIG_H
#define CONFIG_H
/* Set to 1 to compile crypto_hash(). */
#define ENABLE_HASH 0
#include "isap.h"
// ISAP-A-128a
// The three IVs differ only in their first byte: ISAP_IV_A initializes the
// MAC state, ISAP_IV_KA / ISAP_IV_KE are passed to isap_rk() when deriving
// the tag key / the encryption key.
const uint8_t ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
// Ascon-Hash
// Initial value loaded into the first state word by crypto_hash().
const uint8_t ASCON_HASH_IV[] = {0x00, 0x40, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00};
#endif // CONFIG_H
#include "api.h"
#include "isap.h"
#include "crypto_aead.h"
/*
 * Authenticated encryption entry point (encrypt-then-MAC).
 *
 * Writes mlen ciphertext bytes to c, followed by an ISAP_TAG_SZ-byte tag
 * computed over ad and the ciphertext, and stores the total length in
 * *clen. nsec is ignored. Always returns 0.
 */
int crypto_aead_encrypt(
    unsigned char *c, unsigned long long *clen,
    const unsigned char *m, unsigned long long mlen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *nsec,
    const unsigned char *npub,
    const unsigned char *k)
{
    (void)nsec;

    /* Output is the ciphertext plus the trailing tag. */
    *clen = mlen + ISAP_TAG_SZ;

    /* An empty message needs no key stream. */
    if (mlen != 0) {
        isap_enc(k, npub, m, mlen, c);
    }

    /* Authenticate ad and ciphertext; the tag goes right behind the ciphertext. */
    isap_mac(k, npub, ad, adlen, c, mlen, c + mlen);

    return 0;
}
/*
 * Authenticated decryption entry point.
 *
 * c holds clen bytes: the ciphertext followed by an ISAP_TAG_SZ-byte tag.
 * The tag is recomputed over (ad, ciphertext) and compared with the received
 * one; only on a match is the plaintext written to m.
 *
 * Returns 0 on success and -1 if the input is shorter than a tag or the tag
 * does not verify. *mlen is set to the plaintext length (0 for inputs that
 * are too short). nsec is ignored.
 */
int crypto_aead_decrypt(
    unsigned char *m, unsigned long long *mlen,
    unsigned char *nsec,
    const unsigned char *c, unsigned long long clen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *npub,
    const unsigned char *k)
{
    (void)nsec;

    /*
     * Reject inputs that cannot even contain a tag. Without this check the
     * unsigned subtraction below wraps around and the MAC would read far
     * beyond the end of c.
     */
    if (clen < ISAP_TAG_SZ) {
        *mlen = 0;
        return -1;
    }

    /* Plaintext length is clen - tag length */
    *mlen = clen - ISAP_TAG_SZ;

    /* Recompute the tag over the associated data and the ciphertext */
    unsigned char tag[ISAP_TAG_SZ];
    isap_mac(k, npub, ad, adlen, c, *mlen, tag);

    /*
     * Compare all tag bytes by accumulating their differences, so the work
     * done does not depend on the position of the first mismatch.
     */
    const unsigned char *received_tag = c + *mlen;
    unsigned char diff = 0;
    for (unsigned int i = 0; i < ISAP_TAG_SZ; i++) {
        diff |= (unsigned char)(tag[i] ^ received_tag[i]);
    }
    if (diff != 0) {
        return -1;
    }

    /* Tag is valid: decrypt (the stream cipher is its own inverse) */
    if (*mlen > 0) {
        isap_enc(k, npub, c, *mlen, m);
    }
    return 0;
}
#ifndef FORCEINLINE_H_
#define FORCEINLINE_H_
/*
 * Defines `forceinline`, a function specifier that asks the compiler to
 * always inline the function, falling back to plain `inline` on compilers
 * without a known extension.
 *
 * clang identifies itself with the lower-case macro __clang__. It typically
 * also defines __GNUC__, so the dedicated branch only matters for
 * clang-based compilers that do not.
 */
#ifdef _MSC_VER
#define forceinline __forceinline
#elif defined(__GNUC__)
#define forceinline inline __attribute__((__always_inline__))
#elif defined(__clang__)
#if __has_attribute(__always_inline__)
#define forceinline inline __attribute__((__always_inline__))
#else
#define forceinline inline
#endif
#else
#define forceinline inline
#endif
#endif /* FORCEINLINE_H_ */
#include <string.h>
#include <inttypes.h>
#include "api.h"
#include "isap.h"
#include "asconp.h"
#include "config.h"
/*
 * Absorbs len bytes from src into the first state word, one 8-byte lane at a
 * time with a 12-round permutation after every lane, then absorbs the
 * padding (a 0x80 byte followed by zeros) and permutes once more.
 *
 * s   - bit-interleaved permutation state, updated in place
 * src - input bytes; no alignment is required
 * len - number of input bytes, may be 0
 */
forceinline void ABSORB_LANES(state_t *s, const uint8_t *src, uint64_t len)
{
    while (len >= ISAP_rH / 8)
    {
        // Absorb full lanes. The bytes are copied into a lane_t instead of
        // dereferencing a cast pointer: src may be unaligned, which would be
        // undefined behaviour and faults on cores without unaligned access.
        lane_t in;
        memcpy(&in, src, sizeof(in));
        lane_t t0 = U64TOWORD(in);
        s->x[0] ^= t0.x;
        len -= ISAP_rH / 8;
        src += ISAP_rH / 8;
        P_sH;
    }
    if (len > 0)
    {
        // Absorb partial lane and padding. Bytes are stored in reverse order
        // so that no separate byte swap is needed before interleaving.
        size_t i;
        lane_t t0 = {0};
        for (i = 0; i < len; i++)
        {
            t0.b[7 - i] ^= *src;
            src++;
        }
        t0.b[7 - i] ^= 0x80;
        t0 = TOBI(t0);
        s->x[0] ^= t0.x;
        P_sH;
    }
    else
    {
        // Absorb padded empty lane: only the padding bit is set
        s->b[0][7] ^= 0x80;
        P_sH;
    }
}
/******************************************************************************/
/* ISAP_RK */
/******************************************************************************/
/*
 * Re-keying function: derives a session key from the long-term key k and the
 * 16-byte string y by absorbing y one bit per permutation call.
 *
 * k      - key, 16 bytes are read; no alignment is required
 * iv     - 8-byte initialization vector; no alignment is required
 * y      - 16 input bytes
 * out    - receives state words 0 and 1, plus word 2 when outlen > 16; the
 *          words are copied as they are, i.e. in bit-interleaved form
 * outlen - requested output length in bytes
 *
 * The function works on a private state and writes out only at the very end,
 * so y may point into *out.
 */
void isap_rk(
    const uint8_t *k,
    const uint8_t *iv,
    const uint8_t *y,
    state_t *out,
    const size_t outlen)
{
    state_t state;
    state_t *s = &state;
    lane_t in;

    // Initialize. The inputs are copied with memcpy because k and iv are
    // plain byte pointers that need not be 8-byte aligned.
    memcpy(&in, k + 0, sizeof(in));
    s->l[0] = U64TOWORD(in);
    memcpy(&in, k + 8, sizeof(in));
    s->l[1] = U64TOWORD(in);
    memcpy(&in, iv + 0, sizeof(in));
    s->l[2] = U64TOWORD(in);
    s->x[3] = 0;
    s->x[4] = 0;
    P_sK;

    // Absorb Y, bit by bit: every bit is moved to position 7 of the top
    // state byte and followed by one round, except for the very last bit.
    for (size_t i = 0; i < 16; i++)
    {
        uint8_t y_byte = *y;
        s->b[0][7] ^= (y_byte & 0x80) << 0;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x40) << 1;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x20) << 2;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x10) << 3;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x08) << 4;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x04) << 5;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x02) << 6;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x01) << 7;
        if (i != 15)
        {
            P_sB;
            y += 1;
        }
    }

    // Squeeze K*
    P_sK;
    out->x[0] = s->x[0];
    out->x[1] = s->x[1];
    if (outlen > 16)
    {
        out->x[2] = s->x[2];
    }
}
/******************************************************************************/
/* ISAP_MAC */
/******************************************************************************/
void isap_mac(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *ad, uint64_t adlen,
const uint8_t *c, uint64_t clen,
uint8_t *tag)
{
state_t state;
state_t *s = &state;
// Initialize
s->l[0] = U64TOWORD(*(lane_t *)(npub + 0));
s->l[1] = U64TOWORD(*(lane_t *)(npub + 8));
s->l[2] = U64TOWORD(*(lane_t *)(ISAP_IV_A + 0));
s->x[3] = 0;
s->x[4] = 0;
P_sH;
// Absorb associated data
ABSORB_LANES(s, ad, adlen);
// Domain seperation
s->w[4][0] ^= 0x1UL;
// Absorb ciphertext
ABSORB_LANES(s, c, clen);
// Derive KA*
s->l[0] = WORDTOU64(s->l[0]);
s->l[1] = WORDTOU64(s->l[1]);
isap_rk(k, ISAP_IV_KA, (const uint8_t *)(s->b), s, CRYPTO_KEYBYTES);
// Squeeze tag
P_sH;
lane_t t0 = WORDTOU64(s->l[0]);
memcpy(tag + 0, t0.b, 8);
t0 = WORDTOU64(s->l[1]);
memcpy(tag + 8, t0.b, 8);
}
/******************************************************************************/
/* ISAP_ENC */
/******************************************************************************/
void isap_enc(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *m, uint64_t mlen,
uint8_t *c)
{
state_t state;
state_t *s = &state;
// Init state
isap_rk(k, ISAP_IV_KE, npub, s, ISAP_STATE_SZ - CRYPTO_NPUBBYTES);
s->l[3] = U64TOWORD(*(lane_t *)(npub + 0));
s->l[4] = U64TOWORD(*(lane_t *)(npub + 8));
while (mlen >= ISAP_rH / 8)
{
// Encrypt full lanes
P_sE;
lane_t t0 = WORDTOU64(s->l[0]);
*(uint64_t *)c = *(uint64_t *)m ^ t0.x;
mlen -= ISAP_rH / 8;
m += ISAP_rH / 8;
c += ISAP_rH / 8;
}
if (mlen > 0)
{
// Encrypt partial lanes
P_sE;
lane_t t0 = WORDTOU64(s->l[0]);
for (uint8_t i = 0; i < mlen; i++)
{
*c = *m ^ t0.b[i];
m += 1;
c += 1;
}
}
}
/******************************************************************************/
/* Ascon-Hash */
/******************************************************************************/
#if ENABLE_HASH == 1
int crypto_hash(uint8_t *out, const uint8_t *in, unsigned long long inlen)
{
state_t state;
state_t *s = &state;
// Initialize
s->l[0] = U64TOWORD(*(lane_t *)(ASCON_HASH_IV + 0));
s->x[1] = 0;
s->x[2] = 0;
s->x[3] = 0;
s->x[4] = 0;
P_sH;
// Absorb input
ABSORB_LANES(s, in, inlen);
for (size_t i = 0; i < 4; i++)
{
// Squeeze full lanes
lane_t t0 = WORDTOU64(s->l[0]);
*(uint64_t *)(out + 8 * i) = t0.x;
if (i < 3)
{
P_sH;
}
}
return 0;
}
#endif
/* Parameters and public function prototypes of this ISAP implementation. */
#ifndef ISAP_H
#define ISAP_H
#include <inttypes.h>
// Rate in bits
#define ISAP_rH 64
#define ISAP_rB 1
// Number of rounds
#define ISAP_sH 12
#define ISAP_sB 1
#define ISAP_sE 6
#define ISAP_sK 12
// State size in bytes
#define ISAP_STATE_SZ 40
// Size of rate in bytes
#define ISAP_rH_SZ ((ISAP_rH + 7) / 8)
// Size of zero truncated IV in bytes
#define ISAP_IV_SZ 8
// Size of tag in bytes
#define ISAP_TAG_SZ 16
// Security level
#define ISAP_K 128
// Computes the ISAP_TAG_SZ-byte tag over npub, ad and the ciphertext c.
void isap_mac(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *ad, const uint64_t adlen,
const uint8_t *c, const uint64_t clen,
uint8_t *tag);
// XORs m with the key stream derived from k and npub and writes the result
// to c; the same call is used for encryption and decryption.
void isap_enc(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *m, const uint64_t mlen,
uint8_t *c);
#endif
#ifndef ASCONP_H_
#define ASCONP_H_
#include <inttypes.h>
#include "forceinline.h"
/* One 64-bit state word, also accessible as two 32-bit halves or eight bytes. */
typedef union
{
uint64_t x;
uint32_t w[2];
uint8_t b[8];
} lane_t;
/*
 * The permutation state of five 64-bit words, with overlapping views as
 * lanes, 64-bit words, 32-bit halves and bytes. Which bits the w[] and b[]
 * views address depends on the byte order of the host.
 */
typedef union
{
lane_t l[5];
uint64_t x[5];
uint32_t w[5][2];
uint8_t b[5][8];
} state_t;
/* ---------------------------------------------------------------- */
/*
 * Permutation shorthands named after the ISAP round parameters
 * (sH, sB, sE, sK). They expand to a call on a variable named `s`, which
 * must be a state_t pointer in scope at the point of use.
 */
#define P_sH P12ROUNDS(s)
#define P_sB P1ROUNDS(s)
#define P_sE P6ROUNDS(s)
#define P_sK P12ROUNDS(s)
/* ---------------------------------------------------------------- */
/* Byte-swap a loaded lane and convert it to bit-interleaved form, and back. */
#define U64TOWORD(x) to_bit_interleaving(U64BIG(x))
#define WORDTOU64(x) U64BIG(from_bit_interleaving(x))
/* ---------------------------------------------------------------- */
/* Conversion to / from bit-interleaved form without the byte swap. */
#define TOBI(x) to_bit_interleaving(x)
#define FROMBI(x) from_bit_interleaving(x)
/* ---------------------------------------------------------------- */
/* Returns the lane with the byte order of its 64-bit value reversed. */
forceinline lane_t U64BIG(lane_t x)
{
    uint64_t v = x.x;

    /* Swap progressively smaller halves: 32-bit, 16-bit, then 8-bit groups. */
    v = (v << 32) | (v >> 32);
    v = ((v & 0x0000FFFF0000FFFFULL) << 16) | ((v >> 16) & 0x0000FFFF0000FFFFULL);
    v = ((v & 0x00FF00FF00FF00FFULL) << 8) | ((v >> 8) & 0x00FF00FF00FF00FFULL);

    x.x = v;
    return x;
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/*
 * Converts a lane to bit-interleaved form: inside each 32-bit half the even
 * bits are gathered in the lower 16 bits and the odd bits in the upper 16
 * bits; the halves are then recombined so that w[0] holds all even-indexed
 * bits and w[1] all odd-indexed bits of the 64-bit value.
 */
forceinline lane_t to_bit_interleaving(lane_t in)
{
    /* Masks for the swap stages with distances 1, 2, 4 and 8 */
    const uint32_t mask[4] = {0x22222222, 0x0C0C0C0C, 0x00F000F0, 0x0000FF00};
    uint32_t lo = in.w[0];
    uint32_t hi = in.w[1];

    for (int stage = 0; stage < 4; stage++)
    {
        const int dist = 1 << stage;
        uint32_t swap_lo = (lo ^ (lo >> dist)) & mask[stage];
        uint32_t swap_hi = (hi ^ (hi >> dist)) & mask[stage];
        lo ^= swap_lo ^ (swap_lo << dist);
        hi ^= swap_hi ^ (swap_hi << dist);
    }

    lane_t out;
    out.w[0] = (lo & 0x0000FFFF) | (hi << 16);
    out.w[1] = (lo >> 16) | (hi & 0xFFFF0000);
    return out;
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/*
 * Inverse of to_bit_interleaving(): regroups the even-bit word w[0] and the
 * odd-bit word w[1] into two 32-bit halves and undoes the bit shuffle with
 * the same swap stages in reverse order.
 */
forceinline lane_t from_bit_interleaving(lane_t in)
{
    /* Masks for the swap stages with distances 1, 2, 4 and 8 */
    const uint32_t mask[4] = {0x22222222, 0x0C0C0C0C, 0x00F000F0, 0x0000FF00};
    uint32_t lo = (in.w[0] & 0x0000FFFF) | (in.w[1] << 16);
    uint32_t hi = (in.w[0] >> 16) | (in.w[1] & 0xFFFF0000);

    for (int stage = 3; stage >= 0; stage--)
    {
        const int dist = 1 << stage;
        uint32_t swap_lo = (lo ^ (lo >> dist)) & mask[stage];
        uint32_t swap_hi = (hi ^ (hi >> dist)) & mask[stage];
        lo ^= swap_lo ^ (swap_lo << dist);
        hi ^= swap_hi ^ (swap_hi << dist);
    }

    lane_t out;
    out.x = (uint64_t)hi << 32 | lo;
    return out;
}
/* ---------------------------------------------------------------- */
/*
 * One permutation round on the bit-interleaved state *s, written in ARM
 * inline assembly.
 *
 * Each 64-bit state word is handled as two 32-bit registers: the "_e"
 * operands are bound to s->w[i][0] and the "_o" operands to s->w[i][1].
 *
 * C is the round constant. Its low 32 bits are added to the "_e" half of
 * word 2 and its high 32 bits to the "_o" half. Both are passed with the "i"
 * (immediate) constraint, so C must be a compile-time constant at every
 * call site after inlining.
 *
 * Layout of the assembly: non-linear layer on the "_e" halves, the same on
 * the "_o" halves, then the linear layer, which uses the rotating second
 * operand of `eor` to combine the rotated copies of each word.
 *
 * NOTE(review): unlike the C version of ROUND seen elsewhere in this file,
 * no `mvn` is applied to word 2 after the linear layer; the orr/mvn
 * sequences in the non-linear layer presumably account for the complement -
 * verify against test vectors.
 */
forceinline void ROUND(state_t *s, uint64_t C)
{
uint32_t tmp0, tmp1;
__asm__ __volatile__(
"eor %[x0_e], %[x0_e], %[x4_e]\n\t"
"eor %[x4_e], %[x4_e], %[x3_e]\n\t"
"eor %[x2_e], %[x2_e], %[x1_e]\n\t"
"mvn %[tmp0], %[x0_e]\n\t"
"orr %[tmp0], %[x4_e], %[tmp0]\n\t"
"eor %[x2_e], %[x2_e], %[C_e]\n\t"
"bic %[tmp1], %[x2_e], %[x1_e]\n\t"
"eor %[x0_e], %[x0_e], %[tmp1]\n\t"
"mvn %[tmp1], %[x4_e]\n\t"
"orr %[tmp1], %[x3_e], %[tmp1]\n\t"
"eor %[x2_e], %[x2_e], %[tmp1]\n\t"
"bic %[tmp1], %[x1_e], %[x0_e]\n\t"
"eor %[x4_e], %[x4_e], %[tmp1]\n\t"
"and %[tmp1], %[x3_e], %[x2_e]\n\t"
"eor %[x1_e], %[x1_e], %[tmp1]\n\t"
"eor %[x3_e], %[x3_e], %[tmp0]\n\t"
"eor %[x1_e], %[x1_e], %[x0_e]\n\t"
"eor %[x3_e], %[x3_e], %[x2_e]\n\t"
"eor %[x0_e], %[x0_e], %[x4_e]\n\t"
"eor %[x0_o], %[x0_o], %[x4_o]\n\t"
"eor %[x4_o], %[x4_o], %[x3_o]\n\t"
"eor %[x2_o], %[x2_o], %[C_o]\n\t"
"eor %[x2_o], %[x2_o], %[x1_o]\n\t"
"mvn %[tmp0], %[x0_o]\n\t"
"orr %[tmp0], %[x4_o], %[tmp0]\n\t"
"bic %[tmp1], %[x2_o], %[x1_o]\n\t"
"eor %[x0_o], %[x0_o], %[tmp1]\n\t"
"mvn %[tmp1], %[x4_o]\n\t"
"orr %[tmp1], %[x3_o], %[tmp1]\n\t"
"eor %[x2_o], %[x2_o], %[tmp1]\n\t"
"bic %[tmp1], %[x1_o], %[x0_o]\n\t"
"eor %[x4_o], %[x4_o], %[tmp1]\n\t"
"and %[tmp1], %[x3_o], %[x2_o]\n\t"
"eor %[x1_o], %[x1_o], %[tmp1]\n\t"
"eor %[x3_o], %[x3_o], %[tmp0]\n\t"
"eor %[x1_o], %[x1_o], %[x0_o]\n\t"
"eor %[x3_o], %[x3_o], %[x2_o]\n\t"
"eor %[x0_o], %[x0_o], %[x4_o]\n\t"
"eor %[tmp0], %[x0_e], %[x0_o], ror #4\n\t"
"eor %[tmp1], %[x0_o], %[x0_e], ror #5\n\t"
"eor %[x0_o], %[x0_o], %[tmp0], ror #10\n\t"
"eor %[x0_e], %[x0_e], %[tmp1], ror #9\n\t"
"eor %[tmp0], %[x1_e], %[x1_e], ror #11\n\t"
"eor %[tmp1], %[x1_o], %[x1_o], ror #11\n\t"
"eor %[x1_o], %[x1_o], %[tmp0], ror #20\n\t"
"eor %[x1_e], %[x1_e], %[tmp1], ror #19\n\t"
"eor %[tmp0], %[x2_e], %[x2_o], ror #2\n\t"
"eor %[tmp1], %[x2_o], %[x2_e], ror #3\n\t"
"eor %[x2_o], %[x2_o], %[tmp0], ror #1\n\t"
"eor %[x2_e], %[x2_e], %[tmp1]\n\t"
"eor %[tmp0], %[x3_e], %[x3_o], ror #3\n\t"
"eor %[tmp1], %[x3_o], %[x3_e], ror #4\n\t"
"eor %[x3_e], %[x3_e], %[tmp0], ror #5\n\t"
"eor %[x3_o], %[x3_o], %[tmp1], ror #5\n\t"
"eor %[tmp0], %[x4_e], %[x4_e], ror #17\n\t"
"eor %[tmp1], %[x4_o], %[x4_o], ror #17\n\t"
"eor %[x4_o], %[x4_o], %[tmp0], ror #4\n\t"
"eor %[x4_e], %[x4_e], %[tmp1], ror #3\n\t"
: [x0_e] "+r"(s->w[0][0]), [x0_o] "+r"(s->w[0][1]),
[x1_e] "+r"(s->w[1][0]), [x1_o] "+r"(s->w[1][1]),
[x2_e] "+r"(s->w[2][0]), [x2_o] "+r"(s->w[2][1]),
[x3_e] "+r"(s->w[3][0]), [x3_o] "+r"(s->w[3][1]),
[x4_e] "+r"(s->w[4][0]), [x4_o] "+r"(s->w[4][1]), [tmp0] "=r"(tmp0),
[tmp1] "=r"(tmp1)
: [C_e] "i"((uint32_t)C), [C_o] "i"((uint32_t)(C >> 32))
:);
}
/* ---------------------------------------------------------------- */
/*
 * Applies the 12-round permutation to *s.
 * The round constants are written as literals at every call because ROUND
 * passes them to inline assembly as immediate operands.
 */
forceinline void P12ROUNDS(state_t *s)
{
ROUND(s, 0xc0000000c);
ROUND(s, 0xc00000009);
ROUND(s, 0x90000000c);
ROUND(s, 0x900000009);
ROUND(s, 0xc00000006);
ROUND(s, 0xc00000003);
ROUND(s, 0x900000006);
ROUND(s, 0x900000003);
ROUND(s, 0x60000000c);
ROUND(s, 0x600000009);
ROUND(s, 0x30000000c);
ROUND(s, 0x300000009);
}
/* ---------------------------------------------------------------- */
/*
 * Applies the 6-round permutation to *s; the constants are the last six of
 * P12ROUNDS and must stay literals (immediate operands of the assembly).
 */
forceinline void P6ROUNDS(state_t *s)
{
ROUND(s, 0x900000006);
ROUND(s, 0x900000003);
ROUND(s, 0x60000000c);
ROUND(s, 0x600000009);
ROUND(s, 0x30000000c);
ROUND(s, 0x300000009);
}
/* ---------------------------------------------------------------- */
/* Applies a single round to *s, using the last round constant of P12ROUNDS. */
forceinline void P1ROUNDS(state_t *s)
{
ROUND(s, 0x300000009);
}
/* ---------------------------------------------------------------- */
#endif // ASCONP_H_
#ifndef CONFIG_H
#define CONFIG_H
// Build switch for the optional Ascon-Hash code (presumably tested by
// isap.c as `ENABLE_HASH == 1` - confirm for this variant).
#define ENABLE_HASH 0
#include "isap.h"
// ISAP-A-128a
// Three 8-byte initialization values. They differ only in the first byte
// (0x01, 0x02, 0x03); the remaining seven bytes are the ISAP_* parameter
// macros (expected to come from isap.h).
// NOTE(review): these are non-static object definitions inside a header, so
// including this file from more than one translation unit would produce
// duplicate definitions - confirm it is included exactly once.
const uint8_t ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
// Ascon-Hash
// 8-byte initialization value for the hash function.
const uint8_t ASCON_HASH_IV[] = {0x00, 0x40, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00};
#endif // CONFIG_H
../bi_32/crypto_aead.c
\ No newline at end of file
../bi_32/forceinline.h
\ No newline at end of file
#ifndef ASCONP_H_
#define ASCONP_H_
#include <inttypes.h>
#include "forceinline.h"
// One 64-bit lane, viewable as a single 64-bit word, as two 32-bit words,
// or as eight bytes. Member order matters: brace initialization such as
// `= {0}` initializes the first member (x).
typedef union
{
uint64_t x; // whole lane
uint32_t w[2]; // the two 32-bit halves
uint8_t b[8]; // individual bytes
} lane_t;
// Permutation state of five 64-bit lanes (40 bytes), with overlapping
// views as lanes, 64-bit words, pairs of 32-bit words, or bytes.
typedef union
{
lane_t l[5]; // lane view
uint64_t x[5]; // 64-bit word per lane
uint32_t w[5][2]; // two 32-bit words per lane (the operands of ROUND)
uint8_t b[5][8]; // eight bytes per lane
} state_t;
/* ---------------------------------------------------------------- */
// Permutation shorthands. Each expands to a call on a variable named `s`,
// which must therefore be in scope (as a state_t *) wherever they are used.
#define P_sH P12ROUNDS(s)
#define P_sB P1ROUNDS(s)
#define P_sE P6ROUNDS(s)
#define P_sK P12ROUNDS(s)
/* ---------------------------------------------------------------- */
// Lane conversions with byte reversal: U64TOWORD byte-swaps and then
// bit-interleaves; WORDTOU64 de-interleaves and then byte-swaps.
#define U64TOWORD(x) to_bit_interleaving(U64BIG(x))
#define WORDTOU64(x) U64BIG(from_bit_interleaving(x))
/* ---------------------------------------------------------------- */
// Lane conversions without byte reversal.
#define TOBI(x) to_bit_interleaving(x)
#define FROMBI(x) from_bit_interleaving(x)
/* ---------------------------------------------------------------- */
// Returns the lane with the order of its eight bytes reversed.
forceinline lane_t U64BIG(lane_t x)
{
    uint64_t v = x.x;

    // Swap neighbouring bytes, then neighbouring 16-bit pairs, then the
    // two 32-bit halves; together this reverses all eight bytes.
    v = ((v & 0x00FF00FF00FF00FFULL) << 8) | ((v >> 8) & 0x00FF00FF00FF00FFULL);
    v = ((v & 0x0000FFFF0000FFFFULL) << 16) | ((v >> 16) & 0x0000FFFF0000FFFFULL);
    v = (v << 32) | (v >> 32);

    x.x = v;
    return x;
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Converts a lane to its bit-interleaved form.
// Each 32-bit half is permuted on its own by four swap stages (distances
// 1, 2, 4 and 8), which move its even-indexed bits into the low 16 bits
// and its odd-indexed bits into the high 16 bits. The result's w[0] then
// collects both low 16-bit parts and w[1] both high 16-bit parts.
forceinline lane_t to_bit_interleaving(lane_t in)
{
    uint32_t half_lo = in.w[0];
    uint32_t half_hi = in.w[1];
    uint32_t delta;

    // Lower 32-bit half
    delta = (half_lo ^ (half_lo >> 1)) & 0x22222222;
    half_lo ^= delta ^ (delta << 1);
    delta = (half_lo ^ (half_lo >> 2)) & 0x0C0C0C0C;
    half_lo ^= delta ^ (delta << 2);
    delta = (half_lo ^ (half_lo >> 4)) & 0x00F000F0;
    half_lo ^= delta ^ (delta << 4);
    delta = (half_lo ^ (half_lo >> 8)) & 0x0000FF00;
    half_lo ^= delta ^ (delta << 8);

    // Upper 32-bit half
    delta = (half_hi ^ (half_hi >> 1)) & 0x22222222;
    half_hi ^= delta ^ (delta << 1);
    delta = (half_hi ^ (half_hi >> 2)) & 0x0C0C0C0C;
    half_hi ^= delta ^ (delta << 2);
    delta = (half_hi ^ (half_hi >> 4)) & 0x00F000F0;
    half_hi ^= delta ^ (delta << 4);
    delta = (half_hi ^ (half_hi >> 8)) & 0x0000FF00;
    half_hi ^= delta ^ (delta << 8);

    lane_t out;
    out.w[0] = (half_hi << 16) | (half_lo & 0x0000FFFF);
    out.w[1] = (half_hi & 0xFFFF0000) | (half_lo >> 16);
    return out;
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Converts a bit-interleaved lane back to a plain 64-bit word.
// The 16-bit parts of w[0] and w[1] are first regrouped into two 32-bit
// halves; each half then runs the four swap stages of
// to_bit_interleaving in the opposite order (distances 8, 4, 2, 1).
forceinline lane_t from_bit_interleaving(lane_t in)
{
    uint32_t half_lo = (in.w[1] << 16) | (in.w[0] & 0x0000FFFF);
    uint32_t half_hi = (in.w[1] & 0xFFFF0000) | (in.w[0] >> 16);
    uint32_t delta;

    // Lower 32-bit half
    delta = (half_lo ^ (half_lo >> 8)) & 0x0000FF00;
    half_lo ^= delta ^ (delta << 8);
    delta = (half_lo ^ (half_lo >> 4)) & 0x00F000F0;
    half_lo ^= delta ^ (delta << 4);
    delta = (half_lo ^ (half_lo >> 2)) & 0x0C0C0C0C;
    half_lo ^= delta ^ (delta << 2);
    delta = (half_lo ^ (half_lo >> 1)) & 0x22222222;
    half_lo ^= delta ^ (delta << 1);

    // Upper 32-bit half
    delta = (half_hi ^ (half_hi >> 8)) & 0x0000FF00;
    half_hi ^= delta ^ (delta << 8);
    delta = (half_hi ^ (half_hi >> 4)) & 0x00F000F0;
    half_hi ^= delta ^ (delta << 4);
    delta = (half_hi ^ (half_hi >> 2)) & 0x0C0C0C0C;
    half_hi ^= delta ^ (delta << 2);
    delta = (half_hi ^ (half_hi >> 1)) & 0x22222222;
    half_hi ^= delta ^ (delta << 1);

    lane_t out;
    out.x = ((uint64_t)half_hi << 32) | half_lo;
    return out;
}
/* ---------------------------------------------------------------- */
// Applies one permutation round to *s with a single inline-assembly block.
//
// s: state, accessed as the ten 32-bit words s->w[i][0] (operands x<i>_e)
//    and s->w[i][1] (operands x<i>_o); all ten are read-write operands.
//    The _e/_o suffixes presumably denote the even/odd bit halves produced
//    by to_bit_interleaving - confirm.
// C: two packed 32-bit round constants. The low 32 bits (C_e) are XORed
//    into x2_e at the start; the high 32 bits (C_o) are XORed into the
//    value loaded from x2_o.
//
// The x<i>_e operands and the three temporaries use the "l" constraint,
// the x<i>_o operands use "r"; values are moved between the two groups
// with movs as the computation proceeds.
//
// NOTE(review): the clobber list is empty although the block contains
// movs/eor/bic/ror instructions, which may update the condition flags in
// the targeted encoding - confirm whether a "cc" clobber is required.
// NOTE(review): tmp0..tmp2 are plain "=l" outputs (no early-clobber
// modifier) while C_e/C_o allow a register ("ri"); this appears to rely on
// C being a compile-time constant at every call site - confirm.
forceinline void ROUND(state_t *s, uint64_t C)
{
uint32_t tmp0, tmp1, tmp2;
__asm__ __volatile__(
"@.syntax_unified\n\t"
// XOR the low-half constant into x2_e, then mix the *_e words
"movs %[tmp1], %[C_e]\n\t"
"eor %[x2_e], %[x2_e], %[tmp1]\n\t"
"eor %[x0_e], %[x0_e], %[x4_e]\n\t"
"eor %[x4_e], %[x4_e], %[x3_e]\n\t"
"eor %[x2_e], %[x2_e], %[x1_e]\n\t"
"movs %[tmp0], %[x0_e]\n\t"
"bic %[tmp0], %[tmp0], %[x4_e]\n\t"
"movs %[tmp1], %[x2_e]\n\t"
"bic %[tmp1], %[tmp1], %[x1_e]\n\t"
"eor %[x0_e], %[x0_e], %[tmp1]\n\t"
"movs %[tmp1], %[x4_e]\n\t"
"bic %[tmp1], %[tmp1], %[x3_e]\n\t"
"eor %[x2_e], %[x2_e], %[tmp1]\n\t"
"movs %[tmp2], %[x1_e]\n\t"
"bic %[tmp2], %[tmp2], %[x0_e]\n\t"
"eor %[tmp2], %[x4_e], %[tmp2]\n\t"
"movs %[tmp1], %[x3_e]\n\t"
"bic %[tmp1], %[tmp1], %[x2_e]\n\t"
"eor %[tmp1], %[x1_e], %[tmp1]\n\t"
"eor %[tmp0], %[x3_e], %[tmp0]\n\t"
"eor %[tmp0], %[tmp0], %[x2_e]\n\t"
"eor %[tmp1], %[tmp1], %[x0_e]\n\t"
"eor %[x0_e], %[x0_e], %[tmp2]\n\t"
// Park the results in the *_o operands and bring the original *_o
// words into "l" registers / temporaries for the same mixing sequence
"movs %[x4_e], %[x4_o]\n\t"
"movs %[x1_e], %[x1_o]\n\t"
"movs %[x3_e], %[x3_o]\n\t"
"movs %[x3_o], %[tmp0]\n\t"
"movs %[x1_o], %[tmp1]\n\t"
"movs %[tmp0], %[x0_o]\n\t"
"movs %[tmp1], %[x2_o]\n\t"
"movs %[x0_o], %[x0_e]\n\t"
"movs %[x2_o], %[x2_e]\n\t"
// XOR the high-half constant into the former x2_o value (now in tmp1)
"movs %[x0_e], %[C_o]\n\t"
"eor %[tmp1], %[tmp1], %[x0_e]\n\t"
"eor %[tmp0], %[tmp0], %[x4_e]\n\t"
"eor %[x4_e], %[x4_e], %[x3_e]\n\t"
"eor %[tmp1], %[tmp1], %[x1_e]\n\t"
"movs %[x0_e], %[tmp0] \n\t"
"bic %[x0_e], %[x0_e], %[x4_e]\n\t"
"movs %[x2_e], %[tmp1] \n\t"
"bic %[x2_e], %[x2_e], %[x1_e]\n\t"
"eor %[tmp0], %[tmp0], %[x2_e]\n\t"
"movs %[x2_e], %[x4_e] \n\t"
"bic %[x2_e], %[x2_e], %[x3_e]\n\t"
"eor %[tmp1], %[tmp1], %[x2_e]\n\t"
"movs %[x2_e], %[x1_e]\n\t"
"bic %[x2_e], %[x2_e], %[tmp0]\n\t"
"eor %[x4_e], %[x4_e], %[x2_e]\n\t"
"movs %[x2_e], %[x3_e] \n\t"
"bic %[x2_e], %[x2_e], %[tmp1]\n\t"
"eor %[x1_e], %[x1_e], %[x2_e]\n\t"
"eor %[x3_e], %[x3_e], %[x0_e]\n\t"
"eor %[x3_e], %[x3_e], %[tmp1]\n\t"
"eor %[x1_e], %[x1_e], %[tmp0]\n\t"
"eor %[tmp0], %[tmp0], %[x4_e]\n\t"
"movs %[x4_o], %[tmp1]\n\t"
"movs %[x2_e], %[x3_o]\n\t"
"movs %[x3_o], %[x1_e]\n\t"
// Rotate-and-XOR steps: every rotation amount is first loaded into a
// register with movs and then applied with a register-operand ror
"movs %[tmp1], #17\n\t"
"movs %[x0_e], %[tmp2]\n\t"
"ror %[x0_e], %[x0_e], %[tmp1]\n\t"
"eor %[x0_e], %[tmp2], %[x0_e]\n\t"
"movs %[x1_e], %[x4_e]\n\t"
"ror %[x1_e], %[x1_e], %[tmp1]\n\t"
"eor %[x1_e], %[x4_e], %[x1_e]\n\t"
"movs %[tmp1], #3\n\t"
"ror %[x1_e], %[x1_e], %[tmp1]\n\t"
"eor %[tmp2], %[tmp2], %[x1_e]\n\t"
"movs %[tmp1], #4\n\t"
"ror %[x0_e], %[x0_e], %[tmp1]\n\t"
"eor %[x4_e], %[x4_e], %[x0_e]\n\t"
"movs %[x1_e], %[x2_e]\n\t"
"ror %[x1_e], %[x1_e], %[tmp1]\n\t"
"eor %[x1_e], %[x3_e], %[x1_e]\n\t"
"movs %[tmp1], #3\n\t"
"movs %[x0_e], %[x3_e]\n\t"
"ror %[x0_e], %[x0_e], %[tmp1]\n\t"
"eor %[x0_e], %[x2_e], %[x0_e]\n\t"
"movs %[tmp1], #5\n\t"
"ror %[x0_e], %[x0_e], %[tmp1]\n\t"
"eor %[x2_e], %[x2_e], %[x0_e]\n\t"
"ror %[x1_e], %[x1_e], %[tmp1]\n\t"
"eor %[x3_e], %[x3_e], %[x1_e]\n\t"
"movs %[x0_e], %[x0_o]\n\t"
"movs %[x1_e], %[x1_o]\n\t"
"movs %[x1_o], %[x2_e]\n\t"
"movs %[x0_o], %[tmp2]\n\t"
"movs %[tmp2], %[x4_o]\n\t"
"movs %[x4_o], %[x4_e]\n\t"
"movs %[x4_e], %[x3_o]\n\t"
"movs %[x3_o], %[x3_e]\n\t"
"movs %[x3_e], %[x0_e]\n\t"
"ror %[x3_e], %[x3_e], %[tmp1]\n\t"
"eor %[x3_e], %[tmp0], %[x3_e]\n\t"
"movs %[tmp1], #4\n\t"
"movs %[x2_e], %[tmp0]\n\t"
"ror %[x2_e], %[x2_e], %[tmp1]\n\t"
"eor %[x2_e], %[x0_e], %[x2_e]\n\t"
"movs %[tmp1], #9\n\t"
"ror %[x3_e], %[x3_e], %[tmp1]\n\t"
"eor %[x0_e], %[x0_e], %[x3_e]\n\t"
"movs %[tmp1], #10\n\t"
"ror %[x2_e], %[x2_e], %[tmp1]\n\t"
"eor %[tmp0], %[tmp0], %[x2_e]\n\t"
"movs %[tmp1], #11\n\t"
"movs %[x2_e], %[x1_e]\n\t"
"ror %[x2_e], %[x2_e], %[tmp1]\n\t"
"eor %[x2_e], %[x1_e], %[x2_e]\n\t"
"movs %[x3_e], %[x4_e]\n\t"
"ror %[x3_e], %[x3_e], %[tmp1]\n\t"
"eor %[x3_e], %[x4_e], %[x3_e]\n\t"
"movs %[tmp1], #19\n\t"
"ror %[x3_e], %[x3_e], %[tmp1]\n\t"
"eor %[x1_e], %[x1_e], %[x3_e]\n\t"
"movs %[tmp1], #20\n\t"
"ror %[x2_e], %[x2_e], %[tmp1]\n\t"
"eor %[x4_e], %[x4_e], %[x2_e]\n\t"
"movs %[x2_e], %[x2_o]\n\t"
"movs %[x3_e], %[x1_o]\n\t"
"movs %[x1_o], %[x4_e]\n\t"
"movs %[x2_o], %[tmp0]\n\t"
// The last group complements its inputs with mvn before rotating
"movs %[x4_e], #2\n\t"
"mvn %[tmp0], %[tmp2]\n\t"
"ror %[tmp0], %[tmp0], %[x4_e]\n\t"
"eor %[tmp0], %[x2_e], %[tmp0]\n\t"
"movs %[x4_e], #3\n\t"
"mvn %[tmp1], %[x2_e]\n\t"
"ror %[tmp1], %[tmp1], %[x4_e]\n\t"
"eor %[tmp1], %[tmp2], %[tmp1]\n\t"
"eor %[x2_e], %[x2_e], %[tmp1]\n\t"
"movs %[x4_e], #1\n\t"
"ror %[tmp0], %[tmp0], %[x4_e]\n\t"
"eor %[tmp2], %[tmp2], %[tmp0]\n\t"
"movs %[x4_e], %[x0_o]\n\t"
"movs %[x0_o], %[x2_o]\n\t"
"movs %[x2_o], %[tmp2]\n\t"
: [x0_e] "+l"(s->w[0][0]), [x1_e] "+l"(s->w[1][0]),
[x2_e] "+l"(s->w[2][0]), [x3_e] "+l"(s->w[3][0]),
[x4_e] "+l"(s->w[4][0]), [x0_o] "+r"(s->w[0][1]),
[x1_o] "+r"(s->w[1][1]), [x2_o] "+r"(s->w[2][1]),
[x3_o] "+r"(s->w[3][1]), [x4_o] "+r"(s->w[4][1]),
[tmp0] "=l"(tmp0), [tmp1] "=l"(tmp1), [tmp2] "=l"(tmp2)
: [C_e] "ri"((uint32_t)C), [C_o] "ri"((uint32_t)(C >> 32))
:);
}
/* ---------------------------------------------------------------- */
// Applies twelve consecutive ROUND calls to *s, one per constant below.
// Each literal packs two 32-bit round constants: ROUND uses the low
// 32 bits as C_e and the high 32 bits as C_o.
forceinline void P12ROUNDS(state_t *s)
{
ROUND(s, 0xc0000000c);
ROUND(s, 0xc00000009);
ROUND(s, 0x90000000c);
ROUND(s, 0x900000009);
ROUND(s, 0xc00000006);
ROUND(s, 0xc00000003);
ROUND(s, 0x900000006);
ROUND(s, 0x900000003);
ROUND(s, 0x60000000c);
ROUND(s, 0x600000009);
ROUND(s, 0x30000000c);
ROUND(s, 0x300000009);
}
/* ---------------------------------------------------------------- */
// Applies six consecutive ROUND calls to *s.
// The constants are identical to the last six used by P12ROUNDS.
forceinline void P6ROUNDS(state_t *s)
{
ROUND(s, 0x900000006);
ROUND(s, 0x900000003);
ROUND(s, 0x60000000c);
ROUND(s, 0x600000009);
ROUND(s, 0x30000000c);
ROUND(s, 0x300000009);
}
/* ---------------------------------------------------------------- */
// Applies a single ROUND to *s, using the constant that also ends the
// P12ROUNDS and P6ROUNDS sequences.
forceinline void P1ROUNDS(state_t *s)
{
ROUND(s, 0x300000009);
}
/* ---------------------------------------------------------------- */
#endif // ASCONP_H_
#ifndef CONFIG_H
#define CONFIG_H
// Build switch for the optional Ascon-Hash code (presumably tested by
// isap.c as `ENABLE_HASH == 1` - confirm for this variant).
#define ENABLE_HASH 0
#include "isap.h"
// ISAP-A-128a
// Three 8-byte initialization values. They differ only in the first byte
// (0x01, 0x02, 0x03); the remaining seven bytes are the ISAP_* parameter
// macros (expected to come from isap.h).
// NOTE(review): these are non-static object definitions inside a header, so
// including this file from more than one translation unit would produce
// duplicate definitions - confirm it is included exactly once.
const uint8_t ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
// Ascon-Hash
// 8-byte initialization value for the hash function.
const uint8_t ASCON_HASH_IV[] = {0x00, 0x40, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00};
#endif // CONFIG_H
../bi_32/crypto_aead.c
\ No newline at end of file
../bi_32/forceinline.h
\ No newline at end of file
#ifndef ASCONP_H_
#define ASCONP_H_
#include <inttypes.h>
#include "forceinline.h"
// One 64-bit lane, viewable as a single 64-bit word, as two 32-bit words,
// or as eight bytes. Member order matters: brace initialization such as
// `= {0}` initializes the first member (x).
typedef union
{
uint64_t x; // whole lane
uint32_t w[2]; // the two 32-bit halves
uint8_t b[8]; // individual bytes
} lane_t;
// Permutation state of five 64-bit lanes (40 bytes), with overlapping
// views as lanes, 64-bit words, pairs of 32-bit words, or bytes.
typedef union
{
lane_t l[5]; // lane view
uint64_t x[5]; // 64-bit word per lane
uint32_t w[5][2]; // two 32-bit words per lane (the operands of ROUND)
uint8_t b[5][8]; // eight bytes per lane
} state_t;
/* ---------------------------------------------------------------- */
// Permutation shorthands. Each expands to a PROUNDS call on a variable
// named `s`, which must therefore be in scope (as a state_t *) wherever
// they are used. The second argument is the number of rounds.
#define P_sH PROUNDS(s, 12)
#define P_sB PROUNDS(s, 1)
#define P_sE PROUNDS(s, 6)
#define P_sK PROUNDS(s, 12)
/* ---------------------------------------------------------------- */
// Lane conversions with byte reversal: U64TOWORD byte-swaps and then
// bit-interleaves; WORDTOU64 de-interleaves and then byte-swaps.
#define U64TOWORD(x) to_bit_interleaving(U64BIG(x))
#define WORDTOU64(x) U64BIG(from_bit_interleaving(x))
/* ---------------------------------------------------------------- */
// Lane conversions without byte reversal.
#define TOBI(x) to_bit_interleaving(x)
#define FROMBI(x) from_bit_interleaving(x)
/* ---------------------------------------------------------------- */
// Returns the lane with the order of its eight bytes reversed.
lane_t U64BIG(lane_t x)
{
    uint64_t v = x.x;

    // Swap neighbouring bytes, then neighbouring 16-bit pairs, then the
    // two 32-bit halves; together this reverses all eight bytes.
    v = ((v & 0x00FF00FF00FF00FFULL) << 8) | ((v >> 8) & 0x00FF00FF00FF00FFULL);
    v = ((v & 0x0000FFFF0000FFFFULL) << 16) | ((v >> 16) & 0x0000FFFF0000FFFFULL);
    v = (v << 32) | (v >> 32);

    x.x = v;
    return x;
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Converts a lane to its bit-interleaved form.
// Each 32-bit half is permuted on its own by four swap stages (distances
// 1, 2, 4 and 8), which move its even-indexed bits into the low 16 bits
// and its odd-indexed bits into the high 16 bits. The result's w[0] then
// collects both low 16-bit parts and w[1] both high 16-bit parts.
lane_t to_bit_interleaving(lane_t in)
{
    uint32_t half_lo = in.w[0];
    uint32_t half_hi = in.w[1];
    uint32_t delta;

    // Lower 32-bit half
    delta = (half_lo ^ (half_lo >> 1)) & 0x22222222;
    half_lo ^= delta ^ (delta << 1);
    delta = (half_lo ^ (half_lo >> 2)) & 0x0C0C0C0C;
    half_lo ^= delta ^ (delta << 2);
    delta = (half_lo ^ (half_lo >> 4)) & 0x00F000F0;
    half_lo ^= delta ^ (delta << 4);
    delta = (half_lo ^ (half_lo >> 8)) & 0x0000FF00;
    half_lo ^= delta ^ (delta << 8);

    // Upper 32-bit half
    delta = (half_hi ^ (half_hi >> 1)) & 0x22222222;
    half_hi ^= delta ^ (delta << 1);
    delta = (half_hi ^ (half_hi >> 2)) & 0x0C0C0C0C;
    half_hi ^= delta ^ (delta << 2);
    delta = (half_hi ^ (half_hi >> 4)) & 0x00F000F0;
    half_hi ^= delta ^ (delta << 4);
    delta = (half_hi ^ (half_hi >> 8)) & 0x0000FF00;
    half_hi ^= delta ^ (delta << 8);

    lane_t out;
    out.w[0] = (half_hi << 16) | (half_lo & 0x0000FFFF);
    out.w[1] = (half_hi & 0xFFFF0000) | (half_lo >> 16);
    return out;
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Converts a bit-interleaved lane back to a plain 64-bit word.
// The 16-bit parts of w[0] and w[1] are first regrouped into two 32-bit
// halves; each half then runs the four swap stages of
// to_bit_interleaving in the opposite order (distances 8, 4, 2, 1).
lane_t from_bit_interleaving(lane_t in)
{
    uint32_t half_lo = (in.w[1] << 16) | (in.w[0] & 0x0000FFFF);
    uint32_t half_hi = (in.w[1] & 0xFFFF0000) | (in.w[0] >> 16);
    uint32_t delta;

    // Lower 32-bit half
    delta = (half_lo ^ (half_lo >> 8)) & 0x0000FF00;
    half_lo ^= delta ^ (delta << 8);
    delta = (half_lo ^ (half_lo >> 4)) & 0x00F000F0;
    half_lo ^= delta ^ (delta << 4);
    delta = (half_lo ^ (half_lo >> 2)) & 0x0C0C0C0C;
    half_lo ^= delta ^ (delta << 2);
    delta = (half_lo ^ (half_lo >> 1)) & 0x22222222;
    half_lo ^= delta ^ (delta << 1);

    // Upper 32-bit half
    delta = (half_hi ^ (half_hi >> 8)) & 0x0000FF00;
    half_hi ^= delta ^ (delta << 8);
    delta = (half_hi ^ (half_hi >> 4)) & 0x00F000F0;
    half_hi ^= delta ^ (delta << 4);
    delta = (half_hi ^ (half_hi >> 2)) & 0x0C0C0C0C;
    half_hi ^= delta ^ (delta << 2);
    delta = (half_hi ^ (half_hi >> 1)) & 0x22222222;
    half_hi ^= delta ^ (delta << 1);

    lane_t out;
    out.x = ((uint64_t)half_hi << 32) | half_lo;
    return out;
}
/* ---------------------------------------------------------------- */
// Applies one permutation round to *s with a single inline-assembly block.
//
// s: state, accessed as the ten 32-bit words s->w[i][0] (operands x<i>_e)
//    and s->w[i][1] (operands x<i>_o); all ten are read-write operands.
//    The _e/_o suffixes presumably denote the even/odd bit halves produced
//    by to_bit_interleaving - confirm.
// C: two packed 32-bit round constants. The low 32 bits (C_e) are XORed
//    into x2_e at the start; the high 32 bits (C_o) are XORed into the
//    value loaded from x2_o.
//
// The x<i>_e operands and the three temporaries use the "l" constraint,
// the x<i>_o operands use "r"; values are moved between the two groups
// with movs as the computation proceeds.
//
// NOTE(review): the clobber list is empty although the block contains
// movs/eor/bic/ror instructions, which may update the condition flags in
// the targeted encoding - confirm whether a "cc" clobber is required.
// NOTE(review): tmp0..tmp2 are plain "=l" outputs (no early-clobber
// modifier) while C_e/C_o allow a register ("ri"); this appears to rely on
// C being a compile-time constant at every call site - confirm.
forceinline void ROUND(state_t *s, uint64_t C)
{
uint32_t tmp0, tmp1, tmp2;
__asm__ __volatile__(
"@.syntax_unified\n\t"
// XOR the low-half constant into x2_e, then mix the *_e words
"movs %[tmp1], %[C_e]\n\t"
"eor %[x2_e], %[x2_e], %[tmp1]\n\t"
"eor %[x0_e], %[x0_e], %[x4_e]\n\t"
"eor %[x4_e], %[x4_e], %[x3_e]\n\t"
"eor %[x2_e], %[x2_e], %[x1_e]\n\t"
"movs %[tmp0], %[x0_e]\n\t"
"bic %[tmp0], %[tmp0], %[x4_e]\n\t"
"movs %[tmp1], %[x2_e]\n\t"
"bic %[tmp1], %[tmp1], %[x1_e]\n\t"
"eor %[x0_e], %[x0_e], %[tmp1]\n\t"
"movs %[tmp1], %[x4_e]\n\t"
"bic %[tmp1], %[tmp1], %[x3_e]\n\t"
"eor %[x2_e], %[x2_e], %[tmp1]\n\t"
"movs %[tmp2], %[x1_e]\n\t"
"bic %[tmp2], %[tmp2], %[x0_e]\n\t"
"eor %[tmp2], %[x4_e], %[tmp2]\n\t"
"movs %[tmp1], %[x3_e]\n\t"
"bic %[tmp1], %[tmp1], %[x2_e]\n\t"
"eor %[tmp1], %[x1_e], %[tmp1]\n\t"
"eor %[tmp0], %[x3_e], %[tmp0]\n\t"
"eor %[tmp0], %[tmp0], %[x2_e]\n\t"
"eor %[tmp1], %[tmp1], %[x0_e]\n\t"
"eor %[x0_e], %[x0_e], %[tmp2]\n\t"
// Park the results in the *_o operands and bring the original *_o
// words into "l" registers / temporaries for the same mixing sequence
"movs %[x4_e], %[x4_o]\n\t"
"movs %[x1_e], %[x1_o]\n\t"
"movs %[x3_e], %[x3_o]\n\t"
"movs %[x3_o], %[tmp0]\n\t"
"movs %[x1_o], %[tmp1]\n\t"
"movs %[tmp0], %[x0_o]\n\t"
"movs %[tmp1], %[x2_o]\n\t"
"movs %[x0_o], %[x0_e]\n\t"
"movs %[x2_o], %[x2_e]\n\t"
// XOR the high-half constant into the former x2_o value (now in tmp1)
"movs %[x0_e], %[C_o]\n\t"
"eor %[tmp1], %[tmp1], %[x0_e]\n\t"
"eor %[tmp0], %[tmp0], %[x4_e]\n\t"
"eor %[x4_e], %[x4_e], %[x3_e]\n\t"
"eor %[tmp1], %[tmp1], %[x1_e]\n\t"
"movs %[x0_e], %[tmp0] \n\t"
"bic %[x0_e], %[x0_e], %[x4_e]\n\t"
"movs %[x2_e], %[tmp1] \n\t"
"bic %[x2_e], %[x2_e], %[x1_e]\n\t"
"eor %[tmp0], %[tmp0], %[x2_e]\n\t"
"movs %[x2_e], %[x4_e] \n\t"
"bic %[x2_e], %[x2_e], %[x3_e]\n\t"
"eor %[tmp1], %[tmp1], %[x2_e]\n\t"
"movs %[x2_e], %[x1_e]\n\t"
"bic %[x2_e], %[x2_e], %[tmp0]\n\t"
"eor %[x4_e], %[x4_e], %[x2_e]\n\t"
"movs %[x2_e], %[x3_e] \n\t"
"bic %[x2_e], %[x2_e], %[tmp1]\n\t"
"eor %[x1_e], %[x1_e], %[x2_e]\n\t"
"eor %[x3_e], %[x3_e], %[x0_e]\n\t"
"eor %[x3_e], %[x3_e], %[tmp1]\n\t"
"eor %[x1_e], %[x1_e], %[tmp0]\n\t"
"eor %[tmp0], %[tmp0], %[x4_e]\n\t"
"movs %[x4_o], %[tmp1]\n\t"
"movs %[x2_e], %[x3_o]\n\t"
"movs %[x3_o], %[x1_e]\n\t"
// Rotate-and-XOR steps: every rotation amount is first loaded into a
// register with movs and then applied with a register-operand ror
"movs %[tmp1], #17\n\t"
"movs %[x0_e], %[tmp2]\n\t"
"ror %[x0_e], %[x0_e], %[tmp1]\n\t"
"eor %[x0_e], %[tmp2], %[x0_e]\n\t"
"movs %[x1_e], %[x4_e]\n\t"
"ror %[x1_e], %[x1_e], %[tmp1]\n\t"
"eor %[x1_e], %[x4_e], %[x1_e]\n\t"
"movs %[tmp1], #3\n\t"
"ror %[x1_e], %[x1_e], %[tmp1]\n\t"
"eor %[tmp2], %[tmp2], %[x1_e]\n\t"
"movs %[tmp1], #4\n\t"
"ror %[x0_e], %[x0_e], %[tmp1]\n\t"
"eor %[x4_e], %[x4_e], %[x0_e]\n\t"
"movs %[x1_e], %[x2_e]\n\t"
"ror %[x1_e], %[x1_e], %[tmp1]\n\t"
"eor %[x1_e], %[x3_e], %[x1_e]\n\t"
"movs %[tmp1], #3\n\t"
"movs %[x0_e], %[x3_e]\n\t"
"ror %[x0_e], %[x0_e], %[tmp1]\n\t"
"eor %[x0_e], %[x2_e], %[x0_e]\n\t"
"movs %[tmp1], #5\n\t"
"ror %[x0_e], %[x0_e], %[tmp1]\n\t"
"eor %[x2_e], %[x2_e], %[x0_e]\n\t"
"ror %[x1_e], %[x1_e], %[tmp1]\n\t"
"eor %[x3_e], %[x3_e], %[x1_e]\n\t"
"movs %[x0_e], %[x0_o]\n\t"
"movs %[x1_e], %[x1_o]\n\t"
"movs %[x1_o], %[x2_e]\n\t"
"movs %[x0_o], %[tmp2]\n\t"
"movs %[tmp2], %[x4_o]\n\t"
"movs %[x4_o], %[x4_e]\n\t"
"movs %[x4_e], %[x3_o]\n\t"
"movs %[x3_o], %[x3_e]\n\t"
"movs %[x3_e], %[x0_e]\n\t"
"ror %[x3_e], %[x3_e], %[tmp1]\n\t"
"eor %[x3_e], %[tmp0], %[x3_e]\n\t"
"movs %[tmp1], #4\n\t"
"movs %[x2_e], %[tmp0]\n\t"
"ror %[x2_e], %[x2_e], %[tmp1]\n\t"
"eor %[x2_e], %[x0_e], %[x2_e]\n\t"
"movs %[tmp1], #9\n\t"
"ror %[x3_e], %[x3_e], %[tmp1]\n\t"
"eor %[x0_e], %[x0_e], %[x3_e]\n\t"
"movs %[tmp1], #10\n\t"
"ror %[x2_e], %[x2_e], %[tmp1]\n\t"
"eor %[tmp0], %[tmp0], %[x2_e]\n\t"
"movs %[tmp1], #11\n\t"
"movs %[x2_e], %[x1_e]\n\t"
"ror %[x2_e], %[x2_e], %[tmp1]\n\t"
"eor %[x2_e], %[x1_e], %[x2_e]\n\t"
"movs %[x3_e], %[x4_e]\n\t"
"ror %[x3_e], %[x3_e], %[tmp1]\n\t"
"eor %[x3_e], %[x4_e], %[x3_e]\n\t"
"movs %[tmp1], #19\n\t"
"ror %[x3_e], %[x3_e], %[tmp1]\n\t"
"eor %[x1_e], %[x1_e], %[x3_e]\n\t"
"movs %[tmp1], #20\n\t"
"ror %[x2_e], %[x2_e], %[tmp1]\n\t"
"eor %[x4_e], %[x4_e], %[x2_e]\n\t"
"movs %[x2_e], %[x2_o]\n\t"
"movs %[x3_e], %[x1_o]\n\t"
"movs %[x1_o], %[x4_e]\n\t"
"movs %[x2_o], %[tmp0]\n\t"
// The last group complements its inputs with mvn before rotating
"movs %[x4_e], #2\n\t"
"mvn %[tmp0], %[tmp2]\n\t"
"ror %[tmp0], %[tmp0], %[x4_e]\n\t"
"eor %[tmp0], %[x2_e], %[tmp0]\n\t"
"movs %[x4_e], #3\n\t"
"mvn %[tmp1], %[x2_e]\n\t"
"ror %[tmp1], %[tmp1], %[x4_e]\n\t"
"eor %[tmp1], %[tmp2], %[tmp1]\n\t"
"eor %[x2_e], %[x2_e], %[tmp1]\n\t"
"movs %[x4_e], #1\n\t"
"ror %[tmp0], %[tmp0], %[x4_e]\n\t"
"eor %[tmp2], %[tmp2], %[tmp0]\n\t"
"movs %[x4_e], %[x0_o]\n\t"
"movs %[x0_o], %[x2_o]\n\t"
"movs %[x2_o], %[tmp2]\n\t"
: [x0_e] "+l"(s->w[0][0]), [x1_e] "+l"(s->w[1][0]),
[x2_e] "+l"(s->w[2][0]), [x3_e] "+l"(s->w[3][0]),
[x4_e] "+l"(s->w[4][0]), [x0_o] "+r"(s->w[0][1]),
[x1_o] "+r"(s->w[1][1]), [x2_o] "+r"(s->w[2][1]),
[x3_o] "+r"(s->w[3][1]), [x4_o] "+r"(s->w[4][1]),
[tmp0] "=l"(tmp0), [tmp1] "=l"(tmp1), [tmp2] "=l"(tmp2)
: [C_e] "ri"((uint32_t)C), [C_o] "ri"((uint32_t)(C >> 32))
:);
}
/* ---------------------------------------------------------------- */
// Runs the permutation on *s for the requested number of rounds.
// nr == 12 performs all twelve rounds, nr == 6 the final six rounds, and
// every other value performs only the final round.
void PROUNDS(state_t *s, uint8_t nr)
{
    const int run_all_twelve = (nr == 12);
    const int run_last_six = run_all_twelve || (nr == 6);

    // Rounds 1-6: only part of the full twelve-round permutation
    if (run_all_twelve)
    {
        ROUND(s, 0xc0000000c);
        ROUND(s, 0xc00000009);
        ROUND(s, 0x90000000c);
        ROUND(s, 0x900000009);
        ROUND(s, 0xc00000006);
        ROUND(s, 0xc00000003);
    }

    // Rounds 7-11: shared by the twelve- and six-round variants
    if (run_last_six)
    {
        ROUND(s, 0x900000006);
        ROUND(s, 0x900000003);
        ROUND(s, 0x60000000c);
        ROUND(s, 0x600000009);
        ROUND(s, 0x30000000c);
    }

    // Round 12: executed for every value of nr
    ROUND(s, 0x300000009);
}
/* ---------------------------------------------------------------- */
#endif // ASCONP_H_
#ifndef CONFIG_H
#define CONFIG_H
// Build switch for the optional Ascon-Hash code (presumably tested by
// isap.c as `ENABLE_HASH == 1` - confirm for this variant).
#define ENABLE_HASH 0
#include "isap.h"
// ISAP-A-128a
// Three 8-byte initialization values. They differ only in the first byte
// (0x01, 0x02, 0x03); the remaining seven bytes are the ISAP_* parameter
// macros (expected to come from isap.h).
// NOTE(review): these are non-static object definitions inside a header, so
// including this file from more than one translation unit would produce
// duplicate definitions - confirm it is included exactly once.
const uint8_t ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
// Ascon-Hash
// 8-byte initialization value for the hash function.
const uint8_t ASCON_HASH_IV[] = {0x00, 0x40, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00};
#endif // CONFIG_H
#ifndef ASCONP_H_
#define ASCONP_H_
#include <inttypes.h>
#include "forceinline.h"
// One 64-bit lane, viewable as a single 64-bit word, as two 32-bit words,
// or as eight bytes. Member order matters: brace initialization such as
// `= {0}` initializes the first member (x).
typedef union
{
uint64_t x; // whole lane
uint32_t w[2]; // the two 32-bit halves
uint8_t b[8]; // individual bytes
} lane_t;
// Permutation state of five 64-bit lanes (40 bytes), with overlapping
// views as lanes, 64-bit words, pairs of 32-bit words, or bytes.
typedef union
{
lane_t l[5]; // lane view
uint64_t x[5]; // 64-bit word per lane
uint32_t w[5][2]; // two 32-bit words per lane (the operands of ROUND)
uint8_t b[5][8]; // eight bytes per lane
} state_t;
/* ---------------------------------------------------------------- */
// Permutation shorthands. Each expands to a PROUNDS call on a variable
// named `s`, which must therefore be in scope (as a state_t *) wherever
// they are used. The second argument is the number of rounds; P_PVP is
// the seven-round variant used by the tag comparison.
#define P_sH PROUNDS(s, 12)
#define P_sB PROUNDS(s, 1)
#define P_sE PROUNDS(s, 6)
#define P_sK PROUNDS(s, 12)
#define P_PVP PROUNDS(s, 7)
/* ---------------------------------------------------------------- */
// Lane conversions with byte reversal: U64TOWORD byte-swaps and then
// bit-interleaves; WORDTOU64 de-interleaves and then byte-swaps.
#define U64TOWORD(x) to_bit_interleaving(U64BIG(x))
#define WORDTOU64(x) U64BIG(from_bit_interleaving(x))
/* ---------------------------------------------------------------- */
// Lane conversions without byte reversal.
#define TOBI(x) to_bit_interleaving(x)
#define FROMBI(x) from_bit_interleaving(x)
/* ---------------------------------------------------------------- */
// Returns the lane with the order of its eight bytes reversed.
lane_t U64BIG(lane_t x)
{
    uint64_t v = x.x;

    // Swap neighbouring bytes, then neighbouring 16-bit pairs, then the
    // two 32-bit halves; together this reverses all eight bytes.
    v = ((v & 0x00FF00FF00FF00FFULL) << 8) | ((v >> 8) & 0x00FF00FF00FF00FFULL);
    v = ((v & 0x0000FFFF0000FFFFULL) << 16) | ((v >> 16) & 0x0000FFFF0000FFFFULL);
    v = (v << 32) | (v >> 32);

    x.x = v;
    return x;
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Converts a lane to its bit-interleaved form.
// Each 32-bit half is permuted on its own by four swap stages (distances
// 1, 2, 4 and 8), which move its even-indexed bits into the low 16 bits
// and its odd-indexed bits into the high 16 bits. The result's w[0] then
// collects both low 16-bit parts and w[1] both high 16-bit parts.
lane_t to_bit_interleaving(lane_t in)
{
    uint32_t half_lo = in.w[0];
    uint32_t half_hi = in.w[1];
    uint32_t delta;

    // Lower 32-bit half
    delta = (half_lo ^ (half_lo >> 1)) & 0x22222222;
    half_lo ^= delta ^ (delta << 1);
    delta = (half_lo ^ (half_lo >> 2)) & 0x0C0C0C0C;
    half_lo ^= delta ^ (delta << 2);
    delta = (half_lo ^ (half_lo >> 4)) & 0x00F000F0;
    half_lo ^= delta ^ (delta << 4);
    delta = (half_lo ^ (half_lo >> 8)) & 0x0000FF00;
    half_lo ^= delta ^ (delta << 8);

    // Upper 32-bit half
    delta = (half_hi ^ (half_hi >> 1)) & 0x22222222;
    half_hi ^= delta ^ (delta << 1);
    delta = (half_hi ^ (half_hi >> 2)) & 0x0C0C0C0C;
    half_hi ^= delta ^ (delta << 2);
    delta = (half_hi ^ (half_hi >> 4)) & 0x00F000F0;
    half_hi ^= delta ^ (delta << 4);
    delta = (half_hi ^ (half_hi >> 8)) & 0x0000FF00;
    half_hi ^= delta ^ (delta << 8);

    lane_t out;
    out.w[0] = (half_hi << 16) | (half_lo & 0x0000FFFF);
    out.w[1] = (half_hi & 0xFFFF0000) | (half_lo >> 16);
    return out;
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Converts a bit-interleaved lane back to a plain 64-bit word.
// The 16-bit parts of w[0] and w[1] are first regrouped into two 32-bit
// halves; each half then runs the four swap stages of
// to_bit_interleaving in the opposite order (distances 8, 4, 2, 1).
lane_t from_bit_interleaving(lane_t in)
{
    uint32_t half_lo = (in.w[1] << 16) | (in.w[0] & 0x0000FFFF);
    uint32_t half_hi = (in.w[1] & 0xFFFF0000) | (in.w[0] >> 16);
    uint32_t delta;

    // Lower 32-bit half
    delta = (half_lo ^ (half_lo >> 8)) & 0x0000FF00;
    half_lo ^= delta ^ (delta << 8);
    delta = (half_lo ^ (half_lo >> 4)) & 0x00F000F0;
    half_lo ^= delta ^ (delta << 4);
    delta = (half_lo ^ (half_lo >> 2)) & 0x0C0C0C0C;
    half_lo ^= delta ^ (delta << 2);
    delta = (half_lo ^ (half_lo >> 1)) & 0x22222222;
    half_lo ^= delta ^ (delta << 1);

    // Upper 32-bit half
    delta = (half_hi ^ (half_hi >> 8)) & 0x0000FF00;
    half_hi ^= delta ^ (delta << 8);
    delta = (half_hi ^ (half_hi >> 4)) & 0x00F000F0;
    half_hi ^= delta ^ (delta << 4);
    delta = (half_hi ^ (half_hi >> 2)) & 0x0C0C0C0C;
    half_hi ^= delta ^ (delta << 2);
    delta = (half_hi ^ (half_hi >> 1)) & 0x22222222;
    half_hi ^= delta ^ (delta << 1);

    lane_t out;
    out.x = ((uint64_t)half_hi << 32) | half_lo;
    return out;
}
/* ---------------------------------------------------------------- */
// Applies one permutation round to *s with a single inline-assembly block.
//
// s: state, accessed as the ten 32-bit words s->w[i][0] (operands x<i>_e)
//    and s->w[i][1] (operands x<i>_o); all ten are read-write operands.
//    The _e/_o suffixes presumably denote the even/odd bit halves produced
//    by to_bit_interleaving - confirm.
// C: two packed 32-bit round constants. The low 32 bits (C_e) are XORed
//    into x2_e at the start; the high 32 bits (C_o) are XORed into the
//    value loaded from x2_o.
//
// The x<i>_e operands and the three temporaries use the "l" constraint,
// the x<i>_o operands use "r"; values are moved between the two groups
// with movs as the computation proceeds.
//
// NOTE(review): the clobber list is empty although the block contains
// movs/eor/bic/ror instructions, which may update the condition flags in
// the targeted encoding - confirm whether a "cc" clobber is required.
// NOTE(review): tmp0..tmp2 are plain "=l" outputs (no early-clobber
// modifier) while C_e/C_o allow a register ("ri"); this appears to rely on
// C being a compile-time constant at every call site - confirm.
forceinline void ROUND(state_t *s, uint64_t C)
{
uint32_t tmp0, tmp1, tmp2;
__asm__ __volatile__(
"@.syntax_unified\n\t"
// XOR the low-half constant into x2_e, then mix the *_e words
"movs %[tmp1], %[C_e]\n\t"
"eor %[x2_e], %[x2_e], %[tmp1]\n\t"
"eor %[x0_e], %[x0_e], %[x4_e]\n\t"
"eor %[x4_e], %[x4_e], %[x3_e]\n\t"
"eor %[x2_e], %[x2_e], %[x1_e]\n\t"
"movs %[tmp0], %[x0_e]\n\t"
"bic %[tmp0], %[tmp0], %[x4_e]\n\t"
"movs %[tmp1], %[x2_e]\n\t"
"bic %[tmp1], %[tmp1], %[x1_e]\n\t"
"eor %[x0_e], %[x0_e], %[tmp1]\n\t"
"movs %[tmp1], %[x4_e]\n\t"
"bic %[tmp1], %[tmp1], %[x3_e]\n\t"
"eor %[x2_e], %[x2_e], %[tmp1]\n\t"
"movs %[tmp2], %[x1_e]\n\t"
"bic %[tmp2], %[tmp2], %[x0_e]\n\t"
"eor %[tmp2], %[x4_e], %[tmp2]\n\t"
"movs %[tmp1], %[x3_e]\n\t"
"bic %[tmp1], %[tmp1], %[x2_e]\n\t"
"eor %[tmp1], %[x1_e], %[tmp1]\n\t"
"eor %[tmp0], %[x3_e], %[tmp0]\n\t"
"eor %[tmp0], %[tmp0], %[x2_e]\n\t"
"eor %[tmp1], %[tmp1], %[x0_e]\n\t"
"eor %[x0_e], %[x0_e], %[tmp2]\n\t"
// Park the results in the *_o operands and bring the original *_o
// words into "l" registers / temporaries for the same mixing sequence
"movs %[x4_e], %[x4_o]\n\t"
"movs %[x1_e], %[x1_o]\n\t"
"movs %[x3_e], %[x3_o]\n\t"
"movs %[x3_o], %[tmp0]\n\t"
"movs %[x1_o], %[tmp1]\n\t"
"movs %[tmp0], %[x0_o]\n\t"
"movs %[tmp1], %[x2_o]\n\t"
"movs %[x0_o], %[x0_e]\n\t"
"movs %[x2_o], %[x2_e]\n\t"
// XOR the high-half constant into the former x2_o value (now in tmp1)
"movs %[x0_e], %[C_o]\n\t"
"eor %[tmp1], %[tmp1], %[x0_e]\n\t"
"eor %[tmp0], %[tmp0], %[x4_e]\n\t"
"eor %[x4_e], %[x4_e], %[x3_e]\n\t"
"eor %[tmp1], %[tmp1], %[x1_e]\n\t"
"movs %[x0_e], %[tmp0] \n\t"
"bic %[x0_e], %[x0_e], %[x4_e]\n\t"
"movs %[x2_e], %[tmp1] \n\t"
"bic %[x2_e], %[x2_e], %[x1_e]\n\t"
"eor %[tmp0], %[tmp0], %[x2_e]\n\t"
"movs %[x2_e], %[x4_e] \n\t"
"bic %[x2_e], %[x2_e], %[x3_e]\n\t"
"eor %[tmp1], %[tmp1], %[x2_e]\n\t"
"movs %[x2_e], %[x1_e]\n\t"
"bic %[x2_e], %[x2_e], %[tmp0]\n\t"
"eor %[x4_e], %[x4_e], %[x2_e]\n\t"
"movs %[x2_e], %[x3_e] \n\t"
"bic %[x2_e], %[x2_e], %[tmp1]\n\t"
"eor %[x1_e], %[x1_e], %[x2_e]\n\t"
"eor %[x3_e], %[x3_e], %[x0_e]\n\t"
"eor %[x3_e], %[x3_e], %[tmp1]\n\t"
"eor %[x1_e], %[x1_e], %[tmp0]\n\t"
"eor %[tmp0], %[tmp0], %[x4_e]\n\t"
"movs %[x4_o], %[tmp1]\n\t"
"movs %[x2_e], %[x3_o]\n\t"
"movs %[x3_o], %[x1_e]\n\t"
// Rotate-and-XOR steps: every rotation amount is first loaded into a
// register with movs and then applied with a register-operand ror
"movs %[tmp1], #17\n\t"
"movs %[x0_e], %[tmp2]\n\t"
"ror %[x0_e], %[x0_e], %[tmp1]\n\t"
"eor %[x0_e], %[tmp2], %[x0_e]\n\t"
"movs %[x1_e], %[x4_e]\n\t"
"ror %[x1_e], %[x1_e], %[tmp1]\n\t"
"eor %[x1_e], %[x4_e], %[x1_e]\n\t"
"movs %[tmp1], #3\n\t"
"ror %[x1_e], %[x1_e], %[tmp1]\n\t"
"eor %[tmp2], %[tmp2], %[x1_e]\n\t"
"movs %[tmp1], #4\n\t"
"ror %[x0_e], %[x0_e], %[tmp1]\n\t"
"eor %[x4_e], %[x4_e], %[x0_e]\n\t"
"movs %[x1_e], %[x2_e]\n\t"
"ror %[x1_e], %[x1_e], %[tmp1]\n\t"
"eor %[x1_e], %[x3_e], %[x1_e]\n\t"
"movs %[tmp1], #3\n\t"
"movs %[x0_e], %[x3_e]\n\t"
"ror %[x0_e], %[x0_e], %[tmp1]\n\t"
"eor %[x0_e], %[x2_e], %[x0_e]\n\t"
"movs %[tmp1], #5\n\t"
"ror %[x0_e], %[x0_e], %[tmp1]\n\t"
"eor %[x2_e], %[x2_e], %[x0_e]\n\t"
"ror %[x1_e], %[x1_e], %[tmp1]\n\t"
"eor %[x3_e], %[x3_e], %[x1_e]\n\t"
"movs %[x0_e], %[x0_o]\n\t"
"movs %[x1_e], %[x1_o]\n\t"
"movs %[x1_o], %[x2_e]\n\t"
"movs %[x0_o], %[tmp2]\n\t"
"movs %[tmp2], %[x4_o]\n\t"
"movs %[x4_o], %[x4_e]\n\t"
"movs %[x4_e], %[x3_o]\n\t"
"movs %[x3_o], %[x3_e]\n\t"
"movs %[x3_e], %[x0_e]\n\t"
"ror %[x3_e], %[x3_e], %[tmp1]\n\t"
"eor %[x3_e], %[tmp0], %[x3_e]\n\t"
"movs %[tmp1], #4\n\t"
"movs %[x2_e], %[tmp0]\n\t"
"ror %[x2_e], %[x2_e], %[tmp1]\n\t"
"eor %[x2_e], %[x0_e], %[x2_e]\n\t"
"movs %[tmp1], #9\n\t"
"ror %[x3_e], %[x3_e], %[tmp1]\n\t"
"eor %[x0_e], %[x0_e], %[x3_e]\n\t"
"movs %[tmp1], #10\n\t"
"ror %[x2_e], %[x2_e], %[tmp1]\n\t"
"eor %[tmp0], %[tmp0], %[x2_e]\n\t"
"movs %[tmp1], #11\n\t"
"movs %[x2_e], %[x1_e]\n\t"
"ror %[x2_e], %[x2_e], %[tmp1]\n\t"
"eor %[x2_e], %[x1_e], %[x2_e]\n\t"
"movs %[x3_e], %[x4_e]\n\t"
"ror %[x3_e], %[x3_e], %[tmp1]\n\t"
"eor %[x3_e], %[x4_e], %[x3_e]\n\t"
"movs %[tmp1], #19\n\t"
"ror %[x3_e], %[x3_e], %[tmp1]\n\t"
"eor %[x1_e], %[x1_e], %[x3_e]\n\t"
"movs %[tmp1], #20\n\t"
"ror %[x2_e], %[x2_e], %[tmp1]\n\t"
"eor %[x4_e], %[x4_e], %[x2_e]\n\t"
"movs %[x2_e], %[x2_o]\n\t"
"movs %[x3_e], %[x1_o]\n\t"
"movs %[x1_o], %[x4_e]\n\t"
"movs %[x2_o], %[tmp0]\n\t"
// The last group complements its inputs with mvn before rotating
"movs %[x4_e], #2\n\t"
"mvn %[tmp0], %[tmp2]\n\t"
"ror %[tmp0], %[tmp0], %[x4_e]\n\t"
"eor %[tmp0], %[x2_e], %[tmp0]\n\t"
"movs %[x4_e], #3\n\t"
"mvn %[tmp1], %[x2_e]\n\t"
"ror %[tmp1], %[tmp1], %[x4_e]\n\t"
"eor %[tmp1], %[tmp2], %[tmp1]\n\t"
"eor %[x2_e], %[x2_e], %[tmp1]\n\t"
"movs %[x4_e], #1\n\t"
"ror %[tmp0], %[tmp0], %[x4_e]\n\t"
"eor %[tmp2], %[tmp2], %[tmp0]\n\t"
"movs %[x4_e], %[x0_o]\n\t"
"movs %[x0_o], %[x2_o]\n\t"
"movs %[x2_o], %[tmp2]\n\t"
: [x0_e] "+l"(s->w[0][0]), [x1_e] "+l"(s->w[1][0]),
[x2_e] "+l"(s->w[2][0]), [x3_e] "+l"(s->w[3][0]),
[x4_e] "+l"(s->w[4][0]), [x0_o] "+r"(s->w[0][1]),
[x1_o] "+r"(s->w[1][1]), [x2_o] "+r"(s->w[2][1]),
[x3_o] "+r"(s->w[3][1]), [x4_o] "+r"(s->w[4][1]),
[tmp0] "=l"(tmp0), [tmp1] "=l"(tmp1), [tmp2] "=l"(tmp2)
: [C_e] "ri"((uint32_t)C), [C_o] "ri"((uint32_t)(C >> 32))
:);
}
/* ---------------------------------------------------------------- */
// Runs the permutation on *s for the requested number of rounds.
// nr == 12 performs all twelve rounds, nr == 7 the final seven, nr == 6
// the final six, and every other value performs only the final round.
void PROUNDS(state_t *s, uint8_t nr)
{
    const int from_round_1 = (nr == 12);
    const int from_round_6 = from_round_1 || (nr == 7);
    const int from_round_7 = from_round_6 || (nr == 6);

    // Rounds 1-5: only part of the full twelve-round permutation
    if (from_round_1)
    {
        ROUND(s, 0xc0000000c);
        ROUND(s, 0xc00000009);
        ROUND(s, 0x90000000c);
        ROUND(s, 0x900000009);
        ROUND(s, 0xc00000006);
    }

    // Round 6: first round of the seven-round variant
    if (from_round_6)
    {
        ROUND(s, 0xc00000003);
    }

    // Rounds 7-11: first rounds of the six-round variant
    if (from_round_7)
    {
        ROUND(s, 0x900000006);
        ROUND(s, 0x900000003);
        ROUND(s, 0x60000000c);
        ROUND(s, 0x600000009);
        ROUND(s, 0x30000000c);
    }

    // Round 12: executed for every value of nr
    ROUND(s, 0x300000009);
}
/* ---------------------------------------------------------------- */
#endif // ASCONP_H_
#ifndef CONFIG_H
#define CONFIG_H
// Build switch for the optional Ascon-Hash code (presumably tested by
// isap.c as `ENABLE_HASH == 1` - confirm for this variant).
#define ENABLE_HASH 0
#include "isap.h"
// ISAP-A-128a
// Three 8-byte initialization values. They differ only in the first byte
// (0x01, 0x02, 0x03); the remaining seven bytes are the ISAP_* parameter
// macros (expected to come from isap.h).
// NOTE(review): these are non-static object definitions inside a header, so
// including this file from more than one translation unit would produce
// duplicate definitions - confirm it is included exactly once.
const uint8_t ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
// Ascon-Hash
// 8-byte initialization value for the hash function.
const uint8_t ASCON_HASH_IV[] = {0x00, 0x40, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00};
#endif // CONFIG_H
#include "api.h"
#include "isap.h"
#include "crypto_aead.h"
// Encrypts mlen bytes of m into c and appends the authentication tag.
//
// c/clen:   output buffer and its resulting length (mlen + ISAP_TAG_SZ)
// m/mlen:   plaintext
// ad/adlen: associated data, authenticated but not encrypted
// nsec:     unused
// npub:     public nonce
// k:        key
// Always returns 0.
int crypto_aead_encrypt(
    unsigned char *c, unsigned long long *clen,
    const unsigned char *m, unsigned long long mlen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *nsec,
    const unsigned char *npub,
    const unsigned char *k)
{
    (void)nsec;

    // The output is the ciphertext immediately followed by the tag
    *clen = ISAP_TAG_SZ + mlen;

    // Encrypt first: the tag is computed over the ciphertext
    if (mlen != 0)
    {
        isap_enc(k, npub, m, mlen, c);
    }

    // Write the tag directly behind the ciphertext
    isap_mac(k, npub, ad, adlen, c, mlen, &c[mlen]);
    return 0;
}
// Verifies the tag at the end of c and, if it is valid, decrypts the
// ciphertext into m.
//
// m/mlen:   output buffer and its resulting length (clen - ISAP_TAG_SZ)
// nsec:     unused
// c/clen:   ciphertext followed by the tag
// ad/adlen: associated data
// npub:     public nonce
// k:        key
// Returns 0 on success and -1 if the input is shorter than a tag or the
// tag does not verify; m is not written in either failure case.
int crypto_aead_decrypt(
    unsigned char *m, unsigned long long *mlen,
    unsigned char *nsec,
    const unsigned char *c, unsigned long long clen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *npub,
    const unsigned char *k)
{
    (void)nsec;

    // An input shorter than the tag cannot be valid. Reject it before
    // computing clen - ISAP_TAG_SZ, which would otherwise wrap around to a
    // huge length and make the code below read far beyond the buffer.
    if (clen < ISAP_TAG_SZ)
    {
        *mlen = 0;
        return -1;
    }

    // Plaintext length is clen - tag length
    *mlen = clen - ISAP_TAG_SZ;

    // Generate tag (pvp() below works on exactly 16 bytes per tag)
    unsigned char T[16];
    isap_mac(k, npub, ad, adlen, c, *mlen, T);

    // Copy the received tag, which follows the ciphertext
    unsigned char T_star[16];
    for (int i = 0; i < 16; i++)
    {
        T_star[i] = *(c + *mlen + i);
    }

    // Compare tags; pvp() returns the number of matching bytes
    int eq_cnt = pvp(T, T_star);

    // Perform decryption only if the tag is correct
    if (eq_cnt != ISAP_TAG_SZ)
    {
        return -1;
    }
    if (*mlen > 0)
    {
        isap_enc(k, npub, c, *mlen, m);
    }
    return 0;
}
#include <string.h>
#include <inttypes.h>
#include "api.h"
#include "isap.h"
#include "asconp.h"
#include "config.h"
// Needed for LR tag comparison: isap_mac() stores state words x[0] and x[1]
// here after absorbing the ciphertext, and pvp() loads them into x[2] and
// x[3] of its own state.
// NOTE(review): this is file-scope mutable state, so a pvp() result depends
// on the most recent isap_mac() call - confirm callers always pair them.
uint64_t s0, s1;
// Absorbs len bytes from src into lane 0 of the state.
// Full 8-byte blocks are XORed into the lane one at a time, each followed
// by P_sH. The remaining 0..7 bytes are padded with a single 0x80 byte and
// absorbed as one last block, so P_sH always runs at least once.
//
// s:   state to update (the P_sH macro also refers to it by this name)
// src: input bytes; no alignment is required
// len: number of input bytes
forceinline void ABSORB_LANES(state_t *s, const uint8_t *src, uint64_t len)
{
    while (len >= ISAP_rH / 8)
    {
        // Absorb full lanes.
        // The bytes are copied with memcpy instead of dereferencing src as
        // a lane_t: src carries no alignment guarantee, and a misaligned
        // 64-bit access is undefined behavior.
        lane_t t0;
        memcpy(&t0, src, sizeof(t0));
        t0 = U64TOWORD(t0);
        s->x[0] ^= t0.x;
        len -= ISAP_rH / 8;
        src += ISAP_rH / 8;
        P_sH;
    }
    if (len > 0)
    {
        // Absorb partial lane and padding; bytes are filled in from the
        // highest index downwards
        size_t i;
        lane_t t0 = {0};
        for (i = 0; i < len; i++)
        {
            t0.b[7 - i] ^= src[i];
        }
        t0.b[7 - i] ^= 0x80;
        t0 = TOBI(t0);
        s->x[0] ^= t0.x;
        P_sH;
    }
    else
    {
        // Absorb padded empty lane
        s->b[0][7] ^= 0x80;
        P_sH;
    }
}
/******************************************************************************/
/* ISAP_RK */
/******************************************************************************/
// Derives state words from the key k and the 16-byte string y.
//
// k:      key; 16 bytes are read
// iv:     initialization value; 8 bytes are read
// y:      16 bytes, absorbed one bit at a time starting with the most
//         significant bit of y[0]
// out:    receives x[0] and x[1] of the final state, plus x[2] when
//         outlen > 16. It is written only after y has been consumed, so
//         out may point to the state that contains y.
// outlen: requested output length in bytes
void isap_rk(
    const uint8_t *k,
    const uint8_t *iv,
    const uint8_t *y,
    state_t *out,
    const size_t outlen)
{
    state_t state;
    state_t *s = &state;
    lane_t t;

    // Initialize.
    // Inputs are copied with memcpy instead of being dereferenced as
    // lane_t: the byte pointers carry no alignment guarantee, and a
    // misaligned 64-bit access is undefined behavior.
    memcpy(&t, k + 0, sizeof(t));
    s->l[0] = U64TOWORD(t);
    memcpy(&t, k + 8, sizeof(t));
    s->l[1] = U64TOWORD(t);
    memcpy(&t, iv + 0, sizeof(t));
    s->l[2] = U64TOWORD(t);
    s->x[3] = 0;
    s->x[4] = 0;
    P_sK;

    // Absorb Y, bit by bit
    for (size_t i = 0; i < 16; i++)
    {
        uint8_t y_byte = y[i];
        for (unsigned int bit = 0; bit < 8; bit++)
        {
            // Move bit (7 - bit) of y_byte to position 7 and XOR it in
            s->b[0][7] ^= (uint8_t)((y_byte << bit) & 0x80);
            // P_sB follows every bit except the very last one, which is
            // followed by P_sK below
            if (i != 15 || bit != 7)
            {
                P_sB;
            }
        }
    }

    // Squeeze K*
    P_sK;
    out->x[0] = s->x[0];
    out->x[1] = s->x[1];
    if (outlen > 16)
    {
        out->x[2] = s->x[2];
    }
}
/******************************************************************************/
/* ISAP_MAC */
/******************************************************************************/
void isap_mac(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *ad, uint64_t adlen,
const uint8_t *c, uint64_t clen,
uint8_t *tag)
{
state_t state;
state_t *s = &state;
// Initialize
s->l[0] = U64TOWORD(*(lane_t *)(npub + 0));
s->l[1] = U64TOWORD(*(lane_t *)(npub + 8));
s->l[2] = U64TOWORD(*(lane_t *)(ISAP_IV_A + 0));
s->x[3] = 0;
s->x[4] = 0;
P_sH;
// Absorb associated data
ABSORB_LANES(s, ad, adlen);
// Domain seperation
s->w[4][0] ^= 0x1UL;
// Absorb ciphertext
ABSORB_LANES(s, c, clen);
// Needed for LR tag comparison
s0 = s->x[0];
s1 = s->x[1];
// Derive KA*
s->l[0] = WORDTOU64(s->l[0]);
s->l[1] = WORDTOU64(s->l[1]);
isap_rk(k, ISAP_IV_KA, (const uint8_t *)(s->b), s, CRYPTO_KEYBYTES);
// Squeeze tag
P_sH;
lane_t t0 = WORDTOU64(s->l[0]);
memcpy(tag + 0, t0.b, 8);
t0 = WORDTOU64(s->l[1]);
memcpy(tag + 8, t0.b, 8);
}
/******************************************************************************/
/* Leakage resilient tag comparison. */
/* Similar to Fig.3 of https://eprint.iacr.org/2021/402.pdf. */
/******************************************************************************/
int pvp(
const uint8_t *T, const uint8_t *T_star)
{
state_t state;
state_t *s = &state;
// Calculate U
s->l[0] = U64TOWORD(*(lane_t *)(T + 0));
s->l[1] = U64TOWORD(*(lane_t *)(T + 8));
s->x[2] = s0;
s->x[3] = s1;
s->x[4] = 0;
P_PVP;
uint64_t u0, u1;
u0 = s->x[0];
u1 = s->x[1];
// Calculate U'
s->l[0] = U64TOWORD(*(lane_t *)(T_star + 0));
s->l[1] = U64TOWORD(*(lane_t *)(T_star + 8));
s->x[2] = s0;
s->x[3] = s1;
s->x[4] = 0;
P_PVP;
// Compare tag
int eq_cnt = 0;
for (size_t i = 0; i < 8; i++)
{
eq_cnt += (s->b[0][i] == ((uint8_t *)&u0)[i]);
}
for (size_t i = 0; i < 8; i++)
{
eq_cnt += (s->b[1][i] == ((uint8_t *)&u1)[i]);
}
return eq_cnt;
}
/******************************************************************************/
/* ISAP_ENC */
/******************************************************************************/
void isap_enc(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *m, uint64_t mlen,
uint8_t *c)
{
state_t state;
state_t *s = &state;
// Init state
isap_rk(k, ISAP_IV_KE, npub, s, ISAP_STATE_SZ - CRYPTO_NPUBBYTES);
s->l[3] = U64TOWORD(*(lane_t *)(npub + 0));
s->l[4] = U64TOWORD(*(lane_t *)(npub + 8));
while (mlen >= ISAP_rH / 8)
{
// Encrypt full lanes
P_sE;
lane_t t0 = WORDTOU64(s->l[0]);
*(uint64_t *)c = *(uint64_t *)m ^ t0.x;
mlen -= ISAP_rH / 8;
m += ISAP_rH / 8;
c += ISAP_rH / 8;
}
if (mlen > 0)
{
// Encrypt partial lanes
P_sE;
lane_t t0 = WORDTOU64(s->l[0]);
for (uint8_t i = 0; i < mlen; i++)
{
*c = *m ^ t0.b[i];
m += 1;
c += 1;
}
}
}
/******************************************************************************/
/* Ascon-Hash */
/******************************************************************************/
#if ENABLE_HASH == 1
/**
 * Hash inlen bytes of `in` into a 32-byte digest.
 *
 * Only compiled when ENABLE_HASH is 1. The state is initialised from
 * ASCON_HASH_IV, the input is absorbed with ABSORB_LANES(), and four 8-byte
 * lanes are squeezed with P_sH between them.
 *
 * @param out    Output buffer; 32 bytes are written.
 * @param in     Input bytes.
 * @param inlen  Length of `in` in bytes.
 * @return Always 0.
 */
int crypto_hash(uint8_t *out, const uint8_t *in, unsigned long long inlen)
{
    state_t state;
    state_t *s = &state; // the P_* permutation macros operate on `s`
    lane_t t0;

    // Initialize. The IV is a byte array, so it is copied into a lane_t
    // instead of being read through a casted pointer.
    memcpy(t0.b, ASCON_HASH_IV, 8);
    s->l[0] = U64TOWORD(t0);
    s->x[1] = 0;
    s->x[2] = 0;
    s->x[3] = 0;
    s->x[4] = 0;
    P_sH;

    // Absorb input
    ABSORB_LANES(s, in, inlen);

    for (size_t i = 0; i < 4; i++)
    {
        // Squeeze full lanes; `out` may be unaligned, hence memcpy
        t0 = WORDTOU64(s->l[0]);
        memcpy(out + 8 * i, t0.b, 8);
        if (i < 3)
        {
            P_sH;
        }
    }
    return 0;
}
#endif
#ifndef ISAP_H
#define ISAP_H
#include <inttypes.h>
// Parameters of this ISAP instance. Several of them (ISAP_K, ISAP_rH,
// ISAP_rB and the round counts) are also used as bytes of the IVs in
// config.h.
// Rate in bits
#define ISAP_rH 64
#define ISAP_rB 1
// Number of rounds
#define ISAP_sH 12
#define ISAP_sB 1
#define ISAP_sE 6
#define ISAP_sK 12
// State size in bytes
#define ISAP_STATE_SZ 40
// Size of rate in bytes
#define ISAP_rH_SZ ((ISAP_rH + 7) / 8)
// Size of zero truncated IV in bytes
#define ISAP_IV_SZ 8
// Size of tag in bytes
#define ISAP_TAG_SZ 16
// Security level
#define ISAP_K 128
// Computes the authentication tag over nonce, associated data and ciphertext;
// writes 16 bytes to `tag`.
void isap_mac(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *ad, const uint64_t adlen,
const uint8_t *c, const uint64_t clen,
uint8_t *tag);
// Compares two 16-byte tags and returns the number of matching bytes
// (16 means equal). Relies on state saved by the preceding isap_mac() call.
// NOTE(review): declared with `unsigned char` here but defined with
// `uint8_t`; these are only the same type where uint8_t is unsigned char.
int pvp(
const unsigned char *T,
const unsigned char *T_star);
// XORs mlen bytes of `m` with the keystream and writes the result to `c`.
void isap_enc(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *m, const uint64_t mlen,
uint8_t *c);
#endif
#ifndef ASCONP_H_
#define ASCONP_H_
#include <inttypes.h>
#include "forceinline.h"
/* One 64-bit lane of the permutation state, viewable as a single 64-bit
 * word, two 32-bit words, or eight bytes. */
typedef union
{
uint64_t x;
uint32_t w[2];
uint8_t b[8];
} lane_t;
/* The full permutation state: five lanes (40 bytes), with the same views. */
typedef union
{
lane_t l[5];
uint64_t x[5];
uint32_t w[5][2];
uint8_t b[5][8];
} state_t;
/* ---------------------------------------------------------------- */
/* Permutation shorthands. Each expands to a call on a variable named `s`,
 * so a `state_t *s` must be in scope wherever they are used. */
#define P_sH P12ROUNDS(s)
#define P_sB P1ROUNDS(s)
#define P_sE P6ROUNDS(s)
#define P_sK P12ROUNDS(s)
/* ---------------------------------------------------------------- */
/* U64TOWORD: reverse the byte order of a lane, then bit-interleave it.
 * WORDTOU64: undo the interleaving, then reverse the byte order. */
#define U64TOWORD(x) to_bit_interleaving(U64BIG(x))
#define WORDTOU64(x) U64BIG(from_bit_interleaving(x))
/* ---------------------------------------------------------------- */
/* Interleaving conversions only, without the byte swap. */
#define TOBI(x) to_bit_interleaving(x)
#define FROMBI(x) from_bit_interleaving(x)
/* ---------------------------------------------------------------- */
/* Reverse the order of the eight bytes of a lane and return the result. */
forceinline lane_t U64BIG(lane_t x)
{
    const uint64_t v = x.x;
    uint64_t swapped = 0;

    /* lower four bytes move up ... */
    swapped |= (v & 0x00000000000000FFULL) << 56;
    swapped |= (v & 0x000000000000FF00ULL) << 40;
    swapped |= (v & 0x0000000000FF0000ULL) << 24;
    swapped |= (v & 0x00000000FF000000ULL) << 8;
    /* ... upper four bytes move down */
    swapped |= (v & 0x000000FF00000000ULL) >> 8;
    swapped |= (v & 0x0000FF0000000000ULL) >> 24;
    swapped |= (v & 0x00FF000000000000ULL) >> 40;
    swapped |= (v & 0xFF00000000000000ULL) >> 56;

    x.x = swapped;
    return x;
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Converts a lane to the bit-interleaved form used by ROUND. Each 32-bit
// half is bit-shuffled on its own; the low 16 bits of both halves then form
// w[0] and the high 16 bits of both halves form w[1].
forceinline lane_t to_bit_interleaving(lane_t in)
{
    uint32_t word_lo = in.w[0];
    uint32_t word_hi = in.w[1];
    uint32_t swap;

    // shuffle the low word
    swap = (word_lo ^ (word_lo >> 1)) & 0x22222222;
    word_lo ^= swap ^ (swap << 1);
    swap = (word_lo ^ (word_lo >> 2)) & 0x0C0C0C0C;
    word_lo ^= swap ^ (swap << 2);
    swap = (word_lo ^ (word_lo >> 4)) & 0x00F000F0;
    word_lo ^= swap ^ (swap << 4);
    swap = (word_lo ^ (word_lo >> 8)) & 0x0000FF00;
    word_lo ^= swap ^ (swap << 8);

    // shuffle the high word
    swap = (word_hi ^ (word_hi >> 1)) & 0x22222222;
    word_hi ^= swap ^ (swap << 1);
    swap = (word_hi ^ (word_hi >> 2)) & 0x0C0C0C0C;
    word_hi ^= swap ^ (swap << 2);
    swap = (word_hi ^ (word_hi >> 4)) & 0x00F000F0;
    word_hi ^= swap ^ (swap << 4);
    swap = (word_hi ^ (word_hi >> 8)) & 0x0000FF00;
    word_hi ^= swap ^ (swap << 8);

    // recombine the 16-bit halves
    lane_t result;
    result.w[0] = (word_lo & 0x0000FFFF) | (word_hi << 16);
    result.w[1] = (word_lo >> 16) | (word_hi & 0xFFFF0000);
    return result;
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Converts a bit-interleaved lane back to the plain 64-bit form: the 16-bit
// halves of w[0]/w[1] are regrouped into two 32-bit words, and each word is
// run through the shuffle steps of to_bit_interleaving in reverse order.
forceinline lane_t from_bit_interleaving(lane_t in)
{
    uint32_t word_lo = (in.w[0] & 0x0000FFFF) | (in.w[1] << 16);
    uint32_t word_hi = (in.w[0] >> 16) | (in.w[1] & 0xFFFF0000);
    uint32_t swap;

    // un-shuffle the low word
    swap = (word_lo ^ (word_lo >> 8)) & 0x0000FF00;
    word_lo ^= swap ^ (swap << 8);
    swap = (word_lo ^ (word_lo >> 4)) & 0x00F000F0;
    word_lo ^= swap ^ (swap << 4);
    swap = (word_lo ^ (word_lo >> 2)) & 0x0C0C0C0C;
    word_lo ^= swap ^ (swap << 2);
    swap = (word_lo ^ (word_lo >> 1)) & 0x22222222;
    word_lo ^= swap ^ (swap << 1);

    // un-shuffle the high word
    swap = (word_hi ^ (word_hi >> 8)) & 0x0000FF00;
    word_hi ^= swap ^ (swap << 8);
    swap = (word_hi ^ (word_hi >> 4)) & 0x00F000F0;
    word_hi ^= swap ^ (swap << 4);
    swap = (word_hi ^ (word_hi >> 2)) & 0x0C0C0C0C;
    word_hi ^= swap ^ (swap << 2);
    swap = (word_hi ^ (word_hi >> 1)) & 0x22222222;
    word_hi ^= swap ^ (swap << 1);

    lane_t result;
    result.x = (uint64_t)word_hi << 32 | word_lo;
    return result;
}
/* ---------------------------------------------------------------- */
/* One permutation round on the bit-interleaved state, written as ARM inline
 * assembly. Each lane is handled as an "even" 32-bit word (s->w[i][0]) and
 * an "odd" 32-bit word (s->w[i][1]).
 *
 * C is split into its low 32 bits (C_e) and high 32 bits (C_o), both passed
 * with the "i" (immediate) constraint, so C must be a compile-time constant
 * at every call site once this function has been inlined. */
forceinline void ROUND(state_t *s, uint64_t C)
{
// Based on the round description of Ascon given in the Bachelor's
// thesis: "Optimizing Ascon on RISC-V" of Lars Jellema
// see https://github.com/Lucus16/ascon-riscv/
uint32_t tmp0, tmp1, tmp2;
__asm__ __volatile__(
/* constant addition and bitwise (non-rotating) part, even words */
"eor %[x2_e], %[x2_e], %[C_e]\n\t"
"eor %[tmp0], %[x1_e], %[x2_e]\n\t"
"eor %[tmp1], %[x0_e], %[x4_e]\n\t"
"eor %[tmp2], %[x3_e], %[x4_e]\n\t"
"orn %[x4_e], %[x3_e], %[x4_e]\n\t"
"eor %[x4_e], %[x4_e], %[tmp0]\n\t"
"eor %[x3_e], %[x3_e], %[x1_e]\n\t"
"orr %[x3_e], %[x3_e], %[tmp0]\n\t"
"eor %[x3_e], %[x3_e], %[tmp1]\n\t"
"eor %[x2_e], %[x2_e], %[tmp1]\n\t"
"orr %[x2_e], %[x2_e], %[x1_e]\n\t"
"eor %[x2_e], %[x2_e], %[tmp2]\n\t"
"bic %[x1_e], %[x1_e], %[tmp1]\n\t"
"eor %[x1_e], %[x1_e], %[tmp2]\n\t"
"orr %[x0_e], %[x0_e], %[tmp2]\n\t"
"eor %[x0_e], %[x0_e], %[tmp0]\n\t"
/* the same sequence on the odd words */
"eor %[x2_o], %[x2_o], %[C_o]\n\t"
"eor %[tmp0], %[x1_o], %[x2_o]\n\t"
"eor %[tmp1], %[x0_o], %[x4_o]\n\t"
"eor %[tmp2], %[x3_o], %[x4_o]\n\t"
"orn %[x4_o], %[x3_o], %[x4_o]\n\t"
"eor %[x4_o], %[x4_o], %[tmp0]\n\t"
"eor %[x3_o], %[x3_o], %[x1_o]\n\t"
"orr %[x3_o], %[x3_o], %[tmp0]\n\t"
"eor %[x3_o], %[x3_o], %[tmp1]\n\t"
"eor %[x2_o], %[x2_o], %[tmp1]\n\t"
"orr %[x2_o], %[x2_o], %[x1_o]\n\t"
"eor %[x2_o], %[x2_o], %[tmp2]\n\t"
"bic %[x1_o], %[x1_o], %[tmp1]\n\t"
"eor %[x1_o], %[x1_o], %[tmp2]\n\t"
"orr %[x0_o], %[x0_o], %[tmp2]\n\t"
"eor %[x0_o], %[x0_o], %[tmp0]\n\t"
/* rotate-and-xor part, mixing the even and odd word of each lane
 * (presumably the linear layer of the round -- TODO confirm) */
"eor %[tmp0], %[x2_e], %[x2_o], ror #4\n\t"
"eor %[tmp1], %[x2_o], %[x2_e], ror #5\n\t"
"eor %[x2_o], %[x2_o], %[tmp0], ror #10\n\t"
"eor %[x2_e], %[x2_e], %[tmp1], ror #9\n\t"
"eor %[tmp0], %[x3_e], %[x3_e], ror #11\n\t"
"eor %[tmp1], %[x3_o], %[x3_o], ror #11\n\t"
"eor %[x3_o], %[x3_o], %[tmp0], ror #20\n\t"
"eor %[x3_e], %[x3_e], %[tmp1], ror #19\n\t"
"eor %[tmp0], %[x4_e], %[x4_o], ror #2\n\t"
"eor %[tmp1], %[x4_o], %[x4_e], ror #3\n\t"
"eor %[x4_o], %[x4_o], %[tmp0], ror #1\n\t"
"eor %[x4_e], %[x4_e], %[tmp1]\n\t"
"eor %[tmp0], %[x0_e], %[x0_o], ror #3\n\t"
"eor %[tmp1], %[x0_o], %[x0_e], ror #4\n\t"
"eor %[x0_e], %[x0_e], %[tmp0], ror #5\n\t"
"eor %[x0_o], %[x0_o], %[tmp1], ror #5\n\t"
"eor %[tmp0], %[x1_e], %[x1_e], ror #17\n\t"
"eor %[tmp1], %[x1_o], %[x1_o], ror #17\n\t"
"eor %[x1_o], %[x1_o], %[tmp0], ror #4\n\t"
"eor %[x1_e], %[x1_e], %[tmp1], ror #3\n\t"
/* all ten state words are read-write operands; tmp0-2 are scratch outputs */
: [x0_e] "+r"(s->w[0][0]), [x0_o] "+r"(s->w[0][1]),
[x1_e] "+r"(s->w[1][0]), [x1_o] "+r"(s->w[1][1]),
[x2_e] "+r"(s->w[2][0]), [x2_o] "+r"(s->w[2][1]),
[x3_e] "+r"(s->w[3][0]), [x3_o] "+r"(s->w[3][1]),
[x4_e] "+r"(s->w[4][0]), [x4_o] "+r"(s->w[4][1]), [tmp0] "=r"(tmp0),
[tmp1] "=r"(tmp1), [tmp2] "=r"(tmp2)
: [C_e] "i"((uint32_t)C), [C_o] "i"((uint32_t)(C >> 32))
:);
}
/* ---------------------------------------------------------------- */
/* Apply twelve rounds to the state, one ROUND call per round constant.
 * The constants are written out as literals because ROUND passes them to its
 * inline assembly as immediate operands. The last six constants are the ones
 * used by P6ROUNDS, and the last one is the one used by P1ROUNDS. */
forceinline void P12ROUNDS(state_t *s)
{
ROUND(s, 0xc0000000c);
ROUND(s, 0xc00000009);
ROUND(s, 0x90000000c);
ROUND(s, 0x900000009);
ROUND(s, 0xc00000006);
ROUND(s, 0xc00000003);
ROUND(s, 0x900000006);
ROUND(s, 0x900000003);
ROUND(s, 0x60000000c);
ROUND(s, 0x600000009);
ROUND(s, 0x30000000c);
ROUND(s, 0x300000009);
}
/* ---------------------------------------------------------------- */
/* Apply six rounds to the state, using the same constants as the last six
 * rounds of P12ROUNDS. Constants stay literal because ROUND needs them as
 * immediates. */
forceinline void P6ROUNDS(state_t *s)
{
ROUND(s, 0x900000006);
ROUND(s, 0x900000003);
ROUND(s, 0x60000000c);
ROUND(s, 0x600000009);
ROUND(s, 0x30000000c);
ROUND(s, 0x300000009);
}
/* ---------------------------------------------------------------- */
/* Apply a single round to the state, using the same constant as the final
 * round of P12ROUNDS and P6ROUNDS. */
forceinline void P1ROUNDS(state_t *s)
{
ROUND(s, 0x300000009);
}
/* ---------------------------------------------------------------- */
#endif // ASCONP_H_
#ifndef CONFIG_H
#define CONFIG_H
// Set to 1 to compile crypto_hash(); it is guarded by `#if ENABLE_HASH == 1`.
#define ENABLE_HASH 0
#include "isap.h"
// NOTE(review): the arrays below are defined (not just declared) in this
// header, so it should be included from exactly one translation unit.
// ISAP-A-128a
// The three 8-byte IVs differ only in their first byte; the remaining bytes
// are the instance parameters from isap.h.
// ISAP_IV_A: state initialisation in isap_mac().
const uint8_t ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
// ISAP_IV_KA: passed to isap_rk() from isap_mac().
const uint8_t ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
// ISAP_IV_KE: passed to isap_rk() from isap_enc().
const uint8_t ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
// Ascon-Hash
// Initial value of lane 0 in crypto_hash().
const uint8_t ASCON_HASH_IV[] = {0x00, 0x40, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00};
#endif // CONFIG_H
../bi_32/crypto_aead.c
\ No newline at end of file
../bi_32/forceinline.h
\ No newline at end of file
#ifndef ASCONP_H_
#define ASCONP_H_
#include <inttypes.h>
#include "forceinline.h"
/* One 64-bit lane of the permutation state, viewable as a single 64-bit
 * word, two 32-bit words, or eight bytes. */
typedef union
{
uint64_t x;
uint32_t w[2];
uint8_t b[8];
} lane_t;
/* The full permutation state: five lanes (40 bytes), with the same views. */
typedef union
{
lane_t l[5];
uint64_t x[5];
uint32_t w[5][2];
uint8_t b[5][8];
} state_t;
/* ---------------------------------------------------------------- */
/* Permutation shorthands. Each expands to a call on a variable named `s`,
 * so a `state_t *s` must be in scope wherever they are used. The second
 * argument is the number of rounds. */
#define P_sH PROUNDS(s, 12)
#define P_sB PROUNDS(s, 1)
#define P_sE PROUNDS(s, 6)
#define P_sK PROUNDS(s, 12)
/* ---------------------------------------------------------------- */
/* U64TOWORD: reverse the byte order of a lane, then bit-interleave it.
 * WORDTOU64: undo the interleaving, then reverse the byte order. */
#define U64TOWORD(x) to_bit_interleaving(U64BIG(x))
#define WORDTOU64(x) U64BIG(from_bit_interleaving(x))
/* ---------------------------------------------------------------- */
/* Interleaving conversions only, without the byte swap. */
#define TOBI(x) to_bit_interleaving(x)
#define FROMBI(x) from_bit_interleaving(x)
/* ---------------------------------------------------------------- */
/* Reverse the order of the eight bytes of a lane and return the result. */
lane_t U64BIG(lane_t x)
{
    const uint64_t v = x.x;
    uint64_t swapped = 0;

    /* lower four bytes move up ... */
    swapped |= (v & 0x00000000000000FFULL) << 56;
    swapped |= (v & 0x000000000000FF00ULL) << 40;
    swapped |= (v & 0x0000000000FF0000ULL) << 24;
    swapped |= (v & 0x00000000FF000000ULL) << 8;
    /* ... upper four bytes move down */
    swapped |= (v & 0x000000FF00000000ULL) >> 8;
    swapped |= (v & 0x0000FF0000000000ULL) >> 24;
    swapped |= (v & 0x00FF000000000000ULL) >> 40;
    swapped |= (v & 0xFF00000000000000ULL) >> 56;

    x.x = swapped;
    return x;
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Converts a lane to the bit-interleaved form used by ROUND. Each 32-bit
// half is bit-shuffled on its own; the low 16 bits of both halves then form
// w[0] and the high 16 bits of both halves form w[1].
lane_t to_bit_interleaving(lane_t in)
{
    uint32_t word_lo = in.w[0];
    uint32_t word_hi = in.w[1];
    uint32_t swap;

    // shuffle the low word
    swap = (word_lo ^ (word_lo >> 1)) & 0x22222222;
    word_lo ^= swap ^ (swap << 1);
    swap = (word_lo ^ (word_lo >> 2)) & 0x0C0C0C0C;
    word_lo ^= swap ^ (swap << 2);
    swap = (word_lo ^ (word_lo >> 4)) & 0x00F000F0;
    word_lo ^= swap ^ (swap << 4);
    swap = (word_lo ^ (word_lo >> 8)) & 0x0000FF00;
    word_lo ^= swap ^ (swap << 8);

    // shuffle the high word
    swap = (word_hi ^ (word_hi >> 1)) & 0x22222222;
    word_hi ^= swap ^ (swap << 1);
    swap = (word_hi ^ (word_hi >> 2)) & 0x0C0C0C0C;
    word_hi ^= swap ^ (swap << 2);
    swap = (word_hi ^ (word_hi >> 4)) & 0x00F000F0;
    word_hi ^= swap ^ (swap << 4);
    swap = (word_hi ^ (word_hi >> 8)) & 0x0000FF00;
    word_hi ^= swap ^ (swap << 8);

    // recombine the 16-bit halves
    lane_t result;
    result.w[0] = (word_lo & 0x0000FFFF) | (word_hi << 16);
    result.w[1] = (word_lo >> 16) | (word_hi & 0xFFFF0000);
    return result;
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Converts a bit-interleaved lane back to the plain 64-bit form: the 16-bit
// halves of w[0]/w[1] are regrouped into two 32-bit words, and each word is
// run through the shuffle steps of to_bit_interleaving in reverse order.
lane_t from_bit_interleaving(lane_t in)
{
    uint32_t word_lo = (in.w[0] & 0x0000FFFF) | (in.w[1] << 16);
    uint32_t word_hi = (in.w[0] >> 16) | (in.w[1] & 0xFFFF0000);
    uint32_t swap;

    // un-shuffle the low word
    swap = (word_lo ^ (word_lo >> 8)) & 0x0000FF00;
    word_lo ^= swap ^ (swap << 8);
    swap = (word_lo ^ (word_lo >> 4)) & 0x00F000F0;
    word_lo ^= swap ^ (swap << 4);
    swap = (word_lo ^ (word_lo >> 2)) & 0x0C0C0C0C;
    word_lo ^= swap ^ (swap << 2);
    swap = (word_lo ^ (word_lo >> 1)) & 0x22222222;
    word_lo ^= swap ^ (swap << 1);

    // un-shuffle the high word
    swap = (word_hi ^ (word_hi >> 8)) & 0x0000FF00;
    word_hi ^= swap ^ (swap << 8);
    swap = (word_hi ^ (word_hi >> 4)) & 0x00F000F0;
    word_hi ^= swap ^ (swap << 4);
    swap = (word_hi ^ (word_hi >> 2)) & 0x0C0C0C0C;
    word_hi ^= swap ^ (swap << 2);
    swap = (word_hi ^ (word_hi >> 1)) & 0x22222222;
    word_hi ^= swap ^ (swap << 1);

    lane_t result;
    result.x = (uint64_t)word_hi << 32 | word_lo;
    return result;
}
/* ---------------------------------------------------------------- */
/* Rotate a 32-bit word right by n bits. The left-shift amount is computed as
 * (-n & 31), which stays inside 0..31 and makes n == 0 return x unchanged. */
forceinline uint32_t ROR32(uint32_t x, int n)
{
    const uint32_t shifted_down = x >> n;
    const uint32_t wrapped_up = x << (-n & 31);
    return shifted_down | wrapped_up;
}
/* ---------------------------------------------------------------- */
/* Rotate a bit-interleaved 64-bit lane right by n, using two 32-bit rotations.
 * For even n both halves are rotated by n/2 in place. For odd n the halves
 * swap roles: the new w[0] is w[1] rotated by (n-1)/2 and the new w[1] is
 * w[0] rotated by (n+1)/2. */
forceinline uint64_t ROR(uint64_t x, int n)
{
    const lane_t src = {.x = x};
    lane_t dst;

    if (n % 2)
    {
        dst.w[0] = ROR32(src.w[1], (n - 1) / 2);
        dst.w[1] = ROR32(src.w[0], (n + 1) / 2);
    }
    else
    {
        dst.w[0] = ROR32(src.w[0], n / 2);
        dst.w[1] = ROR32(src.w[1], n / 2);
    }
    return dst.x;
}
/* ---------------------------------------------------------------- */
/* One permutation round on the bit-interleaved state, in three steps:
 * XOR the round constant C into lane 2, apply the s-box layer across the five
 * lanes, then apply the linear layer to each lane. The s-box results are
 * built in the temporary state `t` so that every output is computed from the
 * same inputs. */
forceinline void ROUND(state_t *s, uint64_t C)
{
state_t t;
/* round constant */
s->x[2] ^= C;
/* s-box layer */
s->x[0] ^= s->x[4];
s->x[4] ^= s->x[3];
s->x[2] ^= s->x[1];
t.x[0] = s->x[0] ^ (~s->x[1] & s->x[2]);
t.x[2] = s->x[2] ^ (~s->x[3] & s->x[4]);
t.x[4] = s->x[4] ^ (~s->x[0] & s->x[1]);
t.x[1] = s->x[1] ^ (~s->x[2] & s->x[3]);
t.x[3] = s->x[3] ^ (~s->x[4] & s->x[0]);
t.x[1] ^= t.x[0];
t.x[3] ^= t.x[2];
t.x[0] ^= t.x[4];
/* linear layer */
/* Done in two passes per lane: first t ^ ROR(t, b - a), then
 * t ^ ROR(first, a). Since ROR is a rotation, this amounts to
 * t ^ ROR(t, a) ^ ROR(t, b) with (a, b) = (1, 6), (10, 17), (7, 41),
 * (19, 28), (39, 61) for lanes 2, 3, 4, 0, 1. */
s->x[2] = t.x[2] ^ ROR(t.x[2], 6 - 1);
s->x[3] = t.x[3] ^ ROR(t.x[3], 17 - 10);
s->x[4] = t.x[4] ^ ROR(t.x[4], 41 - 7);
s->x[0] = t.x[0] ^ ROR(t.x[0], 28 - 19);
s->x[1] = t.x[1] ^ ROR(t.x[1], 61 - 39);
s->x[2] = t.x[2] ^ ROR(s->x[2], 1);
s->x[3] = t.x[3] ^ ROR(s->x[3], 10);
s->x[4] = t.x[4] ^ ROR(s->x[4], 7);
s->x[0] = t.x[0] ^ ROR(s->x[0], 19);
s->x[1] = t.x[1] ^ ROR(s->x[1], 39);
/* final inversion of lane 2 */
s->x[2] = ~s->x[2];
}
/* ---------------------------------------------------------------- */
/* Run the trailing part of the 12-round schedule on the state.
 *   nr == 12      : all twelve rounds
 *   nr == 6       : the last six rounds
 *   anything else : only the final round */
void PROUNDS(state_t *s, uint8_t nr)
{
    const int all_twelve = (nr == 12);
    const int last_six = all_twelve || (nr == 6);

    if (all_twelve)
    {
        ROUND(s, 0xc0000000c);
        ROUND(s, 0xc00000009);
        ROUND(s, 0x90000000c);
        ROUND(s, 0x900000009);
        ROUND(s, 0xc00000006);
        ROUND(s, 0xc00000003);
    }
    if (last_six)
    {
        ROUND(s, 0x900000006);
        ROUND(s, 0x900000003);
        ROUND(s, 0x60000000c);
        ROUND(s, 0x600000009);
        ROUND(s, 0x30000000c);
    }
    /* the final round is always executed */
    ROUND(s, 0x300000009);
}
/* ---------------------------------------------------------------- */
#endif // ASCONP_H
#ifndef CONFIG_H
#define CONFIG_H
// Set to 1 to compile crypto_hash(); it is guarded by `#if ENABLE_HASH == 1`.
#define ENABLE_HASH 0
#include "isap.h"
// NOTE(review): the arrays below are defined (not just declared) in this
// header, so it should be included from exactly one translation unit.
// ISAP-A-128a
// The three 8-byte IVs differ only in their first byte; the remaining bytes
// are the instance parameters from isap.h.
// ISAP_IV_A: state initialisation in isap_mac().
const uint8_t ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
// ISAP_IV_KA: passed to isap_rk() from isap_mac().
const uint8_t ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
// ISAP_IV_KE: passed to isap_rk() from isap_enc().
const uint8_t ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
// Ascon-Hash
// Initial value of lane 0 in crypto_hash().
const uint8_t ASCON_HASH_IV[] = {0x00, 0x40, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00};
#endif // CONFIG_H
../bi_32/crypto_aead.c
\ No newline at end of file
../bi_32/forceinline.h
\ No newline at end of file
../bi_32/api.h
\ No newline at end of file
#ifndef ASCONP_H_
#define ASCONP_H_
#include <inttypes.h>
#include "forceinline.h"
/* One 64-bit lane of the permutation state, viewable as a single 64-bit
 * word, two 32-bit words, or eight bytes. */
typedef union
{
uint64_t x;
uint32_t w[2];
uint8_t b[8];
} lane_t;
/* The full permutation state: five lanes (40 bytes), with the same views. */
typedef union
{
lane_t l[5];
uint64_t x[5];
uint32_t w[5][2];
uint8_t b[5][8];
} state_t;
/* ---------------------------------------------------------------- */
/* Permutation shorthands. Each expands to a call on a variable named `s`,
 * so a `state_t *s` must be in scope wherever they are used. The second
 * argument is the number of rounds; P_PVP (7 rounds) is the one used by the
 * tag comparison in pvp(). */
#define P_sH PROUNDS(s, 12)
#define P_sB PROUNDS(s, 1)
#define P_sE PROUNDS(s, 6)
#define P_sK PROUNDS(s, 12)
#define P_PVP PROUNDS(s, 7)
/* ---------------------------------------------------------------- */
/* U64TOWORD: reverse the byte order of a lane, then bit-interleave it.
 * WORDTOU64: undo the interleaving, then reverse the byte order. */
#define U64TOWORD(x) to_bit_interleaving(U64BIG(x))
#define WORDTOU64(x) U64BIG(from_bit_interleaving(x))
/* ---------------------------------------------------------------- */
/* Interleaving conversions only, without the byte swap. */
#define TOBI(x) to_bit_interleaving(x)
#define FROMBI(x) from_bit_interleaving(x)
/* ---------------------------------------------------------------- */
/* Reverse the order of the eight bytes of a lane and return the result. */
lane_t U64BIG(lane_t x)
{
    const uint64_t v = x.x;
    uint64_t swapped = 0;

    /* lower four bytes move up ... */
    swapped |= (v & 0x00000000000000FFULL) << 56;
    swapped |= (v & 0x000000000000FF00ULL) << 40;
    swapped |= (v & 0x0000000000FF0000ULL) << 24;
    swapped |= (v & 0x00000000FF000000ULL) << 8;
    /* ... upper four bytes move down */
    swapped |= (v & 0x000000FF00000000ULL) >> 8;
    swapped |= (v & 0x0000FF0000000000ULL) >> 24;
    swapped |= (v & 0x00FF000000000000ULL) >> 40;
    swapped |= (v & 0xFF00000000000000ULL) >> 56;

    x.x = swapped;
    return x;
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Converts a lane to the bit-interleaved form used by ROUND. Each 32-bit
// half is bit-shuffled on its own; the low 16 bits of both halves then form
// w[0] and the high 16 bits of both halves form w[1].
lane_t to_bit_interleaving(lane_t in)
{
    uint32_t word_lo = in.w[0];
    uint32_t word_hi = in.w[1];
    uint32_t swap;

    // shuffle the low word
    swap = (word_lo ^ (word_lo >> 1)) & 0x22222222;
    word_lo ^= swap ^ (swap << 1);
    swap = (word_lo ^ (word_lo >> 2)) & 0x0C0C0C0C;
    word_lo ^= swap ^ (swap << 2);
    swap = (word_lo ^ (word_lo >> 4)) & 0x00F000F0;
    word_lo ^= swap ^ (swap << 4);
    swap = (word_lo ^ (word_lo >> 8)) & 0x0000FF00;
    word_lo ^= swap ^ (swap << 8);

    // shuffle the high word
    swap = (word_hi ^ (word_hi >> 1)) & 0x22222222;
    word_hi ^= swap ^ (swap << 1);
    swap = (word_hi ^ (word_hi >> 2)) & 0x0C0C0C0C;
    word_hi ^= swap ^ (swap << 2);
    swap = (word_hi ^ (word_hi >> 4)) & 0x00F000F0;
    word_hi ^= swap ^ (swap << 4);
    swap = (word_hi ^ (word_hi >> 8)) & 0x0000FF00;
    word_hi ^= swap ^ (swap << 8);

    // recombine the 16-bit halves
    lane_t result;
    result.w[0] = (word_lo & 0x0000FFFF) | (word_hi << 16);
    result.w[1] = (word_lo >> 16) | (word_hi & 0xFFFF0000);
    return result;
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Converts a bit-interleaved lane back to the plain 64-bit form: the 16-bit
// halves of w[0]/w[1] are regrouped into two 32-bit words, and each word is
// run through the shuffle steps of to_bit_interleaving in reverse order.
lane_t from_bit_interleaving(lane_t in)
{
    uint32_t word_lo = (in.w[0] & 0x0000FFFF) | (in.w[1] << 16);
    uint32_t word_hi = (in.w[0] >> 16) | (in.w[1] & 0xFFFF0000);
    uint32_t swap;

    // un-shuffle the low word
    swap = (word_lo ^ (word_lo >> 8)) & 0x0000FF00;
    word_lo ^= swap ^ (swap << 8);
    swap = (word_lo ^ (word_lo >> 4)) & 0x00F000F0;
    word_lo ^= swap ^ (swap << 4);
    swap = (word_lo ^ (word_lo >> 2)) & 0x0C0C0C0C;
    word_lo ^= swap ^ (swap << 2);
    swap = (word_lo ^ (word_lo >> 1)) & 0x22222222;
    word_lo ^= swap ^ (swap << 1);

    // un-shuffle the high word
    swap = (word_hi ^ (word_hi >> 8)) & 0x0000FF00;
    word_hi ^= swap ^ (swap << 8);
    swap = (word_hi ^ (word_hi >> 4)) & 0x00F000F0;
    word_hi ^= swap ^ (swap << 4);
    swap = (word_hi ^ (word_hi >> 2)) & 0x0C0C0C0C;
    word_hi ^= swap ^ (swap << 2);
    swap = (word_hi ^ (word_hi >> 1)) & 0x22222222;
    word_hi ^= swap ^ (swap << 1);

    lane_t result;
    result.x = (uint64_t)word_hi << 32 | word_lo;
    return result;
}
/* ---------------------------------------------------------------- */
/* Rotate a 32-bit word right by n bits. The left-shift amount is computed as
 * (-n & 31), which stays inside 0..31 and makes n == 0 return x unchanged. */
forceinline uint32_t ROR32(uint32_t x, int n)
{
    const uint32_t shifted_down = x >> n;
    const uint32_t wrapped_up = x << (-n & 31);
    return shifted_down | wrapped_up;
}
/* ---------------------------------------------------------------- */
/* Rotate a bit-interleaved 64-bit lane right by n, using two 32-bit rotations.
 * For even n both halves are rotated by n/2 in place. For odd n the halves
 * swap roles: the new w[0] is w[1] rotated by (n-1)/2 and the new w[1] is
 * w[0] rotated by (n+1)/2. */
forceinline uint64_t ROR(uint64_t x, int n)
{
    const lane_t src = {.x = x};
    lane_t dst;

    if (n % 2)
    {
        dst.w[0] = ROR32(src.w[1], (n - 1) / 2);
        dst.w[1] = ROR32(src.w[0], (n + 1) / 2);
    }
    else
    {
        dst.w[0] = ROR32(src.w[0], n / 2);
        dst.w[1] = ROR32(src.w[1], n / 2);
    }
    return dst.x;
}
/* ---------------------------------------------------------------- */
/* One permutation round on the bit-interleaved state, in three steps:
 * XOR the round constant C into lane 2, apply the s-box layer across the five
 * lanes, then apply the linear layer to each lane. The s-box results are
 * built in the temporary state `t` so that every output is computed from the
 * same inputs. */
forceinline void ROUND(state_t *s, uint64_t C)
{
state_t t;
/* round constant */
s->x[2] ^= C;
/* s-box layer */
s->x[0] ^= s->x[4];
s->x[4] ^= s->x[3];
s->x[2] ^= s->x[1];
t.x[0] = s->x[0] ^ (~s->x[1] & s->x[2]);
t.x[2] = s->x[2] ^ (~s->x[3] & s->x[4]);
t.x[4] = s->x[4] ^ (~s->x[0] & s->x[1]);
t.x[1] = s->x[1] ^ (~s->x[2] & s->x[3]);
t.x[3] = s->x[3] ^ (~s->x[4] & s->x[0]);
t.x[1] ^= t.x[0];
t.x[3] ^= t.x[2];
t.x[0] ^= t.x[4];
/* linear layer */
/* Done in two passes per lane: first t ^ ROR(t, b - a), then
 * t ^ ROR(first, a). Since ROR is a rotation, this amounts to
 * t ^ ROR(t, a) ^ ROR(t, b) with (a, b) = (1, 6), (10, 17), (7, 41),
 * (19, 28), (39, 61) for lanes 2, 3, 4, 0, 1. */
s->x[2] = t.x[2] ^ ROR(t.x[2], 6 - 1);
s->x[3] = t.x[3] ^ ROR(t.x[3], 17 - 10);
s->x[4] = t.x[4] ^ ROR(t.x[4], 41 - 7);
s->x[0] = t.x[0] ^ ROR(t.x[0], 28 - 19);
s->x[1] = t.x[1] ^ ROR(t.x[1], 61 - 39);
s->x[2] = t.x[2] ^ ROR(s->x[2], 1);
s->x[3] = t.x[3] ^ ROR(s->x[3], 10);
s->x[4] = t.x[4] ^ ROR(s->x[4], 7);
s->x[0] = t.x[0] ^ ROR(s->x[0], 19);
s->x[1] = t.x[1] ^ ROR(s->x[1], 39);
/* final inversion of lane 2 */
s->x[2] = ~s->x[2];
}
/* ---------------------------------------------------------------- */
/* Run the trailing part of the 12-round schedule on the state.
 *   nr == 12      : all twelve rounds
 *   nr == 7       : the last seven rounds
 *   nr == 6       : the last six rounds
 *   anything else : only the final round */
void PROUNDS(state_t *s, uint8_t nr)
{
    const int all_twelve = (nr == 12);
    const int last_seven = all_twelve || (nr == 7);
    const int last_six = last_seven || (nr == 6);

    if (all_twelve)
    {
        ROUND(s, 0xc0000000c);
        ROUND(s, 0xc00000009);
        ROUND(s, 0x90000000c);
        ROUND(s, 0x900000009);
        ROUND(s, 0xc00000006);
    }
    if (last_seven)
    {
        ROUND(s, 0xc00000003);
    }
    if (last_six)
    {
        ROUND(s, 0x900000006);
        ROUND(s, 0x900000003);
        ROUND(s, 0x60000000c);
        ROUND(s, 0x600000009);
        ROUND(s, 0x30000000c);
    }
    /* the final round is always executed */
    ROUND(s, 0x300000009);
}
/* ---------------------------------------------------------------- */
#endif // ASCONP_H
#ifndef CONFIG_H
#define CONFIG_H
// Set to 1 to compile crypto_hash(); it is guarded by `#if ENABLE_HASH == 1`.
#define ENABLE_HASH 0
#include "isap.h"
// NOTE(review): the arrays below are defined (not just declared) in this
// header, so it should be included from exactly one translation unit.
// ISAP-A-128a
// The three 8-byte IVs differ only in their first byte; the remaining bytes
// are the instance parameters from isap.h.
// ISAP_IV_A: state initialisation in isap_mac().
const uint8_t ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
// ISAP_IV_KA: passed to isap_rk() from isap_mac().
const uint8_t ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
// ISAP_IV_KE: passed to isap_rk() from isap_enc().
const uint8_t ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
// Ascon-Hash
// Initial value of lane 0 in crypto_hash().
const uint8_t ASCON_HASH_IV[] = {0x00, 0x40, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00};
#endif // CONFIG_H
#include "api.h"
#include "isap.h"
#include "crypto_aead.h"
/**
 * AEAD encryption: encrypt `m` into `c`, then append the tag computed over
 * the nonce, the associated data and the ciphertext.
 *
 * @param c      Output buffer; receives mlen ciphertext bytes followed by
 *               ISAP_TAG_SZ tag bytes.
 * @param clen   Receives mlen + ISAP_TAG_SZ.
 * @param m      Plaintext (mlen bytes).
 * @param ad     Associated data (adlen bytes).
 * @param nsec   Unused.
 * @param npub   Nonce.
 * @param k      Key.
 * @return Always 0.
 */
int crypto_aead_encrypt(
    unsigned char *c, unsigned long long *clen,
    const unsigned char *m, unsigned long long mlen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *nsec,
    const unsigned char *npub,
    const unsigned char *k)
{
    (void)nsec;

    // The output is the ciphertext followed directly by the tag
    *clen = mlen + ISAP_TAG_SZ;

    // Nothing to encrypt for an empty message
    if (mlen != 0)
    {
        isap_enc(k, npub, m, mlen, c);
    }

    // The tag is written right behind the ciphertext bytes
    isap_mac(k, npub, ad, adlen, c, mlen, c + mlen);
    return 0;
}
/**
 * AEAD decryption: verify the tag at the end of `c` and, only if it matches,
 * decrypt the ciphertext into `m`.
 *
 * @param m      Output buffer for clen - ISAP_TAG_SZ plaintext bytes; only
 *               written when verification succeeds.
 * @param mlen   Receives clen - ISAP_TAG_SZ, or 0 when clen is too short to
 *               contain a tag.
 * @param nsec   Unused.
 * @param c      Ciphertext followed by the ISAP_TAG_SZ-byte tag (clen bytes).
 * @param ad     Associated data (adlen bytes).
 * @param npub   Nonce.
 * @param k      Key.
 * @return 0 on success, -1 if the input is too short or the tag is wrong.
 */
int crypto_aead_decrypt(
    unsigned char *m, unsigned long long *mlen,
    unsigned char *nsec,
    const unsigned char *c, unsigned long long clen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *npub,
    const unsigned char *k)
{
    (void)nsec;

    // An input shorter than the tag cannot be valid. Without this check the
    // unsigned subtraction below would wrap around and the MAC would read far
    // beyond the end of `c`.
    if (clen < ISAP_TAG_SZ)
    {
        *mlen = 0;
        return -1;
    }

    // Plaintext length is clen - tag length
    *mlen = clen - ISAP_TAG_SZ;

    // Generate tag
    unsigned char T[ISAP_TAG_SZ];
    isap_mac(k, npub, ad, adlen, c, *mlen, T);

    // Copy the received tag and compare it with the computed one
    unsigned char T_star[ISAP_TAG_SZ];
    for (int i = 0; i < ISAP_TAG_SZ; i++)
    {
        T_star[i] = c[*mlen + i];
    }
    int eq_cnt = pvp(T, T_star);

    // Perform decryption only if the tag is correct
    if (eq_cnt != ISAP_TAG_SZ)
    {
        return -1;
    }
    if (*mlen > 0)
    {
        isap_enc(k, npub, c, *mlen, m);
    }
    return 0;
}
../bi_32/forceinline.h
\ No newline at end of file
#include <string.h>
#include <inttypes.h>
#include "api.h"
#include "isap.h"
#include "asconp.h"
#include "config.h"
// Needed for LR tag comparison
// isap_mac() stores lanes 0 and 1 of its state here after absorbing the
// ciphertext; pvp() loads them into lanes 2 and 3 of its own state. Because
// this is shared file-scope state, pvp() is only meaningful after the
// isap_mac() call for the same message.
uint64_t s0, s1;
/**
 * Absorb `len` bytes from `src` into lane 0 of the state, eight bytes at a
 * time, applying P_sH after every absorbed lane.
 *
 * Padding: the data is followed by a single 0x80 byte. If `len` is a multiple
 * of eight (including zero), a lane containing only the padding byte is
 * absorbed after the data, so P_sH always runs at least once.
 *
 * @param s    State to absorb into.
 * @param src  Input bytes.
 * @param len  Number of input bytes.
 */
forceinline void ABSORB_LANES(state_t *s, const uint8_t *src, uint64_t len)
{
    while (len >= 8)
    {
        // Absorb full lanes. `src` has no alignment guarantee, so the bytes
        // are copied into a lane_t instead of being read through a casted
        // pointer.
        lane_t t0;
        memcpy(t0.b, src, 8);
        t0 = U64TOWORD(t0);
        s->x[0] ^= t0.x;
        len -= ISAP_rH / 8;
        src += ISAP_rH / 8;
        P_sH;
    }
    if (len > 0)
    {
        // Absorb partial lane and padding. Input byte i is placed at byte
        // index 7 - i, the same reversal U64BIG applies to full lanes.
        size_t i;
        lane_t t0 = {0};
        for (i = 0; i < len; i++)
        {
            t0.b[7 - i] ^= *src;
            src++;
        }
        t0.b[7 - i] ^= 0x80;
        t0 = TOBI(t0);
        s->x[0] ^= t0.x;
        P_sH;
    }
    else
    {
        // Absorb padded empty lane
        s->b[0][7] ^= 0x80;
        P_sH;
    }
}
/******************************************************************************/
/* ISAP_RK */
/******************************************************************************/
/**
 * Re-keying function: derive a session key from the key `k` and the 16-byte
 * input `y`.
 *
 * The state is initialised with `k` (lanes 0-1) and `iv` (lane 2). The 128
 * bits of `y` are then absorbed one at a time, most significant bit of each
 * byte first: every bit is XORed into the top bit of s->b[0][7] and followed
 * by P_sB, except the very last bit, which is followed by P_sK.
 *
 * The function works on a private state, so `y` may point into `out` (as it
 * does when called from isap_mac()); `out` is written only after all of `y`
 * has been read.
 *
 * @param k       Key; 16 bytes are read.
 * @param iv      IV; 8 bytes are read.
 * @param y       Input to absorb; 16 bytes are read.
 * @param out     State receiving the result in lanes 0-1, plus lane 2 when
 *                outlen > 16.
 * @param outlen  Requested output length in bytes; it only selects whether
 *                two or three lanes are copied.
 */
void isap_rk(
    const uint8_t *k,
    const uint8_t *iv,
    const uint8_t *y,
    state_t *out,
    const size_t outlen)
{
    state_t state;
    state_t *s = &state; // the P_* permutation macros operate on `s`
    lane_t t0;

    // Initialize. Key and IV are byte buffers with no alignment guarantee,
    // so they are copied into a lane_t instead of being read through a
    // casted pointer.
    memcpy(t0.b, k + 0, 8);
    s->l[0] = U64TOWORD(t0);
    memcpy(t0.b, k + 8, 8);
    s->l[1] = U64TOWORD(t0);
    memcpy(t0.b, iv, 8);
    s->l[2] = U64TOWORD(t0);
    s->x[3] = 0;
    s->x[4] = 0;
    P_sK;

    // Absorb Y, bit by bit
    for (size_t i = 0; i < 16; i++)
    {
        uint8_t y_byte = *y;
        s->b[0][7] ^= (y_byte & 0x80) << 0;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x40) << 1;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x20) << 2;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x10) << 3;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x08) << 4;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x04) << 5;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x02) << 6;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x01) << 7;
        if (i != 15)
        {
            // Not the final bit: one more P_sB and move to the next byte
            P_sB;
            y += 1;
        }
    }

    // Squeeze K*
    P_sK;
    out->x[0] = s->x[0];
    out->x[1] = s->x[1];
    if (outlen > 16)
    {
        out->x[2] = s->x[2];
    }
}
/******************************************************************************/
/* ISAP_MAC */
/******************************************************************************/
void isap_mac(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *ad, uint64_t adlen,
const uint8_t *c, uint64_t clen,
uint8_t *tag)
{
state_t state;
state_t *s = &state;
// Initialize
s->l[0] = U64TOWORD(*(lane_t *)(npub + 0));
s->l[1] = U64TOWORD(*(lane_t *)(npub + 8));
s->l[2] = U64TOWORD(*(lane_t *)(ISAP_IV_A + 0));
s->x[3] = 0;
s->x[4] = 0;
P_sH;
// Absorb associated data
ABSORB_LANES(s, ad, adlen);
// Domain seperation
s->w[4][0] ^= 0x1UL;
// Absorb ciphertext
ABSORB_LANES(s, c, clen);
// Needed for LR tag comparison
s0 = s->x[0];
s1 = s->x[1];
// Derive KA*
s->l[0] = WORDTOU64(s->l[0]);
s->l[1] = WORDTOU64(s->l[1]);
isap_rk(k, ISAP_IV_KA, (const uint8_t *)(s->b), s, CRYPTO_KEYBYTES);
// Squeeze tag
P_sH;
lane_t t0 = WORDTOU64(s->l[0]);
memcpy(tag + 0, t0.b, 8);
t0 = WORDTOU64(s->l[1]);
memcpy(tag + 8, t0.b, 8);
}
/******************************************************************************/
/* Leakage resilient tag comparison. */
/* Similar to Fig.3 of https://eprint.iacr.org/2021/402.pdf. */
/******************************************************************************/
int pvp(
const uint8_t *T, const uint8_t *T_star)
{
state_t state;
state_t *s = &state;
// Calculate U
s->l[0] = U64TOWORD(*(lane_t *)(T + 0));
s->l[1] = U64TOWORD(*(lane_t *)(T + 8));
s->x[2] = s0;
s->x[3] = s1;
s->x[4] = 0;
P_PVP;
uint64_t u0, u1;
u0 = s->x[0];
u1 = s->x[1];
// Calculate U'
s->l[0] = U64TOWORD(*(lane_t *)(T_star + 0));
s->l[1] = U64TOWORD(*(lane_t *)(T_star + 8));
s->x[2] = s0;
s->x[3] = s1;
s->x[4] = 0;
P_PVP;
// Compare tag
int eq_cnt = 0;
for (size_t i = 0; i < 8; i++)
{
eq_cnt += (s->b[0][i] == ((uint8_t *)&u0)[i]);
}
for (size_t i = 0; i < 8; i++)
{
eq_cnt += (s->b[1][i] == ((uint8_t *)&u1)[i]);
}
return eq_cnt;
}
/******************************************************************************/
/* ISAP_ENC */
/******************************************************************************/
void isap_enc(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *m, uint64_t mlen,
uint8_t *c)
{
state_t state;
state_t *s = &state;
// Init state
isap_rk(k, ISAP_IV_KE, npub, s, ISAP_STATE_SZ - CRYPTO_NPUBBYTES);
s->l[3] = U64TOWORD(*(lane_t *)(npub + 0));
s->l[4] = U64TOWORD(*(lane_t *)(npub + 8));
while (mlen >= ISAP_rH / 8)
{
// Encrypt full lanes
P_sE;
lane_t t0 = WORDTOU64(s->l[0]);
*(uint64_t *)c = *(uint64_t *)m ^ t0.x;
mlen -= ISAP_rH / 8;
m += ISAP_rH / 8;
c += ISAP_rH / 8;
}
if (mlen > 0)
{
// Encrypt partial lanes
P_sE;
lane_t t0 = WORDTOU64(s->l[0]);
for (uint8_t i = 0; i < mlen; i++)
{
*c = *m ^ t0.b[i];
m += 1;
c += 1;
}
}
}
/******************************************************************************/
/* Ascon-Hash */
/******************************************************************************/
#if ENABLE_HASH == 1
/**
 * Hash inlen bytes of `in` into a 32-byte digest.
 *
 * Only compiled when ENABLE_HASH is 1. The state is initialised from
 * ASCON_HASH_IV, the input is absorbed with ABSORB_LANES(), and four 8-byte
 * lanes are squeezed with P_sH between them.
 *
 * @param out    Output buffer; 32 bytes are written.
 * @param in     Input bytes.
 * @param inlen  Length of `in` in bytes.
 * @return Always 0.
 */
int crypto_hash(uint8_t *out, const uint8_t *in, unsigned long long inlen)
{
    state_t state;
    state_t *s = &state; // the P_* permutation macros operate on `s`
    lane_t t0;

    // Initialize. The IV is a byte array, so it is copied into a lane_t
    // instead of being read through a casted pointer.
    memcpy(t0.b, ASCON_HASH_IV, 8);
    s->l[0] = U64TOWORD(t0);
    s->x[1] = 0;
    s->x[2] = 0;
    s->x[3] = 0;
    s->x[4] = 0;
    P_sH;

    // Absorb input
    ABSORB_LANES(s, in, inlen);

    for (size_t i = 0; i < 4; i++)
    {
        // Squeeze full lanes; `out` may be unaligned, hence memcpy
        t0 = WORDTOU64(s->l[0]);
        memcpy(out + 8 * i, t0.b, 8);
        if (i < 3)
        {
            P_sH;
        }
    }
    return 0;
}
#endif
#ifndef ISAP_H
#define ISAP_H
#include <inttypes.h>
// Parameters of this ISAP instance. Several of them (ISAP_K, ISAP_rH,
// ISAP_rB and the round counts) are also used as bytes of the IVs in
// config.h.
// Rate in bits
#define ISAP_rH 64
#define ISAP_rB 1
// Number of rounds
#define ISAP_sH 12
#define ISAP_sB 1
#define ISAP_sE 6
#define ISAP_sK 12
// State size in bytes
#define ISAP_STATE_SZ 40
// Size of rate in bytes
#define ISAP_rH_SZ ((ISAP_rH + 7) / 8)
// Size of zero truncated IV in bytes
#define ISAP_IV_SZ 8
// Size of tag in bytes
#define ISAP_TAG_SZ 16
// Security level
#define ISAP_K 128
// Computes the authentication tag over nonce, associated data and ciphertext;
// writes 16 bytes to `tag`.
void isap_mac(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *ad, const uint64_t adlen,
const uint8_t *c, const uint64_t clen,
uint8_t *tag);
// Compares two 16-byte tags and returns the number of matching bytes
// (16 means equal). Relies on state saved by the preceding isap_mac() call.
// NOTE(review): declared with `unsigned char` here but defined with
// `uint8_t`; these are only the same type where uint8_t is unsigned char.
int pvp(
const unsigned char *T,
const unsigned char *T_star);
// XORs mlen bytes of `m` with the keystream and writes the result to `c`.
void isap_enc(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *m, const uint64_t mlen,
uint8_t *c);
#endif
#ifndef API_H
#define API_H
// Sizes, in bytes, exposed to the crypto_aead / crypto_hash API.
// isapa128a
// Key length; also passed as the output length to isap_rk() in isap_mac().
#define CRYPTO_KEYBYTES 16
// No secret message number is used (the nsec arguments are ignored).
#define CRYPTO_NSECBYTES 0
// Nonce length.
#define CRYPTO_NPUBBYTES 16
// Number of bytes the ciphertext is longer than the plaintext (the tag).
#define CRYPTO_ABYTES 16
// NOTE(review): presumably tells the benchmarking framework that input and
// output buffers must not overlap -- confirm against the framework docs.
#define CRYPTO_NOOVERLAP 1
// asconhashv12
// Digest length of crypto_hash().
#define CRYPTO_BYTES 32
#define ASCON_HASH_OUTLEN 32
#define ASCON_HASH_ROUNDS 12
#endif
#ifndef ASCONP_H
#define ASCONP_H
#include <inttypes.h>
#include "forceinline.h"
/* One 64-bit lane of the permutation state, viewable as a single 64-bit
 * word, two 32-bit words, or eight bytes. */
typedef union
{
uint64_t x;
uint32_t w[2];
uint8_t b[8];
} lane_t;
/* The full permutation state: five lanes (40 bytes), with the same views. */
typedef union
{
lane_t l[5];
uint64_t x[5];
uint32_t w[5][2];
uint8_t b[5][8];
} state_t;
/* ---------------------------------------------------------------- */
/* Permutation shorthands. Each expands to a call on a variable named `s`,
 * so a `state_t *s` must be in scope wherever they are used. The second
 * argument is the number of rounds. */
#define P_sH PROUNDS(s, 12)
#define P_sB PROUNDS(s, 1)
#define P_sE PROUNDS(s, 6)
#define P_sK PROUNDS(s, 12)
/* ---------------------------------------------------------------- */
/* U64TOWORD: reverse the byte order of a lane, then apply interleave8.
 * WORDTOU64: apply interleave8, then reverse the byte order. */
#define U64TOWORD(x) interleave8(U64BIG(x))
#define WORDTOU64(x) U64BIG(interleave8(x))
/* ---------------------------------------------------------------- */
/* Both directions map to the same function, so interleave8 is presumably
 * its own inverse -- TODO confirm. */
#define TOBI(x) interleave8(x)
#define FROMBI(x) interleave8(x)
/* ---------------------------------------------------------------- */
/* Reverse the order of the eight bytes of a lane and return the result. */
lane_t U64BIG(lane_t x)
{
    const uint64_t v = x.x;
    uint64_t swapped = 0;

    /* lower four bytes move up ... */
    swapped |= (v & 0x00000000000000FFULL) << 56;
    swapped |= (v & 0x000000000000FF00ULL) << 40;
    swapped |= (v & 0x0000000000FF0000ULL) << 24;
    swapped |= (v & 0x00000000FF000000ULL) << 8;
    /* ... upper four bytes move down */
    swapped |= (v & 0x000000FF00000000ULL) >> 8;
    swapped |= (v & 0x0000FF0000000000ULL) >> 24;
    swapped |= (v & 0x00FF000000000000ULL) >> 40;
    swapped |= (v & 0xFF00000000000000ULL) >> 56;

    x.x = swapped;
    return x;
}
/* ---------------------------------------------------------------- */
/* Rotate a byte right by n bits. The arithmetic happens in int (after
 * integer promotion); the result is truncated back to eight bits on return. */
forceinline uint8_t ROR8(uint8_t a, int n)
{
    const int shifted_down = a >> n;
    const int wrapped_up = a << (8 - n);
    return shifted_down | wrapped_up;
}
/* ---------------------------------------------------------------- */
/* Rotate a lane in the 8-way interleaved representation right by n.
 * Output byte i is taken from input byte (n + i) mod 8 and rotated right by
 * (n + i) / 8 bits. */
forceinline uint64_t ROR(uint64_t x, int n)
{
    const lane_t src = {.x = x};
    lane_t dst;

    for (int i = 0; i < 8; i++)
    {
        const int pos = n + i;
        dst.b[i] = ROR8(src.b[pos & 0x7], pos >> 3);
    }
    return dst.x;
}
/* ---------------------------------------------------------------- */
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
/* Permute the bits of a lane in three mask-and-shift steps (shifts of 7, 14
 * and 28). Each step keeps one group of bits in place and exchanges the two
 * remaining groups. */
forceinline lane_t interleave8(lane_t x)
{
    uint64_t v = x.x;

    /* step 1: exchange over a distance of 7 bits */
    v = (v & 0xaa55aa55aa55aa55ull) |
        ((v & 0x00aa00aa00aa00aaull) << 7) |
        ((v >> 7) & 0x00aa00aa00aa00aaull);

    /* step 2: exchange over a distance of 14 bits */
    v = (v & 0xcccc3333cccc3333ull) |
        ((v & 0x0000cccc0000ccccull) << 14) |
        ((v >> 14) & 0x0000cccc0000ccccull);

    /* step 3: exchange over a distance of 28 bits */
    v = (v & 0xf0f0f0f00f0f0f0full) |
        ((v & 0x00000000f0f0f0f0ull) << 28) |
        ((v >> 28) & 0x00000000f0f0f0f0ull);

    x.x = v;
    return x;
}
/* ---------------------------------------------------------------- */
/* One permutation round on the interleaved state, in three steps: XOR the
 * round constant C into lane 2, apply the s-box layer in place across the
 * five lanes, then apply the linear layer to each lane. */
forceinline void ROUND(state_t *s, uint64_t C)
{
uint64_t xtemp;
/* round constant */
s->x[2] ^= C;
/* s-box layer */
s->x[0] ^= s->x[4];
s->x[4] ^= s->x[3];
s->x[2] ^= s->x[1];
/* x[0] is overwritten on the next line, so the term lane 3 needs from the
 * old x[0] is saved first */
xtemp = s->x[0] & ~s->x[4];
s->x[0] ^= s->x[2] & ~s->x[1];
s->x[2] ^= s->x[4] & ~s->x[3];
s->x[4] ^= s->x[1] & ~s->x[0];
s->x[1] ^= s->x[3] & ~s->x[2];
s->x[3] ^= xtemp;
s->x[1] ^= s->x[0];
s->x[3] ^= s->x[2];
s->x[0] ^= s->x[4];
/* linear layer */
/* Per lane: xtemp = x ^ ROR(x, b - a), then x ^= ROR(xtemp, a). Since ROR
 * is a rotation, this amounts to x ^ ROR(x, a) ^ ROR(x, b). */
xtemp = s->x[0] ^ ROR(s->x[0], 28 - 19);
s->x[0] ^= ROR(xtemp, 19);
xtemp = s->x[1] ^ ROR(s->x[1], 61 - 39);
s->x[1] ^= ROR(xtemp, 39);
xtemp = s->x[2] ^ ROR(s->x[2], 6 - 1);
s->x[2] ^= ROR(xtemp, 1);
xtemp = s->x[3] ^ ROR(s->x[3], 17 - 10);
s->x[3] ^= ROR(xtemp, 10);
xtemp = s->x[4] ^ ROR(s->x[4], 41 - 7);
s->x[4] ^= ROR(xtemp, 7);
/* final inversion of lane 2 */
s->x[2] = ~s->x[2];
}
/* ---------------------------------------------------------------- */
/* Run the trailing part of the 12-round schedule on the state.
 *   nr == 12      : all twelve rounds
 *   nr == 6       : the last six rounds
 *   anything else : only the final round */
void PROUNDS(state_t *s, uint8_t nr)
{
    const int all_twelve = (nr == 12);
    const int last_six = all_twelve || (nr == 6);

    if (all_twelve)
    {
        ROUND(s, 0x0101010100000000ull);
        ROUND(s, 0x0101010000000001ull);
        ROUND(s, 0x0101000100000100ull);
        ROUND(s, 0x0101000000000101ull);
        ROUND(s, 0x0100010100010000ull);
        ROUND(s, 0x0100010000010001ull);
    }
    if (last_six)
    {
        ROUND(s, 0x0100000100010100ull);
        ROUND(s, 0x0100000000010101ull);
        ROUND(s, 0x0001010101000000ull);
        ROUND(s, 0x0001010001000001ull);
        ROUND(s, 0x0001000101000100ull);
    }
    /* the final round is always executed */
    ROUND(s, 0x0001000001000101ull);
}
/* ---------------------------------------------------------------- */
#endif // ASCONP_H
#ifndef CONFIG_H
#define CONFIG_H
// Set to 1 to enable code guarded by `#if ENABLE_HASH == 1`.
#define ENABLE_HASH 0
#include "isap.h"
// NOTE(review): the arrays below are defined (not just declared) in this
// header, so it should be included from exactly one translation unit.
// ISAP-A-128a
// The three 8-byte IVs differ only in their first byte; the remaining bytes
// are the instance parameters from isap.h.
const uint8_t ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
// Ascon-Hash
const uint8_t ASCON_HASH_IV[] = {0x00, 0x40, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00};
#endif // CONFIG_H
#include "api.h"
#include "isap.h"
#include "crypto_aead.h"
/*
 * AEAD encryption entry point.
 *
 * Writes mlen bytes of ciphertext followed by an ISAP_TAG_SZ-byte tag to
 * `c` and stores the total length in `*clen`. The tag is computed by
 * isap_mac over `ad` and the ciphertext. `nsec` is unused.
 * Always returns 0.
 */
int crypto_aead_encrypt(
    unsigned char *c, unsigned long long *clen,
    const unsigned char *m, unsigned long long mlen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *nsec,
    const unsigned char *npub,
    const unsigned char *k)
{
    (void)nsec;

    // Output is the ciphertext immediately followed by the tag.
    *clen = mlen + ISAP_TAG_SZ;

    // Nothing to encrypt for an empty message.
    if (mlen != 0)
    {
        isap_enc(k, npub, m, mlen, c);
    }

    // The tag authenticates the associated data and the ciphertext and is
    // stored directly behind the ciphertext bytes.
    isap_mac(k, npub, ad, adlen, c, mlen, c + mlen);

    return 0;
}
/*
 * AEAD decryption entry point.
 *
 * `c` holds clen bytes: the ciphertext followed by an ISAP_TAG_SZ-byte tag.
 * The tag is recomputed with isap_mac over `ad` and the ciphertext and
 * compared against the received one; the plaintext is written to `m` only
 * when they match. `*mlen` receives the plaintext length. `nsec` is unused.
 *
 * Returns 0 on success and -1 when the input is shorter than a tag or the
 * tag does not verify.
 */
int crypto_aead_decrypt(
    unsigned char *m, unsigned long long *mlen,
    unsigned char *nsec,
    const unsigned char *c, unsigned long long clen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *npub,
    const unsigned char *k)
{
    (void)nsec;

    // An input shorter than the tag cannot be valid. Without this check
    // clen - ISAP_TAG_SZ wraps around to a huge unsigned length and the MAC
    // and tag comparison read far outside the buffer.
    if (clen < ISAP_TAG_SZ)
    {
        *mlen = 0;
        return -1;
    }

    // Plaintext length is clen - tag length
    *mlen = clen - ISAP_TAG_SZ;

    // Generate tag
    unsigned char tag[ISAP_TAG_SZ];
    isap_mac(k, npub, ad, adlen, c, *mlen, tag);

    // Compare tag: accumulate the byte-wise differences so that every byte
    // is always inspected and no data-dependent comparison is needed.
    unsigned char diff = 0;
    for (unsigned int i = 0; i < ISAP_TAG_SZ; i++)
    {
        diff |= (unsigned char)(tag[i] ^ c[(*mlen) + i]);
    }
    if (diff != 0)
    {
        return -1;
    }

    // Perform decryption only after the tag has been verified
    if (*mlen > 0)
    {
        isap_enc(k, npub, c, *mlen, m);
    }
    return 0;
}
#ifndef FORCEINLINE_H_
#define FORCEINLINE_H_
/*
 * Defines `forceinline`, a function specifier that requests unconditional
 * inlining where the compiler offers a way to do so and falls back to plain
 * `inline` otherwise.
 */
/* define forceinline macro */
#ifdef _MSC_VER
/* MSVC keyword */
#define forceinline __forceinline
#elif defined(__GNUC__)
/* GCC-compatible attribute syntax */
#define forceinline inline __attribute__((__always_inline__))
#elif defined(__CLANG__)
/*
 * NOTE(review): Clang documents its identification macro as lowercase
 * `__clang__` and also defines __GNUC__, so this branch is presumably never
 * selected -- confirm before relying on it.
 */
#if __has_attribute(__always_inline__)
#define forceinline inline __attribute__((__always_inline__))
#else
#define forceinline inline
#endif
#else
/* unknown compiler: plain inline */
#define forceinline inline
#endif
#endif /* FORCEINLINE_H_ */
#include <string.h>
#include <inttypes.h>
#include "api.h"
#include "isap.h"
#include "asconp.h"
#include "config.h"
/*
 * Absorbs `len` bytes from `src` into lane 0 of the state, one 8-byte lane
 * at a time, running P_sH after every absorbed lane.
 *
 * After the full lanes, a final padded lane is always absorbed: the
 * remaining bytes (possibly none) followed by a single 0x80 byte.
 *
 * Full lanes are loaded with memcpy instead of dereferencing a cast
 * pointer: `src` is an arbitrary byte pointer, so a lane_t access through it
 * may be misaligned and is not a valid access to a byte array.
 */
forceinline void ABSORB_LANES(state_t *s, const uint8_t *src, uint64_t len)
{
    while (len >= ISAP_rH / 8)
    {
        // Absorb full lanes
        lane_t t0;
        memcpy(&t0, src, sizeof(t0));
        t0 = U64TOWORD(t0);
        s->x[0] ^= t0.x;
        len -= ISAP_rH / 8;
        src += ISAP_rH / 8;
        P_sH;
    }

    if (len > 0)
    {
        // Absorb partial lane and padding
        // Bytes are placed from index 7 downwards, i.e. the first input byte
        // lands in b[7]; the padding byte follows the last message byte.
        size_t i;
        lane_t t0 = {0};
        for (i = 0; i < len; i++)
        {
            t0.b[7 - i] ^= *src;
            src++;
        }
        t0.b[7 - i] ^= 0x80;
        t0 = TOBI(t0);
        s->x[0] ^= t0.x;
        P_sH;
    }
    else
    {
        // Absorb padded empty lane
        s->b[0][7] ^= 0x80;
        P_sH;
    }
}
/******************************************************************************/
/* ISAP_RK */
/******************************************************************************/
/*
 * Re-keying function: derives a session key from the 16-byte key `k`, the
 * 8-byte `iv` and the 16-byte string `y`.
 *
 * The state is initialised from k and iv and permuted with P_sK. The 128
 * bits of `y` are then absorbed one bit at a time (most significant bit of
 * each byte first) into the top bit of s->b[0][7], with P_sB between
 * consecutive bits. After a final P_sK, lanes 0 and 1 are copied to `out`,
 * and lane 2 as well when outlen > 16.
 *
 * The computation runs on a private state and `out` is written only after
 * `y` has been fully consumed, so `y` may point into `out` (isap_mac relies
 * on this).
 *
 * k and iv are loaded with memcpy because they are plain byte pointers with
 * no alignment guarantee.
 */
void isap_rk(
    const uint8_t *k,
    const uint8_t *iv,
    const uint8_t *y,
    state_t *out,
    const size_t outlen)
{
    state_t state;
    state_t *s = &state;
    lane_t t0;

    // Initialize
    memcpy(&t0, k + 0, sizeof(t0));
    s->l[0] = U64TOWORD(t0);
    memcpy(&t0, k + 8, sizeof(t0));
    s->l[1] = U64TOWORD(t0);
    memcpy(&t0, iv + 0, sizeof(t0));
    s->l[2] = U64TOWORD(t0);
    s->x[3] = 0;
    s->x[4] = 0;
    P_sK;

    // Absorb Y, bit by bit
    // Each step moves one bit of the current byte into bit position 7
    // (0x80) before XORing it into the state.
    for (size_t i = 0; i < 16; i++)
    {
        uint8_t y_byte = *y;
        s->b[0][7] ^= (y_byte & 0x80) << 0;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x40) << 1;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x20) << 2;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x10) << 3;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x08) << 4;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x04) << 5;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x02) << 6;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x01) << 7;
        // The very last bit is followed by P_sK below instead of P_sB.
        if (i != 15)
        {
            P_sB;
            y += 1;
        }
    }

    // Squeeze K*
    P_sK;
    out->x[0] = s->x[0];
    out->x[1] = s->x[1];
    if (outlen > 16)
    {
        out->x[2] = s->x[2];
    }
}
/******************************************************************************/
/* ISAP_MAC */
/******************************************************************************/
void isap_mac(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *ad, uint64_t adlen,
const uint8_t *c, uint64_t clen,
uint8_t *tag)
{
state_t state;
state_t *s = &state;
// Initialize
s->l[0] = U64TOWORD(*(lane_t *)(npub + 0));
s->l[1] = U64TOWORD(*(lane_t *)(npub + 8));
s->l[2] = U64TOWORD(*(lane_t *)(ISAP_IV_A + 0));
s->x[3] = 0;
s->x[4] = 0;
P_sH;
// Absorb associated data
ABSORB_LANES(s, ad, adlen);
// Domain seperation
s->w[4][0] ^= 0x1UL;
// Absorb ciphertext
ABSORB_LANES(s, c, clen);
// Derive KA*
s->l[0] = WORDTOU64(s->l[0]);
s->l[1] = WORDTOU64(s->l[1]);
isap_rk(k, ISAP_IV_KA, (const uint8_t *)(s->b), s, CRYPTO_KEYBYTES);
// Squeeze tag
P_sH;
lane_t t0 = WORDTOU64(s->l[0]);
memcpy(tag + 0, t0.b, 8);
t0 = WORDTOU64(s->l[1]);
memcpy(tag + 8, t0.b, 8);
}
/******************************************************************************/
/* ISAP_ENC */
/******************************************************************************/
void isap_enc(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *m, uint64_t mlen,
uint8_t *c)
{
state_t state;
state_t *s = &state;
// Init state
isap_rk(k, ISAP_IV_KE, npub, s, ISAP_STATE_SZ - CRYPTO_NPUBBYTES);
s->l[3] = U64TOWORD(*(lane_t *)(npub + 0));
s->l[4] = U64TOWORD(*(lane_t *)(npub + 8));
while (mlen >= ISAP_rH / 8)
{
// Encrypt full lanes
P_sE;
lane_t t0 = WORDTOU64(s->l[0]);
*(uint64_t *)c = *(uint64_t *)m ^ t0.x;
mlen -= ISAP_rH / 8;
m += ISAP_rH / 8;
c += ISAP_rH / 8;
}
if (mlen > 0)
{
// Encrypt partial lanes
P_sE;
lane_t t0 = WORDTOU64(s->l[0]);
for (uint8_t i = 0; i < mlen; i++)
{
*c = *m ^ t0.b[i];
m += 1;
c += 1;
}
}
}
/******************************************************************************/
/* Ascon-Hash */
/******************************************************************************/
#if ENABLE_HASH == 1
int crypto_hash(uint8_t *out, const uint8_t *in, unsigned long long inlen)
{
state_t state;
state_t *s = &state;
// Initialize
s->l[0] = U64TOWORD(*(lane_t *)(ASCON_HASH_IV + 0));
s->x[1] = 0;
s->x[2] = 0;
s->x[3] = 0;
s->x[4] = 0;
P_sH;
// Absorb input
ABSORB_LANES(s, in, inlen);
for (size_t i = 0; i < 4; i++)
{
// Squeeze full lanes
lane_t t0 = WORDTOU64(s->l[0]);
*(uint64_t *)(out + 8 * i) = t0.x;
if (i < 3)
{
P_sH;
}
}
return 0;
}
#endif
#ifndef ISAP_H
#define ISAP_H
#include <inttypes.h>
// Parameter set and public prototypes for this ISAP instance.
// The rate and round values below are also used as bytes of the IVs in
// config.h.
// Rate in bits
#define ISAP_rH 64
#define ISAP_rB 1
// Number of rounds
#define ISAP_sH 12
#define ISAP_sB 1
#define ISAP_sE 6
#define ISAP_sK 12
// State size in bytes
#define ISAP_STATE_SZ 40
// Size of rate in bytes
#define ISAP_rH_SZ ((ISAP_rH + 7) / 8)
// Size of zero truncated IV in bytes
#define ISAP_IV_SZ 8
// Size of tag in bytes
#define ISAP_TAG_SZ 16
// Security level
#define ISAP_K 128
// Computes the 16-byte tag over the associated data (ad, adlen) and the
// ciphertext (c, clen) using key k and the 16-byte nonce npub, and writes
// it to tag.
void isap_mac(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *ad, const uint64_t adlen,
const uint8_t *c, const uint64_t clen,
uint8_t *tag);
// XORs mlen bytes of m with a keystream derived from k and npub and writes
// the result to c; the crypto_aead functions use it for both encryption and
// decryption.
void isap_enc(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *m, const uint64_t mlen,
uint8_t *c);
#endif
#ifndef API_H #ifndef API_H
#define API_H #define API_H
#define CRYPTO_VERSION "2.0.1" // isapa128a
#define CRYPTO_KEYBYTES 16 #define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0 #define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16 #define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16 #define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1 #define CRYPTO_NOOVERLAP 1
// asconhashv12
#define CRYPTO_BYTES 32
#define ASCON_HASH_OUTLEN 32
#define ASCON_HASH_ROUNDS 12
#endif #endif
...@@ -2,139 +2,122 @@ ...@@ -2,139 +2,122 @@
#define ASCONP_H_ #define ASCONP_H_
#include <inttypes.h> #include <inttypes.h>
#include "forceinline.h"
typedef unsigned char u8; typedef union
typedef uint32_t u32; {
typedef unsigned long long u64; uint64_t x;
uint32_t w[2];
uint8_t b[8];
} lane_t;
typedef struct typedef union
{ {
u32 e; lane_t l[5];
u32 o; uint64_t x[5];
} u32_2; uint32_t w[5][2];
uint8_t b[5][8];
} state_t;
/* ---------------------------------------------------------------- */
#define P_sH P12ROUNDS(s)
#define P_sB P1ROUNDS(s)
#define P_sE P6ROUNDS(s)
#define P_sK P12ROUNDS(s)
/* ---------------------------------------------------------------- */
#define U64TOWORD(x) U64BIG(x)
#define WORDTOU64(x) U64BIG(x)
// Round constants, bit-interleaved /* ---------------------------------------------------------------- */
u32 rc_o[12] = {0xc, 0xc, 0x9, 0x9, 0xc, 0xc, 0x9, 0x9, 0x6, 0x6, 0x3, 0x3};
u32 rc_e[12] = {0xc, 0x9, 0xc, 0x9, 0x6, 0x3, 0x6, 0x3, 0xc, 0x9, 0xc, 0x9}; #define TOBI(x) (x)
#define FROMBI(x) (x)
/* ---------------------------------------------------------------- */ /* ---------------------------------------------------------------- */
u64 U64BIG(u64 x) forceinline lane_t U64BIG(lane_t x)
{ {
return ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | x.x = ((((x.x) & 0x00000000000000FFULL) << 56) | (((x.x) & 0x000000000000FF00ULL) << 40) |
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | (((x.x) & 0x0000000000FF0000ULL) << 24) | (((x.x) & 0x00000000FF000000ULL) << 8) |
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | (((x.x) & 0x000000FF00000000ULL) >> 8) | (((x.x) & 0x0000FF0000000000ULL) >> 24) |
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56)); (((x.x) & 0x00FF000000000000ULL) >> 40) | (((x.x) & 0xFF00000000000000ULL) >> 56));
return x;
} }
/* ---------------------------------------------------------------- */ /* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 forceinline void ROUND(state_t *s, uint8_t C)
void to_bit_interleaving(u32_2 *out, u64 in)
{ {
u32 hi = (in) >> 32; uint64_t xtemp;
u32 lo = (u32)(in); /* round constant */
u32 r0, r1; s->x[2] ^= C;
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); /* s-box layer */
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); s->x[0] ^= s->x[4];
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); s->x[4] ^= s->x[3];
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); s->x[2] ^= s->x[1];
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); xtemp = s->x[0] & ~s->x[4];
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); s->x[0] ^= s->x[2] & ~s->x[1];
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); s->x[2] ^= s->x[4] & ~s->x[3];
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); s->x[4] ^= s->x[1] & ~s->x[0];
(*out).e = (lo & 0x0000FFFF) | (hi << 16); s->x[1] ^= s->x[3] & ~s->x[2];
(*out).o = (lo >> 16) | (hi & 0xFFFF0000); s->x[3] ^= xtemp;
s->x[1] ^= s->x[0];
s->x[3] ^= s->x[2];
s->x[0] ^= s->x[4];
s->x[2] = ~s->x[2];
/* linear layer */
s->x[0] ^=
(s->x[0] >> 19) ^ (s->x[0] << 45) ^ (s->x[0] >> 28) ^ (s->x[0] << 36);
s->x[1] ^=
(s->x[1] >> 61) ^ (s->x[1] << 3) ^ (s->x[1] >> 39) ^ (s->x[1] << 25);
s->x[2] ^=
(s->x[2] >> 1) ^ (s->x[2] << 63) ^ (s->x[2] >> 6) ^ (s->x[2] << 58);
s->x[3] ^=
(s->x[3] >> 10) ^ (s->x[3] << 54) ^ (s->x[3] >> 17) ^ (s->x[3] << 47);
s->x[4] ^=
(s->x[4] >> 7) ^ (s->x[4] << 57) ^ (s->x[4] >> 41) ^ (s->x[4] << 23);
} }
/* ---------------------------------------------------------------- */ /* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 forceinline void P12ROUNDS(state_t *s)
void from_bit_interleaving(u64 *out, u32_2 in)
{ {
u32 lo = ((in).e & 0x0000FFFF) | ((in).o << 16); ROUND(s, 0xf0);
u32 hi = ((in).e >> 16) | ((in).o & 0xFFFF0000); ROUND(s, 0xe1);
u32 r0, r1; ROUND(s, 0xd2);
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); ROUND(s, 0xc3);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); ROUND(s, 0xb4);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); ROUND(s, 0xa5);
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); ROUND(s, 0x96);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); ROUND(s, 0x87);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); ROUND(s, 0x78);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); ROUND(s, 0x69);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); ROUND(s, 0x5a);
*out = (u64)hi << 32 | lo; ROUND(s, 0x4b);
} }
/* ---------------------------------------------------------------- */ /* ---------------------------------------------------------------- */
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) forceinline void P6ROUNDS(state_t *s)
{
ROUND(s, 0x96);
ROUND(s, 0x87);
ROUND(s, 0x78);
ROUND(s, 0x69);
ROUND(s, 0x5a);
ROUND(s, 0x4b);
}
/* ---------------------------------------------------------------- */ /* ---------------------------------------------------------------- */
void static inline PX(u32 rounds, u32_2* x0, u32_2* x1, u32_2* x2, u32_2* x3, u32_2* x4) { forceinline void P1ROUNDS(state_t *s)
u32_2 t0, t1, t2, t3, t4; {
for (u32 r = 12 - rounds; r < 12; r++){ ROUND(s, 0x4b);
/* rcon */
(*x2).e ^= rc_e[r];
(*x2).o ^= rc_o[r];
/* non-linear layer */
(*x0).e ^= (*x4).e;
(*x0).o ^= (*x4).o;
(*x4).e ^= (*x3).e;
(*x4).o ^= (*x3).o;
(*x2).e ^= (*x1).e;
(*x2).o ^= (*x1).o;
(t0).e = (*x0).e;
(t0).o = (*x0).o;
(t4).e = (*x4).e;
(t4).o = (*x4).o;
(t3).e = (*x3).e;
(t3).o = (*x3).o;
(t1).e = (*x1).e;
(t1).o = (*x1).o;
(t2).e = (*x2).e;
(t2).o = (*x2).o;
(*x0).e = t0.e ^ (~t1.e & t2.e);
(*x0).o = t0.o ^ (~t1.o & t2.o);
(*x2).e = t2.e ^ (~t3.e & t4.e);
(*x2).o = t2.o ^ (~t3.o & t4.o);
(*x4).e = t4.e ^ (~t0.e & t1.e);
(*x4).o = t4.o ^ (~t0.o & t1.o);
(*x1).e = t1.e ^ (~t2.e & t3.e);
(*x1).o = t1.o ^ (~t2.o & t3.o);
(*x3).e = t3.e ^ (~t4.e & t0.e);
(*x3).o = t3.o ^ (~t4.o & t0.o);
(*x1).e ^= (*x0).e;
(*x1).o ^= (*x0).o;
(*x3).e ^= (*x2).e;
(*x3).o ^= (*x2).o;
(*x0).e ^= (*x4).e;
(*x0).o ^= (*x4).o;
/* linear layer */
t0.e = (*x0).e ^ ROTR32((*x0).o, 4);
t0.o = (*x0).o ^ ROTR32((*x0).e, 5);
t1.e = (*x1).e ^ ROTR32((*x1).e, 11);
t1.o = (*x1).o ^ ROTR32((*x1).o, 11);
t2.e = (*x2).e ^ ROTR32((*x2).o, 2);
t2.o = (*x2).o ^ ROTR32((*x2).e, 3);
t3.e = (*x3).e ^ ROTR32((*x3).o, 3);
t3.o = (*x3).o ^ ROTR32((*x3).e, 4);
t4.e = (*x4).e ^ ROTR32((*x4).e, 17);
t4.o = (*x4).o ^ ROTR32((*x4).o, 17);
(*x0).e ^= ROTR32(t0.o, 9);
(*x0).o ^= ROTR32(t0.e, 10);
(*x1).e ^= ROTR32(t1.o, 19);
(*x1).o ^= ROTR32(t1.e, 20);
(*x2).e ^= t2.o;
(*x2).o ^= ROTR32(t2.e, 1);
(*x3).e ^= ROTR32(t3.e, 5);
(*x3).o ^= ROTR32(t3.o, 5);
(*x4).e ^= ROTR32(t4.o, 3);
(*x4).o ^= ROTR32(t4.e, 4);
(*x2).e = ~(*x2).e;
(*x2).o = ~(*x2).o;
}
} }
/* ---------------------------------------------------------------- */
#endif // ASCONP_H_ #endif // ASCONP_H_
#ifndef CONFIG_H
#define CONFIG_H
#define ENABLE_HASH 0
#include "isap.h"
// ISAP-A-128a
// Initialization vectors. Each IV is 8 bytes: a distinguishing first byte
// (0x01, 0x02, 0x03) followed by the parameter bytes ISAP_K, ISAP_rH,
// ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK from isap.h.
// NOTE(review): these are definitions in a header; including config.h from
// more than one translation unit would presumably produce duplicate
// symbols -- confirm it is only included once.
const uint8_t ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
// Ascon-Hash
// 8-byte IV for the optional hash function (see ENABLE_HASH).
const uint8_t ASCON_HASH_IV[] = {0x00, 0x40, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00};
#endif // CONFIG_H
#ifndef FORCEINLINE_H_
#define FORCEINLINE_H_
/*
 * Defines `forceinline`, a function specifier that requests unconditional
 * inlining where the compiler offers a way to do so and falls back to plain
 * `inline` otherwise.
 */
/* define forceinline macro */
#ifdef _MSC_VER
/* MSVC keyword */
#define forceinline __forceinline
#elif defined(__GNUC__)
/* GCC-compatible attribute syntax */
#define forceinline inline __attribute__((__always_inline__))
#elif defined(__CLANG__)
/*
 * NOTE(review): Clang documents its identification macro as lowercase
 * `__clang__` and also defines __GNUC__, so this branch is presumably never
 * selected -- confirm before relying on it.
 */
#if __has_attribute(__always_inline__)
#define forceinline inline __attribute__((__always_inline__))
#else
#define forceinline inline
#endif
#else
/* unknown compiler: plain inline */
#define forceinline inline
#endif
#endif /* FORCEINLINE_H_ */
#include <string.h>
#include <inttypes.h>
#include "api.h" #include "api.h"
#include "isap.h" #include "isap.h"
#include "asconp.h" #include "asconp.h"
#include "config.h"
const u8 ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK}; forceinline void ABSORB_LANES(state_t *s, const uint8_t *src, uint64_t len)
const u8 ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK}; {
const u8 ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK}; while (len >= 8)
{
// Absorb full lanes
lane_t t0 = U64TOWORD(*(lane_t *)(src + 0));
s->x[0] ^= t0.x;
len -= ISAP_rH / 8;
src += ISAP_rH / 8;
P_sH;
}
#define P_sB PX(1,&x0,&x1,&x2,&x3,&x4) if (len > 0)
#define P_sE PX(6,&x0,&x1,&x2,&x3,&x4) {
#define P_sH PX(12,&x0,&x1,&x2,&x3,&x4) // Absorb partial lane and padding
#define P_sK PX(12,&x0,&x1,&x2,&x3,&x4) size_t i;
lane_t t0 = {0};
for (i = 0; i < len; i++)
{
t0.b[7 - i] ^= *src;
src++;
}
t0.b[7 - i] ^= 0x80;
t0 = TOBI(t0);
s->x[0] ^= t0.x;
P_sH;
}
else
{
// Absorb padded empty lane
s->b[0][7] ^= 0x80;
P_sH;
}
}
/******************************************************************************/ /******************************************************************************/
/* ISAP_RK */ /* ISAP_RK */
/******************************************************************************/ /******************************************************************************/
void isap_rk( void isap_rk(
const u8 *k, const uint8_t *k,
const u8 *iv, const uint8_t *iv,
const u8 *y, const uint8_t *y,
u8 *out, state_t *out,
const u8 outlen) const size_t outlen)
{ {
// State variables state_t state;
u32_2 x0, x1, x2, x3, x4; state_t *s = &state;
// Initialize // Initialize
to_bit_interleaving(&x0, U64BIG(*(u64 *)(k + 0))); s->l[0] = U64TOWORD(*(lane_t *)(k + 0));
to_bit_interleaving(&x1, U64BIG(*(u64 *)(k + 8))); s->l[1] = U64TOWORD(*(lane_t *)(k + 8));
to_bit_interleaving(&x2, U64BIG(*(u64 *)(iv + 0))); s->l[2] = U64TOWORD(*(lane_t *)(iv + 0));
x3.o = 0; s->x[3] = 0;
x3.e = 0; s->x[4] = 0;
x4.o = 0;
x4.e = 0;
P_sK; P_sK;
// Absorb Y, bit by bit // Absorb Y, bit by bit
for (u8 i = 0; i < 127; i++) { for (size_t i = 0; i < 16; i++)
u8 cur_byte_pos = i / 8; {
u8 cur_bit_pos = 7 - (i % 8); uint8_t y_byte = *y;
u32 cur_bit = ((y[cur_byte_pos] >> (cur_bit_pos)) & 0x01) << 7; s->b[0][7] ^= (y_byte & 0x80) << 0;
x0.o ^= ((u32)cur_bit) << 24; P_sB;
s->b[0][7] ^= (y_byte & 0x40) << 1;
P_sB;
s->b[0][7] ^= (y_byte & 0x20) << 2;
P_sB;
s->b[0][7] ^= (y_byte & 0x10) << 3;
P_sB;
s->b[0][7] ^= (y_byte & 0x08) << 4;
P_sB; P_sB;
s->b[0][7] ^= (y_byte & 0x04) << 5;
P_sB;
s->b[0][7] ^= (y_byte & 0x02) << 6;
P_sB;
s->b[0][7] ^= (y_byte & 0x01) << 7;
if (i != 15)
{
P_sB;
y += 1;
}
} }
u8 cur_bit = ((y[15]) & 0x01) << 7;
x0.o ^= ((u32)cur_bit) << (24);
// Squeeze - Derive K* // Squeeze K*
P_sK; P_sK;
*(u32 *)(out + 0) = x0.o; out->x[0] = s->x[0];
*(u32 *)(out + 4) = x0.e; out->x[1] = s->x[1];
*(u32 *)(out + 8) = x1.o; if (outlen > 16)
*(u32 *)(out + 12) = x1.e; {
if (outlen > 16) { out->x[2] = s->x[2];
*(u32 *)(out + 16) = x2.o;
*(u32 *)(out + 20) = x2.e;
} }
} }
...@@ -63,121 +103,43 @@ void isap_rk( ...@@ -63,121 +103,43 @@ void isap_rk(
/******************************************************************************/ /******************************************************************************/
void isap_mac( void isap_mac(
const u8 *k, const uint8_t *k,
const u8 *npub, const uint8_t *npub,
const u8 *ad, u64 adlen, const uint8_t *ad, uint64_t adlen,
const u8 *c, u64 clen, const uint8_t *c, uint64_t clen,
u8 *tag) uint8_t *tag)
{ {
// State and temporary variables state_t state;
u32_2 x0, x1, x2, x3, x4; state_t *s = &state;
u32_2 t0;
u64 tmp0;
// Initialize // Initialize
to_bit_interleaving(&x0, U64BIG(*(u64 *)npub + 0)); s->l[0] = U64TOWORD(*(lane_t *)(npub + 0));
to_bit_interleaving(&x1, U64BIG(*(u64 *)(npub + 8))); s->l[1] = U64TOWORD(*(lane_t *)(npub + 8));
to_bit_interleaving(&x2, U64BIG(*(u64 *)(ISAP_IV_A))); s->l[2] = U64TOWORD(*(lane_t *)(ISAP_IV_A + 0));
x3.o = 0; s->x[3] = 0;
x3.e = 0; s->x[4] = 0;
x4.o = 0;
x4.e = 0;
P_sH; P_sH;
// Absorb full lanes of AD // Absorb associated data
while (adlen >= 8) ABSORB_LANES(s, ad, adlen);
{
to_bit_interleaving(&t0, U64BIG(*(u64 *)ad));
x0.e ^= t0.e;
x0.o ^= t0.o;
adlen -= ISAP_rH / 8;
ad += ISAP_rH / 8;
P_sH;
}
// Absorb partial lane of AD and add padding
if (adlen > 0)
{
tmp0 = 0;
u8 *tmp0_bytes = (u8 *)&tmp0;
u8 i;
for (i = 0; i < adlen; i++)
{
tmp0_bytes[i] = *ad;
ad += 1;
}
tmp0_bytes[i] = 0x80;
to_bit_interleaving(&t0, U64BIG(tmp0));
x0.e ^= t0.e;
x0.o ^= t0.o;
P_sH;
}
// Absorb AD padding if not already done before
if (adlen == 0)
{
x0.o ^= 0x80000000;
P_sH;
}
// Domain Seperation
x4.e ^= ((u32)0x01);
// Absorb full lanes of C // Domain seperation
while (clen >= 8) s->w[4][0] ^= 0x1UL;
{
to_bit_interleaving(&t0, U64BIG(*(u64 *)c));
x0.e ^= t0.e;
x0.o ^= t0.o;
P_sH;
clen -= ISAP_rH / 8;
c += ISAP_rH / 8;
}
// Absorb partial lane of C and add padding // Absorb ciphertext
if (clen > 0) ABSORB_LANES(s, c, clen);
{
tmp0 = 0;
u8 *tmp0_bytes = (u8 *)&tmp0;
u8 i;
for (i = 0; i < clen; i++)
{
tmp0_bytes[i] = *c;
c += 1;
}
tmp0_bytes[i] = 0x80;
to_bit_interleaving(&t0, U64BIG(tmp0));
x0.e ^= t0.e;
x0.o ^= t0.o;
P_sH;
}
// Absorb C padding if not already done before // Derive KA*
if (clen == 0) s->l[0] = WORDTOU64(s->l[0]);
{ s->l[1] = WORDTOU64(s->l[1]);
x0.o ^= 0x80000000; isap_rk(k, ISAP_IV_KA, (const uint8_t *)(s->b), s, CRYPTO_KEYBYTES);
P_sH;
}
// Finalize - Derive Ka* // Squeeze tag
u64 y64[CRYPTO_KEYBYTES / 8];
from_bit_interleaving(&tmp0, x0);
y64[0] = U64BIG(tmp0);
from_bit_interleaving(&tmp0, x1);
y64[1] = U64BIG(tmp0);
u32 ka_star32[CRYPTO_KEYBYTES / 4];
isap_rk(k, ISAP_IV_KA, (u8 *)y64, (u8 *)ka_star32, CRYPTO_KEYBYTES);
// Finalize - Squeeze T
x0.o = ka_star32[0];
x0.e = ka_star32[1];
x1.o = ka_star32[2];
x1.e = ka_star32[3];
P_sH; P_sH;
from_bit_interleaving(&tmp0, x0); lane_t t0 = WORDTOU64(s->l[0]);
*(u64 *)(tag + 0) = U64BIG(tmp0); memcpy(tag + 0, t0.b, 8);
from_bit_interleaving(&tmp0, x1); t0 = WORDTOU64(s->l[1]);
*(u64 *)(tag + 8) = U64BIG(tmp0); memcpy(tag + 8, t0.b, 8);
} }
/******************************************************************************/ /******************************************************************************/
...@@ -185,52 +147,80 @@ void isap_mac( ...@@ -185,52 +147,80 @@ void isap_mac(
/******************************************************************************/ /******************************************************************************/
void isap_enc( void isap_enc(
const u8 *k, const uint8_t *k,
const u8 *npub, const uint8_t *npub,
const u8 *m, u64 mlen, const uint8_t *m, uint64_t mlen,
u8 *c) uint8_t *c)
{ {
// Derive Ke state_t state;
u8 ke[ISAP_STATE_SZ - CRYPTO_NPUBBYTES]; state_t *s = &state;
isap_rk(k, ISAP_IV_KE, npub, ke, ISAP_STATE_SZ - CRYPTO_NPUBBYTES);
// Init state
// State and temporary variables isap_rk(k, ISAP_IV_KE, npub, s, ISAP_STATE_SZ - CRYPTO_NPUBBYTES);
u32_2 x0, x1, x2, x3, x4; s->l[3] = U64TOWORD(*(lane_t *)(npub + 0));
u64 tmp0; s->l[4] = U64TOWORD(*(lane_t *)(npub + 8));
// Init State while (mlen >= ISAP_rH / 8)
x0.o = *(u32 *)(ke + 0);
x0.e = *(u32 *)(ke + 4);
x1.o = *(u32 *)(ke + 8);
x1.e = *(u32 *)(ke + 12);
x2.o = *(u32 *)(ke + 16);
x2.e = *(u32 *)(ke + 20);
to_bit_interleaving(&x3, U64BIG(*(u64 *)npub));
to_bit_interleaving(&x4, U64BIG(*(u64 *)(npub + 8)));
// Squeeze full lanes
while (mlen >= 8)
{ {
// Encrypt full lanes
P_sE; P_sE;
from_bit_interleaving(&tmp0, x0); lane_t t0 = WORDTOU64(s->l[0]);
*(u64 *)c = *(u64 *)m ^ U64BIG(tmp0); *(uint64_t *)c = *(uint64_t *)m ^ t0.x;
mlen -= 8; mlen -= ISAP_rH / 8;
m += ISAP_rH / 8; m += ISAP_rH / 8;
c += ISAP_rH / 8; c += ISAP_rH / 8;
} }
// Squeeze partial lane
if (mlen > 0) if (mlen > 0)
{ {
// Encrypt partial lanes
P_sE; P_sE;
from_bit_interleaving(&tmp0, x0); lane_t t0 = WORDTOU64(s->l[0]);
tmp0 = U64BIG(tmp0); for (uint8_t i = 0; i < mlen; i++)
u8 *tmp0_bytes = (u8 *)&tmp0;
for (u8 i = 0; i < mlen; i++)
{ {
*c = *m ^ tmp0_bytes[i]; *c = *m ^ t0.b[i];
m += 1; m += 1;
c += 1; c += 1;
} }
} }
} }
/******************************************************************************/
/* Ascon-Hash */
/******************************************************************************/
#if ENABLE_HASH == 1
int crypto_hash(uint8_t *out, const uint8_t *in, unsigned long long inlen)
{
state_t state;
state_t *s = &state;
// Initialize
s->l[0] = U64TOWORD(*(lane_t *)(ASCON_HASH_IV + 0));
s->x[1] = 0;
s->x[2] = 0;
s->x[3] = 0;
s->x[4] = 0;
P_sH;
// Absorb input
ABSORB_LANES(s, in, inlen);
for (size_t i = 0; i < 4; i++)
{
// Squeeze full lanes
lane_t t0 = WORDTOU64(s->l[0]);
*(uint64_t *)(out + 8 * i) = t0.x;
if (i < 3)
{
P_sH;
}
}
return 0;
}
#endif
#ifndef ISAP_H #ifndef ISAP_H_
#define ISAP_H #define ISAP_H_
#include <inttypes.h>
// Rate in bits // Rate in bits
#define ISAP_rH 64 #define ISAP_rH 64
...@@ -27,23 +29,16 @@ ...@@ -27,23 +29,16 @@
#define ISAP_K 128 #define ISAP_K 128
void isap_mac( void isap_mac(
const unsigned char *k, const uint8_t *k,
const unsigned char *npub, const uint8_t *npub,
const unsigned char *ad, unsigned long long adlen, const uint8_t *ad, const uint64_t adlen,
const unsigned char *c, unsigned long long clen, const uint8_t *c, const uint64_t clen,
unsigned char *tag); uint8_t *tag);
void isap_rk(
const unsigned char *k,
const unsigned char *iv,
const unsigned char *in,
unsigned char *out,
const unsigned char outlen);
void isap_enc( void isap_enc(
const unsigned char *k, const uint8_t *k,
const unsigned char *npub, const uint8_t *npub,
const unsigned char *m, unsigned long long mlen, const uint8_t *m, const uint64_t mlen,
unsigned char *c); uint8_t *c);
#endif #endif // ISAP_H_
#ifndef ASCONP_H_
#define ASCONP_H_
#include <inttypes.h>
#include "forceinline.h"
/* One 64-bit lane, also addressable as two 32-bit words or eight bytes. */
typedef union
{
    uint64_t x;
    uint32_t w[2];
    uint8_t b[8];
} lane_t;
/* The five-lane state, with the same alternative views as lane_t. */
typedef union
{
    lane_t l[5];
    uint64_t x[5];
    uint32_t w[5][2];
    uint8_t b[5][8];
} state_t;
/* ---------------------------------------------------------------- */
/* Permutation shorthands; each expects a `state_t *s` in scope. */
#define P_sH PROUNDS(s, 12)
#define P_sB PROUNDS(s, 1)
#define P_sE PROUNDS(s, 6)
#define P_sK PROUNDS(s, 12)
/* ---------------------------------------------------------------- */
/* Both directions of the byte-order conversion are the same swap. */
#define U64TOWORD(x) U64BIG(x)
#define WORDTOU64(x) U64BIG(x)
/* ---------------------------------------------------------------- */
/* No bit interleaving in this variant: both conversions are identities. */
#define TOBI(x) (x)
#define FROMBI(x) (x)
/* ---------------------------------------------------------------- */
/*
 * Returns `x` with the byte order of its 64-bit value reversed.
 * The swap is done in three stages: adjacent bytes, adjacent 16-bit
 * groups, then the two 32-bit halves.
 */
lane_t U64BIG(lane_t x)
{
    uint64_t v = x.x;

    v = ((v & 0x00FF00FF00FF00FFULL) << 8) | ((v >> 8) & 0x00FF00FF00FF00FFULL);
    v = ((v & 0x0000FFFF0000FFFFULL) << 16) | ((v >> 16) & 0x0000FFFF0000FFFFULL);
    v = (v << 32) | (v >> 32);

    x.x = v;
    return x;
}
/* ---------------------------------------------------------------- */
/*
 * One permutation round on the five 64-bit lanes of `s`.
 *
 * The 8-bit round constant `C` is XORed into lane 2, followed by the s-box
 * layer (including the complement of lane 2) and the linear layer, in
 * which every lane is XORed with two right-rotations of itself.
 * The lanes are processed in local variables and written back at the end.
 */
forceinline void ROUND(state_t *s, uint8_t C)
{
    uint64_t a = s->x[0];
    uint64_t b = s->x[1];
    uint64_t c = s->x[2];
    uint64_t d = s->x[3];
    uint64_t e = s->x[4];
    uint64_t t;

    /* round constant */
    c ^= C;

    /* s-box layer (statement order matters: later lines read updated lanes) */
    a ^= e;
    e ^= d;
    c ^= b;
    t = a & ~e;
    a ^= c & ~b;
    c ^= e & ~d;
    e ^= b & ~a;
    b ^= d & ~c;
    d ^= t;
    b ^= a;
    d ^= c;
    a ^= e;
    c = ~c;

    /* linear layer: each shift pair sums to 64, i.e. forms one rotation */
    s->x[0] = a ^ (a >> 19) ^ (a << 45) ^ (a >> 28) ^ (a << 36);
    s->x[1] = b ^ (b >> 61) ^ (b << 3) ^ (b >> 39) ^ (b << 25);
    s->x[2] = c ^ (c >> 1) ^ (c << 63) ^ (c >> 6) ^ (c << 58);
    s->x[3] = d ^ (d >> 10) ^ (d << 54) ^ (d >> 17) ^ (d << 47);
    s->x[4] = e ^ (e >> 7) ^ (e << 57) ^ (e >> 41) ^ (e << 23);
}
/* ---------------------------------------------------------------- */
/*
 * Applies the last `nr` rounds of the 12-round permutation to `s`.
 *
 * nr == 12 runs all twelve rounds, nr == 6 runs the last six, and every
 * other value runs only the final round.
 */
void PROUNDS(state_t *s, uint8_t nr)
{
    const int all_twelve = (nr == 12);
    const int last_six = all_twelve || (nr == 6);

    if (all_twelve)
    {
        /* rounds 1..6 */
        ROUND(s, 0xf0);
        ROUND(s, 0xe1);
        ROUND(s, 0xd2);
        ROUND(s, 0xc3);
        ROUND(s, 0xb4);
        ROUND(s, 0xa5);
    }
    if (last_six)
    {
        /* rounds 7..11 */
        ROUND(s, 0x96);
        ROUND(s, 0x87);
        ROUND(s, 0x78);
        ROUND(s, 0x69);
        ROUND(s, 0x5a);
    }
    /* round 12 is executed for every value of nr */
    ROUND(s, 0x4b);
}
/* ---------------------------------------------------------------- */
#endif // ASCONP_H_
#ifndef CONFIG_H
#define CONFIG_H
#define ENABLE_HASH 0
#include "isap.h"
// ISAP-A-128a
// Initialization vectors. Each IV is 8 bytes: a distinguishing first byte
// (0x01, 0x02, 0x03) followed by the parameter bytes ISAP_K, ISAP_rH,
// ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK from isap.h.
// NOTE(review): these are definitions in a header; including config.h from
// more than one translation unit would presumably produce duplicate
// symbols -- confirm it is only included once.
const uint8_t ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
// Ascon-Hash
// 8-byte IV for the optional hash function (see ENABLE_HASH).
const uint8_t ASCON_HASH_IV[] = {0x00, 0x40, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00};
#endif // CONFIG_H
../opt_32/crypto_aead.c
\ No newline at end of file
../opt_32/forceinline.h
\ No newline at end of file
../opt_32/api.h
\ No newline at end of file
#ifndef ASCONP_H_
#define ASCONP_H_
#include <inttypes.h>
#include "forceinline.h"
/* One 64-bit lane, also addressable as two 32-bit words or eight bytes. */
typedef union
{
    uint64_t x;
    uint32_t w[2];
    uint8_t b[8];
} lane_t;
/* The five-lane state, with the same alternative views as lane_t. */
typedef union
{
    lane_t l[5];
    uint64_t x[5];
    uint32_t w[5][2];
    uint8_t b[5][8];
} state_t;
/* ---------------------------------------------------------------- */
/* Permutation shorthands; each expects a `state_t *s` in scope. */
#define P_sH PROUNDS(s, 12)
#define P_sB PROUNDS(s, 1)
#define P_sE PROUNDS(s, 6)
#define P_sK PROUNDS(s, 12)
#define P_PVP PROUNDS(s, 7)
/* ---------------------------------------------------------------- */
/* Both directions of the byte-order conversion are the same swap. */
#define U64TOWORD(x) U64BIG(x)
#define WORDTOU64(x) U64BIG(x)
/* ---------------------------------------------------------------- */
/* No bit interleaving in this variant: both conversions are identities. */
#define TOBI(x) (x)
#define FROMBI(x) (x)
/* ---------------------------------------------------------------- */
/*
 * Returns `x` with the byte order of its 64-bit value reversed.
 * The swap is done in three stages: adjacent bytes, adjacent 16-bit
 * groups, then the two 32-bit halves.
 */
lane_t U64BIG(lane_t x)
{
    uint64_t v = x.x;

    v = ((v & 0x00FF00FF00FF00FFULL) << 8) | ((v >> 8) & 0x00FF00FF00FF00FFULL);
    v = ((v & 0x0000FFFF0000FFFFULL) << 16) | ((v >> 16) & 0x0000FFFF0000FFFFULL);
    v = (v << 32) | (v >> 32);

    x.x = v;
    return x;
}
/* ---------------------------------------------------------------- */
/*
 * One permutation round on the five 64-bit lanes of `s`.
 *
 * The 8-bit round constant `C` is XORed into lane 2, followed by the s-box
 * layer (including the complement of lane 2) and the linear layer, in
 * which every lane is XORed with two right-rotations of itself.
 * The lanes are processed in local variables and written back at the end.
 */
forceinline void ROUND(state_t *s, uint8_t C)
{
    uint64_t a = s->x[0];
    uint64_t b = s->x[1];
    uint64_t c = s->x[2];
    uint64_t d = s->x[3];
    uint64_t e = s->x[4];
    uint64_t t;

    /* round constant */
    c ^= C;

    /* s-box layer (statement order matters: later lines read updated lanes) */
    a ^= e;
    e ^= d;
    c ^= b;
    t = a & ~e;
    a ^= c & ~b;
    c ^= e & ~d;
    e ^= b & ~a;
    b ^= d & ~c;
    d ^= t;
    b ^= a;
    d ^= c;
    a ^= e;
    c = ~c;

    /* linear layer: each shift pair sums to 64, i.e. forms one rotation */
    s->x[0] = a ^ (a >> 19) ^ (a << 45) ^ (a >> 28) ^ (a << 36);
    s->x[1] = b ^ (b >> 61) ^ (b << 3) ^ (b >> 39) ^ (b << 25);
    s->x[2] = c ^ (c >> 1) ^ (c << 63) ^ (c >> 6) ^ (c << 58);
    s->x[3] = d ^ (d >> 10) ^ (d << 54) ^ (d >> 17) ^ (d << 47);
    s->x[4] = e ^ (e >> 7) ^ (e << 57) ^ (e >> 41) ^ (e << 23);
}
/* ---------------------------------------------------------------- */
/*
 * Applies the last `nr` rounds of the 12-round permutation to `s`.
 *
 * nr == 12 runs all twelve rounds, nr == 7 the last seven, nr == 6 the last
 * six, and every other value runs only the final round.
 */
void PROUNDS(state_t *s, uint8_t nr)
{
    const int all_twelve = (nr == 12);
    const int last_seven = all_twelve || (nr == 7);
    const int last_six = last_seven || (nr == 6);

    if (all_twelve)
    {
        /* rounds 1..5 */
        ROUND(s, 0xf0);
        ROUND(s, 0xe1);
        ROUND(s, 0xd2);
        ROUND(s, 0xc3);
        ROUND(s, 0xb4);
    }
    if (last_seven)
    {
        /* round 6 */
        ROUND(s, 0xa5);
    }
    if (last_six)
    {
        /* rounds 7..11 */
        ROUND(s, 0x96);
        ROUND(s, 0x87);
        ROUND(s, 0x78);
        ROUND(s, 0x69);
        ROUND(s, 0x5a);
    }
    /* round 12 is executed for every value of nr */
    ROUND(s, 0x4b);
}
/* ---------------------------------------------------------------- */
#endif // ASCONP_H_
#ifndef CONFIG_H
#define CONFIG_H
#define ENABLE_HASH 0
#include "isap.h"
// ISAP-A-128a
// Initialization vectors. Each IV is 8 bytes: a distinguishing first byte
// (0x01, 0x02, 0x03) followed by the parameter bytes ISAP_K, ISAP_rH,
// ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK from isap.h.
// NOTE(review): these are definitions in a header; including config.h from
// more than one translation unit would presumably produce duplicate
// symbols -- confirm it is only included once.
const uint8_t ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
// Ascon-Hash
// 8-byte IV for the optional hash function (see ENABLE_HASH).
const uint8_t ASCON_HASH_IV[] = {0x00, 0x40, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00};
#endif // CONFIG_H
#include "api.h"
#include "isap.h"
#include "crypto_aead.h"
/*
 * AEAD encryption entry point.
 *
 * Writes mlen bytes of ciphertext followed by an ISAP_TAG_SZ-byte tag to
 * `c` and stores the total length in `*clen`. The tag is computed by
 * isap_mac over `ad` and the ciphertext. `nsec` is unused.
 * Always returns 0.
 */
int crypto_aead_encrypt(
    unsigned char *c, unsigned long long *clen,
    const unsigned char *m, unsigned long long mlen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *nsec,
    const unsigned char *npub,
    const unsigned char *k)
{
    (void)nsec;

    // Output is the ciphertext immediately followed by the tag.
    *clen = ISAP_TAG_SZ + mlen;

    // Nothing to encrypt for an empty message.
    if (mlen != 0)
    {
        isap_enc(k, npub, m, mlen, c);
    }

    // The tag authenticates the associated data and the ciphertext and is
    // stored directly behind the ciphertext bytes.
    isap_mac(k, npub, ad, adlen, c, mlen, c + mlen);

    return 0;
}
/*
 * AEAD decryption entry point (variant that compares tags through pvp).
 *
 * `c` holds clen bytes: the ciphertext followed by a 16-byte tag. The tag
 * is recomputed with isap_mac over `ad` and the ciphertext and handed,
 * together with a copy of the received tag, to pvp; the plaintext is
 * written to `m` only when pvp reports ISAP_TAG_SZ. `*mlen` receives the
 * plaintext length. `nsec` is unused.
 *
 * Returns 0 on success and -1 when the input is shorter than a tag or the
 * tag does not verify.
 *
 * NOTE(review): pvp is declared outside this block; it is presumably a
 * tag comparison returning the number of matching bytes -- confirm.
 */
int crypto_aead_decrypt(
    unsigned char *m, unsigned long long *mlen,
    unsigned char *nsec,
    const unsigned char *c, unsigned long long clen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *npub,
    const unsigned char *k)
{
    (void)nsec;

    // An input shorter than the tag cannot be valid. Without this check
    // clen - ISAP_TAG_SZ wraps around to a huge unsigned length and the MAC
    // and the tag copy read far outside the buffer.
    if (clen < ISAP_TAG_SZ)
    {
        *mlen = 0;
        return -1;
    }

    // Plaintext length is clen - tag length
    *mlen = clen - ISAP_TAG_SZ;

    // Generate tag
    unsigned char T[16];
    isap_mac(k, npub, ad, adlen, c, *mlen, T);

    // Copy the received tag, which follows the ciphertext
    unsigned char T_star[16];
    for (int i = 0; i < 16; i++)
    {
        T_star[i] = *(c + *mlen + i);
    }

    // Compare tag
    int eq_cnt = pvp(T, T_star);
    if (eq_cnt != ISAP_TAG_SZ)
    {
        return -1;
    }

    // Perform decryption only after the tag has been verified
    if (*mlen > 0)
    {
        isap_enc(k, npub, c, *mlen, m);
    }
    return 0;
}
../opt_32/forceinline.h
\ No newline at end of file
#include <string.h>
#include <inttypes.h>
#include "api.h"
#include "isap.h"
#include "asconp.h"
#include "config.h"
// Needed for LR tag comparison
uint64_t s0, s1;
/*
 * Absorb len bytes from src into lane 0 of the state, 8 bytes per block, and
 * run P_sH after every block.
 *
 * The input is always padded with a single 0x80 byte placed right after the
 * last data byte; when len is a multiple of 8 the padding is absorbed as a
 * block of its own.
 */
forceinline void ABSORB_LANES(state_t *s, const uint8_t *src, uint64_t len)
{
    while (len >= 8)
    {
        // Absorb full lanes. Load through memcpy rather than a pointer cast:
        // src is an arbitrary byte pointer and need not be 8-byte aligned.
        lane_t t0;
        memcpy(&t0, src, sizeof t0);
        t0 = U64TOWORD(t0);
        s->x[0] ^= t0.x;
        len -= ISAP_rH / 8;
        src += ISAP_rH / 8;
        P_sH;
    }
    if (len > 0)
    {
        // Absorb partial lane and padding. Bytes are stored from index 7
        // downwards, so no further byte swap is needed (TOBI is the identity).
        size_t i;
        lane_t t0 = {0};
        for (i = 0; i < len; i++)
        {
            t0.b[7 - i] ^= *src;
            src++;
        }
        t0.b[7 - i] ^= 0x80;
        t0 = TOBI(t0);
        s->x[0] ^= t0.x;
        P_sH;
    }
    else
    {
        // Absorb padded empty lane
        s->b[0][7] ^= 0x80;
        P_sH;
    }
}
/******************************************************************************/
/* ISAP_RK */
/******************************************************************************/
/*
 * Re-keying function: derive a session key from k, iv and the 16-byte
 * string y.
 *
 * k       16-byte key
 * iv      8-byte initial value
 * y       16 bytes, absorbed one bit at a time, most significant bit first
 * out     receives lanes 0 and 1 of the final state, plus lane 2 when
 *         outlen > 16; other lanes of *out are left untouched
 *
 * The computation runs on a private state and y is fully consumed before
 * *out is written, so out may alias the buffer y points into.
 */
void isap_rk(
    const uint8_t *k,
    const uint8_t *iv,
    const uint8_t *y,
    state_t *out,
    const size_t outlen)
{
    state_t state;
    state_t *s = &state;
    lane_t in;

    // Initialize. Inputs are plain byte pointers that may be unaligned, so
    // they are loaded with memcpy instead of being cast to lane_t *.
    memcpy(&in, k + 0, sizeof in);
    s->l[0] = U64TOWORD(in);
    memcpy(&in, k + 8, sizeof in);
    s->l[1] = U64TOWORD(in);
    memcpy(&in, iv + 0, sizeof in);
    s->l[2] = U64TOWORD(in);
    s->x[3] = 0;
    s->x[4] = 0;
    P_sK;

    // Absorb Y, bit by bit: every bit is moved to bit 7 of state byte
    // b[0][7] and followed by P_sB, except the very last bit.
    for (size_t i = 0; i < 16; i++)
    {
        uint8_t y_byte = *y;
        s->b[0][7] ^= (y_byte & 0x80) << 0;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x40) << 1;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x20) << 2;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x10) << 3;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x08) << 4;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x04) << 5;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x02) << 6;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x01) << 7;
        if (i != 15)
        {
            P_sB;
            y += 1;
        }
    }

    // Squeeze K*
    P_sK;
    out->x[0] = s->x[0];
    out->x[1] = s->x[1];
    if (outlen > 16)
    {
        out->x[2] = s->x[2];
    }
}
/******************************************************************************/
/* ISAP_MAC */
/******************************************************************************/
void isap_mac(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *ad, uint64_t adlen,
const uint8_t *c, uint64_t clen,
uint8_t *tag)
{
state_t state;
state_t *s = &state;
// Initialize
s->l[0] = U64TOWORD(*(lane_t *)(npub + 0));
s->l[1] = U64TOWORD(*(lane_t *)(npub + 8));
s->l[2] = U64TOWORD(*(lane_t *)(ISAP_IV_A + 0));
s->x[3] = 0;
s->x[4] = 0;
P_sH;
// Absorb associated data
ABSORB_LANES(s, ad, adlen);
// Domain seperation
s->w[4][0] ^= 0x1UL;
// Absorb ciphertext
ABSORB_LANES(s, c, clen);
// Needed for LR tag comparison
s0 = s->x[0];
s1 = s->x[1];
// Derive KA*
s->l[0] = WORDTOU64(s->l[0]);
s->l[1] = WORDTOU64(s->l[1]);
isap_rk(k, ISAP_IV_KA, (const uint8_t *)(s->b), s, CRYPTO_KEYBYTES);
// Squeeze tag
P_sH;
lane_t t0 = WORDTOU64(s->l[0]);
memcpy(tag + 0, t0.b, 8);
t0 = WORDTOU64(s->l[1]);
memcpy(tag + 8, t0.b, 8);
}
/******************************************************************************/
/* Leakage resilient tag comparison. */
/* Similar to Fig.3 of https://eprint.iacr.org/2021/402.pdf. */
/******************************************************************************/
int pvp(
const uint8_t *T, const uint8_t *T_star)
{
state_t state;
state_t *s = &state;
// Calculate U
s->l[0] = U64TOWORD(*(lane_t *)(T + 0));
s->l[1] = U64TOWORD(*(lane_t *)(T + 8));
s->x[2] = s0;
s->x[3] = s1;
s->x[4] = 0;
P_PVP;
uint64_t u0, u1;
u0 = s->x[0];
u1 = s->x[1];
// Calculate U'
s->l[0] = U64TOWORD(*(lane_t *)(T_star + 0));
s->l[1] = U64TOWORD(*(lane_t *)(T_star + 8));
s->x[2] = s0;
s->x[3] = s1;
s->x[4] = 0;
P_PVP;
// Compare tag
int eq_cnt = 0;
for (size_t i = 0; i < 8; i++)
{
eq_cnt += (s->b[0][i] == ((uint8_t *)&u0)[i]);
}
for (size_t i = 0; i < 8; i++)
{
eq_cnt += (s->b[1][i] == ((uint8_t *)&u1)[i]);
}
return eq_cnt;
}
/******************************************************************************/
/* ISAP_ENC */
/******************************************************************************/
void isap_enc(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *m, uint64_t mlen,
uint8_t *c)
{
state_t state;
state_t *s = &state;
// Init state
isap_rk(k, ISAP_IV_KE, npub, s, ISAP_STATE_SZ - CRYPTO_NPUBBYTES);
s->l[3] = U64TOWORD(*(lane_t *)(npub + 0));
s->l[4] = U64TOWORD(*(lane_t *)(npub + 8));
while (mlen >= ISAP_rH / 8)
{
// Encrypt full lanes
P_sE;
lane_t t0 = WORDTOU64(s->l[0]);
*(uint64_t *)c = *(uint64_t *)m ^ t0.x;
mlen -= ISAP_rH / 8;
m += ISAP_rH / 8;
c += ISAP_rH / 8;
}
if (mlen > 0)
{
// Encrypt partial lanes
P_sE;
lane_t t0 = WORDTOU64(s->l[0]);
for (uint8_t i = 0; i < mlen; i++)
{
*c = *m ^ t0.b[i];
m += 1;
c += 1;
}
}
}
/******************************************************************************/
/* Ascon-Hash */
/******************************************************************************/
#if ENABLE_HASH == 1
int crypto_hash(uint8_t *out, const uint8_t *in, unsigned long long inlen)
{
state_t state;
state_t *s = &state;
// Initialize
s->l[0] = U64TOWORD(*(lane_t *)(ASCON_HASH_IV + 0));
s->x[1] = 0;
s->x[2] = 0;
s->x[3] = 0;
s->x[4] = 0;
P_sH;
// Absorb input
ABSORB_LANES(s, in, inlen);
for (size_t i = 0; i < 4; i++)
{
// Squeeze full lanes
lane_t t0 = WORDTOU64(s->l[0]);
*(uint64_t *)(out + 8 * i) = t0.x;
if (i < 3)
{
P_sH;
}
}
return 0;
}
#endif
#ifndef ISAP_H
#define ISAP_H
#include <inttypes.h>
// Rate in bits
#define ISAP_rH 64
#define ISAP_rB 1
// Number of rounds
#define ISAP_sH 12
#define ISAP_sB 1
#define ISAP_sE 6
#define ISAP_sK 12
// State size in bytes
#define ISAP_STATE_SZ 40
// Size of rate in bytes
#define ISAP_rH_SZ ((ISAP_rH + 7) / 8)
// Size of zero truncated IV in bytes
#define ISAP_IV_SZ 8
// Size of tag in bytes
#define ISAP_TAG_SZ 16
// Security level
#define ISAP_K 128
// Compute the ISAP_TAG_SZ-byte tag over npub, ad (adlen bytes) and
// c (clen bytes) under key k and write it to tag.
void isap_mac(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *ad, const uint64_t adlen,
const uint8_t *c, const uint64_t clen,
uint8_t *tag);
// Compare the computed tag T with the received tag T_star (16 bytes each).
// Returns the number of matching bytes; callers compare it to ISAP_TAG_SZ.
int pvp(
const uint8_t *T,
const uint8_t *T_star);
// XOR the keystream derived from k and npub onto mlen bytes of m, writing
// the result to c. crypto_aead_decrypt calls it with the roles of m and c
// swapped to recover the plaintext.
void isap_enc(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *m, uint64_t mlen,
uint8_t *c);
#endif
#ifndef ASCONP_H_
#define ASCONP_H_
#include <inttypes.h>
#include "forceinline.h"
// One 64-bit lane, accessible as a whole (x), as two 32-bit words (w) or as
// eight bytes (b). Which byte of b is the most significant depends on the
// host byte order.
typedef union
{
uint64_t x;
uint32_t w[2];
uint8_t b[8];
} lane_t;
// The full permutation state: five lanes, with the same three views plus
// access to each lane as a lane_t (l).
typedef union
{
lane_t l[5];
uint64_t x[5];
uint32_t w[5][2];
uint8_t b[5][8];
} state_t;
/* ---------------------------------------------------------------- */
// Permutation shorthands. They expand to a call on a variable named `s`,
// which must be a state_t * in scope at the point of use.
#define P_sH PROUNDS(s, 12)
#define P_sB PROUNDS(s, 1)
#define P_sE PROUNDS(s, 6)
#define P_sK PROUNDS(s, 12)
/* ---------------------------------------------------------------- */
// Both directions are the same byte reversal performed by U64BIG.
#define U64TOWORD(x) U64BIG(x)
#define WORDTOU64(x) U64BIG(x)
/* ---------------------------------------------------------------- */
// Identity in this implementation.
#define TOBI(x) (x)
#define FROMBI(x) (x)
/* ---------------------------------------------------------------- */
/*
 * Reverse the order of the eight bytes in the lane and return the result.
 * Implemented as three swap stages: adjacent bytes, adjacent 16-bit halves,
 * then the two 32-bit halves.
 */
lane_t U64BIG(lane_t x)
{
    uint64_t v = x.x;

    v = ((v & 0x00FF00FF00FF00FFULL) << 8) | ((v >> 8) & 0x00FF00FF00FF00FFULL);
    v = ((v & 0x0000FFFF0000FFFFULL) << 16) | ((v >> 16) & 0x0000FFFF0000FFFFULL);
    v = (v << 32) | (v >> 32);

    x.x = v;
    return x;
}
/* ---------------------------------------------------------------- */
/*
 * One round of the permutation on the five 64-bit lanes of *s, updated in
 * place: XOR the round constant C into lane 2, apply the bitwise s-box layer
 * across the lanes, then mix each lane with two rotated copies of itself.
 * The statements of the s-box layer depend on their exact order.
 */
void ROUND(state_t *s, uint8_t C)
{
uint64_t xtemp;
/* round constant */
s->x[2] ^= C;
/* s-box layer */
s->x[0] ^= s->x[4];
s->x[4] ^= s->x[3];
s->x[2] ^= s->x[1];
/* xtemp captures x0 & ~x4 before x0 and x4 are overwritten below */
xtemp = s->x[0] & ~s->x[4];
s->x[0] ^= s->x[2] & ~s->x[1];
s->x[2] ^= s->x[4] & ~s->x[3];
s->x[4] ^= s->x[1] & ~s->x[0];
s->x[1] ^= s->x[3] & ~s->x[2];
s->x[3] ^= xtemp;
s->x[1] ^= s->x[0];
s->x[3] ^= s->x[2];
s->x[0] ^= s->x[4];
s->x[2] = ~s->x[2];
/* linear layer */
/* each (x >> r) ^ (x << (64 - r)) pair is a rotation to the right by r */
s->x[0] ^=
(s->x[0] >> 19) ^ (s->x[0] << 45) ^ (s->x[0] >> 28) ^ (s->x[0] << 36);
s->x[1] ^=
(s->x[1] >> 61) ^ (s->x[1] << 3) ^ (s->x[1] >> 39) ^ (s->x[1] << 25);
s->x[2] ^=
(s->x[2] >> 1) ^ (s->x[2] << 63) ^ (s->x[2] >> 6) ^ (s->x[2] << 58);
s->x[3] ^=
(s->x[3] >> 10) ^ (s->x[3] << 54) ^ (s->x[3] >> 17) ^ (s->x[3] << 47);
s->x[4] ^=
(s->x[4] >> 7) ^ (s->x[4] << 57) ^ (s->x[4] >> 41) ^ (s->x[4] << 23);
}
/* ---------------------------------------------------------------- */
/*
 * Round-constant schedule: consecutive constants differ by -0x0f and the
 * last one used is always 0x4b, so the loop stops once the counter reaches
 * 0x4b - 0x0f = 0x3c. START(n) yields the first constant when n rounds are
 * requested (0xf0 for 12, 0x96 for 6, 0x4b for 1).
 */
#define RC(i) (i)
#define START(n) ((3 + (n)) << 4 | (12 - (n)))
#define INC -0x0f
#define END 0x3c
/* Apply the last nr rounds of the permutation to *s (at least one round). */
void PROUNDS(state_t *s, uint8_t nr)
{
    int rc = START(nr);

    for (;;)
    {
        ROUND(s, RC(rc));
        rc += INC;
        if (rc == END)
        {
            break;
        }
    }
}
/* ---------------------------------------------------------------- */
#endif // ASCONP_H_
#ifndef CONFIG_H
#define CONFIG_H
#define ENABLE_HASH 0
#include "isap.h"
// ISAP-A-128a
// Each IV is 8 bytes: a distinguishing first byte (0x01, 0x02, 0x03) followed
// by the parameter macros from isap.h.
// NOTE(review): these are definitions, not declarations; including this
// header from more than one translation unit would define the arrays twice.
const uint8_t ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
// Ascon-Hash
// 8-byte initial value loaded into lane 0 by crypto_hash().
const uint8_t ASCON_HASH_IV[] = {0x00, 0x40, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00};
#endif // CONFIG_H
../opt_32/crypto_aead.c
\ No newline at end of file
../opt_32/forceinline.h
\ No newline at end of file
#include <string.h>
#include <inttypes.h>
#include "api.h"
#include "isap.h"
#include "asconp.h"
#include "config.h"
/*
 * Absorb len bytes from src into lane 0 of the state, 8 bytes per block, and
 * run P_sH after every block.
 *
 * The input is always padded with a single 0x80 byte placed right after the
 * last data byte; when len is a multiple of 8 the padding is absorbed as a
 * block of its own.
 */
void ABSORB_LANES(state_t *s, const uint8_t *src, uint64_t len)
{
    while (len >= 8)
    {
        // Absorb full lanes. Load through memcpy rather than a pointer cast:
        // src is an arbitrary byte pointer and need not be 8-byte aligned.
        lane_t t0;
        memcpy(&t0, src, sizeof t0);
        t0 = U64TOWORD(t0);
        s->x[0] ^= t0.x;
        len -= ISAP_rH / 8;
        src += ISAP_rH / 8;
        P_sH;
    }
    if (len > 0)
    {
        // Absorb partial lane and padding. Bytes are stored from index 7
        // downwards, so no further byte swap is needed (TOBI is the identity).
        size_t i;
        lane_t t0 = {0};
        for (i = 0; i < len; i++)
        {
            t0.b[7 - i] ^= *src;
            src++;
        }
        t0.b[7 - i] ^= 0x80;
        t0 = TOBI(t0);
        s->x[0] ^= t0.x;
        P_sH;
    }
    else
    {
        // Absorb padded empty lane
        s->b[0][7] ^= 0x80;
        P_sH;
    }
}
/******************************************************************************/
/* ISAP_RK */
/******************************************************************************/
/*
 * Re-keying function: derive a session key from k, iv and the 16-byte
 * string y.
 *
 * k       16-byte key
 * iv      8-byte initial value
 * y       16 bytes, absorbed one bit at a time, most significant bit first
 * out     receives lanes 0 and 1 of the final state, plus lane 2 when
 *         outlen > 16; other lanes of *out are left untouched
 *
 * The computation runs on a private state and y is fully consumed before
 * *out is written, so out may alias the buffer y points into.
 */
void isap_rk(
    const uint8_t *k,
    const uint8_t *iv,
    const uint8_t *y,
    state_t *out,
    const size_t outlen)
{
    state_t state;
    state_t *s = &state;
    lane_t in;

    // Initialize. Inputs are plain byte pointers that may be unaligned, so
    // they are loaded with memcpy instead of being cast to lane_t *.
    memcpy(&in, k + 0, sizeof in);
    s->l[0] = U64TOWORD(in);
    memcpy(&in, k + 8, sizeof in);
    s->l[1] = U64TOWORD(in);
    memcpy(&in, iv + 0, sizeof in);
    s->l[2] = U64TOWORD(in);
    s->x[3] = 0;
    s->x[4] = 0;
    P_sK;

    // Absorb Y, bit by bit: every bit is moved to bit 7 of state byte
    // b[0][7] and followed by P_sB, except the very last bit.
    for (size_t i = 0; i < 16; i++)
    {
        uint8_t y_byte = *y;
        s->b[0][7] ^= (y_byte & 0x80) << 0;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x40) << 1;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x20) << 2;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x10) << 3;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x08) << 4;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x04) << 5;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x02) << 6;
        P_sB;
        s->b[0][7] ^= (y_byte & 0x01) << 7;
        if (i != 15)
        {
            P_sB;
            y += 1;
        }
    }

    // Squeeze K*
    P_sK;
    out->x[0] = s->x[0];
    out->x[1] = s->x[1];
    if (outlen > 16)
    {
        out->x[2] = s->x[2];
    }
}
/******************************************************************************/
/* ISAP_MAC */
/******************************************************************************/
void isap_mac(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *ad, uint64_t adlen,
const uint8_t *c, uint64_t clen,
uint8_t *tag)
{
state_t state;
state_t *s = &state;
// Initialize
s->l[0] = U64TOWORD(*(lane_t *)(npub + 0));
s->l[1] = U64TOWORD(*(lane_t *)(npub + 8));
s->l[2] = U64TOWORD(*(lane_t *)(ISAP_IV_A + 0));
s->x[3] = 0;
s->x[4] = 0;
P_sH;
// Absorb associated data
ABSORB_LANES(s, ad, adlen);
// Domain seperation
s->w[4][0] ^= 0x1UL;
// Absorb ciphertext
ABSORB_LANES(s, c, clen);
// Derive KA*
s->l[0] = WORDTOU64(s->l[0]);
s->l[1] = WORDTOU64(s->l[1]);
isap_rk(k, ISAP_IV_KA, (const uint8_t *)(s->b), s, CRYPTO_KEYBYTES);
// Squeeze tag
P_sH;
lane_t t0 = WORDTOU64(s->l[0]);
memcpy(tag + 0, t0.b, 8);
t0 = WORDTOU64(s->l[1]);
memcpy(tag + 8, t0.b, 8);
}
/******************************************************************************/
/* ISAP_ENC */
/******************************************************************************/
void isap_enc(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *m, uint64_t mlen,
uint8_t *c)
{
state_t state;
state_t *s = &state;
// Init state
isap_rk(k, ISAP_IV_KE, npub, s, ISAP_STATE_SZ - CRYPTO_NPUBBYTES);
s->l[3] = U64TOWORD(*(lane_t *)(npub + 0));
s->l[4] = U64TOWORD(*(lane_t *)(npub + 8));
while (mlen >= ISAP_rH / 8)
{
// Encrypt full lanes
P_sE;
lane_t t0 = WORDTOU64(s->l[0]);
*(uint64_t *)c = *(uint64_t *)m ^ t0.x;
mlen -= ISAP_rH / 8;
m += ISAP_rH / 8;
c += ISAP_rH / 8;
}
if (mlen > 0)
{
// Encrypt partial lanes
P_sE;
lane_t t0 = WORDTOU64(s->l[0]);
for (uint8_t i = 0; i < mlen; i++)
{
*c = *m ^ t0.b[i];
m += 1;
c += 1;
}
}
}
/******************************************************************************/
/* Ascon-Hash */
/******************************************************************************/
#if ENABLE_HASH == 1
int crypto_hash(uint8_t *out, const uint8_t *in, unsigned long long inlen)
{
state_t state;
state_t *s = &state;
// Initialize
s->l[0] = U64TOWORD(*(lane_t *)(ASCON_HASH_IV + 0));
s->x[1] = 0;
s->x[2] = 0;
s->x[3] = 0;
s->x[4] = 0;
P_sH;
// Absorb input
ABSORB_LANES(s, in, inlen);
for (size_t i = 0; i < 4; i++)
{
// Squeeze full lanes
lane_t t0 = WORDTOU64(s->l[0]);
*(uint64_t *)(out + 8 * i) = t0.x;
if (i < 3)
{
P_sH;
}
}
return 0;
}
#endif
../opt_32/isap.h
\ No newline at end of file
#ifndef API_H #ifndef API_H
#define API_H #define API_H
#define CRYPTO_VERSION "2.0.1" // isapa128a
#define CRYPTO_KEYBYTES 16 #define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0 #define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16 #define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16 #define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1 #define CRYPTO_NOOVERLAP 1
// asconhashv12
#define CRYPTO_BYTES 32 #define CRYPTO_BYTES 32
#define ASCON_HASH_OUTLEN 32
#define ASCON_HASH_ROUNDS 12
#endif #endif
typedef unsigned char u8; #ifndef ASCONP_H_
typedef unsigned long u32; #define ASCONP_H_
typedef unsigned long long u64;
#include <inttypes.h>
#define ASCON_128_RATE 8 #include "forceinline.h"
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_STATE_SZ 40 typedef union
{
#define ASCON_HASH_IV \ uint64_t x;
(((u64)(ASCON_128_RATE * 8) << 48) | \ uint32_t w[2];
((u64)(ASCON_128_PA_ROUNDS) << 40) | \ uint8_t b[8];
((u64)(CRYPTO_BYTES * 8) << 0)) } lane_t;
#define ROTR(x,n) (((x)>>(n))|((x)<<(64-(n)))) typedef union
#define EXT_BYTE(x,n) ((u8)((u64)(x)>>(8*(7-(n))))) {
#define INS_BYTE(x,n) ((u64)(x)<<(8*(7-(n)))) lane_t l[5];
uint64_t x[5];
#define U64BIG(x) \ uint32_t w[5][2];
((ROTR(x, 8) & (0xFF000000FF000000ULL)) | \ uint8_t b[5][8];
(ROTR(x,24) & (0x00FF000000FF0000ULL)) | \ } state_t;
(ROTR(x,40) & (0x0000FF000000FF00ULL)) | \
(ROTR(x,56) & (0x000000FF000000FFULL))) /* ---------------------------------------------------------------- */
#define ROUND(C) ({\ #define P_sH P12ROUNDS(s)
x2 ^= C;\ #define P_sB P1ROUNDS(s)
x0 ^= x4;\ #define P_sE P6ROUNDS(s)
x4 ^= x3;\ #define P_sK P12ROUNDS(s)
x2 ^= x1;\
t0 = x0;\ /* ---------------------------------------------------------------- */
t4 = x4;\
t3 = x3;\ #define U64TOWORD(x) U64BIG(x)
t1 = x1;\ #define WORDTOU64(x) U64BIG(x)
t2 = x2;\
x0 = t0 ^ ((~t1) & t2);\ /* ---------------------------------------------------------------- */
x2 = t2 ^ ((~t3) & t4);\
x4 = t4 ^ ((~t0) & t1);\ #define TOBI(x) (x)
x1 = t1 ^ ((~t2) & t3);\ #define FROMBI(x) (x)
x3 = t3 ^ ((~t4) & t0);\
x1 ^= x0;\ /* ---------------------------------------------------------------- */
t1 = x1;\
x1 = ROTR(x1, R[1][0]);\ forceinline lane_t U64BIG(lane_t x)
x3 ^= x2;\ {
t2 = x2;\ x.x = ((((x.x) & 0x00000000000000FFULL) << 56) | (((x.x) & 0x000000000000FF00ULL) << 40) |
x2 = ROTR(x2, R[2][0]);\ (((x.x) & 0x0000000000FF0000ULL) << 24) | (((x.x) & 0x00000000FF000000ULL) << 8) |
t4 = x4;\ (((x.x) & 0x000000FF00000000ULL) >> 8) | (((x.x) & 0x0000FF0000000000ULL) >> 24) |
t2 ^= x2;\ (((x.x) & 0x00FF000000000000ULL) >> 40) | (((x.x) & 0xFF00000000000000ULL) >> 56));
x2 = ROTR(x2, R[2][1] - R[2][0]);\ return x;
t3 = x3;\ }
t1 ^= x1;\
x3 = ROTR(x3, R[3][0]);\ /* ---------------------------------------------------------------- */
x0 ^= x4;\
x4 = ROTR(x4, R[4][0]);\ forceinline uint64_t ROR(uint64_t x, int n) { return x >> n | x << (-n & 63); }
t3 ^= x3;\
x2 ^= t2;\ /* ---------------------------------------------------------------- */
x1 = ROTR(x1, R[1][1] - R[1][0]);\
t0 = x0;\ forceinline void ROUND(state_t *s, uint8_t C)
x2 = ~x2;\ {
x3 = ROTR(x3, R[3][1] - R[3][0]);\ state_t t;
t4 ^= x4;\ /* round constant */
x4 = ROTR(x4, R[4][1] - R[4][0]);\ s->x[2] ^= C;
x3 ^= t3;\ /* s-box layer */
x1 ^= t1;\ s->x[0] ^= s->x[4];
x0 = ROTR(x0, R[0][0]);\ s->x[4] ^= s->x[3];
x4 ^= t4;\ s->x[2] ^= s->x[1];
t0 ^= x0;\ t.x[0] = s->x[0] ^ (~s->x[1] & s->x[2]);
x0 = ROTR(x0, R[0][1] - R[0][0]);\ t.x[2] = s->x[2] ^ (~s->x[3] & s->x[4]);
x0 ^= t0;\ t.x[4] = s->x[4] ^ (~s->x[0] & s->x[1]);
}) t.x[1] = s->x[1] ^ (~s->x[2] & s->x[3]);
t.x[3] = s->x[3] ^ (~s->x[4] & s->x[0]);
#define P12 ({\ t.x[1] ^= t.x[0];
ROUND(0xf0);\ t.x[3] ^= t.x[2];
ROUND(0xe1);\ t.x[0] ^= t.x[4];
ROUND(0xd2);\ /* linear layer */
ROUND(0xc3);\ s->x[2] = t.x[2] ^ ROR(t.x[2], 6 - 1);
ROUND(0xb4);\ s->x[3] = t.x[3] ^ ROR(t.x[3], 17 - 10);
ROUND(0xa5);\ s->x[4] = t.x[4] ^ ROR(t.x[4], 41 - 7);
ROUND(0x96);\ s->x[0] = t.x[0] ^ ROR(t.x[0], 28 - 19);
ROUND(0x87);\ s->x[1] = t.x[1] ^ ROR(t.x[1], 61 - 39);
ROUND(0x78);\ s->x[2] = t.x[2] ^ ROR(s->x[2], 1);
ROUND(0x69);\ s->x[3] = t.x[3] ^ ROR(s->x[3], 10);
ROUND(0x5a);\ s->x[4] = t.x[4] ^ ROR(s->x[4], 7);
ROUND(0x4b);\ s->x[0] = t.x[0] ^ ROR(s->x[0], 19);
}) s->x[1] = t.x[1] ^ ROR(s->x[1], 39);
s->x[2] = ~s->x[2];
#define P6 ({\ }
ROUND(0x96);\
ROUND(0x87);\ /* ---------------------------------------------------------------- */
ROUND(0x78);\
ROUND(0x69);\ forceinline void P12ROUNDS(state_t *s)
ROUND(0x5a);\ {
ROUND(0x4b);\ ROUND(s, 0xf0);
}) ROUND(s, 0xe1);
ROUND(s, 0xd2);
#define P1 ({\ ROUND(s, 0xc3);
ROUND(0x4b);\ ROUND(s, 0xb4);
}) ROUND(s, 0xa5);
ROUND(s, 0x96);
static const int R[5][2] = { ROUND(s, 0x87);
{19, 28}, {39, 61}, {1, 6}, {10, 17}, {7, 41} ROUND(s, 0x78);
}; ROUND(s, 0x69);
ROUND(s, 0x5a);
ROUND(s, 0x4b);
}
/* ---------------------------------------------------------------- */
/*
 * Apply six rounds to *s with round constants 0x96, 0x87, 0x78, 0x69, 0x5a
 * and 0x4b, in that order. Consecutive constants differ by 0x0f.
 */
forceinline void P6ROUNDS(state_t *s)
{
    for (uint8_t rc = 0x96; rc >= 0x4b; rc -= 0x0f)
    {
        ROUND(s, rc);
    }
}
/* ---------------------------------------------------------------- */
/* Apply a single round to *s, using the round constant 0x4b. */
forceinline void P1ROUNDS(state_t *s)
{
ROUND(s, 0x4b);
}
/* ---------------------------------------------------------------- */
#endif // ASCONP_H_
#ifndef CONFIG_H
#define CONFIG_H
#define ENABLE_HASH 0
#include "isap.h"
// ISAP-A-128a
// Each IV is 8 bytes: a distinguishing first byte (0x01, 0x02, 0x03) followed
// by the parameter macros from isap.h.
// NOTE(review): these are definitions, not declarations; including this
// header from more than one translation unit would define the arrays twice.
const uint8_t ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
// Ascon-Hash
const uint8_t ASCON_HASH_IV[] = {0x00, 0x40, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00};
#endif // CONFIG_H
#ifndef FORCEINLINE_H_
#define FORCEINLINE_H_
/*
 * Define `forceinline` as the strongest inlining request the compiler
 * supports, falling back to plain `inline`.
 */
#ifdef _MSC_VER
#define forceinline __forceinline
#elif defined(__GNUC__)
#define forceinline inline __attribute__((__always_inline__))
#elif defined(__clang__)
/* Clang's identifying macro is lowercase; normally clang also defines
 * __GNUC__ and is handled by the branch above. */
#if __has_attribute(__always_inline__)
#define forceinline inline __attribute__((__always_inline__))
#else
#define forceinline inline
#endif
#else
#define forceinline inline
#endif
#endif /* FORCEINLINE_H_ */
#include <stdio.h>
#include <string.h> #include <string.h>
#include <inttypes.h>
#include "api.h" #include "api.h"
#include "isap.h" #include "isap.h"
#include "asconp.h" #include "asconp.h"
#include "config.h"
forceinline void ABSORB_LANES(state_t *s, const uint8_t *src, uint64_t len)
{
while (len >= 8)
{
// Absorb full lanes
lane_t t0 = U64TOWORD(*(lane_t *)(src + 0));
s->x[0] ^= t0.x;
len -= ISAP_rH / 8;
src += ISAP_rH / 8;
P_sH;
}
const u8 ISAP_IV1[] = {0x01,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK}; if (len > 0)
const u8 ISAP_IV2[] = {0x02,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK}; {
const u8 ISAP_IV3[] = {0x03,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK}; // Absorb partial lane and padding
size_t i;
lane_t t0 = {0};
for (i = 0; i < len; i++)
{
t0.b[7 - i] ^= *src;
src++;
}
t0.b[7 - i] ^= 0x80;
t0 = TOBI(t0);
s->x[0] ^= t0.x;
P_sH;
}
else
{
// Absorb padded empty lane
s->b[0][7] ^= 0x80;
P_sH;
}
}
/******************************************************************************/ /******************************************************************************/
/* IsapRk */ /* ISAP_RK */
/******************************************************************************/ /******************************************************************************/
void isap_rk( void isap_rk(
const u8 *k, const uint8_t *k,
const u8 *iv, const uint8_t *iv,
const u8 *y, const uint8_t *y,
const u64 ylen, state_t *out,
u8 *out, const size_t outlen)
const u64 outlen {
){ state_t state;
const u64 *k64 = (u64 *)k; state_t *s = &state;
const u64 *iv64 = (u64 *)iv;
u64 *out64 = (u64 *)out; // Initialize
u64 x0, x1, x2, x3, x4; s->l[0] = U64TOWORD(*(lane_t *)(k + 0));
u64 t0, t1, t2, t3, t4; s->l[1] = U64TOWORD(*(lane_t *)(k + 8));
s->l[2] = U64TOWORD(*(lane_t *)(iv + 0));
// Init state s->x[3] = 0;
t0 = t1 = t2 = t3 = t4 = 0; s->x[4] = 0;
x0 = U64BIG(k64[0]); P_sK;
x1 = U64BIG(k64[1]);
x2 = U64BIG(iv64[0]); // Absorb Y, bit by bit
x3 = x4 = 0; for (size_t i = 0; i < 16; i++)
P12; {
uint8_t y_byte = *y;
// Absorb Y s->b[0][7] ^= (y_byte & 0x80) << 0;
for (size_t i = 0; i < ylen*8-1; i++){ P_sB;
size_t cur_byte_pos = i/8; s->b[0][7] ^= (y_byte & 0x40) << 1;
size_t cur_bit_pos = 7-(i%8); P_sB;
u8 cur_bit = ((y[cur_byte_pos] >> (cur_bit_pos)) & 0x01) << 7; s->b[0][7] ^= (y_byte & 0x20) << 2;
x0 ^= ((u64)cur_bit) << 56; P_sB;
P1; s->b[0][7] ^= (y_byte & 0x10) << 3;
P_sB;
s->b[0][7] ^= (y_byte & 0x08) << 4;
P_sB;
s->b[0][7] ^= (y_byte & 0x04) << 5;
P_sB;
s->b[0][7] ^= (y_byte & 0x02) << 6;
P_sB;
s->b[0][7] ^= (y_byte & 0x01) << 7;
if (i != 15)
{
P_sB;
y += 1;
}
} }
u8 cur_bit = ((y[ylen-1]) & 0x01) << 7;
x0 ^= ((u64)cur_bit) << 56; // Squeeze K*
P12; P_sK;
out->x[0] = s->x[0];
// Extract K* out->x[1] = s->x[1];
out64[0] = U64BIG(x0); if (outlen > 16)
out64[1] = U64BIG(x1); {
if(outlen == 24){ out->x[2] = s->x[2];
out64[2] = U64BIG(x2);
} }
} }
/******************************************************************************/ /******************************************************************************/
/* IsapMac */ /* ISAP_MAC */
/******************************************************************************/ /******************************************************************************/
void isap_mac( void isap_mac(
const u8 *k, const uint8_t *k,
const u8 *npub, const uint8_t *npub,
const u8 *ad, u64 adlen, const uint8_t *ad, uint64_t adlen,
const u8 *c, u64 clen, const uint8_t *c, uint64_t clen,
u8 *tag uint8_t *tag)
){ {
u8 state[ISAP_STATE_SZ]; state_t state;
const u64 *npub64 = (u64 *)npub; state_t *s = &state;
u64 *state64 = (u64 *)state;
u64 x0, x1, x2, x3, x4; // Initialize
u64 t0, t1, t2, t3, t4; s->l[0] = U64TOWORD(*(lane_t *)(npub + 0));
t0 = t1 = t2 = t3 = t4 = 0; s->l[1] = U64TOWORD(*(lane_t *)(npub + 8));
s->l[2] = U64TOWORD(*(lane_t *)(ISAP_IV_A + 0));
// Init state s->x[3] = 0;
x0 = U64BIG(npub64[0]); s->x[4] = 0;
x1 = U64BIG(npub64[1]); P_sH;
x2 = U64BIG(((u64 *)ISAP_IV1)[0]);
x3 = x4 = 0; // Absorb associated data
P12; ABSORB_LANES(s, ad, adlen);
/* Absorb ad */
u64 *ad64 = (u64*)ad;
while(adlen >= ISAP_rH_SZ){
x0 ^= U64BIG(*ad64);
P12;
ad64++;
adlen -= ISAP_rH_SZ;
}
/* Absorb final ad block */
u8 *xo = (u8*)&x0;
xo[ISAP_rH_SZ-1-adlen] ^= 0x80;
while(adlen > 0) {
xo[ISAP_rH_SZ-adlen] ^= ((u8*)ad64)[adlen-1];
adlen--;
}
P12;
// Domain seperation // Domain seperation
x4 ^= 0x0000000000000001ULL; s->w[4][0] ^= 0x1UL;
/* Absorb c */
u64 *c64 = (u64*)c;
while(clen >= ISAP_rH_SZ){
x0 ^= U64BIG(*c64);
P12;
c64++;
clen -= ISAP_rH_SZ;
}
/* Absorb final c block */ // Absorb ciphertext
xo[ISAP_rH_SZ-1-clen] ^= 0x80; ABSORB_LANES(s, c, clen);
while(clen > 0) {
xo[ISAP_rH_SZ-clen] ^= ((u8*)c64)[clen-1]; // Derive KA*
clen--; s->l[0] = WORDTOU64(s->l[0]);
} s->l[1] = WORDTOU64(s->l[1]);
P12; isap_rk(k, ISAP_IV_KA, (const uint8_t *)(s->b), s, CRYPTO_KEYBYTES);
// Derive K*
state64[0] = U64BIG(x0);
state64[1] = U64BIG(x1);
state64[2] = U64BIG(x2);
state64[3] = U64BIG(x3);
state64[4] = U64BIG(x4);
isap_rk(k,ISAP_IV2,(u8 *)state64,CRYPTO_KEYBYTES,(u8 *)state64,CRYPTO_KEYBYTES);
x0 = U64BIG(state64[0]);
x1 = U64BIG(state64[1]);
x2 = U64BIG(state64[2]);
x3 = U64BIG(state64[3]);
x4 = U64BIG(state64[4]);
// Squeeze tag // Squeeze tag
P12; P_sH;
unsigned long long *tag64 = (u64 *)tag; lane_t t0 = WORDTOU64(s->l[0]);
tag64[0] = U64BIG(x0); memcpy(tag + 0, t0.b, 8);
tag64[1] = U64BIG(x1); t0 = WORDTOU64(s->l[1]);
memcpy(tag + 8, t0.b, 8);
} }
/******************************************************************************/ /******************************************************************************/
/* IsapEnc */ /* ISAP_ENC */
/******************************************************************************/ /******************************************************************************/
void isap_enc( void isap_enc(
const u8 *k, const uint8_t *k,
const u8 *npub, const uint8_t *npub,
const u8 *m, const uint8_t *m, uint64_t mlen,
u64 mlen, uint8_t *c)
u8 *c
){ {
u8 state[ISAP_STATE_SZ]; state_t state;
state_t *s = &state;
// Init state // Init state
u64 *state64 = (u64 *)state; isap_rk(k, ISAP_IV_KE, npub, s, ISAP_STATE_SZ - CRYPTO_NPUBBYTES);
u64 *npub64 = (u64 *)npub; s->l[3] = U64TOWORD(*(lane_t *)(npub + 0));
isap_rk(k,ISAP_IV3,npub,CRYPTO_NPUBBYTES,state,ISAP_STATE_SZ-CRYPTO_NPUBBYTES); s->l[4] = U64TOWORD(*(lane_t *)(npub + 8));
u64 x0, x1, x2, x3, x4;
u64 t0, t1, t2, t3, t4; while (mlen >= ISAP_rH / 8)
t0 = t1 = t2 = t3 = t4 = 0; {
x0 = U64BIG(state64[0]); // Encrypt full lanes
x1 = U64BIG(state64[1]); P_sE;
x2 = U64BIG(state64[2]); lane_t t0 = WORDTOU64(s->l[0]);
x3 = U64BIG(npub64[0]); *(uint64_t *)c = *(uint64_t *)m ^ t0.x;
x4 = U64BIG(npub64[1]); mlen -= ISAP_rH / 8;
P6; m += ISAP_rH / 8;
c += ISAP_rH / 8;
/* Encrypt m */ }
u64 *m64 = (u64 *)m;
u64 *c64 = (u64 *)c; if (mlen > 0)
while(mlen >= ISAP_rH_SZ){ {
*c64 = U64BIG(x0) ^ *m64; // Encrypt partial lanes
P6; P_sE;
m64++; lane_t t0 = WORDTOU64(s->l[0]);
c64++; for (uint8_t i = 0; i < mlen; i++)
mlen -= ISAP_rH_SZ; {
*c = *m ^ t0.b[i];
m += 1;
c += 1;
}
} }
}
/******************************************************************************/
/* Ascon-Hash */
/******************************************************************************/
/* Encrypt final m block */ #if ENABLE_HASH == 1
u8 *xo = (u8*)&x0;
while(mlen > 0) { int crypto_hash(uint8_t *out, const uint8_t *in, unsigned long long inlen)
((u8*)c64)[mlen-1] = xo[ISAP_rH_SZ-mlen] ^ ((u8*)m64)[mlen-1]; {
mlen--;
state_t state;
state_t *s = &state;
// Initialize
s->l[0] = U64TOWORD(*(lane_t *)(ASCON_HASH_IV + 0));
s->x[1] = 0;
s->x[2] = 0;
s->x[3] = 0;
s->x[4] = 0;
P_sH;
// Absorb input
ABSORB_LANES(s, in, inlen);
for (size_t i = 0; i < 4; i++)
{
// Squeeze full lanes
lane_t t0 = WORDTOU64(s->l[0]);
*(uint64_t *)(out + 8 * i) = t0.x;
if (i < 3)
{
P_sH;
}
} }
return 0;
} }
#endif
#ifndef ISAP_H #ifndef ISAP_H_
#define ISAP_H #define ISAP_H_
#include <inttypes.h>
// Rate in bits // Rate in bits
#define ISAP_rH 64 #define ISAP_rH 64
...@@ -15,7 +17,7 @@ ...@@ -15,7 +17,7 @@
#define ISAP_STATE_SZ 40 #define ISAP_STATE_SZ 40
// Size of rate in bytes // Size of rate in bytes
#define ISAP_rH_SZ ((ISAP_rH+7)/8) #define ISAP_rH_SZ ((ISAP_rH + 7) / 8)
// Size of zero truncated IV in bytes // Size of zero truncated IV in bytes
#define ISAP_IV_SZ 8 #define ISAP_IV_SZ 8
...@@ -27,27 +29,16 @@ ...@@ -27,27 +29,16 @@
#define ISAP_K 128 #define ISAP_K 128
void isap_mac( void isap_mac(
const unsigned char *k, const uint8_t *k,
const unsigned char *npub, const uint8_t *npub,
const unsigned char *ad, unsigned long long adlen, const uint8_t *ad, const uint64_t adlen,
const unsigned char *c, unsigned long long clen, const uint8_t *c, const uint64_t clen,
unsigned char *tag uint8_t *tag);
);
void isap_rk(
const unsigned char *k,
const unsigned char *iv,
const unsigned char *in,
const unsigned long long inlen,
unsigned char *out,
const unsigned long long outlen
);
void isap_enc( void isap_enc(
const unsigned char *k, const uint8_t *k,
const unsigned char *npub, const uint8_t *npub,
const unsigned char *m, unsigned long long mlen, const uint8_t *m, const uint64_t mlen,
unsigned char *c uint8_t *c);
);
#endif #endif // ISAP_H_
#ifndef API_H
#define API_H
// isapa128a
// Sizes in bytes of the key and the public nonce. CRYPTO_NSECBYTES is 0.
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
// NOTE(review): presumably the ciphertext expansion, i.e. the tag length
// (same value as ISAP_TAG_SZ in isap.h) -- confirm against the API spec.
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
// asconhashv12
// crypto_hash() writes 32 bytes of output.
#define CRYPTO_BYTES 32
#define ASCON_HASH_OUTLEN 32
#define ASCON_HASH_ROUNDS 12
#endif
#ifndef ASCONP_H
#define ASCONP_H
#include <inttypes.h>
#include "forceinline.h"
// One 64-bit lane, accessible as a whole (x), as two 32-bit words (w) or as
// eight bytes (b). Which byte of b is the most significant depends on the
// host byte order.
typedef union
{
uint64_t x;
uint32_t w[2];
uint8_t b[8];
} lane_t;
// The full permutation state: five lanes, with the same three views plus
// access to each lane as a lane_t (l).
typedef union
{
lane_t l[5];
uint64_t x[5];
uint32_t w[5][2];
uint8_t b[5][8];
} state_t;
/* ---------------------------------------------------------------- */
// Permutation shorthands. They expand to a call on a variable named `s`,
// which must be a state_t * in scope at the point of use.
#define P_sH PROUNDS(s, 12)
#define P_sB PROUNDS(s, 1)
#define P_sE PROUNDS(s, 6)
#define P_sK PROUNDS(s, 12)
/* ---------------------------------------------------------------- */
// Both directions are the same byte reversal performed by U64BIG.
#define U64TOWORD(x) U64BIG(x)
#define WORDTOU64(x) U64BIG(x)
/* ---------------------------------------------------------------- */
// Identity in this implementation.
#define TOBI(x) (x)
#define FROMBI(x) (x)
/* ---------------------------------------------------------------- */
/*
 * Reverse the order of the eight bytes in the lane and return the result.
 * Implemented as three swap stages: adjacent bytes, adjacent 16-bit halves,
 * then the two 32-bit halves.
 */
lane_t U64BIG(lane_t x)
{
    uint64_t v = x.x;

    v = ((v & 0x00FF00FF00FF00FFULL) << 8) | ((v >> 8) & 0x00FF00FF00FF00FFULL);
    v = ((v & 0x0000FFFF0000FFFFULL) << 16) | ((v >> 16) & 0x0000FFFF0000FFFFULL);
    v = (v << 32) | (v >> 32);

    x.x = v;
    return x;
}
/* ---------------------------------------------------------------- */
/*
 * XMUL(i, x): multiply source byte a.b[i] by 2^x (x in 0..7) and XOR the
 * 16-bit product into two neighbouring destination bytes of b, starting at
 * index (byte_rol + i) mod 8. Uses the locals a, b, byte_rol and tmp of the
 * enclosing function.
 */
#define XMUL(i, x)                                   \
    do                                               \
    {                                                \
        tmp = (uint16_t)a.b[i] * (1 << (x));         \
        b.b[(byte_rol + (i)) & 0x7] ^= (uint8_t)tmp; \
        b.b[(byte_rol + (i) + 1) & 0x7] ^= tmp >> 8; \
    } while (0)
/*
 * Rotate x by n bit positions using only byte moves and 8-bit multiplies.
 * The shift distance 64 - n is split into a whole-byte part (byte_rol) and a
 * sub-byte part (bit_rol); each source byte is then scattered into the
 * result by XMUL.
 * NOTE(review): the result equals a rotate-right by n only if b[0] is the
 * least significant byte of the lane (little-endian host) -- confirm.
 */
forceinline uint64_t ROR(uint64_t x, int n)
{
    lane_t a = {.x = x}, b = {.x = 0ull};
    int bit_rol = (64 - n) & 0x7;
    int byte_rol = (64 - n) >> 3;
    uint16_t tmp;

    for (int i = 0; i < 8; i++)
    {
        XMUL(i, bit_rol);
    }
    return b.x;
}
/* ---------------------------------------------------------------- */
/* Bitwise complement of one byte. */
forceinline uint8_t NOT8(uint8_t a)
{
    return (uint8_t)~a;
}
/* Bitwise exclusive OR of two bytes. */
forceinline uint8_t XOR8(uint8_t a, uint8_t b)
{
    return (uint8_t)(a ^ b);
}
/* Bitwise AND of two bytes. */
forceinline uint8_t AND8(uint8_t a, uint8_t b)
{
    return (uint8_t)(a & b);
}
/* Bitwise OR of two bytes. */
forceinline uint8_t OR8(uint8_t a, uint8_t b)
{
    return (uint8_t)(a | b);
}
/* ---------------------------------------------------------------- */
/*
 * Linear layer, combined with a renaming of the lanes: each output lane is
 * computed from a different input lane (x[0] from x[2], x[2] from x[4],
 * x[4] from x[1], x[1] from x[3], x[3] from xtemp). The order of the
 * statements matters because every input lane is read before it is
 * overwritten.
 *
 * For an input v and rotation amounts r1 < r2, the two-step form
 *   temp = v ^ ROR(v, r2 - r1);  out = v ^ ROR(temp, r1);
 * equals v ^ ROR(v, r1) ^ ROR(v, r2), at the cost of two ROR calls whose
 * second operand is the already-combined value.
 */
forceinline void LINEAR_LAYER(state_t *s, uint64_t xtemp)
{
uint64_t temp;
temp = s->x[2] ^ ROR(s->x[2], 28 - 19);
s->x[0] = s->x[2] ^ ROR(temp, 19);
temp = s->x[4] ^ ROR(s->x[4], 6 - 1);
s->x[2] = s->x[4] ^ ROR(temp, 1);
temp = s->x[1] ^ ROR(s->x[1], 41 - 7);
s->x[4] = s->x[1] ^ ROR(temp, 7);
temp = s->x[3] ^ ROR(s->x[3], 61 - 39);
s->x[1] = s->x[3] ^ ROR(temp, 39);
temp = xtemp ^ ROR(xtemp, 17 - 10);
s->x[3] = xtemp ^ ROR(temp, 10);
}
/* ---------------------------------------------------------------- */
/*
 * S-box layer for one byte column: operates on byte `pos` of each of the
 * five lanes of *s, using only 8-bit operations. Four results are written
 * back into byte `pos` of lanes 1..4 and the fifth into xtemp->b[pos]; byte
 * `pos` of lane 0 is left holding an intermediate value.
 * The statements depend on their exact order, since lanes are overwritten
 * while later statements still read them.
 */
forceinline void NONLINEAR_LAYER(state_t *s, lane_t *xtemp, uint8_t pos)
{
uint8_t t0;
uint8_t t1;
uint8_t t2;
// Based on the round description of Ascon given in the Bachelor's thesis:
//"Optimizing Ascon on RISC-V" of Lars Jellema
// see https://github.com/Lucus16/ascon-riscv/
t0 = XOR8(s->b[1][pos], s->b[2][pos]);
t1 = XOR8(s->b[0][pos], s->b[4][pos]);
t2 = XOR8(s->b[3][pos], s->b[4][pos]);
s->b[4][pos] = OR8(s->b[3][pos], NOT8(s->b[4][pos]));
s->b[4][pos] = XOR8(s->b[4][pos], t0);
s->b[3][pos] = XOR8(s->b[3][pos], s->b[1][pos]);
s->b[3][pos] = OR8(s->b[3][pos], t0);
s->b[3][pos] = XOR8(s->b[3][pos], t1);
s->b[2][pos] = XOR8(s->b[2][pos], t1);
s->b[2][pos] = OR8(s->b[2][pos], s->b[1][pos]);
s->b[2][pos] = XOR8(s->b[2][pos], t2);
s->b[1][pos] = AND8(s->b[1][pos], NOT8(t1));
s->b[1][pos] = XOR8(s->b[1][pos], t2);
s->b[0][pos] = OR8(s->b[0][pos], t2);
(*xtemp).b[pos] = XOR8(s->b[0][pos], t0);
}
/* ---------------------------------------------------------------- */
/*
 * One permutation round on *s: XOR the round constant C into byte 0 of
 * lane 2, run the s-box layer on each of the 8 byte columns, then run the
 * linear layer with the extra lane collected from the s-box layer.
 */
forceinline void ROUND(state_t *s, uint8_t C)
{
    lane_t sbox_lane;
    uint8_t column = 0;

    /* round constant */
    s->b[2][0] ^= C;

    /* s-box layer, column by column */
    while (column < 8)
    {
        NONLINEAR_LAYER(s, &sbox_lane, column);
        column++;
    }

    /* linear layer */
    LINEAR_LAYER(s, sbox_lane.x);
}
/* ---------------------------------------------------------------- */
/*
 * Runs the trailing rounds of the 12-entry round-constant schedule on *s.
 * nr == 12 runs all twelve rounds, nr == 6 runs the last six, and any other
 * value runs only the last round (constant 0x4b).
 */
void PROUNDS(state_t *s, uint8_t nr)
{
    const int all_rounds = (nr == 12);
    const int last_six = all_rounds || (nr == 6);

    if (all_rounds)
    {
        /* rounds 1-6, only part of the 12-round permutation */
        ROUND(s, 0xf0);
        ROUND(s, 0xe1);
        ROUND(s, 0xd2);
        ROUND(s, 0xc3);
        ROUND(s, 0xb4);
        ROUND(s, 0xa5);
    }
    if (last_six)
    {
        /* rounds 7-11, shared by the 12- and 6-round permutations */
        ROUND(s, 0x96);
        ROUND(s, 0x87);
        ROUND(s, 0x78);
        ROUND(s, 0x69);
        ROUND(s, 0x5a);
    }
    /* final round, always executed */
    ROUND(s, 0x4b);
}
/* ---------------------------------------------------------------- */
#endif // ASCONP_H
#ifndef CONFIG_H
#define CONFIG_H
// Compile-time switch tested with `#if ENABLE_HASH == 1` around crypto_hash();
// with the value 0 that function is left out of the build.
#define ENABLE_HASH 0
#include "isap.h"
// ISAP-A-128a
// Three 8-byte initialization values. They differ only in their first byte
// (0x01, 0x02, 0x03); the remaining seven bytes are the parameters ISAP_K,
// ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE and ISAP_sK, in that order.
// NOTE(review): these are non-static object definitions inside a header;
// including this header from more than one translation unit would define the
// same symbols twice.
const uint8_t ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
// Ascon-Hash
// 8-byte initial value that crypto_hash() loads into lane 0 of its state.
const uint8_t ASCON_HASH_IV[] = {0x00, 0x40, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00};
#endif // CONFIG_H
#include "api.h"
#include "isap.h"
#include "crypto_aead.h"
/*
 * AEAD encryption entry point.
 * Writes mlen ciphertext bytes followed by an ISAP_TAG_SZ-byte tag to c and
 * stores the total length in *clen. nsec is unused. Always returns 0.
 */
int crypto_aead_encrypt(
    unsigned char *c, unsigned long long *clen,
    const unsigned char *m, unsigned long long mlen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *nsec,
    const unsigned char *npub,
    const unsigned char *k)
{
    /* The tag is appended directly behind the ciphertext. */
    unsigned char *const tag_out = c + mlen;

    (void)nsec;

    /* Output length: ciphertext plus tag. */
    *clen = mlen + ISAP_TAG_SZ;

    /* An empty message needs no keystream. */
    if (mlen != 0)
    {
        isap_enc(k, npub, m, mlen, c);
    }

    /* Authenticate associated data and ciphertext. */
    isap_mac(k, npub, ad, adlen, c, mlen, tag_out);

    return 0;
}
/*
 * AEAD decryption entry point.
 * c holds clen bytes: the ciphertext followed by an ISAP_TAG_SZ-byte tag.
 * The tag is recomputed over ad and the ciphertext and compared with the
 * received one; the plaintext is written to m only when they match.
 * nsec is unused.
 *
 * Returns 0 on success and -1 when the input is shorter than a tag or the
 * tag does not verify. *mlen is set to the plaintext length, or to 0 when
 * the input is too short to contain a tag.
 */
int crypto_aead_decrypt(
    unsigned char *m, unsigned long long *mlen,
    unsigned char *nsec,
    const unsigned char *c, unsigned long long clen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *npub,
    const unsigned char *k)
{
    (void)nsec;

    // An input shorter than the tag cannot be valid; without this check
    // clen - ISAP_TAG_SZ would wrap around and the MAC would read far
    // beyond the end of c.
    if (clen < ISAP_TAG_SZ)
    {
        *mlen = 0;
        return -1;
    }

    // Plaintext length is clen - tag length
    *mlen = clen - ISAP_TAG_SZ;

    // Generate tag
    unsigned char tag[ISAP_TAG_SZ];
    isap_mac(k, npub, ad, adlen, c, *mlen, tag);

    // Compare tag: accumulate all byte differences so that the running time
    // does not depend on the position of the first mismatch.
    unsigned char diff = 0;
    for (unsigned int i = 0; i < ISAP_TAG_SZ; i++)
    {
        diff |= (unsigned char)(tag[i] ^ c[(*mlen) + i]);
    }
    if (diff != 0)
    {
        return -1;
    }

    // Perform decryption only after the tag has been verified
    if (*mlen > 0)
    {
        isap_enc(k, npub, c, *mlen, m);
    }
    return 0;
}
#ifndef FORCEINLINE_H_
#define FORCEINLINE_H_
/*
 * forceinline: function specifier that requests unconditional inlining where
 * the compiler offers a way to do so, and falls back to plain `inline`
 * otherwise.
 */
#ifdef _MSC_VER
#define forceinline __forceinline
#elif defined(__GNUC__)
#define forceinline inline __attribute__((__always_inline__))
#elif defined(__clang__)
/* Clang identifies itself with the lowercase macro __clang__. */
#if __has_attribute(__always_inline__)
#define forceinline inline __attribute__((__always_inline__))
#else
#define forceinline inline
#endif
#else
#define forceinline inline
#endif
#endif /* FORCEINLINE_H_ */
#include <string.h>
#include <inttypes.h>
#include "api.h"
#include "isap.h"
#include "asconp.h"
#include "config.h"
/*
 * Absorbs len bytes from src into lane 0 of *s, 8 bytes at a time, running
 * P_sH after every absorbed block. A final padded block is always absorbed:
 * the remaining 0..7 bytes followed by a 0x80 byte.
 *
 * s   - state, updated in place (the P_sH macro expects this exact name)
 * src - input bytes; no alignment is required
 * len - number of input bytes
 */
forceinline void ABSORB_LANES(state_t *s, const uint8_t *src, uint64_t len)
{
    while (len >= ISAP_rH / 8)
    {
        // Absorb full lanes. The bytes are copied into a lane_t instead of
        // dereferencing src through a cast, because src may be unaligned.
        lane_t t0;
        memcpy(t0.b, src, sizeof(t0.b));
        t0 = U64TOWORD(t0);
        s->x[0] ^= t0.x;
        len -= ISAP_rH / 8;
        src += ISAP_rH / 8;
        P_sH;
    }
    if (len > 0)
    {
        // Absorb partial lane and padding: input bytes fill the lane from
        // byte index 7 downwards, the padding byte follows directly.
        size_t i;
        lane_t t0 = {0};
        for (i = 0; i < len; i++)
        {
            t0.b[7 - i] ^= src[i];
        }
        t0.b[7 - i] ^= 0x80;
        t0 = TOBI(t0);
        s->x[0] ^= t0.x;
        P_sH;
    }
    else
    {
        // Absorb padded empty lane
        s->b[0][7] ^= 0x80;
        P_sH;
    }
}
/******************************************************************************/
/* ISAP_RK */
/******************************************************************************/
/*
 * Re-keying step. A local state is loaded with k (lanes 0-1) and iv (lane 2),
 * lanes 3-4 are zeroed and P_sK is applied. The 16 bytes of y are then
 * absorbed one bit at a time, most-significant bit of each byte first, into
 * the top bit of s->b[0][7]. P_sB follows every absorbed bit except the very
 * last one, which is followed by P_sK instead.
 *
 * k      - 16 key bytes
 * iv     - 8 IV bytes
 * y      - 16 input bytes
 * out    - receives lanes 0 and 1 of the final state, and lane 2 as well
 *          when outlen > 16; its other lanes are left untouched
 * outlen - requested output length in bytes; only compared against 16
 *
 * All work happens on the local state and *out is written only at the very
 * end, so y may point into *out (isap_mac calls it that way).
 * NOTE(review): the lane loads cast byte pointers to lane_t *, which assumes
 * the target tolerates unaligned 64-bit loads - confirm.
 */
void isap_rk(
const uint8_t *k,
const uint8_t *iv,
const uint8_t *y,
state_t *out,
const size_t outlen)
{
state_t state;
state_t *s = &state;
// Initialize
s->l[0] = U64TOWORD(*(lane_t *)(k + 0));
s->l[1] = U64TOWORD(*(lane_t *)(k + 8));
s->l[2] = U64TOWORD(*(lane_t *)(iv + 0));
s->x[3] = 0;
s->x[4] = 0;
P_sK;
// Absorb Y, bit by bit
for (size_t i = 0; i < 16; i++)
{
uint8_t y_byte = *y;
// each selected bit is shifted up to bit 7 before it is XORed in
s->b[0][7] ^= (y_byte & 0x80) << 0;
P_sB;
s->b[0][7] ^= (y_byte & 0x40) << 1;
P_sB;
s->b[0][7] ^= (y_byte & 0x20) << 2;
P_sB;
s->b[0][7] ^= (y_byte & 0x10) << 3;
P_sB;
s->b[0][7] ^= (y_byte & 0x08) << 4;
P_sB;
s->b[0][7] ^= (y_byte & 0x04) << 5;
P_sB;
s->b[0][7] ^= (y_byte & 0x02) << 6;
P_sB;
s->b[0][7] ^= (y_byte & 0x01) << 7;
// no P_sB after the last bit of the last byte; P_sK below takes its place
if (i != 15)
{
P_sB;
y += 1;
}
}
// Squeeze K*
P_sK;
out->x[0] = s->x[0];
out->x[1] = s->x[1];
if (outlen > 16)
{
out->x[2] = s->x[2];
}
}
/******************************************************************************/
/* ISAP_MAC */
/******************************************************************************/
void isap_mac(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *ad, uint64_t adlen,
const uint8_t *c, uint64_t clen,
uint8_t *tag)
{
state_t state;
state_t *s = &state;
// Initialize
s->l[0] = U64TOWORD(*(lane_t *)(npub + 0));
s->l[1] = U64TOWORD(*(lane_t *)(npub + 8));
s->l[2] = U64TOWORD(*(lane_t *)(ISAP_IV_A + 0));
s->x[3] = 0;
s->x[4] = 0;
P_sH;
// Absorb associated data
ABSORB_LANES(s, ad, adlen);
// Domain seperation
s->w[4][0] ^= 0x1UL;
// Absorb ciphertext
ABSORB_LANES(s, c, clen);
// Derive KA*
s->l[0] = WORDTOU64(s->l[0]);
s->l[1] = WORDTOU64(s->l[1]);
isap_rk(k, ISAP_IV_KA, (const uint8_t *)(s->b), s, CRYPTO_KEYBYTES);
// Squeeze tag
P_sH;
lane_t t0 = WORDTOU64(s->l[0]);
memcpy(tag + 0, t0.b, 8);
t0 = WORDTOU64(s->l[1]);
memcpy(tag + 8, t0.b, 8);
}
/******************************************************************************/
/* ISAP_ENC */
/******************************************************************************/
void isap_enc(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *m, uint64_t mlen,
uint8_t *c)
{
state_t state;
state_t *s = &state;
// Init state
isap_rk(k, ISAP_IV_KE, npub, s, ISAP_STATE_SZ - CRYPTO_NPUBBYTES);
s->l[3] = U64TOWORD(*(lane_t *)(npub + 0));
s->l[4] = U64TOWORD(*(lane_t *)(npub + 8));
while (mlen >= ISAP_rH / 8)
{
// Encrypt full lanes
P_sE;
lane_t t0 = WORDTOU64(s->l[0]);
*(uint64_t *)c = *(uint64_t *)m ^ t0.x;
mlen -= ISAP_rH / 8;
m += ISAP_rH / 8;
c += ISAP_rH / 8;
}
if (mlen > 0)
{
// Encrypt partial lanes
P_sE;
lane_t t0 = WORDTOU64(s->l[0]);
for (uint8_t i = 0; i < mlen; i++)
{
*c = *m ^ t0.b[i];
m += 1;
c += 1;
}
}
}
/******************************************************************************/
/* Ascon-Hash */
/******************************************************************************/
// Only compiled when ENABLE_HASH is defined as 1.
#if ENABLE_HASH == 1
/*
 * Hash function built on the same state and permutation: lane 0 is loaded
 * from ASCON_HASH_IV, the other lanes are zeroed, the input is absorbed with
 * ABSORB_LANES, and 4 x 8 = 32 output bytes are taken from lane 0 with a
 * P_sH between consecutive output blocks. Always returns 0.
 * NOTE(review): the IV load and the stores to `out` go through lane_t * /
 * uint64_t * casts, which assumes suitably aligned buffers - confirm.
 */
int crypto_hash(uint8_t *out, const uint8_t *in, unsigned long long inlen)
{
state_t state;
state_t *s = &state;
// Initialize
s->l[0] = U64TOWORD(*(lane_t *)(ASCON_HASH_IV + 0));
s->x[1] = 0;
s->x[2] = 0;
s->x[3] = 0;
s->x[4] = 0;
P_sH;
// Absorb input
ABSORB_LANES(s, in, inlen);
for (size_t i = 0; i < 4; i++)
{
// Squeeze full lanes
lane_t t0 = WORDTOU64(s->l[0]);
*(uint64_t *)(out + 8 * i) = t0.x;
// no permutation is needed after the last output block
if (i < 3)
{
P_sH;
}
}
return 0;
}
#endif
// Parameters and public functions of this ISAP instance.
#ifndef ISAP_H
#define ISAP_H
#include <inttypes.h>
// Rate in bits
#define ISAP_rH 64
#define ISAP_rB 1
// Number of rounds
#define ISAP_sH 12
#define ISAP_sB 1
#define ISAP_sE 6
#define ISAP_sK 12
// State size in bytes
#define ISAP_STATE_SZ 40
// Size of rate in bytes
#define ISAP_rH_SZ ((ISAP_rH + 7) / 8)
// Size of zero truncated IV in bytes
#define ISAP_IV_SZ 8
// Size of tag in bytes
#define ISAP_TAG_SZ 16
// Security level
#define ISAP_K 128
// Computes the authentication tag over the associated data `ad` (adlen bytes)
// and the ciphertext `c` (clen bytes) under key `k` and nonce `npub`, and
// writes ISAP_TAG_SZ bytes to `tag`.
void isap_mac(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *ad, const uint64_t adlen,
const uint8_t *c, const uint64_t clen,
uint8_t *tag);
// XORs the mlen input bytes at `m` with a keystream derived from `k` and
// `npub` and writes the result to `c`; the same call is used for encryption
// and decryption.
void isap_enc(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *m, const uint64_t mlen,
uint8_t *c);
#endif
#ifndef ASCONP_H
#define ASCONP_H
#include <inttypes.h>
#include "forceinline.h"
/* One 64-bit lane of the permutation state, with overlapping views. */
typedef union
{
uint64_t x;    /* the lane as one 64-bit integer */
uint32_t w[2]; /* the same storage as two 32-bit words */
uint8_t b[8];  /* the same storage as eight bytes */
} lane_t;
/* Permutation state: five 64-bit lanes (40 bytes), with overlapping views. */
typedef union
{
lane_t l[5];      /* lanes as lane_t unions */
uint64_t x[5];    /* lanes as 64-bit integers */
uint32_t w[5][2]; /* lanes as pairs of 32-bit words */
uint8_t b[5][8];  /* lanes as byte arrays: b[lane][byte] */
} state_t;
/* ---------------------------------------------------------------- */
/*
 * Permutation shorthands. Each expands to a PROUNDS call on a variable named
 * `s`, which must be a state_t * in scope at the point of use. The round
 * counts are written out literally here.
 */
#define P_sH PROUNDS(s, 12)
#define P_sB PROUNDS(s, 1)
#define P_sE PROUNDS(s, 6)
#define P_sK PROUNDS(s, 12)
/* ---------------------------------------------------------------- */
/*
 * Conversion between the byte order of external buffers and the lane values
 * used by the permutation. Both directions map to U64BIG.
 */
#define U64TOWORD(x) U64BIG(x)
#define WORDTOU64(x) U64BIG(x)
/* ---------------------------------------------------------------- */
/* Identity conversions in this implementation: the argument is returned as is. */
#define TOBI(x) (x)
#define FROMBI(x) (x)
/* ---------------------------------------------------------------- */
/*
 * Returns the lane with the order of its eight bytes reversed
 * (byte 0 <-> byte 7, byte 1 <-> byte 6, ...).
 */
lane_t U64BIG(lane_t x)
{
    uint64_t v = x.x;

    /* swap neighbouring bytes */
    v = ((v & 0x00FF00FF00FF00FFULL) << 8) | ((v >> 8) & 0x00FF00FF00FF00FFULL);
    /* swap neighbouring 16-bit groups */
    v = ((v & 0x0000FFFF0000FFFFULL) << 16) | ((v >> 16) & 0x0000FFFF0000FFFFULL);
    /* swap the two 32-bit halves */
    v = (v << 32) | (v >> 32);

    x.x = v;
    return x;
}
/* ---------------------------------------------------------------- */
/*
 * XMUL(i, x): building block of ROR. It only works inside a scope that
 * provides lane_t variables `a` (source) and `b` (destination), an int
 * `byte_rol` and a uint16_t `tmp`.
 * Source byte a.b[i] is multiplied by 2^x (a left shift by x bits) into the
 * 16-bit `tmp`; the low half is XORed into destination byte
 * (byte_rol + i) mod 8 and the bits shifted out of the byte are XORed into
 * the next destination byte (byte_rol + i + 1) mod 8.
 */
#define XMUL(i, x) \
do \
{ \
tmp = (uint16_t)a.b[i] * (1 << (x)); \
b.b[(byte_rol + (i)) & 0x7] ^= (uint8_t)tmp; \
b.b[(byte_rol + (i) + 1) & 0x7] ^= tmp >> 8; \
} while (0)
/*
 * Rotation of x by n bits to the right, carried out as a left rotation by
 * (64 - n): a whole-byte offset plus a sub-byte shift that XMUL applies to
 * each of the eight source bytes.
 * NOTE(review): the byte indexing treats b[0] as the least-significant byte,
 * so this is a rotation only on a little-endian target - confirm.
 */
uint64_t ROR(uint64_t x, int n)
{
    const int rol = 64 - n;
    const int bit_rol = rol & 0x7;
    const int byte_rol = rol >> 3;
    lane_t a = {.x = x};
    lane_t b = {.x = 0ull};
    uint16_t tmp;

    for (int i = 0; i < 8; i++)
    {
        XMUL(i, bit_rol);
    }
    return b.x;
}
/* ---------------------------------------------------------------- */
/* Complement of a, truncated to 8 bits. */
forceinline uint8_t NOT8(uint8_t a)
{
    const uint8_t flipped = (uint8_t)~a;
    return flipped;
}
/* Exclusive OR of two bytes. */
forceinline uint8_t XOR8(uint8_t a, uint8_t b)
{
    const uint8_t mixed = (uint8_t)(a ^ b);
    return mixed;
}
/* AND of two bytes. */
forceinline uint8_t AND8(uint8_t a, uint8_t b)
{
    const uint8_t common = (uint8_t)(a & b);
    return common;
}
/* OR of two bytes. */
forceinline uint8_t OR8(uint8_t a, uint8_t b)
{
    const uint8_t either = (uint8_t)(a | b);
    return either;
}
/* ---------------------------------------------------------------- */
/*
 * Linear (diffusion) step of one round. Each destination lane is computed as
 * v ^ ROR(v ^ ROR(v, r2 - r1), r1) from a *different* source lane v:
 * x[0] <- x[2], x[2] <- x[4], x[4] <- x[1], x[1] <- x[3], x[3] <- xtemp.
 * The assignments run in exactly this order so that every lane is read
 * before it is overwritten.
 */
forceinline void LINEAR_LAYER(state_t *s, uint64_t xtemp)
{
    uint64_t v;

    v = s->x[2];
    s->x[0] = v ^ ROR(v ^ ROR(v, 28 - 19), 19);

    v = s->x[4];
    s->x[2] = v ^ ROR(v ^ ROR(v, 6 - 1), 1);

    v = s->x[1];
    s->x[4] = v ^ ROR(v ^ ROR(v, 41 - 7), 7);

    v = s->x[3];
    s->x[1] = v ^ ROR(v ^ ROR(v, 61 - 39), 39);

    s->x[3] = xtemp ^ ROR(xtemp ^ ROR(xtemp, 17 - 10), 10);
}
/* ---------------------------------------------------------------- */
/*
 * Substitution step for byte column `pos`: reads byte `pos` of the five
 * lanes, then overwrites byte `pos` of lanes 0-4 and stores one extra output
 * byte in (*xtemp).b[pos].
 */
forceinline void NONLINEAR_LAYER(state_t *s, lane_t *xtemp, uint8_t pos)
{
    // Based on the round description of Ascon given in the Bachelor's thesis:
    // "Optimizing Ascon on RISC-V" of Lars Jellema
    // see https://github.com/Lucus16/ascon-riscv/

    // Snapshot of the five input bytes of this column.
    const uint8_t in0 = s->b[0][pos];
    const uint8_t in1 = s->b[1][pos];
    const uint8_t in2 = s->b[2][pos];
    const uint8_t in3 = s->b[3][pos];
    const uint8_t in4 = s->b[4][pos];

    // Shared intermediate terms.
    const uint8_t t0 = (uint8_t)(in1 ^ in2);
    const uint8_t t1 = (uint8_t)(in0 ^ in4);
    const uint8_t t2 = (uint8_t)(in3 ^ in4);
    const uint8_t lane0 = (uint8_t)(in0 | t2);

    s->b[4][pos] = (uint8_t)((in3 | (uint8_t)~in4) ^ t0);
    s->b[3][pos] = (uint8_t)(((in3 ^ in1) | t0) ^ t1);
    s->b[2][pos] = (uint8_t)(((in2 ^ t1) | in1) ^ t2);
    s->b[1][pos] = (uint8_t)((in1 & (uint8_t)~t1) ^ t2);
    s->b[0][pos] = lane0;
    (*xtemp).b[pos] = (uint8_t)(lane0 ^ t0);
}
/* ---------------------------------------------------------------- */
/*
 * One permutation round: round constant C into byte 0 of lane 2, s-box layer
 * over the 8 byte columns, then the linear layer fed with the extra lane the
 * s-box layer produced.
 */
forceinline void ROUND(state_t *s, uint8_t C)
{
    lane_t extra;
    uint8_t col = 0;

    /* round constant */
    s->b[2][0] = (uint8_t)(s->b[2][0] ^ C);

    /* s-box layer */
    do
    {
        NONLINEAR_LAYER(s, &extra, col);
    } while (++col < 8);

    /* linear layer */
    LINEAR_LAYER(s, extra.x);
}
/* ---------------------------------------------------------------- */
/*
 * Round-constant arithmetic used by PROUNDS.
 * START(n) is the constant of the first of n rounds (0xf0 for n = 12, 0x96
 * for n = 6, 0x4b for n = 1), INC is added after every round, and END is the
 * value reached right after the last constant 0x4b.
 */
#define RC(i) (i)
#define START(n) ((3 + (n)) << 4 | (12 - (n)))
#define INC -0x0f
#define END 0x3c
/*
 * Applies nr rounds to *s, walking the constant from START(nr) in steps of
 * INC until it equals END. At least one round is always executed.
 */
void PROUNDS(state_t *s, uint8_t nr)
{
    int rc = START(nr);

    for (;;)
    {
        ROUND(s, RC(rc));
        rc += INC;
        if (rc == END)
        {
            break;
        }
    }
}
/* ---------------------------------------------------------------- */
#endif // ASCONP_H
#ifndef CONFIG_H
#define CONFIG_H
// Compile-time switch tested with `#if ENABLE_HASH == 1` around crypto_hash();
// with the value 0 that function is left out of the build.
#define ENABLE_HASH 0
#include "isap.h"
// ISAP-A-128a
// Three 8-byte initialization values. They differ only in their first byte
// (0x01, 0x02, 0x03); the remaining seven bytes are the parameters ISAP_K,
// ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE and ISAP_sK, in that order.
// NOTE(review): these are non-static object definitions inside a header;
// including this header from more than one translation unit would define the
// same symbols twice.
const uint8_t ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const uint8_t ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
// Ascon-Hash
// 8-byte initial value that crypto_hash() loads into lane 0 of its state.
const uint8_t ASCON_HASH_IV[] = {0x00, 0x40, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00};
#endif // CONFIG_H
../opt_8/crypto_aead.c
\ No newline at end of file
../opt_8/forceinline.h
\ No newline at end of file
#include <string.h>
#include <inttypes.h>
#include "api.h"
#include "isap.h"
#include "asconp.h"
#include "config.h"
/*
 * Absorbs len bytes from src into lane 0 of *s, 8 bytes at a time, running
 * P_sH after every absorbed block. A final padded block is always absorbed:
 * the remaining 0..7 bytes followed by a 0x80 byte.
 *
 * s   - state, updated in place (the P_sH macro expects this exact name)
 * src - input bytes; no alignment is required
 * len - number of input bytes
 */
void ABSORB_LANES(state_t *s, const uint8_t *src, uint64_t len)
{
    while (len >= ISAP_rH / 8)
    {
        // Absorb full lanes. The block is copied into a lane_t rather than
        // read through a pointer cast, because src may be unaligned.
        lane_t t0;
        memcpy(t0.b, src, sizeof(t0.b));
        t0 = U64TOWORD(t0);
        s->x[0] ^= t0.x;
        len -= ISAP_rH / 8;
        src += ISAP_rH / 8;
        P_sH;
    }
    if (len > 0)
    {
        // Absorb partial lane and padding: input bytes fill the lane from
        // byte index 7 downwards, the padding byte follows directly.
        size_t i;
        lane_t t0 = {0};
        for (i = 0; i < len; i++)
        {
            t0.b[7 - i] ^= src[i];
        }
        t0.b[7 - i] ^= 0x80;
        t0 = TOBI(t0);
        s->x[0] ^= t0.x;
        P_sH;
    }
    else
    {
        // Absorb padded empty lane
        s->b[0][7] ^= 0x80;
        P_sH;
    }
}
/******************************************************************************/
/* ISAP_RK */
/******************************************************************************/
/*
 * Re-keying step. A local state is loaded with k (lanes 0-1) and iv (lane 2),
 * lanes 3-4 are zeroed and P_sK is applied. The 16 bytes of y are then
 * absorbed one bit at a time, most-significant bit of each byte first, into
 * the top bit of s->b[0][7]. P_sB follows every absorbed bit except the very
 * last one, which is followed by P_sK instead.
 *
 * k      - 16 key bytes
 * iv     - 8 IV bytes
 * y      - 16 input bytes
 * out    - receives lanes 0 and 1 of the final state, and lane 2 as well
 *          when outlen > 16; its other lanes are left untouched
 * outlen - requested output length in bytes; only compared against 16
 *
 * All work happens on the local state and *out is written only at the very
 * end, so y may point into *out.
 */
void isap_rk(
    const uint8_t *k,
    const uint8_t *iv,
    const uint8_t *y,
    state_t *out,
    const size_t outlen)
{
    state_t state;
    state_t *s = &state;

    // Initialize. Key and IV are copied bytewise and converted in place; a
    // lane_t pointer cast would assume 8-byte aligned inputs.
    memcpy(s->b[0], k + 0, 8);
    memcpy(s->b[1], k + 8, 8);
    memcpy(s->b[2], iv, 8);
    s->l[0] = U64TOWORD(s->l[0]);
    s->l[1] = U64TOWORD(s->l[1]);
    s->l[2] = U64TOWORD(s->l[2]);
    s->x[3] = 0;
    s->x[4] = 0;
    P_sK;

    // Absorb Y, bit by bit
    for (size_t i = 0; i < 16; i++)
    {
        const uint8_t y_byte = y[i];
        for (uint8_t bit = 0; bit < 8; bit++)
        {
            // Move bit (7 - bit) of y_byte up to position 7 and absorb it.
            s->b[0][7] ^= (uint8_t)((y_byte << bit) & 0x80);
            // The very last bit is followed by P_sK below, not by P_sB.
            if (i != 15 || bit != 7)
            {
                P_sB;
            }
        }
    }

    // Squeeze K*
    P_sK;
    out->x[0] = s->x[0];
    out->x[1] = s->x[1];
    if (outlen > 16)
    {
        out->x[2] = s->x[2];
    }
}
/******************************************************************************/
/* ISAP_MAC */
/******************************************************************************/
/*
 * Computes the 16-byte authentication tag over ad and c.
 * Steps, as coded below: load npub (lanes 0-1) and ISAP_IV_A (lane 2) and
 * permute; absorb ad; flip the lowest bit of s->w[4][0]; absorb c; replace
 * lanes 0-1 by the output of isap_rk keyed with k, using the current lanes
 * 0-1 as its input; permute once more and write lanes 0-1 to tag.
 * NOTE(review): the loads of npub and ISAP_IV_A cast byte pointers to
 * lane_t *, which assumes the target tolerates unaligned 64-bit loads -
 * confirm.
 */
void isap_mac(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *ad, uint64_t adlen,
const uint8_t *c, uint64_t clen,
uint8_t *tag)
{
state_t state;
state_t *s = &state;
// Initialize
s->l[0] = U64TOWORD(*(lane_t *)(npub + 0));
s->l[1] = U64TOWORD(*(lane_t *)(npub + 8));
s->l[2] = U64TOWORD(*(lane_t *)(ISAP_IV_A + 0));
s->x[3] = 0;
s->x[4] = 0;
P_sH;
// Absorb associated data
ABSORB_LANES(s, ad, adlen);
// Domain separation
s->w[4][0] ^= 0x1UL;
// Absorb ciphertext
ABSORB_LANES(s, c, clen);
// Derive KA*
// lanes 0-1 are converted back with WORDTOU64 so that isap_rk can read them
// as 16 plain bytes; isap_rk then overwrites lanes 0-1 of s
s->l[0] = WORDTOU64(s->l[0]);
s->l[1] = WORDTOU64(s->l[1]);
isap_rk(k, ISAP_IV_KA, (const uint8_t *)(s->b), s, CRYPTO_KEYBYTES);
// Squeeze tag
P_sH;
lane_t t0 = WORDTOU64(s->l[0]);
memcpy(tag + 0, t0.b, 8);
t0 = WORDTOU64(s->l[1]);
memcpy(tag + 8, t0.b, 8);
}
/******************************************************************************/
/* ISAP_ENC */
/******************************************************************************/
void isap_enc(
const uint8_t *k,
const uint8_t *npub,
const uint8_t *m, uint64_t mlen,
uint8_t *c)
{
state_t state;
state_t *s = &state;
// Init state
isap_rk(k, ISAP_IV_KE, npub, s, ISAP_STATE_SZ - CRYPTO_NPUBBYTES);
s->l[3] = U64TOWORD(*(lane_t *)(npub + 0));
s->l[4] = U64TOWORD(*(lane_t *)(npub + 8));
while (mlen >= ISAP_rH / 8)
{
// Encrypt full lanes
P_sE;
lane_t t0 = WORDTOU64(s->l[0]);
*(uint64_t *)c = *(uint64_t *)m ^ t0.x;
mlen -= ISAP_rH / 8;
m += ISAP_rH / 8;
c += ISAP_rH / 8;
}
if (mlen > 0)
{
// Encrypt partial lanes
P_sE;
lane_t t0 = WORDTOU64(s->l[0]);
for (uint8_t i = 0; i < mlen; i++)
{
*c = *m ^ t0.b[i];
m += 1;
c += 1;
}
}
}
/******************************************************************************/
/* Ascon-Hash */
/******************************************************************************/
// Only compiled when ENABLE_HASH is defined as 1.
#if ENABLE_HASH == 1
/*
 * Hash function built on the same state and permutation: lane 0 is loaded
 * from ASCON_HASH_IV, the other lanes are zeroed, the input is absorbed with
 * ABSORB_LANES, and 4 x 8 = 32 output bytes are taken from lane 0 with a
 * P_sH between consecutive output blocks.
 *
 * out   - receives 32 bytes; no alignment is required
 * in    - input, inlen bytes
 * Returns 0.
 */
int crypto_hash(uint8_t *out, const uint8_t *in, unsigned long long inlen)
{
    state_t state;
    state_t *s = &state;

    // Initialize. The IV bytes are copied and converted in place instead of
    // being read through a lane_t pointer cast.
    memcpy(s->b[0], ASCON_HASH_IV, 8);
    s->l[0] = U64TOWORD(s->l[0]);
    s->x[1] = 0;
    s->x[2] = 0;
    s->x[3] = 0;
    s->x[4] = 0;
    P_sH;

    // Absorb input
    ABSORB_LANES(s, in, inlen);

    for (size_t i = 0; i < 4; i++)
    {
        // Squeeze full lanes; memcpy keeps the store valid for any
        // alignment of out.
        lane_t t0 = WORDTOU64(s->l[0]);
        memcpy(out + 8 * i, t0.b, sizeof(t0.b));
        // No permutation is needed after the last output block.
        if (i < 3)
        {
            P_sH;
        }
    }
    return 0;
}
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment