Commit bd5da236 by Robert Primas Committed by Sebastian Renner

isap

parent ec7e6d42
This source diff could not be displayed because it is too large. You can view the blob instead.
#ifndef API_H
#define API_H
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#endif
#ifndef ASCONP_H_
#define ASCONP_H_
#include <inttypes.h>
typedef unsigned char u8;
typedef uint32_t u32;
typedef unsigned long long u64;
typedef struct
{
u32 e;
u32 o;
} u32_2;
// Round constants, bit-interleaved
u32 rc_o[12] = {0xc, 0xc, 0x9, 0x9, 0xc, 0xc, 0x9, 0x9, 0x6, 0x6, 0x3, 0x3};
u32 rc_e[12] = {0xc, 0x9, 0xc, 0x9, 0x6, 0x3, 0x6, 0x3, 0xc, 0x9, 0xc, 0x9};
/* ---------------------------------------------------------------- */
u64 U64BIG(u64 x)
{
return ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) |
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) |
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) |
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56));
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
void to_bit_interleaving(u32_2 *out, u64 in)
{
u32 hi = (in) >> 32;
u32 lo = (u32)(in);
u32 r0, r1;
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
(*out).e = (lo & 0x0000FFFF) | (hi << 16);
(*out).o = (lo >> 16) | (hi & 0xFFFF0000);
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
void from_bit_interleaving(u64 *out, u32_2 in)
{
u32 lo = ((in).e & 0x0000FFFF) | ((in).o << 16);
u32 hi = ((in).e >> 16) | ((in).o & 0xFFFF0000);
u32 r0, r1;
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
*out = (u64)hi << 32 | lo;
}
/* ---------------------------------------------------------------- */
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
/* ---------------------------------------------------------------- */
void static inline PX(u32 rounds, u32_2* x0, u32_2* x1, u32_2* x2, u32_2* x3, u32_2* x4) {
u32_2 t0, t1, t2, t3, t4;
for (u32 r = 12 - rounds; r < 12; r++){
/* rcon */
(*x2).e ^= rc_e[r];
(*x2).o ^= rc_o[r];
/* non-linear layer */
(*x0).e ^= (*x4).e;
(*x0).o ^= (*x4).o;
(*x4).e ^= (*x3).e;
(*x4).o ^= (*x3).o;
(*x2).e ^= (*x1).e;
(*x2).o ^= (*x1).o;
(t0).e = (*x0).e;
(t0).o = (*x0).o;
(t4).e = (*x4).e;
(t4).o = (*x4).o;
(t3).e = (*x3).e;
(t3).o = (*x3).o;
(t1).e = (*x1).e;
(t1).o = (*x1).o;
(t2).e = (*x2).e;
(t2).o = (*x2).o;
(*x0).e = t0.e ^ (~t1.e & t2.e);
(*x0).o = t0.o ^ (~t1.o & t2.o);
(*x2).e = t2.e ^ (~t3.e & t4.e);
(*x2).o = t2.o ^ (~t3.o & t4.o);
(*x4).e = t4.e ^ (~t0.e & t1.e);
(*x4).o = t4.o ^ (~t0.o & t1.o);
(*x1).e = t1.e ^ (~t2.e & t3.e);
(*x1).o = t1.o ^ (~t2.o & t3.o);
(*x3).e = t3.e ^ (~t4.e & t0.e);
(*x3).o = t3.o ^ (~t4.o & t0.o);
(*x1).e ^= (*x0).e;
(*x1).o ^= (*x0).o;
(*x3).e ^= (*x2).e;
(*x3).o ^= (*x2).o;
(*x0).e ^= (*x4).e;
(*x0).o ^= (*x4).o;
/* linear layer */
t0.e = (*x0).e ^ ROTR32((*x0).o, 4);
t0.o = (*x0).o ^ ROTR32((*x0).e, 5);
t1.e = (*x1).e ^ ROTR32((*x1).e, 11);
t1.o = (*x1).o ^ ROTR32((*x1).o, 11);
t2.e = (*x2).e ^ ROTR32((*x2).o, 2);
t2.o = (*x2).o ^ ROTR32((*x2).e, 3);
t3.e = (*x3).e ^ ROTR32((*x3).o, 3);
t3.o = (*x3).o ^ ROTR32((*x3).e, 4);
t4.e = (*x4).e ^ ROTR32((*x4).e, 17);
t4.o = (*x4).o ^ ROTR32((*x4).o, 17);
(*x0).e ^= ROTR32(t0.o, 9);
(*x0).o ^= ROTR32(t0.e, 10);
(*x1).e ^= ROTR32(t1.o, 19);
(*x1).o ^= ROTR32(t1.e, 20);
(*x2).e ^= t2.o;
(*x2).o ^= ROTR32(t2.e, 1);
(*x3).e ^= ROTR32(t3.e, 5);
(*x3).o ^= ROTR32(t3.o, 5);
(*x4).e ^= ROTR32(t4.o, 3);
(*x4).o ^= ROTR32(t4.e, 4);
(*x2).e = ~(*x2).e;
(*x2).o = ~(*x2).o;
}
}
#endif // ASCONP_H_
#include "api.h"
#include "isap.h"
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
){
(void)nsec;
// Ciphertext length is mlen + tag length
*clen = mlen+ISAP_TAG_SZ;
// Encrypt plaintext
if (mlen > 0) {
isap_enc(k,npub,m,mlen,c);
}
// Generate tag
unsigned char *tag = c+mlen;
isap_mac(k,npub,ad,adlen,c,mlen,tag);
return 0;
}
#include "api.h"
#include "isap.h"
#include "asconp.h"
const u8 ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const u8 ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const u8 ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
#define P_sB PX(1,&x0,&x1,&x2,&x3,&x4)
#define P_sE PX(6,&x0,&x1,&x2,&x3,&x4)
#define P_sH PX(12,&x0,&x1,&x2,&x3,&x4)
#define P_sK PX(12,&x0,&x1,&x2,&x3,&x4)
/******************************************************************************/
/* ISAP_RK */
/******************************************************************************/
void isap_rk(
const u8 *k,
const u8 *iv,
const u8 *y,
u8 *out,
const u8 outlen)
{
// State variables
u32_2 x0, x1, x2, x3, x4;
// Initialize
to_bit_interleaving(&x0, U64BIG(*(u64 *)(k + 0)));
to_bit_interleaving(&x1, U64BIG(*(u64 *)(k + 8)));
to_bit_interleaving(&x2, U64BIG(*(u64 *)(iv + 0)));
x3.o = 0;
x3.e = 0;
x4.o = 0;
x4.e = 0;
P_sK;
// Absorb Y, bit by bit
for (u8 i = 0; i < 127; i++) {
u8 cur_byte_pos = i / 8;
u8 cur_bit_pos = 7 - (i % 8);
u32 cur_bit = ((y[cur_byte_pos] >> (cur_bit_pos)) & 0x01) << 7;
x0.o ^= ((u32)cur_bit) << 24;
P_sB;
}
u8 cur_bit = ((y[15]) & 0x01) << 7;
x0.o ^= ((u32)cur_bit) << (24);
// Squeeze - Derive K*
P_sK;
*(u32 *)(out + 0) = x0.o;
*(u32 *)(out + 4) = x0.e;
*(u32 *)(out + 8) = x1.o;
*(u32 *)(out + 12) = x1.e;
if (outlen > 16) {
*(u32 *)(out + 16) = x2.o;
*(u32 *)(out + 20) = x2.e;
}
}
/******************************************************************************/
/* ISAP_MAC */
/******************************************************************************/
void isap_mac(
const u8 *k,
const u8 *npub,
const u8 *ad, u64 adlen,
const u8 *c, u64 clen,
u8 *tag)
{
// State and temporary variables
u32_2 x0, x1, x2, x3, x4;
u32_2 t0;
u64 tmp0;
// Initialize
to_bit_interleaving(&x0, U64BIG(*(u64 *)npub + 0));
to_bit_interleaving(&x1, U64BIG(*(u64 *)(npub + 8)));
to_bit_interleaving(&x2, U64BIG(*(u64 *)(ISAP_IV_A)));
x3.o = 0;
x3.e = 0;
x4.o = 0;
x4.e = 0;
P_sH;
// Absorb full lanes of AD
while (adlen >= 8)
{
to_bit_interleaving(&t0, U64BIG(*(u64 *)ad));
x0.e ^= t0.e;
x0.o ^= t0.o;
adlen -= ISAP_rH / 8;
ad += ISAP_rH / 8;
P_sH;
}
// Absorb partial lane of AD and add padding
if (adlen > 0)
{
tmp0 = 0;
u8 *tmp0_bytes = (u8 *)&tmp0;
u8 i;
for (i = 0; i < adlen; i++)
{
tmp0_bytes[i] = *ad;
ad += 1;
}
tmp0_bytes[i] = 0x80;
to_bit_interleaving(&t0, U64BIG(tmp0));
x0.e ^= t0.e;
x0.o ^= t0.o;
P_sH;
}
// Absorb AD padding if not already done before
if (adlen == 0)
{
x0.o ^= 0x80000000;
P_sH;
}
// Domain Seperation
x4.e ^= ((u32)0x01);
// Absorb full lanes of C
while (clen >= 8)
{
to_bit_interleaving(&t0, U64BIG(*(u64 *)c));
x0.e ^= t0.e;
x0.o ^= t0.o;
P_sH;
clen -= ISAP_rH / 8;
c += ISAP_rH / 8;
}
// Absorb partial lane of C and add padding
if (clen > 0)
{
tmp0 = 0;
u8 *tmp0_bytes = (u8 *)&tmp0;
u8 i;
for (i = 0; i < clen; i++)
{
tmp0_bytes[i] = *c;
c += 1;
}
tmp0_bytes[i] = 0x80;
to_bit_interleaving(&t0, U64BIG(tmp0));
x0.e ^= t0.e;
x0.o ^= t0.o;
P_sH;
}
// Absorb C padding if not already done before
if (clen == 0)
{
x0.o ^= 0x80000000;
P_sH;
}
// Finalize - Derive Ka*
u64 y64[CRYPTO_KEYBYTES / 8];
from_bit_interleaving(&tmp0, x0);
y64[0] = U64BIG(tmp0);
from_bit_interleaving(&tmp0, x1);
y64[1] = U64BIG(tmp0);
u32 ka_star32[CRYPTO_KEYBYTES / 4];
isap_rk(k, ISAP_IV_KA, (u8 *)y64, (u8 *)ka_star32, CRYPTO_KEYBYTES);
// Finalize - Squeeze T
x0.o = ka_star32[0];
x0.e = ka_star32[1];
x1.o = ka_star32[2];
x1.e = ka_star32[3];
P_sH;
from_bit_interleaving(&tmp0, x0);
*(u64 *)(tag + 0) = U64BIG(tmp0);
from_bit_interleaving(&tmp0, x1);
*(u64 *)(tag + 8) = U64BIG(tmp0);
}
/******************************************************************************/
/* ISAP_ENC */
/******************************************************************************/
void isap_enc(
const u8 *k,
const u8 *npub,
const u8 *m, u64 mlen,
u8 *c)
{
// Derive Ke
u8 ke[ISAP_STATE_SZ - CRYPTO_NPUBBYTES];
isap_rk(k, ISAP_IV_KE, npub, ke, ISAP_STATE_SZ - CRYPTO_NPUBBYTES);
// State and temporary variables
u32_2 x0, x1, x2, x3, x4;
u64 tmp0;
// Init State
x0.o = *(u32 *)(ke + 0);
x0.e = *(u32 *)(ke + 4);
x1.o = *(u32 *)(ke + 8);
x1.e = *(u32 *)(ke + 12);
x2.o = *(u32 *)(ke + 16);
x2.e = *(u32 *)(ke + 20);
to_bit_interleaving(&x3, U64BIG(*(u64 *)npub));
to_bit_interleaving(&x4, U64BIG(*(u64 *)(npub + 8)));
// Squeeze full lanes
while (mlen >= 8)
{
P_sE;
from_bit_interleaving(&tmp0, x0);
*(u64 *)c = *(u64 *)m ^ U64BIG(tmp0);
mlen -= 8;
m += ISAP_rH / 8;
c += ISAP_rH / 8;
}
// Squeeze partial lane
if (mlen > 0)
{
P_sE;
from_bit_interleaving(&tmp0, x0);
tmp0 = U64BIG(tmp0);
u8 *tmp0_bytes = (u8 *)&tmp0;
for (u8 i = 0; i < mlen; i++)
{
*c = *m ^ tmp0_bytes[i];
m += 1;
c += 1;
}
}
}
#ifndef ISAP_H
#define ISAP_H
// Rate in bits
#define ISAP_rH 64
#define ISAP_rB 1
// Number of rounds
#define ISAP_sH 12
#define ISAP_sB 1
#define ISAP_sE 6
#define ISAP_sK 12
// State size in bytes
#define ISAP_STATE_SZ 40
// Size of rate in bytes
#define ISAP_rH_SZ ((ISAP_rH + 7) / 8)
// Size of zero truncated IV in bytes
#define ISAP_IV_SZ 8
// Size of tag in bytes
#define ISAP_TAG_SZ 16
// Security level
#define ISAP_K 128
void isap_mac(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *c, unsigned long long clen,
unsigned char *tag);
void isap_rk(
const unsigned char *k,
const unsigned char *iv,
const unsigned char *in,
unsigned char *out,
const unsigned char outlen);
void isap_enc(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *m, unsigned long long mlen,
unsigned char *c);
#endif
#ifndef API_H
#define API_H
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#endif
#include "api.h"
#include "isap.h"
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
){
(void)nsec;
// Ciphertext length is mlen + tag length
*clen = mlen+ISAP_TAG_SZ;
// Encrypt plaintext
if (mlen > 0) {
isap_enc(k,npub,m,mlen,c);
}
// Generate tag
unsigned char *tag = c+mlen;
isap_mac(k,npub,ad,adlen,c,mlen,tag);
return 0;
}
#include "api.h"
#include "isap.h"
#include "asconp.h"
const u8 ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const u8 ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const u8 ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
#define P_sB_UROL P_1()
#define P_sE P_LOOP(6)
#define P_sE_UROL P_6()
#define P_sH P_LOOP(12)
#define P_sH_UROL P_12()
#define P_sK P_LOOP(12)
#define P_sK_UROL P_12()
/******************************************************************************/
/* ISAP_RK */
/******************************************************************************/
void isap_rk(
const u8 *k,
const u8 *iv,
const u8 *y,
u8 *out,
const u32 outlen)
{
// State variables
u32_2 x0, x1, x2, x3, x4;
// Initialize
to_bit_interleaving(&x0, U64BIG(*(u64 *)(k + 0)));
to_bit_interleaving(&x1, U64BIG(*(u64 *)(k + 8)));
to_bit_interleaving(&x2, U64BIG(*(u64 *)(iv + 0)));
x3.o = 0;
x3.e = 0;
x4.o = 0;
x4.e = 0;
P_sK;
// Absorb Y, bit by bit
for (u8 i = 0; i < 16; i++)
{
u32 cur_byte = *y;
x0.o ^= (cur_byte & 0x80) << 24;
P_sB_UROL;
x0.o ^= (cur_byte & 0x40) << 25;
P_sB_UROL;
x0.o ^= (cur_byte & 0x20) << 26;
P_sB_UROL;
x0.o ^= (cur_byte & 0x10) << 27;
P_sB_UROL;
x0.o ^= (cur_byte & 0x08) << 28;
P_sB_UROL;
x0.o ^= (cur_byte & 0x04) << 29;
P_sB_UROL;
x0.o ^= (cur_byte & 0x02) << 30;
P_sB_UROL;
x0.o ^= (cur_byte & 0x01) << 31;
if (i != 15)
{
P_sB_UROL;
y += 1;
}
}
// Squeeze - Derive K*
P_sK;
*(u32 *)(out + 0) = x0.o;
*(u32 *)(out + 4) = x0.e;
*(u32 *)(out + 8) = x1.o;
*(u32 *)(out + 12) = x1.e;
if (outlen > 16)
{
*(u32 *)(out + 16) = x2.o;
*(u32 *)(out + 20) = x2.e;
}
}
/******************************************************************************/
/* ISAP_MAC */
/******************************************************************************/
void isap_mac(
const u8 *k,
const u8 *npub,
const u8 *ad, u64 adlen,
const u8 *c, u64 clen,
u8 *tag)
{
// State and temporary variables
u32_2 x0, x1, x2, x3, x4;
u32_2 t0;
u64 tmp0;
// Initialize
to_bit_interleaving(&x0, U64BIG(*(u64 *)npub + 0));
to_bit_interleaving(&x1, U64BIG(*(u64 *)(npub + 8)));
to_bit_interleaving(&x2, U64BIG(*(u64 *)(ISAP_IV_A)));
x3.o = 0;
x3.e = 0;
x4.o = 0;
x4.e = 0;
P_sH;
// Absorb full lanes of AD
while (adlen >= 8)
{
to_bit_interleaving(&t0, U64BIG(*(u64 *)ad));
x0.e ^= t0.e;
x0.o ^= t0.o;
adlen -= ISAP_rH / 8;
ad += ISAP_rH / 8;
P_sH_UROL;
}
// Absorb partial lane of AD and add padding
if (adlen > 0)
{
tmp0 = 0;
u8 *tmp0_bytes = (u8 *)&tmp0;
u8 i;
for (i = 0; i < adlen; i++)
{
tmp0_bytes[i] = *ad;
ad += 1;
}
tmp0_bytes[i] = 0x80;
to_bit_interleaving(&t0, U64BIG(tmp0));
x0.e ^= t0.e;
x0.o ^= t0.o;
P_sH;
}
// Absorb AD padding if not already done before
if (adlen == 0)
{
x0.o ^= 0x80000000;
P_sH;
}
// Domain Seperation
x4.e ^= ((u32)0x01);
// Absorb full lanes of C
while (clen >= 8)
{
to_bit_interleaving(&t0, U64BIG(*(u64 *)c));
x0.e ^= t0.e;
x0.o ^= t0.o;
P_sH_UROL;
clen -= ISAP_rH / 8;
c += ISAP_rH / 8;
}
// Absorb partial lane of C and add padding
if (clen > 0)
{
tmp0 = 0;
u8 *tmp0_bytes = (u8 *)&tmp0;
u8 i;
for (i = 0; i < clen; i++)
{
tmp0_bytes[i] = *c;
c += 1;
}
tmp0_bytes[i] = 0x80;
to_bit_interleaving(&t0, U64BIG(tmp0));
x0.e ^= t0.e;
x0.o ^= t0.o;
P_sH;
}
// Absorb C padding if not already done before