Commit bd5da236 by Robert Primas Committed by Sebastian Renner

isap

parent ec7e6d42
This source diff could not be displayed because it is too large. You can view the blob instead.
#ifndef API_H
#define API_H
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#endif
#ifndef ASCONP_H_
#define ASCONP_H_
#include <inttypes.h>
typedef unsigned char u8;
typedef uint32_t u32;
typedef unsigned long long u64;
typedef struct
{
u32 e;
u32 o;
} u32_2;
// Round constants, bit-interleaved
u32 rc_o[12] = {0xc, 0xc, 0x9, 0x9, 0xc, 0xc, 0x9, 0x9, 0x6, 0x6, 0x3, 0x3};
u32 rc_e[12] = {0xc, 0x9, 0xc, 0x9, 0x6, 0x3, 0x6, 0x3, 0xc, 0x9, 0xc, 0x9};
/* ---------------------------------------------------------------- */
u64 U64BIG(u64 x)
{
return ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) |
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) |
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) |
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56));
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
void to_bit_interleaving(u32_2 *out, u64 in)
{
u32 hi = (in) >> 32;
u32 lo = (u32)(in);
u32 r0, r1;
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
(*out).e = (lo & 0x0000FFFF) | (hi << 16);
(*out).o = (lo >> 16) | (hi & 0xFFFF0000);
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
void from_bit_interleaving(u64 *out, u32_2 in)
{
u32 lo = ((in).e & 0x0000FFFF) | ((in).o << 16);
u32 hi = ((in).e >> 16) | ((in).o & 0xFFFF0000);
u32 r0, r1;
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
*out = (u64)hi << 32 | lo;
}
/* ---------------------------------------------------------------- */
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
/* ---------------------------------------------------------------- */
void static inline PX(u32 rounds, u32_2* x0, u32_2* x1, u32_2* x2, u32_2* x3, u32_2* x4) {
u32_2 t0, t1, t2, t3, t4;
for (u32 r = 12 - rounds; r < 12; r++){
/* rcon */
(*x2).e ^= rc_e[r];
(*x2).o ^= rc_o[r];
/* non-linear layer */
(*x0).e ^= (*x4).e;
(*x0).o ^= (*x4).o;
(*x4).e ^= (*x3).e;
(*x4).o ^= (*x3).o;
(*x2).e ^= (*x1).e;
(*x2).o ^= (*x1).o;
(t0).e = (*x0).e;
(t0).o = (*x0).o;
(t4).e = (*x4).e;
(t4).o = (*x4).o;
(t3).e = (*x3).e;
(t3).o = (*x3).o;
(t1).e = (*x1).e;
(t1).o = (*x1).o;
(t2).e = (*x2).e;
(t2).o = (*x2).o;
(*x0).e = t0.e ^ (~t1.e & t2.e);
(*x0).o = t0.o ^ (~t1.o & t2.o);
(*x2).e = t2.e ^ (~t3.e & t4.e);
(*x2).o = t2.o ^ (~t3.o & t4.o);
(*x4).e = t4.e ^ (~t0.e & t1.e);
(*x4).o = t4.o ^ (~t0.o & t1.o);
(*x1).e = t1.e ^ (~t2.e & t3.e);
(*x1).o = t1.o ^ (~t2.o & t3.o);
(*x3).e = t3.e ^ (~t4.e & t0.e);
(*x3).o = t3.o ^ (~t4.o & t0.o);
(*x1).e ^= (*x0).e;
(*x1).o ^= (*x0).o;
(*x3).e ^= (*x2).e;
(*x3).o ^= (*x2).o;
(*x0).e ^= (*x4).e;
(*x0).o ^= (*x4).o;
/* linear layer */
t0.e = (*x0).e ^ ROTR32((*x0).o, 4);
t0.o = (*x0).o ^ ROTR32((*x0).e, 5);
t1.e = (*x1).e ^ ROTR32((*x1).e, 11);
t1.o = (*x1).o ^ ROTR32((*x1).o, 11);
t2.e = (*x2).e ^ ROTR32((*x2).o, 2);
t2.o = (*x2).o ^ ROTR32((*x2).e, 3);
t3.e = (*x3).e ^ ROTR32((*x3).o, 3);
t3.o = (*x3).o ^ ROTR32((*x3).e, 4);
t4.e = (*x4).e ^ ROTR32((*x4).e, 17);
t4.o = (*x4).o ^ ROTR32((*x4).o, 17);
(*x0).e ^= ROTR32(t0.o, 9);
(*x0).o ^= ROTR32(t0.e, 10);
(*x1).e ^= ROTR32(t1.o, 19);
(*x1).o ^= ROTR32(t1.e, 20);
(*x2).e ^= t2.o;
(*x2).o ^= ROTR32(t2.e, 1);
(*x3).e ^= ROTR32(t3.e, 5);
(*x3).o ^= ROTR32(t3.o, 5);
(*x4).e ^= ROTR32(t4.o, 3);
(*x4).o ^= ROTR32(t4.e, 4);
(*x2).e = ~(*x2).e;
(*x2).o = ~(*x2).o;
}
}
#endif // ASCONP_H_
#include "api.h"
#include "isap.h"
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
){
(void)nsec;
// Ciphertext length is mlen + tag length
*clen = mlen+ISAP_TAG_SZ;
// Encrypt plaintext
if (mlen > 0) {
isap_enc(k,npub,m,mlen,c);
}
// Generate tag
unsigned char *tag = c+mlen;
isap_mac(k,npub,ad,adlen,c,mlen,tag);
return 0;
}
#include "api.h"
#include "isap.h"
#include "asconp.h"
const u8 ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const u8 ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const u8 ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
#define P_sB PX(1,&x0,&x1,&x2,&x3,&x4)
#define P_sE PX(6,&x0,&x1,&x2,&x3,&x4)
#define P_sH PX(12,&x0,&x1,&x2,&x3,&x4)
#define P_sK PX(12,&x0,&x1,&x2,&x3,&x4)
/******************************************************************************/
/* ISAP_RK */
/******************************************************************************/
void isap_rk(
const u8 *k,
const u8 *iv,
const u8 *y,
u8 *out,
const u8 outlen)
{
// State variables
u32_2 x0, x1, x2, x3, x4;
// Initialize
to_bit_interleaving(&x0, U64BIG(*(u64 *)(k + 0)));
to_bit_interleaving(&x1, U64BIG(*(u64 *)(k + 8)));
to_bit_interleaving(&x2, U64BIG(*(u64 *)(iv + 0)));
x3.o = 0;
x3.e = 0;
x4.o = 0;
x4.e = 0;
P_sK;
// Absorb Y, bit by bit
for (u8 i = 0; i < 127; i++) {
u8 cur_byte_pos = i / 8;
u8 cur_bit_pos = 7 - (i % 8);
u32 cur_bit = ((y[cur_byte_pos] >> (cur_bit_pos)) & 0x01) << 7;
x0.o ^= ((u32)cur_bit) << 24;
P_sB;
}
u8 cur_bit = ((y[15]) & 0x01) << 7;
x0.o ^= ((u32)cur_bit) << (24);
// Squeeze - Derive K*
P_sK;
*(u32 *)(out + 0) = x0.o;
*(u32 *)(out + 4) = x0.e;
*(u32 *)(out + 8) = x1.o;
*(u32 *)(out + 12) = x1.e;
if (outlen > 16) {
*(u32 *)(out + 16) = x2.o;
*(u32 *)(out + 20) = x2.e;
}
}
/******************************************************************************/
/* ISAP_MAC */
/******************************************************************************/
void isap_mac(
const u8 *k,
const u8 *npub,
const u8 *ad, u64 adlen,
const u8 *c, u64 clen,
u8 *tag)
{
// State and temporary variables
u32_2 x0, x1, x2, x3, x4;
u32_2 t0;
u64 tmp0;
// Initialize
to_bit_interleaving(&x0, U64BIG(*(u64 *)npub + 0));
to_bit_interleaving(&x1, U64BIG(*(u64 *)(npub + 8)));
to_bit_interleaving(&x2, U64BIG(*(u64 *)(ISAP_IV_A)));
x3.o = 0;
x3.e = 0;
x4.o = 0;
x4.e = 0;
P_sH;
// Absorb full lanes of AD
while (adlen >= 8)
{
to_bit_interleaving(&t0, U64BIG(*(u64 *)ad));
x0.e ^= t0.e;
x0.o ^= t0.o;
adlen -= ISAP_rH / 8;
ad += ISAP_rH / 8;
P_sH;
}
// Absorb partial lane of AD and add padding
if (adlen > 0)
{
tmp0 = 0;
u8 *tmp0_bytes = (u8 *)&tmp0;
u8 i;
for (i = 0; i < adlen; i++)
{
tmp0_bytes[i] = *ad;
ad += 1;
}
tmp0_bytes[i] = 0x80;
to_bit_interleaving(&t0, U64BIG(tmp0));
x0.e ^= t0.e;
x0.o ^= t0.o;
P_sH;
}
// Absorb AD padding if not already done before
if (adlen == 0)
{
x0.o ^= 0x80000000;
P_sH;
}
// Domain Seperation
x4.e ^= ((u32)0x01);
// Absorb full lanes of C
while (clen >= 8)
{
to_bit_interleaving(&t0, U64BIG(*(u64 *)c));
x0.e ^= t0.e;
x0.o ^= t0.o;
P_sH;
clen -= ISAP_rH / 8;
c += ISAP_rH / 8;
}
// Absorb partial lane of C and add padding
if (clen > 0)
{
tmp0 = 0;
u8 *tmp0_bytes = (u8 *)&tmp0;
u8 i;
for (i = 0; i < clen; i++)
{
tmp0_bytes[i] = *c;
c += 1;
}
tmp0_bytes[i] = 0x80;
to_bit_interleaving(&t0, U64BIG(tmp0));
x0.e ^= t0.e;
x0.o ^= t0.o;
P_sH;
}
// Absorb C padding if not already done before
if (clen == 0)
{
x0.o ^= 0x80000000;
P_sH;
}
// Finalize - Derive Ka*
u64 y64[CRYPTO_KEYBYTES / 8];
from_bit_interleaving(&tmp0, x0);
y64[0] = U64BIG(tmp0);
from_bit_interleaving(&tmp0, x1);
y64[1] = U64BIG(tmp0);
u32 ka_star32[CRYPTO_KEYBYTES / 4];
isap_rk(k, ISAP_IV_KA, (u8 *)y64, (u8 *)ka_star32, CRYPTO_KEYBYTES);
// Finalize - Squeeze T
x0.o = ka_star32[0];
x0.e = ka_star32[1];
x1.o = ka_star32[2];
x1.e = ka_star32[3];
P_sH;
from_bit_interleaving(&tmp0, x0);
*(u64 *)(tag + 0) = U64BIG(tmp0);
from_bit_interleaving(&tmp0, x1);
*(u64 *)(tag + 8) = U64BIG(tmp0);
}
/******************************************************************************/
/* ISAP_ENC */
/******************************************************************************/
void isap_enc(
const u8 *k,
const u8 *npub,
const u8 *m, u64 mlen,
u8 *c)
{
// Derive Ke
u8 ke[ISAP_STATE_SZ - CRYPTO_NPUBBYTES];
isap_rk(k, ISAP_IV_KE, npub, ke, ISAP_STATE_SZ - CRYPTO_NPUBBYTES);
// State and temporary variables
u32_2 x0, x1, x2, x3, x4;
u64 tmp0;
// Init State
x0.o = *(u32 *)(ke + 0);
x0.e = *(u32 *)(ke + 4);
x1.o = *(u32 *)(ke + 8);
x1.e = *(u32 *)(ke + 12);
x2.o = *(u32 *)(ke + 16);
x2.e = *(u32 *)(ke + 20);
to_bit_interleaving(&x3, U64BIG(*(u64 *)npub));
to_bit_interleaving(&x4, U64BIG(*(u64 *)(npub + 8)));
// Squeeze full lanes
while (mlen >= 8)
{
P_sE;
from_bit_interleaving(&tmp0, x0);
*(u64 *)c = *(u64 *)m ^ U64BIG(tmp0);
mlen -= 8;
m += ISAP_rH / 8;
c += ISAP_rH / 8;
}
// Squeeze partial lane
if (mlen > 0)
{
P_sE;
from_bit_interleaving(&tmp0, x0);
tmp0 = U64BIG(tmp0);
u8 *tmp0_bytes = (u8 *)&tmp0;
for (u8 i = 0; i < mlen; i++)
{
*c = *m ^ tmp0_bytes[i];
m += 1;
c += 1;
}
}
}
#ifndef ISAP_H
#define ISAP_H
// Rate in bits
#define ISAP_rH 64
#define ISAP_rB 1
// Number of rounds
#define ISAP_sH 12
#define ISAP_sB 1
#define ISAP_sE 6
#define ISAP_sK 12
// State size in bytes
#define ISAP_STATE_SZ 40
// Size of rate in bytes
#define ISAP_rH_SZ ((ISAP_rH + 7) / 8)
// Size of zero truncated IV in bytes
#define ISAP_IV_SZ 8
// Size of tag in bytes
#define ISAP_TAG_SZ 16
// Security level
#define ISAP_K 128
void isap_mac(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *c, unsigned long long clen,
unsigned char *tag);
void isap_rk(
const unsigned char *k,
const unsigned char *iv,
const unsigned char *in,
unsigned char *out,
const unsigned char outlen);
void isap_enc(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *m, unsigned long long mlen,
unsigned char *c);
#endif
#ifndef API_H
#define API_H
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#endif
#ifndef ASCONP_H_
#define ASCONP_H_
typedef unsigned char u8;
typedef unsigned long u32;
typedef unsigned long long u64;
typedef struct
{
u32 e;
u32 o;
} u32_2;
// Round constants, bit-interleaved
u32 rc_o[12] = {0xc, 0xc, 0x9, 0x9, 0xc, 0xc, 0x9, 0x9, 0x6, 0x6, 0x3, 0x3};
u32 rc_e[12] = {0xc, 0x9, 0xc, 0x9, 0x6, 0x3, 0x6, 0x3, 0xc, 0x9, 0xc, 0x9};
/* ---------------------------------------------------------------- */
u64 U64BIG(u64 x)
{
return ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) |
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) |
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) |
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56));
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
void to_bit_interleaving(u32_2 *out, u64 in)
{
u32 hi = (in) >> 32;
u32 lo = (u32)(in);
u32 r0, r1;
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
(*out).e = (lo & 0x0000FFFF) | (hi << 16);
(*out).o = (lo >> 16) | (hi & 0xFFFF0000);
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
void from_bit_interleaving(u64 *out, u32_2 in)
{
u32 lo = ((in).e & 0x0000FFFF) | ((in).o << 16);
u32 hi = ((in).e >> 16) | ((in).o & 0xFFFF0000);
u32 r0, r1;
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
*out = (u64)hi << 32 | lo;
}
/* ---------------------------------------------------------------- */
#define ROUND() \
do \
{ \
__asm__ __volatile__( \
"eor %[x0_e], %[x0_e], %[x4_e]\n\t" \
"eor %[x0_o], %[x0_o], %[x4_o]\n\t" \
"eor %[x4_e], %[x4_e], %[x3_e]\n\t" \
"eor %[x4_o], %[x4_o], %[x3_o]\n\t" \
"eor %[x2_e], %[x2_e], %[x1_e]\n\t" \
"eor %[x2_o], %[x2_o], %[x1_o]\n\t" \
"bic %[reg0], %[x0_e], %[x4_e]\n\t" \
"bic %[reg1], %[x4_e], %[x3_e]\n\t" \
"bic %[reg2], %[x2_e], %[x1_e]\n\t" \
"bic %[reg3], %[x1_e], %[x0_e]\n\t" \
"eor %[x2_e], %[x2_e], %[reg1]\n\t" \
"eor %[x0_e], %[x0_e], %[reg2]\n\t" \
"eor %[x4_e], %[x4_e], %[reg3]\n\t" \
"bic %[reg3], %[x3_e], %[x2_e]\n\t" \
"eor %[x3_e], %[x3_e], %[reg0]\n\t" \
"bic %[reg2], %[x0_o], %[x4_o]\n\t" \
"bic %[reg0], %[x2_o], %[x1_o]\n\t" \
"bic %[reg1], %[x4_o], %[x3_o]\n\t" \
"eor %[x1_e], %[x1_e], %[reg3]\n\t" \
"eor %[x0_o], %[x0_o], %[reg0]\n\t" \
"eor %[x2_o], %[x2_o], %[reg1]\n\t" \
"bic %[reg3], %[x1_o], %[x0_o]\n\t" \
"bic %[reg0], %[x3_o], %[x2_o]\n\t" \
"eor %[x3_o], %[x3_o], %[reg2]\n\t" \
"eor %[x3_o], %[x3_o], %[x2_o]\n\t" \
"eor %[x4_o], %[x4_o], %[reg3]\n\t" \
"eor %[x1_o], %[x1_o], %[reg0]\n\t" \
"eor %[x3_e], %[x3_e], %[x2_e]\n\t" \
"eor %[x1_e], %[x1_e], %[x0_e]\n\t" \
"eor %[x1_o], %[x1_o], %[x0_o]\n\t" \
"eor %[x0_e], %[x0_e], %[x4_e]\n\t" \
"eor %[x0_o], %[x0_o], %[x4_o]\n\t" \
"mvn %[x2_e], %[x2_e]\n\t" \
"mvn %[x2_o], %[x2_o]\n\t" \
"eor %[reg0], %[x0_e], %[x0_o], ror #4\n\t" \
"eor %[reg1], %[x0_o], %[x0_e], ror #5\n\t" \
"eor %[reg2], %[x1_e], %[x1_e], ror #11\n\t" \
"eor %[reg3], %[x1_o], %[x1_o], ror #11\n\t" \
"eor %[x0_e], %[x0_e], %[reg1], ror #9\n\t" \
"eor %[x0_o], %[x0_o], %[reg0], ror #10\n\t" \
"eor %[x1_e], %[x1_e], %[reg3], ror #19\n\t" \
"eor %[x1_o], %[x1_o], %[reg2], ror #20\n\t" \
"eor %[reg0], %[x2_e], %[x2_o], ror #2\n\t" \
"eor %[reg1], %[x2_o], %[x2_e], ror #3\n\t" \
"eor %[reg2], %[x3_e], %[x3_o], ror #3\n\t" \
"eor %[reg3], %[x3_o], %[x3_e], ror #4\n\t" \
"eor %[x2_e], %[x2_e], %[reg1]\n\t" \
"eor %[x2_o], %[x2_o], %[reg0], ror #1\n\t" \
"eor %[x3_e], %[x3_e], %[reg2], ror #5\n\t" \
"eor %[x3_o], %[x3_o], %[reg3], ror #5\n\t" \
"eor %[reg0], %[x4_e], %[x4_e], ror #17\n\t" \
"eor %[reg1], %[x4_o], %[x4_o], ror #17\n\t" \
"eor %[x4_e], %[x4_e], %[reg1], ror #3\n\t" \
"eor %[x4_o], %[x4_o], %[reg0], ror #4\n\t" \
: [ x0_e ] "+r"(x0.e), [ x1_e ] "+r"(x1.e), \
[ x2_e ] "+r"(x2.e), [ x3_e ] "+r"(x3.e), \
[ x4_e ] "+r"(x4.e), [ x0_o ] "+r"(x0.o), \
[ x1_o ] "+r"(x1.o), [ x2_o ] "+r"(x2.o), \
[ x3_o ] "+r"(x3.o), [ x4_o ] "+r"(x4.o), \
[ reg0 ] "=r"(reg0), [ reg1 ] "=r"(reg1), \
[ reg2 ] "=r"(reg2), [ reg3 ] "=r"(reg3)::); \
} while (0)
/* ---------------------------------------------------------------- */
#define P_LOOP(rounds) \
do \
{ \
u32 reg0, reg1, reg2, reg3; \
for (u32 r = 12 - rounds; r < 12; r++) \
{ \
x2.e ^= rc_e[r]; \
x2.o ^= rc_o[r]; \
ROUND(); \
} \
} while (0)
/* ---------------------------------------------------------------- */
#define P_12() \
do \
{ \
u32 reg0, reg1, reg2, reg3; \
x2.e ^= 0xc; \
x2.o ^= 0xc; \
ROUND(); \
x2.e ^= 0x9; \
x2.o ^= 0xc; \
ROUND(); \
x2.e ^= 0xc; \
x2.o ^= 0x9; \
ROUND(); \
x2.e ^= 0x9; \
x2.o ^= 0x9; \
ROUND(); \
x2.e ^= 0x6; \
x2.o ^= 0xc; \
ROUND(); \
x2.e ^= 0x3; \
x2.o ^= 0xc; \
ROUND(); \
x2.e ^= 0x6; \
x2.o ^= 0x9; \
ROUND(); \
x2.e ^= 0x3; \
x2.o ^= 0x9; \
ROUND(); \
x2.e ^= 0xc; \
x2.o ^= 0x6; \
ROUND(); \
x2.e ^= 0x9; \
x2.o ^= 0x6; \
ROUND(); \
x2.e ^= 0xc; \
x2.o ^= 0x3; \
ROUND(); \
x2.e ^= 0x9; \
x2.o ^= 0x3; \
ROUND(); \
} while (0)
/* ---------------------------------------------------------------- */
#define P_6() \
do \
{ \
u32 reg0, reg1, reg2, reg3; \
x2.e ^= 0x6; \
x2.o ^= 0x9; \
ROUND(); \
x2.e ^= 0x3; \
x2.o ^= 0x9; \
ROUND(); \
x2.e ^= 0xc; \
x2.o ^= 0x6; \
ROUND(); \
x2.e ^= 0x9; \
x2.o ^= 0x6; \
ROUND(); \
x2.e ^= 0xc; \
x2.o ^= 0x3; \
ROUND(); \
x2.e ^= 0x9; \
x2.o ^= 0x3; \
ROUND(); \
} while (0)
/* ---------------------------------------------------------------- */
#define P_1() \
do \
{ \
u32 reg0, reg1, reg2, reg3; \
x2.e ^= 0x9; \
x2.o ^= 0x3; \
ROUND(); \
} while (0)
#endif // ASCONP_H_
#include "api.h"
#include "isap.h"
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
){
(void)nsec;
// Ciphertext length is mlen + tag length
*clen = mlen+ISAP_TAG_SZ;
// Encrypt plaintext
if (mlen > 0) {
isap_enc(k,npub,m,mlen,c);
}
// Generate tag
unsigned char *tag = c+mlen;
isap_mac(k,npub,ad,adlen,c,mlen,tag);
return 0;
}
#include "api.h"
#include "isap.h"
#include "asconp.h"
const u8 ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const u8 ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const u8 ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
#define P_sB_UROL P_1()
#define P_sE P_LOOP(6)
#define P_sE_UROL P_6()
#define P_sH P_LOOP(12)
#define P_sH_UROL P_12()
#define P_sK P_LOOP(12)
#define P_sK_UROL P_12()
/******************************************************************************/
/* ISAP_RK */
/******************************************************************************/
void isap_rk(
const u8 *k,
const u8 *iv,
const u8 *y,
u8 *out,
const u32 outlen)
{
// State variables
u32_2 x0, x1, x2, x3, x4;
// Initialize
to_bit_interleaving(&x0, U64BIG(*(u64 *)(k + 0)));
to_bit_interleaving(&x1, U64BIG(*(u64 *)(k + 8)));
to_bit_interleaving(&x2, U64BIG(*(u64 *)(iv + 0)));
x3.o = 0;
x3.e = 0;
x4.o = 0;
x4.e = 0;
P_sK;
// Absorb Y, bit by bit
for (u8 i = 0; i < 16; i++)
{
u32 cur_byte = *y;
x0.o ^= (cur_byte & 0x80) << 24;
P_sB_UROL;
x0.o ^= (cur_byte & 0x40) << 25;
P_sB_UROL;
x0.o ^= (cur_byte & 0x20) << 26;
P_sB_UROL;
x0.o ^= (cur_byte & 0x10) << 27;
P_sB_UROL;
x0.o ^= (cur_byte & 0x08) << 28;
P_sB_UROL;
x0.o ^= (cur_byte & 0x04) << 29;
P_sB_UROL;
x0.o ^= (cur_byte & 0x02) << 30;
P_sB_UROL;
x0.o ^= (cur_byte & 0x01) << 31;
if (i != 15)
{
P_sB_UROL;
y += 1;
}
}
// Squeeze - Derive K*
P_sK;
*(u32 *)(out + 0) = x0.o;
*(u32 *)(out + 4) = x0.e;
*(u32 *)(out + 8) = x1.o;
*(u32 *)(out + 12) = x1.e;
if (outlen > 16)
{
*(u32 *)(out + 16) = x2.o;
*(u32 *)(out + 20) = x2.e;
}
}
/******************************************************************************/
/* ISAP_MAC */
/******************************************************************************/
void isap_mac(
const u8 *k,
const u8 *npub,
const u8 *ad, u64 adlen,
const u8 *c, u64 clen,
u8 *tag)
{
// State and temporary variables
u32_2 x0, x1, x2, x3, x4;
u32_2 t0;
u64 tmp0;
// Initialize
to_bit_interleaving(&x0, U64BIG(*(u64 *)npub + 0));
to_bit_interleaving(&x1, U64BIG(*(u64 *)(npub + 8)));
to_bit_interleaving(&x2, U64BIG(*(u64 *)(ISAP_IV_A)));
x3.o = 0;
x3.e = 0;
x4.o = 0;
x4.e = 0;
P_sH;
// Absorb full lanes of AD
while (adlen >= 8)
{
to_bit_interleaving(&t0, U64BIG(*(u64 *)ad));
x0.e ^= t0.e;
x0.o ^= t0.o;
adlen -= ISAP_rH / 8;
ad += ISAP_rH / 8;
P_sH_UROL;
}
// Absorb partial lane of AD and add padding
if (adlen > 0)
{
tmp0 = 0;
u8 *tmp0_bytes = (u8 *)&tmp0;
u8 i;
for (i = 0; i < adlen; i++)
{
tmp0_bytes[i] = *ad;
ad += 1;
}
tmp0_bytes[i] = 0x80;
to_bit_interleaving(&t0, U64BIG(tmp0));
x0.e ^= t0.e;
x0.o ^= t0.o;
P_sH;
}
// Absorb AD padding if not already done before
if (adlen == 0)
{
x0.o ^= 0x80000000;
P_sH;
}
// Domain Seperation
x4.e ^= ((u32)0x01);
// Absorb full lanes of C
while (clen >= 8)
{
to_bit_interleaving(&t0, U64BIG(*(u64 *)c));
x0.e ^= t0.e;
x0.o ^= t0.o;
P_sH_UROL;
clen -= ISAP_rH / 8;
c += ISAP_rH / 8;
}
// Absorb partial lane of C and add padding
if (clen > 0)
{
tmp0 = 0;
u8 *tmp0_bytes = (u8 *)&tmp0;
u8 i;
for (i = 0; i < clen; i++)
{
tmp0_bytes[i] = *c;
c += 1;
}
tmp0_bytes[i] = 0x80;
to_bit_interleaving(&t0, U64BIG(tmp0));
x0.e ^= t0.e;
x0.o ^= t0.o;
P_sH;
}
// Absorb C padding if not already done before
if (clen == 0)
{
x0.o ^= 0x80000000;
P_sH;
}
// Finalize - Derive Ka*
u64 y64[CRYPTO_KEYBYTES / 8];
from_bit_interleaving(&tmp0, x0);
y64[0] = U64BIG(tmp0);
from_bit_interleaving(&tmp0, x1);
y64[1] = U64BIG(tmp0);
u32 ka_star32[CRYPTO_KEYBYTES / 4];
isap_rk(k, ISAP_IV_KA, (u8 *)y64, (u8 *)ka_star32, CRYPTO_KEYBYTES);
// Finalize - Squeeze T
x0.o = ka_star32[0];
x0.e = ka_star32[1];
x1.o = ka_star32[2];
x1.e = ka_star32[3];
P_sH;
from_bit_interleaving(&tmp0, x0);
*(u64 *)(tag + 0) = U64BIG(tmp0);
from_bit_interleaving(&tmp0, x1);
*(u64 *)(tag + 8) = U64BIG(tmp0);
}
/******************************************************************************/
/* ISAP_ENC */
/******************************************************************************/
void isap_enc(
const u8 *k,
const u8 *npub,
const u8 *m, u64 mlen,
u8 *c)
{
// Derive Ke
u8 ke[ISAP_STATE_SZ - CRYPTO_NPUBBYTES];
isap_rk(k, ISAP_IV_KE, npub, ke, ISAP_STATE_SZ - CRYPTO_NPUBBYTES);
// State and temporary variables
u32_2 x0, x1, x2, x3, x4;
u64 tmp0;
// Init State
x0.o = *(u32 *)(ke + 0);
x0.e = *(u32 *)(ke + 4);
x1.o = *(u32 *)(ke + 8);
x1.e = *(u32 *)(ke + 12);
x2.o = *(u32 *)(ke + 16);
x2.e = *(u32 *)(ke + 20);
to_bit_interleaving(&x3, U64BIG(*(u64 *)npub));
to_bit_interleaving(&x4, U64BIG(*(u64 *)(npub + 8)));
// Squeeze full lanes
while (mlen >= 8)
{
P_sE_UROL;
from_bit_interleaving(&tmp0, x0);
*(u64 *)c = *(u64 *)m ^ U64BIG(tmp0);
mlen -= 8;
m += ISAP_rH / 8;
c += ISAP_rH / 8;
}
// Squeeze partial lane
if (mlen > 0)
{
P_sE;
from_bit_interleaving(&tmp0, x0);
tmp0 = U64BIG(tmp0);
u8 *tmp0_bytes = (u8 *)&tmp0;
for (u8 i = 0; i < mlen; i++)
{
*c = *m ^ tmp0_bytes[i];
m += 1;
c += 1;
}
}
}
#ifndef ISAP_H
#define ISAP_H
// Rate in bits
#define ISAP_rH 64
#define ISAP_rB 1
// Number of rounds
#define ISAP_sH 12
#define ISAP_sB 1
#define ISAP_sE 6
#define ISAP_sK 12
// State size in bytes
#define ISAP_STATE_SZ 40
// Size of rate in bytes
#define ISAP_rH_SZ ((ISAP_rH + 7) / 8)
// Size of zero truncated IV in bytes
#define ISAP_IV_SZ 8
// Size of tag in bytes
#define ISAP_TAG_SZ 16
// Security level
#define ISAP_K 128
void isap_mac(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *ad, const unsigned long long adlen,
const unsigned char *c, const unsigned long long clen,
unsigned char *tag);
void isap_rk(
const unsigned char *k,
const unsigned char *iv,
const unsigned char *in,
unsigned char *out,
const unsigned long outlen);
void isap_enc(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *m, const unsigned long long mlen,
unsigned char *c);
#endif
### Building and Executing Code:
See `README.md` in the top folder.
### Implementation By:
* [Robert Primas](https://rprimas.github.io)
#ifndef API_H
#define API_H
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#endif
#include "api.h"
#include "isap.h"
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
){
(void)nsec;
// Ciphertext length is mlen + tag length
*clen = mlen+ISAP_TAG_SZ;
// Encrypt plaintext
if (mlen > 0) {
isap_enc(k,npub,m,mlen,c);
}
// Generate tag
unsigned char *tag = c+mlen;
isap_mac(k,npub,ad,adlen,c,mlen,tag);
return 0;
}
#include <stdio.h>
#include <string.h>
#include "api.h"
#include "isap.h"
typedef unsigned char u8;
typedef unsigned long long u64;
typedef unsigned long u32;
typedef long long i64;
const u8 ISAP_IV1[] = {0x01,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK};
const u8 ISAP_IV2[] = {0x02,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK};
const u8 ISAP_IV3[] = {0x03,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK};
#define RATE (64 / 8)
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define ROTR(x,n) (((x)>>(n))|((x)<<(64-(n))))
#define EXT_BYTE(x,n) ((u8)((u64)(x)>>(8*(7-(n)))))
#define INS_BYTE(x,n) ((u64)(x)<<(8*(7-(n))))
#define U64BIG(x) \
((ROTR(x, 8) & (0xFF000000FF000000ULL)) | \
(ROTR(x,24) & (0x00FF000000FF0000ULL)) | \
(ROTR(x,40) & (0x0000FF000000FF00ULL)) | \
(ROTR(x,56) & (0x000000FF000000FFULL)))
#define ROUND(C) ({\
x2 ^= C;\
x0 ^= x4;\
x4 ^= x3;\
x2 ^= x1;\
t0 = x0;\
t4 = x4;\
t3 = x3;\
t1 = x1;\
t2 = x2;\
x0 = t0 ^ ((~t1) & t2);\
x2 = t2 ^ ((~t3) & t4);\
x4 = t4 ^ ((~t0) & t1);\
x1 = t1 ^ ((~t2) & t3);\
x3 = t3 ^ ((~t4) & t0);\
x1 ^= x0;\
t1 = x1;\
x1 = ROTR(x1, R[1][0]);\
x3 ^= x2;\
t2 = x2;\
x2 = ROTR(x2, R[2][0]);\
t4 = x4;\
t2 ^= x2;\
x2 = ROTR(x2, R[2][1] - R[2][0]);\
t3 = x3;\
t1 ^= x1;\
x3 = ROTR(x3, R[3][0]);\
x0 ^= x4;\
x4 = ROTR(x4, R[4][0]);\
t3 ^= x3;\
x2 ^= t2;\
x1 = ROTR(x1, R[1][1] - R[1][0]);\
t0 = x0;\
x2 = ~x2;\
x3 = ROTR(x3, R[3][1] - R[3][0]);\
t4 ^= x4;\
x4 = ROTR(x4, R[4][1] - R[4][0]);\
x3 ^= t3;\
x1 ^= t1;\
x0 = ROTR(x0, R[0][0]);\
x4 ^= t4;\
t0 ^= x0;\
x0 = ROTR(x0, R[0][1] - R[0][0]);\
x0 ^= t0;\
})
#define P12 ({\
ROUND(0xf0);\
ROUND(0xe1);\
ROUND(0xd2);\
ROUND(0xc3);\
ROUND(0xb4);\
ROUND(0xa5);\
ROUND(0x96);\
ROUND(0x87);\
ROUND(0x78);\
ROUND(0x69);\
ROUND(0x5a);\
ROUND(0x4b);\
})
#define P6 ({\
ROUND(0x96);\
ROUND(0x87);\
ROUND(0x78);\
ROUND(0x69);\
ROUND(0x5a);\
ROUND(0x4b);\
})
#define P1 ({\
ROUND(0x4b);\
})
static const int R[5][2] = {
{19, 28}, {39, 61}, {1, 6}, {10, 17}, {7, 41}
};
#define ABSORB_LANES(src, len) ({ \
u32 rem_bytes = len; \
u64 *src64 = (u64 *)src; \
u32 idx64 = 0; \
while(1){ \
if(rem_bytes>ISAP_rH_SZ){ \
x0 ^= U64BIG(src64[idx64]); \
idx64++; \
P12; \
rem_bytes -= ISAP_rH_SZ; \
} else if(rem_bytes==ISAP_rH_SZ){ \
x0 ^= U64BIG(src64[idx64]); \
P12; \
x0 ^= 0x8000000000000000ULL; \
P12; \
break; \
} else { \
u64 lane64; \
u8 *lane8 = (u8 *)&lane64; \
u32 idx8 = idx64*8; \
for (u32 i = 0; i < 8; i++) { \
if(i<(rem_bytes)){ \
lane8[i] = src[idx8]; \
idx8++; \
} else if(i==rem_bytes){ \
lane8[i] = 0x80; \
} else { \
lane8[i] = 0x00; \
} \
} \
x0 ^= U64BIG(lane64); \
P12; \
break; \
} \
} \
})
/******************************************************************************/
/* IsapRk */
/******************************************************************************/
void isap_rk(
const u8 *k,
const u8 *iv,
const u8 *y,
const u64 ylen,
u8 *out,
const u64 outlen
){
const u64 *k64 = (u64 *)k;
const u64 *iv64 = (u64 *)iv;
u64 *out64 = (u64 *)out;
u64 x0, x1, x2, x3, x4;
u64 t0, t1, t2, t3, t4;
// Init state
t0 = t1 = t2 = t3 = t4 = 0;
x0 = U64BIG(k64[0]);
x1 = U64BIG(k64[1]);
x2 = U64BIG(iv64[0]);
x3 = x4 = 0;
P12;
// Absorb Y
for (size_t i = 0; i < ylen*8-1; i++){
size_t cur_byte_pos = i/8;
size_t cur_bit_pos = 7-(i%8);
u8 cur_bit = ((y[cur_byte_pos] >> (cur_bit_pos)) & 0x01) << 7;
x0 ^= ((u64)cur_bit) << 56;
P1;
}
u8 cur_bit = ((y[ylen-1]) & 0x01) << 7;
x0 ^= ((u64)cur_bit) << 56;
P12;
// Extract K*
out64[0] = U64BIG(x0);
out64[1] = U64BIG(x1);
if(outlen == 24){
out64[2] = U64BIG(x2);
}
}
/******************************************************************************/
/* IsapMac */
/******************************************************************************/
void isap_mac(
const u8 *k,
const u8 *npub,
const u8 *ad, const u64 adlen,
const u8 *c, const u64 clen,
u8 *tag
){
u8 state[ISAP_STATE_SZ];
const u64 *npub64 = (u64 *)npub;
u64 *state64 = (u64 *)state;
u64 x0, x1, x2, x3, x4;
u64 t0, t1, t2, t3, t4;
t0 = t1 = t2 = t3 = t4 = 0;
// Init state
x0 = U64BIG(npub64[0]);
x1 = U64BIG(npub64[1]);
x2 = U64BIG(((u64 *)ISAP_IV1)[0]);
x3 = x4 = 0;
P12;
// Absorb AD
ABSORB_LANES(ad,adlen);
// Domain seperation
x4 ^= 0x0000000000000001ULL;
// Absorb C
ABSORB_LANES(c,clen);
// Derive K*
state64[0] = U64BIG(x0);
state64[1] = U64BIG(x1);
state64[2] = U64BIG(x2);
state64[3] = U64BIG(x3);
state64[4] = U64BIG(x4);
isap_rk(k,ISAP_IV2,(u8 *)state64,CRYPTO_KEYBYTES,(u8 *)state64,CRYPTO_KEYBYTES);
x0 = U64BIG(state64[0]);
x1 = U64BIG(state64[1]);
x2 = U64BIG(state64[2]);
x3 = U64BIG(state64[3]);
x4 = U64BIG(state64[4]);
// Squeeze tag
P12;
unsigned long long *tag64 = (u64 *)tag;
tag64[0] = U64BIG(x0);
tag64[1] = U64BIG(x1);
}
/******************************************************************************/
/* IsapEnc */
/******************************************************************************/
void isap_enc(
const u8 *k,
const u8 *npub,
const u8 *m,
const u64 mlen,
u8 *c
){
u8 state[ISAP_STATE_SZ];
// Init state
u64 *state64 = (u64 *)state;
u64 *npub64 = (u64 *)npub;
isap_rk(k,ISAP_IV3,npub,CRYPTO_NPUBBYTES,state,ISAP_STATE_SZ-CRYPTO_NPUBBYTES);
u64 x0, x1, x2, x3, x4;
u64 t0, t1, t2, t3, t4;
t0 = t1 = t2 = t3 = t4 = 0;
x0 = U64BIG(state64[0]);
x1 = U64BIG(state64[1]);
x2 = U64BIG(state64[2]);
x3 = U64BIG(npub64[0]);
x4 = U64BIG(npub64[1]);
P6;
// Squeeze key stream
u64 rem_bytes = mlen;
u64 *m64 = (u64 *)m;
u64 *c64 = (u64 *)c;
u32 idx64 = 0;
while(1){
if(rem_bytes>ISAP_rH_SZ){
// Squeeze full lane
c64[idx64] = U64BIG(x0) ^ m64[idx64];
idx64++;
P6;
rem_bytes -= ISAP_rH_SZ;
} else if(rem_bytes==ISAP_rH_SZ){
// Squeeze full lane and stop
c64[idx64] = U64BIG(x0) ^ m64[idx64];
break;
} else {
// Squeeze partial lane and stop
u64 lane64 = U64BIG(x0);
u8 *lane8 = (u8 *)&lane64;
u32 idx8 = idx64*8;
for (u32 i = 0; i < rem_bytes; i++) {
c[idx8] = lane8[i] ^ m[idx8];
idx8++;
}
break;
}
}
}
#ifndef ISAP_H
#define ISAP_H
// Rate in bits
#define ISAP_rH 64
#define ISAP_rB 1
// Number of rounds
#define ISAP_sH 12
#define ISAP_sB 1
#define ISAP_sE 6
#define ISAP_sK 12
// State size in bytes
#define ISAP_STATE_SZ 40
// Size of rate in bytes
#define ISAP_rH_SZ ((ISAP_rH+7)/8)
// Size of zero truncated IV in bytes
#define ISAP_IV_SZ 8
// Size of tag in bytes
#define ISAP_TAG_SZ 16
// Security level
#define ISAP_K 128
void isap_mac(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *ad, const unsigned long long adlen,
const unsigned char *c, const unsigned long long clen,
unsigned char *tag
);
void isap_rk(
const unsigned char *k,
const unsigned char *iv,
const unsigned char *in,
const unsigned long long inlen,
unsigned char *out,
const unsigned long long outlen
);
void isap_enc(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *m, const unsigned long long mlen,
unsigned char *c
);
#endif
This source diff could not be displayed because it is too large. You can view the blob instead.
#ifndef API_H
#define API_H
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#endif
#ifndef ASCONP_H_
#define ASCONP_H_
#include <inttypes.h>
typedef unsigned char u8;
typedef uint32_t u32;
typedef unsigned long long u64;
typedef struct
{
u32 e;
u32 o;
} u32_2;
// Round constants, bit-interleaved
u32 rc_o[12] = {0xc, 0xc, 0x9, 0x9, 0xc, 0xc, 0x9, 0x9, 0x6, 0x6, 0x3, 0x3};
u32 rc_e[12] = {0xc, 0x9, 0xc, 0x9, 0x6, 0x3, 0x6, 0x3, 0xc, 0x9, 0xc, 0x9};
/* ---------------------------------------------------------------- */
u64 U64BIG(u64 x)
{
return ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) |
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) |
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) |
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56));
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
void to_bit_interleaving(u32_2 *out, u64 in)
{
u32 hi = (in) >> 32;
u32 lo = (u32)(in);
u32 r0, r1;
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
(*out).e = (lo & 0x0000FFFF) | (hi << 16);
(*out).o = (lo >> 16) | (hi & 0xFFFF0000);
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
void from_bit_interleaving(u64 *out, u32_2 in)
{
u32 lo = ((in).e & 0x0000FFFF) | ((in).o << 16);
u32 hi = ((in).e >> 16) | ((in).o & 0xFFFF0000);
u32 r0, r1;
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
*out = (u64)hi << 32 | lo;
}
/* ---------------------------------------------------------------- */
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
/* ---------------------------------------------------------------- */
void static inline PX(u32 rounds, u32_2* x0, u32_2* x1, u32_2* x2, u32_2* x3, u32_2* x4) {
u32_2 t0, t1, t2, t3, t4;
for (u32 r = 12 - rounds; r < 12; r++){
/* rcon */
(*x2).e ^= rc_e[r];
(*x2).o ^= rc_o[r];
/* non-linear layer */
(*x0).e ^= (*x4).e;
(*x0).o ^= (*x4).o;
(*x4).e ^= (*x3).e;
(*x4).o ^= (*x3).o;
(*x2).e ^= (*x1).e;
(*x2).o ^= (*x1).o;
(t0).e = (*x0).e;
(t0).o = (*x0).o;
(t4).e = (*x4).e;
(t4).o = (*x4).o;
(t3).e = (*x3).e;
(t3).o = (*x3).o;
(t1).e = (*x1).e;
(t1).o = (*x1).o;
(t2).e = (*x2).e;
(t2).o = (*x2).o;
(*x0).e = t0.e ^ (~t1.e & t2.e);
(*x0).o = t0.o ^ (~t1.o & t2.o);
(*x2).e = t2.e ^ (~t3.e & t4.e);
(*x2).o = t2.o ^ (~t3.o & t4.o);
(*x4).e = t4.e ^ (~t0.e & t1.e);
(*x4).o = t4.o ^ (~t0.o & t1.o);
(*x1).e = t1.e ^ (~t2.e & t3.e);
(*x1).o = t1.o ^ (~t2.o & t3.o);
(*x3).e = t3.e ^ (~t4.e & t0.e);
(*x3).o = t3.o ^ (~t4.o & t0.o);
(*x1).e ^= (*x0).e;
(*x1).o ^= (*x0).o;
(*x3).e ^= (*x2).e;
(*x3).o ^= (*x2).o;
(*x0).e ^= (*x4).e;
(*x0).o ^= (*x4).o;
/* linear layer */
t0.e = (*x0).e ^ ROTR32((*x0).o, 4);
t0.o = (*x0).o ^ ROTR32((*x0).e, 5);
t1.e = (*x1).e ^ ROTR32((*x1).e, 11);
t1.o = (*x1).o ^ ROTR32((*x1).o, 11);
t2.e = (*x2).e ^ ROTR32((*x2).o, 2);
t2.o = (*x2).o ^ ROTR32((*x2).e, 3);
t3.e = (*x3).e ^ ROTR32((*x3).o, 3);
t3.o = (*x3).o ^ ROTR32((*x3).e, 4);
t4.e = (*x4).e ^ ROTR32((*x4).e, 17);
t4.o = (*x4).o ^ ROTR32((*x4).o, 17);
(*x0).e ^= ROTR32(t0.o, 9);
(*x0).o ^= ROTR32(t0.e, 10);
(*x1).e ^= ROTR32(t1.o, 19);
(*x1).o ^= ROTR32(t1.e, 20);
(*x2).e ^= t2.o;
(*x2).o ^= ROTR32(t2.e, 1);
(*x3).e ^= ROTR32(t3.e, 5);
(*x3).o ^= ROTR32(t3.o, 5);
(*x4).e ^= ROTR32(t4.o, 3);
(*x4).o ^= ROTR32(t4.e, 4);
(*x2).e = ~(*x2).e;
(*x2).o = ~(*x2).o;
}
}
#endif // ASCONP_H_
#include "api.h"
#include "isap.h"
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
){
(void)nsec;
// Ciphertext length is mlen + tag length
*clen = mlen+ISAP_TAG_SZ;
// Encrypt plaintext
if (mlen > 0) {
isap_enc(k,npub,m,mlen,c);
}
// Generate tag
unsigned char *tag = c+mlen;
isap_mac(k,npub,ad,adlen,c,mlen,tag);
return 0;
}
#include "api.h"
#include "isap.h"
#include "asconp.h"
const u8 ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const u8 ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const u8 ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
#define P_sB PX(12,&x0,&x1,&x2,&x3,&x4)
#define P_sE PX(12,&x0,&x1,&x2,&x3,&x4)
#define P_sH PX(12,&x0,&x1,&x2,&x3,&x4)
#define P_sK PX(12,&x0,&x1,&x2,&x3,&x4)
/******************************************************************************/
/* ISAP_RK */
/******************************************************************************/
void isap_rk(
const u8 *k,
const u8 *iv,
const u8 *y,
u8 *out,
const u8 outlen)
{
// State variables
u32_2 x0, x1, x2, x3, x4;
// Initialize
to_bit_interleaving(&x0, U64BIG(*(u64 *)(k + 0)));
to_bit_interleaving(&x1, U64BIG(*(u64 *)(k + 8)));
to_bit_interleaving(&x2, U64BIG(*(u64 *)(iv + 0)));
x3.o = 0;
x3.e = 0;
x4.o = 0;
x4.e = 0;
P_sK;
// Absorb Y, bit by bit
for (u8 i = 0; i < 127; i++) {
u8 cur_byte_pos = i / 8;
u8 cur_bit_pos = 7 - (i % 8);
u32 cur_bit = ((y[cur_byte_pos] >> (cur_bit_pos)) & 0x01) << 7;
x0.o ^= ((u32)cur_bit) << 24;
P_sB;
}
u8 cur_bit = ((y[15]) & 0x01) << 7;
x0.o ^= ((u32)cur_bit) << (24);
// Squeeze - Derive K*
P_sK;
*(u32 *)(out + 0) = x0.o;
*(u32 *)(out + 4) = x0.e;
*(u32 *)(out + 8) = x1.o;
*(u32 *)(out + 12) = x1.e;
if (outlen > 16) {
*(u32 *)(out + 16) = x2.o;
*(u32 *)(out + 20) = x2.e;
}
}
/******************************************************************************/
/* ISAP_MAC */
/******************************************************************************/
void isap_mac(
const u8 *k,
const u8 *npub,
const u8 *ad, u64 adlen,
const u8 *c, u64 clen,
u8 *tag)
{
// State and temporary variables
u32_2 x0, x1, x2, x3, x4;
u32_2 t0;
u64 tmp0;
// Initialize
to_bit_interleaving(&x0, U64BIG(*(u64 *)npub + 0));
to_bit_interleaving(&x1, U64BIG(*(u64 *)(npub + 8)));
to_bit_interleaving(&x2, U64BIG(*(u64 *)(ISAP_IV_A)));
x3.o = 0;
x3.e = 0;
x4.o = 0;
x4.e = 0;
P_sH;
// Absorb full lanes of AD
while (adlen >= 8)
{
to_bit_interleaving(&t0, U64BIG(*(u64 *)ad));
x0.e ^= t0.e;
x0.o ^= t0.o;
adlen -= ISAP_rH / 8;
ad += ISAP_rH / 8;
P_sH;
}
// Absorb partial lane of AD and add padding
if (adlen > 0)
{
tmp0 = 0;
u8 *tmp0_bytes = (u8 *)&tmp0;
u8 i;
for (i = 0; i < adlen; i++)
{
tmp0_bytes[i] = *ad;
ad += 1;
}
tmp0_bytes[i] = 0x80;
to_bit_interleaving(&t0, U64BIG(tmp0));
x0.e ^= t0.e;
x0.o ^= t0.o;
P_sH;
}
// Absorb AD padding if not already done before
if (adlen == 0)
{
x0.o ^= 0x80000000;
P_sH;
}
// Domain Seperation
x4.e ^= ((u32)0x01);
// Absorb full lanes of C
while (clen >= 8)
{
to_bit_interleaving(&t0, U64BIG(*(u64 *)c));
x0.e ^= t0.e;
x0.o ^= t0.o;
P_sH;
clen -= ISAP_rH / 8;
c += ISAP_rH / 8;
}
// Absorb partial lane of C and add padding
if (clen > 0)
{
tmp0 = 0;
u8 *tmp0_bytes = (u8 *)&tmp0;
u8 i;
for (i = 0; i < clen; i++)
{
tmp0_bytes[i] = *c;
c += 1;
}
tmp0_bytes[i] = 0x80;
to_bit_interleaving(&t0, U64BIG(tmp0));
x0.e ^= t0.e;
x0.o ^= t0.o;
P_sH;
}
// Absorb C padding if not already done before
if (clen == 0)
{
x0.o ^= 0x80000000;
P_sH;
}
// Finalize - Derive Ka*
u64 y64[CRYPTO_KEYBYTES / 8];
from_bit_interleaving(&tmp0, x0);
y64[0] = U64BIG(tmp0);
from_bit_interleaving(&tmp0, x1);
y64[1] = U64BIG(tmp0);
u32 ka_star32[CRYPTO_KEYBYTES / 4];
isap_rk(k, ISAP_IV_KA, (u8 *)y64, (u8 *)ka_star32, CRYPTO_KEYBYTES);
// Finalize - Squeeze T
x0.o = ka_star32[0];
x0.e = ka_star32[1];
x1.o = ka_star32[2];
x1.e = ka_star32[3];
P_sH;
from_bit_interleaving(&tmp0, x0);
*(u64 *)(tag + 0) = U64BIG(tmp0);
from_bit_interleaving(&tmp0, x1);
*(u64 *)(tag + 8) = U64BIG(tmp0);
}
/******************************************************************************/
/* ISAP_ENC */
/******************************************************************************/
void isap_enc(
const u8 *k,
const u8 *npub,
const u8 *m, u64 mlen,
u8 *c)
{
// Derive Ke
u8 ke[ISAP_STATE_SZ - CRYPTO_NPUBBYTES];
isap_rk(k, ISAP_IV_KE, npub, ke, ISAP_STATE_SZ - CRYPTO_NPUBBYTES);
// State and temporary variables
u32_2 x0, x1, x2, x3, x4;
u64 tmp0;
// Init State
x0.o = *(u32 *)(ke + 0);
x0.e = *(u32 *)(ke + 4);
x1.o = *(u32 *)(ke + 8);
x1.e = *(u32 *)(ke + 12);
x2.o = *(u32 *)(ke + 16);
x2.e = *(u32 *)(ke + 20);
to_bit_interleaving(&x3, U64BIG(*(u64 *)npub));
to_bit_interleaving(&x4, U64BIG(*(u64 *)(npub + 8)));
// Squeeze full lanes
while (mlen >= 8)
{
P_sE;
from_bit_interleaving(&tmp0, x0);
*(u64 *)c = *(u64 *)m ^ U64BIG(tmp0);
mlen -= 8;
m += ISAP_rH / 8;
c += ISAP_rH / 8;
}
// Squeeze partial lane
if (mlen > 0)
{
P_sE;
from_bit_interleaving(&tmp0, x0);
tmp0 = U64BIG(tmp0);
u8 *tmp0_bytes = (u8 *)&tmp0;
for (u8 i = 0; i < mlen; i++)
{
*c = *m ^ tmp0_bytes[i];
m += 1;
c += 1;
}
}
}
#ifndef ISAP_H
#define ISAP_H
// Rate in bits
#define ISAP_rH 64
#define ISAP_rB 1
// Number of rounds
#define ISAP_sH 12
#define ISAP_sB 12
#define ISAP_sE 12
#define ISAP_sK 12
// State size in bytes
#define ISAP_STATE_SZ 40
// Size of rate in bytes
#define ISAP_rH_SZ ((ISAP_rH + 7) / 8)
// Size of zero truncated IV in bytes
#define ISAP_IV_SZ 8
// Size of tag in bytes
#define ISAP_TAG_SZ 16
// Security level
#define ISAP_K 128
void isap_mac(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *c, unsigned long long clen,
unsigned char *tag);
void isap_rk(
const unsigned char *k,
const unsigned char *iv,
const unsigned char *in,
unsigned char *out,
const unsigned char outlen);
void isap_enc(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *m, unsigned long long mlen,
unsigned char *c);
#endif
#ifndef API_H
#define API_H
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#endif
#ifndef ASCONP_H_
#define ASCONP_H_
typedef unsigned char u8;
typedef unsigned long u32;
typedef unsigned long long u64;
typedef struct
{
u32 e;
u32 o;
} u32_2;
// Round constants, bit-interleaved
u32 rc_o[12] = {0xc, 0xc, 0x9, 0x9, 0xc, 0xc, 0x9, 0x9, 0x6, 0x6, 0x3, 0x3};
u32 rc_e[12] = {0xc, 0x9, 0xc, 0x9, 0x6, 0x3, 0x6, 0x3, 0xc, 0x9, 0xc, 0x9};
/* ---------------------------------------------------------------- */
u64 U64BIG(u64 x)
{
return ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) |
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) |
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) |
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56));
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
void to_bit_interleaving(u32_2 *out, u64 in)
{
u32 hi = (in) >> 32;
u32 lo = (u32)(in);
u32 r0, r1;
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
(*out).e = (lo & 0x0000FFFF) | (hi << 16);
(*out).o = (lo >> 16) | (hi & 0xFFFF0000);
}
/* ---------------------------------------------------------------- */
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
void from_bit_interleaving(u64 *out, u32_2 in)
{
u32 lo = ((in).e & 0x0000FFFF) | ((in).o << 16);
u32 hi = ((in).e >> 16) | ((in).o & 0xFFFF0000);
u32 r0, r1;
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
*out = (u64)hi << 32 | lo;
}
/* ---------------------------------------------------------------- */
#define ROUND() \
do \
{ \
__asm__ __volatile__( \
"eor %[x0_e], %[x0_e], %[x4_e]\n\t" \
"eor %[x0_o], %[x0_o], %[x4_o]\n\t" \
"eor %[x4_e], %[x4_e], %[x3_e]\n\t" \
"eor %[x4_o], %[x4_o], %[x3_o]\n\t" \
"eor %[x2_e], %[x2_e], %[x1_e]\n\t" \
"eor %[x2_o], %[x2_o], %[x1_o]\n\t" \
"bic %[reg0], %[x0_e], %[x4_e]\n\t" \
"bic %[reg1], %[x4_e], %[x3_e]\n\t" \
"bic %[reg2], %[x2_e], %[x1_e]\n\t" \
"bic %[reg3], %[x1_e], %[x0_e]\n\t" \
"eor %[x2_e], %[x2_e], %[reg1]\n\t" \
"eor %[x0_e], %[x0_e], %[reg2]\n\t" \
"eor %[x4_e], %[x4_e], %[reg3]\n\t" \
"bic %[reg3], %[x3_e], %[x2_e]\n\t" \
"eor %[x3_e], %[x3_e], %[reg0]\n\t" \
"bic %[reg2], %[x0_o], %[x4_o]\n\t" \
"bic %[reg0], %[x2_o], %[x1_o]\n\t" \
"bic %[reg1], %[x4_o], %[x3_o]\n\t" \
"eor %[x1_e], %[x1_e], %[reg3]\n\t" \
"eor %[x0_o], %[x0_o], %[reg0]\n\t" \
"eor %[x2_o], %[x2_o], %[reg1]\n\t" \
"bic %[reg3], %[x1_o], %[x0_o]\n\t" \
"bic %[reg0], %[x3_o], %[x2_o]\n\t" \
"eor %[x3_o], %[x3_o], %[reg2]\n\t" \
"eor %[x3_o], %[x3_o], %[x2_o]\n\t" \
"eor %[x4_o], %[x4_o], %[reg3]\n\t" \
"eor %[x1_o], %[x1_o], %[reg0]\n\t" \
"eor %[x3_e], %[x3_e], %[x2_e]\n\t" \
"eor %[x1_e], %[x1_e], %[x0_e]\n\t" \
"eor %[x1_o], %[x1_o], %[x0_o]\n\t" \
"eor %[x0_e], %[x0_e], %[x4_e]\n\t" \
"eor %[x0_o], %[x0_o], %[x4_o]\n\t" \
"mvn %[x2_e], %[x2_e]\n\t" \
"mvn %[x2_o], %[x2_o]\n\t" \
"eor %[reg0], %[x0_e], %[x0_o], ror #4\n\t" \
"eor %[reg1], %[x0_o], %[x0_e], ror #5\n\t" \
"eor %[reg2], %[x1_e], %[x1_e], ror #11\n\t" \
"eor %[reg3], %[x1_o], %[x1_o], ror #11\n\t" \
"eor %[x0_e], %[x0_e], %[reg1], ror #9\n\t" \
"eor %[x0_o], %[x0_o], %[reg0], ror #10\n\t" \
"eor %[x1_e], %[x1_e], %[reg3], ror #19\n\t" \
"eor %[x1_o], %[x1_o], %[reg2], ror #20\n\t" \
"eor %[reg0], %[x2_e], %[x2_o], ror #2\n\t" \
"eor %[reg1], %[x2_o], %[x2_e], ror #3\n\t" \
"eor %[reg2], %[x3_e], %[x3_o], ror #3\n\t" \
"eor %[reg3], %[x3_o], %[x3_e], ror #4\n\t" \
"eor %[x2_e], %[x2_e], %[reg1]\n\t" \
"eor %[x2_o], %[x2_o], %[reg0], ror #1\n\t" \
"eor %[x3_e], %[x3_e], %[reg2], ror #5\n\t" \
"eor %[x3_o], %[x3_o], %[reg3], ror #5\n\t" \
"eor %[reg0], %[x4_e], %[x4_e], ror #17\n\t" \
"eor %[reg1], %[x4_o], %[x4_o], ror #17\n\t" \
"eor %[x4_e], %[x4_e], %[reg1], ror #3\n\t" \
"eor %[x4_o], %[x4_o], %[reg0], ror #4\n\t" \
: [ x0_e ] "+r"(x0.e), [ x1_e ] "+r"(x1.e), \
[ x2_e ] "+r"(x2.e), [ x3_e ] "+r"(x3.e), \
[ x4_e ] "+r"(x4.e), [ x0_o ] "+r"(x0.o), \
[ x1_o ] "+r"(x1.o), [ x2_o ] "+r"(x2.o), \
[ x3_o ] "+r"(x3.o), [ x4_o ] "+r"(x4.o), \
[ reg0 ] "=r"(reg0), [ reg1 ] "=r"(reg1), \
[ reg2 ] "=r"(reg2), [ reg3 ] "=r"(reg3)::); \
} while (0)
/* ---------------------------------------------------------------- */
#define P_LOOP(rounds) \
do \
{ \
u32 reg0, reg1, reg2, reg3; \
for (u32 r = 12 - rounds; r < 12; r++) \
{ \
x2.e ^= rc_e[r]; \
x2.o ^= rc_o[r]; \
ROUND(); \
} \
} while (0)
/* ---------------------------------------------------------------- */
#define P_12() \
do \
{ \
u32 reg0, reg1, reg2, reg3; \
x2.e ^= 0xc; \
x2.o ^= 0xc; \
ROUND(); \
x2.e ^= 0x9; \
x2.o ^= 0xc; \
ROUND(); \
x2.e ^= 0xc; \
x2.o ^= 0x9; \
ROUND(); \
x2.e ^= 0x9; \
x2.o ^= 0x9; \
ROUND(); \
x2.e ^= 0x6; \
x2.o ^= 0xc; \
ROUND(); \
x2.e ^= 0x3; \
x2.o ^= 0xc; \
ROUND(); \
x2.e ^= 0x6; \
x2.o ^= 0x9; \
ROUND(); \
x2.e ^= 0x3; \
x2.o ^= 0x9; \
ROUND(); \
x2.e ^= 0xc; \
x2.o ^= 0x6; \
ROUND(); \
x2.e ^= 0x9; \
x2.o ^= 0x6; \
ROUND(); \
x2.e ^= 0xc; \
x2.o ^= 0x3; \
ROUND(); \
x2.e ^= 0x9; \
x2.o ^= 0x3; \
ROUND(); \
} while (0)
/* ---------------------------------------------------------------- */
#define P_6() \
do \
{ \
u32 reg0, reg1, reg2, reg3; \
x2.e ^= 0x6; \
x2.o ^= 0x9; \
ROUND(); \
x2.e ^= 0x3; \
x2.o ^= 0x9; \
ROUND(); \
x2.e ^= 0xc; \
x2.o ^= 0x6; \
ROUND(); \
x2.e ^= 0x9; \
x2.o ^= 0x6; \
ROUND(); \
x2.e ^= 0xc; \
x2.o ^= 0x3; \
ROUND(); \
x2.e ^= 0x9; \
x2.o ^= 0x3; \
ROUND(); \
} while (0)
/* ---------------------------------------------------------------- */
#define P_1() \
do \
{ \
u32 reg0, reg1, reg2, reg3; \
x2.e ^= 0x9; \
x2.o ^= 0x3; \
ROUND(); \
} while (0)
#endif // ASCONP_H_
#include "api.h"
#include "isap.h"
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
){
(void)nsec;
// Ciphertext length is mlen + tag length
*clen = mlen+ISAP_TAG_SZ;
// Encrypt plaintext
if (mlen > 0) {
isap_enc(k,npub,m,mlen,c);
}
// Generate tag
unsigned char *tag = c+mlen;
isap_mac(k,npub,ad,adlen,c,mlen,tag);
return 0;
}
#include "api.h"
#include "isap.h"
#include "asconp.h"
const u8 ISAP_IV_A[] = {0x01, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const u8 ISAP_IV_KA[] = {0x02, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
const u8 ISAP_IV_KE[] = {0x03, ISAP_K, ISAP_rH, ISAP_rB, ISAP_sH, ISAP_sB, ISAP_sE, ISAP_sK};
#define P_sB_UROL P_12()
#define P_sE P_LOOP(12)
#define P_sE_UROL P_12()
#define P_sH P_LOOP(12)
#define P_sH_UROL P_12()
#define P_sK P_LOOP(12)
#define P_sK_UROL P_12()
/******************************************************************************/
/* ISAP_RK */
/******************************************************************************/
void isap_rk(
const u8 *k,
const u8 *iv,
const u8 *y,
u8 *out,
const u32 outlen)
{
// State variables
u32_2 x0, x1, x2, x3, x4;
// Initialize
to_bit_interleaving(&x0, U64BIG(*(u64 *)(k + 0)));
to_bit_interleaving(&x1, U64BIG(*(u64 *)(k + 8)));
to_bit_interleaving(&x2, U64BIG(*(u64 *)(iv + 0)));
x3.o = 0;
x3.e = 0;
x4.o = 0;
x4.e = 0;
P_sK;
// Absorb Y, bit by bit
for (u8 i = 0; i < 127; i++)
{
u8 cur_byte_pos = i / 8;
u8 cur_bit_pos = 7 - (i % 8);
u32 cur_bit = ((y[cur_byte_pos] >> (cur_bit_pos)) & 0x01) << 7;
x0.o ^= ((u32)cur_bit) << 24;
P_sB_UROL;
}
u8 cur_bit = ((y[15]) & 0x01) << 7;
x0.o ^= ((u32)cur_bit) << (24);
// // Absorb Y, bit by bit
// for (u8 i = 0; i < 16; i++)
// {
// u32 cur_byte = *y;
// x0.o ^= (cur_byte & 0x80) << 24;
// P_sB_UROL;
// x0.o ^= (cur_byte & 0x40) << 25;
// P_sB_UROL;
// x0.o ^= (cur_byte & 0x20) << 26;
// P_sB_UROL;
// x0.o ^= (cur_byte & 0x10) << 27;
// P_sB_UROL;
// x0.o ^= (cur_byte & 0x08) << 28;
// P_sB_UROL;
// x0.o ^= (cur_byte & 0x04) << 29;
// P_sB_UROL;
// x0.o ^= (cur_byte & 0x02) << 30;
// P_sB_UROL;
// x0.o ^= (cur_byte & 0x01) << 31;
// if (i != 15)
// {
// P_sB_UROL;
// y += 1;
// }
// }
// Squeeze - Derive K*
P_sK;
*(u32 *)(out + 0) = x0.o;
*(u32 *)(out + 4) = x0.e;
*(u32 *)(out + 8) = x1.o;
*(u32 *)(out + 12) = x1.e;
if (outlen > 16)
{
*(u32 *)(out + 16) = x2.o;
*(u32 *)(out + 20) = x2.e;
}
}
/******************************************************************************/
/* ISAP_MAC */
/******************************************************************************/
void isap_mac(
const u8 *k,
const u8 *npub,
const u8 *ad, u64 adlen,
const u8 *c, u64 clen,
u8 *tag)
{
// State and temporary variables
u32_2 x0, x1, x2, x3, x4;
u32_2 t0;
u64 tmp0;
// Initialize
to_bit_interleaving(&x0, U64BIG(*(u64 *)npub + 0));
to_bit_interleaving(&x1, U64BIG(*(u64 *)(npub + 8)));
to_bit_interleaving(&x2, U64BIG(*(u64 *)(ISAP_IV_A)));
x3.o = 0;
x3.e = 0;
x4.o = 0;
x4.e = 0;
P_sH;
// Absorb full lanes of AD
while (adlen >= 8)
{
to_bit_interleaving(&t0, U64BIG(*(u64 *)ad));
x0.e ^= t0.e;
x0.o ^= t0.o;
adlen -= ISAP_rH / 8;
ad += ISAP_rH / 8;
P_sH_UROL;
}
// Absorb partial lane of AD and add padding
if (adlen > 0)
{
tmp0 = 0;
u8 *tmp0_bytes = (u8 *)&tmp0;
u8 i;
for (i = 0; i < adlen; i++)
{
tmp0_bytes[i] = *ad;
ad += 1;
}
tmp0_bytes[i] = 0x80;
to_bit_interleaving(&t0, U64BIG(tmp0));
x0.e ^= t0.e;
x0.o ^= t0.o;
P_sH;
}
// Absorb AD padding if not already done before
if (adlen == 0)
{
x0.o ^= 0x80000000;
P_sH;
}
// Domain Seperation
x4.e ^= ((u32)0x01);
// Absorb full lanes of C
while (clen >= 8)
{
to_bit_interleaving(&t0, U64BIG(*(u64 *)c));
x0.e ^= t0.e;
x0.o ^= t0.o;
P_sH_UROL;
clen -= ISAP_rH / 8;
c += ISAP_rH / 8;
}
// Absorb partial lane of C and add padding
if (clen > 0)
{
tmp0 = 0;
u8 *tmp0_bytes = (u8 *)&tmp0;
u8 i;
for (i = 0; i < clen; i++)
{
tmp0_bytes[i] = *c;
c += 1;
}
tmp0_bytes[i] = 0x80;
to_bit_interleaving(&t0, U64BIG(tmp0));
x0.e ^= t0.e;
x0.o ^= t0.o;
P_sH;
}
// Absorb C padding if not already done before
if (clen == 0)
{
x0.o ^= 0x80000000;
P_sH;
}
// Finalize - Derive Ka*
u64 y64[CRYPTO_KEYBYTES / 8];
from_bit_interleaving(&tmp0, x0);
y64[0] = U64BIG(tmp0);
from_bit_interleaving(&tmp0, x1);
y64[1] = U64BIG(tmp0);
u32 ka_star32[CRYPTO_KEYBYTES / 4];
isap_rk(k, ISAP_IV_KA, (u8 *)y64, (u8 *)ka_star32, CRYPTO_KEYBYTES);
// Finalize - Squeeze T
x0.o = ka_star32[0];
x0.e = ka_star32[1];
x1.o = ka_star32[2];
x1.e = ka_star32[3];
P_sH;
from_bit_interleaving(&tmp0, x0);
*(u64 *)(tag + 0) = U64BIG(tmp0);
from_bit_interleaving(&tmp0, x1);
*(u64 *)(tag + 8) = U64BIG(tmp0);
}
/******************************************************************************/
/* ISAP_ENC */
/******************************************************************************/
void isap_enc(
const u8 *k,
const u8 *npub,
const u8 *m, u64 mlen,
u8 *c)
{
// Derive Ke
u8 ke[ISAP_STATE_SZ - CRYPTO_NPUBBYTES];
isap_rk(k, ISAP_IV_KE, npub, ke, ISAP_STATE_SZ - CRYPTO_NPUBBYTES);
// State and temporary variables
u32_2 x0, x1, x2, x3, x4;
u64 tmp0;
// Init State
x0.o = *(u32 *)(ke + 0);
x0.e = *(u32 *)(ke + 4);
x1.o = *(u32 *)(ke + 8);
x1.e = *(u32 *)(ke + 12);
x2.o = *(u32 *)(ke + 16);
x2.e = *(u32 *)(ke + 20);
to_bit_interleaving(&x3, U64BIG(*(u64 *)npub));
to_bit_interleaving(&x4, U64BIG(*(u64 *)(npub + 8)));
// Squeeze full lanes
while (mlen >= 8)
{
P_sE_UROL;
from_bit_interleaving(&tmp0, x0);
*(u64 *)c = *(u64 *)m ^ U64BIG(tmp0);
mlen -= 8;
m += ISAP_rH / 8;
c += ISAP_rH / 8;
}
// Squeeze partial lane
if (mlen > 0)
{
P_sE;
from_bit_interleaving(&tmp0, x0);
tmp0 = U64BIG(tmp0);
u8 *tmp0_bytes = (u8 *)&tmp0;
for (u8 i = 0; i < mlen; i++)
{
*c = *m ^ tmp0_bytes[i];
m += 1;
c += 1;
}
}
}
#ifndef ISAP_H
#define ISAP_H
// Rate in bits
#define ISAP_rH 64
#define ISAP_rB 1
// Number of rounds
#define ISAP_sH 12
#define ISAP_sB 12
#define ISAP_sE 12
#define ISAP_sK 12
// State size in bytes
#define ISAP_STATE_SZ 40
// Size of rate in bytes
#define ISAP_rH_SZ ((ISAP_rH + 7) / 8)
// Size of zero truncated IV in bytes
#define ISAP_IV_SZ 8
// Size of tag in bytes
#define ISAP_TAG_SZ 16
// Security level
#define ISAP_K 128
void isap_mac(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *ad, const unsigned long long adlen,
const unsigned char *c, const unsigned long long clen,
unsigned char *tag);
void isap_rk(
const unsigned char *k,
const unsigned char *iv,
const unsigned char *in,
unsigned char *out,
const unsigned long outlen);
void isap_enc(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *m, const unsigned long long mlen,
unsigned char *c);
#endif
#ifndef API_H
#define API_H
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#endif
#include "api.h"
#include "isap.h"
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
){
(void)nsec;
// Ciphertext length is mlen + tag length
*clen = mlen+ISAP_TAG_SZ;
// Encrypt plaintext
if (mlen > 0) {
isap_enc(k,npub,m,mlen,c);
}
// Generate tag
unsigned char *tag = c+mlen;
isap_mac(k,npub,ad,adlen,c,mlen,tag);
return 0;
}
#include <stdio.h>
#include <string.h>
#include "api.h"
#include "isap.h"
typedef unsigned char u8;
typedef unsigned long long u64;
typedef unsigned long u32;
typedef long long i64;
const u8 ISAP_IV1[] = {0x01,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK};
const u8 ISAP_IV2[] = {0x02,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK};
const u8 ISAP_IV3[] = {0x03,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK};
#define RATE (64 / 8)
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define ROTR(x,n) (((x)>>(n))|((x)<<(64-(n))))
#define EXT_BYTE(x,n) ((u8)((u64)(x)>>(8*(7-(n)))))
#define INS_BYTE(x,n) ((u64)(x)<<(8*(7-(n))))
#define U64BIG(x) \
((ROTR(x, 8) & (0xFF000000FF000000ULL)) | \
(ROTR(x,24) & (0x00FF000000FF0000ULL)) | \
(ROTR(x,40) & (0x0000FF000000FF00ULL)) | \
(ROTR(x,56) & (0x000000FF000000FFULL)))
#define ROUND(C) ({\
x2 ^= C;\
x0 ^= x4;\
x4 ^= x3;\
x2 ^= x1;\
t0 = x0;\
t4 = x4;\
t3 = x3;\
t1 = x1;\
t2 = x2;\
x0 = t0 ^ ((~t1) & t2);\
x2 = t2 ^ ((~t3) & t4);\
x4 = t4 ^ ((~t0) & t1);\
x1 = t1 ^ ((~t2) & t3);\
x3 = t3 ^ ((~t4) & t0);\
x1 ^= x0;\
t1 = x1;\
x1 = ROTR(x1, R[1][0]);\
x3 ^= x2;\
t2 = x2;\
x2 = ROTR(x2, R[2][0]);\
t4 = x4;\
t2 ^= x2;\
x2 = ROTR(x2, R[2][1] - R[2][0]);\
t3 = x3;\
t1 ^= x1;\
x3 = ROTR(x3, R[3][0]);\
x0 ^= x4;\
x4 = ROTR(x4, R[4][0]);\
t3 ^= x3;\
x2 ^= t2;\
x1 = ROTR(x1, R[1][1] - R[1][0]);\
t0 = x0;\
x2 = ~x2;\
x3 = ROTR(x3, R[3][1] - R[3][0]);\
t4 ^= x4;\
x4 = ROTR(x4, R[4][1] - R[4][0]);\
x3 ^= t3;\
x1 ^= t1;\
x0 = ROTR(x0, R[0][0]);\
x4 ^= t4;\
t0 ^= x0;\
x0 = ROTR(x0, R[0][1] - R[0][0]);\
x0 ^= t0;\
})
#define P12 ({\
ROUND(0xf0);\
ROUND(0xe1);\
ROUND(0xd2);\
ROUND(0xc3);\
ROUND(0xb4);\
ROUND(0xa5);\
ROUND(0x96);\
ROUND(0x87);\
ROUND(0x78);\
ROUND(0x69);\
ROUND(0x5a);\
ROUND(0x4b);\
})
#define P6 ({\
ROUND(0x96);\
ROUND(0x87);\
ROUND(0x78);\
ROUND(0x69);\
ROUND(0x5a);\
ROUND(0x4b);\
})
#define P1 ({\
ROUND(0x4b);\
})
static const int R[5][2] = {
{19, 28}, {39, 61}, {1, 6}, {10, 17}, {7, 41}
};
#define ABSORB_MAC(src, len) ({ \
u32 rem_bytes = len; \
u64 *src64 = (u64 *)src; \
u32 idx64 = 0; \
while(1){ \
if(rem_bytes>ISAP_rH_SZ){ \
x0 ^= U64BIG(src64[idx64]); \
idx64++; \
P12; \
rem_bytes -= ISAP_rH_SZ; \
} else if(rem_bytes==ISAP_rH_SZ){ \
x0 ^= U64BIG(src64[idx64]); \
P12; \
x0 ^= 0x8000000000000000ULL; \
P12; \
break; \
} else { \
u64 lane64; \
u8 *lane8 = (u8 *)&lane64; \
u32 idx8 = idx64*8; \
for (u32 i = 0; i < 8; i++) { \
if(i<(rem_bytes)){ \
lane8[i] = src[idx8]; \
idx8++; \
} else if(i==rem_bytes){ \
lane8[i] = 0x80; \
} else { \
lane8[i] = 0x00; \
} \
} \
x0 ^= U64BIG(lane64); \
P12; \
break; \
} \
} \
})
/******************************************************************************/
/* IsapRk */
/******************************************************************************/
void isap_rk(
const u8 *k,
const u8 *iv,
const u8 *y,
const u64 ylen,
u8 *out,
const u64 outlen
){
const u64 *k64 = (u64 *)k;
const u64 *iv64 = (u64 *)iv;
u64 *out64 = (u64 *)out;
u64 x0, x1, x2, x3, x4;
u64 t0, t1, t2, t3, t4;
// Init state
t0 = t1 = t2 = t3 = t4 = 0;
x0 = U64BIG(k64[0]);
x1 = U64BIG(k64[1]);
x2 = U64BIG(iv64[0]);
x3 = x4 = 0;
P12;
// Absorb Y
for (size_t i = 0; i < ylen*8-1; i++){
size_t cur_byte_pos = i/8;
size_t cur_bit_pos = 7-(i%8);
u8 cur_bit = ((y[cur_byte_pos] >> (cur_bit_pos)) & 0x01) << 7;
x0 ^= ((u64)cur_bit) << 56;
P12;
}
u8 cur_bit = ((y[ylen-1]) & 0x01) << 7;
x0 ^= ((u64)cur_bit) << 56;
P12;
// Extract K*
out64[0] = U64BIG(x0);
out64[1] = U64BIG(x1);
if(outlen == 24){
out64[2] = U64BIG(x2);
}
}
/******************************************************************************/
/* IsapMac */
/******************************************************************************/
void isap_mac(
const u8 *k,
const u8 *npub,
const u8 *ad, const u64 adlen,
const u8 *c, const u64 clen,
u8 *tag
){
u8 state[ISAP_STATE_SZ];
const u64 *npub64 = (u64 *)npub;
u64 *state64 = (u64 *)state;
u64 x0, x1, x2, x3, x4;
u64 t0, t1, t2, t3, t4;
t0 = t1 = t2 = t3 = t4 = 0;
// Init state
x0 = U64BIG(npub64[0]);
x1 = U64BIG(npub64[1]);
x2 = U64BIG(((u64 *)ISAP_IV1)[0]);
x3 = x4 = 0;
P12;
// Absorb AD
ABSORB_MAC(ad,adlen);
// Domain seperation
x4 ^= 0x0000000000000001ULL;
// Absorb C
ABSORB_MAC(c,clen);
// Derive K*
state64[0] = U64BIG(x0);
state64[1] = U64BIG(x1);
state64[2] = U64BIG(x2);
state64[3] = U64BIG(x3);
state64[4] = U64BIG(x4);
isap_rk(k,ISAP_IV2,(u8 *)state64,CRYPTO_KEYBYTES,(u8 *)state64,CRYPTO_KEYBYTES);
x0 = U64BIG(state64[0]);
x1 = U64BIG(state64[1]);
x2 = U64BIG(state64[2]);
x3 = U64BIG(state64[3]);
x4 = U64BIG(state64[4]);
// Squeeze tag
P12;
unsigned long long *tag64 = (u64 *)tag;
tag64[0] = U64BIG(x0);
tag64[1] = U64BIG(x1);
}
/******************************************************************************/
/* IsapEnc */
/******************************************************************************/
void isap_enc(
const u8 *k,
const u8 *npub,
const u8 *m,
const u64 mlen,
u8 *c
){
u8 state[ISAP_STATE_SZ];
// Init state
u64 *state64 = (u64 *)state;
u64 *npub64 = (u64 *)npub;
isap_rk(k,ISAP_IV3,npub,CRYPTO_NPUBBYTES,state,ISAP_STATE_SZ-CRYPTO_NPUBBYTES);
u64 x0, x1, x2, x3, x4;
u64 t0, t1, t2, t3, t4;
t0 = t1 = t2 = t3 = t4 = 0;
x0 = U64BIG(state64[0]);
x1 = U64BIG(state64[1]);
x2 = U64BIG(state64[2]);
x3 = U64BIG(npub64[0]);
x4 = U64BIG(npub64[1]);
P12;
// Squeeze key stream
u64 rem_bytes = mlen;
u64 *m64 = (u64 *)m;
u64 *c64 = (u64 *)c;
u32 idx64 = 0;
while(1){
if(rem_bytes>ISAP_rH_SZ){
// Squeeze full lane
c64[idx64] = U64BIG(x0) ^ m64[idx64];
idx64++;
P12;
rem_bytes -= ISAP_rH_SZ;
} else if(rem_bytes==ISAP_rH_SZ){
// Squeeze full lane and stop
c64[idx64] = U64BIG(x0) ^ m64[idx64];
break;
} else {
// Squeeze partial lane and stop
u64 lane64 = U64BIG(x0);
u8 *lane8 = (u8 *)&lane64;
u32 idx8 = idx64*8;
for (u32 i = 0; i < rem_bytes; i++) {
c[idx8] = lane8[i] ^ m[idx8];
idx8++;
}
break;
}
}
}
#ifndef ISAP_H
#define ISAP_H
// Rate in bytes
#define ISAP_rH 64
#define ISAP_rB 1
// Number of rounds
#define ISAP_sH 12
#define ISAP_sB 12
#define ISAP_sE 12
#define ISAP_sK 12
// State size in bytes
#define ISAP_STATE_SZ 40
// Size of rate in bytes
#define ISAP_rH_SZ ((ISAP_rH+7)/8)
// Size of zero truncated IV in bytes
#define ISAP_IV_SZ 8
// Size of tag in bytes
#define ISAP_TAG_SZ 16
// Security level
#define ISAP_K 128
void isap_mac(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *ad, const unsigned long long adlen,
const unsigned char *c, const unsigned long long clen,
unsigned char *tag
);
void isap_rk(
const unsigned char *k,
const unsigned char *iv,
const unsigned char *in,
const unsigned long long inlen,
unsigned char *out,
const unsigned long long outlen
);
void isap_enc(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *m, const unsigned long long mlen,
unsigned char *c
);
#endif
This source diff could not be displayed because it is too large. You can view the blob instead.
/*
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to our website:
https://keccak.team/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
---
Please refer to SnP-documentation.h for more details.
*/
#ifndef _KeccakP_400_SnP_h_
#define _KeccakP_400_SnP_h_
#define KeccakP400_implementation "32-bit optimized ARM assembler implementation"
#define KeccakP400_stateSizeInBytes 50
#define KeccakP400_stateAlignment 4
/* void KeccakP400_StaticInitialize( void ); */
#define KeccakP400_StaticInitialize()
void KeccakP400_Initialize(void *state);
/* void KeccakP400_AddByte(void *state, unsigned char data, unsigned int offset); */
#define KeccakP400_AddByte(argS, argData, argOffset) ((unsigned char*)argS)[argOffset] ^= (argData)
void KeccakP400_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP400_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP400_OverwriteWithZeroes(void *state, unsigned int byteCount);
void KeccakP400_Permute_Nrounds(void *state, unsigned int nrounds);
void KeccakP400_Permute_20rounds(void *state);
void KeccakP400_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP400_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
#endif
@
@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
@
@ For more information, feedback or questions, please refer to our website:
@ https://keccak.team/
@
@ To the extent possible under law, the implementer has waived all copyright
@ and related or neighboring rights to the source code in this file.
@ http://creativecommons.org/publicdomain/zero/1.0/
@
@ ---
@
@ This file implements Keccak-p[400] in a SnP-compatible way.
@ Please refer to SnP-documentation.h for more details.
@
@ This implementation comes with KeccakP-400-SnP.h in the same folder.
@ Please refer to LowLevel.build for the exact list of other files it must be combined with.
@
@ WARNING: This implementation assumes a little endian CPU with@ ARMv6M architecture (e.g., Cortex-M0).
.thumb
.syntax unified
.text
.equ _ba , 0*2
.equ _be , 1*2
.equ _bi , 2*2
.equ _bo , 3*2
.equ _bu , 4*2
.equ _ga , 5*2
.equ _ge , 6*2
.equ _gi , 7*2
.equ _go , 8*2
.equ _gu , 9*2
.equ _ka , 10*2
.equ _ke , 11*2
.equ _ki , 12*2
.equ _ko , 13*2
.equ _ku , 14*2
.equ _ma , 15*2
.equ _me , 16*2
.equ _mi , 17*2
.equ _mo , 18*2
.equ _mu , 19*2
.equ _sa , 20*2
.equ _se , 21*2
.equ _si , 22*2
.equ _so , 23*2
.equ _su , 24*2
.macro xor5 result,ptr,b,g,k,m,s
ldrh \result, [\ptr, #\b]
ldrh r7, [\ptr, #\g]
eors \result, \result, r7
ldrh r7, [\ptr, #\k]
eors \result, \result, r7
ldrh r7, [\ptr, #\m]
eors \result, \result, r7
ldrh r7, [\ptr, #\s]
eors \result, \result, r7
.endm
.macro xorrol b, yy, rr
mov r7, \yy
eors \b, \b, r7
.if \rr != 0
lsls r7, \b, #\rr
lsrs \b, \b, #16-\rr
orrs \b, \b, r7
uxth \b, \b
.endif
.endm
.macro rolxor d, a, b, rot
sxth r7, \b
rors r7, r7, \rot
eors r7, r7, \a
uxth r7, r7
mov \d, r7
.endm
.macro xandnot resptr, resofs, aa, bb, cc, temp
mov \temp, \cc
bics \temp, \temp, \bb
eors \temp, \temp, \aa
strh \temp, [\resptr, #\resofs]
.endm
.macro xandnotRC resptr, resofs, aa, bb, cc, rco
bics \cc, \cc, \bb
eors \cc, \cc, \aa
mov r7, r8
ldrh \bb, [r7, #\rco]
eors \cc, \cc, \bb
strh \cc, [\resptr, #\resofs]
.endm
.macro KeccakRound sOut, sIn, rco
@prepTheta
push { \sOut }
movs \sOut, #31
xor5 r1, \sIn, _ba, _ga, _ka, _ma, _sa
xor5 r2, \sIn, _be, _ge, _ke, _me, _se
xor5 r3, \sIn, _bi, _gi, _ki, _mi, _si
xor5 r4, \sIn, _bo, _go, _ko, _mo, _so
xor5 r5, \sIn, _bu, _gu, _ku, _mu, _su
rolxor r9, r5, r2, \sOut
rolxor r10, r1, r3, \sOut
rolxor r11, r2, r4, \sOut
rolxor r12, r3, r5, \sOut
rolxor lr, r4, r1, \sOut
pop { \sOut }
@thetaRhoPiChiIota
ldrh r1, [\sIn, #_bo]
ldrh r2, [\sIn, #_gu]
ldrh r3, [\sIn, #_ka]
ldrh r4, [\sIn, #_me]
ldrh r5, [\sIn, #_si]
xorrol r1, r12, 12
xorrol r2, lr, 4
xorrol r3, r9, 3
xorrol r4, r10, 13
xorrol r5, r11, 13
xandnot \sOut, _ga, r1, r2, r3, r7
xandnot \sOut, _ge, r2, r3, r4, r7
xandnot \sOut, _gi, r3, r4, r5, r7
xandnot \sOut, _go, r4, r5, r1, r7
xandnot \sOut, _gu, r5, r1, r2, r7
ldrh r1, [\sIn, #_be]
ldrh r2, [\sIn, #_gi]
ldrh r3, [\sIn, #_ko]
ldrh r4, [\sIn, #_mu]
ldrh r5, [\sIn, #_sa]
xorrol r1, r10, 1
xorrol r2, r11, 6
xorrol r3, r12, 9
xorrol r4, lr, 8
xorrol r5, r9, 2
xandnot \sOut, _ka, r1, r2, r3, r7
xandnot \sOut, _ke, r2, r3, r4, r7
xandnot \sOut, _ki, r3, r4, r5, r7
xandnot \sOut, _ko, r4, r5, r1, r7
xandnot \sOut, _ku, r5, r1, r2, r7
ldrh r1, [\sIn, #_bu]
ldrh r2, [\sIn, #_ga]
ldrh r3, [\sIn, #_ke]
ldrh r4, [\sIn, #_mi]
ldrh r5, [\sIn, #_so]
xorrol r1, lr, 11
xorrol r2, r9, 4
xorrol r3, r10, 10
xorrol r4, r11, 15
xorrol r5, r12, 8
xandnot \sOut, _ma, r1, r2, r3, r7
xandnot \sOut, _me, r2, r3, r4, r7
xandnot \sOut, _mi, r3, r4, r5, r7
xandnot \sOut, _mo, r4, r5, r1, r7
xandnot \sOut, _mu, r5, r1, r2, r7
ldrh r1, [\sIn, #_bi]
ldrh r2, [\sIn, #_go]
ldrh r3, [\sIn, #_ku]
ldrh r4, [\sIn, #_ma]
ldrh r5, [\sIn, #_se]
xorrol r1, r11, 14
xorrol r2, r12, 7
xorrol r3, lr, 7
xorrol r4, r9, 9
xorrol r5, r10, 2
xandnot \sOut, _sa, r1, r2, r3, r7
xandnot \sOut, _se, r2, r3, r4, r7
xandnot \sOut, _si, r3, r4, r5, r7
xandnot \sOut, _so, r4, r5, r1, r7
xandnot \sOut, _su, r5, r1, r2, r7
ldrh r1, [\sIn, #_ba]
ldrh r2, [\sIn, #_ge]
ldrh r3, [\sIn, #_ki]
ldrh r4, [\sIn, #_mo]
ldrh r5, [\sIn, #_su]
xorrol r1, r9, 0
xorrol r2, r10, 12
xorrol r3, r11, 11
xorrol r4, r12, 5
xorrol r5, lr, 14
xandnot \sOut, _be, r2, r3, r4, r7
xandnot \sOut, _bi, r3, r4, r5, r7
xandnot \sOut, _bo, r4, r5, r1, r7
xandnot \sOut, _bu, r5, r1, r2, r7
xandnotRC \sOut, _ba, r1, r2, r3, \rco
.endm
@----------------------------------------------------------------------------
@
@ void KeccakP400_StaticInitialize( void )
@
.align 4
.global KeccakP400_StaticInitialize
.type KeccakP400_StaticInitialize, %function;
KeccakP400_StaticInitialize:
bx lr
@----------------------------------------------------------------------------
@
@ void KeccakP400_Initialize(void *state)
@
.align 4
.global KeccakP400_Initialize
.type KeccakP400_Initialize, %function;
KeccakP400_Initialize:
movs r1, #0
movs r2, #0
movs r3, #0
stmia r0!, { r1 - r3 }
stmia r0!, { r1 - r3 }
stmia r0!, { r1 - r3 }
stmia r0!, { r1 - r3 }
strh r1, [r0]
bx lr
@ ----------------------------------------------------------------------------
@
@ void KeccakP400_AddByte(void *state, unsigned char byte, unsigned int offset)
@
.align 4
.global KeccakP400_AddByte
.type KeccakP400_AddByte, %function;
KeccakP400_AddByte:
ldrb r3, [r0, r2]
eors r3, r3, r1
strb r3, [r0, r2]
bx lr
@----------------------------------------------------------------------------
@
@ void KeccakP400_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
.align 4
.global KeccakP400_AddBytes
.type KeccakP400_AddBytes, %function;
KeccakP400_AddBytes:
subs r3, r3, #1
bcc KeccakP400_AddBytes_Exit
adds r0, r0, r2
push {r4,lr}
KeccakP400_AddBytes_Loop:
ldrb r2, [r1, r3]
ldrb r4, [r0, r3]
eors r2, r2, r4
strb r2, [r0, r3]
subs r3, r3, #1
bcs KeccakP400_AddBytes_Loop
pop {r4,pc}
KeccakP400_AddBytes_Exit:
bx lr
@ ----------------------------------------------------------------------------
@
@ void KeccakP400_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
.align 4
.global KeccakP400_OverwriteBytes
.type KeccakP400_OverwriteBytes, %function;
KeccakP400_OverwriteBytes:
subs r3, r3, #1
bcc KeccakP400_OverwriteBytes_Exit
adds r0, r0, r2
KeccakP400_OverwriteBytes_Loop:
ldrb r2, [r1, r3]
strb r2, [r0, r3]
subs r3, r3, #1
bcs KeccakP400_OverwriteBytes_Loop
KeccakP400_OverwriteBytes_Exit:
bx lr
@----------------------------------------------------------------------------
@
@ void KeccakP400_OverwriteWithZeroes(void *state, unsigned int byteCount)
@
.align 4
.global KeccakP400_OverwriteWithZeroes
.type KeccakP400_OverwriteWithZeroes, %function;
KeccakP400_OverwriteWithZeroes:
movs r3, #0
cmp r1, #0
beq KeccakP400_OverwriteWithZeroes_Exit
KeccakP400_OverwriteWithZeroes_LoopBytes:
subs r1, r1, #1
strb r3, [r0, r1]
bne KeccakP400_OverwriteWithZeroes_LoopBytes
KeccakP400_OverwriteWithZeroes_Exit:
bx lr
@ ----------------------------------------------------------------------------
@
@ void KeccakP400_ExtractBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
.align 4
.global KeccakP400_ExtractBytes
.type KeccakP400_ExtractBytes, %function;
KeccakP400_ExtractBytes:
subs r3, r3, #1
bcc KeccakP400_ExtractBytes_Exit
adds r0, r0, r2
KeccakP400_ExtractBytes_Loop:
ldrb r2, [r0, r3]
strb r2, [r1, r3]
subs r3, r3, #1
bcs KeccakP400_ExtractBytes_Loop
KeccakP400_ExtractBytes_Exit:
bx lr
@ ----------------------------------------------------------------------------
@
@ void KeccakP400_ExtractAndAddBytes(void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
@
.align 4
.global KeccakP400_ExtractAndAddBytes
.type KeccakP400_ExtractAndAddBytes, %function;
KeccakP400_ExtractAndAddBytes:
push {r4,r5}
adds r0, r0, r3 @ state += offset (offset register no longer needed, reuse for length)
ldr r3, [sp, #8] @ get length argument from stack
subs r3, r3, #1 @ .if length != 0
bcc KeccakP400_ExtractAndAddBytes_Exit
KeccakP400_ExtractAndAddBytes_Loop:
ldrb r5, [r0, r3]
ldrb r4, [r1, r3]
eors r5, r5, r4
strb r5, [r2, r3]
subs r3, r3, #1
bcs KeccakP400_ExtractAndAddBytes_Loop
KeccakP400_ExtractAndAddBytes_Exit:
pop {r4,r5}
bx lr
@----------------------------------------------------------------------------
@
@ void KeccakP400_Permute_Nrounds( void *state, unsigned int nr )
@
.align 4
.global KeccakP400_Permute_Nrounds
.type KeccakP400_Permute_Nrounds, %function;
KeccakP400_Permute_Nrounds:
push { r4 - r6, lr }
mov r2, r8
mov r3, r9
mov r4, r10
mov r5, r11
mov r6, r12
push { r2 - r7 }
sub sp, sp, #25*2+6
mov r6, sp
adr r7, KeccakP400_Permute_RoundConstants
subs r7, r7, r1
subs r7, r7, r1
lsls r1, r1, #31
beq KeccakP400_Permute_Nrounds_GoRoundLoop
subs r7, r7, #2 @ odd number of rounds
mov r8, r7
ldm r0!, { r1, r2, r3, r4, r5, r7 } @ copy state to stack
stm r6!, { r1, r2, r3, r4, r5, r7 }
ldm r0!, { r1, r2, r3, r4, r5, r7 }
stm r6!, { r1, r2, r3, r4, r5, r7 }
subs r0, r0, #48
subs r6, r6, #48
ldrh r1, [r0, #_su]
strh r1, [r6, #_su]
b KeccakP400_Permute_RoundOdd
KeccakP400_Permute_Nrounds_GoRoundLoop:
b KeccakP400_Permute_RoundLoop
nop
@----------------------------------------------------------------------------
@
@ void KeccakP400_Permute_20rounds( void *state )
@
.align 4
.global KeccakP400_Permute_20rounds
.type KeccakP400_Permute_20rounds, %function;
KeccakP400_Permute_20rounds:
push { r4 - r6, lr }
mov r2, r8
mov r3, r9
mov r4, r10
mov r5, r11
mov r6, r12
push { r2 - r7 }
sub sp, sp, #25*2+6
mov r6, sp
adr r7, KeccakP400_Permute_RoundConstants20
b KeccakP400_Permute_RoundLoop
.align 4
KeccakP400_Permute_RoundConstants20:
.short 0x0001
.short 0x8082
.short 0x808a
.short 0x8000
.short 0x808b
.short 0x0001
.short 0x8081
.short 0x8009
.short 0x008a
.short 0x0088
.short 0x8009
.short 0x000a
.short 0x808b
.short 0x008b
.short 0x8089
.short 0x8003
.short 0x8002
.short 0x0080
.short 0x800a
.short 0x000a
KeccakP400_Permute_RoundConstants:
.short 0xFF @terminator
KeccakP400_Permute_RoundLoop:
mov r8, r7
KeccakRound r6, r0, 0
KeccakP400_Permute_RoundOdd:
KeccakRound r0, r6, 2
adds r7, r7, #4
ldrh r1, [r7]
cmp r1, #0xFF
beq KeccakP400_Permute_Done
b KeccakP400_Permute_RoundLoop
KeccakP400_Permute_Done:
add sp,sp,#25*2+6
pop { r1 - r5, r7 }
mov r8, r1
mov r9, r2
mov r10, r3
mov r11, r4
mov r12, r5
pop { r4 - r6, pc }
#ifndef API_H
#define API_H
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#endif
#include "api.h"
#include "isap.h"
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
){
(void)nsec;
// Ciphertext length is mlen + tag length
*clen = mlen+ISAP_TAG_SZ;
// Encrypt plaintext
if (mlen > 0) {
isap_enc(k,npub,m,mlen,c);
}
// Generate tag
unsigned char *tag = c+mlen;
isap_mac(k,npub,ad,adlen,c,mlen,tag);
return 0;
}
#include "api.h"
#include "isap.h"
#include "inttypes.h"
#include "KeccakP-400-SnP.h"
const unsigned char ISAP_IV1[] = {0x01,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK};
const unsigned char ISAP_IV2[] = {0x02,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK};
const unsigned char ISAP_IV3[] = {0x03,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK};
/****************************************/
/* IsapRk */
/****************************************/
void isap_rk(
const unsigned char *k,
const unsigned char *iv,
const unsigned char *in,
const unsigned long inlen,
unsigned char *out,
const unsigned long outlen
){
// Init State
unsigned char state[ISAP_STATE_SZ] __attribute__((aligned(4)));
KeccakP400_Initialize(state);
KeccakP400_AddBytes(state,k,0,CRYPTO_KEYBYTES);
KeccakP400_AddBytes(state,iv,CRYPTO_KEYBYTES,ISAP_IV_SZ);
KeccakP400_Permute_Nrounds(state,ISAP_sK);
// Absorb
for (unsigned long i = 0; i < inlen*8-1; i++){
unsigned long cur_byte_pos = i/8;
unsigned long cur_bit_pos = 7-(i%8);
unsigned char cur_bit = ((in[cur_byte_pos] >> (cur_bit_pos)) & 0x01) << 7;
state[0] ^= cur_bit;
KeccakP400_Permute_Nrounds(state,ISAP_sB);
}
unsigned char cur_bit = ((in[inlen-1]) & 0x01) << 7;
state[0] ^= cur_bit;
KeccakP400_Permute_Nrounds(state,ISAP_sK);
// Squeeze K*
KeccakP400_ExtractBytes(state,out,0,outlen);
}
/****************************************/
/* IsapMac */
/****************************************/
void isap_mac(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *ad, const unsigned long adlen,
const unsigned char *c, const unsigned long clen,
unsigned char *tag
){
// Init State
unsigned char state[ISAP_STATE_SZ] __attribute__ ((aligned(4)));
uint16_t *state16 = (uint16_t *)state;
uint16_t *ad16 = (uint16_t *)ad;
uint16_t *c16 = (uint16_t *)c;
KeccakP400_Initialize(state);
KeccakP400_AddBytes(state,npub,0,CRYPTO_NPUBBYTES);
KeccakP400_AddBytes(state,ISAP_IV1,CRYPTO_NPUBBYTES,ISAP_IV_SZ);
KeccakP400_Permute_Nrounds(state,ISAP_sH);
// Absorb AD
unsigned long idx16 = 0;
unsigned long rem_bytes = adlen;
while(rem_bytes>=18){
state16[0] ^= ad16[idx16+0];
state16[1] ^= ad16[idx16+1];
state16[2] ^= ad16[idx16+2];
state16[3] ^= ad16[idx16+3];
state16[4] ^= ad16[idx16+4];
state16[5] ^= ad16[idx16+5];
state16[6] ^= ad16[idx16+6];
state16[7] ^= ad16[idx16+7];
state16[8] ^= ad16[idx16+8];
rem_bytes -= 18;
idx16 += 9;
KeccakP400_Permute_Nrounds(state,ISAP_sH);
}
uint32_t idx8 = idx16*2;
for (uint32_t i = 0; i < rem_bytes; i++) {
state[i] ^= ad[idx8+i];
}
state[rem_bytes] ^= 0x80;
KeccakP400_Permute_Nrounds(state,ISAP_sH);
// Domain Seperation
state[ISAP_STATE_SZ-1] ^= 0x01;
// Absorb C
idx16 = 0;
rem_bytes = clen;
while(rem_bytes>=18){
state16[0] ^= c16[idx16+0];
state16[1] ^= c16[idx16+1];
state16[2] ^= c16[idx16+2];
state16[3] ^= c16[idx16+3];
state16[4] ^= c16[idx16+4];
state16[5] ^= c16[idx16+5];
state16[6] ^= c16[idx16+6];
state16[7] ^= c16[idx16+7];
state16[8] ^= c16[idx16+8];
rem_bytes -= 18;
idx16 += 9;
KeccakP400_Permute_Nrounds(state,ISAP_sH);
}
idx8 = idx16*2;
for (uint32_t i = 0; i < rem_bytes; i++) {
state[i] ^= c[idx8+i];
}
state[rem_bytes] ^= 0x80;
KeccakP400_Permute_Nrounds(state,ISAP_sH);
// Derive Ka*
unsigned char y[CRYPTO_KEYBYTES];
unsigned char ka_star[CRYPTO_KEYBYTES];
KeccakP400_ExtractBytes(state,y,0,CRYPTO_KEYBYTES);
isap_rk(k,ISAP_IV2,y,CRYPTO_KEYBYTES,ka_star,CRYPTO_KEYBYTES);
// Squeezing Tag
KeccakP400_OverwriteBytes(state,ka_star,0,CRYPTO_KEYBYTES);
KeccakP400_Permute_Nrounds(state,ISAP_sH);
KeccakP400_ExtractBytes(state,tag,0,CRYPTO_KEYBYTES);
}
/****************************************/
/* IsapEnc */
/****************************************/
void isap_enc(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *m, const unsigned long mlen,
unsigned char *c
){
uint8_t state[ISAP_STATE_SZ] __attribute__((aligned(4)));
uint16_t *state16 = (uint16_t *)state;
uint16_t *c16 = (uint16_t *)c;
uint16_t *m16 = (uint16_t *)m;
isap_rk(k,ISAP_IV3,npub,CRYPTO_NPUBBYTES,(unsigned char *)state,ISAP_STATE_SZ-CRYPTO_NPUBBYTES);
KeccakP400_OverwriteBytes(state,npub,ISAP_STATE_SZ-CRYPTO_NPUBBYTES,CRYPTO_NPUBBYTES);
uint16_t ks16[9];
unsigned long idx16 = 0;
unsigned long rem_bytes = mlen;
// Squeeze Keystream
while(rem_bytes>=18){
KeccakP400_Permute_Nrounds(state,ISAP_sE);
c16[idx16+0] = m16[idx16+0] ^ state16[0];
c16[idx16+1] = m16[idx16+1] ^ state16[1];
c16[idx16+2] = m16[idx16+2] ^ state16[2];
c16[idx16+3] = m16[idx16+3] ^ state16[3];
c16[idx16+4] = m16[idx16+4] ^ state16[4];
c16[idx16+5] = m16[idx16+5] ^ state16[5];
c16[idx16+6] = m16[idx16+6] ^ state16[6];
c16[idx16+7] = m16[idx16+7] ^ state16[7];
c16[idx16+8] = m16[idx16+8] ^ state16[8];
rem_bytes -= 18;
idx16 += 9;
}
// Squeeze Keystream
if(rem_bytes>0){
KeccakP400_Permute_Nrounds(state,ISAP_sE);
unsigned long idx8 = idx16*2;
for (uint32_t i = 0; i < rem_bytes; i++) {
c[idx8+i] = m[idx8+i] ^ state[i];
}
}
}
#ifndef ISAP_H
#define ISAP_H
// Rate in bits
#define ISAP_rH 144
#define ISAP_rB 1
// Number of rounds
#define ISAP_sH 16
#define ISAP_sB 1
#define ISAP_sE 8
#define ISAP_sK 8
// State size in bytes
#define ISAP_STATE_SZ 50
// Size of rate in bytes
#define ISAP_rH_SZ ((ISAP_rH+7)/8)
// Size of zero truncated IV in bytes
#define ISAP_IV_SZ 8
// Size of tag in bytes
#define ISAP_TAG_SZ 16
// Security level
#define ISAP_K 128
void isap_mac(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *ad, const unsigned long adlen,
const unsigned char *c, const unsigned long clen,
unsigned char *tag
);
void isap_rk(
const unsigned char *k,
const unsigned char *iv,
const unsigned char *in,
const unsigned long inlen,
unsigned char *out,
const unsigned long outlen
);
void isap_enc(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *m, const unsigned long mlen,
unsigned char *c
);
#endif
/*
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to our website:
https://keccak.team/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
---
Please refer to SnP-documentation.h for more details.
*/
#ifndef _KeccakP_400_SnP_h_
#define _KeccakP_400_SnP_h_
#define KeccakP400_implementation "32-bit optimized ARM assembler implementation"
#define KeccakP400_stateSizeInBytes 50
#define KeccakP400_stateAlignment 4
/* void KeccakP400_StaticInitialize( void ); */
#define KeccakP400_StaticInitialize()
void KeccakP400_Initialize(void *state);
/* void KeccakP400_AddByte(void *state, unsigned char data, unsigned int offset); */
#define KeccakP400_AddByte(argS, argData, argOffset) ((unsigned char*)argS)[argOffset] ^= (argData)
void KeccakP400_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP400_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP400_OverwriteWithZeroes(void *state, unsigned int byteCount);
void KeccakP400_Permute_Nrounds(void *state, unsigned int nrounds);
void KeccakP400_Permute_20rounds(void *state);
void KeccakP400_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP400_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
#endif
@
@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
@
@ For more information, feedback or questions, please refer to our website:
@ https://keccak.team/
@
@ To the extent possible under law, the implementer has waived all copyright
@ and related or neighboring rights to the source code in this file.
@ http://creativecommons.org/publicdomain/zero/1.0/
@
@ ---
@
@ This file implements Keccak-p[400] in a SnP-compatible way.
@ Please refer to SnP-documentation.h for more details.
@
@ This implementation comes with KeccakP-400-SnP.h in the same folder.
@ Please refer to LowLevel.build for the exact list of other files it must be combined with.
@
@ WARNING: This implementation assumes a little endian CPU with ARMv7M architecture (e.g., Cortex-M3) and the GCC compiler.
.thumb
.syntax unified
.text
@----------------------------------------------------------------------------
.equ _ba , 0*2
.equ _be , 1*2
.equ _bi , 2*2
.equ _bo , 3*2
.equ _bu , 4*2
.equ _ga , 5*2
.equ _ge , 6*2
.equ _gi , 7*2
.equ _go , 8*2
.equ _gu , 9*2
.equ _ka , 10*2
.equ _ke , 11*2
.equ _ki , 12*2
.equ _ko , 13*2
.equ _ku , 14*2
.equ _ma , 15*2
.equ _me , 16*2
.equ _mi , 17*2
.equ _mo , 18*2
.equ _mu , 19*2
.equ _sa , 20*2
.equ _se , 21*2
.equ _si , 22*2
.equ _so , 23*2
.equ _su , 24*2
.macro xor5 result,ptr,b,g,k,m,rs
ldrh \result, [\ptr, #\b]
ldrh r6, [\ptr, #\g]
eor \result, \result, \rs
ldrh \rs, [\ptr, #\k]
eor \result, \result, r6
ldrh r6, [\ptr, #\m]
eor \result, \result, \rs
eor \result, \result, r6
.endm
.macro xor5D resultL,resultH,ptr,b,g,k,m,rsL,rsH
bfi \rsL, \rsH, #16, #16
ldr \resultL, [\ptr, #\b]
ldr r6, [\ptr, #\g]
eor \resultL, \resultL, \rsL
ldr \rsL, [\ptr, #\k]
eor \resultL, \resultL, r6
ldr r6, [\ptr, #\m]
eor \resultL, \resultL, \rsL
eor \resultL, \resultL, r6
lsr \resultH, \resultL, #16
uxth \resultL, \resultL
.endm
.macro xorrol b, yy, rr
eor \b, \b, \yy
.if \rr != 8
lsl \b, \b, #\rr
orr \b, \b, \b, LSR #16
.else
rev16 \b, \b
.endif
.endm
.macro rolxor d, a, b
eor \d, \a, \b, LSL #1
eor \d, \d, \b, LSR #15
uxth \d, \d
.endm
.macro xandnot resptr, resofs, aa, bb, cc, temp
bic \temp, \cc, \bb
eor \temp, \temp, \aa
strh \temp, [\resptr, #\resofs]
.endm
.macro xandnotRC resptr, resofs, aa, bb, cc
ldrh r6, [r8], #2
bic \cc, \cc, \bb
eor \cc, \cc, r6
eor \cc, \cc, \aa
strh \cc, [\resptr, #\resofs]
.endm
.macro KeccakRound sOut, sIn
@prepTheta
xor5D r1, r2, \sIn, _ba, _ga, _ka, _ma, r9, r10
xor5D r3, r4, \sIn, _bi, _gi, _ki, _mi, r11, r12
rolxor r9, r7, r2
rolxor r10, r1, r3
rolxor r11, r2, r4
rolxor r12, r3, r7
rolxor lr, r4, r1
@thetaRhoPiChiIota
ldrh r1, [\sIn, #_ba]
ldrh r2, [\sIn, #_ge]
ldrh r3, [\sIn, #_ki]
ldrh r4, [\sIn, #_mo]
eor r1, r1, r9
xorrol r2, r10, 12
xorrol r3, r11, 11
xorrol r4, r12, 5
xorrol r5, lr, 14
xandnot \sOut, _be, r2, r3, r4, r6
xandnot \sOut, _bi, r3, r4, r5, r6
xandnot \sOut, _bo, r4, r5, r1, r6
xandnot \sOut, _bu, r5, r1, r2, r7
xandnotRC \sOut, _ba, r1, r2, r3
ldrh r1, [\sIn, #_bo]
ldrh r2, [\sIn, #_gu]
ldrh r3, [\sIn, #_ka]
ldrh r4, [\sIn, #_me]
ldrh r5, [\sIn, #_si]
xorrol r1, r12, 12
xorrol r2, lr, 4
xorrol r3, r9, 3
xorrol r4, r10, 13
xorrol r5, r11, 13
xandnot \sOut, _ga, r1, r2, r3, r6
xandnot \sOut, _ge, r2, r3, r4, r6
xandnot \sOut, _gi, r3, r4, r5, r6
xandnot \sOut, _go, r4, r5, r1, r6
xandnot \sOut, _gu, r5, r1, r2, r6
eor r7, r7, r6
ldrh r1, [\sIn, #_be]
ldrh r2, [\sIn, #_gi]
ldrh r3, [\sIn, #_ko]
ldrh r4, [\sIn, #_mu]
ldrh r5, [\sIn, #_sa]
xorrol r1, r10, 1
xorrol r2, r11, 6
xorrol r3, r12, 9
xorrol r4, lr, 8
xorrol r5, r9, 2
xandnot \sOut, _ka, r1, r2, r3, r6
xandnot \sOut, _ke, r2, r3, r4, r6
xandnot \sOut, _ki, r3, r4, r5, r6
xandnot \sOut, _ko, r4, r5, r1, r6
xandnot \sOut, _ku, r5, r1, r2, r6
eor r7, r7, r6
ldrh r1, [\sIn, #_bu]
ldrh r2, [\sIn, #_ga]
ldrh r3, [\sIn, #_ke]
ldrh r4, [\sIn, #_mi]
ldrh r5, [\sIn, #_so]
xorrol r1, lr, 11
xorrol r2, r9, 4
xorrol r3, r10, 10
xorrol r4, r11, 15
xorrol r5, r12, 8
xandnot \sOut, _ma, r1, r2, r3, r6
xandnot \sOut, _me, r2, r3, r4, r6
xandnot \sOut, _mi, r3, r4, r5, r6
xandnot \sOut, _mo, r4, r5, r1, r6
xandnot \sOut, _mu, r5, r1, r2, r6
eor r7, r7, r6
ldrh r1, [\sIn, #_bi]
ldrh r2, [\sIn, #_go]
ldrh r3, [\sIn, #_ku]
ldrh r4, [\sIn, #_ma]
ldrh r5, [\sIn, #_se]
xorrol r1, r11, 14
xorrol r2, r12, 7
xorrol r3, lr, 7
xorrol r4, r9, 9
xorrol r5, r10, 2
xandnot \sOut, _sa, r1, r2, r3, r9
xandnot \sOut, _se, r2, r3, r4, r10
xandnot \sOut, _si, r3, r4, r5, r11
xandnot \sOut, _so, r4, r5, r1, r12
bic r1, r2, r1
eor r5, r5, r1
eor r7, r7, r5
uxth r7, r7
strh r5, [\sOut, #_su]
uxth r5, r5
.endm
@----------------------------------------------------------------------------
@
@ void KeccakP400_StaticInitialize( void )
@
.align 8
.global KeccakP400_StaticInitialize
.type KeccakP400_StaticInitialize, %function;
KeccakP400_StaticInitialize:
bx lr
@----------------------------------------------------------------------------
@
@ void KeccakP400_Initialize(void *state)
@
.align 8
.global KeccakP400_Initialize
.type KeccakP400_Initialize, %function;
KeccakP400_Initialize:
movs r1, #0
movs r2, #0
movs r3, #0
stmia r0!, { r1 - r3 }
stmia r0!, { r1 - r3 }
stmia r0!, { r1 - r3 }
stmia r0!, { r1 - r3 }
strh r1, [r0]
bx lr
@ ----------------------------------------------------------------------------
@
@ void KeccakP400_AddByte(void *state, unsigned char byte, unsigned int offset)
@
.align 8
.global KeccakP400_AddByte
.type KeccakP400_AddByte, %function;
KeccakP400_AddByte:
ldrb r3, [r0, r2]
eors r3, r3, r1
strb r3, [r0, r2]
bx lr
@----------------------------------------------------------------------------
@
@ void KeccakP400_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
.align 8
.global KeccakP400_AddBytes
.type KeccakP400_AddBytes, %function;
KeccakP400_AddBytes:
subs r3, r3, #1
bcc KeccakP400_AddBytes_Exit
adds r0, r0, r2
push {r4,lr}
KeccakP400_AddBytes_Loop:
ldrb r2, [r1, r3]
ldrb r4, [r0, r3]
eors r2, r2, r4
strb r2, [r0, r3]
subs r3, r3, #1
bcs KeccakP400_AddBytes_Loop
pop {r4,pc}
KeccakP400_AddBytes_Exit:
bx lr
@ ----------------------------------------------------------------------------
@
@ void KeccakP400_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
.align 8
.global KeccakP400_OverwriteBytes
.type KeccakP400_OverwriteBytes, %function;
KeccakP400_OverwriteBytes:
subs r3, r3, #1
bcc KeccakP400_OverwriteBytes_Exit
adds r0, r0, r2
KeccakP400_OverwriteBytes_Loop:
ldrb r2, [r1, r3]
strb r2, [r0, r3]
subs r3, r3, #1
bcs KeccakP400_OverwriteBytes_Loop
KeccakP400_OverwriteBytes_Exit:
bx lr
@----------------------------------------------------------------------------
@
@ void KeccakP400_OverwriteWithZeroes(void *state, unsigned int byteCount)
@
.align 8
.global KeccakP400_OverwriteWithZeroes
.type KeccakP400_OverwriteWithZeroes, %function;
KeccakP400_OverwriteWithZeroes:
movs r3, #0
lsrs r2, r1, #2
beq KeccakP400_OverwriteWithZeroes_Bytes
KeccakP400_OverwriteWithZeroes_Loop2Lanes:
str r3, [r0], #4
subs r2, r2, #1
bne KeccakP400_OverwriteWithZeroes_Loop2Lanes
KeccakP400_OverwriteWithZeroes_Bytes:
ands r1, #3
beq KeccakP400_OverwriteWithZeroes_Exit
KeccakP400_OverwriteWithZeroes_LoopBytes:
strb r3, [r0], #1
subs r1, r1, #1
bne KeccakP400_OverwriteWithZeroes_LoopBytes
KeccakP400_OverwriteWithZeroes_Exit:
bx lr
@ ----------------------------------------------------------------------------
@
@ void KeccakP400_ExtractBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
.align 8
.global KeccakP400_ExtractBytes
.type KeccakP400_ExtractBytes, %function;
KeccakP400_ExtractBytes:
subs r3, r3, #1
bcc KeccakP400_ExtractBytes_Exit
adds r0, r0, r2
KeccakP400_ExtractBytes_Loop:
ldrb r2, [r0, r3]
strb r2, [r1, r3]
subs r3, r3, #1
bcs KeccakP400_ExtractBytes_Loop
KeccakP400_ExtractBytes_Exit:
bx lr
@ ----------------------------------------------------------------------------
@
@ void KeccakP400_ExtractAndAddBytes(void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
@
.align 8
.global KeccakP400_ExtractAndAddBytes
.type KeccakP400_ExtractAndAddBytes, %function;
KeccakP400_ExtractAndAddBytes:
push {r4,r5}
adds r0, r0, r3 @ state += offset (offset register no longer needed, reuse for length)
ldr r3, [sp, #8] @ get length argument from stack
subs r3, r3, #1 @ .if length != 0
bcc KeccakP400_ExtractAndAddBytes_Exit
KeccakP400_ExtractAndAddBytes_Loop:
ldrb r5, [r0, r3]
ldrb r4, [r1, r3]
eors r5, r5, r4
strb r5, [r2, r3]
subs r3, r3, #1
bcs KeccakP400_ExtractAndAddBytes_Loop
KeccakP400_ExtractAndAddBytes_Exit:
pop {r4,r5}
bx lr
@----------------------------------------------------------------------------
@
@ void KeccakP400_Permute_Nrounds( void *state, unsigned int nr )
@
.align 8
.global KeccakP400_Permute_Nrounds
.type KeccakP400_Permute_Nrounds, %function;
KeccakP400_Permute_Nrounds:
push {r4-r12,lr}
sub sp, sp, #2*25+6
adr r8, KeccakP400_Permute_RoundConstants
sub r8, r8, r1, LSL #1
lsls r1, r1, #31
beq KeccakP400_PermuteIntern
mov r1, sp @ odd number of rounds
ldm r0!, { r2, r3, r4, r5, r11, r12 } @ copy state to stack
stm r1!, { r2, r3, r4, r5, r11, r12 }
ldm r0!, { r2, r3, r4, r5, r11, r12 }
stm r1!, { r2, r3, r4, r5, r11, r12 }
sub r0, r0, #2*24
ldrh lr, [r0, #_su]
strh lr, [sp, #_su]
uxth r9, r11
uxth r10, r11, ROR #16
uxth r11, r12
lsr r12, r12, #16
mov r5, lr
xor5 r7, r0, _bu, _gu, _ku, _mu, lr
b KeccakP400_Permute_RoundOdd
@----------------------------------------------------------------------------
@
@ void KeccakP400_Permute_20rounds( void *state )
@
.align 8
.global KeccakP400_Permute_20rounds
.type KeccakP400_Permute_20rounds, %function;
KeccakP400_Permute_20rounds:
push {r4-r12,lr}
sub sp, sp, #2*25+6
adr r8, KeccakP400_Permute_RoundConstants20
KeccakP400_PermuteIntern:
ldrh r9, [r0, #_sa]
ldrh r10, [r0, #_se]
ldrh r11, [r0, #_si]
ldrh lr, [r0, #_su]
ldrh r12, [r0, #_so]
mov r5, lr
xor5 r7, r0, _bu, _gu, _ku, _mu, lr
KeccakP400_Permute_RoundLoop:
KeccakRound sp, r0
KeccakP400_Permute_RoundOdd:
KeccakRound r0, sp
ldrh r3, [r8]
cmp r3, #0
bne KeccakP400_Permute_RoundLoop
add sp,sp,#2*25+6
pop {r4-r12,pc}
.align 8
KeccakP400_Permute_RoundConstants20:
.short 0x0001
.short 0x8082
.short 0x808a
.short 0x8000
.short 0x808b
.short 0x0001
.short 0x8081
.short 0x8009
.short 0x008a
.short 0x0088
.short 0x8009
.short 0x000a
.short 0x808b
.short 0x008b
.short 0x8089
.short 0x8003
.short 0x8002
.short 0x0080
.short 0x800a
.short 0x000a
KeccakP400_Permute_RoundConstants:
.short 0 @ terminator
#ifndef API_H
#define API_H
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#endif
#include "api.h"
#include "isap.h"
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
){
(void)nsec;
// Ciphertext length is mlen + tag length
*clen = mlen+ISAP_TAG_SZ;
// Encrypt plaintext
if (mlen > 0) {
isap_enc(k,npub,m,mlen,c);
}
// Generate tag
unsigned char *tag = c+mlen;
isap_mac(k,npub,ad,adlen,c,mlen,tag);
return 0;
}
#include "api.h"
#include "isap.h"
#include "inttypes.h"
#include "KeccakP-400-SnP.h"
const unsigned char ISAP_IV1[] = {0x01,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK};
const unsigned char ISAP_IV2[] = {0x02,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK};
const unsigned char ISAP_IV3[] = {0x03,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK};
/****************************************/
/* IsapRk */
/****************************************/
void isap_rk(
const unsigned char *k,
const unsigned char *iv,
const unsigned char *in,
const unsigned long inlen,
unsigned char *out,
const unsigned long outlen
){
// Init State
unsigned char state[ISAP_STATE_SZ] __attribute__((aligned(4)));
KeccakP400_Initialize(state);
KeccakP400_AddBytes(state,k,0,CRYPTO_KEYBYTES);
KeccakP400_AddBytes(state,iv,CRYPTO_KEYBYTES,ISAP_IV_SZ);
KeccakP400_Permute_Nrounds(state,ISAP_sK);
// Absorb
for (unsigned long i = 0; i < inlen*8-1; i++){
unsigned long cur_byte_pos = i/8;
unsigned long cur_bit_pos = 7-(i%8);
unsigned char cur_bit = ((in[cur_byte_pos] >> (cur_bit_pos)) & 0x01) << 7;
state[0] ^= cur_bit;
KeccakP400_Permute_Nrounds(state,ISAP_sB);
}
unsigned char cur_bit = ((in[inlen-1]) & 0x01) << 7;
state[0] ^= cur_bit;
KeccakP400_Permute_Nrounds(state,ISAP_sK);
// Squeeze K*
KeccakP400_ExtractBytes(state,out,0,outlen);
}
/****************************************/
/* IsapMac */
/****************************************/
void isap_mac(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *ad, const unsigned long adlen,
const unsigned char *c, const unsigned long clen,
unsigned char *tag
){
// Init State
unsigned char state[ISAP_STATE_SZ] __attribute__ ((aligned(4)));
uint16_t *state16 = (uint16_t *)state;
uint16_t *ad16 = (uint16_t *)ad;
uint16_t *c16 = (uint16_t *)c;
KeccakP400_Initialize(state);
KeccakP400_AddBytes(state,npub,0,CRYPTO_NPUBBYTES);
KeccakP400_AddBytes(state,ISAP_IV1,CRYPTO_NPUBBYTES,ISAP_IV_SZ);
KeccakP400_Permute_Nrounds(state,ISAP_sH);
// Absorb AD
unsigned long idx16 = 0;
unsigned long rem_bytes = adlen;
while(rem_bytes>=18){
state16[0] ^= ad16[idx16+0];
state16[1] ^= ad16[idx16+1];
state16[2] ^= ad16[idx16+2];
state16[3] ^= ad16[idx16+3];
state16[4] ^= ad16[idx16+4];
state16[5] ^= ad16[idx16+5];
state16[6] ^= ad16[idx16+6];
state16[7] ^= ad16[idx16+7];
state16[8] ^= ad16[idx16+8];
rem_bytes -= 18;
idx16 += 9;
KeccakP400_Permute_Nrounds(state,ISAP_sH);
}
uint32_t idx8 = idx16*2;
for (uint32_t i = 0; i < rem_bytes; i++) {
state[i] ^= ad[idx8+i];
}
state[rem_bytes] ^= 0x80;
KeccakP400_Permute_Nrounds(state,ISAP_sH);
// Domain Seperation
state[ISAP_STATE_SZ-1] ^= 0x01;
// Absorb C
idx16 = 0;
rem_bytes = clen;
while(rem_bytes>=18){
state16[0] ^= c16[idx16+0];
state16[1] ^= c16[idx16+1];
state16[2] ^= c16[idx16+2];
state16[3] ^= c16[idx16+3];
state16[4] ^= c16[idx16+4];
state16[5] ^= c16[idx16+5];
state16[6] ^= c16[idx16+6];
state16[7] ^= c16[idx16+7];
state16[8] ^= c16[idx16+8];
rem_bytes -= 18;
idx16 += 9;
KeccakP400_Permute_Nrounds(state,ISAP_sH);
}
idx8 = idx16*2;
for (uint32_t i = 0; i < rem_bytes; i++) {
state[i] ^= c[idx8+i];
}
state[rem_bytes] ^= 0x80;
KeccakP400_Permute_Nrounds(state,ISAP_sH);
// Derive Ka*
unsigned char y[CRYPTO_KEYBYTES];
unsigned char ka_star[CRYPTO_KEYBYTES];
KeccakP400_ExtractBytes(state,y,0,CRYPTO_KEYBYTES);
isap_rk(k,ISAP_IV2,y,CRYPTO_KEYBYTES,ka_star,CRYPTO_KEYBYTES);
// Squeezing Tag
KeccakP400_OverwriteBytes(state,ka_star,0,CRYPTO_KEYBYTES);
KeccakP400_Permute_Nrounds(state,ISAP_sH);
KeccakP400_ExtractBytes(state,tag,0,CRYPTO_KEYBYTES);
}
/****************************************/
/* IsapEnc */
/****************************************/
void isap_enc(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *m, const unsigned long mlen,
unsigned char *c
){
uint8_t state[ISAP_STATE_SZ] __attribute__((aligned(4)));
uint16_t *state16 = (uint16_t *)state;
uint16_t *c16 = (uint16_t *)c;
uint16_t *m16 = (uint16_t *)m;
isap_rk(k,ISAP_IV3,npub,CRYPTO_NPUBBYTES,(unsigned char *)state,ISAP_STATE_SZ-CRYPTO_NPUBBYTES);
KeccakP400_OverwriteBytes(state,npub,ISAP_STATE_SZ-CRYPTO_NPUBBYTES,CRYPTO_NPUBBYTES);
uint16_t ks16[9];
unsigned long idx16 = 0;
unsigned long rem_bytes = mlen;
// Squeeze Keystream
while(rem_bytes>=18){
KeccakP400_Permute_Nrounds(state,ISAP_sE);
c16[idx16+0] = m16[idx16+0] ^ state16[0];
c16[idx16+1] = m16[idx16+1] ^ state16[1];
c16[idx16+2] = m16[idx16+2] ^ state16[2];
c16[idx16+3] = m16[idx16+3] ^ state16[3];
c16[idx16+4] = m16[idx16+4] ^ state16[4];
c16[idx16+5] = m16[idx16+5] ^ state16[5];
c16[idx16+6] = m16[idx16+6] ^ state16[6];
c16[idx16+7] = m16[idx16+7] ^ state16[7];
c16[idx16+8] = m16[idx16+8] ^ state16[8];
rem_bytes -= 18;
idx16 += 9;
}
// Squeeze Keystream
if(rem_bytes>0){
KeccakP400_Permute_Nrounds(state,ISAP_sE);
unsigned long idx8 = idx16*2;
for (uint32_t i = 0; i < rem_bytes; i++) {
c[idx8+i] = m[idx8+i] ^ state[i];
}
}
}
#ifndef ISAP_H
#define ISAP_H
// Rate in bits
#define ISAP_rH 144
#define ISAP_rB 1
// Number of rounds
#define ISAP_sH 16
#define ISAP_sB 1
#define ISAP_sE 8
#define ISAP_sK 8
// State size in bytes
#define ISAP_STATE_SZ 50
// Size of rate in bytes
#define ISAP_rH_SZ ((ISAP_rH+7)/8)
// Size of zero truncated IV in bytes
#define ISAP_IV_SZ 8
// Size of tag in bytes
#define ISAP_TAG_SZ 16
// Security level
#define ISAP_K 128
void isap_mac(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *ad, const unsigned long adlen,
const unsigned char *c, const unsigned long clen,
unsigned char *tag
);
void isap_rk(
const unsigned char *k,
const unsigned char *iv,
const unsigned char *in,
const unsigned long inlen,
unsigned char *out,
const unsigned long outlen
);
void isap_enc(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *m, const unsigned long mlen,
unsigned char *c
);
#endif
This source diff could not be displayed because it is too large. You can view the blob instead.
/*
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to our website:
https://keccak.team/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
---
Please refer to SnP-documentation.h for more details.
*/
#ifndef _KeccakP_400_SnP_h_
#define _KeccakP_400_SnP_h_
#define KeccakP400_implementation "32-bit optimized ARM assembler implementation"
#define KeccakP400_stateSizeInBytes 50
#define KeccakP400_stateAlignment 4
/* void KeccakP400_StaticInitialize( void ); */
#define KeccakP400_StaticInitialize()
void KeccakP400_Initialize(void *state);
/* void KeccakP400_AddByte(void *state, unsigned char data, unsigned int offset); */
#define KeccakP400_AddByte(argS, argData, argOffset) ((unsigned char*)argS)[argOffset] ^= (argData)
void KeccakP400_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP400_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP400_OverwriteWithZeroes(void *state, unsigned int byteCount);
void KeccakP400_Permute_Nrounds(void *state, unsigned int nrounds);
void KeccakP400_Permute_20rounds(void *state);
void KeccakP400_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP400_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
#endif
@
@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
@
@ For more information, feedback or questions, please refer to our website:
@ https://keccak.team/
@
@ To the extent possible under law, the implementer has waived all copyright
@ and related or neighboring rights to the source code in this file.
@ http://creativecommons.org/publicdomain/zero/1.0/
@
@ ---
@
@ This file implements Keccak-p[400] in a SnP-compatible way.
@ Please refer to SnP-documentation.h for more details.
@
@ This implementation comes with KeccakP-400-SnP.h in the same folder.
@ Please refer to LowLevel.build for the exact list of other files it must be combined with.
@
@ WARNING: This implementation assumes a little endian CPU with@ ARMv6M architecture (e.g., Cortex-M0).
.thumb
.syntax unified
.text
.equ _ba , 0*2
.equ _be , 1*2
.equ _bi , 2*2
.equ _bo , 3*2
.equ _bu , 4*2
.equ _ga , 5*2
.equ _ge , 6*2
.equ _gi , 7*2
.equ _go , 8*2
.equ _gu , 9*2
.equ _ka , 10*2
.equ _ke , 11*2
.equ _ki , 12*2
.equ _ko , 13*2
.equ _ku , 14*2
.equ _ma , 15*2
.equ _me , 16*2
.equ _mi , 17*2
.equ _mo , 18*2
.equ _mu , 19*2
.equ _sa , 20*2
.equ _se , 21*2
.equ _si , 22*2
.equ _so , 23*2
.equ _su , 24*2
.macro xor5 result,ptr,b,g,k,m,s
ldrh \result, [\ptr, #\b]
ldrh r7, [\ptr, #\g]
eors \result, \result, r7
ldrh r7, [\ptr, #\k]
eors \result, \result, r7
ldrh r7, [\ptr, #\m]
eors \result, \result, r7
ldrh r7, [\ptr, #\s]
eors \result, \result, r7
.endm
.macro xorrol b, yy, rr
mov r7, \yy
eors \b, \b, r7
.if \rr != 0
lsls r7, \b, #\rr
lsrs \b, \b, #16-\rr
orrs \b, \b, r7
uxth \b, \b
.endif
.endm
.macro rolxor d, a, b, rot
sxth r7, \b
rors r7, r7, \rot
eors r7, r7, \a
uxth r7, r7
mov \d, r7
.endm
.macro xandnot resptr, resofs, aa, bb, cc, temp
mov \temp, \cc
bics \temp, \temp, \bb
eors \temp, \temp, \aa
strh \temp, [\resptr, #\resofs]
.endm
.macro xandnotRC resptr, resofs, aa, bb, cc, rco
bics \cc, \cc, \bb
eors \cc, \cc, \aa
mov r7, r8
ldrh \bb, [r7, #\rco]
eors \cc, \cc, \bb
strh \cc, [\resptr, #\resofs]
.endm
.macro KeccakRound sOut, sIn, rco
@prepTheta
push { \sOut }
movs \sOut, #31
xor5 r1, \sIn, _ba, _ga, _ka, _ma, _sa
xor5 r2, \sIn, _be, _ge, _ke, _me, _se
xor5 r3, \sIn, _bi, _gi, _ki, _mi, _si
xor5 r4, \sIn, _bo, _go, _ko, _mo, _so
xor5 r5, \sIn, _bu, _gu, _ku, _mu, _su
rolxor r9, r5, r2, \sOut
rolxor r10, r1, r3, \sOut
rolxor r11, r2, r4, \sOut
rolxor r12, r3, r5, \sOut
rolxor lr, r4, r1, \sOut
pop { \sOut }
@thetaRhoPiChiIota
ldrh r1, [\sIn, #_bo]
ldrh r2, [\sIn, #_gu]
ldrh r3, [\sIn, #_ka]
ldrh r4, [\sIn, #_me]
ldrh r5, [\sIn, #_si]
xorrol r1, r12, 12
xorrol r2, lr, 4
xorrol r3, r9, 3
xorrol r4, r10, 13
xorrol r5, r11, 13
xandnot \sOut, _ga, r1, r2, r3, r7
xandnot \sOut, _ge, r2, r3, r4, r7
xandnot \sOut, _gi, r3, r4, r5, r7
xandnot \sOut, _go, r4, r5, r1, r7
xandnot \sOut, _gu, r5, r1, r2, r7
ldrh r1, [\sIn, #_be]
ldrh r2, [\sIn, #_gi]
ldrh r3, [\sIn, #_ko]
ldrh r4, [\sIn, #_mu]
ldrh r5, [\sIn, #_sa]
xorrol r1, r10, 1
xorrol r2, r11, 6
xorrol r3, r12, 9
xorrol r4, lr, 8
xorrol r5, r9, 2
xandnot \sOut, _ka, r1, r2, r3, r7
xandnot \sOut, _ke, r2, r3, r4, r7
xandnot \sOut, _ki, r3, r4, r5, r7
xandnot \sOut, _ko, r4, r5, r1, r7
xandnot \sOut, _ku, r5, r1, r2, r7
ldrh r1, [\sIn, #_bu]
ldrh r2, [\sIn, #_ga]
ldrh r3, [\sIn, #_ke]
ldrh r4, [\sIn, #_mi]
ldrh r5, [\sIn, #_so]
xorrol r1, lr, 11
xorrol r2, r9, 4
xorrol r3, r10, 10
xorrol r4, r11, 15
xorrol r5, r12, 8
xandnot \sOut, _ma, r1, r2, r3, r7
xandnot \sOut, _me, r2, r3, r4, r7
xandnot \sOut, _mi, r3, r4, r5, r7
xandnot \sOut, _mo, r4, r5, r1, r7
xandnot \sOut, _mu, r5, r1, r2, r7
ldrh r1, [\sIn, #_bi]
ldrh r2, [\sIn, #_go]
ldrh r3, [\sIn, #_ku]
ldrh r4, [\sIn, #_ma]
ldrh r5, [\sIn, #_se]
xorrol r1, r11, 14
xorrol r2, r12, 7
xorrol r3, lr, 7
xorrol r4, r9, 9
xorrol r5, r10, 2
xandnot \sOut, _sa, r1, r2, r3, r7
xandnot \sOut, _se, r2, r3, r4, r7
xandnot \sOut, _si, r3, r4, r5, r7
xandnot \sOut, _so, r4, r5, r1, r7
xandnot \sOut, _su, r5, r1, r2, r7
ldrh r1, [\sIn, #_ba]
ldrh r2, [\sIn, #_ge]
ldrh r3, [\sIn, #_ki]
ldrh r4, [\sIn, #_mo]
ldrh r5, [\sIn, #_su]
xorrol r1, r9, 0
xorrol r2, r10, 12
xorrol r3, r11, 11
xorrol r4, r12, 5
xorrol r5, lr, 14
xandnot \sOut, _be, r2, r3, r4, r7
xandnot \sOut, _bi, r3, r4, r5, r7
xandnot \sOut, _bo, r4, r5, r1, r7
xandnot \sOut, _bu, r5, r1, r2, r7
xandnotRC \sOut, _ba, r1, r2, r3, \rco
.endm
@----------------------------------------------------------------------------
@
@ void KeccakP400_StaticInitialize( void )
@
.align 4
.global KeccakP400_StaticInitialize
.type KeccakP400_StaticInitialize, %function;
KeccakP400_StaticInitialize:
bx lr
@----------------------------------------------------------------------------
@
@ void KeccakP400_Initialize(void *state)
@
.align 4
.global KeccakP400_Initialize
.type KeccakP400_Initialize, %function;
KeccakP400_Initialize:
movs r1, #0
movs r2, #0
movs r3, #0
stmia r0!, { r1 - r3 }
stmia r0!, { r1 - r3 }
stmia r0!, { r1 - r3 }
stmia r0!, { r1 - r3 }
strh r1, [r0]
bx lr
@ ----------------------------------------------------------------------------
@
@ void KeccakP400_AddByte(void *state, unsigned char byte, unsigned int offset)
@
.align 4
.global KeccakP400_AddByte
.type KeccakP400_AddByte, %function;
KeccakP400_AddByte:
ldrb r3, [r0, r2]
eors r3, r3, r1
strb r3, [r0, r2]
bx lr
@----------------------------------------------------------------------------
@
@ void KeccakP400_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
.align 4
.global KeccakP400_AddBytes
.type KeccakP400_AddBytes, %function;
KeccakP400_AddBytes:
subs r3, r3, #1
bcc KeccakP400_AddBytes_Exit
adds r0, r0, r2
push {r4,lr}
KeccakP400_AddBytes_Loop:
ldrb r2, [r1, r3]
ldrb r4, [r0, r3]
eors r2, r2, r4
strb r2, [r0, r3]
subs r3, r3, #1
bcs KeccakP400_AddBytes_Loop
pop {r4,pc}
KeccakP400_AddBytes_Exit:
bx lr
@ ----------------------------------------------------------------------------
@
@ void KeccakP400_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
.align 4
.global KeccakP400_OverwriteBytes
.type KeccakP400_OverwriteBytes, %function;
KeccakP400_OverwriteBytes:
subs r3, r3, #1
bcc KeccakP400_OverwriteBytes_Exit
adds r0, r0, r2
KeccakP400_OverwriteBytes_Loop:
ldrb r2, [r1, r3]
strb r2, [r0, r3]
subs r3, r3, #1
bcs KeccakP400_OverwriteBytes_Loop
KeccakP400_OverwriteBytes_Exit:
bx lr
@----------------------------------------------------------------------------
@
@ void KeccakP400_OverwriteWithZeroes(void *state, unsigned int byteCount)
@
.align 4
.global KeccakP400_OverwriteWithZeroes
.type KeccakP400_OverwriteWithZeroes, %function;
KeccakP400_OverwriteWithZeroes:
movs r3, #0
cmp r1, #0
beq KeccakP400_OverwriteWithZeroes_Exit
KeccakP400_OverwriteWithZeroes_LoopBytes:
subs r1, r1, #1
strb r3, [r0, r1]
bne KeccakP400_OverwriteWithZeroes_LoopBytes
KeccakP400_OverwriteWithZeroes_Exit:
bx lr
@ ----------------------------------------------------------------------------
@
@ void KeccakP400_ExtractBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
.align 4
.global KeccakP400_ExtractBytes
.type KeccakP400_ExtractBytes, %function;
KeccakP400_ExtractBytes:
subs r3, r3, #1
bcc KeccakP400_ExtractBytes_Exit
adds r0, r0, r2
KeccakP400_ExtractBytes_Loop:
ldrb r2, [r0, r3]
strb r2, [r1, r3]
subs r3, r3, #1
bcs KeccakP400_ExtractBytes_Loop
KeccakP400_ExtractBytes_Exit:
bx lr
@ ----------------------------------------------------------------------------
@
@ void KeccakP400_ExtractAndAddBytes(void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
@
.align 4
.global KeccakP400_ExtractAndAddBytes
.type KeccakP400_ExtractAndAddBytes, %function;
KeccakP400_ExtractAndAddBytes:
push {r4,r5}
adds r0, r0, r3 @ state += offset (offset register no longer needed, reuse for length)
ldr r3, [sp, #8] @ get length argument from stack
subs r3, r3, #1 @ .if length != 0
bcc KeccakP400_ExtractAndAddBytes_Exit
KeccakP400_ExtractAndAddBytes_Loop:
ldrb r5, [r0, r3]
ldrb r4, [r1, r3]
eors r5, r5, r4
strb r5, [r2, r3]
subs r3, r3, #1
bcs KeccakP400_ExtractAndAddBytes_Loop
KeccakP400_ExtractAndAddBytes_Exit:
pop {r4,r5}
bx lr
@----------------------------------------------------------------------------
@
@ void KeccakP400_Permute_Nrounds( void *state, unsigned int nr )
@
.align 4
.global KeccakP400_Permute_Nrounds
.type KeccakP400_Permute_Nrounds, %function;
KeccakP400_Permute_Nrounds:
push { r4 - r6, lr }
mov r2, r8
mov r3, r9
mov r4, r10
mov r5, r11
mov r6, r12
push { r2 - r7 }
sub sp, sp, #25*2+6
mov r6, sp
adr r7, KeccakP400_Permute_RoundConstants
subs r7, r7, r1
subs r7, r7, r1
lsls r1, r1, #31
beq KeccakP400_Permute_Nrounds_GoRoundLoop
subs r7, r7, #2 @ odd number of rounds
mov r8, r7
ldm r0!, { r1, r2, r3, r4, r5, r7 } @ copy state to stack
stm r6!, { r1, r2, r3, r4, r5, r7 }
ldm r0!, { r1, r2, r3, r4, r5, r7 }
stm r6!, { r1, r2, r3, r4, r5, r7 }
subs r0, r0, #48
subs r6, r6, #48
ldrh r1, [r0, #_su]
strh r1, [r6, #_su]
b KeccakP400_Permute_RoundOdd
KeccakP400_Permute_Nrounds_GoRoundLoop:
b KeccakP400_Permute_RoundLoop
nop
@----------------------------------------------------------------------------
@
@ void KeccakP400_Permute_20rounds( void *state )
@
.align 4
.global KeccakP400_Permute_20rounds
.type KeccakP400_Permute_20rounds, %function;
KeccakP400_Permute_20rounds:
push { r4 - r6, lr }
mov r2, r8
mov r3, r9
mov r4, r10
mov r5, r11
mov r6, r12
push { r2 - r7 }
sub sp, sp, #25*2+6
mov r6, sp
adr r7, KeccakP400_Permute_RoundConstants20
b KeccakP400_Permute_RoundLoop
.align 4
KeccakP400_Permute_RoundConstants20:
.short 0x0001
.short 0x8082
.short 0x808a
.short 0x8000
.short 0x808b
.short 0x0001
.short 0x8081
.short 0x8009
.short 0x008a
.short 0x0088
.short 0x8009
.short 0x000a
.short 0x808b
.short 0x008b
.short 0x8089
.short 0x8003
.short 0x8002
.short 0x0080
.short 0x800a
.short 0x000a
KeccakP400_Permute_RoundConstants:
.short 0xFF @terminator
KeccakP400_Permute_RoundLoop:
mov r8, r7
KeccakRound r6, r0, 0
KeccakP400_Permute_RoundOdd:
KeccakRound r0, r6, 2
adds r7, r7, #4
ldrh r1, [r7]
cmp r1, #0xFF
beq KeccakP400_Permute_Done
b KeccakP400_Permute_RoundLoop
KeccakP400_Permute_Done:
add sp,sp,#25*2+6
pop { r1 - r5, r7 }
mov r8, r1
mov r9, r2
mov r10, r3
mov r11, r4
mov r12, r5
pop { r4 - r6, pc }
#ifndef API_H
#define API_H
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#endif
#include "api.h"
#include "isap.h"
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
){
(void)nsec;
// Ciphertext length is mlen + tag length
*clen = mlen+ISAP_TAG_SZ;
// Encrypt plaintext
if (mlen > 0) {
isap_enc(k,npub,m,mlen,c);
}
// Generate tag
unsigned char *tag = c+mlen;
isap_mac(k,npub,ad,adlen,c,mlen,tag);
return 0;
}
#include "api.h"
#include "isap.h"
#include "inttypes.h"
#include "KeccakP-400-SnP.h"
const unsigned char ISAP_IV1[] = {0x01,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK};
const unsigned char ISAP_IV2[] = {0x02,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK};
const unsigned char ISAP_IV3[] = {0x03,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK};
/****************************************/
/* IsapRk */
/****************************************/
void isap_rk(
const unsigned char *k,
const unsigned char *iv,
const unsigned char *in,
const unsigned long inlen,
unsigned char *out,
const unsigned long outlen
){
// Init State
unsigned char state[ISAP_STATE_SZ] __attribute__((aligned(4)));
KeccakP400_Initialize(state);
KeccakP400_AddBytes(state,k,0,CRYPTO_KEYBYTES);
KeccakP400_AddBytes(state,iv,CRYPTO_KEYBYTES,ISAP_IV_SZ);
KeccakP400_Permute_Nrounds(state,ISAP_sK);
// Absorb
for (unsigned long i = 0; i < inlen*8-1; i++){
unsigned long cur_byte_pos = i/8;
unsigned long cur_bit_pos = 7-(i%8);
unsigned char cur_bit = ((in[cur_byte_pos] >> (cur_bit_pos)) & 0x01) << 7;
state[0] ^= cur_bit;
KeccakP400_Permute_Nrounds(state,ISAP_sB);
}
unsigned char cur_bit = ((in[inlen-1]) & 0x01) << 7;
state[0] ^= cur_bit;
KeccakP400_Permute_Nrounds(state,ISAP_sK);
// Squeeze K*
KeccakP400_ExtractBytes(state,out,0,outlen);
}
/****************************************/
/* IsapMac */
/****************************************/
void isap_mac(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *ad, const unsigned long adlen,
const unsigned char *c, const unsigned long clen,
unsigned char *tag
){
// Init State
unsigned char state[ISAP_STATE_SZ] __attribute__ ((aligned(4)));
uint16_t *state16 = (uint16_t *)state;
uint16_t *ad16 = (uint16_t *)ad;
uint16_t *c16 = (uint16_t *)c;
KeccakP400_Initialize(state);
KeccakP400_AddBytes(state,npub,0,CRYPTO_NPUBBYTES);
KeccakP400_AddBytes(state,ISAP_IV1,CRYPTO_NPUBBYTES,ISAP_IV_SZ);
KeccakP400_Permute_Nrounds(state,ISAP_sH);
// Absorb AD
unsigned long idx16 = 0;
unsigned long rem_bytes = adlen;
while(rem_bytes>=18){
state16[0] ^= ad16[idx16+0];
state16[1] ^= ad16[idx16+1];
state16[2] ^= ad16[idx16+2];
state16[3] ^= ad16[idx16+3];
state16[4] ^= ad16[idx16+4];
state16[5] ^= ad16[idx16+5];
state16[6] ^= ad16[idx16+6];
state16[7] ^= ad16[idx16+7];
state16[8] ^= ad16[idx16+8];
rem_bytes -= 18;
idx16 += 9;
KeccakP400_Permute_Nrounds(state,ISAP_sH);
}
uint32_t idx8 = idx16*2;
for (uint32_t i = 0; i < rem_bytes; i++) {
state[i] ^= ad[idx8+i];
}
state[rem_bytes] ^= 0x80;
KeccakP400_Permute_Nrounds(state,ISAP_sH);
// Domain Seperation
state[ISAP_STATE_SZ-1] ^= 0x01;
// Absorb C
idx16 = 0;
rem_bytes = clen;
while(rem_bytes>=18){
state16[0] ^= c16[idx16+0];
state16[1] ^= c16[idx16+1];
state16[2] ^= c16[idx16+2];
state16[3] ^= c16[idx16+3];
state16[4] ^= c16[idx16+4];
state16[5] ^= c16[idx16+5];
state16[6] ^= c16[idx16+6];
state16[7] ^= c16[idx16+7];
state16[8] ^= c16[idx16+8];
rem_bytes -= 18;
idx16 += 9;
KeccakP400_Permute_Nrounds(state,ISAP_sH);
}
idx8 = idx16*2;
for (uint32_t i = 0; i < rem_bytes; i++) {
state[i] ^= c[idx8+i];
}
state[rem_bytes] ^= 0x80;
KeccakP400_Permute_Nrounds(state,ISAP_sH);
// Derive Ka*
unsigned char y[CRYPTO_KEYBYTES];
unsigned char ka_star[CRYPTO_KEYBYTES];
KeccakP400_ExtractBytes(state,y,0,CRYPTO_KEYBYTES);
isap_rk(k,ISAP_IV2,y,CRYPTO_KEYBYTES,ka_star,CRYPTO_KEYBYTES);
// Squeezing Tag
KeccakP400_OverwriteBytes(state,ka_star,0,CRYPTO_KEYBYTES);
KeccakP400_Permute_Nrounds(state,ISAP_sH);
KeccakP400_ExtractBytes(state,tag,0,CRYPTO_KEYBYTES);
}
/****************************************/
/* IsapEnc */
/****************************************/
void isap_enc(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *m, const unsigned long mlen,
unsigned char *c
){
uint8_t state[ISAP_STATE_SZ] __attribute__((aligned(4)));
uint16_t *state16 = (uint16_t *)state;
uint16_t *c16 = (uint16_t *)c;
uint16_t *m16 = (uint16_t *)m;
isap_rk(k,ISAP_IV3,npub,CRYPTO_NPUBBYTES,(unsigned char *)state,ISAP_STATE_SZ-CRYPTO_NPUBBYTES);
KeccakP400_OverwriteBytes(state,npub,ISAP_STATE_SZ-CRYPTO_NPUBBYTES,CRYPTO_NPUBBYTES);
uint16_t ks16[9];
unsigned long idx16 = 0;
unsigned long rem_bytes = mlen;
// Squeeze Keystream
while(rem_bytes>=18){
KeccakP400_Permute_Nrounds(state,ISAP_sE);
c16[idx16+0] = m16[idx16+0] ^ state16[0];
c16[idx16+1] = m16[idx16+1] ^ state16[1];
c16[idx16+2] = m16[idx16+2] ^ state16[2];
c16[idx16+3] = m16[idx16+3] ^ state16[3];
c16[idx16+4] = m16[idx16+4] ^ state16[4];
c16[idx16+5] = m16[idx16+5] ^ state16[5];
c16[idx16+6] = m16[idx16+6] ^ state16[6];
c16[idx16+7] = m16[idx16+7] ^ state16[7];
c16[idx16+8] = m16[idx16+8] ^ state16[8];
rem_bytes -= 18;
idx16 += 9;
}
// Squeeze Keystream
if(rem_bytes>0){
KeccakP400_Permute_Nrounds(state,ISAP_sE);
unsigned long idx8 = idx16*2;
for (uint32_t i = 0; i < rem_bytes; i++) {
c[idx8+i] = m[idx8+i] ^ state[i];
}
}
}
#ifndef ISAP_H
#define ISAP_H
// Rate in bits
#define ISAP_rH 144
#define ISAP_rB 1
// Number of rounds
#define ISAP_sH 20
#define ISAP_sB 12
#define ISAP_sE 12
#define ISAP_sK 12
// State size in bytes
#define ISAP_STATE_SZ 50
// Size of rate in bytes
#define ISAP_rH_SZ ((ISAP_rH+7)/8)
// Size of zero truncated IV in bytes
#define ISAP_IV_SZ 8
// Size of tag in bytes
#define ISAP_TAG_SZ 16
// Security level
#define ISAP_K 128
void isap_mac(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *ad, const unsigned long adlen,
const unsigned char *c, const unsigned long clen,
unsigned char *tag
);
void isap_rk(
const unsigned char *k,
const unsigned char *iv,
const unsigned char *in,
const unsigned long inlen,
unsigned char *out,
const unsigned long outlen
);
void isap_enc(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *m, const unsigned long mlen,
unsigned char *c
);
#endif
/*
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to our website:
https://keccak.team/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
---
Please refer to SnP-documentation.h for more details.
*/
#ifndef _KeccakP_400_SnP_h_
#define _KeccakP_400_SnP_h_
#define KeccakP400_implementation "32-bit optimized ARM assembler implementation"
#define KeccakP400_stateSizeInBytes 50
#define KeccakP400_stateAlignment 4
/* void KeccakP400_StaticInitialize( void ); */
#define KeccakP400_StaticInitialize()
void KeccakP400_Initialize(void *state);
/* void KeccakP400_AddByte(void *state, unsigned char data, unsigned int offset); */
#define KeccakP400_AddByte(argS, argData, argOffset) ((unsigned char*)argS)[argOffset] ^= (argData)
void KeccakP400_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP400_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP400_OverwriteWithZeroes(void *state, unsigned int byteCount);
void KeccakP400_Permute_Nrounds(void *state, unsigned int nrounds);
void KeccakP400_Permute_20rounds(void *state);
void KeccakP400_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP400_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
#endif
@
@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
@
@ For more information, feedback or questions, please refer to our website:
@ https://keccak.team/
@
@ To the extent possible under law, the implementer has waived all copyright
@ and related or neighboring rights to the source code in this file.
@ http://creativecommons.org/publicdomain/zero/1.0/
@
@ ---
@
@ This file implements Keccak-p[400] in a SnP-compatible way.
@ Please refer to SnP-documentation.h for more details.
@
@ This implementation comes with KeccakP-400-SnP.h in the same folder.
@ Please refer to LowLevel.build for the exact list of other files it must be combined with.
@
@ WARNING: This implementation assumes a little endian CPU with ARMv7M architecture (e.g., Cortex-M3) and the GCC compiler.
.thumb
.syntax unified
.text
@----------------------------------------------------------------------------
.equ _ba , 0*2
.equ _be , 1*2
.equ _bi , 2*2
.equ _bo , 3*2
.equ _bu , 4*2
.equ _ga , 5*2
.equ _ge , 6*2
.equ _gi , 7*2
.equ _go , 8*2
.equ _gu , 9*2
.equ _ka , 10*2
.equ _ke , 11*2
.equ _ki , 12*2
.equ _ko , 13*2
.equ _ku , 14*2
.equ _ma , 15*2
.equ _me , 16*2
.equ _mi , 17*2
.equ _mo , 18*2
.equ _mu , 19*2
.equ _sa , 20*2
.equ _se , 21*2
.equ _si , 22*2
.equ _so , 23*2
.equ _su , 24*2
.macro xor5 result,ptr,b,g,k,m,rs
ldrh \result, [\ptr, #\b]
ldrh r6, [\ptr, #\g]
eor \result, \result, \rs
ldrh \rs, [\ptr, #\k]
eor \result, \result, r6
ldrh r6, [\ptr, #\m]
eor \result, \result, \rs
eor \result, \result, r6
.endm
.macro xor5D resultL,resultH,ptr,b,g,k,m,rsL,rsH
bfi \rsL, \rsH, #16, #16
ldr \resultL, [\ptr, #\b]
ldr r6, [\ptr, #\g]
eor \resultL, \resultL, \rsL
ldr \rsL, [\ptr, #\k]
eor \resultL, \resultL, r6
ldr r6, [\ptr, #\m]
eor \resultL, \resultL, \rsL
eor \resultL, \resultL, r6
lsr \resultH, \resultL, #16
uxth \resultL, \resultL
.endm
.macro xorrol b, yy, rr
eor \b, \b, \yy
.if \rr != 8
lsl \b, \b, #\rr
orr \b, \b, \b, LSR #16
.else
rev16 \b, \b
.endif
.endm
.macro rolxor d, a, b
eor \d, \a, \b, LSL #1
eor \d, \d, \b, LSR #15
uxth \d, \d
.endm
.macro xandnot resptr, resofs, aa, bb, cc, temp
bic \temp, \cc, \bb
eor \temp, \temp, \aa
strh \temp, [\resptr, #\resofs]
.endm
.macro xandnotRC resptr, resofs, aa, bb, cc
ldrh r6, [r8], #2
bic \cc, \cc, \bb
eor \cc, \cc, r6
eor \cc, \cc, \aa
strh \cc, [\resptr, #\resofs]
.endm
.macro KeccakRound sOut, sIn
@prepTheta
xor5D r1, r2, \sIn, _ba, _ga, _ka, _ma, r9, r10
xor5D r3, r4, \sIn, _bi, _gi, _ki, _mi, r11, r12
rolxor r9, r7, r2
rolxor r10, r1, r3
rolxor r11, r2, r4
rolxor r12, r3, r7
rolxor lr, r4, r1
@thetaRhoPiChiIota
ldrh r1, [\sIn, #_ba]
ldrh r2, [\sIn, #_ge]
ldrh r3, [\sIn, #_ki]
ldrh r4, [\sIn, #_mo]
eor r1, r1, r9
xorrol r2, r10, 12
xorrol r3, r11, 11
xorrol r4, r12, 5
xorrol r5, lr, 14
xandnot \sOut, _be, r2, r3, r4, r6
xandnot \sOut, _bi, r3, r4, r5, r6
xandnot \sOut, _bo, r4, r5, r1, r6
xandnot \sOut, _bu, r5, r1, r2, r7
xandnotRC \sOut, _ba, r1, r2, r3
ldrh r1, [\sIn, #_bo]
ldrh r2, [\sIn, #_gu]
ldrh r3, [\sIn, #_ka]
ldrh r4, [\sIn, #_me]
ldrh r5, [\sIn, #_si]
xorrol r1, r12, 12
xorrol r2, lr, 4
xorrol r3, r9, 3
xorrol r4, r10, 13
xorrol r5, r11, 13
xandnot \sOut, _ga, r1, r2, r3, r6
xandnot \sOut, _ge, r2, r3, r4, r6
xandnot \sOut, _gi, r3, r4, r5, r6
xandnot \sOut, _go, r4, r5, r1, r6
xandnot \sOut, _gu, r5, r1, r2, r6
eor r7, r7, r6
ldrh r1, [\sIn, #_be]
ldrh r2, [\sIn, #_gi]
ldrh r3, [\sIn, #_ko]
ldrh r4, [\sIn, #_mu]
ldrh r5, [\sIn, #_sa]
xorrol r1, r10, 1
xorrol r2, r11, 6
xorrol r3, r12, 9
xorrol r4, lr, 8
xorrol r5, r9, 2
xandnot \sOut, _ka, r1, r2, r3, r6
xandnot \sOut, _ke, r2, r3, r4, r6
xandnot \sOut, _ki, r3, r4, r5, r6
xandnot \sOut, _ko, r4, r5, r1, r6
xandnot \sOut, _ku, r5, r1, r2, r6
eor r7, r7, r6
ldrh r1, [\sIn, #_bu]
ldrh r2, [\sIn, #_ga]
ldrh r3, [\sIn, #_ke]
ldrh r4, [\sIn, #_mi]
ldrh r5, [\sIn, #_so]
xorrol r1, lr, 11
xorrol r2, r9, 4
xorrol r3, r10, 10
xorrol r4, r11, 15
xorrol r5, r12, 8
xandnot \sOut, _ma, r1, r2, r3, r6
xandnot \sOut, _me, r2, r3, r4, r6
xandnot \sOut, _mi, r3, r4, r5, r6
xandnot \sOut, _mo, r4, r5, r1, r6
xandnot \sOut, _mu, r5, r1, r2, r6
eor r7, r7, r6
ldrh r1, [\sIn, #_bi]
ldrh r2, [\sIn, #_go]
ldrh r3, [\sIn, #_ku]
ldrh r4, [\sIn, #_ma]
ldrh r5, [\sIn, #_se]
xorrol r1, r11, 14
xorrol r2, r12, 7
xorrol r3, lr, 7
xorrol r4, r9, 9
xorrol r5, r10, 2
xandnot \sOut, _sa, r1, r2, r3, r9
xandnot \sOut, _se, r2, r3, r4, r10
xandnot \sOut, _si, r3, r4, r5, r11
xandnot \sOut, _so, r4, r5, r1, r12
bic r1, r2, r1
eor r5, r5, r1
eor r7, r7, r5
uxth r7, r7
strh r5, [\sOut, #_su]
uxth r5, r5
.endm
@----------------------------------------------------------------------------
@
@ void KeccakP400_StaticInitialize( void )
@
.align 8
.global KeccakP400_StaticInitialize
.type KeccakP400_StaticInitialize, %function;
KeccakP400_StaticInitialize:
bx lr
@----------------------------------------------------------------------------
@
@ void KeccakP400_Initialize(void *state)
@
.align 8
.global KeccakP400_Initialize
.type KeccakP400_Initialize, %function;
KeccakP400_Initialize:
movs r1, #0
movs r2, #0
movs r3, #0
stmia r0!, { r1 - r3 }
stmia r0!, { r1 - r3 }
stmia r0!, { r1 - r3 }
stmia r0!, { r1 - r3 }
strh r1, [r0]
bx lr
@ ----------------------------------------------------------------------------
@
@ void KeccakP400_AddByte(void *state, unsigned char byte, unsigned int offset)
@
.align 8
.global KeccakP400_AddByte
.type KeccakP400_AddByte, %function;
KeccakP400_AddByte:
ldrb r3, [r0, r2]
eors r3, r3, r1
strb r3, [r0, r2]
bx lr
@----------------------------------------------------------------------------
@
@ void KeccakP400_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
.align 8
.global KeccakP400_AddBytes
.type KeccakP400_AddBytes, %function;
KeccakP400_AddBytes:
subs r3, r3, #1
bcc KeccakP400_AddBytes_Exit
adds r0, r0, r2
push {r4,lr}
KeccakP400_AddBytes_Loop:
ldrb r2, [r1, r3]
ldrb r4, [r0, r3]
eors r2, r2, r4
strb r2, [r0, r3]
subs r3, r3, #1
bcs KeccakP400_AddBytes_Loop
pop {r4,pc}
KeccakP400_AddBytes_Exit:
bx lr
@ ----------------------------------------------------------------------------
@
@ void KeccakP400_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
.align 8
.global KeccakP400_OverwriteBytes
.type KeccakP400_OverwriteBytes, %function;
KeccakP400_OverwriteBytes:
subs r3, r3, #1
bcc KeccakP400_OverwriteBytes_Exit
adds r0, r0, r2
KeccakP400_OverwriteBytes_Loop:
ldrb r2, [r1, r3]
strb r2, [r0, r3]
subs r3, r3, #1
bcs KeccakP400_OverwriteBytes_Loop
KeccakP400_OverwriteBytes_Exit:
bx lr
@----------------------------------------------------------------------------
@
@ void KeccakP400_OverwriteWithZeroes(void *state, unsigned int byteCount)
@
.align 8
.global KeccakP400_OverwriteWithZeroes
.type KeccakP400_OverwriteWithZeroes, %function;
KeccakP400_OverwriteWithZeroes:
movs r3, #0
lsrs r2, r1, #2
beq KeccakP400_OverwriteWithZeroes_Bytes
KeccakP400_OverwriteWithZeroes_Loop2Lanes:
str r3, [r0], #4
subs r2, r2, #1
bne KeccakP400_OverwriteWithZeroes_Loop2Lanes
KeccakP400_OverwriteWithZeroes_Bytes:
ands r1, #3
beq KeccakP400_OverwriteWithZeroes_Exit
KeccakP400_OverwriteWithZeroes_LoopBytes:
strb r3, [r0], #1
subs r1, r1, #1
bne KeccakP400_OverwriteWithZeroes_LoopBytes
KeccakP400_OverwriteWithZeroes_Exit:
bx lr
@ ----------------------------------------------------------------------------
@
@ void KeccakP400_ExtractBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
.align 8
.global KeccakP400_ExtractBytes
.type KeccakP400_ExtractBytes, %function;
KeccakP400_ExtractBytes:
subs r3, r3, #1
bcc KeccakP400_ExtractBytes_Exit
adds r0, r0, r2
KeccakP400_ExtractBytes_Loop:
ldrb r2, [r0, r3]
strb r2, [r1, r3]
subs r3, r3, #1
bcs KeccakP400_ExtractBytes_Loop
KeccakP400_ExtractBytes_Exit:
bx lr
@ ----------------------------------------------------------------------------
@
@ void KeccakP400_ExtractAndAddBytes(void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
@
.align 8
.global KeccakP400_ExtractAndAddBytes
.type KeccakP400_ExtractAndAddBytes, %function;
KeccakP400_ExtractAndAddBytes:
push {r4,r5}
adds r0, r0, r3 @ state += offset (offset register no longer needed, reuse for length)
ldr r3, [sp, #8] @ get length argument from stack
subs r3, r3, #1 @ .if length != 0
bcc KeccakP400_ExtractAndAddBytes_Exit
KeccakP400_ExtractAndAddBytes_Loop:
ldrb r5, [r0, r3]
ldrb r4, [r1, r3]
eors r5, r5, r4
strb r5, [r2, r3]
subs r3, r3, #1
bcs KeccakP400_ExtractAndAddBytes_Loop
KeccakP400_ExtractAndAddBytes_Exit:
pop {r4,r5}
bx lr
@----------------------------------------------------------------------------
@
@ void KeccakP400_Permute_Nrounds( void *state, unsigned int nr )
@
.align 8
.global KeccakP400_Permute_Nrounds
.type KeccakP400_Permute_Nrounds, %function;
KeccakP400_Permute_Nrounds:
push {r4-r12,lr}
sub sp, sp, #2*25+6
adr r8, KeccakP400_Permute_RoundConstants
sub r8, r8, r1, LSL #1
lsls r1, r1, #31
beq KeccakP400_PermuteIntern
mov r1, sp @ odd number of rounds
ldm r0!, { r2, r3, r4, r5, r11, r12 } @ copy state to stack
stm r1!, { r2, r3, r4, r5, r11, r12 }
ldm r0!, { r2, r3, r4, r5, r11, r12 }
stm r1!, { r2, r3, r4, r5, r11, r12 }
sub r0, r0, #2*24
ldrh lr, [r0, #_su]
strh lr, [sp, #_su]
uxth r9, r11
uxth r10, r11, ROR #16
uxth r11, r12
lsr r12, r12, #16
mov r5, lr
xor5 r7, r0, _bu, _gu, _ku, _mu, lr
b KeccakP400_Permute_RoundOdd
@----------------------------------------------------------------------------
@
@ void KeccakP400_Permute_20rounds( void *state )
@
.align 8
.global KeccakP400_Permute_20rounds
.type KeccakP400_Permute_20rounds, %function;
KeccakP400_Permute_20rounds:
push {r4-r12,lr}
sub sp, sp, #2*25+6
adr r8, KeccakP400_Permute_RoundConstants20
KeccakP400_PermuteIntern:
ldrh r9, [r0, #_sa]
ldrh r10, [r0, #_se]
ldrh r11, [r0, #_si]
ldrh lr, [r0, #_su]
ldrh r12, [r0, #_so]
mov r5, lr
xor5 r7, r0, _bu, _gu, _ku, _mu, lr
KeccakP400_Permute_RoundLoop:
KeccakRound sp, r0
KeccakP400_Permute_RoundOdd:
KeccakRound r0, sp
ldrh r3, [r8]
cmp r3, #0
bne KeccakP400_Permute_RoundLoop
add sp,sp,#2*25+6
pop {r4-r12,pc}
.align 8
KeccakP400_Permute_RoundConstants20:
.short 0x0001
.short 0x8082
.short 0x808a
.short 0x8000
.short 0x808b
.short 0x0001
.short 0x8081
.short 0x8009
.short 0x008a
.short 0x0088
.short 0x8009
.short 0x000a
.short 0x808b
.short 0x008b
.short 0x8089
.short 0x8003
.short 0x8002
.short 0x0080
.short 0x800a
.short 0x000a
KeccakP400_Permute_RoundConstants:
.short 0 @ terminator
#ifndef API_H
#define API_H
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#endif
#include "api.h"
#include "isap.h"
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
){
(void)nsec;
// Ciphertext length is mlen + tag length
*clen = mlen+ISAP_TAG_SZ;
// Encrypt plaintext
if (mlen > 0) {
isap_enc(k,npub,m,mlen,c);
}
// Generate tag
unsigned char *tag = c+mlen;
isap_mac(k,npub,ad,adlen,c,mlen,tag);
return 0;
}
#include "api.h"
#include "isap.h"
#include "inttypes.h"
#include "KeccakP-400-SnP.h"
const unsigned char ISAP_IV1[] = {0x01,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK};
const unsigned char ISAP_IV2[] = {0x02,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK};
const unsigned char ISAP_IV3[] = {0x03,ISAP_K,ISAP_rH,ISAP_rB,ISAP_sH,ISAP_sB,ISAP_sE,ISAP_sK};
/****************************************/
/* IsapRk */
/****************************************/
void isap_rk(
const unsigned char *k,
const unsigned char *iv,
const unsigned char *in,
const unsigned long inlen,
unsigned char *out,
const unsigned long outlen
){
// Init State
unsigned char state[ISAP_STATE_SZ] __attribute__((aligned(4)));
KeccakP400_Initialize(state);
KeccakP400_AddBytes(state,k,0,CRYPTO_KEYBYTES);
KeccakP400_AddBytes(state,iv,CRYPTO_KEYBYTES,ISAP_IV_SZ);
KeccakP400_Permute_Nrounds(state,ISAP_sK);
// Absorb
for (unsigned long i = 0; i < inlen*8-1; i++){
unsigned long cur_byte_pos = i/8;
unsigned long cur_bit_pos = 7-(i%8);
unsigned char cur_bit = ((in[cur_byte_pos] >> (cur_bit_pos)) & 0x01) << 7;
state[0] ^= cur_bit;
KeccakP400_Permute_Nrounds(state,ISAP_sB);
}
unsigned char cur_bit = ((in[inlen-1]) & 0x01) << 7;
state[0] ^= cur_bit;
KeccakP400_Permute_Nrounds(state,ISAP_sK);
// Squeeze K*
KeccakP400_ExtractBytes(state,out,0,outlen);
}
/****************************************/
/* IsapMac */
/****************************************/
void isap_mac(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *ad, const unsigned long adlen,
const unsigned char *c, const unsigned long clen,
unsigned char *tag
){
// Init State
unsigned char state[ISAP_STATE_SZ] __attribute__ ((aligned(4)));
uint16_t *state16 = (uint16_t *)state;
uint16_t *ad16 = (uint16_t *)ad;
uint16_t *c16 = (uint16_t *)c;
KeccakP400_Initialize(state);
KeccakP400_AddBytes(state,npub,0,CRYPTO_NPUBBYTES);
KeccakP400_AddBytes(state,ISAP_IV1,CRYPTO_NPUBBYTES,ISAP_IV_SZ);
KeccakP400_Permute_Nrounds(state,ISAP_sH);
// Absorb AD
unsigned long idx16 = 0;
unsigned long rem_bytes = adlen;
while(rem_bytes>=18){
state16[0] ^= ad16[idx16+0];
state16[1] ^= ad16[idx16+1];
state16[2] ^= ad16[idx16+2];
state16[3] ^= ad16[idx16+3];
state16[4] ^= ad16[idx16+4];
state16[5] ^= ad16[idx16+5];
state16[6] ^= ad16[idx16+6];
state16[7] ^= ad16[idx16+7];
state16[8] ^= ad16[idx16+8];
rem_bytes -= 18;
idx16 += 9;
KeccakP400_Permute_Nrounds(state,ISAP_sH);
}
uint32_t idx8 = idx16*2;
for (uint32_t i = 0; i < rem_bytes; i++) {
state[i] ^= ad[idx8+i];
}
state[rem_bytes] ^= 0x80;
KeccakP400_Permute_Nrounds(state,ISAP_sH);
// Domain Seperation
state[ISAP_STATE_SZ-1] ^= 0x01;
// Absorb C
idx16 = 0;
rem_bytes = clen;
while(rem_bytes>=18){
state16[0] ^= c16[idx16+0];
state16[1] ^= c16[idx16+1];
state16[2] ^= c16[idx16+2];
state16[3] ^= c16[idx16+3];
state16[4] ^= c16[idx16+4];
state16[5] ^= c16[idx16+5];
state16[6] ^= c16[idx16+6];
state16[7] ^= c16[idx16+7];
state16[8] ^= c16[idx16+8];
rem_bytes -= 18;
idx16 += 9;
KeccakP400_Permute_Nrounds(state,ISAP_sH);
}
idx8 = idx16*2;
for (uint32_t i = 0; i < rem_bytes; i++) {
state[i] ^= c[idx8+i];
}
state[rem_bytes] ^= 0x80;
KeccakP400_Permute_Nrounds(state,ISAP_sH);
// Derive Ka*
unsigned char y[CRYPTO_KEYBYTES];
unsigned char ka_star[CRYPTO_KEYBYTES];
KeccakP400_ExtractBytes(state,y,0,CRYPTO_KEYBYTES);
isap_rk(k,ISAP_IV2,y,CRYPTO_KEYBYTES,ka_star,CRYPTO_KEYBYTES);
// Squeezing Tag
KeccakP400_OverwriteBytes(state,ka_star,0,CRYPTO_KEYBYTES);
KeccakP400_Permute_Nrounds(state,ISAP_sH);
KeccakP400_ExtractBytes(state,tag,0,CRYPTO_KEYBYTES);
}
/****************************************/
/* IsapEnc */
/****************************************/
void isap_enc(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *m, const unsigned long mlen,
unsigned char *c
){
uint8_t state[ISAP_STATE_SZ] __attribute__((aligned(4)));
uint16_t *state16 = (uint16_t *)state;
uint16_t *c16 = (uint16_t *)c;
uint16_t *m16 = (uint16_t *)m;
isap_rk(k,ISAP_IV3,npub,CRYPTO_NPUBBYTES,(unsigned char *)state,ISAP_STATE_SZ-CRYPTO_NPUBBYTES);
KeccakP400_OverwriteBytes(state,npub,ISAP_STATE_SZ-CRYPTO_NPUBBYTES,CRYPTO_NPUBBYTES);
uint16_t ks16[9];
unsigned long idx16 = 0;
unsigned long rem_bytes = mlen;
// Squeeze Keystream
while(rem_bytes>=18){
KeccakP400_Permute_Nrounds(state,ISAP_sE);
c16[idx16+0] = m16[idx16+0] ^ state16[0];
c16[idx16+1] = m16[idx16+1] ^ state16[1];
c16[idx16+2] = m16[idx16+2] ^ state16[2];
c16[idx16+3] = m16[idx16+3] ^ state16[3];
c16[idx16+4] = m16[idx16+4] ^ state16[4];
c16[idx16+5] = m16[idx16+5] ^ state16[5];
c16[idx16+6] = m16[idx16+6] ^ state16[6];
c16[idx16+7] = m16[idx16+7] ^ state16[7];
c16[idx16+8] = m16[idx16+8] ^ state16[8];
rem_bytes -= 18;
idx16 += 9;
}
// Squeeze Keystream
if(rem_bytes>0){
KeccakP400_Permute_Nrounds(state,ISAP_sE);
unsigned long idx8 = idx16*2;
for (uint32_t i = 0; i < rem_bytes; i++) {
c[idx8+i] = m[idx8+i] ^ state[i];
}
}
}
#ifndef ISAP_H
#define ISAP_H
// Rate in bits
#define ISAP_rH 144
#define ISAP_rB 1
// Number of rounds
#define ISAP_sH 20
#define ISAP_sB 12
#define ISAP_sE 12
#define ISAP_sK 12
// State size in bytes
#define ISAP_STATE_SZ 50
// Size of rate in bytes
#define ISAP_rH_SZ ((ISAP_rH+7)/8)
// Size of zero truncated IV in bytes
#define ISAP_IV_SZ 8
// Size of tag in bytes
#define ISAP_TAG_SZ 16
// Security level
#define ISAP_K 128
void isap_mac(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *ad, const unsigned long adlen,
const unsigned char *c, const unsigned long clen,
unsigned char *tag
);
void isap_rk(
const unsigned char *k,
const unsigned char *iv,
const unsigned char *in,
const unsigned long inlen,
unsigned char *out,
const unsigned long outlen
);
void isap_enc(
const unsigned char *k,
const unsigned char *npub,
const unsigned char *m, const unsigned long mlen,
unsigned char *c
);
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment