Commit 20add87b by Martin Schläffer Committed by Sebastian Renner

ascon

parent 9cd651e3
......@@ -5,11 +5,11 @@ enable_testing()
# set the default version, algorithms, implementations, tests, flags, defs
set(DEFAULT_VERSIONS v12)
set(DEFAULT_ALGS ascon128 ascon128a ascon80pq asconhash asconxof)
set(DEFAULT_IMPLS ref opt64 opt64_lowsize bi32 bi32_lowsize bi32_lowreg bi16 bi8)
set(DEFAULT_IMPLS ref opt64 opt64_lowsize bi32 bi32_lowsize bi32_lowreg opt8 bi8)
set(DEFAULT_TESTS genkat getcycles)
set(DEFAULT_REL_FLAGS -std=c99 -O2 -fomit-frame-pointer -march=native -mtune=native)
set(DEFAULT_DBG_FLAGS -std=c99 -O2 -Wall -Wextra -Wshadow)
set(DEFAULT_COMPILE_DEFS -DASCON_DATA_ACCESS='H')
set(DEFAULT_COMPILE_DEFS)
# set cmake variables for version, algorithms, implementations, tests, flags, defs
set(VERSION_LIST ${DEFAULT_VERSIONS} CACHE STRING "Choose the ascon versions to include.")
......
......@@ -19,10 +19,10 @@ and the following implementations:
- `opt64_lowsize`: 64-bit size-optimized C implementation
- `neon`: NEON speed-optimized ARM inline assembly implementation
- `bi32`: 32-bit speed-optimized bit-interleaved C implementation
- `bi32_lowsize`: 32-bit size-optimized bit-interleaved C implementation
- `bi32_lowreg`: 32-bit speed-optimized bit-interleaved C implementation (low register usage)
- `bi32_arm`: 32-bit speed-optimized bit-interleaved ARM inline assembly implementation
- `bi16`: 16-bit optimized bit-interleaved C implementation
- `bi32_lowreg`: 32-bit speed-optimized bit-interleaved C implementation (low register usage)
- `bi32_lowsize`: 32-bit size-optimized bit-interleaved C implementation
- `opt8`: 8-bit optimized C implementation
- `bi8`: 8-bit optimized bit-interleaved C implementation
......@@ -38,7 +38,7 @@ and the following implementations:
| Cortex-A15 (ARMv7)\* | | | | | 69.8 | 36.2 | 34.6 |
| Cortex-A7 (NEON) | 2182 | 249 | 148 | 97 | 71.7 | 47.5 | 46.5 |
| Cortex-A7 (ARMv7) | 1871 | 292 | 175 | 115 | 86.6 | 58.3 | 57.2 |
| ARM1176JZF-S (ARMv6) | 2189 | 340 | 202 | 133 | 97.9 | 64.4 | 65.3 |
| ARM1176JZF-S (ARMv6) | 2136 | 312 | 186 | 123 | 91.6 | 61.8 | 62.2 |
\* Results taken from eBACS: http://bench.cr.yp.to/
......@@ -55,7 +55,7 @@ and the following implementations:
| Cortex-A15 (ARMv7)\* | | | | | 60.3 | 25.3 | 23.8 |
| Cortex-A7 (NEON) | 2204 | 226 | 132 | 82 | 55.9 | 31.7 | 30.7 |
| Cortex-A7 (ARMv7) | 1911 | 255 | 161 | 102 | 71.3 | 42.3 | 41.2 |
| ARM1176JZF-S (ARMv6) | 2267 | 303 | 191 | 120 | 84.4 | 50.0 | 50.2 |
| ARM1176JZF-S (ARMv6) | 2118 | 261 | 170 | 107 | 75.6 | 46.0 | 46.6 |
\* Results taken from eBACS: http://bench.cr.yp.to/
......@@ -153,11 +153,17 @@ Get CPU cycles:
* Determine the scaling factor between the actual and base frequency:
- factor = actual frequency / base frequency
* Run the getcycles program using the frequency factor and watch the results:
* Run a getcycles program using the frequency factor and watch the results:
```
while true; do ./getcycles_crypto_aead_ascon128v12_opt64 $factor; done
```
* Run the `benchmark.sh` script with a specific algorithm and frequency factor
to benchmark all built implementations:
```
./benchmark.sh ascon128v12 $factor
```
## Hints to activate the performance monitor unit (PMU) on ARM CPUs:
......
#!/bin/sh
FACTOR=$1
ALG=$1
FACTOR=$2
for i in getcycles*; do
for i in getcycles*$ALG*; do
echo
echo $i:
echo
......
#include "api.h"
#include "ascon.h"
#include "permutations.h"
#include "printstate.h"
__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) {
KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 20) {
*K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
k += 4;
}
*K1 = XOR(*K1, LOAD64(k));
*K2 = XOR(*K2, LOAD64(k + 8));
}
__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1,
word_t K2) {
word_t N0, N1;
/* load nonce */
N0 = LOAD64(npub);
N1 = LOAD64(npub + 8);
/* initialization */
PINIT(s);
s->x0 = XOR(s->x0, IV);
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0);
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
s->x3 = XOR(s->x3, N0);
s->x4 = XOR(s->x4, N1);
P12(s);
if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("initialization", s);
}
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
word_t* restrict px;
/* process associated data */
if (adlen) {
while (adlen >= ASCON_RATE) {
s->x0 = XOR(s->x0, LOAD64(ad));
if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8));
PB(s);
ad += ASCON_RATE;
adlen -= ASCON_RATE;
}
/* final associated data block */
px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) {
s->x0 = XOR(s->x0, LOAD64(ad));
px = &s->x1;
ad += 8;
adlen -= 8;
}
if (adlen) *px = XOR(*px, LOAD(ad, adlen));
*px = XOR(*px, PAD(adlen));
PB(s);
}
s->x4 = XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
}
__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
word_t* restrict px;
/* process plaintext */
while (mlen >= ASCON_RATE) {
s->x0 = XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
if (ASCON_RATE == 16) {
s->x1 = XOR(s->x1, LOAD64(m + 8));
STORE64(c + 8, s->x1);
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
mlen -= ASCON_RATE;
}
/* final plaintext block */
px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) {
s->x0 = XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
px = &s->x1;
m += 8;
c += 8;
mlen -= 8;
}
if (mlen) {
*px = XOR(*px, LOAD(m, mlen));
STORE(c, *px, mlen);
}
*px = XOR(*px, PAD(mlen));
printstate("process plaintext", s);
}
__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
word_t* restrict px;
word_t cx;
/* process ciphertext */
while (clen >= ASCON_RATE) {
cx = LOAD64(c);
s->x0 = XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
if (ASCON_RATE == 16) {
cx = LOAD64(c + 8);
s->x1 = XOR(s->x1, cx);
STORE64(m + 8, s->x1);
s->x1 = cx;
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
clen -= ASCON_RATE;
}
/* final ciphertext block */
px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) {
cx = LOAD64(c);
s->x0 = XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
px = &s->x1;
m += 8;
c += 8;
clen -= 8;
}
if (clen) {
cx = LOAD(c, clen);
*px = XOR(*px, cx);
STORE(m, *px, clen);
*px = CLEAR(*px, clen);
*px = XOR(*px, cx);
}
*px = XOR(*px, PAD(clen));
printstate("process ciphertext", s);
}
__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) {
/* finalization */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
s->x2 = XOR(s->x2, K1);
s->x3 = XOR(s->x3, K2);
}
if (CRYPTO_KEYBYTES == 20) {
s->x1 = XOR(s->x1, KEYROT(K0, K1));
s->x2 = XOR(s->x2, KEYROT(K1, K2));
s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0)));
}
P12(s);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("finalization", s);
}
#if ASCON_INLINE_MODE
#define INIT init
#define ABSORB absorb
#define ENCRYPT encrypt
#define DECRYPT decrypt
#define FINAL final
#else
#define INIT ascon_init
#define ABSORB ascon_absorb
#define ENCRYPT ascon_encrypt
#define DECRYPT ascon_decrypt
#define FINAL ascon_final
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
word_t K0, K1, K2;
loadkey(&K0, &K1, &K2, k);
init(s, npub, K0, K1, K2);
}
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
absorb(s, ad, adlen);
}
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) {
encrypt(s, c, m, mlen);
}
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) {
decrypt(s, m, c, clen);
}
void ascon_final(state_t* s, const uint8_t* k) {
word_t K0, K1, K2;
loadkey(&K0, &K1, &K2, k);
final(s, K0, K1, K2);
}
#endif
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
uint64_t mlen, const uint8_t* ad, uint64_t adlen,
const uint8_t* nsec, const uint8_t* npub,
const uint8_t* k) {
word_t K0, K1, K2;
state_t s;
(void)nsec;
*clen = mlen + CRYPTO_ABYTES;
/* perform ascon computation */
loadkey(&K0, &K1, &K2, k);
INIT(&s, npub, K0, K1, K2);
ABSORB(&s, ad, adlen);
ENCRYPT(&s, c, m, mlen);
FINAL(&s, K0, K1, K2);
/* set tag */
c += mlen;
STOREBYTES(c, s.x3, 8);
STOREBYTES(c + 8, s.x4, 8);
return 0;
}
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
const uint8_t* c, uint64_t clen, const uint8_t* ad,
uint64_t adlen, const uint8_t* npub, const uint8_t* k) {
word_t K0, K1, K2;
state_t s;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
*mlen = clen = clen - CRYPTO_ABYTES;
/* perform ascon computation */
loadkey(&K0, &K1, &K2, k);
INIT(&s, npub, K0, K1, K2);
ABSORB(&s, ad, adlen);
DECRYPT(&s, m, c, clen);
FINAL(&s, K0, K1, K2);
/* verify tag (should be constant time, check compiler output) */
c += clen;
s.x3 = XOR(s.x3, LOADBYTES(c, 8));
s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8));
if (NOTZERO(s.x3, s.x4)) {
*mlen = 0;
return -1;
}
return 0;
}
......@@ -16,4 +16,4 @@ void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k);
#endif // ASCON_H_
#endif /* ASCON_H */
......@@ -21,11 +21,6 @@
#define ASCON_UNROLL_LOOPS 0
#endif
/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */
#ifndef ASCON_DATA_ACCESS
#define ASCON_DATA_ACCESS 'M'
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
......
#ifndef INTERLEAVE_H_
#define INTERLEAVE_H_
#include <stdint.h>
__forceinline uint32_t deinterleave_uint32(uint32_t x) {
uint32_t t;
t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1);
t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2);
t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4);
t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8);
return x;
}
__forceinline uint32_t interleave_uint32(uint32_t x) {
uint32_t t;
t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8);
t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4);
t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2);
t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1);
return x;
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t deinterleave32(uint64_t in) {
uint32_t hi = in >> 32;
uint32_t lo = in;
uint32_t r0, r1;
lo = deinterleave_uint32(lo);
hi = deinterleave_uint32(hi);
r0 = (lo & 0x0000FFFF) | (hi << 16);
r1 = (lo >> 16) | (hi & 0xFFFF0000);
return (uint64_t)r1 << 32 | r0;
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t interleave32(uint64_t in) {
uint32_t r0 = in;
uint32_t r1 = in >> 32;
uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16);
uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000);
lo = interleave_uint32(lo);
hi = interleave_uint32(hi);
return (uint64_t)hi << 32 | lo;
}
#endif /* INTERLEAVE_H_ */
......@@ -21,44 +21,23 @@
#define ASCON_HASH_BYTES 32
#define ASCON_128_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_128A_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128A_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128A_PB_ROUNDS) << 32))
#define ASCON_80PQ_IV \
U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_HASH_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_HASH_BYTES * 8) << 0))
#define ASCON_XOF_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40))
#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull)
#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull)
#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull)
#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull)
#define ASCON_128_IV WORD_T(0x8021000008220000)
#define ASCON_128A_IV WORD_T(0x8822000000200000)
#define ASCON_80PQ_IV WORD_T(0xc021000008220000)
#define ASCON_HASH_IV WORD_T(0x0020000008020010)
#define ASCON_XOF_IV WORD_T(0x0020000008020000)
#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7)
#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340)
#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8)
#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98)
#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398)
#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6)
#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220)
#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04)
#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2)
#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
......
......@@ -12,6 +12,7 @@
#include <stdio.h>
#include "ascon.h"
#include "word.h"
__forceinline void printword(const char* text, const word_t x) {
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x));
......
......@@ -3,55 +3,51 @@
#include <stdint.h>
#include "config.h"
#include "endian.h"
#include "interleave.h"
typedef struct {
uint32_t e;
uint32_t o;
} word_t;
__forceinline uint32_t ROR32(uint32_t x, int n) {
return (n == 0) ? x : x >> n | x << (32 - n);
}
__forceinline word_t ROR64(word_t x, int n) {
word_t r;
r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2);
r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2);
return r;
}
__forceinline word_t WORD_T(uint64_t x) {
return (word_t){.o = x >> 32, .e = x};
}
__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; }
__forceinline uint64_t TOBI32(uint64_t in);
__forceinline uint64_t FROMBI32(uint64_t in);
__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); }
__forceinline word_t U64TOWORD(uint64_t x) {
uint64_t w = TOBI32(x);
return (word_t){.o = w >> 32, .e = w};
}
__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); }
__forceinline uint64_t WORDTOU64(word_t w) {
return FROMBI32((uint64_t)w.o << 32 | w.e);
__forceinline word_t NOT(word_t a) {
a.e = ~a.e;
a.o = ~a.o;
return a;
}
#define XOR(a, b) \
do { \
word_t tb = b; \
(a).e ^= tb.e; \
(a).o ^= tb.o; \
} while (0)
#define AND(a, b) \
do { \
word_t tb = b; \
(a).e &= tb.e; \
(a).o &= tb.o; \
} while (0)
__forceinline uint32_t ROR32(uint32_t x, int n) {
return x >> n | x << (32 - n);
__forceinline word_t XOR(word_t a, word_t b) {
a.e ^= b.e;
a.o ^= b.o;
return a;
}
__forceinline word_t ROR64(word_t x, int n) {
word_t r;
r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2);
r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2);
return r;
__forceinline word_t AND(word_t a, word_t b) {
a.e &= b.e;
a.o &= b.o;
return a;
}
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
......@@ -61,57 +57,58 @@ __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
return r;
}
__forceinline int NOTZERO(word_t a, word_t b) {
int result = 0;
for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i];
for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i];
return result;
__forceinline uint8_t NOTZERO(word_t a, word_t b) {
uint32_t result = a.e | a.o | b.e | b.o;
result |= result >> 16;
result |= result >> 8;
return (uint8_t)result;
}
/* set padding byte in 64-bit Ascon word */
__forceinline word_t PAD(int i) {
return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32);
}
/* byte mask for 64-bit Ascon word (1 <= n <= 8) */
__forceinline word_t XMASK(int n) {
__forceinline word_t CLEAR(word_t w, int n) {
/* undefined for n == 0 */
uint32_t mask = 0x0fffffff >> (n * 4 - 4);
return WORD_T((uint64_t)mask << 32 | mask);
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t TOBI32(uint64_t in) {
uint32_t hi = in >> 32;
uint32_t lo = in;
uint32_t r0, r1;
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
r0 = (lo & 0x0000FFFF) | (hi << 16);
r1 = (lo >> 16) | (hi & 0xFFFF0000);
return (uint64_t)r1 << 32 | r0;
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t FROMBI32(uint64_t in) {
uint32_t r0 = in;
uint32_t r1 = in >> 32;
uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16);
uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000);
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
return (uint64_t)hi << 32 | lo;
return AND(w, WORD_T((uint64_t)mask << 32 | mask));
}
__forceinline uint64_t MASK(int n) {
/* undefined for n == 0 */
return ~0ull >> (64 - 8 * n);
}
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = *(uint64_t*)bytes;
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes & MASK(n);
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(n);
*(uint64_t*)bytes |= U64BIG(x);
}
__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
uint64_t x = 0;
for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i];
return U64TOWORD(x);
}
__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i];
}
#endif /* WORD_H_ */
#include "api.h"
#include "ascon.h"
#include "permutations.h"
#include "printstate.h"
__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) {
KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 20) {
*K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
k += 4;
}
*K1 = XOR(*K1, LOAD64(k));
*K2 = XOR(*K2, LOAD64(k + 8));
}
__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1,
word_t K2) {
word_t N0, N1;
/* load nonce */
N0 = LOAD64(npub);
N1 = LOAD64(npub + 8);
/* initialization */
PINIT(s);
s->x0 = XOR(s->x0, IV);
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0);
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
s->x3 = XOR(s->x3, N0);
s->x4 = XOR(s->x4, N1);
P12(s);
if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("initialization", s);
}
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
word_t* restrict px;
/* process associated data */
if (adlen) {
while (adlen >= ASCON_RATE) {
s->x0 = XOR(s->x0, LOAD64(ad));
if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8));
PB(s);
ad += ASCON_RATE;
adlen -= ASCON_RATE;
}
/* final associated data block */
px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) {
s->x0 = XOR(s->x0, LOAD64(ad));
px = &s->x1;
ad += 8;
adlen -= 8;
}
if (adlen) *px = XOR(*px, LOAD(ad, adlen));
*px = XOR(*px, PAD(adlen));
PB(s);
}
s->x4 = XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
}
__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
word_t* restrict px;
/* process plaintext */
while (mlen >= ASCON_RATE) {
s->x0 = XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
if (ASCON_RATE == 16) {
s->x1 = XOR(s->x1, LOAD64(m + 8));
STORE64(c + 8, s->x1);
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
mlen -= ASCON_RATE;
}
/* final plaintext block */
px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) {
s->x0 = XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
px = &s->x1;
m += 8;
c += 8;
mlen -= 8;
}
if (mlen) {
*px = XOR(*px, LOAD(m, mlen));
STORE(c, *px, mlen);
}
*px = XOR(*px, PAD(mlen));
printstate("process plaintext", s);
}
__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
word_t* restrict px;
word_t cx;
/* process ciphertext */
while (clen >= ASCON_RATE) {
cx = LOAD64(c);
s->x0 = XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
if (ASCON_RATE == 16) {
cx = LOAD64(c + 8);
s->x1 = XOR(s->x1, cx);
STORE64(m + 8, s->x1);
s->x1 = cx;
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
clen -= ASCON_RATE;
}
/* final ciphertext block */
px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) {
cx = LOAD64(c);
s->x0 = XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
px = &s->x1;
m += 8;
c += 8;
clen -= 8;
}
if (clen) {
cx = LOAD(c, clen);
*px = XOR(*px, cx);
STORE(m, *px, clen);
*px = CLEAR(*px, clen);
*px = XOR(*px, cx);
}
*px = XOR(*px, PAD(clen));
printstate("process ciphertext", s);
}
__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) {
/* finalization */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
s->x2 = XOR(s->x2, K1);
s->x3 = XOR(s->x3, K2);
}
if (CRYPTO_KEYBYTES == 20) {
s->x1 = XOR(s->x1, KEYROT(K0, K1));
s->x2 = XOR(s->x2, KEYROT(K1, K2));
s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0)));
}
P12(s);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("finalization", s);
}
#if ASCON_INLINE_MODE
#define INIT init
#define ABSORB absorb
#define ENCRYPT encrypt
#define DECRYPT decrypt
#define FINAL final
#else
#define INIT ascon_init
#define ABSORB ascon_absorb
#define ENCRYPT ascon_encrypt
#define DECRYPT ascon_decrypt
#define FINAL ascon_final
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
word_t K0, K1, K2;
loadkey(&K0, &K1, &K2, k);
init(s, npub, K0, K1, K2);
}
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
absorb(s, ad, adlen);
}
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) {
encrypt(s, c, m, mlen);
}
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) {
decrypt(s, m, c, clen);
}
void ascon_final(state_t* s, const uint8_t* k) {
word_t K0, K1, K2;
loadkey(&K0, &K1, &K2, k);
final(s, K0, K1, K2);
}
#endif
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
uint64_t mlen, const uint8_t* ad, uint64_t adlen,
const uint8_t* nsec, const uint8_t* npub,
const uint8_t* k) {
word_t K0, K1, K2;
state_t s;
(void)nsec;
*clen = mlen + CRYPTO_ABYTES;
/* perform ascon computation */
loadkey(&K0, &K1, &K2, k);
INIT(&s, npub, K0, K1, K2);
ABSORB(&s, ad, adlen);
ENCRYPT(&s, c, m, mlen);
FINAL(&s, K0, K1, K2);
/* set tag */
c += mlen;
STOREBYTES(c, s.x3, 8);
STOREBYTES(c + 8, s.x4, 8);
return 0;
}
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
const uint8_t* c, uint64_t clen, const uint8_t* ad,
uint64_t adlen, const uint8_t* npub, const uint8_t* k) {
word_t K0, K1, K2;
state_t s;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
*mlen = clen = clen - CRYPTO_ABYTES;
/* perform ascon computation */
loadkey(&K0, &K1, &K2, k);
INIT(&s, npub, K0, K1, K2);
ABSORB(&s, ad, adlen);
DECRYPT(&s, m, c, clen);
FINAL(&s, K0, K1, K2);
/* verify tag (should be constant time, check compiler output) */
c += clen;
s.x3 = XOR(s.x3, LOADBYTES(c, 8));
s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8));
if (NOTZERO(s.x3, s.x4)) {
*mlen = 0;
return -1;
}
return 0;
}
......@@ -16,4 +16,4 @@ void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k);
#endif // ASCON_H_
#endif /* ASCON_H */
......@@ -21,11 +21,6 @@
#define ASCON_UNROLL_LOOPS 1
#endif
/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */
#ifndef ASCON_DATA_ACCESS
#define ASCON_DATA_ACCESS 'H'
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
......
#ifndef INTERLEAVE_H_
#define INTERLEAVE_H_
#include <stdint.h>
__forceinline uint32_t deinterleave_uint32(uint32_t x) {
uint32_t t;
t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1);
t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2);
t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4);
t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8);
return x;
}
__forceinline uint32_t interleave_uint32(uint32_t x) {
uint32_t t;
t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8);
t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4);
t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2);
t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1);
return x;
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t deinterleave32(uint64_t in) {
uint32_t hi = in >> 32;
uint32_t lo = in;
uint32_t r0, r1;
lo = deinterleave_uint32(lo);
hi = deinterleave_uint32(hi);
r0 = (lo & 0x0000FFFF) | (hi << 16);
r1 = (lo >> 16) | (hi & 0xFFFF0000);
return (uint64_t)r1 << 32 | r0;
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t interleave32(uint64_t in) {
uint32_t r0 = in;
uint32_t r1 = in >> 32;
uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16);
uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000);
lo = interleave_uint32(lo);
hi = interleave_uint32(hi);
return (uint64_t)hi << 32 | lo;
}
#endif /* INTERLEAVE_H_ */
......@@ -21,44 +21,23 @@
#define ASCON_HASH_BYTES 32
#define ASCON_128_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_128A_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128A_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128A_PB_ROUNDS) << 32))
#define ASCON_80PQ_IV \
U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_HASH_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_HASH_BYTES * 8) << 0))
#define ASCON_XOF_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40))
#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull)
#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull)
#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull)
#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull)
#define ASCON_128_IV WORD_T(0x8021000008220000)
#define ASCON_128A_IV WORD_T(0x8822000000200000)
#define ASCON_80PQ_IV WORD_T(0xc021000008220000)
#define ASCON_HASH_IV WORD_T(0x0020000008020010)
#define ASCON_XOF_IV WORD_T(0x0020000008020000)
#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7)
#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340)
#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8)
#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98)
#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398)
#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6)
#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220)
#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04)
#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2)
#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
......
......@@ -12,6 +12,7 @@
#include <stdio.h>
#include "ascon.h"
#include "word.h"
__forceinline void printword(const char* text, const word_t x) {
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x));
......
......@@ -3,55 +3,51 @@
#include <stdint.h>
#include "config.h"
#include "endian.h"
#include "interleave.h"
typedef struct {
uint32_t e;
uint32_t o;
} word_t;
__forceinline uint32_t ROR32(uint32_t x, int n) {
return (n == 0) ? x : x >> n | x << (32 - n);
}
__forceinline word_t ROR64(word_t x, int n) {
word_t r;
r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2);
r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2);
return r;
}
__forceinline word_t WORD_T(uint64_t x) {
return (word_t){.o = x >> 32, .e = x};
}
__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; }
__forceinline uint64_t TOBI32(uint64_t in);
__forceinline uint64_t FROMBI32(uint64_t in);
__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); }
__forceinline word_t U64TOWORD(uint64_t x) {
uint64_t w = TOBI32(x);
return (word_t){.o = w >> 32, .e = w};
}
__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); }
__forceinline uint64_t WORDTOU64(word_t w) {
return FROMBI32((uint64_t)w.o << 32 | w.e);
__forceinline word_t NOT(word_t a) {
a.e = ~a.e;
a.o = ~a.o;
return a;
}
#define XOR(a, b) \
do { \
word_t tb = b; \
(a).e ^= tb.e; \
(a).o ^= tb.o; \
} while (0)
#define AND(a, b) \
do { \
word_t tb = b; \
(a).e &= tb.e; \
(a).o &= tb.o; \
} while (0)
__forceinline uint32_t ROR32(uint32_t x, int n) {
return x >> n | x << (32 - n);
__forceinline word_t XOR(word_t a, word_t b) {
a.e ^= b.e;
a.o ^= b.o;
return a;
}
__forceinline word_t ROR64(word_t x, int n) {
word_t r;
r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2);
r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2);
return r;
__forceinline word_t AND(word_t a, word_t b) {
a.e &= b.e;
a.o &= b.o;
return a;
}
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
......@@ -61,57 +57,58 @@ __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
return r;
}
__forceinline int NOTZERO(word_t a, word_t b) {
int result = 0;
for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i];
for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i];
return result;
__forceinline uint8_t NOTZERO(word_t a, word_t b) {
uint32_t result = a.e | a.o | b.e | b.o;
result |= result >> 16;
result |= result >> 8;
return (uint8_t)result;
}
/* set padding byte in 64-bit Ascon word */
__forceinline word_t PAD(int i) {
return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32);
}
/* byte mask for 64-bit Ascon word (1 <= n <= 8) */
__forceinline word_t XMASK(int n) {
__forceinline word_t CLEAR(word_t w, int n) {
/* undefined for n == 0 */
uint32_t mask = 0x0fffffff >> (n * 4 - 4);
return WORD_T((uint64_t)mask << 32 | mask);
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t TOBI32(uint64_t in) {
uint32_t hi = in >> 32;
uint32_t lo = in;
uint32_t r0, r1;
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
r0 = (lo & 0x0000FFFF) | (hi << 16);
r1 = (lo >> 16) | (hi & 0xFFFF0000);
return (uint64_t)r1 << 32 | r0;
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t FROMBI32(uint64_t in) {
uint32_t r0 = in;
uint32_t r1 = in >> 32;
uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16);
uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000);
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
return (uint64_t)hi << 32 | lo;
return AND(w, WORD_T((uint64_t)mask << 32 | mask));
}
__forceinline uint64_t MASK(int n) {
/* undefined for n == 0 */
return ~0ull >> (64 - 8 * n);
}
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = *(uint64_t*)bytes;
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes & MASK(n);
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(n);
*(uint64_t*)bytes |= U64BIG(x);
}
__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
uint64_t x = 0;
for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i];
return U64TOWORD(x);
}
__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i];
}
#endif /* WORD_H_ */
#include "api.h"
#include "ascon.h"
#include "permutations.h"
#include "printstate.h"
__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) {
KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 20) {
*K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
k += 4;
}
*K1 = XOR(*K1, LOAD64(k));
*K2 = XOR(*K2, LOAD64(k + 8));
}
__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1,
word_t K2) {
word_t N0, N1;
/* load nonce */
N0 = LOAD64(npub);
N1 = LOAD64(npub + 8);
/* initialization */
PINIT(s);
s->x0 = XOR(s->x0, IV);
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0);
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
s->x3 = XOR(s->x3, N0);
s->x4 = XOR(s->x4, N1);
P12(s);
if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("initialization", s);
}
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
word_t* restrict px;
/* process associated data */
if (adlen) {
while (adlen >= ASCON_RATE) {
s->x0 = XOR(s->x0, LOAD64(ad));
if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8));
PB(s);
ad += ASCON_RATE;
adlen -= ASCON_RATE;
}
/* final associated data block */
px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) {
s->x0 = XOR(s->x0, LOAD64(ad));
px = &s->x1;
ad += 8;
adlen -= 8;
}
if (adlen) *px = XOR(*px, LOAD(ad, adlen));
*px = XOR(*px, PAD(adlen));
PB(s);
}
s->x4 = XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
}
__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
word_t* restrict px;
/* process plaintext */
while (mlen >= ASCON_RATE) {
s->x0 = XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
if (ASCON_RATE == 16) {
s->x1 = XOR(s->x1, LOAD64(m + 8));
STORE64(c + 8, s->x1);
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
mlen -= ASCON_RATE;
}
/* final plaintext block */
px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) {
s->x0 = XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
px = &s->x1;
m += 8;
c += 8;
mlen -= 8;
}
if (mlen) {
*px = XOR(*px, LOAD(m, mlen));
STORE(c, *px, mlen);
}
*px = XOR(*px, PAD(mlen));
printstate("process plaintext", s);
}
__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
word_t* restrict px;
word_t cx;
/* process ciphertext */
while (clen >= ASCON_RATE) {
cx = LOAD64(c);
s->x0 = XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
if (ASCON_RATE == 16) {
cx = LOAD64(c + 8);
s->x1 = XOR(s->x1, cx);
STORE64(m + 8, s->x1);
s->x1 = cx;
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
clen -= ASCON_RATE;
}
/* final ciphertext block */
px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) {
cx = LOAD64(c);
s->x0 = XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
px = &s->x1;
m += 8;
c += 8;
clen -= 8;
}
if (clen) {
cx = LOAD(c, clen);
*px = XOR(*px, cx);
STORE(m, *px, clen);
*px = CLEAR(*px, clen);
*px = XOR(*px, cx);
}
*px = XOR(*px, PAD(clen));
printstate("process ciphertext", s);
}
__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) {
/* finalization */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
s->x2 = XOR(s->x2, K1);
s->x3 = XOR(s->x3, K2);
}
if (CRYPTO_KEYBYTES == 20) {
s->x1 = XOR(s->x1, KEYROT(K0, K1));
s->x2 = XOR(s->x2, KEYROT(K1, K2));
s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0)));
}
P12(s);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("finalization", s);
}
#if ASCON_INLINE_MODE
#define INIT init
#define ABSORB absorb
#define ENCRYPT encrypt
#define DECRYPT decrypt
#define FINAL final
#else
#define INIT ascon_init
#define ABSORB ascon_absorb
#define ENCRYPT ascon_encrypt
#define DECRYPT ascon_decrypt
#define FINAL ascon_final
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
word_t K0, K1, K2;
loadkey(&K0, &K1, &K2, k);
init(s, npub, K0, K1, K2);
}
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
absorb(s, ad, adlen);
}
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) {
encrypt(s, c, m, mlen);
}
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) {
decrypt(s, m, c, clen);
}
void ascon_final(state_t* s, const uint8_t* k) {
word_t K0, K1, K2;
loadkey(&K0, &K1, &K2, k);
final(s, K0, K1, K2);
}
#endif
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
uint64_t mlen, const uint8_t* ad, uint64_t adlen,
const uint8_t* nsec, const uint8_t* npub,
const uint8_t* k) {
word_t K0, K1, K2;
state_t s;
(void)nsec;
*clen = mlen + CRYPTO_ABYTES;
/* perform ascon computation */
loadkey(&K0, &K1, &K2, k);
INIT(&s, npub, K0, K1, K2);
ABSORB(&s, ad, adlen);
ENCRYPT(&s, c, m, mlen);
FINAL(&s, K0, K1, K2);
/* set tag */
c += mlen;
STOREBYTES(c, s.x3, 8);
STOREBYTES(c + 8, s.x4, 8);
return 0;
}
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
const uint8_t* c, uint64_t clen, const uint8_t* ad,
uint64_t adlen, const uint8_t* npub, const uint8_t* k) {
word_t K0, K1, K2;
state_t s;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
*mlen = clen = clen - CRYPTO_ABYTES;
/* perform ascon computation */
loadkey(&K0, &K1, &K2, k);
INIT(&s, npub, K0, K1, K2);
ABSORB(&s, ad, adlen);
DECRYPT(&s, m, c, clen);
FINAL(&s, K0, K1, K2);
/* verify tag (should be constant time, check compiler output) */
c += clen;
s.x3 = XOR(s.x3, LOADBYTES(c, 8));
s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8));
if (NOTZERO(s.x3, s.x4)) {
*mlen = 0;
return -1;
}
return 0;
}
......@@ -16,4 +16,4 @@ void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k);
#endif // ASCON_H_
#endif /* ASCON_H */
......@@ -21,11 +21,6 @@
#define ASCON_UNROLL_LOOPS 1
#endif
/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */
#ifndef ASCON_DATA_ACCESS
#define ASCON_DATA_ACCESS 'M'
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
......
#ifndef INTERLEAVE_H_
#define INTERLEAVE_H_
#include <stdint.h>
__forceinline uint32_t deinterleave_uint32(uint32_t x) {
uint32_t t;
t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1);
t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2);
t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4);
t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8);
return x;
}
__forceinline uint32_t interleave_uint32(uint32_t x) {
uint32_t t;
t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8);
t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4);
t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2);
t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1);
return x;
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t deinterleave32(uint64_t in) {
uint32_t hi = in >> 32;
uint32_t lo = in;
uint32_t r0, r1;
lo = deinterleave_uint32(lo);
hi = deinterleave_uint32(hi);
r0 = (lo & 0x0000FFFF) | (hi << 16);
r1 = (lo >> 16) | (hi & 0xFFFF0000);
return (uint64_t)r1 << 32 | r0;
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t interleave32(uint64_t in) {
uint32_t r0 = in;
uint32_t r1 = in >> 32;
uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16);
uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000);
lo = interleave_uint32(lo);
hi = interleave_uint32(hi);
return (uint64_t)hi << 32 | lo;
}
#endif /* INTERLEAVE_H_ */
......@@ -21,44 +21,23 @@
#define ASCON_HASH_BYTES 32
#define ASCON_128_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_128A_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128A_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128A_PB_ROUNDS) << 32))
#define ASCON_80PQ_IV \
U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_HASH_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_HASH_BYTES * 8) << 0))
#define ASCON_XOF_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40))
#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull)
#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull)
#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull)
#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull)
#define ASCON_128_IV WORD_T(0x8021000008220000)
#define ASCON_128A_IV WORD_T(0x8822000000200000)
#define ASCON_80PQ_IV WORD_T(0xc021000008220000)
#define ASCON_HASH_IV WORD_T(0x0020000008020010)
#define ASCON_XOF_IV WORD_T(0x0020000008020000)
#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7)
#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340)
#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8)
#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98)
#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398)
#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6)
#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220)
#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04)
#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2)
#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
......
......@@ -12,6 +12,7 @@
#include <stdio.h>
#include "ascon.h"
#include "word.h"
__forceinline void printword(const char* text, const word_t x) {
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x));
......
......@@ -19,58 +19,34 @@ __forceinline void PINIT(state_t* s) {
}
__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) {
uint32_t tmp_e, tmp_o;
word_t tmp, C = {.o = C_o, .e = C_e};
/* round constant */
s->x2.e ^= C_e;
s->x2.o ^= C_o;
s->x2 = XOR(s->x2, C);
/* s-box layer */
s->x0.e ^= s->x4.e;
s->x0.o ^= s->x4.o;
s->x4.e ^= s->x3.e;
s->x4.o ^= s->x3.o;
s->x2.e ^= s->x1.e;
s->x2.o ^= s->x1.o;
tmp_e = s->x0.e & (~s->x4.e);
tmp_o = s->x0.o & (~s->x4.o);
s->x0.e ^= s->x2.e & (~s->x1.e);
s->x0.o ^= s->x2.o & (~s->x1.o);
s->x2.e ^= s->x4.e & (~s->x3.e);
s->x2.o ^= s->x4.o & (~s->x3.o);
s->x4.e ^= s->x1.e & (~s->x0.e);
s->x4.o ^= s->x1.o & (~s->x0.o);
s->x1.e ^= s->x3.e & (~s->x2.e);
s->x1.o ^= s->x3.o & (~s->x2.o);
s->x3.e ^= tmp_e;
s->x3.o ^= tmp_o;
s->x1.e ^= s->x0.e;
s->x1.o ^= s->x0.o;
s->x3.e ^= s->x2.e;
s->x3.o ^= s->x2.o;
s->x0.e ^= s->x4.e;
s->x0.o ^= s->x4.o;
s->x0 = XOR(s->x0, s->x4);
s->x4 = XOR(s->x4, s->x3);
s->x2 = XOR(s->x2, s->x1);
tmp = AND(s->x0, NOT(s->x4));
s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1)));
s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3)));
s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0)));
s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2)));
s->x3 = XOR(s->x3, tmp);
s->x1 = XOR(s->x1, s->x0);
s->x3 = XOR(s->x3, s->x2);
s->x0 = XOR(s->x0, s->x4);
/* linear layer */
tmp_e = s->x0.e ^ ROR32(s->x0.o, 4);
tmp_o = s->x0.o ^ ROR32(s->x0.e, 5);
s->x0.e ^= ROR32(tmp_o, 9);
s->x0.o ^= ROR32(tmp_e, 10);
tmp_e = s->x1.e ^ ROR32(s->x1.e, 11);
tmp_o = s->x1.o ^ ROR32(s->x1.o, 11);
s->x1.e ^= ROR32(tmp_o, 19);
s->x1.o ^= ROR32(tmp_e, 20);
tmp_e = s->x2.e ^ ROR32(s->x2.o, 2);
tmp_o = s->x2.o ^ ROR32(s->x2.e, 3);
s->x2.e ^= tmp_o;
s->x2.o ^= ROR32(tmp_e, 1);
tmp_e = s->x3.e ^ ROR32(s->x3.o, 3);
tmp_o = s->x3.o ^ ROR32(s->x3.e, 4);
s->x3.e ^= ROR32(tmp_e, 5);
s->x3.o ^= ROR32(tmp_o, 5);
tmp_e = s->x4.e ^ ROR32(s->x4.e, 17);
tmp_o = s->x4.o ^ ROR32(s->x4.o, 17);
s->x4.e ^= ROR32(tmp_o, 3);
s->x4.o ^= ROR32(tmp_e, 4);
s->x2.e = ~s->x2.e;
s->x2.o = ~s->x2.o;
tmp = XOR(s->x0, ROR64(s->x0, 28 - 19));
s->x0 = XOR(s->x0, ROR64(tmp, 19));
tmp = XOR(s->x1, ROR64(s->x1, 61 - 39));
s->x1 = XOR(s->x1, ROR64(tmp, 39));
tmp = XOR(s->x2, ROR64(s->x2, 6 - 1));
s->x2 = XOR(s->x2, ROR64(tmp, 1));
tmp = XOR(s->x3, ROR64(s->x3, 17 - 10));
s->x3 = XOR(s->x3, ROR64(tmp, 10));
tmp = XOR(s->x4, ROR64(s->x4, 41 - 7));
s->x4 = XOR(s->x4, ROR64(tmp, 7));
s->x2 = NOT(s->x2);
printstate(" round output", s);
}
......
......@@ -3,55 +3,51 @@
#include <stdint.h>
#include "config.h"
#include "endian.h"
#include "interleave.h"
typedef struct {
uint32_t e;
uint32_t o;
} word_t;
__forceinline uint32_t ROR32(uint32_t x, int n) {
return (n == 0) ? x : x >> n | x << (32 - n);
}
__forceinline word_t ROR64(word_t x, int n) {
word_t r;
r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2);
r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2);
return r;
}
__forceinline word_t WORD_T(uint64_t x) {
return (word_t){.o = x >> 32, .e = x};
}
__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; }
__forceinline uint64_t TOBI32(uint64_t in);
__forceinline uint64_t FROMBI32(uint64_t in);
__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); }
__forceinline word_t U64TOWORD(uint64_t x) {
uint64_t w = TOBI32(x);
return (word_t){.o = w >> 32, .e = w};
}
__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); }
__forceinline uint64_t WORDTOU64(word_t w) {
return FROMBI32((uint64_t)w.o << 32 | w.e);
__forceinline word_t NOT(word_t a) {
a.e = ~a.e;
a.o = ~a.o;
return a;
}
#define XOR(a, b) \
do { \
word_t tb = b; \
(a).e ^= tb.e; \
(a).o ^= tb.o; \
} while (0)
#define AND(a, b) \
do { \
word_t tb = b; \
(a).e &= tb.e; \
(a).o &= tb.o; \
} while (0)
__forceinline uint32_t ROR32(uint32_t x, int n) {
return x >> n | x << (32 - n);
__forceinline word_t XOR(word_t a, word_t b) {
a.e ^= b.e;
a.o ^= b.o;
return a;
}
__forceinline word_t ROR64(word_t x, int n) {
word_t r;
r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2);
r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2);
return r;
__forceinline word_t AND(word_t a, word_t b) {
a.e &= b.e;
a.o &= b.o;
return a;
}
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
......@@ -61,57 +57,58 @@ __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
return r;
}
__forceinline int NOTZERO(word_t a, word_t b) {
int result = 0;
for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i];
for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i];
return result;
__forceinline uint8_t NOTZERO(word_t a, word_t b) {
uint32_t result = a.e | a.o | b.e | b.o;
result |= result >> 16;
result |= result >> 8;
return (uint8_t)result;
}
/* set padding byte in 64-bit Ascon word */
__forceinline word_t PAD(int i) {
return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32);
}
/* byte mask for 64-bit Ascon word (1 <= n <= 8) */
__forceinline word_t XMASK(int n) {
__forceinline word_t CLEAR(word_t w, int n) {
/* undefined for n == 0 */
uint32_t mask = 0x0fffffff >> (n * 4 - 4);
return WORD_T((uint64_t)mask << 32 | mask);
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t TOBI32(uint64_t in) {
uint32_t hi = in >> 32;
uint32_t lo = in;
uint32_t r0, r1;
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
r0 = (lo & 0x0000FFFF) | (hi << 16);
r1 = (lo >> 16) | (hi & 0xFFFF0000);
return (uint64_t)r1 << 32 | r0;
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t FROMBI32(uint64_t in) {
uint32_t r0 = in;
uint32_t r1 = in >> 32;
uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16);
uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000);
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
return (uint64_t)hi << 32 | lo;
return AND(w, WORD_T((uint64_t)mask << 32 | mask));
}
__forceinline uint64_t MASK(int n) {
/* undefined for n == 0 */
return ~0ull >> (64 - 8 * n);
}
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = *(uint64_t*)bytes;
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes & MASK(n);
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(n);
*(uint64_t*)bytes |= U64BIG(x);
}
__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
uint64_t x = 0;
for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i];
return U64TOWORD(x);
}
__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i];
}
#endif /* WORD_H_ */
#include "api.h"
#include "ascon.h"
#include "permutations.h"
#include "printstate.h"
void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len,
uint8_t mode);
void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen,
const uint8_t* ad, uint64_t adlen, const uint8_t* npub,
const uint8_t* k, uint8_t mode) {
word_t K0, K1, K2;
/* load key */
if (CRYPTO_KEYBYTES == 20) {
K0 = KEYROT(WORD_T(0), LOAD(k, 4));
k += 4;
}
K1 = LOAD64(k);
K2 = LOAD64(k + 8);
/* initialization */
s->x0 = IV;
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0);
s->x1 = K1;
s->x2 = K2;
s->x3 = LOAD64(npub);
s->x4 = LOAD64(npub + 8);
P12(s);
if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("initialization", s);
/* process associated data */
if (adlen) {
process_data(s, (void*)0, ad, adlen, ASCON_ABSORB);
PB(s);
}
s->x4 = XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
/* process plaintext/ciphertext */
process_data(s, out, in, tlen, mode);
if (mode == ASCON_ENCRYPT) printstate("process plaintext", s);
if (mode == ASCON_DECRYPT) printstate("process ciphertext", s);
/* finalization */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
s->x2 = XOR(s->x2, K1);
s->x3 = XOR(s->x3, K2);
}
if (CRYPTO_KEYBYTES == 20) {
s->x1 = XOR(s->x1, KEYROT(K0, K1));
s->x2 = XOR(s->x2, KEYROT(K1, K2));
s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0)));
}
P12(s);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("finalization", s);
}
......@@ -10,9 +10,11 @@ typedef struct {
word_t x0, x1, x2, x3, x4;
} state_t;
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
#define ASCON_ABSORB 0x1
#define ASCON_SQUEEZE 0x2
#define ASCON_INSERT 0x4
#define ASCON_ENCRYPT (ASCON_ABSORB | ASCON_SQUEEZE)
#define ASCON_DECRYPT (ASCON_ABSORB | ASCON_SQUEEZE | ASCON_INSERT)
void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len,
uint8_t mode);
......@@ -21,4 +23,4 @@ void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen,
const uint8_t* ad, uint64_t adlen, const uint8_t* npub,
const uint8_t* k, uint8_t mode);
#endif // ASCON_H_
#endif /* ASCON_H */
......@@ -21,11 +21,6 @@
#define ASCON_UNROLL_LOOPS 1
#endif
/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */
#ifndef ASCON_DATA_ACCESS
#define ASCON_DATA_ACCESS 'B'
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
......
#include "api.h"
#include "ascon.h"
#include "loadstore.h"
#include "permutations.h"
#include "printstate.h"
......@@ -20,10 +19,10 @@ int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
/* set plaintext size */
*mlen = clen - CRYPTO_ABYTES;
/* ascon decryption */
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DECRYPT);
/* verify tag (should be constant time, check compiler output) */
XOR(s.x3, LOAD64(c + *mlen));
XOR(s.x4, LOAD64(c + *mlen + 8));
s.x3 = XOR(s.x3, LOADBYTES(c + *mlen, 8));
s.x4 = XOR(s.x4, LOADBYTES(c + *mlen + 8, 8));
if (NOTZERO(s.x3, s.x4)) {
*mlen = 0;
return -1;
......
#include "api.h"
#include "ascon.h"
#include "loadstore.h"
#include "permutations.h"
#include "printstate.h"
......@@ -17,9 +16,9 @@ int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
/* set ciphertext size */
*clen = mlen + CRYPTO_ABYTES;
/* ascon encryption */
ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);
ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENCRYPT);
/* set tag */
STORE64(c + mlen, s.x3);
STORE64(c + mlen + 8, s.x4);
STOREBYTES(c + mlen, s.x3, 8);
STOREBYTES(c + mlen + 8, s.x4, 8);
return 0;
}
#include "interleave.h"
static inline uint32_t deinterleave_uint32(uint32_t x) {
uint32_t t;
t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1);
t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2);
t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4);
t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8);
return x;
}
static inline uint32_t interleave_uint32(uint32_t x) {
uint32_t t;
t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8);
t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4);
t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2);
t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1);
return x;
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
uint64_t deinterleave32(uint64_t in) {
uint32_t hi = in >> 32;
uint32_t lo = in;
uint32_t r0, r1;
lo = deinterleave_uint32(lo);
hi = deinterleave_uint32(hi);
r0 = (lo & 0x0000FFFF) | (hi << 16);
r1 = (lo >> 16) | (hi & 0xFFFF0000);
return (uint64_t)r1 << 32 | r0;
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
uint64_t interleave32(uint64_t in) {
uint32_t r0 = in;
uint32_t r1 = in >> 32;
uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16);
uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000);
lo = interleave_uint32(lo);
hi = interleave_uint32(hi);
return (uint64_t)hi << 32 | lo;
}
#ifndef INTERLEAVE_H_
#define INTERLEAVE_H_
#include <stdint.h>
uint64_t deinterleave32(uint64_t in);
uint64_t interleave32(uint64_t in);
#endif /* INTERLEAVE_H_ */
......@@ -21,44 +21,23 @@
#define ASCON_HASH_BYTES 32
#define ASCON_128_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_128A_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128A_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128A_PB_ROUNDS) << 32))
#define ASCON_80PQ_IV \
U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_HASH_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_HASH_BYTES * 8) << 0))
#define ASCON_XOF_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40))
#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull)
#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull)
#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull)
#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull)
#define ASCON_128_IV WORD_T(0x8021000008220000)
#define ASCON_128A_IV WORD_T(0x8822000000200000)
#define ASCON_80PQ_IV WORD_T(0xc021000008220000)
#define ASCON_HASH_IV WORD_T(0x0020000008020010)
#define ASCON_XOF_IV WORD_T(0x0020000008020000)
#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7)
#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340)
#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8)
#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98)
#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398)
#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6)
#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220)
#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04)
#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2)
#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
......
......@@ -12,6 +12,7 @@
#include <stdio.h>
#include "ascon.h"
#include "word.h"
__forceinline void printword(const char* text, const word_t x) {
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x));
......
#include "api.h"
#include "ascon.h"
#include "loadstore.h"
#include "permutations.h"
#include "printstate.h"
......@@ -11,13 +10,13 @@ void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len,
while (len >= ASCON_RATE) {
tmp0 = LOAD64(in);
tmp1 = LOAD64(in + 8);
XOR(s->x0, tmp0);
XOR(s->x1, tmp1);
if (mode != ASCON_AD) {
s->x0 = XOR(s->x0, tmp0);
s->x1 = XOR(s->x1, tmp1);
if (mode & ASCON_SQUEEZE) {
STORE64(out, s->x0);
STORE64(out + 8, s->x1);
}
if (mode == ASCON_DEC) {
if (mode & ASCON_INSERT) {
s->x0 = tmp0;
s->x1 = tmp1;
}
......@@ -34,28 +33,28 @@ void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len,
tmp1 = LOAD(in + 8, len - 8);
else
tmp0 = LOAD(in, len);
XOR(s->x0, tmp0);
XOR(s->x1, tmp1);
if (mode != ASCON_AD) {
s->x0 = XOR(s->x0, tmp0);
s->x1 = XOR(s->x1, tmp1);
if (mode & ASCON_SQUEEZE) {
if (len >= 8) STORE64(out, s->x0);
if (len > 8)
STORE(out + 8, s->x1, len - 8);
else
STORE(out, s->x0, len);
}
if (mode == ASCON_DEC) {
if (mode & ASCON_INSERT) {
if (len >= 8) s->x0 = tmp0;
if (len > 8) {
AND(s->x1, XMASK(len - 8));
XOR(s->x1, tmp1);
s->x1 = CLEAR(s->x1, len - 8);
s->x1 = XOR(s->x1, tmp1);
} else {
AND(s->x0, XMASK(len));
XOR(s->x0, tmp0);
s->x0 = CLEAR(s->x0, len);
s->x0 = XOR(s->x0, tmp0);
}
}
}
if (len < 8)
XOR(s->x0, PAD(len % 8));
s->x0 = XOR(s->x0, PAD(len % 8));
else
XOR(s->x1, PAD(len % 8));
s->x1 = XOR(s->x1, PAD(len % 8));
}
......@@ -19,66 +19,34 @@ __forceinline void PINIT(state_t* s) {
}
__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) {
state_t t;
word_t tmp, C = {.o = C_o, .e = C_e};
/* round constant */
s->x2.e ^= C_e;
s->x2.o ^= C_o;
s->x2 = XOR(s->x2, C);
/* s-box layer */
s->x0.e ^= s->x4.e;
s->x0.o ^= s->x4.o;
s->x4.e ^= s->x3.e;
s->x4.o ^= s->x3.o;
s->x2.e ^= s->x1.e;
s->x2.o ^= s->x1.o;
t.x0.e = s->x0.e;
t.x0.o = s->x0.o;
t.x4.e = s->x4.e;
t.x4.o = s->x4.o;
t.x3.e = s->x3.e;
t.x3.o = s->x3.o;
t.x1.e = s->x1.e;
t.x1.o = s->x1.o;
t.x2.e = s->x2.e;
t.x2.o = s->x2.o;
s->x0.e = t.x0.e ^ (~t.x1.e & t.x2.e);
s->x0.o = t.x0.o ^ (~t.x1.o & t.x2.o);
s->x2.e = t.x2.e ^ (~t.x3.e & t.x4.e);
s->x2.o = t.x2.o ^ (~t.x3.o & t.x4.o);
s->x4.e = t.x4.e ^ (~t.x0.e & t.x1.e);
s->x4.o = t.x4.o ^ (~t.x0.o & t.x1.o);
s->x1.e = t.x1.e ^ (~t.x2.e & t.x3.e);
s->x1.o = t.x1.o ^ (~t.x2.o & t.x3.o);
s->x3.e = t.x3.e ^ (~t.x4.e & t.x0.e);
s->x3.o = t.x3.o ^ (~t.x4.o & t.x0.o);
s->x1.e ^= s->x0.e;
s->x1.o ^= s->x0.o;
s->x3.e ^= s->x2.e;
s->x3.o ^= s->x2.o;
s->x0.e ^= s->x4.e;
s->x0.o ^= s->x4.o;
s->x0 = XOR(s->x0, s->x4);
s->x4 = XOR(s->x4, s->x3);
s->x2 = XOR(s->x2, s->x1);
tmp = AND(s->x0, NOT(s->x4));
s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1)));
s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3)));
s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0)));
s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2)));
s->x3 = XOR(s->x3, tmp);
s->x1 = XOR(s->x1, s->x0);
s->x3 = XOR(s->x3, s->x2);
s->x0 = XOR(s->x0, s->x4);
/* linear layer */
t.x0.e = s->x0.e ^ ROR32(s->x0.o, 4);
t.x0.o = s->x0.o ^ ROR32(s->x0.e, 5);
t.x1.e = s->x1.e ^ ROR32(s->x1.e, 11);
t.x1.o = s->x1.o ^ ROR32(s->x1.o, 11);
t.x2.e = s->x2.e ^ ROR32(s->x2.o, 2);
t.x2.o = s->x2.o ^ ROR32(s->x2.e, 3);
t.x3.e = s->x3.e ^ ROR32(s->x3.o, 3);
t.x3.o = s->x3.o ^ ROR32(s->x3.e, 4);
t.x4.e = s->x4.e ^ ROR32(s->x4.e, 17);
t.x4.o = s->x4.o ^ ROR32(s->x4.o, 17);
s->x0.e ^= ROR32(t.x0.o, 9);
s->x0.o ^= ROR32(t.x0.e, 10);
s->x1.e ^= ROR32(t.x1.o, 19);
s->x1.o ^= ROR32(t.x1.e, 20);
s->x2.e ^= t.x2.o;
s->x2.o ^= ROR32(t.x2.e, 1);
s->x3.e ^= ROR32(t.x3.e, 5);
s->x3.o ^= ROR32(t.x3.o, 5);
s->x4.e ^= ROR32(t.x4.o, 3);
s->x4.o ^= ROR32(t.x4.e, 4);
s->x2.e = ~s->x2.e;
s->x2.o = ~s->x2.o;
tmp = XOR(s->x0, ROR64(s->x0, 28 - 19));
s->x0 = XOR(s->x0, ROR64(tmp, 19));
tmp = XOR(s->x1, ROR64(s->x1, 61 - 39));
s->x1 = XOR(s->x1, ROR64(tmp, 39));
tmp = XOR(s->x2, ROR64(s->x2, 6 - 1));
s->x2 = XOR(s->x2, ROR64(tmp, 1));
tmp = XOR(s->x3, ROR64(s->x3, 17 - 10));
s->x3 = XOR(s->x3, ROR64(tmp, 10));
tmp = XOR(s->x4, ROR64(s->x4, 41 - 7));
s->x4 = XOR(s->x4, ROR64(tmp, 7));
s->x2 = NOT(s->x2);
printstate(" round output", s);
}
......
......@@ -3,55 +3,51 @@
#include <stdint.h>
#include "config.h"
#include "endian.h"
#include "interleave.h"
typedef struct {
uint32_t e;
uint32_t o;
} word_t;
__forceinline uint32_t ROR32(uint32_t x, int n) {
return (n == 0) ? x : x >> n | x << (32 - n);
}
__forceinline word_t ROR64(word_t x, int n) {
word_t r;
r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2);
r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2);
return r;
}
__forceinline word_t WORD_T(uint64_t x) {
return (word_t){.o = x >> 32, .e = x};
}
__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; }
uint64_t TOBI32(uint64_t in);
uint64_t FROMBI32(uint64_t in);
__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); }
__forceinline word_t U64TOWORD(uint64_t x) {
uint64_t w = TOBI32(x);
return (word_t){.o = w >> 32, .e = w};
}
__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); }
__forceinline uint64_t WORDTOU64(word_t w) {
return FROMBI32((uint64_t)w.o << 32 | w.e);
__forceinline word_t NOT(word_t a) {
a.e = ~a.e;
a.o = ~a.o;
return a;
}
#define XOR(a, b) \
do { \
word_t tb = b; \
(a).e ^= tb.e; \
(a).o ^= tb.o; \
} while (0)
#define AND(a, b) \
do { \
word_t tb = b; \
(a).e &= tb.e; \
(a).o &= tb.o; \
} while (0)
__forceinline uint32_t ROR32(uint32_t x, int n) {
return x >> n | x << (32 - n);
__forceinline word_t XOR(word_t a, word_t b) {
a.e ^= b.e;
a.o ^= b.o;
return a;
}
__forceinline word_t ROR64(word_t x, int n) {
word_t r;
r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2);
r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2);
return r;
__forceinline word_t AND(word_t a, word_t b) {
a.e &= b.e;
a.o &= b.o;
return a;
}
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
......@@ -61,22 +57,58 @@ __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
return r;
}
__forceinline int NOTZERO(word_t a, word_t b) {
int result = 0;
for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i];
for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i];
return result;
__forceinline uint8_t NOTZERO(word_t a, word_t b) {
uint32_t result = a.e | a.o | b.e | b.o;
result |= result >> 16;
result |= result >> 8;
return (uint8_t)result;
}
/* set padding byte in 64-bit Ascon word */
__forceinline word_t PAD(int i) {
return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32);
}
/* byte mask for 64-bit Ascon word (1 <= n <= 8) */
__forceinline word_t XMASK(int n) {
__forceinline word_t CLEAR(word_t w, int n) {
/* undefined for n == 0 */
uint32_t mask = 0x0fffffff >> (n * 4 - 4);
return WORD_T((uint64_t)mask << 32 | mask);
return AND(w, WORD_T((uint64_t)mask << 32 | mask));
}
__forceinline uint64_t MASK(int n) {
/* undefined for n == 0 */
return ~0ull >> (64 - 8 * n);
}
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = *(uint64_t*)bytes;
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes & MASK(n);
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(n);
*(uint64_t*)bytes |= U64BIG(x);
}
__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
uint64_t x = 0;
for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i];
return U64TOWORD(x);
}
__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i];
}
#endif /* WORD_H_ */
#include "api.h"
#include "ascon.h"
#include "permutations.h"
#include "printstate.h"
__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) {
KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 20) {
*K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
k += 4;
}
*K1 = XOR(*K1, LOAD64(k));
*K2 = XOR(*K2, LOAD64(k + 8));
}
__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1,
word_t K2) {
word_t N0, N1;
/* load nonce */
N0 = LOAD64(npub);
N1 = LOAD64(npub + 8);
/* initialization */
PINIT(s);
s->x0 = XOR(s->x0, IV);
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0);
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
s->x3 = XOR(s->x3, N0);
s->x4 = XOR(s->x4, N1);
P12(s);
if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("initialization", s);
}
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
word_t* restrict px;
/* process associated data */
if (adlen) {
while (adlen >= ASCON_RATE) {
s->x0 = XOR(s->x0, LOAD64(ad));
if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8));
PB(s);
ad += ASCON_RATE;
adlen -= ASCON_RATE;
}
/* final associated data block */
px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) {
s->x0 = XOR(s->x0, LOAD64(ad));
px = &s->x1;
ad += 8;
adlen -= 8;
}
if (adlen) *px = XOR(*px, LOAD(ad, adlen));
*px = XOR(*px, PAD(adlen));
PB(s);
}
s->x4 = XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
}
__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
word_t* restrict px;
/* process plaintext */
while (mlen >= ASCON_RATE) {
s->x0 = XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
if (ASCON_RATE == 16) {
s->x1 = XOR(s->x1, LOAD64(m + 8));
STORE64(c + 8, s->x1);
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
mlen -= ASCON_RATE;
}
/* final plaintext block */
px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) {
s->x0 = XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
px = &s->x1;
m += 8;
c += 8;
mlen -= 8;
}
if (mlen) {
*px = XOR(*px, LOAD(m, mlen));
STORE(c, *px, mlen);
}
*px = XOR(*px, PAD(mlen));
printstate("process plaintext", s);
}
__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
word_t* restrict px;
word_t cx;
/* process ciphertext */
while (clen >= ASCON_RATE) {
cx = LOAD64(c);
s->x0 = XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
if (ASCON_RATE == 16) {
cx = LOAD64(c + 8);
s->x1 = XOR(s->x1, cx);
STORE64(m + 8, s->x1);
s->x1 = cx;
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
clen -= ASCON_RATE;
}
/* final ciphertext block */
px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) {
cx = LOAD64(c);
s->x0 = XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
px = &s->x1;
m += 8;
c += 8;
clen -= 8;
}
if (clen) {
cx = LOAD(c, clen);
*px = XOR(*px, cx);
STORE(m, *px, clen);
*px = CLEAR(*px, clen);
*px = XOR(*px, cx);
}
*px = XOR(*px, PAD(clen));
printstate("process ciphertext", s);
}
__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) {
/* finalization */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
s->x2 = XOR(s->x2, K1);
s->x3 = XOR(s->x3, K2);
}
if (CRYPTO_KEYBYTES == 20) {
s->x1 = XOR(s->x1, KEYROT(K0, K1));
s->x2 = XOR(s->x2, KEYROT(K1, K2));
s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0)));
}
P12(s);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("finalization", s);
}
#if ASCON_INLINE_MODE
#define INIT init
#define ABSORB absorb
#define ENCRYPT encrypt
#define DECRYPT decrypt
#define FINAL final
#else
#define INIT ascon_init
#define ABSORB ascon_absorb
#define ENCRYPT ascon_encrypt
#define DECRYPT ascon_decrypt
#define FINAL ascon_final
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
word_t K0, K1, K2;
loadkey(&K0, &K1, &K2, k);
init(s, npub, K0, K1, K2);
}
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
absorb(s, ad, adlen);
}
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) {
encrypt(s, c, m, mlen);
}
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) {
decrypt(s, m, c, clen);
}
void ascon_final(state_t* s, const uint8_t* k) {
word_t K0, K1, K2;
loadkey(&K0, &K1, &K2, k);
final(s, K0, K1, K2);
}
#endif
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
uint64_t mlen, const uint8_t* ad, uint64_t adlen,
const uint8_t* nsec, const uint8_t* npub,
const uint8_t* k) {
word_t K0, K1, K2;
state_t s;
(void)nsec;
*clen = mlen + CRYPTO_ABYTES;
/* perform ascon computation */
loadkey(&K0, &K1, &K2, k);
INIT(&s, npub, K0, K1, K2);
ABSORB(&s, ad, adlen);
ENCRYPT(&s, c, m, mlen);
FINAL(&s, K0, K1, K2);
/* set tag */
c += mlen;
STOREBYTES(c, s.x3, 8);
STOREBYTES(c + 8, s.x4, 8);
return 0;
}
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
const uint8_t* c, uint64_t clen, const uint8_t* ad,
uint64_t adlen, const uint8_t* npub, const uint8_t* k) {
word_t K0, K1, K2;
state_t s;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
*mlen = clen = clen - CRYPTO_ABYTES;
/* perform ascon computation */
loadkey(&K0, &K1, &K2, k);
INIT(&s, npub, K0, K1, K2);
ABSORB(&s, ad, adlen);
DECRYPT(&s, m, c, clen);
FINAL(&s, K0, K1, K2);
/* verify tag (should be constant time, check compiler output) */
c += clen;
s.x3 = XOR(s.x3, LOADBYTES(c, 8));
s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8));
if (NOTZERO(s.x3, s.x4)) {
*mlen = 0;
return -1;
}
return 0;
}
#ifndef ASCON_H_
#define ASCON_H_
#include <stdint.h>
#include "config.h"
#include "word.h"
typedef struct {
word_t x0, x1, x2, x3, x4;
} state_t;
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k);
#endif /* ASCON_H */
#ifndef CONFIG_H_
#define CONFIG_H_
/* inline the Ascon mode */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#endif
/* inline the Ascon permutations */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0
#endif
/* single function for all permutations */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 1
#endif
/* unroll the permutation loops */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 0
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#endif
#endif /* CONFIG_H_ */
#ifndef ENDIAN_H_
#define ENDIAN_H_
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines */
#ifndef NDEBUG
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines */
#ifndef NDEBUG
#pragma message("Using macros for little endian machines")
#endif
#define U64BIG(x) \
(((0x00000000000000FFULL & (x)) << 56) | \
((0x000000000000FF00ULL & (x)) << 40) | \
((0x0000000000FF0000ULL & (x)) << 24) | \
((0x00000000FF000000ULL & (x)) << 8) | \
((0x000000FF00000000ULL & (x)) >> 8) | \
((0x0000FF0000000000ULL & (x)) >> 24) | \
((0x00FF000000000000ULL & (x)) >> 40) | \
((0xFF00000000000000ULL & (x)) >> 56))
#define U32BIG(x) \
(((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \
((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24))
#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8))
#else
#error "Ascon byte order macros not defined in endian.h"
#endif
#endif /* ENDIAN_H_ */
#include "interleave.h"
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
uint64_t interleave8(uint64_t x) {
x = (x & 0xaa55aa55aa55aa55ull) | ((x & 0x00aa00aa00aa00aaull) << 7) |
((x >> 7) & 0x00aa00aa00aa00aaull);
x = (x & 0xcccc3333cccc3333ull) | ((x & 0x0000cccc0000ccccull) << 14) |
((x >> 14) & 0x0000cccc0000ccccull);
x = (x & 0xf0f0f0f00f0f0f0full) | ((x & 0x00000000f0f0f0f0ull) << 28) |
((x >> 28) & 0x00000000f0f0f0f0ull);
return x;
}
#ifndef INTERLEAVE_H_
#define INTERLEAVE_H_
#include <stdint.h>
uint64_t interleave8(uint64_t x);
#endif /* INTERLEAVE_H_ */
#include "permutations.h"
#include "round.h"
#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM
const uint64_t constants[12] = {
0x0101010100000000ull, 0x0101010000000001ull, 0x0101000100000100ull,
0x0101000000000101ull, 0x0100010100010000ull, 0x0100010000010001ull,
0x0100000100010100ull, 0x0100000000010101ull, 0x0001010101000000ull,
0x0001010001000001ull, 0x0001000101000100ull, 0x0001000001000101ull};
#endif
#if ASCON_INLINE_PERM
#elif ASCON_SINGLE_PERM
void P(state_t* s, uint8_t rounds) {
printstate(" permutation input", s);
for (int i = START(rounds); i < 12; ++i) ROUND(s, constants[i]);
}
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
#endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8
void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
#endif
#endif
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
#include <stdint.h>
#include "api.h"
#include "ascon.h"
#include "printstate.h"
#include "round.h"
#define ASCON_128_KEYBYTES 16
#define ASCON_128A_KEYBYTES 16
#define ASCON_80PQ_KEYBYTES 20
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32
#define ASCON_128_IV WORD_T(0x8040000020301000ull)
#define ASCON_128A_IV WORD_T(0xc000000030200000ull)
#define ASCON_80PQ_IV WORD_T(0x8040800020301000ull)
#define ASCON_HASH_IV WORD_T(0x0040000020200002ull)
#define ASCON_XOF_IV WORD_T(0x0040000020200000ull)
#define ASCON_HASH_IV0 WORD_T(0xfa8e976bb985dc4dull)
#define ASCON_HASH_IV1 WORD_T(0xc8085072a40ccd94ull)
#define ASCON_HASH_IV2 WORD_T(0xfe1781be5a847314ull)
#define ASCON_HASH_IV3 WORD_T(0x2f871f6c6d0082b2ull)
#define ASCON_HASH_IV4 WORD_T(0x7a1ba68850ec407eull)
#define ASCON_XOF_IV0 WORD_T(0x8a46f0d354e771b8ull)
#define ASCON_XOF_IV1 WORD_T(0x04489f4084368cd0ull)
#define ASCON_XOF_IV2 WORD_T(0x6c94f2150dbcf66cull)
#define ASCON_XOF_IV3 WORD_T(0x48965294f143b44eull)
#define ASCON_XOF_IV4 WORD_T(0x0788515fe0e5fb8aull)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#if ASCON_RATE == 16
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20
#define IV ASCON_80PQ_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#define START(n) (12 - n)
#if ASCON_UNROLL_LOOPS
__forceinline void P12ROUNDS(state_t* s) {
ROUND(s, 0x0101010100000000ull);
ROUND(s, 0x0101010000000001ull);
ROUND(s, 0x0101000100000100ull);
ROUND(s, 0x0101000000000101ull);
ROUND(s, 0x0100010100010000ull);
ROUND(s, 0x0100010000010001ull);
ROUND(s, 0x0100000100010100ull);
ROUND(s, 0x0100000000010101ull);
ROUND(s, 0x0001010101000000ull);
ROUND(s, 0x0001010001000001ull);
ROUND(s, 0x0001000101000100ull);
ROUND(s, 0x0001000001000101ull);
}
__forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0x0100010100010000ull);
ROUND(s, 0x0100010000010001ull);
ROUND(s, 0x0100000100010100ull);
ROUND(s, 0x0100000000010101ull);
ROUND(s, 0x0001010101000000ull);
ROUND(s, 0x0001010001000001ull);
ROUND(s, 0x0001000101000100ull);
ROUND(s, 0x0001000001000101ull);
}
__forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x0100000100010100ull);
ROUND(s, 0x0100000000010101ull);
ROUND(s, 0x0001010101000000ull);
ROUND(s, 0x0001010001000001ull);
ROUND(s, 0x0001000101000100ull);
ROUND(s, 0x0001000001000101ull);
}
#else /* !ASCON_UNROLL_LOOPS */
extern const uint64_t constants[12];
__forceinline void P12ROUNDS(state_t* s) {
for (int i = START(12); i < 12; ++i) ROUND(s, constants[i]);
}
__forceinline void P8ROUNDS(state_t* s) {
for (int i = START(8); i < 12; ++i) ROUND(s, constants[i]);
}
__forceinline void P6ROUNDS(state_t* s) {
for (int i = START(6); i < 12; ++i) ROUND(s, constants[i]);
}
#endif
#if ASCON_INLINE_PERM
__forceinline void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
__forceinline void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
__forceinline void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#elif ASCON_SINGLE_PERM
#define P12(s) P(s, 12)
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
void P(state_t* s, uint8_t rounds);
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#endif
#endif /* PERMUTATIONS_H_ */
#ifndef PRINTSTATE_H_
#define PRINTSTATE_H_
#ifdef NDEBUG
#define printword(text, w)
#define printstate(text, s)
#else
#include <inttypes.h>
#include <stdio.h>
#include "ascon.h"
#include "word.h"
__forceinline void printword(const char* text, const word_t x) {
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x));
}
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#endif
#endif /* PRINTSTATE_H_ */
#ifndef ROUND_H_
#define ROUND_H_
#include "ascon.h"
#include "printstate.h"
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
*K0 = WORD_T(0);
*K1 = WORD_T(0);
*K2 = WORD_T(0);
}
__forceinline void PINIT(state_t* s) {
s->x0 = WORD_T(0);
s->x1 = WORD_T(0);
s->x2 = WORD_T(0);
s->x3 = WORD_T(0);
s->x4 = WORD_T(0);
}
__forceinline void ROUND(state_t* s, uint64_t C) {
word_t tmp;
/* round constant */
s->x2 = XOR(s->x2, WORD_T(C));
/* s-box layer */
s->x0 = XOR(s->x0, s->x4);
s->x4 = XOR(s->x4, s->x3);
s->x2 = XOR(s->x2, s->x1);
tmp = AND(s->x0, NOT(s->x4));
s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1)));
s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3)));
s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0)));
s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2)));
s->x3 = XOR(s->x3, tmp);
s->x1 = XOR(s->x1, s->x0);
s->x3 = XOR(s->x3, s->x2);
s->x0 = XOR(s->x0, s->x4);
/* linear layer */
tmp = XOR(s->x0, ROR64(s->x0, 28 - 19));
s->x0 = XOR(s->x0, ROR64(tmp, 19));
tmp = XOR(s->x1, ROR64(s->x1, 61 - 39));
s->x1 = XOR(s->x1, ROR64(tmp, 39));
tmp = XOR(s->x2, ROR64(s->x2, 6 - 1));
s->x2 = XOR(s->x2, ROR64(tmp, 1));
tmp = XOR(s->x3, ROR64(s->x3, 17 - 10));
s->x3 = XOR(s->x3, ROR64(tmp, 10));
tmp = XOR(s->x4, ROR64(s->x4, 41 - 7));
s->x4 = XOR(s->x4, ROR64(tmp, 7));
s->x2 = NOT(s->x2);
printstate(" round output", s);
}
#endif /* ROUND_H_ */
#ifndef WORD_H_
#define WORD_H_
#include <stdint.h>
#include "endian.h"
#include "interleave.h"
typedef union {
uint64_t w;
uint8_t b[8];
} word_t;
__forceinline uint8_t ROR8(uint8_t a, int n) { return a >> n | a << (8 - n); }
__forceinline word_t ROR64(word_t a, int n) {
word_t b;
b.b[0] = ROR8(a.b[(n + 0) & 0x7], (n + 0) >> 3);
b.b[1] = ROR8(a.b[(n + 1) & 0x7], (n + 1) >> 3);
b.b[2] = ROR8(a.b[(n + 2) & 0x7], (n + 2) >> 3);
b.b[3] = ROR8(a.b[(n + 3) & 0x7], (n + 3) >> 3);
b.b[4] = ROR8(a.b[(n + 4) & 0x7], (n + 4) >> 3);
b.b[5] = ROR8(a.b[(n + 5) & 0x7], (n + 5) >> 3);
b.b[6] = ROR8(a.b[(n + 6) & 0x7], (n + 6) >> 3);
b.b[7] = ROR8(a.b[(n + 7) & 0x7], (n + 7) >> 3);
return b;
}
__forceinline word_t WORD_T(uint64_t x) {
word_t w;
w.w = x;
return w;
}
__forceinline uint64_t UINT64_T(word_t w) {
uint64_t x;
x = w.w;
return x;
}
__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(interleave8(x)); }
__forceinline uint64_t WORDTOU64(word_t w) { return interleave8(UINT64_T(w)); }
__forceinline word_t NOT(word_t a) {
a.w = ~a.w;
return a;
}
__forceinline word_t XOR(word_t a, word_t b) {
a.w ^= b.w;
return a;
}
__forceinline word_t AND(word_t a, word_t b) {
a.w &= b.w;
return a;
}
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
word_t w;
w.w = lo2hi.w << 32 | hi2lo.w >> 32;
return w;
}
__forceinline uint8_t NOTZERO(word_t a, word_t b) {
uint64_t result = a.w | b.w;
result |= result >> 32;
result |= result >> 16;
result |= result >> 8;
return (uint8_t)result;
}
__forceinline word_t PAD(int i) { return (word_t){.b[7] = 0x80 >> i}; }
__forceinline word_t CLEAR(word_t w, int n) {
/* undefined for n == 0 */
uint8_t m = 0xff >> n;
word_t mask = {
.b[0] = m,
.b[1] = m,
.b[2] = m,
.b[3] = m,
.b[4] = m,
.b[5] = m,
.b[6] = m,
.b[7] = m,
};
return AND(w, mask);
}
__forceinline uint64_t MASK(int n) {
/* undefined for n == 0 */
return ~0ull >> (64 - 8 * n);
}
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = *(uint64_t*)bytes;
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes & MASK(n);
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(n);
*(uint64_t*)bytes |= U64BIG(x);
}
__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
uint64_t x = 0;
for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i];
return U64TOWORD(x);
}
__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i];
}
#endif /* WORD_H_ */
#ifndef ASCON_H_
#define ASCON_H_
#include <stdint.h>
#include "word.h"
typedef struct {
word_t x0, x1, x2, x3, x4;
} state_t;
#endif /* ASCON_H */
#ifndef CONFIG_H_
#define CONFIG_H_
/* inline the Ascon mode */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#endif
/* inline the Ascon permutations */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 1
#endif
/* single function for all permutations */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif
/* unroll the permutation loops */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 1
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#endif
#endif /* CONFIG_H_ */
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
typedef struct {
uint64_t x0, x1, x2, x3, x4;
} state_t;
#include "ascon.h"
#include "config.h"
#include "round.h"
static const uint64_t C[12] = {
0xffffffffffffff0full, 0xffffffffffffff1eull, 0xffffffffffffff2dull,
......@@ -12,52 +12,6 @@ static const uint64_t C[12] = {
0xffffffffffffff96ull, 0xffffffffffffffa5ull, 0xffffffffffffffb4ull,
};
/* clang-format off */
#define ROUND(OFFSET) \
"vldr d31, [%[C], #" #OFFSET "] \n\t" \
"veor d0, d0, d4 \n\t" \
"veor d4, d4, d3 \n\t" \
"veor d2, d2, d31 \n\t" \
"vbic d13, d0, d4 \n\t" \
"vbic d12, d4, d3 \n\t" \
"veor d2, d2, d1 \n\t" \
"vbic d14, d1, d0 \n\t" \
"vbic d11, d3, d2 \n\t" \
"vbic d10, d2, d1 \n\t" \
"veor q0, q0, q5 \n\t" \
"veor q1, q1, q6 \n\t" \
"veor d4, d4, d14 \n\t" \
"veor d1, d1, d0 \n\t" \
"veor d3, d3, d2 \n\t" \
"veor d0, d0, d4 \n\t" \
"vsri.64 d14, d4, #7 \n\t" \
"vsri.64 d24, d4, #41 \n\t" \
"vsri.64 d11, d1, #39 \n\t" \
"vsri.64 d21, d1, #61 \n\t" \
"vsri.64 d10, d0, #19 \n\t" \
"vsri.64 d20, d0, #28 \n\t" \
"vsri.64 d12, d2, #1 \n\t" \
"vsri.64 d22, d2, #6 \n\t" \
"vsri.64 d13, d3, #10 \n\t" \
"vsri.64 d23, d3, #17 \n\t" \
"vsli.64 d10, d0, #45 \n\t" \
"vsli.64 d20, d0, #36 \n\t" \
"vsli.64 d11, d1, #25 \n\t" \
"vsli.64 d21, d1, #3 \n\t" \
"vsli.64 d12, d2, #63 \n\t" \
"vsli.64 d22, d2, #58 \n\t" \
"vsli.64 d13, d3, #54 \n\t" \
"vsli.64 d23, d3, #47 \n\t" \
"vsli.64 d14, d4, #57 \n\t" \
"vsli.64 d24, d4, #23 \n\t" \
"veor q5, q5, q0 \n\t" \
"veor q6, q6, q1 \n\t" \
"veor d14, d14, d4 \n\t" \
"veor q0, q5, q10 \n\t" \
"veor d4, d14, d24 \n\t" \
"veor q1, q6, q11 \n\t"
/* clang-format on */
#define P12() \
__asm__ __volatile__ ( \
".arm \n\t" \
......
#ifndef ROUND_H_
#define ROUND_H_
/* clang-format off */
#define ROUND(OFFSET) \
"vldr d31, [%[C], #" #OFFSET "] \n\t" \
"veor d0, d0, d4 \n\t" \
"veor d4, d4, d3 \n\t" \
"veor d2, d2, d31 \n\t" \
"vbic d13, d0, d4 \n\t" \
"vbic d12, d4, d3 \n\t" \
"veor d2, d2, d1 \n\t" \
"vbic d14, d1, d0 \n\t" \
"vbic d11, d3, d2 \n\t" \
"vbic d10, d2, d1 \n\t" \
"veor q0, q0, q5 \n\t" \
"veor q1, q1, q6 \n\t" \
"veor d4, d4, d14 \n\t" \
"veor d1, d1, d0 \n\t" \
"veor d3, d3, d2 \n\t" \
"veor d0, d0, d4 \n\t" \
"vsri.64 d14, d4, #7 \n\t" \
"vsri.64 d24, d4, #41 \n\t" \
"vsri.64 d11, d1, #39 \n\t" \
"vsri.64 d21, d1, #61 \n\t" \
"vsri.64 d10, d0, #19 \n\t" \
"vsri.64 d20, d0, #28 \n\t" \
"vsri.64 d12, d2, #1 \n\t" \
"vsri.64 d22, d2, #6 \n\t" \
"vsri.64 d13, d3, #10 \n\t" \
"vsri.64 d23, d3, #17 \n\t" \
"vsli.64 d10, d0, #45 \n\t" \
"vsli.64 d20, d0, #36 \n\t" \
"vsli.64 d11, d1, #25 \n\t" \
"vsli.64 d21, d1, #3 \n\t" \
"vsli.64 d12, d2, #63 \n\t" \
"vsli.64 d22, d2, #58 \n\t" \
"vsli.64 d13, d3, #54 \n\t" \
"vsli.64 d23, d3, #47 \n\t" \
"vsli.64 d14, d4, #57 \n\t" \
"vsli.64 d24, d4, #23 \n\t" \
"veor q5, q5, q0 \n\t" \
"veor q6, q6, q1 \n\t" \
"veor d14, d14, d4 \n\t" \
"veor q0, q5, q10 \n\t" \
"veor d4, d14, d24 \n\t" \
"veor q1, q6, q11 \n\t"
/* clang-format on */
#endif /* ROUND_H_ */
#ifndef WORD_H_
#define WORD_H_
#include <stdint.h>
#define WORDTOU64
#define U64TOWORD
typedef uint64_t word_t;
/* get byte from Ascon 64-bit word */
#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i))))
/* set byte in Ascon 64-bit word */
#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i)))
/* set padding byte in Ascon 64-bit word */
#define PAD(i) SETBYTE(0x80, i)
static inline uint64_t LOADBYTES(const uint8_t* bytes, int n) {
uint64_t x = 0;
for (int i = 0; i < n; ++i) x |= SETBYTE(bytes[i], i);
return x;
}
static inline void STOREBYTES(uint8_t* bytes, uint64_t x, int n) {
for (int i = 0; i < n; ++i) bytes[i] = GETBYTE(x, i);
}
static inline uint64_t CLEARBYTES(uint64_t x, int n) {
for (int i = 0; i < n; ++i) x &= ~SETBYTE(0xff, i);
return x;
}
#endif /* WORD_H_ */
#include "api.h"
#include "ascon.h"
#include "permutations.h"
#include "printstate.h"
__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) {
KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 20) {
*K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
k += 4;
}
*K1 = XOR(*K1, LOAD64(k));
*K2 = XOR(*K2, LOAD64(k + 8));
}
__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1,
word_t K2) {
word_t N0, N1;
/* load nonce */
N0 = LOAD64(npub);
N1 = LOAD64(npub + 8);
/* initialization */
PINIT(s);
s->x0 = XOR(s->x0, IV);
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0);
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
s->x3 = XOR(s->x3, N0);
s->x4 = XOR(s->x4, N1);
P12(s);
if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("initialization", s);
}
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
word_t* restrict px;
/* process associated data */
if (adlen) {
while (adlen >= ASCON_RATE) {
s->x0 = XOR(s->x0, LOAD64(ad));
if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8));
PB(s);
ad += ASCON_RATE;
adlen -= ASCON_RATE;
}
/* final associated data block */
px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) {
s->x0 = XOR(s->x0, LOAD64(ad));
px = &s->x1;
ad += 8;
adlen -= 8;
}
if (adlen) *px = XOR(*px, LOAD(ad, adlen));
*px = XOR(*px, PAD(adlen));
PB(s);
}
s->x4 = XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
}
__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
word_t* restrict px;
/* process plaintext */
while (mlen >= ASCON_RATE) {
s->x0 = XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
if (ASCON_RATE == 16) {
s->x1 = XOR(s->x1, LOAD64(m + 8));
STORE64(c + 8, s->x1);
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
mlen -= ASCON_RATE;
}
/* final plaintext block */
px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) {
s->x0 = XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
px = &s->x1;
m += 8;
c += 8;
mlen -= 8;
}
if (mlen) {
*px = XOR(*px, LOAD(m, mlen));
STORE(c, *px, mlen);
}
*px = XOR(*px, PAD(mlen));
printstate("process plaintext", s);
}
__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
word_t* restrict px;
word_t cx;
/* process ciphertext */
while (clen >= ASCON_RATE) {
cx = LOAD64(c);
s->x0 = XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
if (ASCON_RATE == 16) {
cx = LOAD64(c + 8);
s->x1 = XOR(s->x1, cx);
STORE64(m + 8, s->x1);
s->x1 = cx;
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
clen -= ASCON_RATE;
}
/* final ciphertext block */
px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) {
cx = LOAD64(c);
s->x0 = XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
px = &s->x1;
m += 8;
c += 8;
clen -= 8;
}
if (clen) {
cx = LOAD(c, clen);
*px = XOR(*px, cx);
STORE(m, *px, clen);
*px = CLEAR(*px, clen);
*px = XOR(*px, cx);
}
*px = XOR(*px, PAD(clen));
printstate("process ciphertext", s);
}
__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) {
/* finalization */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
s->x2 = XOR(s->x2, K1);
s->x3 = XOR(s->x3, K2);
}
if (CRYPTO_KEYBYTES == 20) {
s->x1 = XOR(s->x1, KEYROT(K0, K1));
s->x2 = XOR(s->x2, KEYROT(K1, K2));
s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0)));
}
P12(s);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("finalization", s);
}
#if ASCON_INLINE_MODE
#define INIT init
#define ABSORB absorb
#define ENCRYPT encrypt
#define DECRYPT decrypt
#define FINAL final
#else
#define INIT ascon_init
#define ABSORB ascon_absorb
#define ENCRYPT ascon_encrypt
#define DECRYPT ascon_decrypt
#define FINAL ascon_final
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
word_t K0, K1, K2;
loadkey(&K0, &K1, &K2, k);
init(s, npub, K0, K1, K2);
}
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
absorb(s, ad, adlen);
}
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) {
encrypt(s, c, m, mlen);
}
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) {
decrypt(s, m, c, clen);
}
void ascon_final(state_t* s, const uint8_t* k) {
word_t K0, K1, K2;
loadkey(&K0, &K1, &K2, k);
final(s, K0, K1, K2);
}
#endif
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
uint64_t mlen, const uint8_t* ad, uint64_t adlen,
const uint8_t* nsec, const uint8_t* npub,
const uint8_t* k) {
word_t K0, K1, K2;
state_t s;
(void)nsec;
*clen = mlen + CRYPTO_ABYTES;
/* perform ascon computation */
loadkey(&K0, &K1, &K2, k);
INIT(&s, npub, K0, K1, K2);
ABSORB(&s, ad, adlen);
ENCRYPT(&s, c, m, mlen);
FINAL(&s, K0, K1, K2);
/* set tag */
c += mlen;
STOREBYTES(c, s.x3, 8);
STOREBYTES(c + 8, s.x4, 8);
return 0;
}
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
const uint8_t* c, uint64_t clen, const uint8_t* ad,
uint64_t adlen, const uint8_t* npub, const uint8_t* k) {
word_t K0, K1, K2;
state_t s;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
*mlen = clen = clen - CRYPTO_ABYTES;
/* perform ascon computation */
loadkey(&K0, &K1, &K2, k);
INIT(&s, npub, K0, K1, K2);
ABSORB(&s, ad, adlen);
DECRYPT(&s, m, c, clen);
FINAL(&s, K0, K1, K2);
/* verify tag (should be constant time, check compiler output) */
c += clen;
s.x3 = XOR(s.x3, LOADBYTES(c, 8));
s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8));
if (NOTZERO(s.x3, s.x4)) {
*mlen = 0;
return -1;
}
return 0;
}
......@@ -16,4 +16,4 @@ void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k);
#endif // ASCON_H_
#endif /* ASCON_H */
......@@ -21,11 +21,6 @@
#define ASCON_UNROLL_LOOPS 1
#endif
/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */
#ifndef ASCON_DATA_ACCESS
#define ASCON_DATA_ACCESS 'H'
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
......
......@@ -21,44 +21,23 @@
#define ASCON_HASH_BYTES 32
#define ASCON_128_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_128A_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128A_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128A_PB_ROUNDS) << 32))
#define ASCON_80PQ_IV \
U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_HASH_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_HASH_BYTES * 8) << 0))
#define ASCON_XOF_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40))
#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull)
#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull)
#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull)
#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull)
#define ASCON_128_IV WORD_T(0x80400c0600000000)
#define ASCON_128A_IV WORD_T(0x80800c0800000000)
#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000)
#define ASCON_HASH_IV WORD_T(0x00400c0000000100)
#define ASCON_XOF_IV WORD_T(0x00400c0000000000)
#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull)
#define ASCON_HASH_IV2 WORD_T(0xb48a92db98d5da62ull)
#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull)
#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
......
......@@ -12,6 +12,7 @@
#include <stdio.h>
#include "ascon.h"
#include "word.h"
__forceinline void printword(const char* text, const word_t x) {
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x));
......
......@@ -3,45 +3,78 @@
#include <stdint.h>
#include "config.h"
#include "endian.h"
typedef uint64_t word_t;
#define WORD_T
#define UINT64_T
#define U64TOWORD
#define WORDTOU64
#define XOR(a, b) \
do { \
(a) ^= (b); \
} while (0)
__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); }
#define AND(a, b) \
do { \
(a) &= (b); \
} while (0)
__forceinline word_t NOT(word_t a) { return ~a; }
__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); }
__forceinline word_t XOR(word_t a, word_t b) { return a ^ b; }
__forceinline word_t AND(word_t a, word_t b) { return a & b; }
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
return lo2hi << 32 | hi2lo >> 32;
}
__forceinline int NOTZERO(word_t a, word_t b) {
int result = 0;
for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i];
for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i];
return result;
__forceinline uint8_t NOTZERO(word_t a, word_t b) {
uint64_t result = a | b;
result |= result >> 32;
result |= result >> 16;
result |= result >> 8;
return (uint8_t)result;
}
/* set padding byte in 64-bit Ascon word */
__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); }
/* byte mask for 64-bit Ascon word (1 <= n <= 8) */
__forceinline word_t XMASK(int n) {
return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8));
__forceinline word_t CLEAR(word_t w, int n) {
/* undefined for n == 0 */
uint64_t mask = 0x00ffffffffffffffull >> (n * 8 - 8);
return AND(w, WORD_T(mask));
}
__forceinline uint64_t MASK(int n) {
/* undefined for n == 0 */
return ~0ull >> (64 - 8 * n);
}
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = *(uint64_t*)bytes;
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes & MASK(n);
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(n);
*(uint64_t*)bytes |= U64BIG(x);
}
__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
uint64_t x = 0;
for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i];
return U64TOWORD(x);
}
__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i];
}
#endif /* WORD_H_ */
#include "api.h"
#include "ascon.h"
#include "permutations.h"
#include "printstate.h"
void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len,
uint8_t mode);
void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen,
const uint8_t* ad, uint64_t adlen, const uint8_t* npub,
const uint8_t* k, uint8_t mode) {
word_t K0, K1, K2;
/* load key */
if (CRYPTO_KEYBYTES == 20) {
K0 = KEYROT(WORD_T(0), LOAD(k, 4));
k += 4;
}
K1 = LOAD64(k);
K2 = LOAD64(k + 8);
/* initialization */
s->x0 = IV;
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0);
s->x1 = K1;
s->x2 = K2;
s->x3 = LOAD64(npub);
s->x4 = LOAD64(npub + 8);
P12(s);
if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("initialization", s);
/* process associated data */
if (adlen) {
process_data(s, (void*)0, ad, adlen, ASCON_ABSORB);
PB(s);
}
s->x4 = XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
/* process plaintext/ciphertext */
process_data(s, out, in, tlen, mode);
if (mode == ASCON_ENCRYPT) printstate("process plaintext", s);
if (mode == ASCON_DECRYPT) printstate("process ciphertext", s);
/* finalization */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
s->x2 = XOR(s->x2, K1);
s->x3 = XOR(s->x3, K2);
}
if (CRYPTO_KEYBYTES == 20) {
s->x1 = XOR(s->x1, KEYROT(K0, K1));
s->x2 = XOR(s->x2, KEYROT(K1, K2));
s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0)));
}
P12(s);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("finalization", s);
}
......@@ -10,9 +10,11 @@ typedef struct {
word_t x0, x1, x2, x3, x4;
} state_t;
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
#define ASCON_ABSORB 0x1
#define ASCON_SQUEEZE 0x2
#define ASCON_INSERT 0x4
#define ASCON_ENCRYPT (ASCON_ABSORB | ASCON_SQUEEZE)
#define ASCON_DECRYPT (ASCON_ABSORB | ASCON_SQUEEZE | ASCON_INSERT)
void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len,
uint8_t mode);
......@@ -21,4 +23,4 @@ void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen,
const uint8_t* ad, uint64_t adlen, const uint8_t* npub,
const uint8_t* k, uint8_t mode);
#endif // ASCON_H_
#endif /* ASCON_H */
......@@ -21,11 +21,6 @@
#define ASCON_UNROLL_LOOPS 1
#endif
/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */
#ifndef ASCON_DATA_ACCESS
#define ASCON_DATA_ACCESS 'H'
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
......
#include "api.h"
#include "ascon.h"
#include "loadstore.h"
#include "permutations.h"
#include "printstate.h"
......@@ -20,10 +19,10 @@ int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
/* set plaintext size */
*mlen = clen - CRYPTO_ABYTES;
/* ascon decryption */
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DECRYPT);
/* verify tag (should be constant time, check compiler output) */
XOR(s.x3, LOAD64(c + *mlen));
XOR(s.x4, LOAD64(c + *mlen + 8));
s.x3 = XOR(s.x3, LOADBYTES(c + *mlen, 8));
s.x4 = XOR(s.x4, LOADBYTES(c + *mlen + 8, 8));
if (NOTZERO(s.x3, s.x4)) {
*mlen = 0;
return -1;
......
#include "api.h"
#include "ascon.h"
#include "loadstore.h"
#include "permutations.h"
#include "printstate.h"
......@@ -17,9 +16,9 @@ int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
/* set ciphertext size */
*clen = mlen + CRYPTO_ABYTES;
/* ascon encryption */
ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);
ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENCRYPT);
/* set tag */
STORE64(c + mlen, s.x3);
STORE64(c + mlen + 8, s.x4);
STOREBYTES(c + mlen, s.x3, 8);
STOREBYTES(c + mlen + 8, s.x4, 8);
return 0;
}
......@@ -21,44 +21,23 @@
#define ASCON_HASH_BYTES 32
#define ASCON_128_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_128A_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128A_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128A_PB_ROUNDS) << 32))
#define ASCON_80PQ_IV \
U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_HASH_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_HASH_BYTES * 8) << 0))
#define ASCON_XOF_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40))
#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull)
#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull)
#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull)
#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull)
#define ASCON_128_IV WORD_T(0x80400c0600000000)
#define ASCON_128A_IV WORD_T(0x80800c0800000000)
#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000)
#define ASCON_HASH_IV WORD_T(0x00400c0000000100)
#define ASCON_XOF_IV WORD_T(0x00400c0000000000)
#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull)
#define ASCON_HASH_IV2 WORD_T(0xb48a92db98d5da62ull)
#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull)
#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
......
......@@ -12,6 +12,7 @@
#include <stdio.h>
#include "ascon.h"
#include "word.h"
__forceinline void printword(const char* text, const word_t x) {
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x));
......
#include "api.h"
#include "ascon.h"
#include "loadstore.h"
#include "permutations.h"
#include "printstate.h"
......@@ -11,13 +10,13 @@ void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len,
while (len >= ASCON_RATE) {
tmp0 = LOAD64(in);
tmp1 = LOAD64(in + 8);
XOR(s->x0, tmp0);
XOR(s->x1, tmp1);
if (mode != ASCON_AD) {
s->x0 = XOR(s->x0, tmp0);
s->x1 = XOR(s->x1, tmp1);
if (mode & ASCON_SQUEEZE) {
STORE64(out, s->x0);
STORE64(out + 8, s->x1);
}
if (mode == ASCON_DEC) {
if (mode & ASCON_INSERT) {
s->x0 = tmp0;
s->x1 = tmp1;
}
......@@ -34,28 +33,28 @@ void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len,
tmp1 = LOAD(in + 8, len - 8);
else
tmp0 = LOAD(in, len);
XOR(s->x0, tmp0);
XOR(s->x1, tmp1);
if (mode != ASCON_AD) {
s->x0 = XOR(s->x0, tmp0);
s->x1 = XOR(s->x1, tmp1);
if (mode & ASCON_SQUEEZE) {
if (len >= 8) STORE64(out, s->x0);
if (len > 8)
STORE(out + 8, s->x1, len - 8);
else
STORE(out, s->x0, len);
}
if (mode == ASCON_DEC) {
if (mode & ASCON_INSERT) {
if (len >= 8) s->x0 = tmp0;
if (len > 8) {
AND(s->x1, XMASK(len - 8));
XOR(s->x1, tmp1);
s->x1 = CLEAR(s->x1, len - 8);
s->x1 = XOR(s->x1, tmp1);
} else {
AND(s->x0, XMASK(len));
XOR(s->x0, tmp0);
s->x0 = CLEAR(s->x0, len);
s->x0 = XOR(s->x0, tmp0);
}
}
}
if (len < 8)
XOR(s->x0, PAD(len % 8));
s->x0 = XOR(s->x0, PAD(len % 8));
else
XOR(s->x1, PAD(len % 8));
s->x1 = XOR(s->x1, PAD(len % 8));
}
......@@ -19,50 +19,34 @@ __forceinline void PINIT(state_t* s) {
}
__forceinline void ROUND(state_t* s, uint64_t C) {
state_t t;
s->x2 ^= C;
s->x0 ^= s->x4;
s->x4 ^= s->x3;
s->x2 ^= s->x1;
t.x0 = s->x0;
t.x4 = s->x4;
t.x3 = s->x3;
t.x1 = s->x1;
t.x2 = s->x2;
s->x0 = t.x0 ^ (~t.x1 & t.x2);
s->x2 = t.x2 ^ (~t.x3 & t.x4);
s->x4 = t.x4 ^ (~t.x0 & t.x1);
s->x1 = t.x1 ^ (~t.x2 & t.x3);
s->x3 = t.x3 ^ (~t.x4 & t.x0);
s->x1 ^= s->x0;
t.x1 = s->x1;
s->x1 = ROR64(s->x1, 39);
s->x3 ^= s->x2;
t.x2 = s->x2;
s->x2 = ROR64(s->x2, 1);
t.x4 = s->x4;
t.x2 ^= s->x2;
s->x2 = ROR64(s->x2, 6 - 1);
t.x3 = s->x3;
t.x1 ^= s->x1;
s->x3 = ROR64(s->x3, 10);
s->x0 ^= s->x4;
s->x4 = ROR64(s->x4, 7);
t.x3 ^= s->x3;
s->x2 ^= t.x2;
s->x1 = ROR64(s->x1, 61 - 39);
t.x0 = s->x0;
s->x2 = ~s->x2;
s->x3 = ROR64(s->x3, 17 - 10);
t.x4 ^= s->x4;
s->x4 = ROR64(s->x4, 41 - 7);
s->x3 ^= t.x3;
s->x1 ^= t.x1;
s->x0 = ROR64(s->x0, 19);
s->x4 ^= t.x4;
t.x0 ^= s->x0;
s->x0 = ROR64(s->x0, 28 - 19);
s->x0 ^= t.x0;
word_t tmp;
/* round constant */
s->x2 = XOR(s->x2, C);
/* s-box layer */
s->x0 = XOR(s->x0, s->x4);
s->x4 = XOR(s->x4, s->x3);
s->x2 = XOR(s->x2, s->x1);
tmp = AND(s->x0, NOT(s->x4));
s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1)));
s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3)));
s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0)));
s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2)));
s->x3 = XOR(s->x3, tmp);
s->x1 = XOR(s->x1, s->x0);
s->x3 = XOR(s->x3, s->x2);
s->x0 = XOR(s->x0, s->x4);
/* linear layer */
tmp = XOR(s->x0, ROR64(s->x0, 28 - 19));
s->x0 = XOR(s->x0, ROR64(tmp, 19));
tmp = XOR(s->x1, ROR64(s->x1, 61 - 39));
s->x1 = XOR(s->x1, ROR64(tmp, 39));
tmp = XOR(s->x2, ROR64(s->x2, 6 - 1));
s->x2 = XOR(s->x2, ROR64(tmp, 1));
tmp = XOR(s->x3, ROR64(s->x3, 17 - 10));
s->x3 = XOR(s->x3, ROR64(tmp, 10));
tmp = XOR(s->x4, ROR64(s->x4, 41 - 7));
s->x4 = XOR(s->x4, ROR64(tmp, 7));
s->x2 = NOT(s->x2);
printstate(" round output", s);
}
......
......@@ -3,45 +3,78 @@
#include <stdint.h>
#include "config.h"
#include "endian.h"
typedef uint64_t word_t;
#define WORD_T
#define UINT64_T
#define U64TOWORD
#define WORDTOU64
#define XOR(a, b) \
do { \
(a) ^= (b); \
} while (0)
__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); }
#define AND(a, b) \
do { \
(a) &= (b); \
} while (0)
__forceinline word_t NOT(word_t a) { return ~a; }
__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); }
__forceinline word_t XOR(word_t a, word_t b) { return a ^ b; }
__forceinline word_t AND(word_t a, word_t b) { return a & b; }
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
return lo2hi << 32 | hi2lo >> 32;
}
__forceinline int NOTZERO(word_t a, word_t b) {
int result = 0;
for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i];
for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i];
return result;
__forceinline uint8_t NOTZERO(word_t a, word_t b) {
uint64_t result = a | b;
result |= result >> 32;
result |= result >> 16;
result |= result >> 8;
return (uint8_t)result;
}
/* set padding byte in 64-bit Ascon word */
__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); }
/* byte mask for 64-bit Ascon word (1 <= n <= 8) */
__forceinline word_t XMASK(int n) {
return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8));
__forceinline word_t CLEAR(word_t w, int n) {
/* undefined for n == 0 */
uint64_t mask = 0x00ffffffffffffffull >> (n * 8 - 8);
return AND(w, WORD_T(mask));
}
__forceinline uint64_t MASK(int n) {
/* undefined for n == 0 */
return ~0ull >> (64 - 8 * n);
}
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = *(uint64_t*)bytes;
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes & MASK(n);
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(n);
*(uint64_t*)bytes |= U64BIG(x);
}
__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
uint64_t x = 0;
for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i];
return U64TOWORD(x);
}
__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i];
}
#endif /* WORD_H_ */
#include "api.h"
#include "ascon.h"
#include "permutations.h"
#include "printstate.h"
__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) {
KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 20) {
*K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
k += 4;
}
*K1 = XOR(*K1, LOAD64(k));
*K2 = XOR(*K2, LOAD64(k + 8));
}
__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1,
word_t K2) {
word_t N0, N1;
/* load nonce */
N0 = LOAD64(npub);
N1 = LOAD64(npub + 8);
/* initialization */
PINIT(s);
s->x0 = XOR(s->x0, IV);
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0);
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
s->x3 = XOR(s->x3, N0);
s->x4 = XOR(s->x4, N1);
P12(s);
if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("initialization", s);
}
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
word_t* restrict px;
/* process associated data */
if (adlen) {
while (adlen >= ASCON_RATE) {
s->x0 = XOR(s->x0, LOAD64(ad));
if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8));
PB(s);
ad += ASCON_RATE;
adlen -= ASCON_RATE;
}
/* final associated data block */
px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) {
s->x0 = XOR(s->x0, LOAD64(ad));
px = &s->x1;
ad += 8;
adlen -= 8;
}
if (adlen) *px = XOR(*px, LOAD(ad, adlen));
*px = XOR(*px, PAD(adlen));
PB(s);
}
s->x4 = XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
}
__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
word_t* restrict px;
/* process plaintext */
while (mlen >= ASCON_RATE) {
s->x0 = XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
if (ASCON_RATE == 16) {
s->x1 = XOR(s->x1, LOAD64(m + 8));
STORE64(c + 8, s->x1);
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
mlen -= ASCON_RATE;
}
/* final plaintext block */
px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) {
s->x0 = XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
px = &s->x1;
m += 8;
c += 8;
mlen -= 8;
}
if (mlen) {
*px = XOR(*px, LOAD(m, mlen));
STORE(c, *px, mlen);
}
*px = XOR(*px, PAD(mlen));
printstate("process plaintext", s);
}
__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
word_t* restrict px;
word_t cx;
/* process ciphertext */
while (clen >= ASCON_RATE) {
cx = LOAD64(c);
s->x0 = XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
if (ASCON_RATE == 16) {
cx = LOAD64(c + 8);
s->x1 = XOR(s->x1, cx);
STORE64(m + 8, s->x1);
s->x1 = cx;
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
clen -= ASCON_RATE;
}
/* final ciphertext block */
px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) {
cx = LOAD64(c);
s->x0 = XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
px = &s->x1;
m += 8;
c += 8;
clen -= 8;
}
if (clen) {
cx = LOAD(c, clen);
*px = XOR(*px, cx);
STORE(m, *px, clen);
*px = CLEAR(*px, clen);
*px = XOR(*px, cx);
}
*px = XOR(*px, PAD(clen));
printstate("process ciphertext", s);
}
__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) {
/* finalization */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
s->x2 = XOR(s->x2, K1);
s->x3 = XOR(s->x3, K2);
}
if (CRYPTO_KEYBYTES == 20) {
s->x1 = XOR(s->x1, KEYROT(K0, K1));
s->x2 = XOR(s->x2, KEYROT(K1, K2));
s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0)));
}
P12(s);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("finalization", s);
}
#if ASCON_INLINE_MODE
#define INIT init
#define ABSORB absorb
#define ENCRYPT encrypt
#define DECRYPT decrypt
#define FINAL final
#else
#define INIT ascon_init
#define ABSORB ascon_absorb
#define ENCRYPT ascon_encrypt
#define DECRYPT ascon_decrypt
#define FINAL ascon_final
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
word_t K0, K1, K2;
loadkey(&K0, &K1, &K2, k);
init(s, npub, K0, K1, K2);
}
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
absorb(s, ad, adlen);
}
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) {
encrypt(s, c, m, mlen);
}
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) {
decrypt(s, m, c, clen);
}
void ascon_final(state_t* s, const uint8_t* k) {
word_t K0, K1, K2;
loadkey(&K0, &K1, &K2, k);
final(s, K0, K1, K2);
}
#endif
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
uint64_t mlen, const uint8_t* ad, uint64_t adlen,
const uint8_t* nsec, const uint8_t* npub,
const uint8_t* k) {
word_t K0, K1, K2;
state_t s;
(void)nsec;
*clen = mlen + CRYPTO_ABYTES;
/* perform ascon computation */
loadkey(&K0, &K1, &K2, k);
INIT(&s, npub, K0, K1, K2);
ABSORB(&s, ad, adlen);
ENCRYPT(&s, c, m, mlen);
FINAL(&s, K0, K1, K2);
/* set tag */
c += mlen;
STOREBYTES(c, s.x3, 8);
STOREBYTES(c + 8, s.x4, 8);
return 0;
}
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
const uint8_t* c, uint64_t clen, const uint8_t* ad,
uint64_t adlen, const uint8_t* npub, const uint8_t* k) {
word_t K0, K1, K2;
state_t s;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
*mlen = clen = clen - CRYPTO_ABYTES;
/* perform ascon computation */
loadkey(&K0, &K1, &K2, k);
INIT(&s, npub, K0, K1, K2);
ABSORB(&s, ad, adlen);
DECRYPT(&s, m, c, clen);
FINAL(&s, K0, K1, K2);
/* verify tag (should be constant time, check compiler output) */
c += clen;
s.x3 = XOR(s.x3, LOADBYTES(c, 8));
s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8));
if (NOTZERO(s.x3, s.x4)) {
*mlen = 0;
return -1;
}
return 0;
}
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16
#ifndef ASCON_H_
#define ASCON_H_
#include <stdint.h>
#include "config.h"
#include "word.h"
typedef struct {
word_t x0, x1, x2, x3, x4;
} state_t;
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k);
#endif /* ASCON_H */
#ifndef CONFIG_H_
#define CONFIG_H_
/* inline the Ascon mode */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#endif
/* inline the Ascon permutations */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0
#endif
/* single function for all permutations */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 1
#endif
/* unroll the permutation loops */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 0
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#endif
#endif /* CONFIG_H_ */
#ifndef ENDIAN_H_
#define ENDIAN_H_
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines */
#ifndef NDEBUG
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines */
#ifndef NDEBUG
#pragma message("Using macros for little endian machines")
#endif
#define U64BIG(x) \
(((0x00000000000000FFULL & (x)) << 56) | \
((0x000000000000FF00ULL & (x)) << 40) | \
((0x0000000000FF0000ULL & (x)) << 24) | \
((0x00000000FF000000ULL & (x)) << 8) | \
((0x000000FF00000000ULL & (x)) >> 8) | \
((0x0000FF0000000000ULL & (x)) >> 24) | \
((0x00FF000000000000ULL & (x)) >> 40) | \
((0xFF00000000000000ULL & (x)) >> 56))
#define U32BIG(x) \
(((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \
((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24))
#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8))
#else
#error "Ascon byte order macros not defined in endian.h"
#endif
#endif /* ENDIAN_H_ */
#include "permutations.h"
#include "round.h"
#if ASCON_INLINE_PERM
#elif ASCON_SINGLE_PERM
void P(state_t* s, uint8_t rounds) {
printstate(" permutation input", s);
for (int i = START(rounds); i > 0x4a; i -= 0x0f) ROUND(s, i);
}
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
#endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8
void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
#endif
#endif
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
#include <stdint.h>
#include "api.h"
#include "ascon.h"
#include "printstate.h"
#include "round.h"
#define ASCON_128_KEYBYTES 16
#define ASCON_128A_KEYBYTES 16
#define ASCON_80PQ_KEYBYTES 20
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32
#define ASCON_128_IV WORD_T(0x80400c0600000000)
#define ASCON_128A_IV WORD_T(0x80800c0800000000)
#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000)
#define ASCON_HASH_IV WORD_T(0x00400c0000000100)
#define ASCON_XOF_IV WORD_T(0x00400c0000000000)
#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull)
#define ASCON_HASH_IV2 WORD_T(0xb48a92db98d5da62ull)
#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull)
#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#if ASCON_RATE == 16
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20
#define IV ASCON_80PQ_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#define START(n) ((3 + (n)) << 4 | (12 - (n)))
#if ASCON_UNROLL_LOOPS
__forceinline void P12ROUNDS(state_t* s) {
ROUND(s, 0xf0);
ROUND(s, 0xe1);
ROUND(s, 0xd2);
ROUND(s, 0xc3);
ROUND(s, 0xb4);
ROUND(s, 0xa5);
ROUND(s, 0x96);
ROUND(s, 0x87);
ROUND(s, 0x78);
ROUND(s, 0x69);
ROUND(s, 0x5a);
ROUND(s, 0x4b);
}
__forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0xb4);
ROUND(s, 0xa5);
ROUND(s, 0x96);
ROUND(s, 0x87);
ROUND(s, 0x78);
ROUND(s, 0x69);
ROUND(s, 0x5a);
ROUND(s, 0x4b);
}
__forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x96);
ROUND(s, 0x87);
ROUND(s, 0x78);
ROUND(s, 0x69);
ROUND(s, 0x5a);
ROUND(s, 0x4b);
}
#else /* !ASCON_UNROLL_LOOPS */
__forceinline void P12ROUNDS(state_t* s) {
for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i);
}
__forceinline void P8ROUNDS(state_t* s) {
for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i);
}
__forceinline void P6ROUNDS(state_t* s) {
for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i);
}
#endif
#if ASCON_INLINE_PERM
__forceinline void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
__forceinline void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
__forceinline void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#elif ASCON_SINGLE_PERM
#define P12(s) P(s, 12)
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
void P(state_t* s, uint8_t rounds);
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#endif
#endif /* PERMUTATIONS_H_ */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment