diff --git a/ascon/Implementations/CMakeLists.txt b/ascon/Implementations/CMakeLists.txt index 99a1754..f08522a 100644 --- a/ascon/Implementations/CMakeLists.txt +++ b/ascon/Implementations/CMakeLists.txt @@ -5,11 +5,11 @@ enable_testing() # set the default version, algorithms, implementations, tests, flags, defs set(DEFAULT_VERSIONS v12) set(DEFAULT_ALGS ascon128 ascon128a ascon80pq asconhash asconxof) -set(DEFAULT_IMPLS ref opt64 opt64_lowsize bi32 bi32_lowsize bi32_lowreg bi16 bi8) +set(DEFAULT_IMPLS ref opt64 opt64_lowsize bi32 bi32_lowsize bi32_lowreg opt8 bi8) set(DEFAULT_TESTS genkat getcycles) set(DEFAULT_REL_FLAGS -std=c99 -O2 -fomit-frame-pointer -march=native -mtune=native) set(DEFAULT_DBG_FLAGS -std=c99 -O2 -Wall -Wextra -Wshadow) -set(DEFAULT_COMPILE_DEFS -DASCON_DATA_ACCESS='H') +set(DEFAULT_COMPILE_DEFS) # set cmake variables for version, algorithms, implementations, tests, flags, defs set(VERSION_LIST ${DEFAULT_VERSIONS} CACHE STRING "Choose the ascon versions to include.") diff --git a/ascon/Implementations/README.md b/ascon/Implementations/README.md index a8056c1..a9df416 100644 --- a/ascon/Implementations/README.md +++ b/ascon/Implementations/README.md @@ -19,10 +19,10 @@ and the following implementations: - `opt64_lowsize`: 64-bit size-optimized C implementation - `neon`: NEON speed-optimized ARM inline assembly implementation - `bi32`: 32-bit speed-optimized bit-interleaved C implementation -- `bi32_lowsize`: 32-bit size-optimized bit-interleaved C implementation -- `bi32_lowreg`: 32-bit speed-optimized bit-interleaved C implementation (low register usage) - `bi32_arm`: 32-bit speed-optimized bit-interleaved ARM inline assembly implementation -- `bi16`: 16-bit optimized bit-interleaved C implementation +- `bi32_lowreg`: 32-bit speed-optimized bit-interleaved C implementation (low register usage) +- `bi32_lowsize`: 32-bit size-optimized bit-interleaved C implementation +- `opt8`: 8-bit optimized C implementation - `bi8`: 8-bit optimized 
bit-interleaved C implementation @@ -38,7 +38,7 @@ and the following implementations: | Cortex-A15 (ARMv7)\* | | | | | 69.8 | 36.2 | 34.6 | | Cortex-A7 (NEON) | 2182 | 249 | 148 | 97 | 71.7 | 47.5 | 46.5 | | Cortex-A7 (ARMv7) | 1871 | 292 | 175 | 115 | 86.6 | 58.3 | 57.2 | -| ARM1176JZF-S (ARMv6) | 2189 | 340 | 202 | 133 | 97.9 | 64.4 | 65.3 | +| ARM1176JZF-S (ARMv6) | 2136 | 312 | 186 | 123 | 91.6 | 61.8 | 62.2 | \* Results taken from eBACS: http://bench.cr.yp.to/ @@ -55,7 +55,7 @@ and the following implementations: | Cortex-A15 (ARMv7)\* | | | | | 60.3 | 25.3 | 23.8 | | Cortex-A7 (NEON) | 2204 | 226 | 132 | 82 | 55.9 | 31.7 | 30.7 | | Cortex-A7 (ARMv7) | 1911 | 255 | 161 | 102 | 71.3 | 42.3 | 41.2 | -| ARM1176JZF-S (ARMv6) | 2267 | 303 | 191 | 120 | 84.4 | 50.0 | 50.2 | +| ARM1176JZF-S (ARMv6) | 2118 | 261 | 170 | 107 | 75.6 | 46.0 | 46.6 | \* Results taken from eBACS: http://bench.cr.yp.to/ @@ -153,11 +153,17 @@ Get CPU cycles: * Determine the scaling factor between the actual and base frequency: - factor = actual frequency / base frequency -* Run the getcycles program using the frequency factor and watch the results: +* Run a getcycles program using the frequency factor and watch the results: ``` while true; do ./getcycles_crypto_aead_ascon128v12_opt64 $factor; done ``` +* Run the `benchmark.sh` script with a specific algorithm and frequency factor + to benchmark all built implementations: + ``` + ./benchmark.sh ascon128v12 $factor + ``` + ## Hints to activate the performance monitor unit (PMU) on ARM CPUs: diff --git a/ascon/Implementations/benchmark.sh b/ascon/Implementations/benchmark.sh index a348640..f02d968 100644 --- a/ascon/Implementations/benchmark.sh +++ b/ascon/Implementations/benchmark.sh @@ -1,8 +1,9 @@ #!/bin/sh -FACTOR=$1 +ALG=$1 +FACTOR=$2 -for i in getcycles*; do +for i in getcycles*$ALG*; do echo echo $i: echo diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32/aead.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32/aead.c 
new file mode 100644 index 0000000..5429f06 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32/aead.c @@ -0,0 +1,253 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + *K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + *K1 = XOR(*K1, LOAD64(k)); + *K2 = XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1, + word_t K2) { + word_t N0, N1; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* initialization */ + PINIT(s); + s->x0 = XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + s->x3 = XOR(s->x3, N0); + s->x4 = XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) *px = XOR(*px, LOAD(ad, adlen)); + *px = XOR(*px, PAD(adlen)); + PB(s); + } + s->x4 = XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + s->x1 = XOR(s->x1, LOAD64(m + 
8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + *px = XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + *px = XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + s->x1 = XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + *px = XOR(*px, cx); + STORE(m, *px, clen); + *px = CLEAR(*px, clen); + *px = XOR(*px, cx); + } + *px = XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) { + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + s->x2 = XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("finalization", s); +} + +#if ASCON_INLINE_MODE + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt 
+#define FINAL final + +#else + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + init(s, npub, K0, K1, K2); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + final(s, K0, K1, K2); +} + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, K0, K1, K2); + /* set tag */ + c += mlen; + STOREBYTES(c, s.x3, 8); + STOREBYTES(c + 8, s.x4, 8); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, K0, K1, K2); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + s.x3 = XOR(s.x3, LOADBYTES(c, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8)); + if 
(NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32/ascon.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32/ascon.h index 10a5b6e..e596d64 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32/ascon.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32/ascon.h @@ -16,4 +16,4 @@ void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); void ascon_final(state_t* s, const uint8_t* k); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32/config.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32/config.h index b1b5080..1447e7d 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32/config.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 0 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'M' -#endif - /* make sure __forceinline is supported */ #ifndef __forceinline #define __forceinline inline __attribute__((always_inline)) diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32/interleave.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32/interleave.h new file mode 100644 index 0000000..6e2e5c3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32/interleave.h @@ -0,0 +1,47 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include + +__forceinline uint32_t deinterleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + return x; +} + +__forceinline uint32_t interleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 
8)) & 0x0000FF00, x ^= t ^ (t << 8); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + return x; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t deinterleave32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + lo = deinterleave_uint32(lo); + hi = deinterleave_uint32(hi); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t interleave32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + lo = interleave_uint32(lo); + hi = interleave_uint32(hi); + return (uint64_t)hi << 32 | lo; +} + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32/permutations.h index ef338f1..49fd52a 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - 
((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) - -#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x8021000008220000) +#define ASCON_128A_IV WORD_T(0x8822000000200000) +#define ASCON_80PQ_IV WORD_T(0xc021000008220000) +#define ASCON_HASH_IV WORD_T(0x0020000008020010) +#define ASCON_XOF_IV WORD_T(0x0020000008020000) + +#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7) +#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340) +#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8) +#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98) +#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398) + +#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6) +#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220) +#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04) +#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2) +#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32/printstate.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32/printstate.h +++ 
b/ascon/Implementations/crypto_aead/ascon128av12/bi32/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32/word.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32/word.h index 8ffcaaa..b27c6c9 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32/word.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32/word.h @@ -3,55 +3,51 @@ #include -#include "config.h" +#include "endian.h" +#include "interleave.h" typedef struct { uint32_t e; uint32_t o; } word_t; +__forceinline uint32_t ROR32(uint32_t x, int n) { + return (n == 0) ? x : x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + __forceinline word_t WORD_T(uint64_t x) { return (word_t){.o = x >> 32, .e = x}; } __forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } -__forceinline uint64_t TOBI32(uint64_t in); - -__forceinline uint64_t FROMBI32(uint64_t in); +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } -__forceinline word_t U64TOWORD(uint64_t x) { - uint64_t w = TOBI32(x); - return (word_t){.o = w >> 32, .e = w}; -} +__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } -__forceinline uint64_t WORDTOU64(word_t w) { - return FROMBI32((uint64_t)w.o << 32 | w.e); +__forceinline word_t NOT(word_t a) { + a.e = ~a.e; + a.o = ~a.o; + return a; } -#define XOR(a, b) \ - do { \ - word_t tb = b; \ - (a).e ^= tb.e; \ - (a).o ^= tb.o; \ - } while (0) - -#define AND(a, b) \ - do { \ - word_t tb = b; \ - (a).e &= tb.e; \ - (a).o &= tb.o; \ - } while (0) - -__forceinline uint32_t ROR32(uint32_t x, int n) { - return x >> n | x << (32 - n); 
+__forceinline word_t XOR(word_t a, word_t b) { + a.e ^= b.e; + a.o ^= b.o; + return a; } -__forceinline word_t ROR64(word_t x, int n) { - word_t r; - r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); - r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); - return r; +__forceinline word_t AND(word_t a, word_t b) { + a.e &= b.e; + a.o &= b.o; + return a; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { @@ -61,57 +57,58 @@ __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return r; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint32_t result = a.e | a.o | b.e | b.o; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ uint32_t mask = 0x0fffffff >> (n * 4 - 4); - return WORD_T((uint64_t)mask << 32 | mask); -} - -/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ -__forceinline uint64_t TOBI32(uint64_t in) { - uint32_t hi = in >> 32; - uint32_t lo = in; - uint32_t r0, r1; - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r0 = (lo & 0x0000FFFF) | (hi << 16); - r1 = (lo >> 16) | (hi & 0xFFFF0000); - return (uint64_t)r1 << 32 | r0; -} - -/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ -__forceinline uint64_t FROMBI32(uint64_t in) { - uint32_t r0 = in; - uint32_t r1 = in >> 32; - uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); - uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - return (uint64_t)hi << 32 | lo; + return AND(w, WORD_T((uint64_t)mask << 32 | mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) 
{ + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/aead.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/aead.c new file mode 100644 index 0000000..5429f06 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/aead.c @@ -0,0 +1,253 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + *K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + *K1 = XOR(*K1, LOAD64(k)); + *K2 = XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1, + word_t K2) { + word_t N0, N1; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* initialization */ + PINIT(s); + s->x0 = XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + s->x3 = XOR(s->x3, N0); + s->x4 = XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, 
LOAD64(ad)); + if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) *px = XOR(*px, LOAD(ad, adlen)); + *px = XOR(*px, PAD(adlen)); + PB(s); + } + s->x4 = XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + s->x1 = XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + *px = XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + *px = XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + s->x1 = XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + *px = XOR(*px, cx); + STORE(m, *px, clen); + *px = CLEAR(*px, clen); + *px = XOR(*px, cx); + } + *px 
= XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) { + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + s->x2 = XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("finalization", s); +} + +#if ASCON_INLINE_MODE + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#else + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + init(s, npub, K0, K1, K2); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + final(s, K0, K1, K2); +} + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, K0, K1, K2); + /* set tag */ + c += 
mlen; + STOREBYTES(c, s.x3, 8); + STOREBYTES(c + 8, s.x4, 8); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, K0, K1, K2); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + s.x3 = XOR(s.x3, LOADBYTES(c, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/ascon.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/ascon.h index 10a5b6e..e596d64 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/ascon.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/ascon.h @@ -16,4 +16,4 @@ void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); void ascon_final(state_t* s, const uint8_t* k); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/config.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/config.h index 4242e2e..0f5a485 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/config.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 1 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'H' -#endif - /* make sure __forceinline is supported */ #ifndef __forceinline #define __forceinline inline 
__attribute__((always_inline)) diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/interleave.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/interleave.h new file mode 100644 index 0000000..6e2e5c3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/interleave.h @@ -0,0 +1,47 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include + +__forceinline uint32_t deinterleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + return x; +} + +__forceinline uint32_t interleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + return x; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t deinterleave32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + lo = deinterleave_uint32(lo); + hi = deinterleave_uint32(hi); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t interleave32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + lo = interleave_uint32(lo); + hi = interleave_uint32(hi); + return (uint64_t)hi << 32 | lo; +} + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/permutations.h index ef338f1..49fd52a 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 
U64TOWORD(0x348fa5c9d525e140ull) - -#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x8021000008220000) +#define ASCON_128A_IV WORD_T(0x8822000000200000) +#define ASCON_80PQ_IV WORD_T(0xc021000008220000) +#define ASCON_HASH_IV WORD_T(0x0020000008020010) +#define ASCON_XOF_IV WORD_T(0x0020000008020000) + +#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7) +#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340) +#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8) +#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98) +#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398) + +#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6) +#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220) +#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04) +#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2) +#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/printstate.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/printstate.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/word.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/word.h index 8ffcaaa..b27c6c9 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/word.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/word.h @@ -3,55 +3,51 @@ #include 
-#include "config.h" +#include "endian.h" +#include "interleave.h" typedef struct { uint32_t e; uint32_t o; } word_t; +__forceinline uint32_t ROR32(uint32_t x, int n) { + return (n == 0) ? x : x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + __forceinline word_t WORD_T(uint64_t x) { return (word_t){.o = x >> 32, .e = x}; } __forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } -__forceinline uint64_t TOBI32(uint64_t in); - -__forceinline uint64_t FROMBI32(uint64_t in); +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } -__forceinline word_t U64TOWORD(uint64_t x) { - uint64_t w = TOBI32(x); - return (word_t){.o = w >> 32, .e = w}; -} +__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } -__forceinline uint64_t WORDTOU64(word_t w) { - return FROMBI32((uint64_t)w.o << 32 | w.e); +__forceinline word_t NOT(word_t a) { + a.e = ~a.e; + a.o = ~a.o; + return a; } -#define XOR(a, b) \ - do { \ - word_t tb = b; \ - (a).e ^= tb.e; \ - (a).o ^= tb.o; \ - } while (0) - -#define AND(a, b) \ - do { \ - word_t tb = b; \ - (a).e &= tb.e; \ - (a).o &= tb.o; \ - } while (0) - -__forceinline uint32_t ROR32(uint32_t x, int n) { - return x >> n | x << (32 - n); +__forceinline word_t XOR(word_t a, word_t b) { + a.e ^= b.e; + a.o ^= b.o; + return a; } -__forceinline word_t ROR64(word_t x, int n) { - word_t r; - r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); - r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); - return r; +__forceinline word_t AND(word_t a, word_t b) { + a.e &= b.e; + a.o &= b.o; + return a; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { @@ -61,57 +57,58 @@ __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return r; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint32_t result = a.e | a.o | b.e | b.o; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ uint32_t mask = 0x0fffffff >> (n * 4 - 4); - return WORD_T((uint64_t)mask << 32 | mask); -} - -/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ -__forceinline uint64_t TOBI32(uint64_t in) { - uint32_t hi = in >> 32; - uint32_t lo = in; - uint32_t r0, r1; - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r0 = (lo & 0x0000FFFF) | (hi << 16); - r1 = (lo >> 16) | (hi & 0xFFFF0000); - return (uint64_t)r1 << 32 | r0; -} - -/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ -__forceinline uint64_t FROMBI32(uint64_t in) { - uint32_t r0 = in; - uint32_t r1 = in >> 32; - uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); - uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - return (uint64_t)hi << 32 | lo; + return AND(w, WORD_T((uint64_t)mask << 32 | mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/aead.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/aead.c new file mode 100644 index 
0000000..5429f06 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/aead.c @@ -0,0 +1,253 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + *K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + *K1 = XOR(*K1, LOAD64(k)); + *K2 = XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1, + word_t K2) { + word_t N0, N1; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* initialization */ + PINIT(s); + s->x0 = XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + s->x3 = XOR(s->x3, N0); + s->x4 = XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) *px = XOR(*px, LOAD(ad, adlen)); + *px = XOR(*px, PAD(adlen)); + PB(s); + } + s->x4 = XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + s->x1 = XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 
8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + *px = XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + *px = XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + s->x1 = XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + *px = XOR(*px, cx); + STORE(m, *px, clen); + *px = CLEAR(*px, clen); + *px = XOR(*px, cx); + } + *px = XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) { + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + s->x2 = XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("finalization", s); +} + +#if ASCON_INLINE_MODE + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final 
+ +#else + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + init(s, npub, K0, K1, K2); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + final(s, K0, K1, K2); +} + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, K0, K1, K2); + /* set tag */ + c += mlen; + STOREBYTES(c, s.x3, 8); + STOREBYTES(c + 8, s.x4, 8); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, K0, K1, K2); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + s.x3 = XOR(s.x3, LOADBYTES(c, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8)); + if (NOTZERO(s.x3, s.x4)) { + 
*mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/ascon.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/ascon.h index 10a5b6e..e596d64 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/ascon.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/ascon.h @@ -16,4 +16,4 @@ void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); void ascon_final(state_t* s, const uint8_t* k); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/config.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/config.h index 5ccce77..0f5a485 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/config.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 1 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'M' -#endif - /* make sure __forceinline is supported */ #ifndef __forceinline #define __forceinline inline __attribute__((always_inline)) diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/interleave.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/interleave.h new file mode 100644 index 0000000..6e2e5c3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/interleave.h @@ -0,0 +1,47 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include + +__forceinline uint32_t deinterleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + return x; +} + +__forceinline uint32_t 
interleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + return x; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t deinterleave32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + lo = deinterleave_uint32(lo); + hi = deinterleave_uint32(hi); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t interleave32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + lo = interleave_uint32(lo); + hi = interleave_uint32(hi); + return (uint64_t)hi << 32 | lo; +} + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/permutations.h index ef338f1..49fd52a 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - 
((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) - -#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x8021000008220000) +#define ASCON_128A_IV WORD_T(0x8822000000200000) +#define ASCON_80PQ_IV WORD_T(0xc021000008220000) +#define ASCON_HASH_IV WORD_T(0x0020000008020010) +#define ASCON_XOF_IV WORD_T(0x0020000008020000) + +#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7) +#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340) +#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8) +#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98) +#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398) + +#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6) +#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220) +#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04) +#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2) +#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/printstate.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/printstate.h index 
34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/printstate.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/round.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/round.h index bc7a0cd..fa23bf3 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/round.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/round.h @@ -19,58 +19,34 @@ __forceinline void PINIT(state_t* s) { } __forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { - uint32_t tmp_e, tmp_o; + word_t tmp, C = {.o = C_o, .e = C_e}; /* round constant */ - s->x2.e ^= C_e; - s->x2.o ^= C_o; + s->x2 = XOR(s->x2, C); /* s-box layer */ - s->x0.e ^= s->x4.e; - s->x0.o ^= s->x4.o; - s->x4.e ^= s->x3.e; - s->x4.o ^= s->x3.o; - s->x2.e ^= s->x1.e; - s->x2.o ^= s->x1.o; - tmp_e = s->x0.e & (~s->x4.e); - tmp_o = s->x0.o & (~s->x4.o); - s->x0.e ^= s->x2.e & (~s->x1.e); - s->x0.o ^= s->x2.o & (~s->x1.o); - s->x2.e ^= s->x4.e & (~s->x3.e); - s->x2.o ^= s->x4.o & (~s->x3.o); - s->x4.e ^= s->x1.e & (~s->x0.e); - s->x4.o ^= s->x1.o & (~s->x0.o); - s->x1.e ^= s->x3.e & (~s->x2.e); - s->x1.o ^= s->x3.o & (~s->x2.o); - s->x3.e ^= tmp_e; - s->x3.o ^= tmp_o; - s->x1.e ^= s->x0.e; - s->x1.o ^= s->x0.o; - s->x3.e ^= s->x2.e; - s->x3.o ^= s->x2.o; - s->x0.e ^= s->x4.e; - s->x0.o ^= s->x4.o; + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = 
XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); /* linear layer */ - tmp_e = s->x0.e ^ ROR32(s->x0.o, 4); - tmp_o = s->x0.o ^ ROR32(s->x0.e, 5); - s->x0.e ^= ROR32(tmp_o, 9); - s->x0.o ^= ROR32(tmp_e, 10); - tmp_e = s->x1.e ^ ROR32(s->x1.e, 11); - tmp_o = s->x1.o ^ ROR32(s->x1.o, 11); - s->x1.e ^= ROR32(tmp_o, 19); - s->x1.o ^= ROR32(tmp_e, 20); - tmp_e = s->x2.e ^ ROR32(s->x2.o, 2); - tmp_o = s->x2.o ^ ROR32(s->x2.e, 3); - s->x2.e ^= tmp_o; - s->x2.o ^= ROR32(tmp_e, 1); - tmp_e = s->x3.e ^ ROR32(s->x3.o, 3); - tmp_o = s->x3.o ^ ROR32(s->x3.e, 4); - s->x3.e ^= ROR32(tmp_e, 5); - s->x3.o ^= ROR32(tmp_o, 5); - tmp_e = s->x4.e ^ ROR32(s->x4.e, 17); - tmp_o = s->x4.o ^ ROR32(s->x4.o, 17); - s->x4.e ^= ROR32(tmp_o, 3); - s->x4.o ^= ROR32(tmp_e, 4); - s->x2.e = ~s->x2.e; - s->x2.o = ~s->x2.o; + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); printstate(" round output", s); } diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/word.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/word.h index 8ffcaaa..b27c6c9 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/word.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/word.h @@ -3,55 +3,51 @@ #include -#include "config.h" +#include "endian.h" +#include "interleave.h" typedef struct { uint32_t e; uint32_t o; } word_t; +__forceinline uint32_t ROR32(uint32_t x, int n) { + return (n == 0) ? x : x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + __forceinline word_t WORD_T(uint64_t x) { return (word_t){.o = x >> 32, .e = x}; } __forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } -__forceinline uint64_t TOBI32(uint64_t in); - -__forceinline uint64_t FROMBI32(uint64_t in); +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } -__forceinline word_t U64TOWORD(uint64_t x) { - uint64_t w = TOBI32(x); - return (word_t){.o = w >> 32, .e = w}; -} +__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } -__forceinline uint64_t WORDTOU64(word_t w) { - return FROMBI32((uint64_t)w.o << 32 | w.e); +__forceinline word_t NOT(word_t a) { + a.e = ~a.e; + a.o = ~a.o; + return a; } -#define XOR(a, b) \ - do { \ - word_t tb = b; \ - (a).e ^= tb.e; \ - (a).o ^= tb.o; \ - } while (0) - -#define AND(a, b) \ - do { \ - word_t tb = b; \ - (a).e &= tb.e; \ - (a).o &= tb.o; \ - } while (0) - -__forceinline uint32_t ROR32(uint32_t x, int n) { - return x >> n | x << (32 - n); +__forceinline word_t XOR(word_t a, word_t b) { + a.e ^= b.e; + a.o ^= b.o; + return a; } -__forceinline word_t ROR64(word_t x, int n) { - word_t r; - r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); - r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); - return r; +__forceinline word_t AND(word_t a, word_t b) { + a.e &= b.e; + a.o &= b.o; + return a; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { @@ -61,57 +57,58 @@ __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return r; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint32_t result = a.e | a.o | b.e | b.o; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ uint32_t mask = 0x0fffffff >> (n * 4 - 4); - return WORD_T((uint64_t)mask << 32 | mask); -} - -/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ -__forceinline uint64_t TOBI32(uint64_t in) { - uint32_t hi = in >> 32; - uint32_t lo = in; - uint32_t r0, r1; - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r0 = (lo & 0x0000FFFF) | (hi << 16); - r1 = (lo >> 16) | (hi & 0xFFFF0000); - return (uint64_t)r1 << 32 | r0; -} - -/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ -__forceinline uint64_t FROMBI32(uint64_t in) { - uint32_t r0 = in; - uint32_t r1 = in >> 32; - uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); - uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - return (uint64_t)hi << 32 | lo; + return AND(w, WORD_T((uint64_t)mask << 32 | mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/aead.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/aead.c new file mode 100644 index 
0000000..6354194 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/aead.c @@ -0,0 +1,61 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode); + +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode) { + word_t K0, K1, K2; + /* load key */ + if (CRYPTO_KEYBYTES == 20) { + K0 = KEYROT(WORD_T(0), LOAD(k, 4)); + k += 4; + } + K1 = LOAD64(k); + K2 = LOAD64(k + 8); + /* initialization */ + s->x0 = IV; + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = K1; + s->x2 = K2; + s->x3 = LOAD64(npub); + s->x4 = LOAD64(npub + 8); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); + /* process associated data */ + if (adlen) { + process_data(s, (void*)0, ad, adlen, ASCON_ABSORB); + PB(s); + } + s->x4 = XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); + /* process plaintext/ciphertext */ + process_data(s, out, in, tlen, mode); + if (mode == ASCON_ENCRYPT) printstate("process plaintext", s); + if (mode == ASCON_DECRYPT) printstate("process ciphertext", s); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + s->x2 = XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("finalization", s); +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/ascon.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/ascon.h 
index aa685d3..c89ec89 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/ascon.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/ascon.h @@ -10,9 +10,11 @@ typedef struct { word_t x0, x1, x2, x3, x4; } state_t; -#define ASCON_AD 0 -#define ASCON_ENC 1 -#define ASCON_DEC 2 +#define ASCON_ABSORB 0x1 +#define ASCON_SQUEEZE 0x2 +#define ASCON_INSERT 0x4 +#define ASCON_ENCRYPT (ASCON_ABSORB | ASCON_SQUEEZE) +#define ASCON_DECRYPT (ASCON_ABSORB | ASCON_SQUEEZE | ASCON_INSERT) void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, uint8_t mode); @@ -21,4 +23,4 @@ void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, const uint8_t* ad, uint64_t adlen, const uint8_t* npub, const uint8_t* k, uint8_t mode); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/config.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/config.h index 19426ab..ca59e3b 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/config.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 1 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'B' -#endif - /* make sure __forceinline is supported */ #ifndef __forceinline #define __forceinline inline __attribute__((always_inline)) diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/decrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/decrypt.c index 605bd03..42b1e4b 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/decrypt.c +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/decrypt.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -20,10 +19,10 @@ int 
crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, /* set plaintext size */ *mlen = clen - CRYPTO_ABYTES; /* ascon decryption */ - ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC); + ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DECRYPT); /* verify tag (should be constant time, check compiler output) */ - XOR(s.x3, LOAD64(c + *mlen)); - XOR(s.x4, LOAD64(c + *mlen + 8)); + s.x3 = XOR(s.x3, LOADBYTES(c + *mlen, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + *mlen + 8, 8)); if (NOTZERO(s.x3, s.x4)) { *mlen = 0; return -1; diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/encrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/encrypt.c index 641b266..80e2bcf 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/encrypt.c +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/encrypt.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -17,9 +16,9 @@ int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, /* set ciphertext size */ *clen = mlen + CRYPTO_ABYTES; /* ascon encryption */ - ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC); + ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENCRYPT); /* set tag */ - STORE64(c + mlen, s.x3); - STORE64(c + mlen + 8, s.x4); + STOREBYTES(c + mlen, s.x3, 8); + STOREBYTES(c + mlen + 8, s.x4, 8); return 0; } diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/interleave.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/interleave.c new file mode 100644 index 0000000..321d0ce --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/interleave.c @@ -0,0 +1,42 @@ +#include "interleave.h" + +static inline uint32_t deinterleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 4)) 
& 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + return x; +} + +static inline uint32_t interleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + return x; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +uint64_t deinterleave32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + lo = deinterleave_uint32(lo); + hi = deinterleave_uint32(hi); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +uint64_t interleave32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + lo = interleave_uint32(lo); + hi = interleave_uint32(hi); + return (uint64_t)hi << 32 | lo; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/interleave.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/interleave.h new file mode 100644 index 0000000..f6590fb --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/interleave.h @@ -0,0 +1,9 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include + +uint64_t deinterleave32(uint64_t in); +uint64_t interleave32(uint64_t in); + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.h index ef338f1..49fd52a 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 
32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) - -#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x8021000008220000) +#define ASCON_128A_IV WORD_T(0x8822000000200000) +#define ASCON_80PQ_IV WORD_T(0xc021000008220000) +#define ASCON_HASH_IV WORD_T(0x0020000008020010) +#define ASCON_XOF_IV WORD_T(0x0020000008020000) + +#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7) +#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340) +#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8) +#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98) +#define 
ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398) + +#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6) +#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220) +#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04) +#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2) +#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/printstate.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/printstate.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/process.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/process.c index 7208ed7..6681f81 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/process.c +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/process.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -11,13 +10,13 @@ void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, while (len >= ASCON_RATE) { tmp0 = LOAD64(in); tmp1 = LOAD64(in + 8); - XOR(s->x0, tmp0); - XOR(s->x1, tmp1); - if (mode != ASCON_AD) { + s->x0 = XOR(s->x0, tmp0); + s->x1 = XOR(s->x1, tmp1); + if (mode & ASCON_SQUEEZE) { STORE64(out, s->x0); STORE64(out + 8, s->x1); } - if (mode == ASCON_DEC) { + if (mode & ASCON_INSERT) { s->x0 = tmp0; s->x1 = tmp1; } @@ -34,28 +33,28 @@ void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, tmp1 = LOAD(in + 8, len - 8); else tmp0 = LOAD(in, len); - XOR(s->x0, tmp0); - XOR(s->x1, 
tmp1); - if (mode != ASCON_AD) { + s->x0 = XOR(s->x0, tmp0); + s->x1 = XOR(s->x1, tmp1); + if (mode & ASCON_SQUEEZE) { if (len >= 8) STORE64(out, s->x0); if (len > 8) STORE(out + 8, s->x1, len - 8); else STORE(out, s->x0, len); } - if (mode == ASCON_DEC) { + if (mode & ASCON_INSERT) { if (len >= 8) s->x0 = tmp0; if (len > 8) { - AND(s->x1, XMASK(len - 8)); - XOR(s->x1, tmp1); + s->x1 = CLEAR(s->x1, len - 8); + s->x1 = XOR(s->x1, tmp1); } else { - AND(s->x0, XMASK(len)); - XOR(s->x0, tmp0); + s->x0 = CLEAR(s->x0, len); + s->x0 = XOR(s->x0, tmp0); } } } if (len < 8) - XOR(s->x0, PAD(len % 8)); + s->x0 = XOR(s->x0, PAD(len % 8)); else - XOR(s->x1, PAD(len % 8)); + s->x1 = XOR(s->x1, PAD(len % 8)); } diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/round.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/round.h index d8ea3b6..fa23bf3 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/round.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/round.h @@ -19,66 +19,34 @@ __forceinline void PINIT(state_t* s) { } __forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { - state_t t; + word_t tmp, C = {.o = C_o, .e = C_e}; /* round constant */ - s->x2.e ^= C_e; - s->x2.o ^= C_o; + s->x2 = XOR(s->x2, C); /* s-box layer */ - s->x0.e ^= s->x4.e; - s->x0.o ^= s->x4.o; - s->x4.e ^= s->x3.e; - s->x4.o ^= s->x3.o; - s->x2.e ^= s->x1.e; - s->x2.o ^= s->x1.o; - t.x0.e = s->x0.e; - t.x0.o = s->x0.o; - t.x4.e = s->x4.e; - t.x4.o = s->x4.o; - t.x3.e = s->x3.e; - t.x3.o = s->x3.o; - t.x1.e = s->x1.e; - t.x1.o = s->x1.o; - t.x2.e = s->x2.e; - t.x2.o = s->x2.o; - s->x0.e = t.x0.e ^ (~t.x1.e & t.x2.e); - s->x0.o = t.x0.o ^ (~t.x1.o & t.x2.o); - s->x2.e = t.x2.e ^ (~t.x3.e & t.x4.e); - s->x2.o = t.x2.o ^ (~t.x3.o & t.x4.o); - s->x4.e = t.x4.e ^ (~t.x0.e & t.x1.e); - s->x4.o = t.x4.o ^ (~t.x0.o & t.x1.o); - s->x1.e = t.x1.e ^ (~t.x2.e & t.x3.e); - s->x1.o = t.x1.o ^ (~t.x2.o & t.x3.o); - s->x3.e = t.x3.e ^ 
(~t.x4.e & t.x0.e); - s->x3.o = t.x3.o ^ (~t.x4.o & t.x0.o); - s->x1.e ^= s->x0.e; - s->x1.o ^= s->x0.o; - s->x3.e ^= s->x2.e; - s->x3.o ^= s->x2.o; - s->x0.e ^= s->x4.e; - s->x0.o ^= s->x4.o; + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); /* linear layer */ - t.x0.e = s->x0.e ^ ROR32(s->x0.o, 4); - t.x0.o = s->x0.o ^ ROR32(s->x0.e, 5); - t.x1.e = s->x1.e ^ ROR32(s->x1.e, 11); - t.x1.o = s->x1.o ^ ROR32(s->x1.o, 11); - t.x2.e = s->x2.e ^ ROR32(s->x2.o, 2); - t.x2.o = s->x2.o ^ ROR32(s->x2.e, 3); - t.x3.e = s->x3.e ^ ROR32(s->x3.o, 3); - t.x3.o = s->x3.o ^ ROR32(s->x3.e, 4); - t.x4.e = s->x4.e ^ ROR32(s->x4.e, 17); - t.x4.o = s->x4.o ^ ROR32(s->x4.o, 17); - s->x0.e ^= ROR32(t.x0.o, 9); - s->x0.o ^= ROR32(t.x0.e, 10); - s->x1.e ^= ROR32(t.x1.o, 19); - s->x1.o ^= ROR32(t.x1.e, 20); - s->x2.e ^= t.x2.o; - s->x2.o ^= ROR32(t.x2.e, 1); - s->x3.e ^= ROR32(t.x3.e, 5); - s->x3.o ^= ROR32(t.x3.o, 5); - s->x4.e ^= ROR32(t.x4.o, 3); - s->x4.o ^= ROR32(t.x4.e, 4); - s->x2.e = ~s->x2.e; - s->x2.o = ~s->x2.o; + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); printstate(" round output", s); } diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/word.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/word.h index 
45184ca..b27c6c9 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/word.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/word.h @@ -3,55 +3,51 @@ #include -#include "config.h" +#include "endian.h" +#include "interleave.h" typedef struct { uint32_t e; uint32_t o; } word_t; +__forceinline uint32_t ROR32(uint32_t x, int n) { + return (n == 0) ? x : x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + __forceinline word_t WORD_T(uint64_t x) { return (word_t){.o = x >> 32, .e = x}; } __forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } -uint64_t TOBI32(uint64_t in); - -uint64_t FROMBI32(uint64_t in); +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } -__forceinline word_t U64TOWORD(uint64_t x) { - uint64_t w = TOBI32(x); - return (word_t){.o = w >> 32, .e = w}; -} +__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } -__forceinline uint64_t WORDTOU64(word_t w) { - return FROMBI32((uint64_t)w.o << 32 | w.e); +__forceinline word_t NOT(word_t a) { + a.e = ~a.e; + a.o = ~a.o; + return a; } -#define XOR(a, b) \ - do { \ - word_t tb = b; \ - (a).e ^= tb.e; \ - (a).o ^= tb.o; \ - } while (0) - -#define AND(a, b) \ - do { \ - word_t tb = b; \ - (a).e &= tb.e; \ - (a).o &= tb.o; \ - } while (0) - -__forceinline uint32_t ROR32(uint32_t x, int n) { - return x >> n | x << (32 - n); +__forceinline word_t XOR(word_t a, word_t b) { + a.e ^= b.e; + a.o ^= b.o; + return a; } -__forceinline word_t ROR64(word_t x, int n) { - word_t r; - r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); - r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); - return r; +__forceinline word_t AND(word_t a, word_t b) { + a.e &= b.e; + a.o &= b.o; + return a; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { @@ -61,22 +57,58 @@ __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return r; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint32_t result = a.e | a.o | b.e | b.o; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ uint32_t mask = 0x0fffffff >> (n * 4 - 4); - return WORD_T((uint64_t)mask << 32 | mask); + return AND(w, WORD_T((uint64_t)mask << 32 | mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t 
w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi8/aead.c b/ascon/Implementations/crypto_aead/ascon128av12/bi8/aead.c new file mode 100644 index 0000000..5429f06 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi8/aead.c @@ -0,0 +1,253 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + *K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + *K1 = XOR(*K1, LOAD64(k)); + *K2 = XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1, + word_t K2) { + word_t N0, N1; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* initialization */ + PINIT(s); + s->x0 = XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + s->x3 = XOR(s->x3, N0); + s->x4 = XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) *px = XOR(*px, LOAD(ad, adlen)); + *px = XOR(*px, PAD(adlen)); + PB(s); + } + s->x4 = XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void 
encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + s->x1 = XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + *px = XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + *px = XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + s->x1 = XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + *px = XOR(*px, cx); + STORE(m, *px, clen); + *px = CLEAR(*px, clen); + *px = XOR(*px, cx); + } + *px = XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) { + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + s->x2 = XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 
= XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("finalization", s); +} + +#if ASCON_INLINE_MODE + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#else + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + init(s, npub, K0, K1, K2); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + final(s, K0, K1, K2); +} + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, K0, K1, K2); + /* set tag */ + c += mlen; + STOREBYTES(c, s.x3, 8); + STOREBYTES(c + 8, s.x4, 8); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, 
K1, K2); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, K0, K1, K2); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + s.x3 = XOR(s.x3, LOADBYTES(c, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi8/ascon.h b/ascon/Implementations/crypto_aead/ascon128av12/bi8/ascon.h new file mode 100644 index 0000000..e596d64 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi8/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi8/config.h b/ascon/Implementations/crypto_aead/ascon128av12/bi8/config.h new file mode 100644 index 0000000..ef5eb14 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi8/config.h @@ -0,0 +1,29 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 1 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 0 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ 
diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi8/endian.h b/ascon/Implementations/crypto_aead/ascon128av12/bi8/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi8/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi8/interleave.c b/ascon/Implementations/crypto_aead/ascon128av12/bi8/interleave.c new file mode 100644 index 0000000..659255b --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi8/interleave.c @@ -0,0 +1,12 @@ +#include "interleave.h" + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +uint64_t interleave8(uint64_t x) { + x = (x & 0xaa55aa55aa55aa55ull) | ((x & 0x00aa00aa00aa00aaull) << 7) | + ((x >> 7) & 0x00aa00aa00aa00aaull); + x = (x & 0xcccc3333cccc3333ull) | ((x & 0x0000cccc0000ccccull) << 14) | + ((x >> 14) & 0x0000cccc0000ccccull); + x = (x & 0xf0f0f0f00f0f0f0full) | ((x & 0x00000000f0f0f0f0ull) << 28) | + ((x >> 28) & 0x00000000f0f0f0f0ull); + return x; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi8/interleave.h b/ascon/Implementations/crypto_aead/ascon128av12/bi8/interleave.h new file mode 100644 index 0000000..62937e0 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi8/interleave.h @@ -0,0 +1,8 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include + +uint64_t interleave8(uint64_t x); + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi8/permutations.c b/ascon/Implementations/crypto_aead/ascon128av12/bi8/permutations.c new file mode 100644 index 0000000..a0cc038 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi8/permutations.c @@ -0,0 +1,45 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint64_t constants[12] = { + 0x0101010100000000ull, 0x0101010000000001ull, 0x0101000100000100ull, + 0x0101000000000101ull, 0x0100010100010000ull, 0x0100010000010001ull, + 0x0100000100010100ull, 0x0100000000010101ull, 0x0001010101000000ull, + 0x0001010001000001ull, 0x0001000101000100ull, 0x0001000001000101ull}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; ++i) ROUND(s, constants[i]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { + 
printstate(" permutation input", s); + P8ROUNDS(s); +} +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi8/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/bi8/permutations.h new file mode 100644 index 0000000..ee1b625 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi8/permutations.h @@ -0,0 +1,165 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV WORD_T(0x8040000020301000ull) +#define ASCON_128A_IV WORD_T(0xc000000030200000ull) +#define ASCON_80PQ_IV WORD_T(0x8040800020301000ull) +#define ASCON_HASH_IV WORD_T(0x0040000020200002ull) +#define ASCON_XOF_IV WORD_T(0x0040000020200000ull) + +#define ASCON_HASH_IV0 WORD_T(0xfa8e976bb985dc4dull) +#define ASCON_HASH_IV1 WORD_T(0xc8085072a40ccd94ull) +#define ASCON_HASH_IV2 WORD_T(0xfe1781be5a847314ull) +#define ASCON_HASH_IV3 WORD_T(0x2f871f6c6d0082b2ull) +#define ASCON_HASH_IV4 WORD_T(0x7a1ba68850ec407eull) + +#define ASCON_XOF_IV0 WORD_T(0x8a46f0d354e771b8ull) +#define ASCON_XOF_IV1 WORD_T(0x04489f4084368cd0ull) +#define ASCON_XOF_IV2 WORD_T(0x6c94f2150dbcf66cull) +#define ASCON_XOF_IV3 WORD_T(0x48965294f143b44eull) +#define ASCON_XOF_IV4 WORD_T(0x0788515fe0e5fb8aull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if 
ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - n) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0x0101010100000000ull); + ROUND(s, 0x0101010000000001ull); + ROUND(s, 0x0101000100000100ull); + ROUND(s, 0x0101000000000101ull); + ROUND(s, 0x0100010100010000ull); + ROUND(s, 0x0100010000010001ull); + ROUND(s, 0x0100000100010100ull); + ROUND(s, 0x0100000000010101ull); + ROUND(s, 0x0001010101000000ull); + ROUND(s, 0x0001010001000001ull); + ROUND(s, 0x0001000101000100ull); + ROUND(s, 0x0001000001000101ull); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x0100010100010000ull); + ROUND(s, 0x0100010000010001ull); + ROUND(s, 0x0100000100010100ull); + ROUND(s, 0x0100000000010101ull); + ROUND(s, 0x0001010101000000ull); + ROUND(s, 0x0001010001000001ull); + ROUND(s, 0x0001000101000100ull); + ROUND(s, 0x0001000001000101ull); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x0100000100010100ull); + ROUND(s, 0x0100000000010101ull); + ROUND(s, 0x0001010101000000ull); + ROUND(s, 0x0001010001000001ull); + ROUND(s, 0x0001000101000100ull); + ROUND(s, 0x0001000001000101ull); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint64_t constants[12]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; ++i) ROUND(s, constants[i]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; ++i) ROUND(s, constants[i]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; ++i) ROUND(s, constants[i]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void 
P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi8/printstate.h b/ascon/Implementations/crypto_aead/ascon128av12/bi8/printstate.h new file mode 100644 index 0000000..2021f96 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi8/printstate.h @@ -0,0 +1,32 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" +#include "word.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi8/round.h b/ascon/Implementations/crypto_aead/ascon128av12/bi8/round.h new file mode 100644 index 0000000..15c7795 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi8/round.h @@ -0,0 +1,53 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + 
s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint64_t C) { + word_t tmp; + /* round constant */ + s->x2 = XOR(s->x2, WORD_T(C)); + /* s-box layer */ + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); + /* linear layer */ + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi8/word.h b/ascon/Implementations/crypto_aead/ascon128av12/bi8/word.h new file mode 100644 index 0000000..f1b5cbb --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi8/word.h @@ -0,0 +1,129 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "endian.h" +#include "interleave.h" + +typedef union { + uint64_t w; + uint8_t b[8]; +} word_t; + +__forceinline uint8_t ROR8(uint8_t a, int n) { return a >> n | a << (8 - n); } + +__forceinline word_t ROR64(word_t a, int n) { + word_t b; + b.b[0] = ROR8(a.b[(n + 0) & 0x7], (n + 0) >> 3); + b.b[1] = ROR8(a.b[(n + 1) & 0x7], (n + 1) >> 3); + b.b[2] = ROR8(a.b[(n + 2) & 0x7], (n + 2) >> 3); + b.b[3] = ROR8(a.b[(n + 3) & 0x7], (n + 3) >> 3); + b.b[4] = ROR8(a.b[(n + 4) & 0x7], (n + 4) >> 3); + b.b[5] = ROR8(a.b[(n + 5) & 0x7], (n + 5) >> 3); + b.b[6] = ROR8(a.b[(n + 6) & 0x7], 
(n + 6) >> 3); + b.b[7] = ROR8(a.b[(n + 7) & 0x7], (n + 7) >> 3); + return b; +} + +__forceinline word_t WORD_T(uint64_t x) { + word_t w; + w.w = x; + return w; +} + +__forceinline uint64_t UINT64_T(word_t w) { + uint64_t x; + x = w.w; + return x; +} + +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(interleave8(x)); } + +__forceinline uint64_t WORDTOU64(word_t w) { return interleave8(UINT64_T(w)); } + +__forceinline word_t NOT(word_t a) { + a.w = ~a.w; + return a; +} + +__forceinline word_t XOR(word_t a, word_t b) { + a.w ^= b.w; + return a; +} + +__forceinline word_t AND(word_t a, word_t b) { + a.w &= b.w; + return a; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t w; + w.w = lo2hi.w << 32 | hi2lo.w >> 32; + return w; +} + +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint64_t result = a.w | b.w; + result |= result >> 32; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; +} + +__forceinline word_t PAD(int i) { return (word_t){.b[7] = 0x80 >> i}; } + +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint8_t m = 0xff >> n; + word_t mask = { + .b[0] = m, + .b[1] = m, + .b[2] = m, + .b[3] = m, + .b[4] = m, + .b[5] = m, + .b[6] = m, + .b[7] = m, + }; + return AND(w, mask); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, 
int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/neon/ascon.h b/ascon/Implementations/crypto_aead/ascon128av12/neon/ascon.h new file mode 100644 index 0000000..f6b6ebc --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/neon/ascon.h @@ -0,0 +1,12 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/neon/config.h b/ascon/Implementations/crypto_aead/ascon128av12/neon/config.h new file mode 100644 index 0000000..7dfad92 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/neon/config.h @@ -0,0 +1,29 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 1 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/neon/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/neon/permutations.h index 91def93..2146445 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/neon/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/neon/permutations.h @@ -1,9 +1,9 @@ #ifndef PERMUTATIONS_H_ 
#define PERMUTATIONS_H_ -typedef struct { - uint64_t x0, x1, x2, x3, x4; -} state_t; +#include "ascon.h" +#include "config.h" +#include "round.h" static const uint64_t C[12] = { 0xffffffffffffff0full, 0xffffffffffffff1eull, 0xffffffffffffff2dull, @@ -12,52 +12,6 @@ static const uint64_t C[12] = { 0xffffffffffffff96ull, 0xffffffffffffffa5ull, 0xffffffffffffffb4ull, }; -/* clang-format off */ -#define ROUND(OFFSET) \ - "vldr d31, [%[C], #" #OFFSET "] \n\t" \ - "veor d0, d0, d4 \n\t" \ - "veor d4, d4, d3 \n\t" \ - "veor d2, d2, d31 \n\t" \ - "vbic d13, d0, d4 \n\t" \ - "vbic d12, d4, d3 \n\t" \ - "veor d2, d2, d1 \n\t" \ - "vbic d14, d1, d0 \n\t" \ - "vbic d11, d3, d2 \n\t" \ - "vbic d10, d2, d1 \n\t" \ - "veor q0, q0, q5 \n\t" \ - "veor q1, q1, q6 \n\t" \ - "veor d4, d4, d14 \n\t" \ - "veor d1, d1, d0 \n\t" \ - "veor d3, d3, d2 \n\t" \ - "veor d0, d0, d4 \n\t" \ - "vsri.64 d14, d4, #7 \n\t" \ - "vsri.64 d24, d4, #41 \n\t" \ - "vsri.64 d11, d1, #39 \n\t" \ - "vsri.64 d21, d1, #61 \n\t" \ - "vsri.64 d10, d0, #19 \n\t" \ - "vsri.64 d20, d0, #28 \n\t" \ - "vsri.64 d12, d2, #1 \n\t" \ - "vsri.64 d22, d2, #6 \n\t" \ - "vsri.64 d13, d3, #10 \n\t" \ - "vsri.64 d23, d3, #17 \n\t" \ - "vsli.64 d10, d0, #45 \n\t" \ - "vsli.64 d20, d0, #36 \n\t" \ - "vsli.64 d11, d1, #25 \n\t" \ - "vsli.64 d21, d1, #3 \n\t" \ - "vsli.64 d12, d2, #63 \n\t" \ - "vsli.64 d22, d2, #58 \n\t" \ - "vsli.64 d13, d3, #54 \n\t" \ - "vsli.64 d23, d3, #47 \n\t" \ - "vsli.64 d14, d4, #57 \n\t" \ - "vsli.64 d24, d4, #23 \n\t" \ - "veor q5, q5, q0 \n\t" \ - "veor q6, q6, q1 \n\t" \ - "veor d14, d14, d4 \n\t" \ - "veor q0, q5, q10 \n\t" \ - "veor d4, d14, d24 \n\t" \ - "veor q1, q6, q11 \n\t" -/* clang-format on */ - #define P12() \ __asm__ __volatile__ ( \ ".arm \n\t" \ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/neon/round.h b/ascon/Implementations/crypto_aead/ascon128av12/neon/round.h new file mode 100644 index 0000000..40e9baa --- /dev/null +++ 
b/ascon/Implementations/crypto_aead/ascon128av12/neon/round.h @@ -0,0 +1,50 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +/* clang-format off */ +#define ROUND(OFFSET) \ + "vldr d31, [%[C], #" #OFFSET "] \n\t" \ + "veor d0, d0, d4 \n\t" \ + "veor d4, d4, d3 \n\t" \ + "veor d2, d2, d31 \n\t" \ + "vbic d13, d0, d4 \n\t" \ + "vbic d12, d4, d3 \n\t" \ + "veor d2, d2, d1 \n\t" \ + "vbic d14, d1, d0 \n\t" \ + "vbic d11, d3, d2 \n\t" \ + "vbic d10, d2, d1 \n\t" \ + "veor q0, q0, q5 \n\t" \ + "veor q1, q1, q6 \n\t" \ + "veor d4, d4, d14 \n\t" \ + "veor d1, d1, d0 \n\t" \ + "veor d3, d3, d2 \n\t" \ + "veor d0, d0, d4 \n\t" \ + "vsri.64 d14, d4, #7 \n\t" \ + "vsri.64 d24, d4, #41 \n\t" \ + "vsri.64 d11, d1, #39 \n\t" \ + "vsri.64 d21, d1, #61 \n\t" \ + "vsri.64 d10, d0, #19 \n\t" \ + "vsri.64 d20, d0, #28 \n\t" \ + "vsri.64 d12, d2, #1 \n\t" \ + "vsri.64 d22, d2, #6 \n\t" \ + "vsri.64 d13, d3, #10 \n\t" \ + "vsri.64 d23, d3, #17 \n\t" \ + "vsli.64 d10, d0, #45 \n\t" \ + "vsli.64 d20, d0, #36 \n\t" \ + "vsli.64 d11, d1, #25 \n\t" \ + "vsli.64 d21, d1, #3 \n\t" \ + "vsli.64 d12, d2, #63 \n\t" \ + "vsli.64 d22, d2, #58 \n\t" \ + "vsli.64 d13, d3, #54 \n\t" \ + "vsli.64 d23, d3, #47 \n\t" \ + "vsli.64 d14, d4, #57 \n\t" \ + "vsli.64 d24, d4, #23 \n\t" \ + "veor q5, q5, q0 \n\t" \ + "veor q6, q6, q1 \n\t" \ + "veor d14, d14, d4 \n\t" \ + "veor q0, q5, q10 \n\t" \ + "veor d4, d14, d24 \n\t" \ + "veor q1, q6, q11 \n\t" +/* clang-format on */ + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/neon/word.h b/ascon/Implementations/crypto_aead/ascon128av12/neon/word.h new file mode 100644 index 0000000..5a1519b --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/neon/word.h @@ -0,0 +1,35 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#define WORDTOU64 +#define U64TOWORD + +typedef uint64_t word_t; + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit 
word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +/* set padding byte in Ascon 64-bit word */ +#define PAD(i) SETBYTE(0x80, i) + +static inline uint64_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) x |= SETBYTE(bytes[i], i); + return x; +} + +static inline void STOREBYTES(uint8_t* bytes, uint64_t x, int n) { + for (int i = 0; i < n; ++i) bytes[i] = GETBYTE(x, i); +} + +static inline uint64_t CLEARBYTES(uint64_t x, int n) { + for (int i = 0; i < n; ++i) x &= ~SETBYTE(0xff, i); + return x; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64/aead.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64/aead.c new file mode 100644 index 0000000..5429f06 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64/aead.c @@ -0,0 +1,253 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + *K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + *K1 = XOR(*K1, LOAD64(k)); + *K2 = XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1, + word_t K2) { + word_t N0, N1; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* initialization */ + PINIT(s); + s->x0 = XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + s->x3 = XOR(s->x3, N0); + s->x4 = XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE 
== 16) s->x1 = XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) *px = XOR(*px, LOAD(ad, adlen)); + *px = XOR(*px, PAD(adlen)); + PB(s); + } + s->x4 = XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + s->x1 = XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + *px = XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + *px = XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + s->x1 = XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + *px = XOR(*px, cx); + STORE(m, *px, clen); + *px = CLEAR(*px, clen); + *px = XOR(*px, cx); + } + *px = XOR(*px, PAD(clen)); + 
printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) { + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + s->x2 = XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("finalization", s); +} + +#if ASCON_INLINE_MODE + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#else + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + init(s, npub, K0, K1, K2); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + final(s, K0, K1, K2); +} + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, K0, K1, K2); + /* set tag */ + c += mlen; + STOREBYTES(c, s.x3, 
8); + STOREBYTES(c + 8, s.x4, 8); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, K0, K1, K2); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + s.x3 = XOR(s.x3, LOADBYTES(c, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64/ascon.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64/ascon.h index 10a5b6e..e596d64 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/opt64/ascon.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64/ascon.h @@ -16,4 +16,4 @@ void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); void ascon_final(state_t* s, const uint8_t* k); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64/config.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64/config.h index 8d8a1a0..7dfad92 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/opt64/config.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 1 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'H' -#endif - /* make sure __forceinline is supported */ #ifndef __forceinline #define __forceinline inline __attribute__((always_inline)) diff --git 
a/ascon/Implementations/crypto_aead/ascon128av12/opt64/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64/permutations.h index 6172dd5..66f3cf3 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/opt64/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) - -#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x80400c0600000000) +#define ASCON_128A_IV WORD_T(0x80800c0800000000) +#define 
ASCON_80PQ_IV WORD_T(0xa0400c0600000000) +#define ASCON_HASH_IV WORD_T(0x00400c0000000100) +#define ASCON_XOF_IV WORD_T(0x00400c0000000000) + +#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 WORD_T(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64/printstate.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/opt64/printstate.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64/word.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64/word.h index 5d601bb..9acbd12 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/opt64/word.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64/word.h @@ -3,45 +3,78 @@ #include -#include "config.h" +#include "endian.h" typedef uint64_t word_t; #define WORD_T #define UINT64_T - #define U64TOWORD #define WORDTOU64 -#define XOR(a, b) \ - do { \ - (a) ^= (b); \ - } while (0) +__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } -#define AND(a, b) \ - do { \ - (a) &= (b); \ - } while (0) +__forceinline word_t NOT(word_t a) { return ~a; } -__forceinline word_t ROR64(word_t x, 
int n) { return x >> n | x << (64 - n); } +__forceinline word_t XOR(word_t a, word_t b) { return a ^ b; } + +__forceinline word_t AND(word_t a, word_t b) { return a & b; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return lo2hi << 32 | hi2lo >> 32; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint64_t result = a | b; + result |= result >> 32; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { - return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint64_t mask = 0x00ffffffffffffffull >> (n * 8 - 8); + return AND(w, WORD_T(mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = 
WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/aead.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/aead.c new file mode 100644 index 0000000..6354194 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/aead.c @@ -0,0 +1,61 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode); + +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode) { + word_t K0, K1, K2; + /* load key */ + if (CRYPTO_KEYBYTES == 20) { + K0 = KEYROT(WORD_T(0), LOAD(k, 4)); + k += 4; + } + K1 = LOAD64(k); + K2 = LOAD64(k + 8); + /* initialization */ + s->x0 = IV; + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = K1; + s->x2 = K2; + s->x3 = LOAD64(npub); + s->x4 = LOAD64(npub + 8); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); + /* process associated data */ + if (adlen) { + process_data(s, (void*)0, ad, adlen, ASCON_ABSORB); + PB(s); + } + s->x4 = XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); + /* process plaintext/ciphertext */ + process_data(s, out, in, tlen, mode); + if (mode == ASCON_ENCRYPT) printstate("process plaintext", s); + if (mode == ASCON_DECRYPT) printstate("process ciphertext", s); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + s->x2 = XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 = 
XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("finalization", s); +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/ascon.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/ascon.h index aa685d3..c89ec89 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/ascon.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/ascon.h @@ -10,9 +10,11 @@ typedef struct { word_t x0, x1, x2, x3, x4; } state_t; -#define ASCON_AD 0 -#define ASCON_ENC 1 -#define ASCON_DEC 2 +#define ASCON_ABSORB 0x1 +#define ASCON_SQUEEZE 0x2 +#define ASCON_INSERT 0x4 +#define ASCON_ENCRYPT (ASCON_ABSORB | ASCON_SQUEEZE) +#define ASCON_DECRYPT (ASCON_ABSORB | ASCON_SQUEEZE | ASCON_INSERT) void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, uint8_t mode); @@ -21,4 +23,4 @@ void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, const uint8_t* ad, uint64_t adlen, const uint8_t* npub, const uint8_t* k, uint8_t mode); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/config.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/config.h index ec8bd6f..ca59e3b 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/config.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 1 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'H' -#endif - /* make sure __forceinline is supported */ #ifndef __forceinline #define __forceinline inline __attribute__((always_inline)) diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/decrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/decrypt.c index 605bd03..42b1e4b 100644 --- 
a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/decrypt.c +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/decrypt.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -20,10 +19,10 @@ int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, /* set plaintext size */ *mlen = clen - CRYPTO_ABYTES; /* ascon decryption */ - ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC); + ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DECRYPT); /* verify tag (should be constant time, check compiler output) */ - XOR(s.x3, LOAD64(c + *mlen)); - XOR(s.x4, LOAD64(c + *mlen + 8)); + s.x3 = XOR(s.x3, LOADBYTES(c + *mlen, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + *mlen + 8, 8)); if (NOTZERO(s.x3, s.x4)) { *mlen = 0; return -1; diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/encrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/encrypt.c index 641b266..80e2bcf 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/encrypt.c +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/encrypt.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -17,9 +16,9 @@ int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, /* set ciphertext size */ *clen = mlen + CRYPTO_ABYTES; /* ascon encryption */ - ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC); + ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENCRYPT); /* set tag */ - STORE64(c + mlen, s.x3); - STORE64(c + mlen + 8, s.x4); + STOREBYTES(c + mlen, s.x3, 8); + STOREBYTES(c + mlen + 8, s.x4, 8); return 0; } diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.h index 6172dd5..66f3cf3 100644 --- 
a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) - -#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x80400c0600000000) +#define ASCON_128A_IV WORD_T(0x80800c0800000000) +#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000) +#define ASCON_HASH_IV WORD_T(0x00400c0000000100) +#define ASCON_XOF_IV WORD_T(0x00400c0000000000) + +#define 
ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 WORD_T(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/printstate.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/printstate.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/process.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/process.c index 7208ed7..6681f81 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/process.c +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/process.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -11,13 +10,13 @@ void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, while (len >= ASCON_RATE) { tmp0 = LOAD64(in); tmp1 = LOAD64(in + 8); - XOR(s->x0, tmp0); - XOR(s->x1, tmp1); - if (mode != ASCON_AD) { + s->x0 = XOR(s->x0, tmp0); + s->x1 = XOR(s->x1, tmp1); + if (mode & ASCON_SQUEEZE) { STORE64(out, s->x0); STORE64(out + 8, s->x1); } - if (mode == ASCON_DEC) { + if 
(mode & ASCON_INSERT) { s->x0 = tmp0; s->x1 = tmp1; } @@ -34,28 +33,28 @@ void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, tmp1 = LOAD(in + 8, len - 8); else tmp0 = LOAD(in, len); - XOR(s->x0, tmp0); - XOR(s->x1, tmp1); - if (mode != ASCON_AD) { + s->x0 = XOR(s->x0, tmp0); + s->x1 = XOR(s->x1, tmp1); + if (mode & ASCON_SQUEEZE) { if (len >= 8) STORE64(out, s->x0); if (len > 8) STORE(out + 8, s->x1, len - 8); else STORE(out, s->x0, len); } - if (mode == ASCON_DEC) { + if (mode & ASCON_INSERT) { if (len >= 8) s->x0 = tmp0; if (len > 8) { - AND(s->x1, XMASK(len - 8)); - XOR(s->x1, tmp1); + s->x1 = CLEAR(s->x1, len - 8); + s->x1 = XOR(s->x1, tmp1); } else { - AND(s->x0, XMASK(len)); - XOR(s->x0, tmp0); + s->x0 = CLEAR(s->x0, len); + s->x0 = XOR(s->x0, tmp0); } } } if (len < 8) - XOR(s->x0, PAD(len % 8)); + s->x0 = XOR(s->x0, PAD(len % 8)); else - XOR(s->x1, PAD(len % 8)); + s->x1 = XOR(s->x1, PAD(len % 8)); } diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/round.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/round.h index 077cbfd..cc5cd9f 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/round.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/round.h @@ -19,50 +19,34 @@ __forceinline void PINIT(state_t* s) { } __forceinline void ROUND(state_t* s, uint64_t C) { - state_t t; - s->x2 ^= C; - s->x0 ^= s->x4; - s->x4 ^= s->x3; - s->x2 ^= s->x1; - t.x0 = s->x0; - t.x4 = s->x4; - t.x3 = s->x3; - t.x1 = s->x1; - t.x2 = s->x2; - s->x0 = t.x0 ^ (~t.x1 & t.x2); - s->x2 = t.x2 ^ (~t.x3 & t.x4); - s->x4 = t.x4 ^ (~t.x0 & t.x1); - s->x1 = t.x1 ^ (~t.x2 & t.x3); - s->x3 = t.x3 ^ (~t.x4 & t.x0); - s->x1 ^= s->x0; - t.x1 = s->x1; - s->x1 = ROR64(s->x1, 39); - s->x3 ^= s->x2; - t.x2 = s->x2; - s->x2 = ROR64(s->x2, 1); - t.x4 = s->x4; - t.x2 ^= s->x2; - s->x2 = ROR64(s->x2, 6 - 1); - t.x3 = s->x3; - t.x1 ^= s->x1; - s->x3 = ROR64(s->x3, 10); - s->x0 ^= s->x4; - s->x4 = 
ROR64(s->x4, 7); - t.x3 ^= s->x3; - s->x2 ^= t.x2; - s->x1 = ROR64(s->x1, 61 - 39); - t.x0 = s->x0; - s->x2 = ~s->x2; - s->x3 = ROR64(s->x3, 17 - 10); - t.x4 ^= s->x4; - s->x4 = ROR64(s->x4, 41 - 7); - s->x3 ^= t.x3; - s->x1 ^= t.x1; - s->x0 = ROR64(s->x0, 19); - s->x4 ^= t.x4; - t.x0 ^= s->x0; - s->x0 = ROR64(s->x0, 28 - 19); - s->x0 ^= t.x0; + word_t tmp; + /* round constant */ + s->x2 = XOR(s->x2, C); + /* s-box layer */ + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); + /* linear layer */ + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); printstate(" round output", s); } diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/word.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/word.h index 5d601bb..9acbd12 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/word.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/word.h @@ -3,45 +3,78 @@ #include -#include "config.h" +#include "endian.h" typedef uint64_t word_t; #define WORD_T #define UINT64_T - #define U64TOWORD #define WORDTOU64 -#define XOR(a, b) \ - do { \ - (a) ^= (b); \ - } while (0) +__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } -#define AND(a, b) \ - do { \ - (a) &= (b); 
\ - } while (0) +__forceinline word_t NOT(word_t a) { return ~a; } -__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } +__forceinline word_t XOR(word_t a, word_t b) { return a ^ b; } + +__forceinline word_t AND(word_t a, word_t b) { return a & b; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return lo2hi << 32 | hi2lo >> 32; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint64_t result = a | b; + result |= result >> 32; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { - return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint64_t mask = 0x00ffffffffffffffull >> (n * 8 - 8); + return AND(w, WORD_T(mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return 
U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt8/aead.c b/ascon/Implementations/crypto_aead/ascon128av12/opt8/aead.c new file mode 100644 index 0000000..5429f06 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt8/aead.c @@ -0,0 +1,253 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + *K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + *K1 = XOR(*K1, LOAD64(k)); + *K2 = XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1, + word_t K2) { + word_t N0, N1; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* initialization */ + PINIT(s); + s->x0 = XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + s->x3 = XOR(s->x3, N0); + s->x4 = XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) *px = XOR(*px, LOAD(ad, adlen)); + *px = XOR(*px, PAD(adlen)); + PB(s); + } + s->x4 = XOR(s->x4, 
WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + s->x1 = XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + *px = XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + *px = XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + s->x1 = XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + *px = XOR(*px, cx); + STORE(m, *px, clen); + *px = CLEAR(*px, clen); + *px = XOR(*px, cx); + } + *px = XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) { + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + s->x2 = XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + 
s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("finalization", s); +} + +#if ASCON_INLINE_MODE + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#else + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + init(s, npub, K0, K1, K2); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + final(s, K0, K1, K2); +} + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, K0, K1, K2); + /* set tag */ + c += mlen; + STOREBYTES(c, s.x3, 8); + STOREBYTES(c + 8, s.x4, 8); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + 
/* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, K0, K1, K2); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + s.x3 = XOR(s.x3, LOADBYTES(c, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt8/api.h b/ascon/Implementations/crypto_aead/ascon128av12/opt8/api.h new file mode 100644 index 0000000..951ee9c --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt8/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 16 diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt8/ascon.h b/ascon/Implementations/crypto_aead/ascon128av12/opt8/ascon.h new file mode 100644 index 0000000..e596d64 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt8/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt8/config.h b/ascon/Implementations/crypto_aead/ascon128av12/opt8/config.h new file mode 100644 index 0000000..ef5eb14 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt8/config.h @@ -0,0 +1,29 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef 
ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 1 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 0 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt8/endian.h b/ascon/Implementations/crypto_aead/ascon128av12/opt8/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt8/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif 
+ +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt8/implementors b/ascon/Implementations/crypto_aead/ascon128av12/opt8/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt8/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt8/permutations.c b/ascon/Implementations/crypto_aead/ascon128av12/opt8/permutations.c new file mode 100644 index 0000000..8d39320 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt8/permutations.c @@ -0,0 +1,35 @@ +#include "permutations.h" + +#include "round.h" + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt8/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/opt8/permutations.h new file mode 100644 index 0000000..66f3cf3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt8/permutations.h @@ -0,0 +1,163 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define 
ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV WORD_T(0x80400c0600000000) +#define ASCON_128A_IV WORD_T(0x80800c0800000000) +#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000) +#define ASCON_HASH_IV WORD_T(0x00400c0000000100) +#define ASCON_XOF_IV WORD_T(0x00400c0000000000) + +#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 WORD_T(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) ((3 + (n)) << 4 | (12 - (n))) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xf0); + ROUND(s, 0xe1); + ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +__forceinline void 
P12ROUNDS(state_t* s) { + for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt8/printstate.h b/ascon/Implementations/crypto_aead/ascon128av12/opt8/printstate.h new file mode 100644 index 0000000..2021f96 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt8/printstate.h @@ -0,0 +1,32 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" +#include "word.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); 
+} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt8/round.h b/ascon/Implementations/crypto_aead/ascon128av12/opt8/round.h new file mode 100644 index 0000000..15c7795 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt8/round.h @@ -0,0 +1,53 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint64_t C) { + word_t tmp; + /* round constant */ + s->x2 = XOR(s->x2, WORD_T(C)); + /* s-box layer */ + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); + /* linear layer */ + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt8/word.h b/ascon/Implementations/crypto_aead/ascon128av12/opt8/word.h new file mode 100644 index 0000000..873313d --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt8/word.h 
@@ -0,0 +1,115 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "endian.h" + +typedef union { + uint64_t w; + uint8_t b[8]; +} word_t; + +#define U64TOWORD WORD_T +#define WORDTOU64 UINT64_T + +#define XMUL(i, x) \ + do { \ + tmp = (uint16_t)a.b[i] * (1 << (x)); \ + b.b[(byte_rol + (i)) & 0x7] ^= (uint8_t)tmp; \ + b.b[(byte_rol + (i) + 1) & 0x7] ^= tmp >> 8; \ + } while (0) + +__forceinline word_t ROR64(word_t a, int n) { + word_t b = {.w = 0ull}; + int bit_rol = (64 - n) & 0x7; + int byte_rol = (64 - n) >> 3; + uint16_t tmp; + XMUL(0, bit_rol); + XMUL(1, bit_rol); + XMUL(2, bit_rol); + XMUL(3, bit_rol); + XMUL(4, bit_rol); + XMUL(5, bit_rol); + XMUL(6, bit_rol); + XMUL(7, bit_rol); + return b; +} + +__forceinline word_t WORD_T(uint64_t x) { return (word_t){.w = x}; } + +__forceinline uint64_t UINT64_T(word_t w) { return w.w; } + +__forceinline word_t NOT(word_t a) { + a.w = ~a.w; + return a; +} + +__forceinline word_t XOR(word_t a, word_t b) { + a.w ^= b.w; + return a; +} + +__forceinline word_t AND(word_t a, word_t b) { + a.w &= b.w; + return a; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + return (word_t){.w = lo2hi.w << 32 | hi2lo.w >> 32}; +} + +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint64_t result = a.w | b.w; + result |= result >> 32; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; +} + +__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint64_t mask = 0x00ffffffffffffffull >> (n * 8 - 8); + return AND(w, WORD_T(mask)); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + 
+__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/ref/ascon.h b/ascon/Implementations/crypto_aead/ascon128av12/ref/ascon.h index 8ab0502..c998868 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/ref/ascon.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/ref/ascon.h @@ -3,12 +3,8 @@ #include -#define WORDTOU64 - -typedef uint64_t word_t; - typedef struct { - word_t x0, x1, x2, x3, x4; + uint64_t x0, x1, x2, x3, x4; } state_t; -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/ref/decrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/ref/decrypt.c index daafb5e..4c23a66 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/ref/decrypt.c +++ b/ascon/Implementations/crypto_aead/ascon128av12/ref/decrypt.c @@ -1,8 +1,8 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" +#include "word.h" int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, const uint8_t* c, uint64_t clen, const uint8_t* ad, @@ -20,10 +20,10 @@ int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, *mlen = clen - CRYPTO_ABYTES; /* load key and nonce */ - K0 = LOAD(k, 8); - K1 = LOAD(k + 8, 8); - N0 = LOAD(npub, 8); - N1 = LOAD(npub + 8, 8); + K0 = LOADBYTES(k, 8); + K1 = LOADBYTES(k + 
8, 8); + N0 = LOADBYTES(npub, 8); + N1 = LOADBYTES(npub + 8, 8); /* initialization */ s.x0 = ASCON_128A_IV; @@ -39,19 +39,19 @@ int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, /* process associated data */ if (adlen) { while (adlen >= ASCON_128A_RATE) { - s.x0 ^= LOAD(ad, 8); - s.x1 ^= LOAD(ad + 8, 8); + s.x0 ^= LOADBYTES(ad, 8); + s.x1 ^= LOADBYTES(ad + 8, 8); P8(&s); ad += ASCON_128A_RATE; adlen -= ASCON_128A_RATE; } /* final associated data block */ if (adlen >= 8) { - s.x0 ^= LOAD(ad, 8); - s.x1 ^= LOAD(ad + 8, adlen - 8); + s.x0 ^= LOADBYTES(ad, 8); + s.x1 ^= LOADBYTES(ad + 8, adlen - 8); s.x1 ^= PAD(adlen - 8); } else { - s.x0 ^= LOAD(ad, adlen); + s.x0 ^= LOADBYTES(ad, adlen); s.x0 ^= PAD(adlen); } P8(&s); @@ -62,10 +62,10 @@ int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, /* process ciphertext */ clen -= CRYPTO_ABYTES; while (clen >= ASCON_128A_RATE) { - uint64_t c0 = LOAD(c, 8); - uint64_t c1 = LOAD(c + 8, 8); - STORE(m, s.x0 ^ c0, 8); - STORE(m + 8, s.x1 ^ c1, 8); + uint64_t c0 = LOADBYTES(c, 8); + uint64_t c1 = LOADBYTES(c + 8, 8); + STOREBYTES(m, s.x0 ^ c0, 8); + STOREBYTES(m + 8, s.x1 ^ c1, 8); s.x0 = c0; s.x1 = c1; P8(&s); @@ -75,18 +75,18 @@ int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, } /* final ciphertext block */ if (clen >= 8) { - uint64_t c0 = LOAD(c, 8); - uint64_t c1 = LOAD(c + 8, clen - 8); - STORE(m, s.x0 ^ c0, 8); - STORE(m + 8, s.x1 ^ c1, clen - 8); + uint64_t c0 = LOADBYTES(c, 8); + uint64_t c1 = LOADBYTES(c + 8, clen - 8); + STOREBYTES(m, s.x0 ^ c0, 8); + STOREBYTES(m + 8, s.x1 ^ c1, clen - 8); s.x0 = c0; - s.x1 &= ~MASK(clen - 8); + s.x1 = CLEARBYTES(s.x1, clen - 8); s.x1 |= c1; s.x1 ^= PAD(clen - 8); } else { - uint64_t c0 = LOAD(c, clen); - STORE(m, s.x0 ^ c0, clen); - s.x0 &= ~MASK(clen); + uint64_t c0 = LOADBYTES(c, clen); + STOREBYTES(m, s.x0 ^ c0, clen); + s.x0 = CLEARBYTES(s.x0, clen); s.x0 |= c0; s.x0 ^= PAD(clen); } @@ -102,7 +102,7 @@ int crypto_aead_decrypt(uint8_t* 
m, uint64_t* mlen, uint8_t* nsec, printstate("finalization", &s); /* verify tag (should be constant time, check compiler output) */ - if ((s.x3 ^ LOAD(c, 8)) | (s.x4 ^ LOAD(c + 8, 8))) { + if ((s.x3 ^ LOADBYTES(c, 8)) | (s.x4 ^ LOADBYTES(c + 8, 8))) { *mlen = 0; return -1; } diff --git a/ascon/Implementations/crypto_aead/ascon128av12/ref/encrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/ref/encrypt.c index 63abd34..b75afc7 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/ref/encrypt.c +++ b/ascon/Implementations/crypto_aead/ascon128av12/ref/encrypt.c @@ -1,8 +1,8 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" +#include "word.h" int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, uint64_t mlen, const uint8_t* ad, uint64_t adlen, @@ -16,10 +16,10 @@ int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, *clen = mlen + CRYPTO_ABYTES; /* load key and nonce */ - K0 = LOAD(k, 8); - K1 = LOAD(k + 8, 8); - N0 = LOAD(npub, 8); - N1 = LOAD(npub + 8, 8); + K0 = LOADBYTES(k, 8); + K1 = LOADBYTES(k + 8, 8); + N0 = LOADBYTES(npub, 8); + N1 = LOADBYTES(npub + 8, 8); /* initialization */ s.x0 = ASCON_128A_IV; @@ -35,19 +35,19 @@ int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, /* process associated data */ if (adlen) { while (adlen >= ASCON_128A_RATE) { - s.x0 ^= LOAD(ad, 8); - s.x1 ^= LOAD(ad + 8, 8); + s.x0 ^= LOADBYTES(ad, 8); + s.x1 ^= LOADBYTES(ad + 8, 8); P8(&s); ad += ASCON_128A_RATE; adlen -= ASCON_128A_RATE; } /* final associated data block */ if (adlen >= 8) { - s.x0 ^= LOAD(ad, 8); - s.x1 ^= LOAD(ad + 8, adlen - 8); + s.x0 ^= LOADBYTES(ad, 8); + s.x1 ^= LOADBYTES(ad + 8, adlen - 8); s.x1 ^= PAD(adlen - 8); } else { - s.x0 ^= LOAD(ad, adlen); + s.x0 ^= LOADBYTES(ad, adlen); s.x0 ^= PAD(adlen); } P8(&s); @@ -57,10 +57,10 @@ int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, /* process plaintext */ while (mlen >= 
ASCON_128A_RATE) { - s.x0 ^= LOAD(m, 8); - s.x1 ^= LOAD(m + 8, 8); - STORE(c, s.x0, 8); - STORE(c + 8, s.x1, 8); + s.x0 ^= LOADBYTES(m, 8); + s.x1 ^= LOADBYTES(m + 8, 8); + STOREBYTES(c, s.x0, 8); + STOREBYTES(c + 8, s.x1, 8); P8(&s); m += ASCON_128A_RATE; c += ASCON_128A_RATE; @@ -68,14 +68,14 @@ int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, } /* final plaintext block */ if (mlen >= 8) { - s.x0 ^= LOAD(m, 8); - s.x1 ^= LOAD(m + 8, mlen - 8); - STORE(c, s.x0, 8); - STORE(c + 8, s.x1, mlen - 8); + s.x0 ^= LOADBYTES(m, 8); + s.x1 ^= LOADBYTES(m + 8, mlen - 8); + STOREBYTES(c, s.x0, 8); + STOREBYTES(c + 8, s.x1, mlen - 8); s.x1 ^= PAD(mlen - 8); } else { - s.x0 ^= LOAD(m, mlen); - STORE(c, s.x0, mlen); + s.x0 ^= LOADBYTES(m, mlen); + STOREBYTES(c, s.x0, mlen); s.x0 ^= PAD(mlen); } c += mlen; @@ -90,8 +90,8 @@ int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, printstate("finalization", &s); /* set tag */ - STORE(c, s.x3, 8); - STORE(c + 8, s.x4, 8); + STOREBYTES(c, s.x3, 8); + STOREBYTES(c + 8, s.x4, 8); return 0; } diff --git a/ascon/Implementations/crypto_aead/ascon128av12/ref/printstate.h b/ascon/Implementations/crypto_aead/ascon128av12/ref/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/ref/printstate.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/ref/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_aead/ascon128av12/ref/word.h b/ascon/Implementations/crypto_aead/ascon128av12/ref/word.h new file mode 100644 index 0000000..5a1519b --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/ref/word.h @@ -0,0 +1,35 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#define WORDTOU64 +#define U64TOWORD + +typedef uint64_t word_t; + +/* get byte from Ascon 64-bit word */ +#define 
GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +/* set padding byte in Ascon 64-bit word */ +#define PAD(i) SETBYTE(0x80, i) + +static inline uint64_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) x |= SETBYTE(bytes[i], i); + return x; +} + +static inline void STOREBYTES(uint8_t* bytes, uint64_t x, int n) { + for (int i = 0; i < n; ++i) bytes[i] = GETBYTE(x, i); +} + +static inline uint64_t CLEARBYTES(uint64_t x, int n) { + for (int i = 0; i < n; ++i) x &= ~SETBYTE(0xff, i); + return x; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32/aead.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32/aead.c new file mode 100644 index 0000000..5429f06 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32/aead.c @@ -0,0 +1,253 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + *K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + *K1 = XOR(*K1, LOAD64(k)); + *K2 = XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1, + word_t K2) { + word_t N0, N1; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* initialization */ + PINIT(s); + s->x0 = XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + s->x3 = XOR(s->x3, N0); + s->x4 = XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { 
+ while (adlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) *px = XOR(*px, LOAD(ad, adlen)); + *px = XOR(*px, PAD(adlen)); + PB(s); + } + s->x4 = XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + s->x1 = XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + *px = XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + *px = XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + s->x1 = XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + *px = XOR(*px, cx); + STORE(m, *px, clen); + 
*px = CLEAR(*px, clen); + *px = XOR(*px, cx); + } + *px = XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) { + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + s->x2 = XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("finalization", s); +} + +#if ASCON_INLINE_MODE + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#else + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + init(s, npub, K0, K1, K2); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + final(s, K0, K1, K2); +} + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, 
mlen); + FINAL(&s, K0, K1, K2); + /* set tag */ + c += mlen; + STOREBYTES(c, s.x3, 8); + STOREBYTES(c + 8, s.x4, 8); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, K0, K1, K2); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + s.x3 = XOR(s.x3, LOADBYTES(c, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32/ascon.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32/ascon.h index 10a5b6e..e596d64 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32/ascon.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32/ascon.h @@ -16,4 +16,4 @@ void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); void ascon_final(state_t* s, const uint8_t* k); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32/config.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32/config.h index b1b5080..1447e7d 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32/config.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 0 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'M' -#endif - /* make sure __forceinline is supported */ #ifndef __forceinline #define 
__forceinline inline __attribute__((always_inline)) diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32/interleave.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32/interleave.h new file mode 100644 index 0000000..6e2e5c3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32/interleave.h @@ -0,0 +1,47 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include + +__forceinline uint32_t deinterleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + return x; +} + +__forceinline uint32_t interleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + return x; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t deinterleave32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + lo = deinterleave_uint32(lo); + hi = deinterleave_uint32(hi); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t interleave32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + lo = interleave_uint32(lo); + hi = interleave_uint32(hi); + return (uint64_t)hi << 32 | lo; +} + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32/permutations.h index ef338f1..49fd52a 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) - -#define 
ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x8021000008220000) +#define ASCON_128A_IV WORD_T(0x8822000000200000) +#define ASCON_80PQ_IV WORD_T(0xc021000008220000) +#define ASCON_HASH_IV WORD_T(0x0020000008020010) +#define ASCON_XOF_IV WORD_T(0x0020000008020000) + +#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7) +#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340) +#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8) +#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98) +#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398) + +#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6) +#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220) +#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04) +#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2) +#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32/printstate.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32/printstate.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32/word.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32/word.h index 8ffcaaa..b27c6c9 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32/word.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32/word.h @@ -3,55 +3,51 @@ #include -#include "config.h" +#include "endian.h" +#include "interleave.h" typedef struct { uint32_t 
e; uint32_t o; } word_t; +__forceinline uint32_t ROR32(uint32_t x, int n) { + return (n == 0) ? x : x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + __forceinline word_t WORD_T(uint64_t x) { return (word_t){.o = x >> 32, .e = x}; } __forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } -__forceinline uint64_t TOBI32(uint64_t in); - -__forceinline uint64_t FROMBI32(uint64_t in); +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } -__forceinline word_t U64TOWORD(uint64_t x) { - uint64_t w = TOBI32(x); - return (word_t){.o = w >> 32, .e = w}; -} +__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } -__forceinline uint64_t WORDTOU64(word_t w) { - return FROMBI32((uint64_t)w.o << 32 | w.e); +__forceinline word_t NOT(word_t a) { + a.e = ~a.e; + a.o = ~a.o; + return a; } -#define XOR(a, b) \ - do { \ - word_t tb = b; \ - (a).e ^= tb.e; \ - (a).o ^= tb.o; \ - } while (0) - -#define AND(a, b) \ - do { \ - word_t tb = b; \ - (a).e &= tb.e; \ - (a).o &= tb.o; \ - } while (0) - -__forceinline uint32_t ROR32(uint32_t x, int n) { - return x >> n | x << (32 - n); +__forceinline word_t XOR(word_t a, word_t b) { + a.e ^= b.e; + a.o ^= b.o; + return a; } -__forceinline word_t ROR64(word_t x, int n) { - word_t r; - r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); - r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); - return r; +__forceinline word_t AND(word_t a, word_t b) { + a.e &= b.e; + a.o &= b.o; + return a; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { @@ -61,57 +57,58 @@ __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return r; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint32_t result = a.e | a.o | b.e | b.o; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ uint32_t mask = 0x0fffffff >> (n * 4 - 4); - return WORD_T((uint64_t)mask << 32 | mask); -} - -/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ -__forceinline uint64_t TOBI32(uint64_t in) { - uint32_t hi = in >> 32; - uint32_t lo = in; - uint32_t r0, r1; - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r0 = (lo & 0x0000FFFF) | (hi << 16); - r1 = (lo >> 16) | (hi & 0xFFFF0000); - return (uint64_t)r1 << 32 | r0; -} - -/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ -__forceinline uint64_t FROMBI32(uint64_t in) { - uint32_t r0 = in; - uint32_t r1 = in >> 32; - uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); - uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - return (uint64_t)hi << 32 | lo; + return AND(w, WORD_T((uint64_t)mask << 32 | mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/aead.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/aead.c new file mode 100644 index 
0000000..5429f06 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/aead.c @@ -0,0 +1,253 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + *K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + *K1 = XOR(*K1, LOAD64(k)); + *K2 = XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1, + word_t K2) { + word_t N0, N1; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* initialization */ + PINIT(s); + s->x0 = XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + s->x3 = XOR(s->x3, N0); + s->x4 = XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) *px = XOR(*px, LOAD(ad, adlen)); + *px = XOR(*px, PAD(adlen)); + PB(s); + } + s->x4 = XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + s->x1 = XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, 
s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + *px = XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + *px = XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + s->x1 = XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + *px = XOR(*px, cx); + STORE(m, *px, clen); + *px = CLEAR(*px, clen); + *px = XOR(*px, cx); + } + *px = XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) { + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + s->x2 = XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("finalization", s); +} + +#if ASCON_INLINE_MODE + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + 
+#else + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + init(s, npub, K0, K1, K2); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + final(s, K0, K1, K2); +} + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, K0, K1, K2); + /* set tag */ + c += mlen; + STOREBYTES(c, s.x3, 8); + STOREBYTES(c + 8, s.x4, 8); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, K0, K1, K2); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + s.x3 = XOR(s.x3, LOADBYTES(c, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen 
= 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/ascon.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/ascon.h index 10a5b6e..e596d64 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/ascon.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/ascon.h @@ -16,4 +16,4 @@ void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); void ascon_final(state_t* s, const uint8_t* k); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/config.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/config.h index 4242e2e..0f5a485 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/config.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 1 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'H' -#endif - /* make sure __forceinline is supported */ #ifndef __forceinline #define __forceinline inline __attribute__((always_inline)) diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/interleave.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/interleave.h new file mode 100644 index 0000000..6e2e5c3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/interleave.h @@ -0,0 +1,47 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include + +__forceinline uint32_t deinterleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + return x; +} + +__forceinline uint32_t interleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 
8)) & 0x0000FF00, x ^= t ^ (t << 8); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + return x; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t deinterleave32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + lo = deinterleave_uint32(lo); + hi = deinterleave_uint32(hi); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t interleave32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + lo = interleave_uint32(lo); + hi = interleave_uint32(hi); + return (uint64_t)hi << 32 | lo; +} + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/permutations.h index ef338f1..49fd52a 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - 
((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) - -#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x8021000008220000) +#define ASCON_128A_IV WORD_T(0x8822000000200000) +#define ASCON_80PQ_IV WORD_T(0xc021000008220000) +#define ASCON_HASH_IV WORD_T(0x0020000008020010) +#define ASCON_XOF_IV WORD_T(0x0020000008020000) + +#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7) +#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340) +#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8) +#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98) +#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398) + +#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6) +#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220) +#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04) +#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2) +#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/printstate.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/printstate.h +++ 
b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/word.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/word.h index 8ffcaaa..b27c6c9 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/word.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/word.h @@ -3,55 +3,51 @@ #include -#include "config.h" +#include "endian.h" +#include "interleave.h" typedef struct { uint32_t e; uint32_t o; } word_t; +__forceinline uint32_t ROR32(uint32_t x, int n) { + return (n == 0) ? x : x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + __forceinline word_t WORD_T(uint64_t x) { return (word_t){.o = x >> 32, .e = x}; } __forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } -__forceinline uint64_t TOBI32(uint64_t in); - -__forceinline uint64_t FROMBI32(uint64_t in); +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } -__forceinline word_t U64TOWORD(uint64_t x) { - uint64_t w = TOBI32(x); - return (word_t){.o = w >> 32, .e = w}; -} +__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } -__forceinline uint64_t WORDTOU64(word_t w) { - return FROMBI32((uint64_t)w.o << 32 | w.e); +__forceinline word_t NOT(word_t a) { + a.e = ~a.e; + a.o = ~a.o; + return a; } -#define XOR(a, b) \ - do { \ - word_t tb = b; \ - (a).e ^= tb.e; \ - (a).o ^= tb.o; \ - } while (0) - -#define AND(a, b) \ - do { \ - word_t tb = b; \ - (a).e &= tb.e; \ - (a).o &= tb.o; \ - } while (0) - -__forceinline uint32_t ROR32(uint32_t x, int n) { - return x >> n | x << (32 
- n); +__forceinline word_t XOR(word_t a, word_t b) { + a.e ^= b.e; + a.o ^= b.o; + return a; } -__forceinline word_t ROR64(word_t x, int n) { - word_t r; - r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); - r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); - return r; +__forceinline word_t AND(word_t a, word_t b) { + a.e &= b.e; + a.o &= b.o; + return a; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { @@ -61,57 +57,58 @@ __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return r; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint32_t result = a.e | a.o | b.e | b.o; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ uint32_t mask = 0x0fffffff >> (n * 4 - 4); - return WORD_T((uint64_t)mask << 32 | mask); -} - -/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ -__forceinline uint64_t TOBI32(uint64_t in) { - uint32_t hi = in >> 32; - uint32_t lo = in; - uint32_t r0, r1; - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r0 = (lo & 0x0000FFFF) | (hi << 16); - r1 = (lo >> 16) | (hi & 0xFFFF0000); - return (uint64_t)r1 << 32 | r0; -} - -/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ -__forceinline uint64_t FROMBI32(uint64_t in) { - uint32_t r0 = in; - uint32_t r1 = in >> 32; - uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); - uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - return (uint64_t)hi << 32 | lo; + return AND(w, WORD_T((uint64_t)mask << 32 | mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) 
{ + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/aead.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/aead.c new file mode 100644 index 0000000..5429f06 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/aead.c @@ -0,0 +1,253 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + *K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + *K1 = XOR(*K1, LOAD64(k)); + *K2 = XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1, + word_t K2) { + word_t N0, N1; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* initialization */ + PINIT(s); + s->x0 = XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + s->x3 = XOR(s->x3, N0); + s->x4 = XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, 
LOAD64(ad)); + if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) *px = XOR(*px, LOAD(ad, adlen)); + *px = XOR(*px, PAD(adlen)); + PB(s); + } + s->x4 = XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + s->x1 = XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + *px = XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + *px = XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + s->x1 = XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + *px = XOR(*px, cx); + STORE(m, *px, clen); + *px = CLEAR(*px, clen); + *px = XOR(*px, cx); + } + *px 
= XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) { + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + s->x2 = XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("finalization", s); +} + +#if ASCON_INLINE_MODE + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#else + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + init(s, npub, K0, K1, K2); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + final(s, K0, K1, K2); +} + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, K0, K1, K2); + /* set tag */ + c += 
mlen; + STOREBYTES(c, s.x3, 8); + STOREBYTES(c + 8, s.x4, 8); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, K0, K1, K2); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + s.x3 = XOR(s.x3, LOADBYTES(c, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/ascon.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/ascon.h index 10a5b6e..e596d64 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/ascon.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/ascon.h @@ -16,4 +16,4 @@ void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); void ascon_final(state_t* s, const uint8_t* k); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/config.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/config.h index 5ccce77..0f5a485 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/config.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 1 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'M' -#endif - /* make sure __forceinline is supported */ #ifndef __forceinline #define 
__forceinline inline __attribute__((always_inline)) diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/interleave.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/interleave.h new file mode 100644 index 0000000..6e2e5c3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/interleave.h @@ -0,0 +1,47 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include + +__forceinline uint32_t deinterleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + return x; +} + +__forceinline uint32_t interleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + return x; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t deinterleave32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + lo = deinterleave_uint32(lo); + hi = deinterleave_uint32(hi); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t interleave32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + lo = interleave_uint32(lo); + hi = interleave_uint32(hi); + return (uint64_t)hi << 32 | lo; +} + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/permutations.h index ef338f1..49fd52a 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 
U64TOWORD(0x348fa5c9d525e140ull) - -#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x8021000008220000) +#define ASCON_128A_IV WORD_T(0x8822000000200000) +#define ASCON_80PQ_IV WORD_T(0xc021000008220000) +#define ASCON_HASH_IV WORD_T(0x0020000008020010) +#define ASCON_XOF_IV WORD_T(0x0020000008020000) + +#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7) +#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340) +#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8) +#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98) +#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398) + +#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6) +#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220) +#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04) +#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2) +#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/printstate.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/printstate.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/round.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/round.h index bc7a0cd..fa23bf3 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/round.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/round.h @@ -19,58 
+19,34 @@ __forceinline void PINIT(state_t* s) { } __forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { - uint32_t tmp_e, tmp_o; + word_t tmp, C = {.o = C_o, .e = C_e}; /* round constant */ - s->x2.e ^= C_e; - s->x2.o ^= C_o; + s->x2 = XOR(s->x2, C); /* s-box layer */ - s->x0.e ^= s->x4.e; - s->x0.o ^= s->x4.o; - s->x4.e ^= s->x3.e; - s->x4.o ^= s->x3.o; - s->x2.e ^= s->x1.e; - s->x2.o ^= s->x1.o; - tmp_e = s->x0.e & (~s->x4.e); - tmp_o = s->x0.o & (~s->x4.o); - s->x0.e ^= s->x2.e & (~s->x1.e); - s->x0.o ^= s->x2.o & (~s->x1.o); - s->x2.e ^= s->x4.e & (~s->x3.e); - s->x2.o ^= s->x4.o & (~s->x3.o); - s->x4.e ^= s->x1.e & (~s->x0.e); - s->x4.o ^= s->x1.o & (~s->x0.o); - s->x1.e ^= s->x3.e & (~s->x2.e); - s->x1.o ^= s->x3.o & (~s->x2.o); - s->x3.e ^= tmp_e; - s->x3.o ^= tmp_o; - s->x1.e ^= s->x0.e; - s->x1.o ^= s->x0.o; - s->x3.e ^= s->x2.e; - s->x3.o ^= s->x2.o; - s->x0.e ^= s->x4.e; - s->x0.o ^= s->x4.o; + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); /* linear layer */ - tmp_e = s->x0.e ^ ROR32(s->x0.o, 4); - tmp_o = s->x0.o ^ ROR32(s->x0.e, 5); - s->x0.e ^= ROR32(tmp_o, 9); - s->x0.o ^= ROR32(tmp_e, 10); - tmp_e = s->x1.e ^ ROR32(s->x1.e, 11); - tmp_o = s->x1.o ^ ROR32(s->x1.o, 11); - s->x1.e ^= ROR32(tmp_o, 19); - s->x1.o ^= ROR32(tmp_e, 20); - tmp_e = s->x2.e ^ ROR32(s->x2.o, 2); - tmp_o = s->x2.o ^ ROR32(s->x2.e, 3); - s->x2.e ^= tmp_o; - s->x2.o ^= ROR32(tmp_e, 1); - tmp_e = s->x3.e ^ ROR32(s->x3.o, 3); - tmp_o = s->x3.o ^ ROR32(s->x3.e, 4); - s->x3.e ^= ROR32(tmp_e, 5); - s->x3.o ^= ROR32(tmp_o, 5); - tmp_e = s->x4.e ^ ROR32(s->x4.e, 17); - tmp_o = s->x4.o ^ ROR32(s->x4.o, 17); - 
s->x4.e ^= ROR32(tmp_o, 3); - s->x4.o ^= ROR32(tmp_e, 4); - s->x2.e = ~s->x2.e; - s->x2.o = ~s->x2.o; + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); printstate(" round output", s); } diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/word.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/word.h index 8ffcaaa..b27c6c9 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/word.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/word.h @@ -3,55 +3,51 @@ #include -#include "config.h" +#include "endian.h" +#include "interleave.h" typedef struct { uint32_t e; uint32_t o; } word_t; +__forceinline uint32_t ROR32(uint32_t x, int n) { + return (n == 0) ? x : x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + __forceinline word_t WORD_T(uint64_t x) { return (word_t){.o = x >> 32, .e = x}; } __forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } -__forceinline uint64_t TOBI32(uint64_t in); - -__forceinline uint64_t FROMBI32(uint64_t in); +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } -__forceinline word_t U64TOWORD(uint64_t x) { - uint64_t w = TOBI32(x); - return (word_t){.o = w >> 32, .e = w}; -} +__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } -__forceinline uint64_t WORDTOU64(word_t w) { - return FROMBI32((uint64_t)w.o << 32 | w.e); +__forceinline word_t NOT(word_t a) { + a.e = ~a.e; + a.o = ~a.o; + return a; } -#define XOR(a, b) \ - do { \ - word_t tb = b; \ - (a).e ^= tb.e; \ - (a).o ^= tb.o; \ - } while (0) - -#define AND(a, b) \ - do { \ - word_t tb = b; \ - (a).e &= tb.e; \ - (a).o &= tb.o; \ - } while (0) - -__forceinline uint32_t ROR32(uint32_t x, int n) { - return x >> n | x << (32 - n); +__forceinline word_t XOR(word_t a, word_t b) { + a.e ^= b.e; + a.o ^= b.o; + return a; } -__forceinline word_t ROR64(word_t x, int n) { - word_t r; - r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); - r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); - return r; +__forceinline word_t AND(word_t a, word_t b) { + a.e &= b.e; + a.o &= b.o; + return a; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { @@ -61,57 +57,58 @@ __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return r; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint32_t result = a.e | a.o | b.e | b.o; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ uint32_t mask = 0x0fffffff >> (n * 4 - 4); - return WORD_T((uint64_t)mask << 32 | mask); -} - -/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ -__forceinline uint64_t TOBI32(uint64_t in) { - uint32_t hi = in >> 32; - uint32_t lo = in; - uint32_t r0, r1; - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r0 = (lo & 0x0000FFFF) | (hi << 16); - r1 = (lo >> 16) | (hi & 0xFFFF0000); - return (uint64_t)r1 << 32 | r0; -} - -/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ -__forceinline uint64_t FROMBI32(uint64_t in) { - uint32_t r0 = in; - uint32_t r1 = in >> 32; - uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); - uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - return (uint64_t)hi << 32 | lo; + return AND(w, WORD_T((uint64_t)mask << 32 | mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/aead.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/aead.c new file mode 100644 index 
0000000..6354194 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/aead.c @@ -0,0 +1,61 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode); + +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode) { + word_t K0, K1, K2; + /* load key */ + if (CRYPTO_KEYBYTES == 20) { + K0 = KEYROT(WORD_T(0), LOAD(k, 4)); + k += 4; + } + K1 = LOAD64(k); + K2 = LOAD64(k + 8); + /* initialization */ + s->x0 = IV; + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = K1; + s->x2 = K2; + s->x3 = LOAD64(npub); + s->x4 = LOAD64(npub + 8); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); + /* process associated data */ + if (adlen) { + process_data(s, (void*)0, ad, adlen, ASCON_ABSORB); + PB(s); + } + s->x4 = XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); + /* process plaintext/ciphertext */ + process_data(s, out, in, tlen, mode); + if (mode == ASCON_ENCRYPT) printstate("process plaintext", s); + if (mode == ASCON_DECRYPT) printstate("process ciphertext", s); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + s->x2 = XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("finalization", s); +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/ascon.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/ascon.h 
index aa685d3..c89ec89 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/ascon.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/ascon.h @@ -10,9 +10,11 @@ typedef struct { word_t x0, x1, x2, x3, x4; } state_t; -#define ASCON_AD 0 -#define ASCON_ENC 1 -#define ASCON_DEC 2 +#define ASCON_ABSORB 0x1 +#define ASCON_SQUEEZE 0x2 +#define ASCON_INSERT 0x4 +#define ASCON_ENCRYPT (ASCON_ABSORB | ASCON_SQUEEZE) +#define ASCON_DECRYPT (ASCON_ABSORB | ASCON_SQUEEZE | ASCON_INSERT) void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, uint8_t mode); @@ -21,4 +23,4 @@ void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, const uint8_t* ad, uint64_t adlen, const uint8_t* npub, const uint8_t* k, uint8_t mode); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/config.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/config.h index 19426ab..ca59e3b 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/config.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 1 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'B' -#endif - /* make sure __forceinline is supported */ #ifndef __forceinline #define __forceinline inline __attribute__((always_inline)) diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/decrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/decrypt.c index 605bd03..42b1e4b 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/decrypt.c +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/decrypt.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -20,10 +19,10 @@ int crypto_aead_decrypt(uint8_t* 
m, uint64_t* mlen, uint8_t* nsec, /* set plaintext size */ *mlen = clen - CRYPTO_ABYTES; /* ascon decryption */ - ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC); + ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DECRYPT); /* verify tag (should be constant time, check compiler output) */ - XOR(s.x3, LOAD64(c + *mlen)); - XOR(s.x4, LOAD64(c + *mlen + 8)); + s.x3 = XOR(s.x3, LOADBYTES(c + *mlen, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + *mlen + 8, 8)); if (NOTZERO(s.x3, s.x4)) { *mlen = 0; return -1; diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/encrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/encrypt.c index 641b266..80e2bcf 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/encrypt.c +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/encrypt.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -17,9 +16,9 @@ int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, /* set ciphertext size */ *clen = mlen + CRYPTO_ABYTES; /* ascon encryption */ - ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC); + ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENCRYPT); /* set tag */ - STORE64(c + mlen, s.x3); - STORE64(c + mlen + 8, s.x4); + STOREBYTES(c + mlen, s.x3, 8); + STOREBYTES(c + mlen + 8, s.x4, 8); return 0; } diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/interleave.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/interleave.c new file mode 100644 index 0000000..321d0ce --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/interleave.c @@ -0,0 +1,42 @@ +#include "interleave.h" + +static inline uint32_t deinterleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t 
= (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + return x; +} + +static inline uint32_t interleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + return x; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +uint64_t deinterleave32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + lo = deinterleave_uint32(lo); + hi = deinterleave_uint32(hi); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +uint64_t interleave32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + lo = interleave_uint32(lo); + hi = interleave_uint32(hi); + return (uint64_t)hi << 32 | lo; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/interleave.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/interleave.h new file mode 100644 index 0000000..f6590fb --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/interleave.h @@ -0,0 +1,9 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include + +uint64_t deinterleave32(uint64_t in); +uint64_t interleave32(uint64_t in); + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.h index ef338f1..49fd52a 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - 
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) - -#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x8021000008220000) +#define ASCON_128A_IV WORD_T(0x8822000000200000) +#define ASCON_80PQ_IV WORD_T(0xc021000008220000) +#define ASCON_HASH_IV WORD_T(0x0020000008020010) +#define ASCON_XOF_IV WORD_T(0x0020000008020000) + +#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7) +#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340) +#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8) +#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98) +#define ASCON_HASH_IV4 
WORD_T(0x4bca84c06339f398) + +#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6) +#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220) +#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04) +#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2) +#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/printstate.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/printstate.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/process.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/process.c index 81fdd15..a7624c6 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/process.c +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/process.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -13,13 +12,13 @@ void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, n = len < ASCON_RATE ? 
len : ASCON_RATE; /* absorb data */ tmp0 = LOAD(in, n); - XOR(s->x0, tmp0); + s->x0 = XOR(s->x0, tmp0); /* extract data */ - if (mode != ASCON_AD) STORE(out, s->x0, n); + if (mode & ASCON_SQUEEZE) STORE(out, s->x0, n); /* insert data */ - if (mode == ASCON_DEC) { - AND(s->x0, XMASK(n)); - XOR(s->x0, tmp0); + if (mode & ASCON_INSERT) { + s->x0 = CLEAR(s->x0, n); + s->x0 = XOR(s->x0, tmp0); } /* compute permutation for full blocks */ if (n == ASCON_RATE) PB(s); @@ -27,5 +26,5 @@ void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, out += n; len -= n; } - XOR(s->x0, PAD(n % 8)); + s->x0 = XOR(s->x0, PAD(n % 8)); } diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/round.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/round.h index d8ea3b6..fa23bf3 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/round.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/round.h @@ -19,66 +19,34 @@ __forceinline void PINIT(state_t* s) { } __forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { - state_t t; + word_t tmp, C = {.o = C_o, .e = C_e}; /* round constant */ - s->x2.e ^= C_e; - s->x2.o ^= C_o; + s->x2 = XOR(s->x2, C); /* s-box layer */ - s->x0.e ^= s->x4.e; - s->x0.o ^= s->x4.o; - s->x4.e ^= s->x3.e; - s->x4.o ^= s->x3.o; - s->x2.e ^= s->x1.e; - s->x2.o ^= s->x1.o; - t.x0.e = s->x0.e; - t.x0.o = s->x0.o; - t.x4.e = s->x4.e; - t.x4.o = s->x4.o; - t.x3.e = s->x3.e; - t.x3.o = s->x3.o; - t.x1.e = s->x1.e; - t.x1.o = s->x1.o; - t.x2.e = s->x2.e; - t.x2.o = s->x2.o; - s->x0.e = t.x0.e ^ (~t.x1.e & t.x2.e); - s->x0.o = t.x0.o ^ (~t.x1.o & t.x2.o); - s->x2.e = t.x2.e ^ (~t.x3.e & t.x4.e); - s->x2.o = t.x2.o ^ (~t.x3.o & t.x4.o); - s->x4.e = t.x4.e ^ (~t.x0.e & t.x1.e); - s->x4.o = t.x4.o ^ (~t.x0.o & t.x1.o); - s->x1.e = t.x1.e ^ (~t.x2.e & t.x3.e); - s->x1.o = t.x1.o ^ (~t.x2.o & t.x3.o); - s->x3.e = t.x3.e ^ (~t.x4.e & t.x0.e); - s->x3.o = t.x3.o ^ (~t.x4.o & t.x0.o); - 
s->x1.e ^= s->x0.e; - s->x1.o ^= s->x0.o; - s->x3.e ^= s->x2.e; - s->x3.o ^= s->x2.o; - s->x0.e ^= s->x4.e; - s->x0.o ^= s->x4.o; + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); /* linear layer */ - t.x0.e = s->x0.e ^ ROR32(s->x0.o, 4); - t.x0.o = s->x0.o ^ ROR32(s->x0.e, 5); - t.x1.e = s->x1.e ^ ROR32(s->x1.e, 11); - t.x1.o = s->x1.o ^ ROR32(s->x1.o, 11); - t.x2.e = s->x2.e ^ ROR32(s->x2.o, 2); - t.x2.o = s->x2.o ^ ROR32(s->x2.e, 3); - t.x3.e = s->x3.e ^ ROR32(s->x3.o, 3); - t.x3.o = s->x3.o ^ ROR32(s->x3.e, 4); - t.x4.e = s->x4.e ^ ROR32(s->x4.e, 17); - t.x4.o = s->x4.o ^ ROR32(s->x4.o, 17); - s->x0.e ^= ROR32(t.x0.o, 9); - s->x0.o ^= ROR32(t.x0.e, 10); - s->x1.e ^= ROR32(t.x1.o, 19); - s->x1.o ^= ROR32(t.x1.e, 20); - s->x2.e ^= t.x2.o; - s->x2.o ^= ROR32(t.x2.e, 1); - s->x3.e ^= ROR32(t.x3.e, 5); - s->x3.o ^= ROR32(t.x3.o, 5); - s->x4.e ^= ROR32(t.x4.o, 3); - s->x4.o ^= ROR32(t.x4.e, 4); - s->x2.e = ~s->x2.e; - s->x2.o = ~s->x2.o; + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); printstate(" round output", s); } diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/word.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/word.h index 45184ca..b27c6c9 100644 --- 
a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/word.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/word.h @@ -3,55 +3,51 @@ #include -#include "config.h" +#include "endian.h" +#include "interleave.h" typedef struct { uint32_t e; uint32_t o; } word_t; +__forceinline uint32_t ROR32(uint32_t x, int n) { + return (n == 0) ? x : x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + __forceinline word_t WORD_T(uint64_t x) { return (word_t){.o = x >> 32, .e = x}; } __forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } -uint64_t TOBI32(uint64_t in); - -uint64_t FROMBI32(uint64_t in); +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } -__forceinline word_t U64TOWORD(uint64_t x) { - uint64_t w = TOBI32(x); - return (word_t){.o = w >> 32, .e = w}; -} +__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } -__forceinline uint64_t WORDTOU64(word_t w) { - return FROMBI32((uint64_t)w.o << 32 | w.e); +__forceinline word_t NOT(word_t a) { + a.e = ~a.e; + a.o = ~a.o; + return a; } -#define XOR(a, b) \ - do { \ - word_t tb = b; \ - (a).e ^= tb.e; \ - (a).o ^= tb.o; \ - } while (0) - -#define AND(a, b) \ - do { \ - word_t tb = b; \ - (a).e &= tb.e; \ - (a).o &= tb.o; \ - } while (0) - -__forceinline uint32_t ROR32(uint32_t x, int n) { - return x >> n | x << (32 - n); +__forceinline word_t XOR(word_t a, word_t b) { + a.e ^= b.e; + a.o ^= b.o; + return a; } -__forceinline word_t ROR64(word_t x, int n) { - word_t r; - r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); - r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); - return r; +__forceinline word_t AND(word_t a, word_t b) { + a.e &= b.e; + a.o &= b.o; + return a; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { @@ -61,22 +57,58 @@ __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return r; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint32_t result = a.e | a.o | b.e | b.o; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ uint32_t mask = 0x0fffffff >> (n * 4 - 4); - return WORD_T((uint64_t)mask << 32 | mask); + return AND(w, WORD_T((uint64_t)mask << 32 | mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t 
w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi8/aead.c b/ascon/Implementations/crypto_aead/ascon128v12/bi8/aead.c new file mode 100644 index 0000000..5429f06 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi8/aead.c @@ -0,0 +1,253 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + *K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + *K1 = XOR(*K1, LOAD64(k)); + *K2 = XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1, + word_t K2) { + word_t N0, N1; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* initialization */ + PINIT(s); + s->x0 = XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + s->x3 = XOR(s->x3, N0); + s->x4 = XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) *px = XOR(*px, LOAD(ad, adlen)); + *px = XOR(*px, PAD(adlen)); + PB(s); + } + s->x4 = XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void 
encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + s->x1 = XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + *px = XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + *px = XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + s->x1 = XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + *px = XOR(*px, cx); + STORE(m, *px, clen); + *px = CLEAR(*px, clen); + *px = XOR(*px, cx); + } + *px = XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) { + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + s->x2 = XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 
= XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("finalization", s); +} + +#if ASCON_INLINE_MODE + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#else + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + init(s, npub, K0, K1, K2); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + final(s, K0, K1, K2); +} + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, K0, K1, K2); + /* set tag */ + c += mlen; + STOREBYTES(c, s.x3, 8); + STOREBYTES(c + 8, s.x4, 8); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, 
K1, K2); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, K0, K1, K2); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + s.x3 = XOR(s.x3, LOADBYTES(c, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi8/ascon.h b/ascon/Implementations/crypto_aead/ascon128v12/bi8/ascon.h new file mode 100644 index 0000000..e596d64 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi8/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi8/config.h b/ascon/Implementations/crypto_aead/ascon128v12/bi8/config.h new file mode 100644 index 0000000..ef5eb14 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi8/config.h @@ -0,0 +1,29 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 1 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 0 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff 
--git a/ascon/Implementations/crypto_aead/ascon128v12/bi8/endian.h b/ascon/Implementations/crypto_aead/ascon128v12/bi8/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi8/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi8/interleave.c b/ascon/Implementations/crypto_aead/ascon128v12/bi8/interleave.c new file mode 100644 index 0000000..659255b --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi8/interleave.c @@ -0,0 +1,12 @@ +#include "interleave.h" + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +uint64_t interleave8(uint64_t x) { + x = (x & 0xaa55aa55aa55aa55ull) | ((x & 0x00aa00aa00aa00aaull) << 7) | + ((x >> 7) & 0x00aa00aa00aa00aaull); + x = (x & 0xcccc3333cccc3333ull) | ((x & 0x0000cccc0000ccccull) << 14) | + ((x >> 14) & 0x0000cccc0000ccccull); + x = (x & 0xf0f0f0f00f0f0f0full) | ((x & 0x00000000f0f0f0f0ull) << 28) | + ((x >> 28) & 0x00000000f0f0f0f0ull); + return x; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi8/interleave.h b/ascon/Implementations/crypto_aead/ascon128v12/bi8/interleave.h new file mode 100644 index 0000000..62937e0 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi8/interleave.h @@ -0,0 +1,8 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include + +uint64_t interleave8(uint64_t x); + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi8/permutations.c b/ascon/Implementations/crypto_aead/ascon128v12/bi8/permutations.c new file mode 100644 index 0000000..a0cc038 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi8/permutations.c @@ -0,0 +1,45 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint64_t constants[12] = { + 0x0101010100000000ull, 0x0101010000000001ull, 0x0101000100000100ull, + 0x0101000000000101ull, 0x0100010100010000ull, 0x0100010000010001ull, + 0x0100000100010100ull, 0x0100000000010101ull, 0x0001010101000000ull, + 0x0001010001000001ull, 0x0001000101000100ull, 0x0001000001000101ull}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; ++i) ROUND(s, constants[i]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { + printstate(" 
permutation input", s); + P8ROUNDS(s); +} +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi8/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/bi8/permutations.h new file mode 100644 index 0000000..ee1b625 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi8/permutations.h @@ -0,0 +1,165 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV WORD_T(0x8040000020301000ull) +#define ASCON_128A_IV WORD_T(0xc000000030200000ull) +#define ASCON_80PQ_IV WORD_T(0x8040800020301000ull) +#define ASCON_HASH_IV WORD_T(0x0040000020200002ull) +#define ASCON_XOF_IV WORD_T(0x0040000020200000ull) + +#define ASCON_HASH_IV0 WORD_T(0xfa8e976bb985dc4dull) +#define ASCON_HASH_IV1 WORD_T(0xc8085072a40ccd94ull) +#define ASCON_HASH_IV2 WORD_T(0xfe1781be5a847314ull) +#define ASCON_HASH_IV3 WORD_T(0x2f871f6c6d0082b2ull) +#define ASCON_HASH_IV4 WORD_T(0x7a1ba68850ec407eull) + +#define ASCON_XOF_IV0 WORD_T(0x8a46f0d354e771b8ull) +#define ASCON_XOF_IV1 WORD_T(0x04489f4084368cd0ull) +#define ASCON_XOF_IV2 WORD_T(0x6c94f2150dbcf66cull) +#define ASCON_XOF_IV3 WORD_T(0x48965294f143b44eull) +#define ASCON_XOF_IV4 WORD_T(0x0788515fe0e5fb8aull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 
&& CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - n) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0x0101010100000000ull); + ROUND(s, 0x0101010000000001ull); + ROUND(s, 0x0101000100000100ull); + ROUND(s, 0x0101000000000101ull); + ROUND(s, 0x0100010100010000ull); + ROUND(s, 0x0100010000010001ull); + ROUND(s, 0x0100000100010100ull); + ROUND(s, 0x0100000000010101ull); + ROUND(s, 0x0001010101000000ull); + ROUND(s, 0x0001010001000001ull); + ROUND(s, 0x0001000101000100ull); + ROUND(s, 0x0001000001000101ull); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x0100010100010000ull); + ROUND(s, 0x0100010000010001ull); + ROUND(s, 0x0100000100010100ull); + ROUND(s, 0x0100000000010101ull); + ROUND(s, 0x0001010101000000ull); + ROUND(s, 0x0001010001000001ull); + ROUND(s, 0x0001000101000100ull); + ROUND(s, 0x0001000001000101ull); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x0100000100010100ull); + ROUND(s, 0x0100000000010101ull); + ROUND(s, 0x0001010101000000ull); + ROUND(s, 0x0001010001000001ull); + ROUND(s, 0x0001000101000100ull); + ROUND(s, 0x0001000001000101ull); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint64_t constants[12]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; ++i) ROUND(s, constants[i]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; ++i) ROUND(s, constants[i]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; ++i) ROUND(s, constants[i]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int 
i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi8/printstate.h b/ascon/Implementations/crypto_aead/ascon128v12/bi8/printstate.h new file mode 100644 index 0000000..2021f96 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi8/printstate.h @@ -0,0 +1,32 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" +#include "word.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi8/round.h b/ascon/Implementations/crypto_aead/ascon128v12/bi8/round.h new file mode 100644 index 0000000..15c7795 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi8/round.h @@ -0,0 +1,53 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + 
+__forceinline void ROUND(state_t* s, uint64_t C) { + word_t tmp; + /* round constant */ + s->x2 = XOR(s->x2, WORD_T(C)); + /* s-box layer */ + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); + /* linear layer */ + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi8/word.h b/ascon/Implementations/crypto_aead/ascon128v12/bi8/word.h new file mode 100644 index 0000000..f1b5cbb --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi8/word.h @@ -0,0 +1,129 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "endian.h" +#include "interleave.h" + +typedef union { + uint64_t w; + uint8_t b[8]; +} word_t; + +__forceinline uint8_t ROR8(uint8_t a, int n) { return a >> n | a << (8 - n); } + +__forceinline word_t ROR64(word_t a, int n) { + word_t b; + b.b[0] = ROR8(a.b[(n + 0) & 0x7], (n + 0) >> 3); + b.b[1] = ROR8(a.b[(n + 1) & 0x7], (n + 1) >> 3); + b.b[2] = ROR8(a.b[(n + 2) & 0x7], (n + 2) >> 3); + b.b[3] = ROR8(a.b[(n + 3) & 0x7], (n + 3) >> 3); + b.b[4] = ROR8(a.b[(n + 4) & 0x7], (n + 4) >> 3); + b.b[5] = ROR8(a.b[(n + 5) & 0x7], (n + 5) >> 3); + b.b[6] = ROR8(a.b[(n + 6) & 0x7], (n + 6) >> 3); + b.b[7] = 
ROR8(a.b[(n + 7) & 0x7], (n + 7) >> 3); + return b; +} + +__forceinline word_t WORD_T(uint64_t x) { + word_t w; + w.w = x; + return w; +} + +__forceinline uint64_t UINT64_T(word_t w) { + uint64_t x; + x = w.w; + return x; +} + +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(interleave8(x)); } + +__forceinline uint64_t WORDTOU64(word_t w) { return interleave8(UINT64_T(w)); } + +__forceinline word_t NOT(word_t a) { + a.w = ~a.w; + return a; +} + +__forceinline word_t XOR(word_t a, word_t b) { + a.w ^= b.w; + return a; +} + +__forceinline word_t AND(word_t a, word_t b) { + a.w &= b.w; + return a; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t w; + w.w = lo2hi.w << 32 | hi2lo.w >> 32; + return w; +} + +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint64_t result = a.w | b.w; + result |= result >> 32; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; +} + +__forceinline word_t PAD(int i) { return (word_t){.b[7] = 0x80 >> i}; } + +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint8_t m = 0xff >> n; + word_t mask = { + .b[0] = m, + .b[1] = m, + .b[2] = m, + .b[3] = m, + .b[4] = m, + .b[5] = m, + .b[6] = m, + .b[7] = m, + }; + return AND(w, mask); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; 
+ for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/neon/ascon.h b/ascon/Implementations/crypto_aead/ascon128v12/neon/ascon.h new file mode 100644 index 0000000..f6b6ebc --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/neon/ascon.h @@ -0,0 +1,12 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/neon/config.h b/ascon/Implementations/crypto_aead/ascon128v12/neon/config.h new file mode 100644 index 0000000..7dfad92 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/neon/config.h @@ -0,0 +1,29 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 1 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/neon/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/neon/permutations.h index f627d7b..98e4a9a 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/neon/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/neon/permutations.h @@ -1,9 +1,9 @@ #ifndef PERMUTATIONS_H_ #define PERMUTATIONS_H_ -typedef struct { - 
uint64_t x0, x1, x2, x3, x4; -} state_t; +#include "ascon.h" +#include "config.h" +#include "round.h" static const uint64_t C[12] = { 0xffffffffffffff0full, 0xffffffffffffff1eull, 0xffffffffffffff2dull, @@ -12,52 +12,6 @@ static const uint64_t C[12] = { 0xffffffffffffff96ull, 0xffffffffffffffa5ull, 0xffffffffffffffb4ull, }; -/* clang-format off */ -#define ROUND(OFFSET) \ - "vldr d31, [%[C], #" #OFFSET "] \n\t" \ - "veor d0, d0, d4 \n\t" \ - "veor d4, d4, d3 \n\t" \ - "veor d2, d2, d31 \n\t" \ - "vbic d13, d0, d4 \n\t" \ - "vbic d12, d4, d3 \n\t" \ - "veor d2, d2, d1 \n\t" \ - "vbic d14, d1, d0 \n\t" \ - "vbic d11, d3, d2 \n\t" \ - "vbic d10, d2, d1 \n\t" \ - "veor q0, q0, q5 \n\t" \ - "veor q1, q1, q6 \n\t" \ - "veor d4, d4, d14 \n\t" \ - "veor d1, d1, d0 \n\t" \ - "veor d3, d3, d2 \n\t" \ - "veor d0, d0, d4 \n\t" \ - "vsri.64 d14, d4, #7 \n\t" \ - "vsri.64 d24, d4, #41 \n\t" \ - "vsri.64 d11, d1, #39 \n\t" \ - "vsri.64 d21, d1, #61 \n\t" \ - "vsri.64 d10, d0, #19 \n\t" \ - "vsri.64 d20, d0, #28 \n\t" \ - "vsri.64 d12, d2, #1 \n\t" \ - "vsri.64 d22, d2, #6 \n\t" \ - "vsri.64 d13, d3, #10 \n\t" \ - "vsri.64 d23, d3, #17 \n\t" \ - "vsli.64 d10, d0, #45 \n\t" \ - "vsli.64 d20, d0, #36 \n\t" \ - "vsli.64 d11, d1, #25 \n\t" \ - "vsli.64 d21, d1, #3 \n\t" \ - "vsli.64 d12, d2, #63 \n\t" \ - "vsli.64 d22, d2, #58 \n\t" \ - "vsli.64 d13, d3, #54 \n\t" \ - "vsli.64 d23, d3, #47 \n\t" \ - "vsli.64 d14, d4, #57 \n\t" \ - "vsli.64 d24, d4, #23 \n\t" \ - "veor q5, q5, q0 \n\t" \ - "veor q6, q6, q1 \n\t" \ - "veor d14, d14, d4 \n\t" \ - "veor q0, q5, q10 \n\t" \ - "veor d4, d14, d24 \n\t" \ - "veor q1, q6, q11 \n\t" -/* clang-format on */ - #define P12() \ __asm__ __volatile__ ( \ ".arm \n\t" \ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/neon/round.h b/ascon/Implementations/crypto_aead/ascon128v12/neon/round.h new file mode 100644 index 0000000..40e9baa --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/neon/round.h @@ -0,0 +1,50 @@ +#ifndef 
ROUND_H_ +#define ROUND_H_ + +/* clang-format off */ +#define ROUND(OFFSET) \ + "vldr d31, [%[C], #" #OFFSET "] \n\t" \ + "veor d0, d0, d4 \n\t" \ + "veor d4, d4, d3 \n\t" \ + "veor d2, d2, d31 \n\t" \ + "vbic d13, d0, d4 \n\t" \ + "vbic d12, d4, d3 \n\t" \ + "veor d2, d2, d1 \n\t" \ + "vbic d14, d1, d0 \n\t" \ + "vbic d11, d3, d2 \n\t" \ + "vbic d10, d2, d1 \n\t" \ + "veor q0, q0, q5 \n\t" \ + "veor q1, q1, q6 \n\t" \ + "veor d4, d4, d14 \n\t" \ + "veor d1, d1, d0 \n\t" \ + "veor d3, d3, d2 \n\t" \ + "veor d0, d0, d4 \n\t" \ + "vsri.64 d14, d4, #7 \n\t" \ + "vsri.64 d24, d4, #41 \n\t" \ + "vsri.64 d11, d1, #39 \n\t" \ + "vsri.64 d21, d1, #61 \n\t" \ + "vsri.64 d10, d0, #19 \n\t" \ + "vsri.64 d20, d0, #28 \n\t" \ + "vsri.64 d12, d2, #1 \n\t" \ + "vsri.64 d22, d2, #6 \n\t" \ + "vsri.64 d13, d3, #10 \n\t" \ + "vsri.64 d23, d3, #17 \n\t" \ + "vsli.64 d10, d0, #45 \n\t" \ + "vsli.64 d20, d0, #36 \n\t" \ + "vsli.64 d11, d1, #25 \n\t" \ + "vsli.64 d21, d1, #3 \n\t" \ + "vsli.64 d12, d2, #63 \n\t" \ + "vsli.64 d22, d2, #58 \n\t" \ + "vsli.64 d13, d3, #54 \n\t" \ + "vsli.64 d23, d3, #47 \n\t" \ + "vsli.64 d14, d4, #57 \n\t" \ + "vsli.64 d24, d4, #23 \n\t" \ + "veor q5, q5, q0 \n\t" \ + "veor q6, q6, q1 \n\t" \ + "veor d14, d14, d4 \n\t" \ + "veor q0, q5, q10 \n\t" \ + "veor d4, d14, d24 \n\t" \ + "veor q1, q6, q11 \n\t" +/* clang-format on */ + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/neon/word.h b/ascon/Implementations/crypto_aead/ascon128v12/neon/word.h new file mode 100644 index 0000000..5a1519b --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/neon/word.h @@ -0,0 +1,35 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#define WORDTOU64 +#define U64TOWORD + +typedef uint64_t word_t; + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +/* set padding byte in 
Ascon 64-bit word */ +#define PAD(i) SETBYTE(0x80, i) + +static inline uint64_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) x |= SETBYTE(bytes[i], i); + return x; +} + +static inline void STOREBYTES(uint8_t* bytes, uint64_t x, int n) { + for (int i = 0; i < n; ++i) bytes[i] = GETBYTE(x, i); +} + +static inline uint64_t CLEARBYTES(uint64_t x, int n) { + for (int i = 0; i < n; ++i) x &= ~SETBYTE(0xff, i); + return x; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64/aead.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64/aead.c new file mode 100644 index 0000000..5429f06 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64/aead.c @@ -0,0 +1,253 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + *K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + *K1 = XOR(*K1, LOAD64(k)); + *K2 = XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1, + word_t K2) { + word_t N0, N1; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* initialization */ + PINIT(s); + s->x0 = XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + s->x3 = XOR(s->x3, N0); + s->x4 = XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; 
+ } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) *px = XOR(*px, LOAD(ad, adlen)); + *px = XOR(*px, PAD(adlen)); + PB(s); + } + s->x4 = XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + s->x1 = XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + *px = XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + *px = XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + s->x1 = XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + *px = XOR(*px, cx); + STORE(m, *px, clen); + *px = CLEAR(*px, clen); + *px = XOR(*px, cx); + } + *px = XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, word_t K0, word_t 
K1, word_t K2) { + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + s->x2 = XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("finalization", s); +} + +#if ASCON_INLINE_MODE + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#else + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + init(s, npub, K0, K1, K2); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + final(s, K0, K1, K2); +} + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, K0, K1, K2); + /* set tag */ + c += mlen; + STOREBYTES(c, s.x3, 8); + STOREBYTES(c + 8, s.x4, 8); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* 
mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, K0, K1, K2); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + s.x3 = XOR(s.x3, LOADBYTES(c, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64/ascon.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64/ascon.h index 10a5b6e..e596d64 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/opt64/ascon.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64/ascon.h @@ -16,4 +16,4 @@ void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); void ascon_final(state_t* s, const uint8_t* k); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64/config.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64/config.h index 8d8a1a0..7dfad92 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/opt64/config.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 1 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'H' -#endif - /* make sure __forceinline is supported */ #ifndef __forceinline #define __forceinline inline __attribute__((always_inline)) diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64/permutations.h 
b/ascon/Implementations/crypto_aead/ascon128v12/opt64/permutations.h index 6172dd5..66f3cf3 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/opt64/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) - -#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x80400c0600000000) +#define ASCON_128A_IV WORD_T(0x80800c0800000000) +#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000) +#define ASCON_HASH_IV 
WORD_T(0x00400c0000000100) +#define ASCON_XOF_IV WORD_T(0x00400c0000000000) + +#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 WORD_T(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64/printstate.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/opt64/printstate.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64/word.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64/word.h index 5d601bb..9acbd12 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/opt64/word.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64/word.h @@ -3,45 +3,78 @@ #include -#include "config.h" +#include "endian.h" typedef uint64_t word_t; #define WORD_T #define UINT64_T - #define U64TOWORD #define WORDTOU64 -#define XOR(a, b) \ - do { \ - (a) ^= (b); \ - } while (0) +__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } -#define AND(a, b) \ - do { \ - (a) &= (b); \ - } while (0) +__forceinline word_t NOT(word_t a) { return ~a; } -__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } +__forceinline word_t 
XOR(word_t a, word_t b) { return a ^ b; } + +__forceinline word_t AND(word_t a, word_t b) { return a & b; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return lo2hi << 32 | hi2lo >> 32; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint64_t result = a | b; + result |= result >> 32; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { - return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint64_t mask = 0x00ffffffffffffffull >> (n * 8 - 8); + return AND(w, WORD_T(mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 
- i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/aead.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/aead.c new file mode 100644 index 0000000..6354194 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/aead.c @@ -0,0 +1,61 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode); + +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode) { + word_t K0, K1, K2; + /* load key */ + if (CRYPTO_KEYBYTES == 20) { + K0 = KEYROT(WORD_T(0), LOAD(k, 4)); + k += 4; + } + K1 = LOAD64(k); + K2 = LOAD64(k + 8); + /* initialization */ + s->x0 = IV; + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = K1; + s->x2 = K2; + s->x3 = LOAD64(npub); + s->x4 = LOAD64(npub + 8); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); + /* process associated data */ + if (adlen) { + process_data(s, (void*)0, ad, adlen, ASCON_ABSORB); + PB(s); + } + s->x4 = XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); + /* process plaintext/ciphertext */ + process_data(s, out, in, tlen, mode); + if (mode == ASCON_ENCRYPT) printstate("process plaintext", s); + if (mode == ASCON_DECRYPT) printstate("process ciphertext", s); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + s->x2 = XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 
= XOR(s->x4, K2); + printstate("finalization", s); +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/ascon.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/ascon.h index aa685d3..c89ec89 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/ascon.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/ascon.h @@ -10,9 +10,11 @@ typedef struct { word_t x0, x1, x2, x3, x4; } state_t; -#define ASCON_AD 0 -#define ASCON_ENC 1 -#define ASCON_DEC 2 +#define ASCON_ABSORB 0x1 +#define ASCON_SQUEEZE 0x2 +#define ASCON_INSERT 0x4 +#define ASCON_ENCRYPT (ASCON_ABSORB | ASCON_SQUEEZE) +#define ASCON_DECRYPT (ASCON_ABSORB | ASCON_SQUEEZE | ASCON_INSERT) void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, uint8_t mode); @@ -21,4 +23,4 @@ void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, const uint8_t* ad, uint64_t adlen, const uint8_t* npub, const uint8_t* k, uint8_t mode); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/config.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/config.h index ec8bd6f..ca59e3b 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/config.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 1 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'H' -#endif - /* make sure __forceinline is supported */ #ifndef __forceinline #define __forceinline inline __attribute__((always_inline)) diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/decrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/decrypt.c index 605bd03..42b1e4b 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/decrypt.c +++ 
b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/decrypt.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -20,10 +19,10 @@ int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, /* set plaintext size */ *mlen = clen - CRYPTO_ABYTES; /* ascon decryption */ - ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC); + ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DECRYPT); /* verify tag (should be constant time, check compiler output) */ - XOR(s.x3, LOAD64(c + *mlen)); - XOR(s.x4, LOAD64(c + *mlen + 8)); + s.x3 = XOR(s.x3, LOADBYTES(c + *mlen, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + *mlen + 8, 8)); if (NOTZERO(s.x3, s.x4)) { *mlen = 0; return -1; diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/encrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/encrypt.c index 641b266..80e2bcf 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/encrypt.c +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/encrypt.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -17,9 +16,9 @@ int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, /* set ciphertext size */ *clen = mlen + CRYPTO_ABYTES; /* ascon encryption */ - ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC); + ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENCRYPT); /* set tag */ - STORE64(c + mlen, s.x3); - STORE64(c + mlen + 8, s.x4); + STOREBYTES(c + mlen, s.x3, 8); + STOREBYTES(c + mlen + 8, s.x4, 8); return 0; } diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.h index 6172dd5..66f3cf3 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.h +++ 
b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) - -#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x80400c0600000000) +#define ASCON_128A_IV WORD_T(0x80800c0800000000) +#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000) +#define ASCON_HASH_IV WORD_T(0x00400c0000000100) +#define ASCON_XOF_IV WORD_T(0x00400c0000000000) + +#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 
WORD_T(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 WORD_T(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/printstate.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/printstate.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/process.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/process.c index 81fdd15..a7624c6 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/process.c +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/process.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -13,13 +12,13 @@ void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, n = len < ASCON_RATE ? 
len : ASCON_RATE; /* absorb data */ tmp0 = LOAD(in, n); - XOR(s->x0, tmp0); + s->x0 = XOR(s->x0, tmp0); /* extract data */ - if (mode != ASCON_AD) STORE(out, s->x0, n); + if (mode & ASCON_SQUEEZE) STORE(out, s->x0, n); /* insert data */ - if (mode == ASCON_DEC) { - AND(s->x0, XMASK(n)); - XOR(s->x0, tmp0); + if (mode & ASCON_INSERT) { + s->x0 = CLEAR(s->x0, n); + s->x0 = XOR(s->x0, tmp0); } /* compute permutation for full blocks */ if (n == ASCON_RATE) PB(s); @@ -27,5 +26,5 @@ void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, out += n; len -= n; } - XOR(s->x0, PAD(n % 8)); + s->x0 = XOR(s->x0, PAD(n % 8)); } diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/round.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/round.h index 077cbfd..cc5cd9f 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/round.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/round.h @@ -19,50 +19,34 @@ __forceinline void PINIT(state_t* s) { } __forceinline void ROUND(state_t* s, uint64_t C) { - state_t t; - s->x2 ^= C; - s->x0 ^= s->x4; - s->x4 ^= s->x3; - s->x2 ^= s->x1; - t.x0 = s->x0; - t.x4 = s->x4; - t.x3 = s->x3; - t.x1 = s->x1; - t.x2 = s->x2; - s->x0 = t.x0 ^ (~t.x1 & t.x2); - s->x2 = t.x2 ^ (~t.x3 & t.x4); - s->x4 = t.x4 ^ (~t.x0 & t.x1); - s->x1 = t.x1 ^ (~t.x2 & t.x3); - s->x3 = t.x3 ^ (~t.x4 & t.x0); - s->x1 ^= s->x0; - t.x1 = s->x1; - s->x1 = ROR64(s->x1, 39); - s->x3 ^= s->x2; - t.x2 = s->x2; - s->x2 = ROR64(s->x2, 1); - t.x4 = s->x4; - t.x2 ^= s->x2; - s->x2 = ROR64(s->x2, 6 - 1); - t.x3 = s->x3; - t.x1 ^= s->x1; - s->x3 = ROR64(s->x3, 10); - s->x0 ^= s->x4; - s->x4 = ROR64(s->x4, 7); - t.x3 ^= s->x3; - s->x2 ^= t.x2; - s->x1 = ROR64(s->x1, 61 - 39); - t.x0 = s->x0; - s->x2 = ~s->x2; - s->x3 = ROR64(s->x3, 17 - 10); - t.x4 ^= s->x4; - s->x4 = ROR64(s->x4, 41 - 7); - s->x3 ^= t.x3; - s->x1 ^= t.x1; - s->x0 = ROR64(s->x0, 19); - s->x4 ^= t.x4; - t.x0 ^= s->x0; - s->x0 = 
ROR64(s->x0, 28 - 19); - s->x0 ^= t.x0; + word_t tmp; + /* round constant */ + s->x2 = XOR(s->x2, C); + /* s-box layer */ + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); + /* linear layer */ + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); printstate(" round output", s); } diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/word.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/word.h index 5d601bb..9acbd12 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/word.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/word.h @@ -3,45 +3,78 @@ #include -#include "config.h" +#include "endian.h" typedef uint64_t word_t; #define WORD_T #define UINT64_T - #define U64TOWORD #define WORDTOU64 -#define XOR(a, b) \ - do { \ - (a) ^= (b); \ - } while (0) +__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } -#define AND(a, b) \ - do { \ - (a) &= (b); \ - } while (0) +__forceinline word_t NOT(word_t a) { return ~a; } -__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } +__forceinline word_t XOR(word_t a, word_t b) { return a ^ b; } + +__forceinline word_t AND(word_t a, word_t b) { return a & b; } __forceinline word_t 
KEYROT(word_t lo2hi, word_t hi2lo) { return lo2hi << 32 | hi2lo >> 32; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint64_t result = a | b; + result |= result >> 32; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { - return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint64_t mask = 0x00ffffffffffffffull >> (n * 8 - 8); + return AND(w, WORD_T(mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt8/aead.c 
b/ascon/Implementations/crypto_aead/ascon128v12/opt8/aead.c new file mode 100644 index 0000000..5429f06 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt8/aead.c @@ -0,0 +1,253 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + *K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + *K1 = XOR(*K1, LOAD64(k)); + *K2 = XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1, + word_t K2) { + word_t N0, N1; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* initialization */ + PINIT(s); + s->x0 = XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + s->x3 = XOR(s->x3, N0); + s->x4 = XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) *px = XOR(*px, LOAD(ad, adlen)); + *px = XOR(*px, PAD(adlen)); + PB(s); + } + s->x4 = XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, 
s->x0); + if (ASCON_RATE == 16) { + s->x1 = XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + *px = XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + *px = XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + s->x1 = XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + *px = XOR(*px, cx); + STORE(m, *px, clen); + *px = CLEAR(*px, clen); + *px = XOR(*px, cx); + } + *px = XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) { + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + s->x2 = XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("finalization", s); +} + +#if ASCON_INLINE_MODE + +#define INIT init +#define 
ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#else + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + init(s, npub, K0, K1, K2); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + final(s, K0, K1, K2); +} + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, K0, K1, K2); + /* set tag */ + c += mlen; + STOREBYTES(c, s.x3, 8); + STOREBYTES(c + 8, s.x4, 8); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, K0, K1, K2); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + s.x3 = XOR(s.x3, 
LOADBYTES(c, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt8/api.h b/ascon/Implementations/crypto_aead/ascon128v12/opt8/api.h new file mode 100644 index 0000000..96a7c47 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt8/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 8 diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt8/ascon.h b/ascon/Implementations/crypto_aead/ascon128v12/opt8/ascon.h new file mode 100644 index 0000000..e596d64 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt8/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt8/config.h b/ascon/Implementations/crypto_aead/ascon128v12/opt8/config.h new file mode 100644 index 0000000..ef5eb14 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt8/config.h @@ -0,0 +1,29 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 1 +#endif + +/* unroll the 
permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 0 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt8/endian.h b/ascon/Implementations/crypto_aead/ascon128v12/opt8/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt8/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt8/implementors b/ascon/Implementations/crypto_aead/ascon128v12/opt8/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ 
b/ascon/Implementations/crypto_aead/ascon128v12/opt8/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt8/permutations.c b/ascon/Implementations/crypto_aead/ascon128v12/opt8/permutations.c new file mode 100644 index 0000000..8d39320 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt8/permutations.c @@ -0,0 +1,35 @@ +#include "permutations.h" + +#include "round.h" + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt8/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/opt8/permutations.h new file mode 100644 index 0000000..66f3cf3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt8/permutations.h @@ -0,0 +1,163 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV WORD_T(0x80400c0600000000) +#define ASCON_128A_IV WORD_T(0x80800c0800000000) +#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000) +#define ASCON_HASH_IV 
WORD_T(0x00400c0000000100) +#define ASCON_XOF_IV WORD_T(0x00400c0000000000) + +#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 WORD_T(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) ((3 + (n)) << 4 | (12 - (n))) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xf0); + ROUND(s, 0xe1); + ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P6ROUNDS(state_t* s) { + 
for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt8/printstate.h b/ascon/Implementations/crypto_aead/ascon128v12/opt8/printstate.h new file mode 100644 index 0000000..2021f96 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt8/printstate.h @@ -0,0 +1,32 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" +#include "word.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt8/round.h b/ascon/Implementations/crypto_aead/ascon128v12/opt8/round.h new file mode 100644 index 0000000..15c7795 --- /dev/null +++ 
b/ascon/Implementations/crypto_aead/ascon128v12/opt8/round.h @@ -0,0 +1,53 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint64_t C) { + word_t tmp; + /* round constant */ + s->x2 = XOR(s->x2, WORD_T(C)); + /* s-box layer */ + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); + /* linear layer */ + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt8/word.h b/ascon/Implementations/crypto_aead/ascon128v12/opt8/word.h new file mode 100644 index 0000000..873313d --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt8/word.h @@ -0,0 +1,115 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "endian.h" + +typedef union { + uint64_t w; + uint8_t b[8]; +} word_t; + +#define U64TOWORD WORD_T +#define WORDTOU64 UINT64_T + +#define XMUL(i, x) \ + do { \ + tmp = 
(uint16_t)a.b[i] * (1 << (x)); \ + b.b[(byte_rol + (i)) & 0x7] ^= (uint8_t)tmp; \ + b.b[(byte_rol + (i) + 1) & 0x7] ^= tmp >> 8; \ + } while (0) + +__forceinline word_t ROR64(word_t a, int n) { + word_t b = {.w = 0ull}; + int bit_rol = (64 - n) & 0x7; + int byte_rol = (64 - n) >> 3; + uint16_t tmp; + XMUL(0, bit_rol); + XMUL(1, bit_rol); + XMUL(2, bit_rol); + XMUL(3, bit_rol); + XMUL(4, bit_rol); + XMUL(5, bit_rol); + XMUL(6, bit_rol); + XMUL(7, bit_rol); + return b; +} + +__forceinline word_t WORD_T(uint64_t x) { return (word_t){.w = x}; } + +__forceinline uint64_t UINT64_T(word_t w) { return w.w; } + +__forceinline word_t NOT(word_t a) { + a.w = ~a.w; + return a; +} + +__forceinline word_t XOR(word_t a, word_t b) { + a.w ^= b.w; + return a; +} + +__forceinline word_t AND(word_t a, word_t b) { + a.w &= b.w; + return a; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + return (word_t){.w = lo2hi.w << 32 | hi2lo.w >> 32}; +} + +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint64_t result = a.w | b.w; + result |= result >> 32; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; +} + +__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint64_t mask = 0x00ffffffffffffffull >> (n * 8 - 8); + return AND(w, WORD_T(mask)); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= 
~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/ref/ascon.h b/ascon/Implementations/crypto_aead/ascon128v12/ref/ascon.h index 8ab0502..c998868 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/ref/ascon.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/ref/ascon.h @@ -3,12 +3,8 @@ #include -#define WORDTOU64 - -typedef uint64_t word_t; - typedef struct { - word_t x0, x1, x2, x3, x4; + uint64_t x0, x1, x2, x3, x4; } state_t; -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/ref/decrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/ref/decrypt.c index cfda43e..aeb8734 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/ref/decrypt.c +++ b/ascon/Implementations/crypto_aead/ascon128v12/ref/decrypt.c @@ -1,8 +1,8 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" +#include "word.h" int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, const uint8_t* c, uint64_t clen, const uint8_t* ad, @@ -20,10 +20,10 @@ int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, *mlen = clen - CRYPTO_ABYTES; /* load key and nonce */ - K0 = LOAD(k, 8); - K1 = LOAD(k + 8, 8); - N0 = LOAD(npub, 8); - N1 = LOAD(npub + 8, 8); + K0 = LOADBYTES(k, 8); + K1 = LOADBYTES(k + 8, 8); + N0 = LOADBYTES(npub, 8); + N1 = LOADBYTES(npub + 8, 8); /* initialization */ s.x0 = ASCON_128_IV; @@ -39,13 +39,13 @@ int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, /* process associated data */ if (adlen) { while (adlen >= 
ASCON_128_RATE) { - s.x0 ^= LOAD(ad, 8); + s.x0 ^= LOADBYTES(ad, 8); P6(&s); ad += ASCON_128_RATE; adlen -= ASCON_128_RATE; } /* final associated data block */ - s.x0 ^= LOAD(ad, adlen); + s.x0 ^= LOADBYTES(ad, adlen); s.x0 ^= PAD(adlen); P6(&s); } @@ -55,8 +55,8 @@ int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, /* process ciphertext */ clen -= CRYPTO_ABYTES; while (clen >= ASCON_128_RATE) { - uint64_t c0 = LOAD(c, 8); - STORE(m, s.x0 ^ c0, 8); + uint64_t c0 = LOADBYTES(c, 8); + STOREBYTES(m, s.x0 ^ c0, 8); s.x0 = c0; P6(&s); m += ASCON_128_RATE; @@ -64,9 +64,9 @@ int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, clen -= ASCON_128_RATE; } /* final ciphertext block */ - uint64_t c0 = LOAD(c, clen); - STORE(m, s.x0 ^ c0, clen); - s.x0 &= ~MASK(clen); + uint64_t c0 = LOADBYTES(c, clen); + STOREBYTES(m, s.x0 ^ c0, clen); + s.x0 = CLEARBYTES(s.x0, clen); s.x0 |= c0; s.x0 ^= PAD(clen); c += clen; @@ -81,7 +81,7 @@ int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, printstate("finalization", &s); /* verify tag (should be constant time, check compiler output) */ - if ((s.x3 ^ LOAD(c, 8)) | (s.x4 ^ LOAD(c + 8, 8))) { + if ((s.x3 ^ LOADBYTES(c, 8)) | (s.x4 ^ LOADBYTES(c + 8, 8))) { *mlen = 0; return -1; } diff --git a/ascon/Implementations/crypto_aead/ascon128v12/ref/encrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/ref/encrypt.c index cd76506..a206f35 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/ref/encrypt.c +++ b/ascon/Implementations/crypto_aead/ascon128v12/ref/encrypt.c @@ -1,8 +1,8 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" +#include "word.h" int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, uint64_t mlen, const uint8_t* ad, uint64_t adlen, @@ -16,10 +16,10 @@ int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, *clen = mlen + CRYPTO_ABYTES; /* load key and nonce */ - K0 = LOAD(k, 8); - K1 
= LOAD(k + 8, 8); - N0 = LOAD(npub, 8); - N1 = LOAD(npub + 8, 8); + K0 = LOADBYTES(k, 8); + K1 = LOADBYTES(k + 8, 8); + N0 = LOADBYTES(npub, 8); + N1 = LOADBYTES(npub + 8, 8); /* initialization */ s.x0 = ASCON_128_IV; @@ -35,13 +35,13 @@ int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, /* process associated data */ if (adlen) { while (adlen >= ASCON_128_RATE) { - s.x0 ^= LOAD(ad, 8); + s.x0 ^= LOADBYTES(ad, 8); P6(&s); ad += ASCON_128_RATE; adlen -= ASCON_128_RATE; } /* final associated data block */ - s.x0 ^= LOAD(ad, adlen); + s.x0 ^= LOADBYTES(ad, adlen); s.x0 ^= PAD(adlen); P6(&s); } @@ -50,16 +50,16 @@ int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, /* process plaintext */ while (mlen >= ASCON_128_RATE) { - s.x0 ^= LOAD(m, 8); - STORE(c, s.x0, 8); + s.x0 ^= LOADBYTES(m, 8); + STOREBYTES(c, s.x0, 8); P6(&s); m += ASCON_128_RATE; c += ASCON_128_RATE; mlen -= ASCON_128_RATE; } /* final plaintext block */ - s.x0 ^= LOAD(m, mlen); - STORE(c, s.x0, mlen); + s.x0 ^= LOADBYTES(m, mlen); + STOREBYTES(c, s.x0, mlen); s.x0 ^= PAD(mlen); c += mlen; printstate("process plaintext", &s); @@ -73,8 +73,8 @@ int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, printstate("finalization", &s); /* set tag */ - STORE(c, s.x3, 8); - STORE(c + 8, s.x4, 8); + STOREBYTES(c, s.x3, 8); + STOREBYTES(c + 8, s.x4, 8); return 0; } diff --git a/ascon/Implementations/crypto_aead/ascon128v12/ref/printstate.h b/ascon/Implementations/crypto_aead/ascon128v12/ref/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/ref/printstate.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/ref/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_aead/ascon128v12/ref/word.h 
b/ascon/Implementations/crypto_aead/ascon128v12/ref/word.h new file mode 100644 index 0000000..5a1519b --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/ref/word.h @@ -0,0 +1,35 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#define WORDTOU64 +#define U64TOWORD + +typedef uint64_t word_t; + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +/* set padding byte in Ascon 64-bit word */ +#define PAD(i) SETBYTE(0x80, i) + +static inline uint64_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) x |= SETBYTE(bytes[i], i); + return x; +} + +static inline void STOREBYTES(uint8_t* bytes, uint64_t x, int n) { + for (int i = 0; i < n; ++i) bytes[i] = GETBYTE(x, i); +} + +static inline uint64_t CLEARBYTES(uint64_t x, int n) { + for (int i = 0; i < n; ++i) x &= ~SETBYTE(0xff, i); + return x; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/aead.c b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/aead.c new file mode 100644 index 0000000..5429f06 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/aead.c @@ -0,0 +1,253 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + *K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + *K1 = XOR(*K1, LOAD64(k)); + *K2 = XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1, + word_t K2) { + word_t N0, N1; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* initialization */ + PINIT(s); + s->x0 = XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = XOR(s->x1, K1); + s->x2 = 
XOR(s->x2, K2); + s->x3 = XOR(s->x3, N0); + s->x4 = XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) *px = XOR(*px, LOAD(ad, adlen)); + *px = XOR(*px, PAD(adlen)); + PB(s); + } + s->x4 = XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + s->x1 = XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + *px = XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + *px = XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + s->x1 = XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + 
PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + *px = XOR(*px, cx); + STORE(m, *px, clen); + *px = CLEAR(*px, clen); + *px = XOR(*px, cx); + } + *px = XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) { + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + s->x2 = XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("finalization", s); +} + +#if ASCON_INLINE_MODE + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#else + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + init(s, npub, K0, K1, K2); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + final(s, K0, K1, K2); +} + +#endif + +int crypto_aead_encrypt(uint8_t* c, 
uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, K0, K1, K2); + /* set tag */ + c += mlen; + STOREBYTES(c, s.x3, 8); + STOREBYTES(c + 8, s.x4, 8); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, K0, K1, K2); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + s.x3 = XOR(s.x3, LOADBYTES(c, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/ascon.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/ascon.h index 10a5b6e..e596d64 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/ascon.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/ascon.h @@ -16,4 +16,4 @@ void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); void ascon_final(state_t* s, const uint8_t* k); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/config.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/config.h index b1b5080..1447e7d 100644 --- 
a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/config.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 0 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'M' -#endif - /* make sure __forceinline is supported */ #ifndef __forceinline #define __forceinline inline __attribute__((always_inline)) diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/interleave.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/interleave.h new file mode 100644 index 0000000..6e2e5c3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/interleave.h @@ -0,0 +1,47 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include + +__forceinline uint32_t deinterleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + return x; +} + +__forceinline uint32_t interleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + return x; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t deinterleave32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + lo = deinterleave_uint32(lo); + hi = deinterleave_uint32(hi); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t interleave32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + lo = interleave_uint32(lo); + hi = interleave_uint32(hi); + return (uint64_t)hi << 32 | lo; +} + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/permutations.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/permutations.h index ef338f1..49fd52a 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) - -#define 
ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x8021000008220000) +#define ASCON_128A_IV WORD_T(0x8822000000200000) +#define ASCON_80PQ_IV WORD_T(0xc021000008220000) +#define ASCON_HASH_IV WORD_T(0x0020000008020010) +#define ASCON_XOF_IV WORD_T(0x0020000008020000) + +#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7) +#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340) +#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8) +#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98) +#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398) + +#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6) +#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220) +#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04) +#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2) +#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/printstate.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/printstate.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/word.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/word.h index 8ffcaaa..b27c6c9 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/word.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/word.h @@ -3,55 +3,51 @@ #include -#include "config.h" +#include "endian.h" +#include "interleave.h" typedef struct { 
uint32_t e; uint32_t o; } word_t; +__forceinline uint32_t ROR32(uint32_t x, int n) { + return (n == 0) ? x : x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + __forceinline word_t WORD_T(uint64_t x) { return (word_t){.o = x >> 32, .e = x}; } __forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } -__forceinline uint64_t TOBI32(uint64_t in); - -__forceinline uint64_t FROMBI32(uint64_t in); +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } -__forceinline word_t U64TOWORD(uint64_t x) { - uint64_t w = TOBI32(x); - return (word_t){.o = w >> 32, .e = w}; -} +__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } -__forceinline uint64_t WORDTOU64(word_t w) { - return FROMBI32((uint64_t)w.o << 32 | w.e); +__forceinline word_t NOT(word_t a) { + a.e = ~a.e; + a.o = ~a.o; + return a; } -#define XOR(a, b) \ - do { \ - word_t tb = b; \ - (a).e ^= tb.e; \ - (a).o ^= tb.o; \ - } while (0) - -#define AND(a, b) \ - do { \ - word_t tb = b; \ - (a).e &= tb.e; \ - (a).o &= tb.o; \ - } while (0) - -__forceinline uint32_t ROR32(uint32_t x, int n) { - return x >> n | x << (32 - n); +__forceinline word_t XOR(word_t a, word_t b) { + a.e ^= b.e; + a.o ^= b.o; + return a; } -__forceinline word_t ROR64(word_t x, int n) { - word_t r; - r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); - r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); - return r; +__forceinline word_t AND(word_t a, word_t b) { + a.e &= b.e; + a.o &= b.o; + return a; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { @@ -61,57 +57,58 @@ __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return r; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint32_t result = a.e | a.o | b.e | b.o; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ uint32_t mask = 0x0fffffff >> (n * 4 - 4); - return WORD_T((uint64_t)mask << 32 | mask); -} - -/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ -__forceinline uint64_t TOBI32(uint64_t in) { - uint32_t hi = in >> 32; - uint32_t lo = in; - uint32_t r0, r1; - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r0 = (lo & 0x0000FFFF) | (hi << 16); - r1 = (lo >> 16) | (hi & 0xFFFF0000); - return (uint64_t)r1 << 32 | r0; -} - -/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ -__forceinline uint64_t FROMBI32(uint64_t in) { - uint32_t r0 = in; - uint32_t r1 = in >> 32; - uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); - uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - return (uint64_t)hi << 32 | lo; + return AND(w, WORD_T((uint64_t)mask << 32 | mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/aead.c b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/aead.c new file mode 100644 index 
0000000..5429f06 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/aead.c @@ -0,0 +1,253 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + *K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + *K1 = XOR(*K1, LOAD64(k)); + *K2 = XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1, + word_t K2) { + word_t N0, N1; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* initialization */ + PINIT(s); + s->x0 = XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + s->x3 = XOR(s->x3, N0); + s->x4 = XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) *px = XOR(*px, LOAD(ad, adlen)); + *px = XOR(*px, PAD(adlen)); + PB(s); + } + s->x4 = XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + s->x1 = XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, 
s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + *px = XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + *px = XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + s->x1 = XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + *px = XOR(*px, cx); + STORE(m, *px, clen); + *px = CLEAR(*px, clen); + *px = XOR(*px, cx); + } + *px = XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) { + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + s->x2 = XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("finalization", s); +} + +#if ASCON_INLINE_MODE + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + 
+#else + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + init(s, npub, K0, K1, K2); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + final(s, K0, K1, K2); +} + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, K0, K1, K2); + /* set tag */ + c += mlen; + STOREBYTES(c, s.x3, 8); + STOREBYTES(c + 8, s.x4, 8); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, K0, K1, K2); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + s.x3 = XOR(s.x3, LOADBYTES(c, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen 
= 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/api.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/api.h new file mode 100644 index 0000000..5fa0140 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 20 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 8 diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/ascon.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/ascon.h new file mode 100644 index 0000000..e596d64 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/config.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/config.h new file mode 100644 index 0000000..0f5a485 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/config.h @@ -0,0 +1,29 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define 
ASCON_UNROLL_LOOPS 1 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/endian.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/implementors b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/implementors @@ -0,0 +1,2 @@ 
+Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/interleave.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/interleave.h new file mode 100644 index 0000000..6e2e5c3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/interleave.h @@ -0,0 +1,47 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include + +__forceinline uint32_t deinterleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + return x; +} + +__forceinline uint32_t interleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + return x; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t deinterleave32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + lo = deinterleave_uint32(lo); + hi = deinterleave_uint32(hi); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t interleave32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + lo = interleave_uint32(lo); + hi = interleave_uint32(hi); + return (uint64_t)hi << 32 | lo; +} + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/permutations.c b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/permutations.c new file mode 100644 index 0000000..56273c6 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/permutations.c @@ -0,0 +1,44 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, + {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, + {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/permutations.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/permutations.h new file mode 100644 index 0000000..49fd52a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/permutations.h @@ -0,0 +1,168 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + 
+#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV WORD_T(0x8021000008220000) +#define ASCON_128A_IV WORD_T(0x8822000000200000) +#define ASCON_80PQ_IV WORD_T(0xc021000008220000) +#define ASCON_HASH_IV WORD_T(0x0020000008020010) +#define ASCON_XOF_IV WORD_T(0x0020000008020000) + +#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7) +#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340) +#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8) +#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98) +#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398) + +#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6) +#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220) +#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04) +#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2) +#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - n) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xc, 0xc); + ROUND(s, 0x9, 0xc); + ROUND(s, 0xc, 0x9); + ROUND(s, 0x9, 0x9); + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + 
ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint8_t constants[][2]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/printstate.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/printstate.h new file mode 100644 index 0000000..2021f96 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/printstate.h @@ -0,0 +1,32 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define 
printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" +#include "word.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/round.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/round.h new file mode 100644 index 0000000..06da1ca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/round.h @@ -0,0 +1,102 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { + uint32_t tmp0, tmp1, tmp2, tmp3; + /* clang-format off */ + __asm__ __volatile__( \ + "eor %[x2_e], %[x2_e], %[C_e]\n\t" \ + "eor %[x2_o], %[x2_o], %[C_o]\n\t" \ + "eor %[x0_e], %[x0_e], %[x4_e]\n\t" \ + "eor %[x0_o], %[x0_o], %[x4_o]\n\t" \ + "eor %[x4_e], %[x4_e], %[x3_e]\n\t" \ + "eor %[x4_o], %[x4_o], %[x3_o]\n\t" \ + "eor %[x2_e], %[x2_e], %[x1_e]\n\t" \ + "eor %[x2_o], %[x2_o], %[x1_o]\n\t" \ + "bic %[tmp0], %[x0_e], %[x4_e]\n\t" \ + "bic %[tmp1], %[x4_e], %[x3_e]\n\t" \ + "bic %[tmp2], %[x2_e], %[x1_e]\n\t" \ + "bic %[tmp3], %[x1_e], %[x0_e]\n\t" \ + "eor %[x2_e], %[x2_e], %[tmp1]\n\t" \ + "eor %[x0_e], %[x0_e], %[tmp2]\n\t" \ + "eor %[x4_e], %[x4_e], %[tmp3]\n\t" \ + "bic %[tmp3], %[x3_e], %[x2_e]\n\t" \ + "eor %[x3_e], %[x3_e], %[tmp0]\n\t" \ 
+ "bic %[tmp2], %[x0_o], %[x4_o]\n\t" \ + "bic %[tmp0], %[x2_o], %[x1_o]\n\t" \ + "bic %[tmp1], %[x4_o], %[x3_o]\n\t" \ + "eor %[x1_e], %[x1_e], %[tmp3]\n\t" \ + "eor %[x0_o], %[x0_o], %[tmp0]\n\t" \ + "eor %[x2_o], %[x2_o], %[tmp1]\n\t" \ + "bic %[tmp3], %[x1_o], %[x0_o]\n\t" \ + "bic %[tmp0], %[x3_o], %[x2_o]\n\t" \ + "eor %[x3_o], %[x3_o], %[tmp2]\n\t" \ + "eor %[x3_o], %[x3_o], %[x2_o]\n\t" \ + "eor %[x4_o], %[x4_o], %[tmp3]\n\t" \ + "eor %[x1_o], %[x1_o], %[tmp0]\n\t" \ + "eor %[x3_e], %[x3_e], %[x2_e]\n\t" \ + "eor %[x1_e], %[x1_e], %[x0_e]\n\t" \ + "eor %[x1_o], %[x1_o], %[x0_o]\n\t" \ + "eor %[x0_e], %[x0_e], %[x4_e]\n\t" \ + "eor %[x0_o], %[x0_o], %[x4_o]\n\t" \ + "mvn %[x2_e], %[x2_e]\n\t" \ + "mvn %[x2_o], %[x2_o]\n\t" \ + "eor %[tmp0], %[x0_e], %[x0_o], ror #4\n\t" \ + "eor %[tmp1], %[x0_o], %[x0_e], ror #5\n\t" \ + "eor %[tmp2], %[x1_e], %[x1_e], ror #11\n\t" \ + "eor %[tmp3], %[x1_o], %[x1_o], ror #11\n\t" \ + "eor %[x0_e], %[x0_e], %[tmp1], ror #9\n\t" \ + "eor %[x0_o], %[x0_o], %[tmp0], ror #10\n\t" \ + "eor %[x1_e], %[x1_e], %[tmp3], ror #19\n\t" \ + "eor %[x1_o], %[x1_o], %[tmp2], ror #20\n\t" \ + "eor %[tmp0], %[x2_e], %[x2_o], ror #2\n\t" \ + "eor %[tmp1], %[x2_o], %[x2_e], ror #3\n\t" \ + "eor %[tmp2], %[x3_e], %[x3_o], ror #3\n\t" \ + "eor %[tmp3], %[x3_o], %[x3_e], ror #4\n\t" \ + "eor %[x2_e], %[x2_e], %[tmp1]\n\t" \ + "eor %[x2_o], %[x2_o], %[tmp0], ror #1\n\t" \ + "eor %[x3_e], %[x3_e], %[tmp2], ror #5\n\t" \ + "eor %[x3_o], %[x3_o], %[tmp3], ror #5\n\t" \ + "eor %[tmp0], %[x4_e], %[x4_e], ror #17\n\t" \ + "eor %[tmp1], %[x4_o], %[x4_o], ror #17\n\t" \ + "eor %[x4_e], %[x4_e], %[tmp1], ror #3\n\t" \ + "eor %[x4_o], %[x4_o], %[tmp0], ror #4\n\t" \ + : [ x0_e ] "+r"(s->x0.e), \ + [ x1_e ] "+r"(s->x1.e), \ + [ x2_e ] "+r"(s->x2.e), \ + [ x3_e ] "+r"(s->x3.e), \ + [ x4_e ] "+r"(s->x4.e), \ + [ x0_o ] "+r"(s->x0.o), \ + [ x1_o ] "+r"(s->x1.o), \ + [ x2_o ] "+r"(s->x2.o), \ + [ x3_o ] "+r"(s->x3.o), \ + [ x4_o ] "+r"(s->x4.o), \ + [ tmp0 ] 
"=r"(tmp0), \ + [ tmp1 ] "=r"(tmp1), \ + [ tmp2 ] "=r"(tmp2), \ + [ tmp3 ] "=r"(tmp3) \ + : [ C_e ] "i"(C_e), \ + [ C_o ] "i"(C_o) \ + : ); + /* clang-format on */ + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/word.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/word.h new file mode 100644 index 0000000..b27c6c9 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_arm/word.h @@ -0,0 +1,114 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "endian.h" +#include "interleave.h" + +typedef struct { + uint32_t e; + uint32_t o; +} word_t; + +__forceinline uint32_t ROR32(uint32_t x, int n) { + return (n == 0) ? x : x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + +__forceinline word_t WORD_T(uint64_t x) { + return (word_t){.o = x >> 32, .e = x}; +} + +__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } + +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } + +__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } + +__forceinline word_t NOT(word_t a) { + a.e = ~a.e; + a.o = ~a.o; + return a; +} + +__forceinline word_t XOR(word_t a, word_t b) { + a.e ^= b.e; + a.o ^= b.o; + return a; +} + +__forceinline word_t AND(word_t a, word_t b) { + a.e &= b.e; + a.o &= b.o; + return a; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.o = lo2hi.o << 16 | hi2lo.o >> 16; + r.e = lo2hi.e << 16 | hi2lo.e >> 16; + return r; +} + +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint32_t result = a.e | a.o | b.e | b.o; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; +} + +__forceinline word_t PAD(int i) { + return WORD_T((uint64_t)(0x08 << (28 
- 4 * i)) << 32); +} + +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint32_t mask = 0x0fffffff >> (n * 4 - 4); + return AND(w, WORD_T((uint64_t)mask << 32 | mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/aead.c b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/aead.c new file mode 100644 index 0000000..5429f06 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/aead.c @@ -0,0 +1,253 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + *K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + *K1 = XOR(*K1, LOAD64(k)); + *K2 = XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1, + word_t K2) { + word_t N0, N1; + /* load nonce */ + 
N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* initialization */ + PINIT(s); + s->x0 = XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + s->x3 = XOR(s->x3, N0); + s->x4 = XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) *px = XOR(*px, LOAD(ad, adlen)); + *px = XOR(*px, PAD(adlen)); + PB(s); + } + s->x4 = XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + s->x1 = XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + *px = XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + *px = XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = 
LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + s->x1 = XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + *px = XOR(*px, cx); + STORE(m, *px, clen); + *px = CLEAR(*px, clen); + *px = XOR(*px, cx); + } + *px = XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) { + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + s->x2 = XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("finalization", s); +} + +#if ASCON_INLINE_MODE + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#else + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + init(s, npub, K0, K1, K2); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, 
clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + final(s, K0, K1, K2); +} + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, K0, K1, K2); + /* set tag */ + c += mlen; + STOREBYTES(c, s.x3, 8); + STOREBYTES(c + 8, s.x4, 8); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, K0, K1, K2); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + s.x3 = XOR(s.x3, LOADBYTES(c, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/api.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/api.h new file mode 100644 index 0000000..5fa0140 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 20 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 8 diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/ascon.h 
b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/ascon.h new file mode 100644 index 0000000..e596d64 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/config.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/config.h new file mode 100644 index 0000000..0f5a485 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/config.h @@ -0,0 +1,29 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/endian.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if 
defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/implementors b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/interleave.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/interleave.h new file mode 100644 index 0000000..6e2e5c3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/interleave.h @@ -0,0 +1,47 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include + +__forceinline uint32_t deinterleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 1)) & 0x22222222, x ^= 
t ^ (t << 1); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + return x; +} + +__forceinline uint32_t interleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + return x; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t deinterleave32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + lo = deinterleave_uint32(lo); + hi = deinterleave_uint32(hi); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t interleave32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + lo = interleave_uint32(lo); + hi = interleave_uint32(hi); + return (uint64_t)hi << 32 | lo; +} + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/permutations.c b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/permutations.c new file mode 100644 index 0000000..56273c6 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/permutations.c @@ -0,0 +1,44 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, + {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, + {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", 
s); + for (int i = START(rounds); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/permutations.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/permutations.h new file mode 100644 index 0000000..49fd52a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/permutations.h @@ -0,0 +1,168 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV WORD_T(0x8021000008220000) +#define ASCON_128A_IV WORD_T(0x8822000000200000) +#define ASCON_80PQ_IV WORD_T(0xc021000008220000) +#define ASCON_HASH_IV WORD_T(0x0020000008020010) +#define ASCON_XOF_IV WORD_T(0x0020000008020000) + +#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7) +#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340) +#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8) +#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98) +#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398) + +#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6) +#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220) +#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04) +#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2) +#define ASCON_XOF_IV4 
WORD_T(0x3735189db624d656) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - n) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xc, 0xc); + ROUND(s, 0x9, 0xc); + ROUND(s, 0xc, 0x9); + ROUND(s, 0x9, 0x9); + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint8_t constants[][2]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + 
+__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/printstate.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/printstate.h new file mode 100644 index 0000000..2021f96 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/printstate.h @@ -0,0 +1,32 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" +#include "word.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/round.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/round.h new file mode 100644 index 0000000..fa23bf3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/round.h @@ -0,0 +1,53 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = 
WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { + word_t tmp, C = {.o = C_o, .e = C_e}; + /* round constant */ + s->x2 = XOR(s->x2, C); + /* s-box layer */ + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); + /* linear layer */ + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/word.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/word.h new file mode 100644 index 0000000..b27c6c9 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowreg/word.h @@ -0,0 +1,114 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "endian.h" +#include "interleave.h" + +typedef struct { + uint32_t e; + uint32_t o; +} word_t; + +__forceinline uint32_t ROR32(uint32_t x, int n) { + return (n == 0) ? x : x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + +__forceinline word_t WORD_T(uint64_t x) { + return (word_t){.o = x >> 32, .e = x}; +} + +__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } + +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } + +__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } + +__forceinline word_t NOT(word_t a) { + a.e = ~a.e; + a.o = ~a.o; + return a; +} + +__forceinline word_t XOR(word_t a, word_t b) { + a.e ^= b.e; + a.o ^= b.o; + return a; +} + +__forceinline word_t AND(word_t a, word_t b) { + a.e &= b.e; + a.o &= b.o; + return a; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.o = lo2hi.o << 16 | hi2lo.o >> 16; + r.e = lo2hi.e << 16 | hi2lo.e >> 16; + return r; +} + +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint32_t result = a.e | a.o | b.e | b.o; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; +} + +__forceinline word_t PAD(int i) { + return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); +} + +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint32_t mask = 0x0fffffff >> (n * 4 - 4); + return AND(w, WORD_T((uint64_t)mask << 32 | mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int 
n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/aead.c b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/aead.c new file mode 100644 index 0000000..6354194 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/aead.c @@ -0,0 +1,61 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode); + +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode) { + word_t K0, K1, K2; + /* load key */ + if (CRYPTO_KEYBYTES == 20) { + K0 = KEYROT(WORD_T(0), LOAD(k, 4)); + k += 4; + } + K1 = LOAD64(k); + K2 = LOAD64(k + 8); + /* initialization */ + s->x0 = IV; + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = K1; + s->x2 = K2; + s->x3 = LOAD64(npub); + s->x4 = LOAD64(npub + 8); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); + /* process associated data */ + if (adlen) { + process_data(s, (void*)0, ad, adlen, ASCON_ABSORB); + PB(s); + } + s->x4 = XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); + /* process plaintext/ciphertext */ + process_data(s, out, in, tlen, mode); + if (mode == ASCON_ENCRYPT) printstate("process plaintext", s); + if (mode == ASCON_DECRYPT) printstate("process ciphertext", s); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && 
ASCON_RATE == 16) { + s->x2 = XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("finalization", s); +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/ascon.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/ascon.h index aa685d3..c89ec89 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/ascon.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/ascon.h @@ -10,9 +10,11 @@ typedef struct { word_t x0, x1, x2, x3, x4; } state_t; -#define ASCON_AD 0 -#define ASCON_ENC 1 -#define ASCON_DEC 2 +#define ASCON_ABSORB 0x1 +#define ASCON_SQUEEZE 0x2 +#define ASCON_INSERT 0x4 +#define ASCON_ENCRYPT (ASCON_ABSORB | ASCON_SQUEEZE) +#define ASCON_DECRYPT (ASCON_ABSORB | ASCON_SQUEEZE | ASCON_INSERT) void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, uint8_t mode); @@ -21,4 +23,4 @@ void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, const uint8_t* ad, uint64_t adlen, const uint8_t* npub, const uint8_t* k, uint8_t mode); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/config.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/config.h index 19426ab..ca59e3b 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/config.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 1 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'B' -#endif - /* make sure __forceinline is supported */ #ifndef __forceinline #define __forceinline inline __attribute__((always_inline)) diff --git 
a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/decrypt.c b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/decrypt.c index 605bd03..42b1e4b 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/decrypt.c +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/decrypt.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -20,10 +19,10 @@ int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, /* set plaintext size */ *mlen = clen - CRYPTO_ABYTES; /* ascon decryption */ - ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC); + ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DECRYPT); /* verify tag (should be constant time, check compiler output) */ - XOR(s.x3, LOAD64(c + *mlen)); - XOR(s.x4, LOAD64(c + *mlen + 8)); + s.x3 = XOR(s.x3, LOADBYTES(c + *mlen, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + *mlen + 8, 8)); if (NOTZERO(s.x3, s.x4)) { *mlen = 0; return -1; diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/encrypt.c b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/encrypt.c index 641b266..80e2bcf 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/encrypt.c +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/encrypt.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -17,9 +16,9 @@ int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, /* set ciphertext size */ *clen = mlen + CRYPTO_ABYTES; /* ascon encryption */ - ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC); + ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENCRYPT); /* set tag */ - STORE64(c + mlen, s.x3); - STORE64(c + mlen + 8, s.x4); + STOREBYTES(c + mlen, s.x3, 8); + STOREBYTES(c + mlen + 8, s.x4, 8); return 0; } diff --git 
a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/interleave.c b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/interleave.c new file mode 100644 index 0000000..321d0ce --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/interleave.c @@ -0,0 +1,42 @@ +#include "interleave.h" + +static inline uint32_t deinterleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + return x; +} + +static inline uint32_t interleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + return x; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +uint64_t deinterleave32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + lo = deinterleave_uint32(lo); + hi = deinterleave_uint32(hi); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +uint64_t interleave32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + lo = interleave_uint32(lo); + hi = interleave_uint32(hi); + return (uint64_t)hi << 32 | lo; +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/interleave.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/interleave.h new file mode 100644 index 0000000..f6590fb --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/interleave.h @@ -0,0 +1,9 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include + +uint64_t deinterleave32(uint64_t in); +uint64_t interleave32(uint64_t in); + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/permutations.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/permutations.h index ef338f1..49fd52a 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ 
- ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) - -#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x8021000008220000) +#define ASCON_128A_IV WORD_T(0x8822000000200000) +#define ASCON_80PQ_IV WORD_T(0xc021000008220000) +#define ASCON_HASH_IV WORD_T(0x0020000008020010) +#define ASCON_XOF_IV WORD_T(0x0020000008020000) + +#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7) +#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340) +#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8) +#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98) +#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398) + +#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6) +#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220) +#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04) +#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2) +#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/printstate.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/printstate.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void 
printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/process.c b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/process.c index 81fdd15..a7624c6 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/process.c +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/process.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -13,13 +12,13 @@ void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, n = len < ASCON_RATE ? len : ASCON_RATE; /* absorb data */ tmp0 = LOAD(in, n); - XOR(s->x0, tmp0); + s->x0 = XOR(s->x0, tmp0); /* extract data */ - if (mode != ASCON_AD) STORE(out, s->x0, n); + if (mode & ASCON_SQUEEZE) STORE(out, s->x0, n); /* insert data */ - if (mode == ASCON_DEC) { - AND(s->x0, XMASK(n)); - XOR(s->x0, tmp0); + if (mode & ASCON_INSERT) { + s->x0 = CLEAR(s->x0, n); + s->x0 = XOR(s->x0, tmp0); } /* compute permutation for full blocks */ if (n == ASCON_RATE) PB(s); @@ -27,5 +26,5 @@ void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, out += n; len -= n; } - XOR(s->x0, PAD(n % 8)); + s->x0 = XOR(s->x0, PAD(n % 8)); } diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/round.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/round.h index d8ea3b6..fa23bf3 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/round.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/round.h @@ -19,66 +19,34 @@ __forceinline void PINIT(state_t* s) { } __forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { - state_t t; + word_t tmp, C = {.o = C_o, .e = C_e}; /* round constant */ - s->x2.e ^= C_e; - s->x2.o ^= C_o; + s->x2 = XOR(s->x2, C); /* s-box layer */ - s->x0.e ^= s->x4.e; - s->x0.o ^= s->x4.o; - s->x4.e ^= s->x3.e; 
- s->x4.o ^= s->x3.o; - s->x2.e ^= s->x1.e; - s->x2.o ^= s->x1.o; - t.x0.e = s->x0.e; - t.x0.o = s->x0.o; - t.x4.e = s->x4.e; - t.x4.o = s->x4.o; - t.x3.e = s->x3.e; - t.x3.o = s->x3.o; - t.x1.e = s->x1.e; - t.x1.o = s->x1.o; - t.x2.e = s->x2.e; - t.x2.o = s->x2.o; - s->x0.e = t.x0.e ^ (~t.x1.e & t.x2.e); - s->x0.o = t.x0.o ^ (~t.x1.o & t.x2.o); - s->x2.e = t.x2.e ^ (~t.x3.e & t.x4.e); - s->x2.o = t.x2.o ^ (~t.x3.o & t.x4.o); - s->x4.e = t.x4.e ^ (~t.x0.e & t.x1.e); - s->x4.o = t.x4.o ^ (~t.x0.o & t.x1.o); - s->x1.e = t.x1.e ^ (~t.x2.e & t.x3.e); - s->x1.o = t.x1.o ^ (~t.x2.o & t.x3.o); - s->x3.e = t.x3.e ^ (~t.x4.e & t.x0.e); - s->x3.o = t.x3.o ^ (~t.x4.o & t.x0.o); - s->x1.e ^= s->x0.e; - s->x1.o ^= s->x0.o; - s->x3.e ^= s->x2.e; - s->x3.o ^= s->x2.o; - s->x0.e ^= s->x4.e; - s->x0.o ^= s->x4.o; + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); /* linear layer */ - t.x0.e = s->x0.e ^ ROR32(s->x0.o, 4); - t.x0.o = s->x0.o ^ ROR32(s->x0.e, 5); - t.x1.e = s->x1.e ^ ROR32(s->x1.e, 11); - t.x1.o = s->x1.o ^ ROR32(s->x1.o, 11); - t.x2.e = s->x2.e ^ ROR32(s->x2.o, 2); - t.x2.o = s->x2.o ^ ROR32(s->x2.e, 3); - t.x3.e = s->x3.e ^ ROR32(s->x3.o, 3); - t.x3.o = s->x3.o ^ ROR32(s->x3.e, 4); - t.x4.e = s->x4.e ^ ROR32(s->x4.e, 17); - t.x4.o = s->x4.o ^ ROR32(s->x4.o, 17); - s->x0.e ^= ROR32(t.x0.o, 9); - s->x0.o ^= ROR32(t.x0.e, 10); - s->x1.e ^= ROR32(t.x1.o, 19); - s->x1.o ^= ROR32(t.x1.e, 20); - s->x2.e ^= t.x2.o; - s->x2.o ^= ROR32(t.x2.e, 1); - s->x3.e ^= ROR32(t.x3.e, 5); - s->x3.o ^= ROR32(t.x3.o, 5); - s->x4.e ^= ROR32(t.x4.o, 3); - s->x4.o ^= ROR32(t.x4.e, 4); - s->x2.e = ~s->x2.e; - s->x2.o = ~s->x2.o; + 
tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); printstate(" round output", s); } diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/word.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/word.h index 45184ca..b27c6c9 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/word.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/word.h @@ -3,55 +3,51 @@ #include -#include "config.h" +#include "endian.h" +#include "interleave.h" typedef struct { uint32_t e; uint32_t o; } word_t; +__forceinline uint32_t ROR32(uint32_t x, int n) { + return (n == 0) ? x : x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + __forceinline word_t WORD_T(uint64_t x) { return (word_t){.o = x >> 32, .e = x}; } __forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } -uint64_t TOBI32(uint64_t in); - -uint64_t FROMBI32(uint64_t in); +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } -__forceinline word_t U64TOWORD(uint64_t x) { - uint64_t w = TOBI32(x); - return (word_t){.o = w >> 32, .e = w}; -} +__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } -__forceinline uint64_t WORDTOU64(word_t w) { - return FROMBI32((uint64_t)w.o << 32 | w.e); +__forceinline word_t NOT(word_t a) { + a.e = ~a.e; + a.o = ~a.o; + return a; } -#define XOR(a, b) \ - do { \ - word_t tb = b; \ - (a).e ^= tb.e; \ - (a).o ^= tb.o; \ - } while (0) - -#define AND(a, b) \ - do { \ - word_t tb = b; \ - (a).e &= tb.e; \ - (a).o &= tb.o; \ - } while (0) - -__forceinline uint32_t ROR32(uint32_t x, int n) { - return x >> n | x << (32 - n); +__forceinline word_t XOR(word_t a, word_t b) { + a.e ^= b.e; + a.o ^= b.o; + return a; } -__forceinline word_t ROR64(word_t x, int n) { - word_t r; - r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); - r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); - return r; +__forceinline word_t AND(word_t a, word_t b) { + a.e &= b.e; + a.o &= b.o; + return a; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { @@ -61,22 +57,58 @@ __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return r; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint32_t result = a.e | a.o | b.e | b.o; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ uint32_t mask = 0x0fffffff >> (n * 4 - 4); - return WORD_T((uint64_t)mask << 32 | mask); + return AND(w, WORD_T((uint64_t)mask << 32 | mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t 
w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/aead.c b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/aead.c new file mode 100644 index 0000000..5429f06 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/aead.c @@ -0,0 +1,253 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + *K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + *K1 = XOR(*K1, LOAD64(k)); + *K2 = XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1, + word_t K2) { + word_t N0, N1; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* initialization */ + PINIT(s); + s->x0 = XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + s->x3 = XOR(s->x3, N0); + s->x4 = XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) *px = XOR(*px, LOAD(ad, adlen)); + *px = XOR(*px, PAD(adlen)); + PB(s); + } + s->x4 = XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void 
encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + s->x1 = XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + *px = XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + *px = XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + s->x1 = XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + *px = XOR(*px, cx); + STORE(m, *px, clen); + *px = CLEAR(*px, clen); + *px = XOR(*px, cx); + } + *px = XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) { + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + s->x2 = XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 
= XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("finalization", s); +} + +#if ASCON_INLINE_MODE + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#else + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + init(s, npub, K0, K1, K2); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + final(s, K0, K1, K2); +} + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, K0, K1, K2); + /* set tag */ + c += mlen; + STOREBYTES(c, s.x3, 8); + STOREBYTES(c + 8, s.x4, 8); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, 
K1, K2); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, K0, K1, K2); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + s.x3 = XOR(s.x3, LOADBYTES(c, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/api.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/api.h new file mode 100644 index 0000000..5fa0140 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 20 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 8 diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/ascon.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/ascon.h new file mode 100644 index 0000000..e596d64 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/config.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/config.h new file mode 100644 index 0000000..ef5eb14 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/config.h @@ -0,0 +1,29 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef 
ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 1 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 0 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/endian.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/implementors 
b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/interleave.c b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/interleave.c new file mode 100644 index 0000000..659255b --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/interleave.c @@ -0,0 +1,12 @@ +#include "interleave.h" + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +uint64_t interleave8(uint64_t x) { + x = (x & 0xaa55aa55aa55aa55ull) | ((x & 0x00aa00aa00aa00aaull) << 7) | + ((x >> 7) & 0x00aa00aa00aa00aaull); + x = (x & 0xcccc3333cccc3333ull) | ((x & 0x0000cccc0000ccccull) << 14) | + ((x >> 14) & 0x0000cccc0000ccccull); + x = (x & 0xf0f0f0f00f0f0f0full) | ((x & 0x00000000f0f0f0f0ull) << 28) | + ((x >> 28) & 0x00000000f0f0f0f0ull); + return x; +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/interleave.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/interleave.h new file mode 100644 index 0000000..62937e0 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/interleave.h @@ -0,0 +1,8 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include + +uint64_t interleave8(uint64_t x); + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/permutations.c b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/permutations.c new file mode 100644 index 0000000..a0cc038 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/permutations.c @@ -0,0 +1,45 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint64_t constants[12] = { + 0x0101010100000000ull, 0x0101010000000001ull, 0x0101000100000100ull, + 0x0101000000000101ull, 0x0100010100010000ull, 
0x0100010000010001ull, + 0x0100000100010100ull, 0x0100000000010101ull, 0x0001010101000000ull, + 0x0001010001000001ull, 0x0001000101000100ull, 0x0001000001000101ull}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; ++i) ROUND(s, constants[i]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/permutations.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/permutations.h new file mode 100644 index 0000000..ee1b625 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/permutations.h @@ -0,0 +1,165 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV WORD_T(0x8040000020301000ull) +#define ASCON_128A_IV WORD_T(0xc000000030200000ull) +#define ASCON_80PQ_IV WORD_T(0x8040800020301000ull) +#define ASCON_HASH_IV WORD_T(0x0040000020200002ull) +#define ASCON_XOF_IV WORD_T(0x0040000020200000ull) + +#define ASCON_HASH_IV0 WORD_T(0xfa8e976bb985dc4dull) +#define ASCON_HASH_IV1 WORD_T(0xc8085072a40ccd94ull) +#define ASCON_HASH_IV2 WORD_T(0xfe1781be5a847314ull) +#define ASCON_HASH_IV3 
WORD_T(0x2f871f6c6d0082b2ull) +#define ASCON_HASH_IV4 WORD_T(0x7a1ba68850ec407eull) + +#define ASCON_XOF_IV0 WORD_T(0x8a46f0d354e771b8ull) +#define ASCON_XOF_IV1 WORD_T(0x04489f4084368cd0ull) +#define ASCON_XOF_IV2 WORD_T(0x6c94f2150dbcf66cull) +#define ASCON_XOF_IV3 WORD_T(0x48965294f143b44eull) +#define ASCON_XOF_IV4 WORD_T(0x0788515fe0e5fb8aull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - n) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0x0101010100000000ull); + ROUND(s, 0x0101010000000001ull); + ROUND(s, 0x0101000100000100ull); + ROUND(s, 0x0101000000000101ull); + ROUND(s, 0x0100010100010000ull); + ROUND(s, 0x0100010000010001ull); + ROUND(s, 0x0100000100010100ull); + ROUND(s, 0x0100000000010101ull); + ROUND(s, 0x0001010101000000ull); + ROUND(s, 0x0001010001000001ull); + ROUND(s, 0x0001000101000100ull); + ROUND(s, 0x0001000001000101ull); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x0100010100010000ull); + ROUND(s, 0x0100010000010001ull); + ROUND(s, 0x0100000100010100ull); + ROUND(s, 0x0100000000010101ull); + ROUND(s, 0x0001010101000000ull); + ROUND(s, 0x0001010001000001ull); + ROUND(s, 0x0001000101000100ull); + ROUND(s, 0x0001000001000101ull); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x0100000100010100ull); + ROUND(s, 0x0100000000010101ull); + ROUND(s, 0x0001010101000000ull); + ROUND(s, 0x0001010001000001ull); + ROUND(s, 0x0001000101000100ull); + ROUND(s, 0x0001000001000101ull); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint64_t constants[12]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = 
START(12); i < 12; ++i) ROUND(s, constants[i]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; ++i) ROUND(s, constants[i]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; ++i) ROUND(s, constants[i]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/printstate.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/printstate.h new file mode 100644 index 0000000..2021f96 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/printstate.h @@ -0,0 +1,32 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" +#include "word.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* 
PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/round.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/round.h new file mode 100644 index 0000000..15c7795 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/round.h @@ -0,0 +1,53 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint64_t C) { + word_t tmp; + /* round constant */ + s->x2 = XOR(s->x2, WORD_T(C)); + /* s-box layer */ + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); + /* linear layer */ + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/word.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/word.h new file mode 100644 index 0000000..218aff4 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi8/word.h @@ -0,0 +1,136 @@ +#ifndef 
WORD_H_ +#define WORD_H_ + +#include + +#include "endian.h" +#include "interleave.h" + +typedef union { + uint64_t w; + uint8_t b[8]; +} word_t; + +__forceinline uint8_t ROR8(uint8_t a, int n) { return a >> n | a << (8 - n); } + +__forceinline word_t ROR64(word_t a, int n) { + word_t b; + b.b[0] = ROR8(a.b[(n + 0) & 0x7], (n + 0) >> 3); + b.b[1] = ROR8(a.b[(n + 1) & 0x7], (n + 1) >> 3); + b.b[2] = ROR8(a.b[(n + 2) & 0x7], (n + 2) >> 3); + b.b[3] = ROR8(a.b[(n + 3) & 0x7], (n + 3) >> 3); + b.b[4] = ROR8(a.b[(n + 4) & 0x7], (n + 4) >> 3); + b.b[5] = ROR8(a.b[(n + 5) & 0x7], (n + 5) >> 3); + b.b[6] = ROR8(a.b[(n + 6) & 0x7], (n + 6) >> 3); + b.b[7] = ROR8(a.b[(n + 7) & 0x7], (n + 7) >> 3); + return b; +} + +__forceinline word_t WORD_T(uint64_t x) { + word_t w; + w.w = x; + return w; +} + +__forceinline uint64_t UINT64_T(word_t w) { + uint64_t x; + x = w.w; + return x; +} + +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(interleave8(x)); } + +__forceinline uint64_t WORDTOU64(word_t w) { return interleave8(UINT64_T(w)); } + +__forceinline word_t NOT(word_t a) { + a.w = ~a.w; + return a; +} + +__forceinline word_t XOR(word_t a, word_t b) { + a.w ^= b.w; + return a; +} + +__forceinline word_t AND(word_t a, word_t b) { + a.w &= b.w; + return a; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t w; + w.b[0] = lo2hi.b[0] << 4 | hi2lo.b[0] >> 4; + w.b[1] = lo2hi.b[1] << 4 | hi2lo.b[1] >> 4; + w.b[2] = lo2hi.b[2] << 4 | hi2lo.b[2] >> 4; + w.b[3] = lo2hi.b[3] << 4 | hi2lo.b[3] >> 4; + w.b[4] = lo2hi.b[4] << 4 | hi2lo.b[4] >> 4; + w.b[5] = lo2hi.b[5] << 4 | hi2lo.b[5] >> 4; + w.b[6] = lo2hi.b[6] << 4 | hi2lo.b[6] >> 4; + w.b[7] = lo2hi.b[7] << 4 | hi2lo.b[7] >> 4; + return w; +} + +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint64_t result = a.w | b.w; + result |= result >> 32; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; +} + +__forceinline word_t PAD(int i) { return (word_t){.b[7] = 0x80 >> i}; } + 
+__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint8_t m = 0xff >> n; + word_t mask = { + .b[0] = m, + .b[1] = m, + .b[2] = m, + .b[3] = m, + .b[4] = m, + .b[5] = m, + .b[6] = m, + .b[7] = m, + }; + return AND(w, mask); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/aead.c b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/aead.c new file mode 100644 index 0000000..5429f06 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/aead.c @@ -0,0 +1,253 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + *K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + *K1 = XOR(*K1, LOAD64(k)); + *K2 = XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1, + word_t K2) { + 
word_t N0, N1; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* initialization */ + PINIT(s); + s->x0 = XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + s->x3 = XOR(s->x3, N0); + s->x4 = XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) *px = XOR(*px, LOAD(ad, adlen)); + *px = XOR(*px, PAD(adlen)); + PB(s); + } + s->x4 = XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + s->x1 = XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + *px = XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + *px = XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + 
while (clen >= ASCON_RATE) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + s->x1 = XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + *px = XOR(*px, cx); + STORE(m, *px, clen); + *px = CLEAR(*px, clen); + *px = XOR(*px, cx); + } + *px = XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) { + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + s->x2 = XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("finalization", s); +} + +#if ASCON_INLINE_MODE + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#else + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + init(s, npub, K0, K1, K2); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, 
uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + final(s, K0, K1, K2); +} + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, K0, K1, K2); + /* set tag */ + c += mlen; + STOREBYTES(c, s.x3, 8); + STOREBYTES(c + 8, s.x4, 8); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, K0, K1, K2); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + s.x3 = XOR(s.x3, LOADBYTES(c, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/ascon.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/ascon.h index 10a5b6e..e596d64 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/ascon.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/ascon.h @@ -16,4 +16,4 @@ void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); void ascon_final(state_t* s, const uint8_t* k); -#endif // ASCON_H_ 
+#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/config.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/config.h index 8d8a1a0..7dfad92 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/config.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 1 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'H' -#endif - /* make sure __forceinline is supported */ #ifndef __forceinline #define __forceinline inline __attribute__((always_inline)) diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/permutations.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/permutations.h index 6172dd5..66f3cf3 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define 
ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) - -#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x80400c0600000000) +#define ASCON_128A_IV WORD_T(0x80800c0800000000) +#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000) +#define ASCON_HASH_IV WORD_T(0x00400c0000000100) +#define ASCON_XOF_IV WORD_T(0x00400c0000000000) + +#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 WORD_T(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/printstate.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/printstate.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/word.h 
b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/word.h index 5d601bb..9acbd12 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/word.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/word.h @@ -3,45 +3,78 @@ #include -#include "config.h" +#include "endian.h" typedef uint64_t word_t; #define WORD_T #define UINT64_T - #define U64TOWORD #define WORDTOU64 -#define XOR(a, b) \ - do { \ - (a) ^= (b); \ - } while (0) +__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } -#define AND(a, b) \ - do { \ - (a) &= (b); \ - } while (0) +__forceinline word_t NOT(word_t a) { return ~a; } -__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } +__forceinline word_t XOR(word_t a, word_t b) { return a ^ b; } + +__forceinline word_t AND(word_t a, word_t b) { return a & b; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return lo2hi << 32 | hi2lo >> 32; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint64_t result = a | b; + result |= result >> 32; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { - return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint64_t mask = 0x00ffffffffffffffull >> (n * 8 - 8); + return AND(w, WORD_T(mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + 
+__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/aead.c b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/aead.c new file mode 100644 index 0000000..6354194 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/aead.c @@ -0,0 +1,61 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode); + +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode) { + word_t K0, K1, K2; + /* load key */ + if (CRYPTO_KEYBYTES == 20) { + K0 = KEYROT(WORD_T(0), LOAD(k, 4)); + k += 4; + } + K1 = LOAD64(k); + K2 = LOAD64(k + 8); + /* initialization */ + s->x0 = IV; + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = K1; + s->x2 = K2; + s->x3 = LOAD64(npub); + s->x4 = LOAD64(npub + 8); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); + /* process associated data */ + if (adlen) { + process_data(s, (void*)0, ad, 
adlen, ASCON_ABSORB); + PB(s); + } + s->x4 = XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); + /* process plaintext/ciphertext */ + process_data(s, out, in, tlen, mode); + if (mode == ASCON_ENCRYPT) printstate("process plaintext", s); + if (mode == ASCON_DECRYPT) printstate("process ciphertext", s); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + s->x2 = XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("finalization", s); +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/ascon.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/ascon.h index aa685d3..c89ec89 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/ascon.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/ascon.h @@ -10,9 +10,11 @@ typedef struct { word_t x0, x1, x2, x3, x4; } state_t; -#define ASCON_AD 0 -#define ASCON_ENC 1 -#define ASCON_DEC 2 +#define ASCON_ABSORB 0x1 +#define ASCON_SQUEEZE 0x2 +#define ASCON_INSERT 0x4 +#define ASCON_ENCRYPT (ASCON_ABSORB | ASCON_SQUEEZE) +#define ASCON_DECRYPT (ASCON_ABSORB | ASCON_SQUEEZE | ASCON_INSERT) void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, uint8_t mode); @@ -21,4 +23,4 @@ void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, const uint8_t* ad, uint64_t adlen, const uint8_t* npub, const uint8_t* k, uint8_t mode); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/config.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/config.h index ec8bd6f..ca59e3b 100644 --- 
a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/config.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 1 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'H' -#endif - /* make sure __forceinline is supported */ #ifndef __forceinline #define __forceinline inline __attribute__((always_inline)) diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/decrypt.c b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/decrypt.c index 605bd03..42b1e4b 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/decrypt.c +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/decrypt.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -20,10 +19,10 @@ int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, /* set plaintext size */ *mlen = clen - CRYPTO_ABYTES; /* ascon decryption */ - ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC); + ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DECRYPT); /* verify tag (should be constant time, check compiler output) */ - XOR(s.x3, LOAD64(c + *mlen)); - XOR(s.x4, LOAD64(c + *mlen + 8)); + s.x3 = XOR(s.x3, LOADBYTES(c + *mlen, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + *mlen + 8, 8)); if (NOTZERO(s.x3, s.x4)) { *mlen = 0; return -1; diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/encrypt.c b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/encrypt.c index 641b266..80e2bcf 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/encrypt.c +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/encrypt.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -17,9 +16,9 @@ 
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, /* set ciphertext size */ *clen = mlen + CRYPTO_ABYTES; /* ascon encryption */ - ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC); + ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENCRYPT); /* set tag */ - STORE64(c + mlen, s.x3); - STORE64(c + mlen + 8, s.x4); + STOREBYTES(c + mlen, s.x3, 8); + STOREBYTES(c + mlen + 8, s.x4, 8); return 0; } diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.h index 6172dd5..66f3cf3 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 
U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) - -#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x80400c0600000000) +#define ASCON_128A_IV WORD_T(0x80800c0800000000) +#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000) +#define ASCON_HASH_IV WORD_T(0x00400c0000000100) +#define ASCON_XOF_IV WORD_T(0x00400c0000000000) + +#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 WORD_T(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/printstate.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/printstate.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/process.c b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/process.c index 81fdd15..a7624c6 100644 --- 
a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/process.c +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/process.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -13,13 +12,13 @@ void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, n = len < ASCON_RATE ? len : ASCON_RATE; /* absorb data */ tmp0 = LOAD(in, n); - XOR(s->x0, tmp0); + s->x0 = XOR(s->x0, tmp0); /* extract data */ - if (mode != ASCON_AD) STORE(out, s->x0, n); + if (mode & ASCON_SQUEEZE) STORE(out, s->x0, n); /* insert data */ - if (mode == ASCON_DEC) { - AND(s->x0, XMASK(n)); - XOR(s->x0, tmp0); + if (mode & ASCON_INSERT) { + s->x0 = CLEAR(s->x0, n); + s->x0 = XOR(s->x0, tmp0); } /* compute permutation for full blocks */ if (n == ASCON_RATE) PB(s); @@ -27,5 +26,5 @@ void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, out += n; len -= n; } - XOR(s->x0, PAD(n % 8)); + s->x0 = XOR(s->x0, PAD(n % 8)); } diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/round.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/round.h index 077cbfd..cc5cd9f 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/round.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/round.h @@ -19,50 +19,34 @@ __forceinline void PINIT(state_t* s) { } __forceinline void ROUND(state_t* s, uint64_t C) { - state_t t; - s->x2 ^= C; - s->x0 ^= s->x4; - s->x4 ^= s->x3; - s->x2 ^= s->x1; - t.x0 = s->x0; - t.x4 = s->x4; - t.x3 = s->x3; - t.x1 = s->x1; - t.x2 = s->x2; - s->x0 = t.x0 ^ (~t.x1 & t.x2); - s->x2 = t.x2 ^ (~t.x3 & t.x4); - s->x4 = t.x4 ^ (~t.x0 & t.x1); - s->x1 = t.x1 ^ (~t.x2 & t.x3); - s->x3 = t.x3 ^ (~t.x4 & t.x0); - s->x1 ^= s->x0; - t.x1 = s->x1; - s->x1 = ROR64(s->x1, 39); - s->x3 ^= s->x2; - t.x2 = s->x2; - s->x2 = ROR64(s->x2, 1); - t.x4 = s->x4; - t.x2 ^= s->x2; - s->x2 = ROR64(s->x2, 6 - 1); - 
t.x3 = s->x3; - t.x1 ^= s->x1; - s->x3 = ROR64(s->x3, 10); - s->x0 ^= s->x4; - s->x4 = ROR64(s->x4, 7); - t.x3 ^= s->x3; - s->x2 ^= t.x2; - s->x1 = ROR64(s->x1, 61 - 39); - t.x0 = s->x0; - s->x2 = ~s->x2; - s->x3 = ROR64(s->x3, 17 - 10); - t.x4 ^= s->x4; - s->x4 = ROR64(s->x4, 41 - 7); - s->x3 ^= t.x3; - s->x1 ^= t.x1; - s->x0 = ROR64(s->x0, 19); - s->x4 ^= t.x4; - t.x0 ^= s->x0; - s->x0 = ROR64(s->x0, 28 - 19); - s->x0 ^= t.x0; + word_t tmp; + /* round constant */ + s->x2 = XOR(s->x2, C); + /* s-box layer */ + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); + /* linear layer */ + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); printstate(" round output", s); } diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/word.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/word.h index 5d601bb..9acbd12 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/word.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/word.h @@ -3,45 +3,78 @@ #include -#include "config.h" +#include "endian.h" typedef uint64_t word_t; #define WORD_T #define UINT64_T - #define U64TOWORD #define WORDTOU64 -#define XOR(a, b) \ - do { \ - (a) ^= (b); \ - } while (0) +__forceinline word_t ROR64(word_t x, 
int n) { return x >> n | x << (64 - n); } -#define AND(a, b) \ - do { \ - (a) &= (b); \ - } while (0) +__forceinline word_t NOT(word_t a) { return ~a; } -__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } +__forceinline word_t XOR(word_t a, word_t b) { return a ^ b; } + +__forceinline word_t AND(word_t a, word_t b) { return a & b; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return lo2hi << 32 | hi2lo >> 32; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint64_t result = a | b; + result |= result >> 32; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { - return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint64_t mask = 0x00ffffffffffffffull >> (n * 8 - 8); + return AND(w, WORD_T(mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + 
uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/aead.c b/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/aead.c new file mode 100644 index 0000000..5429f06 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/aead.c @@ -0,0 +1,253 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + *K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + *K1 = XOR(*K1, LOAD64(k)); + *K2 = XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1, + word_t K2) { + word_t N0, N1; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* initialization */ + PINIT(s); + s->x0 = XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + s->x3 = XOR(s->x3, N0); + s->x4 = XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) *px = XOR(*px, 
LOAD(ad, adlen)); + *px = XOR(*px, PAD(adlen)); + PB(s); + } + s->x4 = XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + s->x1 = XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + s->x0 = XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + *px = XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + *px = XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + s->x1 = XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + s->x0 = XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + *px = XOR(*px, cx); + STORE(m, *px, clen); + *px = CLEAR(*px, clen); + *px = XOR(*px, cx); + } + *px = XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) { + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + s->x1 = XOR(s->x1, K1); + s->x2 = XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + s->x2 
= XOR(s->x2, K1); + s->x3 = XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + s->x1 = XOR(s->x1, KEYROT(K0, K1)); + s->x2 = XOR(s->x2, KEYROT(K1, K2)); + s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + s->x3 = XOR(s->x3, K1); + s->x4 = XOR(s->x4, K2); + printstate("finalization", s); +} + +#if ASCON_INLINE_MODE + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#else + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + init(s, npub, K0, K1, K2); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + loadkey(&K0, &K1, &K2, k); + final(s, K0, K1, K2); +} + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, K0, K1, K2); + /* set tag */ + c += mlen; + STOREBYTES(c, s.x3, 8); + STOREBYTES(c + 8, s.x4, 8); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + word_t K0, K1, K2; + state_t s; + (void)nsec; + if (clen < 
CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + loadkey(&K0, &K1, &K2, k); + INIT(&s, npub, K0, K1, K2); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, K0, K1, K2); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + s.x3 = XOR(s.x3, LOADBYTES(c, 8)); + s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/api.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/api.h new file mode 100644 index 0000000..5fa0140 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 20 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 8 diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/ascon.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/ascon.h new file mode 100644 index 0000000..e596d64 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/config.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/config.h new file mode 100644 index 0000000..ef5eb14 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/config.h @@ -0,0 +1,29 @@ 
+#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 1 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 0 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/endian.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 
8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/implementors b/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/permutations.c b/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/permutations.c new file mode 100644 index 0000000..8d39320 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/permutations.c @@ -0,0 +1,35 @@ +#include "permutations.h" + +#include "round.h" + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/permutations.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/permutations.h new file mode 100644 index 0000000..66f3cf3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/permutations.h @@ -0,0 +1,163 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 
16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV WORD_T(0x80400c0600000000) +#define ASCON_128A_IV WORD_T(0x80800c0800000000) +#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000) +#define ASCON_HASH_IV WORD_T(0x00400c0000000100) +#define ASCON_XOF_IV WORD_T(0x00400c0000000000) + +#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 WORD_T(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) ((3 + (n)) << 4 | (12 - (n))) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xf0); + ROUND(s, 0xe1); + ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 
0x4b); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/printstate.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/printstate.h new file mode 100644 index 0000000..2021f96 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/printstate.h @@ -0,0 +1,32 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" +#include "word.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" 
x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/round.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/round.h new file mode 100644 index 0000000..15c7795 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/round.h @@ -0,0 +1,53 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint64_t C) { + word_t tmp; + /* round constant */ + s->x2 = XOR(s->x2, WORD_T(C)); + /* s-box layer */ + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); + /* linear layer */ + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/word.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/word.h new file mode 100644 index 0000000..873313d --- 
/dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt8/word.h @@ -0,0 +1,115 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "endian.h" + +typedef union { + uint64_t w; + uint8_t b[8]; +} word_t; + +#define U64TOWORD WORD_T +#define WORDTOU64 UINT64_T + +#define XMUL(i, x) \ + do { \ + tmp = (uint16_t)a.b[i] * (1 << (x)); \ + b.b[(byte_rol + (i)) & 0x7] ^= (uint8_t)tmp; \ + b.b[(byte_rol + (i) + 1) & 0x7] ^= tmp >> 8; \ + } while (0) + +__forceinline word_t ROR64(word_t a, int n) { + word_t b = {.w = 0ull}; + int bit_rol = (64 - n) & 0x7; + int byte_rol = (64 - n) >> 3; + uint16_t tmp; + XMUL(0, bit_rol); + XMUL(1, bit_rol); + XMUL(2, bit_rol); + XMUL(3, bit_rol); + XMUL(4, bit_rol); + XMUL(5, bit_rol); + XMUL(6, bit_rol); + XMUL(7, bit_rol); + return b; +} + +__forceinline word_t WORD_T(uint64_t x) { return (word_t){.w = x}; } + +__forceinline uint64_t UINT64_T(word_t w) { return w.w; } + +__forceinline word_t NOT(word_t a) { + a.w = ~a.w; + return a; +} + +__forceinline word_t XOR(word_t a, word_t b) { + a.w ^= b.w; + return a; +} + +__forceinline word_t AND(word_t a, word_t b) { + a.w &= b.w; + return a; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + return (word_t){.w = lo2hi.w << 32 | hi2lo.w >> 32}; +} + +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint64_t result = a.w | b.w; + result |= result >> 32; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; +} + +__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint64_t mask = 0x00ffffffffffffffull >> (n * 8 - 8); + return AND(w, WORD_T(mask)); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + 
uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/ref/ascon.h b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/ascon.h index 8ab0502..c998868 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/ref/ascon.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/ascon.h @@ -3,12 +3,8 @@ #include -#define WORDTOU64 - -typedef uint64_t word_t; - typedef struct { - word_t x0, x1, x2, x3, x4; + uint64_t x0, x1, x2, x3, x4; } state_t; -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/ref/decrypt.c b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/decrypt.c index e0881ac..fabb353 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/ref/decrypt.c +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/decrypt.c @@ -1,8 +1,8 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" +#include "word.h" int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, const uint8_t* c, uint64_t clen, const uint8_t* ad, @@ -20,11 +20,11 @@ int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, *mlen = clen - CRYPTO_ABYTES; /* load key and nonce */ - K0 = LOAD(k + 0, 4) >> 32; - K1 = LOAD(k + 4, 8); - K2 = LOAD(k + 
12, 8); - N0 = LOAD(npub, 8); - N1 = LOAD(npub + 8, 8); + K0 = LOADBYTES(k + 0, 4) >> 32; + K1 = LOADBYTES(k + 4, 8); + K2 = LOADBYTES(k + 12, 8); + N0 = LOADBYTES(npub, 8); + N1 = LOADBYTES(npub + 8, 8); /* initialization */ s.x0 = ASCON_80PQ_IV | K0; @@ -41,13 +41,13 @@ int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, /* process associated data */ if (adlen) { while (adlen >= ASCON_128_RATE) { - s.x0 ^= LOAD(ad, 8); + s.x0 ^= LOADBYTES(ad, 8); P6(&s); ad += ASCON_128_RATE; adlen -= ASCON_128_RATE; } /* final associated data block */ - s.x0 ^= LOAD(ad, adlen); + s.x0 ^= LOADBYTES(ad, adlen); s.x0 ^= PAD(adlen); P6(&s); } @@ -57,8 +57,8 @@ int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, /* process ciphertext */ clen -= CRYPTO_ABYTES; while (clen >= ASCON_128_RATE) { - uint64_t c0 = LOAD(c, 8); - STORE(m, s.x0 ^ c0, 8); + uint64_t c0 = LOADBYTES(c, 8); + STOREBYTES(m, s.x0 ^ c0, 8); s.x0 = c0; P6(&s); m += ASCON_128_RATE; @@ -66,9 +66,9 @@ int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, clen -= ASCON_128_RATE; } /* final ciphertext block */ - uint64_t c0 = LOAD(c, clen); - STORE(m, s.x0 ^ c0, clen); - s.x0 &= ~MASK(clen); + uint64_t c0 = LOADBYTES(c, clen); + STOREBYTES(m, s.x0 ^ c0, clen); + s.x0 = CLEARBYTES(s.x0, clen); s.x0 |= c0; s.x0 ^= PAD(clen); c += clen; @@ -84,7 +84,7 @@ int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, printstate("finalization", &s); /* verify tag (should be constant time, check compiler output) */ - if ((s.x3 ^ LOAD(c, 8)) | (s.x4 ^ LOAD(c + 8, 8))) { + if ((s.x3 ^ LOADBYTES(c, 8)) | (s.x4 ^ LOADBYTES(c + 8, 8))) { *mlen = 0; return -1; } diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/ref/encrypt.c b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/encrypt.c index 4d7f9ac..81d6b43 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/ref/encrypt.c +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/encrypt.c @@ -1,8 +1,8 @@ #include 
"api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" +#include "word.h" int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, uint64_t mlen, const uint8_t* ad, uint64_t adlen, @@ -16,11 +16,11 @@ int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, *clen = mlen + CRYPTO_ABYTES; /* load key and nonce */ - K0 = LOAD(k + 0, 4) >> 32; - K1 = LOAD(k + 4, 8); - K2 = LOAD(k + 12, 8); - N0 = LOAD(npub, 8); - N1 = LOAD(npub + 8, 8); + K0 = LOADBYTES(k + 0, 4) >> 32; + K1 = LOADBYTES(k + 4, 8); + K2 = LOADBYTES(k + 12, 8); + N0 = LOADBYTES(npub, 8); + N1 = LOADBYTES(npub + 8, 8); /* initialization */ s.x0 = ASCON_80PQ_IV | K0; @@ -37,13 +37,13 @@ int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, /* process associated data */ if (adlen) { while (adlen >= ASCON_128_RATE) { - s.x0 ^= LOAD(ad, 8); + s.x0 ^= LOADBYTES(ad, 8); P6(&s); ad += ASCON_128_RATE; adlen -= ASCON_128_RATE; } /* final associated data block */ - s.x0 ^= LOAD(ad, adlen); + s.x0 ^= LOADBYTES(ad, adlen); s.x0 ^= PAD(adlen); P6(&s); } @@ -52,16 +52,16 @@ int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, /* process plaintext */ while (mlen >= ASCON_128_RATE) { - s.x0 ^= LOAD(m, 8); - STORE(c, s.x0, 8); + s.x0 ^= LOADBYTES(m, 8); + STOREBYTES(c, s.x0, 8); P6(&s); m += ASCON_128_RATE; c += ASCON_128_RATE; mlen -= ASCON_128_RATE; } /* final plaintext block */ - s.x0 ^= LOAD(m, mlen); - STORE(c, s.x0, mlen); + s.x0 ^= LOADBYTES(m, mlen); + STOREBYTES(c, s.x0, mlen); s.x0 ^= PAD(mlen); c += mlen; printstate("process plaintext", &s); @@ -76,8 +76,8 @@ int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, printstate("finalization", &s); /* set tag */ - STORE(c, s.x3, 8); - STORE(c + 8, s.x4, 8); + STOREBYTES(c, s.x3, 8); + STOREBYTES(c + 8, s.x4, 8); return 0; } diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/ref/printstate.h 
b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/ref/printstate.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/ref/word.h b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/word.h new file mode 100644 index 0000000..5a1519b --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/word.h @@ -0,0 +1,35 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#define WORDTOU64 +#define U64TOWORD + +typedef uint64_t word_t; + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +/* set padding byte in Ascon 64-bit word */ +#define PAD(i) SETBYTE(0x80, i) + +static inline uint64_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) x |= SETBYTE(bytes[i], i); + return x; +} + +static inline void STOREBYTES(uint8_t* bytes, uint64_t x, int n) { + for (int i = 0; i < n; ++i) bytes[i] = GETBYTE(x, i); +} + +static inline uint64_t CLEARBYTES(uint64_t x, int n) { + for (int i = 0; i < n; ++i) x &= ~SETBYTE(0xff, i); + return x; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32/ascon.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32/ascon.h index 10a5b6e..e596d64 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32/ascon.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32/ascon.h @@ -16,4 +16,4 @@ void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); void 
ascon_final(state_t* s, const uint8_t* k); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32/config.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32/config.h index b1b5080..1447e7d 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32/config.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 0 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'M' -#endif - /* make sure __forceinline is supported */ #ifndef __forceinline #define __forceinline inline __attribute__((always_inline)) diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32/hash.c b/ascon/Implementations/crypto_hash/asconhashv12/bi32/hash.c index 34dec63..bf77d61 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32/hash.c +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32/hash.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -26,13 +25,13 @@ int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { /* absorb plaintext */ while (len >= ASCON_RATE) { - XOR(s.x0, LOAD64(in)); + s.x0 = XOR(s.x0, LOAD64(in)); P12(&s); in += ASCON_RATE; len -= ASCON_RATE; } - if (len) XOR(s.x0, LOAD(in, len)); - XOR(s.x0, PAD(len)); + if (len) s.x0 = XOR(s.x0, LOAD(in, len)); + s.x0 = XOR(s.x0, PAD(len)); P12(&s); printstate("absorb plaintext", &s); diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32/interleave.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32/interleave.h new file mode 100644 index 0000000..6e2e5c3 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32/interleave.h @@ -0,0 +1,47 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include + +__forceinline uint32_t deinterleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 1)) & 
0x22222222, x ^= t ^ (t << 1); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + return x; +} + +__forceinline uint32_t interleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + return x; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t deinterleave32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + lo = deinterleave_uint32(lo); + hi = deinterleave_uint32(hi); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t interleave32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + lo = interleave_uint32(lo); + hi = interleave_uint32(hi); + return (uint64_t)hi << 32 | lo; +} + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32/permutations.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32/permutations.h index ef338f1..49fd52a 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32/permutations.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | 
\ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) - -#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x8021000008220000) +#define ASCON_128A_IV WORD_T(0x8822000000200000) +#define ASCON_80PQ_IV WORD_T(0xc021000008220000) +#define ASCON_HASH_IV WORD_T(0x0020000008020010) +#define ASCON_XOF_IV WORD_T(0x0020000008020000) + +#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7) +#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340) +#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8) +#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98) +#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398) + +#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6) +#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220) +#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04) +#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2) +#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV 
ASCON_128_IV diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32/printstate.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32/printstate.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32/word.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32/word.h index 8ffcaaa..b27c6c9 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32/word.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32/word.h @@ -3,55 +3,51 @@ #include -#include "config.h" +#include "endian.h" +#include "interleave.h" typedef struct { uint32_t e; uint32_t o; } word_t; +__forceinline uint32_t ROR32(uint32_t x, int n) { + return (n == 0) ? x : x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + __forceinline word_t WORD_T(uint64_t x) { return (word_t){.o = x >> 32, .e = x}; } __forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } -__forceinline uint64_t TOBI32(uint64_t in); - -__forceinline uint64_t FROMBI32(uint64_t in); +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } -__forceinline word_t U64TOWORD(uint64_t x) { - uint64_t w = TOBI32(x); - return (word_t){.o = w >> 32, .e = w}; -} +__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } -__forceinline uint64_t WORDTOU64(word_t w) { - return FROMBI32((uint64_t)w.o << 32 | w.e); +__forceinline word_t NOT(word_t a) { + a.e = ~a.e; + a.o = ~a.o; + return a; } -#define XOR(a, b) \ - do { \ - word_t tb = b; \ - (a).e ^= tb.e; \ - (a).o ^= tb.o; \ - } while (0) - -#define AND(a, b) \ - do { \ - word_t tb = b; \ - (a).e &= tb.e; \ - (a).o &= tb.o; \ - } while (0) - -__forceinline uint32_t ROR32(uint32_t x, int n) { - return x >> n | x << (32 - n); +__forceinline word_t XOR(word_t a, word_t b) { + a.e ^= b.e; + a.o ^= b.o; + return a; } -__forceinline word_t ROR64(word_t x, int n) { - word_t r; - r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); - r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); - return r; +__forceinline word_t AND(word_t a, word_t b) { + a.e &= b.e; + a.o &= b.o; + return a; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { @@ -61,57 +57,58 @@ __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return r; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint32_t result = a.e | a.o | b.e | b.o; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ uint32_t mask = 0x0fffffff >> (n * 4 - 4); - return WORD_T((uint64_t)mask << 32 | mask); -} - -/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ -__forceinline uint64_t TOBI32(uint64_t in) { - uint32_t hi = in >> 32; - uint32_t lo = in; - uint32_t r0, r1; - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r0 = (lo & 0x0000FFFF) | (hi << 16); - r1 = (lo >> 16) | (hi & 0xFFFF0000); - return (uint64_t)r1 << 32 | r0; -} - -/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ -__forceinline uint64_t FROMBI32(uint64_t in) { - uint32_t r0 = in; - uint32_t r1 = in >> 32; - uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); - uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - return (uint64_t)hi << 32 | lo; + return AND(w, WORD_T((uint64_t)mask << 32 | mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/ascon.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/ascon.h index 10a5b6e..e596d64 100644 --- 
a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/ascon.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/ascon.h @@ -16,4 +16,4 @@ void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); void ascon_final(state_t* s, const uint8_t* k); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/config.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/config.h index 4242e2e..0f5a485 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/config.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 1 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'H' -#endif - /* make sure __forceinline is supported */ #ifndef __forceinline #define __forceinline inline __attribute__((always_inline)) diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/hash.c b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/hash.c index 34dec63..bf77d61 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/hash.c +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/hash.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -26,13 +25,13 @@ int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { /* absorb plaintext */ while (len >= ASCON_RATE) { - XOR(s.x0, LOAD64(in)); + s.x0 = XOR(s.x0, LOAD64(in)); P12(&s); in += ASCON_RATE; len -= ASCON_RATE; } - if (len) XOR(s.x0, LOAD(in, len)); - XOR(s.x0, PAD(len)); + if (len) s.x0 = XOR(s.x0, LOAD(in, len)); + s.x0 = XOR(s.x0, PAD(len)); P12(&s); printstate("absorb plaintext", &s); diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/interleave.h 
b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/interleave.h new file mode 100644 index 0000000..6e2e5c3 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/interleave.h @@ -0,0 +1,47 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include + +__forceinline uint32_t deinterleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + return x; +} + +__forceinline uint32_t interleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + return x; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t deinterleave32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + lo = deinterleave_uint32(lo); + hi = deinterleave_uint32(hi); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t interleave32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + lo = interleave_uint32(lo); + hi = interleave_uint32(hi); + return (uint64_t)hi << 32 | lo; +} + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/permutations.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/permutations.h index ef338f1..49fd52a 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/permutations.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 
U64TOWORD(0x348fa5c9d525e140ull) - -#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x8021000008220000) +#define ASCON_128A_IV WORD_T(0x8822000000200000) +#define ASCON_80PQ_IV WORD_T(0xc021000008220000) +#define ASCON_HASH_IV WORD_T(0x0020000008020010) +#define ASCON_XOF_IV WORD_T(0x0020000008020000) + +#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7) +#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340) +#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8) +#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98) +#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398) + +#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6) +#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220) +#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04) +#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2) +#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/printstate.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/printstate.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/word.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/word.h index 8ffcaaa..b27c6c9 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/word.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/word.h @@ -3,55 +3,51 @@ #include 
-#include "config.h" +#include "endian.h" +#include "interleave.h" typedef struct { uint32_t e; uint32_t o; } word_t; +__forceinline uint32_t ROR32(uint32_t x, int n) { + return (n == 0) ? x : x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + __forceinline word_t WORD_T(uint64_t x) { return (word_t){.o = x >> 32, .e = x}; } __forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } -__forceinline uint64_t TOBI32(uint64_t in); - -__forceinline uint64_t FROMBI32(uint64_t in); +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } -__forceinline word_t U64TOWORD(uint64_t x) { - uint64_t w = TOBI32(x); - return (word_t){.o = w >> 32, .e = w}; -} +__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } -__forceinline uint64_t WORDTOU64(word_t w) { - return FROMBI32((uint64_t)w.o << 32 | w.e); +__forceinline word_t NOT(word_t a) { + a.e = ~a.e; + a.o = ~a.o; + return a; } -#define XOR(a, b) \ - do { \ - word_t tb = b; \ - (a).e ^= tb.e; \ - (a).o ^= tb.o; \ - } while (0) - -#define AND(a, b) \ - do { \ - word_t tb = b; \ - (a).e &= tb.e; \ - (a).o &= tb.o; \ - } while (0) - -__forceinline uint32_t ROR32(uint32_t x, int n) { - return x >> n | x << (32 - n); +__forceinline word_t XOR(word_t a, word_t b) { + a.e ^= b.e; + a.o ^= b.o; + return a; } -__forceinline word_t ROR64(word_t x, int n) { - word_t r; - r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); - r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); - return r; +__forceinline word_t AND(word_t a, word_t b) { + a.e &= b.e; + a.o &= b.o; + return a; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { @@ -61,57 +57,58 @@ __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return r; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint32_t result = a.e | a.o | b.e | b.o; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ uint32_t mask = 0x0fffffff >> (n * 4 - 4); - return WORD_T((uint64_t)mask << 32 | mask); -} - -/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ -__forceinline uint64_t TOBI32(uint64_t in) { - uint32_t hi = in >> 32; - uint32_t lo = in; - uint32_t r0, r1; - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r0 = (lo & 0x0000FFFF) | (hi << 16); - r1 = (lo >> 16) | (hi & 0xFFFF0000); - return (uint64_t)r1 << 32 | r0; -} - -/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ -__forceinline uint64_t FROMBI32(uint64_t in) { - uint32_t r0 = in; - uint32_t r1 = in >> 32; - uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); - uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - return (uint64_t)hi << 32 | lo; + return AND(w, WORD_T((uint64_t)mask << 32 | mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/ascon.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/ascon.h index 10a5b6e..e596d64 100644 
--- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/ascon.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/ascon.h @@ -16,4 +16,4 @@ void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); void ascon_final(state_t* s, const uint8_t* k); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/config.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/config.h index 5ccce77..0f5a485 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/config.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 1 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'M' -#endif - /* make sure __forceinline is supported */ #ifndef __forceinline #define __forceinline inline __attribute__((always_inline)) diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/hash.c b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/hash.c index 34dec63..bf77d61 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/hash.c +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/hash.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -26,13 +25,13 @@ int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { /* absorb plaintext */ while (len >= ASCON_RATE) { - XOR(s.x0, LOAD64(in)); + s.x0 = XOR(s.x0, LOAD64(in)); P12(&s); in += ASCON_RATE; len -= ASCON_RATE; } - if (len) XOR(s.x0, LOAD(in, len)); - XOR(s.x0, PAD(len)); + if (len) s.x0 = XOR(s.x0, LOAD(in, len)); + s.x0 = XOR(s.x0, PAD(len)); P12(&s); printstate("absorb plaintext", &s); diff --git 
a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/interleave.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/interleave.h new file mode 100644 index 0000000..6e2e5c3 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/interleave.h @@ -0,0 +1,47 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include + +__forceinline uint32_t deinterleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + return x; +} + +__forceinline uint32_t interleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + return x; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t deinterleave32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + lo = deinterleave_uint32(lo); + hi = deinterleave_uint32(hi); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t interleave32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + lo = interleave_uint32(lo); + hi = interleave_uint32(hi); + return (uint64_t)hi << 32 | lo; +} + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/permutations.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/permutations.h index ef338f1..49fd52a 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/permutations.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 
U64TOWORD(0x348fa5c9d525e140ull) - -#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x8021000008220000) +#define ASCON_128A_IV WORD_T(0x8822000000200000) +#define ASCON_80PQ_IV WORD_T(0xc021000008220000) +#define ASCON_HASH_IV WORD_T(0x0020000008020010) +#define ASCON_XOF_IV WORD_T(0x0020000008020000) + +#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7) +#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340) +#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8) +#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98) +#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398) + +#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6) +#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220) +#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04) +#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2) +#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/printstate.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/printstate.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/round.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/round.h index bc7a0cd..fa23bf3 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/round.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/round.h @@ 
-19,58 +19,34 @@ __forceinline void PINIT(state_t* s) { } __forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { - uint32_t tmp_e, tmp_o; + word_t tmp, C = {.o = C_o, .e = C_e}; /* round constant */ - s->x2.e ^= C_e; - s->x2.o ^= C_o; + s->x2 = XOR(s->x2, C); /* s-box layer */ - s->x0.e ^= s->x4.e; - s->x0.o ^= s->x4.o; - s->x4.e ^= s->x3.e; - s->x4.o ^= s->x3.o; - s->x2.e ^= s->x1.e; - s->x2.o ^= s->x1.o; - tmp_e = s->x0.e & (~s->x4.e); - tmp_o = s->x0.o & (~s->x4.o); - s->x0.e ^= s->x2.e & (~s->x1.e); - s->x0.o ^= s->x2.o & (~s->x1.o); - s->x2.e ^= s->x4.e & (~s->x3.e); - s->x2.o ^= s->x4.o & (~s->x3.o); - s->x4.e ^= s->x1.e & (~s->x0.e); - s->x4.o ^= s->x1.o & (~s->x0.o); - s->x1.e ^= s->x3.e & (~s->x2.e); - s->x1.o ^= s->x3.o & (~s->x2.o); - s->x3.e ^= tmp_e; - s->x3.o ^= tmp_o; - s->x1.e ^= s->x0.e; - s->x1.o ^= s->x0.o; - s->x3.e ^= s->x2.e; - s->x3.o ^= s->x2.o; - s->x0.e ^= s->x4.e; - s->x0.o ^= s->x4.o; + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); /* linear layer */ - tmp_e = s->x0.e ^ ROR32(s->x0.o, 4); - tmp_o = s->x0.o ^ ROR32(s->x0.e, 5); - s->x0.e ^= ROR32(tmp_o, 9); - s->x0.o ^= ROR32(tmp_e, 10); - tmp_e = s->x1.e ^ ROR32(s->x1.e, 11); - tmp_o = s->x1.o ^ ROR32(s->x1.o, 11); - s->x1.e ^= ROR32(tmp_o, 19); - s->x1.o ^= ROR32(tmp_e, 20); - tmp_e = s->x2.e ^ ROR32(s->x2.o, 2); - tmp_o = s->x2.o ^ ROR32(s->x2.e, 3); - s->x2.e ^= tmp_o; - s->x2.o ^= ROR32(tmp_e, 1); - tmp_e = s->x3.e ^ ROR32(s->x3.o, 3); - tmp_o = s->x3.o ^ ROR32(s->x3.e, 4); - s->x3.e ^= ROR32(tmp_e, 5); - s->x3.o ^= ROR32(tmp_o, 5); - tmp_e = s->x4.e ^ ROR32(s->x4.e, 17); - tmp_o = s->x4.o ^ ROR32(s->x4.o, 
17); - s->x4.e ^= ROR32(tmp_o, 3); - s->x4.o ^= ROR32(tmp_e, 4); - s->x2.e = ~s->x2.e; - s->x2.o = ~s->x2.o; + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); printstate(" round output", s); } diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/word.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/word.h index 8ffcaaa..b27c6c9 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/word.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/word.h @@ -3,55 +3,51 @@ #include -#include "config.h" +#include "endian.h" +#include "interleave.h" typedef struct { uint32_t e; uint32_t o; } word_t; +__forceinline uint32_t ROR32(uint32_t x, int n) { + return (n == 0) ? x : x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + __forceinline word_t WORD_T(uint64_t x) { return (word_t){.o = x >> 32, .e = x}; } __forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } -__forceinline uint64_t TOBI32(uint64_t in); - -__forceinline uint64_t FROMBI32(uint64_t in); +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } -__forceinline word_t U64TOWORD(uint64_t x) { - uint64_t w = TOBI32(x); - return (word_t){.o = w >> 32, .e = w}; -} +__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } -__forceinline uint64_t WORDTOU64(word_t w) { - return FROMBI32((uint64_t)w.o << 32 | w.e); +__forceinline word_t NOT(word_t a) { + a.e = ~a.e; + a.o = ~a.o; + return a; } -#define XOR(a, b) \ - do { \ - word_t tb = b; \ - (a).e ^= tb.e; \ - (a).o ^= tb.o; \ - } while (0) - -#define AND(a, b) \ - do { \ - word_t tb = b; \ - (a).e &= tb.e; \ - (a).o &= tb.o; \ - } while (0) - -__forceinline uint32_t ROR32(uint32_t x, int n) { - return x >> n | x << (32 - n); +__forceinline word_t XOR(word_t a, word_t b) { + a.e ^= b.e; + a.o ^= b.o; + return a; } -__forceinline word_t ROR64(word_t x, int n) { - word_t r; - r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); - r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); - return r; +__forceinline word_t AND(word_t a, word_t b) { + a.e &= b.e; + a.o &= b.o; + return a; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { @@ -61,57 +57,58 @@ __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return r; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint32_t result = a.e | a.o | b.e | b.o; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ uint32_t mask = 0x0fffffff >> (n * 4 - 4); - return WORD_T((uint64_t)mask << 32 | mask); -} - -/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ -__forceinline uint64_t TOBI32(uint64_t in) { - uint32_t hi = in >> 32; - uint32_t lo = in; - uint32_t r0, r1; - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r0 = (lo & 0x0000FFFF) | (hi << 16); - r1 = (lo >> 16) | (hi & 0xFFFF0000); - return (uint64_t)r1 << 32 | r0; -} - -/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ -__forceinline uint64_t FROMBI32(uint64_t in) { - uint32_t r0 = in; - uint32_t r1 = in >> 32; - uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); - uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - return (uint64_t)hi << 32 | lo; + return AND(w, WORD_T((uint64_t)mask << 32 | mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/ascon.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/ascon.h index aa685d3..c89ec89 100644 
--- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/ascon.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/ascon.h @@ -10,9 +10,11 @@ typedef struct { word_t x0, x1, x2, x3, x4; } state_t; -#define ASCON_AD 0 -#define ASCON_ENC 1 -#define ASCON_DEC 2 +#define ASCON_ABSORB 0x1 +#define ASCON_SQUEEZE 0x2 +#define ASCON_INSERT 0x4 +#define ASCON_ENCRYPT (ASCON_ABSORB | ASCON_SQUEEZE) +#define ASCON_DECRYPT (ASCON_ABSORB | ASCON_SQUEEZE | ASCON_INSERT) void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, uint8_t mode); @@ -21,4 +23,4 @@ void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, const uint8_t* ad, uint64_t adlen, const uint8_t* npub, const uint8_t* k, uint8_t mode); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/config.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/config.h index 19426ab..ca59e3b 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/config.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 1 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'B' -#endif - /* make sure __forceinline is supported */ #ifndef __forceinline #define __forceinline inline __attribute__((always_inline)) diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/hash.c b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/hash.c index 34dec63..bf77d61 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/hash.c +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/hash.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -26,13 +25,13 @@ int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { 
/* absorb plaintext */ while (len >= ASCON_RATE) { - XOR(s.x0, LOAD64(in)); + s.x0 = XOR(s.x0, LOAD64(in)); P12(&s); in += ASCON_RATE; len -= ASCON_RATE; } - if (len) XOR(s.x0, LOAD(in, len)); - XOR(s.x0, PAD(len)); + if (len) s.x0 = XOR(s.x0, LOAD(in, len)); + s.x0 = XOR(s.x0, PAD(len)); P12(&s); printstate("absorb plaintext", &s); diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/interleave.c b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/interleave.c new file mode 100644 index 0000000..321d0ce --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/interleave.c @@ -0,0 +1,42 @@ +#include "interleave.h" + +static inline uint32_t deinterleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + return x; +} + +static inline uint32_t interleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + return x; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +uint64_t deinterleave32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + lo = deinterleave_uint32(lo); + hi = deinterleave_uint32(hi); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +uint64_t interleave32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + lo = interleave_uint32(lo); + hi = interleave_uint32(hi); + return (uint64_t)hi << 32 | lo; +} diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/interleave.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/interleave.h new file mode 100644 index 0000000..f6590fb --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/interleave.h @@ -0,0 +1,9 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include + +uint64_t deinterleave32(uint64_t in); +uint64_t interleave32(uint64_t in); + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/permutations.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/permutations.h index ef338f1..49fd52a 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/permutations.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ 
- ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) - -#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x8021000008220000) +#define ASCON_128A_IV WORD_T(0x8822000000200000) +#define ASCON_80PQ_IV WORD_T(0xc021000008220000) +#define ASCON_HASH_IV WORD_T(0x0020000008020010) +#define ASCON_XOF_IV WORD_T(0x0020000008020000) + +#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7) +#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340) +#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8) +#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98) +#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398) + +#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6) +#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220) +#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04) +#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2) +#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/printstate.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/printstate.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void 
printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/round.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/round.h index d8ea3b6..fa23bf3 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/round.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/round.h @@ -19,66 +19,34 @@ __forceinline void PINIT(state_t* s) { } __forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { - state_t t; + word_t tmp, C = {.o = C_o, .e = C_e}; /* round constant */ - s->x2.e ^= C_e; - s->x2.o ^= C_o; + s->x2 = XOR(s->x2, C); /* s-box layer */ - s->x0.e ^= s->x4.e; - s->x0.o ^= s->x4.o; - s->x4.e ^= s->x3.e; - s->x4.o ^= s->x3.o; - s->x2.e ^= s->x1.e; - s->x2.o ^= s->x1.o; - t.x0.e = s->x0.e; - t.x0.o = s->x0.o; - t.x4.e = s->x4.e; - t.x4.o = s->x4.o; - t.x3.e = s->x3.e; - t.x3.o = s->x3.o; - t.x1.e = s->x1.e; - t.x1.o = s->x1.o; - t.x2.e = s->x2.e; - t.x2.o = s->x2.o; - s->x0.e = t.x0.e ^ (~t.x1.e & t.x2.e); - s->x0.o = t.x0.o ^ (~t.x1.o & t.x2.o); - s->x2.e = t.x2.e ^ (~t.x3.e & t.x4.e); - s->x2.o = t.x2.o ^ (~t.x3.o & t.x4.o); - s->x4.e = t.x4.e ^ (~t.x0.e & t.x1.e); - s->x4.o = t.x4.o ^ (~t.x0.o & t.x1.o); - s->x1.e = t.x1.e ^ (~t.x2.e & t.x3.e); - s->x1.o = t.x1.o ^ (~t.x2.o & t.x3.o); - s->x3.e = t.x3.e ^ (~t.x4.e & t.x0.e); - s->x3.o = t.x3.o ^ (~t.x4.o & t.x0.o); - s->x1.e ^= s->x0.e; - s->x1.o ^= s->x0.o; - s->x3.e ^= s->x2.e; - s->x3.o ^= s->x2.o; - s->x0.e ^= s->x4.e; - s->x0.o ^= s->x4.o; + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); 
/* linear layer */ - t.x0.e = s->x0.e ^ ROR32(s->x0.o, 4); - t.x0.o = s->x0.o ^ ROR32(s->x0.e, 5); - t.x1.e = s->x1.e ^ ROR32(s->x1.e, 11); - t.x1.o = s->x1.o ^ ROR32(s->x1.o, 11); - t.x2.e = s->x2.e ^ ROR32(s->x2.o, 2); - t.x2.o = s->x2.o ^ ROR32(s->x2.e, 3); - t.x3.e = s->x3.e ^ ROR32(s->x3.o, 3); - t.x3.o = s->x3.o ^ ROR32(s->x3.e, 4); - t.x4.e = s->x4.e ^ ROR32(s->x4.e, 17); - t.x4.o = s->x4.o ^ ROR32(s->x4.o, 17); - s->x0.e ^= ROR32(t.x0.o, 9); - s->x0.o ^= ROR32(t.x0.e, 10); - s->x1.e ^= ROR32(t.x1.o, 19); - s->x1.o ^= ROR32(t.x1.e, 20); - s->x2.e ^= t.x2.o; - s->x2.o ^= ROR32(t.x2.e, 1); - s->x3.e ^= ROR32(t.x3.e, 5); - s->x3.o ^= ROR32(t.x3.o, 5); - s->x4.e ^= ROR32(t.x4.o, 3); - s->x4.o ^= ROR32(t.x4.e, 4); - s->x2.e = ~s->x2.e; - s->x2.o = ~s->x2.o; + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); printstate(" round output", s); } diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/word.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/word.h index 45184ca..b27c6c9 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/word.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/word.h @@ -3,55 +3,51 @@ #include -#include "config.h" +#include "endian.h" +#include "interleave.h" typedef struct { uint32_t e; uint32_t o; } word_t; +__forceinline uint32_t ROR32(uint32_t x, int n) { + return (n == 0) ? x : x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + __forceinline word_t WORD_T(uint64_t x) { return (word_t){.o = x >> 32, .e = x}; } __forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } -uint64_t TOBI32(uint64_t in); - -uint64_t FROMBI32(uint64_t in); +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } -__forceinline word_t U64TOWORD(uint64_t x) { - uint64_t w = TOBI32(x); - return (word_t){.o = w >> 32, .e = w}; -} +__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } -__forceinline uint64_t WORDTOU64(word_t w) { - return FROMBI32((uint64_t)w.o << 32 | w.e); +__forceinline word_t NOT(word_t a) { + a.e = ~a.e; + a.o = ~a.o; + return a; } -#define XOR(a, b) \ - do { \ - word_t tb = b; \ - (a).e ^= tb.e; \ - (a).o ^= tb.o; \ - } while (0) - -#define AND(a, b) \ - do { \ - word_t tb = b; \ - (a).e &= tb.e; \ - (a).o &= tb.o; \ - } while (0) - -__forceinline uint32_t ROR32(uint32_t x, int n) { - return x >> n | x << (32 - n); +__forceinline word_t XOR(word_t a, word_t b) { + a.e ^= b.e; + a.o ^= b.o; + return a; } -__forceinline word_t ROR64(word_t x, int n) { - word_t r; - r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); - r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); - return r; +__forceinline word_t AND(word_t a, word_t b) { + a.e &= b.e; + a.o &= b.o; + return a; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { @@ -61,22 +57,58 @@ __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return r; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint32_t result = a.e | a.o | b.e | b.o; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ uint32_t mask = 0x0fffffff >> (n * 4 - 4); - return WORD_T((uint64_t)mask << 32 | mask); + return AND(w, WORD_T((uint64_t)mask << 32 | mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t 
w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi8/ascon.h b/ascon/Implementations/crypto_hash/asconhashv12/bi8/ascon.h new file mode 100644 index 0000000..e596d64 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi8/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi8/config.h b/ascon/Implementations/crypto_hash/asconhashv12/bi8/config.h new file mode 100644 index 0000000..ef5eb14 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi8/config.h @@ -0,0 +1,29 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 1 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 0 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi8/endian.h b/ascon/Implementations/crypto_hash/asconhashv12/bi8/endian.h new file mode 100644 index 0000000..3944360 --- 
/dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi8/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi8/hash.c b/ascon/Implementations/crypto_hash/asconhashv12/bi8/hash.c index 056b3fd..bf77d61 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi8/hash.c +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi8/hash.c @@ -1,670 +1,50 @@ -#include - #include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" -#define ROR8(x, n) (((x) >> (n)) | ((x) << (8 - (n)))) - -#define COMPRESS_BYTE_ARRAY_8(a, var_7, var_6, var_5, var_4, var_3, var_2, \ - var_1, var_0) \ - do { \ - var_0 = a[7]; \ - var_1 = var_0 >> 1; \ - var_2 = var_1 >> 1; \ - var_3 = var_2 >> 1; \ - var_4 = var_3 >> 1; \ - var_5 = 
var_4 >> 1; \ - var_6 = var_5 >> 1; \ - var_7 = var_6 >> 1; \ - var_0 &= 1; \ - var_1 &= 1; \ - var_2 &= 1; \ - var_3 &= 1; \ - var_4 &= 1; \ - var_5 &= 1; \ - var_6 &= 1; \ - var_7 &= 1; \ - t1_0 = a[6] << 1; \ - t1_1 = a[6]; \ - t1_2 = t1_1 >> 1; \ - t1_3 = t1_2 >> 1; \ - t1_4 = t1_3 >> 1; \ - t1_5 = t1_4 >> 1; \ - t1_6 = t1_5 >> 1; \ - t1_7 = t1_6 >> 1; \ - var_0 |= t1_0 & 2; \ - var_1 |= t1_1 & 2; \ - var_2 |= t1_2 & 2; \ - var_3 |= t1_3 & 2; \ - var_4 |= t1_4 & 2; \ - var_5 |= t1_5 & 2; \ - var_6 |= t1_6 & 2; \ - var_7 |= t1_7 & 2; \ - t1_1 = a[5] << 1; \ - t1_0 = t1_1 << 1; \ - t1_2 = a[5]; \ - t1_3 = t1_2 >> 1; \ - t1_4 = t1_3 >> 1; \ - t1_5 = t1_4 >> 1; \ - t1_6 = t1_5 >> 1; \ - t1_7 = t1_6 >> 1; \ - var_0 |= t1_0 & 4; \ - var_1 |= t1_1 & 4; \ - var_2 |= t1_2 & 4; \ - var_3 |= t1_3 & 4; \ - var_4 |= t1_4 & 4; \ - var_5 |= t1_5 & 4; \ - var_6 |= t1_6 & 4; \ - var_7 |= t1_7 & 4; \ - t1_2 = a[4] << 1; \ - t1_1 = t1_2 << 1; \ - t1_0 = t1_1 << 1; \ - t1_3 = a[4]; \ - t1_4 = t1_3 >> 1; \ - t1_5 = t1_4 >> 1; \ - t1_6 = t1_5 >> 1; \ - t1_7 = t1_6 >> 1; \ - var_0 |= t1_0 & 8; \ - var_1 |= t1_1 & 8; \ - var_2 |= t1_2 & 8; \ - var_3 |= t1_3 & 8; \ - var_4 |= t1_4 & 8; \ - var_5 |= t1_5 & 8; \ - var_6 |= t1_6 & 8; \ - var_7 |= t1_7 & 8; \ - t1_3 = a[3] << 1; \ - t1_2 = t1_3 << 1; \ - t1_1 = t1_2 << 1; \ - t1_0 = t1_1 << 1; \ - t1_4 = a[3]; \ - t1_5 = t1_4 >> 1; \ - t1_6 = t1_5 >> 1; \ - t1_7 = t1_6 >> 1; \ - var_0 |= t1_0 & 16; \ - var_1 |= t1_1 & 16; \ - var_2 |= t1_2 & 16; \ - var_3 |= t1_3 & 16; \ - var_4 |= t1_4 & 16; \ - var_5 |= t1_5 & 16; \ - var_6 |= t1_6 & 16; \ - var_7 |= t1_7 & 16; \ - t1_4 = a[2] << 1; \ - t1_3 = t1_4 << 1; \ - t1_2 = t1_3 << 1; \ - t1_1 = t1_2 << 1; \ - t1_0 = t1_1 << 1; \ - t1_5 = a[2]; \ - t1_6 = t1_5 >> 1; \ - t1_7 = t1_6 >> 1; \ - var_0 |= t1_0 & 32; \ - var_1 |= t1_1 & 32; \ - var_2 |= t1_2 & 32; \ - var_3 |= t1_3 & 32; \ - var_4 |= t1_4 & 32; \ - var_5 |= t1_5 & 32; \ - var_6 |= t1_6 & 32; \ - var_7 |= t1_7 & 32; \ - t1_5 = a[1] << 
1; \ - t1_4 = t1_5 << 1; \ - t1_3 = t1_4 << 1; \ - t1_2 = t1_3 << 1; \ - t1_1 = t1_2 << 1; \ - t1_0 = t1_1 << 1; \ - t1_6 = a[1]; \ - t1_7 = t1_6 >> 1; \ - var_0 |= t1_0 & 64; \ - var_1 |= t1_1 & 64; \ - var_2 |= t1_2 & 64; \ - var_3 |= t1_3 & 64; \ - var_4 |= t1_4 & 64; \ - var_5 |= t1_5 & 64; \ - var_6 |= t1_6 & 64; \ - var_7 |= t1_7 & 64; \ - t1_6 = a[0] << 1; \ - t1_5 = t1_6 << 1; \ - t1_4 = t1_5 << 1; \ - t1_3 = t1_4 << 1; \ - t1_2 = t1_3 << 1; \ - t1_1 = t1_2 << 1; \ - t1_0 = t1_1 << 1; \ - t1_7 = a[0]; \ - var_0 |= t1_0 & 128; \ - var_1 |= t1_1 & 128; \ - var_2 |= t1_2 & 128; \ - var_3 |= t1_3 & 128; \ - var_4 |= t1_4 & 128; \ - var_5 |= t1_5 & 128; \ - var_6 |= t1_6 & 128; \ - var_7 |= t1_7 & 128; \ - } while (0) - -#define EXPAND_BYTE_ARRAY_8(a, var_7, var_6, var_5, var_4, var_3, var_2, \ - var_1, var_0) \ - do { \ - a[7] = var_0 & 1; \ - t1_0 = var_0 >> 1; \ - a[6] = t1_0 & 1; \ - t1_0 >>= 1; \ - a[5] = t1_0 & 1; \ - t1_0 >>= 1; \ - a[4] = t1_0 & 1; \ - t1_0 >>= 1; \ - a[3] = t1_0 & 1; \ - t1_0 >>= 1; \ - a[2] = t1_0 & 1; \ - t1_0 >>= 1; \ - a[1] = t1_0 & 1; \ - t1_0 >>= 1; \ - a[0] = t1_0 & 1; \ - a[6] |= var_1 & 2; \ - t1_1 = var_1 << 1; \ - a[7] |= t1_1 & 2; \ - t1_1 = var_1 >> 1; \ - a[5] |= t1_1 & 2; \ - t1_1 >>= 1; \ - a[4] |= t1_1 & 2; \ - t1_1 >>= 1; \ - a[3] |= t1_1 & 2; \ - t1_1 >>= 1; \ - a[2] |= t1_1 & 2; \ - t1_1 >>= 1; \ - a[1] |= t1_1 & 2; \ - t1_1 >>= 1; \ - a[0] |= t1_1 & 2; \ - a[5] |= var_2 & 4; \ - t1_2 = var_2 << 1; \ - a[6] |= t1_2 & 4; \ - t1_2 <<= 1; \ - a[7] |= t1_2 & 4; \ - t1_2 = var_2 >> 1; \ - a[4] |= t1_2 & 4; \ - t1_2 >>= 1; \ - a[3] |= t1_2 & 4; \ - t1_2 >>= 1; \ - a[2] |= t1_2 & 4; \ - t1_2 >>= 1; \ - a[1] |= t1_2 & 4; \ - t1_2 >>= 1; \ - a[0] |= t1_2 & 4; \ - a[4] |= var_3 & 8; \ - t1_3 = var_3 << 1; \ - a[5] |= t1_3 & 8; \ - t1_3 <<= 1; \ - a[6] |= t1_3 & 8; \ - t1_3 <<= 1; \ - a[7] |= t1_3 & 8; \ - t1_3 = var_3 >> 1; \ - a[3] |= t1_3 & 8; \ - t1_3 >>= 1; \ - a[2] |= t1_3 & 8; \ - t1_3 >>= 1; \ - a[1] |= t1_3 & 8; \ - 
t1_3 >>= 1; \ - a[0] |= t1_3 & 8; \ - a[3] |= var_4 & 16; \ - t1_4 = var_4 << 1; \ - a[4] |= t1_4 & 16; \ - t1_4 <<= 1; \ - a[5] |= t1_4 & 16; \ - t1_4 <<= 1; \ - a[6] |= t1_4 & 16; \ - t1_4 <<= 1; \ - a[7] |= t1_4 & 16; \ - t1_4 = var_4 >> 1; \ - a[2] |= t1_4 & 16; \ - t1_4 >>= 1; \ - a[1] |= t1_4 & 16; \ - t1_4 >>= 1; \ - a[0] |= t1_4 & 16; \ - a[2] |= var_5 & 32; \ - t1_5 = var_5 << 1; \ - a[3] |= t1_5 & 32; \ - t1_5 <<= 1; \ - a[4] |= t1_5 & 32; \ - t1_5 <<= 1; \ - a[5] |= t1_5 & 32; \ - t1_5 <<= 1; \ - a[6] |= t1_5 & 32; \ - t1_5 <<= 1; \ - a[7] |= t1_5 & 32; \ - t1_5 = var_5 >> 1; \ - a[1] |= t1_5 & 32; \ - t1_5 >>= 1; \ - a[0] |= t1_5 & 32; \ - a[1] |= var_6 & 64; \ - t1_6 = var_6 << 1; \ - a[2] |= t1_6 & 64; \ - t1_6 <<= 1; \ - a[3] |= t1_6 & 64; \ - t1_6 <<= 1; \ - a[4] |= t1_6 & 64; \ - t1_6 <<= 1; \ - a[5] |= t1_6 & 64; \ - t1_6 <<= 1; \ - a[6] |= t1_6 & 64; \ - t1_6 <<= 1; \ - a[7] |= t1_6 & 64; \ - t1_6 = var_6 >> 1; \ - a[0] |= t1_6 & 64; \ - a[0] |= var_7 & 128; \ - t1_7 = var_7 << 1; \ - a[1] |= t1_7 & 128; \ - t1_7 <<= 1; \ - a[2] |= t1_7 & 128; \ - t1_7 <<= 1; \ - a[3] |= t1_7 & 128; \ - t1_7 <<= 1; \ - a[4] |= t1_7 & 128; \ - t1_7 <<= 1; \ - a[5] |= t1_7 & 128; \ - t1_7 <<= 1; \ - a[6] |= t1_7 & 128; \ - t1_7 <<= 1; \ - a[7] |= t1_7 & 128; \ - } while (0) - -/* This way of implementing Ascon's S-box was inpired by personal communication - */ -/* with Joan Daemen about implementing the 3-bit chi layer. 
*/ -#define ROUND_16(C_7, C_6, C_5, C_4, C_3, C_2, C_1, C_0) \ - do { \ - /* round constant */ \ - x2_0 ^= C_0; \ - x2_1 ^= C_1; \ - x2_2 ^= C_2; \ - x2_3 ^= C_3; \ - x2_4 ^= C_4; \ - x2_5 ^= C_5; \ - x2_6 ^= C_6; \ - x2_7 ^= C_7; \ - /* s-box layer */ \ - x0_0 ^= x4_0; \ - x4_0 ^= x3_0; \ - x2_0 ^= x1_0; \ - t0_0 = x0_0 & (~x4_0); \ - t1_0 = x2_0 & (~x1_0); \ - x0_0 ^= t1_0; \ - t1_0 = x4_0 & (~x3_0); \ - x2_0 ^= t1_0; \ - t1_0 = x1_0 & (~x0_0); \ - x4_0 ^= t1_0; \ - t1_0 = x3_0 & (~x2_0); \ - x1_0 ^= t1_0; \ - x3_0 ^= t0_0; \ - x1_0 ^= x0_0; \ - x3_0 ^= x2_0; \ - x0_0 ^= x4_0; \ - x2_0 = ~x2_0; \ - x0_1 ^= x4_1; \ - x4_1 ^= x3_1; \ - x2_1 ^= x1_1; \ - t0_0 = x0_1 & (~x4_1); \ - t1_0 = x2_1 & (~x1_1); \ - x0_1 ^= t1_0; \ - t1_0 = x4_1 & (~x3_1); \ - x2_1 ^= t1_0; \ - t1_0 = x1_1 & (~x0_1); \ - x4_1 ^= t1_0; \ - t1_0 = x3_1 & (~x2_1); \ - x1_1 ^= t1_0; \ - x3_1 ^= t0_0; \ - x1_1 ^= x0_1; \ - x3_1 ^= x2_1; \ - x0_1 ^= x4_1; \ - x2_1 = ~x2_1; \ - x0_2 ^= x4_2; \ - x4_2 ^= x3_2; \ - x2_2 ^= x1_2; \ - t0_0 = x0_2 & (~x4_2); \ - t1_0 = x2_2 & (~x1_2); \ - x0_2 ^= t1_0; \ - t1_0 = x4_2 & (~x3_2); \ - x2_2 ^= t1_0; \ - t1_0 = x1_2 & (~x0_2); \ - x4_2 ^= t1_0; \ - t1_0 = x3_2 & (~x2_2); \ - x1_2 ^= t1_0; \ - x3_2 ^= t0_0; \ - x1_2 ^= x0_2; \ - x3_2 ^= x2_2; \ - x0_2 ^= x4_2; \ - x2_2 = ~x2_2; \ - x0_3 ^= x4_3; \ - x4_3 ^= x3_3; \ - x2_3 ^= x1_3; \ - t0_0 = x0_3 & (~x4_3); \ - t1_0 = x2_3 & (~x1_3); \ - x0_3 ^= t1_0; \ - t1_0 = x4_3 & (~x3_3); \ - x2_3 ^= t1_0; \ - t1_0 = x1_3 & (~x0_3); \ - x4_3 ^= t1_0; \ - t1_0 = x3_3 & (~x2_3); \ - x1_3 ^= t1_0; \ - x3_3 ^= t0_0; \ - x1_3 ^= x0_3; \ - x3_3 ^= x2_3; \ - x0_3 ^= x4_3; \ - x2_3 = ~x2_3; \ - x0_4 ^= x4_4; \ - x4_4 ^= x3_4; \ - x2_4 ^= x1_4; \ - t0_0 = x0_4 & (~x4_4); \ - t1_0 = x2_4 & (~x1_4); \ - x0_4 ^= t1_0; \ - t1_0 = x4_4 & (~x3_4); \ - x2_4 ^= t1_0; \ - t1_0 = x1_4 & (~x0_4); \ - x4_4 ^= t1_0; \ - t1_0 = x3_4 & (~x2_4); \ - x1_4 ^= t1_0; \ - x3_4 ^= t0_0; \ - x1_4 ^= x0_4; \ - x3_4 ^= x2_4; \ - x0_4 ^= x4_4; \ - x2_4 
= ~x2_4; \ - x0_5 ^= x4_5; \ - x4_5 ^= x3_5; \ - x2_5 ^= x1_5; \ - t0_0 = x0_5 & (~x4_5); \ - t1_0 = x2_5 & (~x1_5); \ - x0_5 ^= t1_0; \ - t1_0 = x4_5 & (~x3_5); \ - x2_5 ^= t1_0; \ - t1_0 = x1_5 & (~x0_5); \ - x4_5 ^= t1_0; \ - t1_0 = x3_5 & (~x2_5); \ - x1_5 ^= t1_0; \ - x3_5 ^= t0_0; \ - x1_5 ^= x0_5; \ - x3_5 ^= x2_5; \ - x0_5 ^= x4_5; \ - x2_5 = ~x2_5; \ - x0_6 ^= x4_6; \ - x4_6 ^= x3_6; \ - x2_6 ^= x1_6; \ - t0_0 = x0_6 & (~x4_6); \ - t1_0 = x2_6 & (~x1_6); \ - x0_6 ^= t1_0; \ - t1_0 = x4_6 & (~x3_6); \ - x2_6 ^= t1_0; \ - t1_0 = x1_6 & (~x0_6); \ - x4_6 ^= t1_0; \ - t1_0 = x3_6 & (~x2_6); \ - x1_6 ^= t1_0; \ - x3_6 ^= t0_0; \ - x1_6 ^= x0_6; \ - x3_6 ^= x2_6; \ - x0_6 ^= x4_6; \ - x2_6 = ~x2_6; \ - x0_7 ^= x4_7; \ - x4_7 ^= x3_7; \ - x2_7 ^= x1_7; \ - t0_0 = x0_7 & (~x4_7); \ - t1_0 = x2_7 & (~x1_7); \ - x0_7 ^= t1_0; \ - t1_0 = x4_7 & (~x3_7); \ - x2_7 ^= t1_0; \ - t1_0 = x1_7 & (~x0_7); \ - x4_7 ^= t1_0; \ - t1_0 = x3_7 & (~x2_7); \ - x1_7 ^= t1_0; \ - x3_7 ^= t0_0; \ - x1_7 ^= x0_7; \ - x3_7 ^= x2_7; \ - x0_7 ^= x4_7; \ - x2_7 = ~x2_7; \ - /* linear layer */ \ - t0_0 = x0_0; \ - t0_1 = x0_1; \ - t0_2 = x0_2; \ - t0_3 = x0_3; \ - t0_4 = x0_4; \ - t0_5 = x0_5; \ - t0_6 = x0_6; \ - t0_7 = x0_7; \ - x0_5 ^= ROR8(t0_0, 3); \ - x0_6 ^= ROR8(t0_1, 3); \ - x0_7 ^= ROR8(t0_2, 3); \ - x0_0 ^= ROR8(t0_3, 2); \ - x0_1 ^= ROR8(t0_4, 2); \ - x0_2 ^= ROR8(t0_5, 2); \ - x0_3 ^= ROR8(t0_6, 2); \ - x0_4 ^= ROR8(t0_7, 2); \ - x0_4 ^= ROR8(t0_0, 4); \ - x0_5 ^= ROR8(t0_1, 4); \ - x0_6 ^= ROR8(t0_2, 4); \ - x0_7 ^= ROR8(t0_3, 4); \ - x0_0 ^= ROR8(t0_4, 3); \ - x0_1 ^= ROR8(t0_5, 3); \ - x0_2 ^= ROR8(t0_6, 3); \ - x0_3 ^= ROR8(t0_7, 3); \ - t0_0 = x1_0; \ - t0_1 = x1_1; \ - t0_2 = x1_2; \ - t0_3 = x1_3; \ - t0_4 = x1_4; \ - t0_5 = x1_5; \ - t0_6 = x1_6; \ - t0_7 = x1_7; \ - x1_3 ^= t0_0; \ - x1_4 ^= t0_1; \ - x1_5 ^= t0_2; \ - x1_6 ^= t0_3; \ - x1_7 ^= t0_4; \ - x1_0 ^= ROR8(t0_5, 7); \ - x1_1 ^= ROR8(t0_6, 7); \ - x1_2 ^= ROR8(t0_7, 7); \ - x1_1 ^= ROR8(t0_0, 5); \ - x1_2 ^= 
ROR8(t0_1, 5); \ - x1_3 ^= ROR8(t0_2, 5); \ - x1_4 ^= ROR8(t0_3, 5); \ - x1_5 ^= ROR8(t0_4, 5); \ - x1_6 ^= ROR8(t0_5, 5); \ - x1_7 ^= ROR8(t0_6, 5); \ - x1_0 ^= ROR8(t0_7, 4); \ - t0_0 = x2_0; \ - t0_1 = x2_1; \ - t0_2 = x2_2; \ - t0_3 = x2_3; \ - t0_4 = x2_4; \ - t0_5 = x2_5; \ - t0_6 = x2_6; \ - t0_7 = x2_7; \ - x2_7 ^= ROR8(t0_0, 1); \ - x2_0 ^= t0_1; \ - x2_1 ^= t0_2; \ - x2_2 ^= t0_3; \ - x2_3 ^= t0_4; \ - x2_4 ^= t0_5; \ - x2_5 ^= t0_6; \ - x2_6 ^= t0_7; \ - x2_2 ^= ROR8(t0_0, 1); \ - x2_3 ^= ROR8(t0_1, 1); \ - x2_4 ^= ROR8(t0_2, 1); \ - x2_5 ^= ROR8(t0_3, 1); \ - x2_6 ^= ROR8(t0_4, 1); \ - x2_7 ^= ROR8(t0_5, 1); \ - x2_0 ^= t0_6; \ - x2_1 ^= t0_7; \ - t0_0 = x3_0; \ - t0_1 = x3_1; \ - t0_2 = x3_2; \ - t0_3 = x3_3; \ - t0_4 = x3_4; \ - t0_5 = x3_5; \ - t0_6 = x3_6; \ - t0_7 = x3_7; \ - x3_6 ^= ROR8(t0_0, 2); \ - x3_7 ^= ROR8(t0_1, 2); \ - x3_0 ^= ROR8(t0_2, 1); \ - x3_1 ^= ROR8(t0_3, 1); \ - x3_2 ^= ROR8(t0_4, 1); \ - x3_3 ^= ROR8(t0_5, 1); \ - x3_4 ^= ROR8(t0_6, 1); \ - x3_5 ^= ROR8(t0_7, 1); \ - x3_7 ^= ROR8(t0_0, 3); \ - x3_0 ^= ROR8(t0_1, 2); \ - x3_1 ^= ROR8(t0_2, 2); \ - x3_2 ^= ROR8(t0_3, 2); \ - x3_3 ^= ROR8(t0_4, 2); \ - x3_4 ^= ROR8(t0_5, 2); \ - x3_5 ^= ROR8(t0_6, 2); \ - x3_6 ^= ROR8(t0_7, 2); \ - t0_0 = x4_0; \ - t0_1 = x4_1; \ - t0_2 = x4_2; \ - t0_3 = x4_3; \ - t0_4 = x4_4; \ - t0_5 = x4_5; \ - t0_6 = x4_6; \ - t0_7 = x4_7; \ - x4_1 ^= ROR8(t0_0, 1); \ - x4_2 ^= ROR8(t0_1, 1); \ - x4_3 ^= ROR8(t0_2, 1); \ - x4_4 ^= ROR8(t0_3, 1); \ - x4_5 ^= ROR8(t0_4, 1); \ - x4_6 ^= ROR8(t0_5, 1); \ - x4_7 ^= ROR8(t0_6, 1); \ - x4_0 ^= t0_7; \ - x4_7 ^= ROR8(t0_0, 6); \ - x4_0 ^= ROR8(t0_1, 5); \ - x4_1 ^= ROR8(t0_2, 5); \ - x4_2 ^= ROR8(t0_3, 5); \ - x4_3 ^= ROR8(t0_4, 5); \ - x4_4 ^= ROR8(t0_5, 5); \ - x4_5 ^= ROR8(t0_6, 5); \ - x4_6 ^= ROR8(t0_7, 5); \ - } while (0) - -#define P12_8 \ - do { \ - ROUND_16(1, 1, 1, 1, 0, 0, 0, 0); \ - ROUND_16(1, 1, 1, 0, 0, 0, 0, 1); \ - ROUND_16(1, 1, 0, 1, 0, 0, 1, 0); \ - ROUND_16(1, 1, 0, 0, 0, 0, 1, 1); \ - 
ROUND_16(1, 0, 1, 1, 0, 1, 0, 0); \ - ROUND_16(1, 0, 1, 0, 0, 1, 0, 1); \ - ROUND_16(1, 0, 0, 1, 0, 1, 1, 0); \ - ROUND_16(1, 0, 0, 0, 0, 1, 1, 1); \ - ROUND_16(0, 1, 1, 1, 1, 0, 0, 0); \ - ROUND_16(0, 1, 1, 0, 1, 0, 0, 1); \ - ROUND_16(0, 1, 0, 1, 1, 0, 1, 0); \ - ROUND_16(0, 1, 0, 0, 1, 0, 1, 1); \ - } while (0) - -int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t inlen) { - uint64_t rlen; - uint64_t i; - - uint8_t buffer[8]; - - uint8_t x0_0, x1_0, x2_0, x3_0, x4_0; - uint8_t t0_0, t1_0; - - uint8_t x0_1, x1_1, x2_1, x3_1, x4_1; - uint8_t t0_1, t1_1; - - uint8_t x0_2, x1_2, x2_2, x3_2, x4_2; - uint8_t t0_2, t1_2; - - uint8_t x0_3, x1_3, x2_3, x3_3, x4_3; - uint8_t t0_3, t1_3; - - uint8_t x0_4, x1_4, x2_4, x3_4, x4_4; - uint8_t t0_4, t1_4; - - uint8_t x0_5, x1_5, x2_5, x3_5, x4_5; - uint8_t t0_5, t1_5; - - uint8_t x0_6, x1_6, x2_6, x3_6, x4_6; - uint8_t t0_6, t1_6; - - uint8_t x0_7, x1_7, x2_7, x3_7, x4_7; - uint8_t t0_7, t1_7; - - uint8_t in_0, in_1, in_2, in_3, in_4, in_5, in_6, in_7; +int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { + state_t s; /* initialization */ - x0_0 = 0x4d; - x0_1 = 0xdc; - x0_2 = 0x85; - x0_3 = 0xb9; - x0_4 = 0x6b; - x0_5 = 0x97; - x0_6 = 0x8e; - x0_7 = 0xfa; - x1_0 = 0x94; - x1_1 = 0xcd; - x1_2 = 0xc; - x1_3 = 0xa4; - x1_4 = 0x72; - x1_5 = 0x50; - x1_6 = 0x8; - x1_7 = 0xc8; - x2_0 = 0x14; - x2_1 = 0x73; - x2_2 = 0x84; - x2_3 = 0x5a; - x2_4 = 0xbe; - x2_5 = 0x81; - x2_6 = 0x17; - x2_7 = 0xfe; - x3_0 = 0xb2; - x3_1 = 0x82; - x3_2 = 0x0; - x3_3 = 0x6d; - x3_4 = 0x6c; - x3_5 = 0x1f; - x3_6 = 0x87; - x3_7 = 0x2f; - x4_0 = 0x7e; - x4_1 = 0x40; - x4_2 = 0xec; - x4_3 = 0x50; - x4_4 = 0x88; - x4_5 = 0xa6; - x4_6 = 0x1b; - x4_7 = 0x7a; - - /* absorb */ - rlen = inlen; - while (rlen >= ASCON_RATE) { - COMPRESS_BYTE_ARRAY_8(in, in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0); - x0_0 ^= in_0; - x0_1 ^= in_1; - x0_2 ^= in_2; - x0_3 ^= in_3; - x0_4 ^= in_4; - x0_5 ^= in_5; - x0_6 ^= in_6; - x0_7 ^= in_7; - P12_8; - rlen -= 
ASCON_RATE; +#ifdef ASCON_HASH + s.x0 = ASCON_HASH_IV0; + s.x1 = ASCON_HASH_IV1; + s.x2 = ASCON_HASH_IV2; + s.x3 = ASCON_HASH_IV3; + s.x4 = ASCON_HASH_IV4; +#endif +#ifdef ASCON_XOF + s.x0 = ASCON_XOF_IV0; + s.x1 = ASCON_XOF_IV1; + s.x2 = ASCON_XOF_IV2; + s.x3 = ASCON_XOF_IV3; + s.x4 = ASCON_XOF_IV4; +#endif + printstate("initialization", &s); + + /* absorb plaintext */ + while (len >= ASCON_RATE) { + s.x0 = XOR(s.x0, LOAD64(in)); + P12(&s); in += ASCON_RATE; + len -= ASCON_RATE; } - for (i = 0; i < rlen; ++i, ++in) buffer[i] = *in; - buffer[rlen] = 0x80; - for (i = rlen + 1; i < 8; ++i) buffer[i] = 0; - COMPRESS_BYTE_ARRAY_8(buffer, in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0); - x0_0 ^= in_0; - x0_1 ^= in_1; - x0_2 ^= in_2; - x0_3 ^= in_3; - x0_4 ^= in_4; - x0_5 ^= in_5; - x0_6 ^= in_6; - x0_7 ^= in_7; - P12_8; - - /* squeeze (full blocks) */ - rlen = CRYPTO_BYTES; - while (rlen > ASCON_RATE) { - EXPAND_BYTE_ARRAY_8(out, x0_7, x0_6, x0_5, x0_4, x0_3, x0_2, x0_1, x0_0); - P12_8; - rlen -= ASCON_RATE; + if (len) s.x0 = XOR(s.x0, LOAD(in, len)); + s.x0 = XOR(s.x0, PAD(len)); + P12(&s); + printstate("absorb plaintext", &s); + + /* squeeze output */ + len = CRYPTO_BYTES; + while (len > ASCON_RATE) { + STORE64(out, s.x0); + P12(&s); out += ASCON_RATE; + len -= ASCON_RATE; } - EXPAND_BYTE_ARRAY_8(out, x0_7, x0_6, x0_5, x0_4, x0_3, x0_2, x0_1, x0_0); + STORE64(out, s.x0); + printstate("squeeze output", &s); return 0; } diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi8/interleave.c b/ascon/Implementations/crypto_hash/asconhashv12/bi8/interleave.c new file mode 100644 index 0000000..659255b --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi8/interleave.c @@ -0,0 +1,12 @@ +#include "interleave.h" + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +uint64_t interleave8(uint64_t x) { + x = (x & 0xaa55aa55aa55aa55ull) | ((x & 0x00aa00aa00aa00aaull) << 7) | + ((x >> 7) & 0x00aa00aa00aa00aaull); + x = (x & 0xcccc3333cccc3333ull) | ((x & 0x0000cccc0000ccccull) << 14) | + ((x >> 14) & 0x0000cccc0000ccccull); + x = (x & 0xf0f0f0f00f0f0f0full) | ((x & 0x00000000f0f0f0f0ull) << 28) | + ((x >> 28) & 0x00000000f0f0f0f0ull); + return x; +} diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi8/interleave.h b/ascon/Implementations/crypto_hash/asconhashv12/bi8/interleave.h new file mode 100644 index 0000000..62937e0 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi8/interleave.h @@ -0,0 +1,8 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include + +uint64_t interleave8(uint64_t x); + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi8/permutations.c b/ascon/Implementations/crypto_hash/asconhashv12/bi8/permutations.c new file mode 100644 index 0000000..a0cc038 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi8/permutations.c @@ -0,0 +1,45 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint64_t constants[12] = { + 0x0101010100000000ull, 0x0101010000000001ull, 0x0101000100000100ull, + 0x0101000000000101ull, 0x0100010100010000ull, 0x0100010000010001ull, + 0x0100000100010100ull, 0x0100000000010101ull, 0x0001010101000000ull, + 0x0001010001000001ull, 0x0001000101000100ull, 0x0001000001000101ull}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; ++i) ROUND(s, constants[i]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { + 
printstate(" permutation input", s); + P8ROUNDS(s); +} +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} +#endif + +#endif diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi8/permutations.h b/ascon/Implementations/crypto_hash/asconhashv12/bi8/permutations.h new file mode 100644 index 0000000..ee1b625 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi8/permutations.h @@ -0,0 +1,165 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV WORD_T(0x8040000020301000ull) +#define ASCON_128A_IV WORD_T(0xc000000030200000ull) +#define ASCON_80PQ_IV WORD_T(0x8040800020301000ull) +#define ASCON_HASH_IV WORD_T(0x0040000020200002ull) +#define ASCON_XOF_IV WORD_T(0x0040000020200000ull) + +#define ASCON_HASH_IV0 WORD_T(0xfa8e976bb985dc4dull) +#define ASCON_HASH_IV1 WORD_T(0xc8085072a40ccd94ull) +#define ASCON_HASH_IV2 WORD_T(0xfe1781be5a847314ull) +#define ASCON_HASH_IV3 WORD_T(0x2f871f6c6d0082b2ull) +#define ASCON_HASH_IV4 WORD_T(0x7a1ba68850ec407eull) + +#define ASCON_XOF_IV0 WORD_T(0x8a46f0d354e771b8ull) +#define ASCON_XOF_IV1 WORD_T(0x04489f4084368cd0ull) +#define ASCON_XOF_IV2 WORD_T(0x6c94f2150dbcf66cull) +#define ASCON_XOF_IV3 WORD_T(0x48965294f143b44eull) +#define ASCON_XOF_IV4 WORD_T(0x0788515fe0e5fb8aull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if 
ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - n) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0x0101010100000000ull); + ROUND(s, 0x0101010000000001ull); + ROUND(s, 0x0101000100000100ull); + ROUND(s, 0x0101000000000101ull); + ROUND(s, 0x0100010100010000ull); + ROUND(s, 0x0100010000010001ull); + ROUND(s, 0x0100000100010100ull); + ROUND(s, 0x0100000000010101ull); + ROUND(s, 0x0001010101000000ull); + ROUND(s, 0x0001010001000001ull); + ROUND(s, 0x0001000101000100ull); + ROUND(s, 0x0001000001000101ull); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x0100010100010000ull); + ROUND(s, 0x0100010000010001ull); + ROUND(s, 0x0100000100010100ull); + ROUND(s, 0x0100000000010101ull); + ROUND(s, 0x0001010101000000ull); + ROUND(s, 0x0001010001000001ull); + ROUND(s, 0x0001000101000100ull); + ROUND(s, 0x0001000001000101ull); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x0100000100010100ull); + ROUND(s, 0x0100000000010101ull); + ROUND(s, 0x0001010101000000ull); + ROUND(s, 0x0001010001000001ull); + ROUND(s, 0x0001000101000100ull); + ROUND(s, 0x0001000001000101ull); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint64_t constants[12]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; ++i) ROUND(s, constants[i]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; ++i) ROUND(s, constants[i]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; ++i) ROUND(s, constants[i]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void 
P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi8/printstate.h b/ascon/Implementations/crypto_hash/asconhashv12/bi8/printstate.h new file mode 100644 index 0000000..2021f96 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi8/printstate.h @@ -0,0 +1,32 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" +#include "word.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi8/round.h b/ascon/Implementations/crypto_hash/asconhashv12/bi8/round.h new file mode 100644 index 0000000..15c7795 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi8/round.h @@ -0,0 +1,53 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + 
s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint64_t C) { + word_t tmp; + /* round constant */ + s->x2 = XOR(s->x2, WORD_T(C)); + /* s-box layer */ + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); + /* linear layer */ + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi8/word.h b/ascon/Implementations/crypto_hash/asconhashv12/bi8/word.h new file mode 100644 index 0000000..f1b5cbb --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi8/word.h @@ -0,0 +1,129 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "endian.h" +#include "interleave.h" + +typedef union { + uint64_t w; + uint8_t b[8]; +} word_t; + +__forceinline uint8_t ROR8(uint8_t a, int n) { return a >> n | a << (8 - n); } + +__forceinline word_t ROR64(word_t a, int n) { + word_t b; + b.b[0] = ROR8(a.b[(n + 0) & 0x7], (n + 0) >> 3); + b.b[1] = ROR8(a.b[(n + 1) & 0x7], (n + 1) >> 3); + b.b[2] = ROR8(a.b[(n + 2) & 0x7], (n + 2) >> 3); + b.b[3] = ROR8(a.b[(n + 3) & 0x7], (n + 3) >> 3); + b.b[4] = ROR8(a.b[(n + 4) & 0x7], (n + 4) >> 3); + b.b[5] = ROR8(a.b[(n + 5) & 0x7], (n + 5) >> 3); + b.b[6] = ROR8(a.b[(n + 6) & 0x7], 
(n + 6) >> 3); + b.b[7] = ROR8(a.b[(n + 7) & 0x7], (n + 7) >> 3); + return b; +} + +__forceinline word_t WORD_T(uint64_t x) { + word_t w; + w.w = x; + return w; +} + +__forceinline uint64_t UINT64_T(word_t w) { + uint64_t x; + x = w.w; + return x; +} + +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(interleave8(x)); } + +__forceinline uint64_t WORDTOU64(word_t w) { return interleave8(UINT64_T(w)); } + +__forceinline word_t NOT(word_t a) { + a.w = ~a.w; + return a; +} + +__forceinline word_t XOR(word_t a, word_t b) { + a.w ^= b.w; + return a; +} + +__forceinline word_t AND(word_t a, word_t b) { + a.w &= b.w; + return a; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t w; + w.w = lo2hi.w << 32 | hi2lo.w >> 32; + return w; +} + +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint64_t result = a.w | b.w; + result |= result >> 32; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; +} + +__forceinline word_t PAD(int i) { return (word_t){.b[7] = 0x80 >> i}; } + +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint8_t m = 0xff >> n; + word_t mask = { + .b[0] = m, + .b[1] = m, + .b[2] = m, + .b[3] = m, + .b[4] = m, + .b[5] = m, + .b[6] = m, + .b[7] = m, + }; + return AND(w, mask); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, 
int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64/ascon.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64/ascon.h index 10a5b6e..e596d64 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/opt64/ascon.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64/ascon.h @@ -16,4 +16,4 @@ void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); void ascon_final(state_t* s, const uint8_t* k); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64/config.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64/config.h index 8d8a1a0..7dfad92 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/opt64/config.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 1 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'H' -#endif - /* make sure __forceinline is supported */ #ifndef __forceinline #define __forceinline inline __attribute__((always_inline)) diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64/hash.c b/ascon/Implementations/crypto_hash/asconhashv12/opt64/hash.c index 34dec63..bf77d61 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/opt64/hash.c +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64/hash.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -26,13 +25,13 @@ int crypto_hash(uint8_t* out, const uint8_t* in, 
uint64_t len) { /* absorb plaintext */ while (len >= ASCON_RATE) { - XOR(s.x0, LOAD64(in)); + s.x0 = XOR(s.x0, LOAD64(in)); P12(&s); in += ASCON_RATE; len -= ASCON_RATE; } - if (len) XOR(s.x0, LOAD(in, len)); - XOR(s.x0, PAD(len)); + if (len) s.x0 = XOR(s.x0, LOAD(in, len)); + s.x0 = XOR(s.x0, PAD(len)); P12(&s); printstate("absorb plaintext", &s); diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64/permutations.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64/permutations.h index 6172dd5..66f3cf3 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/opt64/permutations.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) - -#define ASCON_XOF_IV0 
U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x80400c0600000000) +#define ASCON_128A_IV WORD_T(0x80800c0800000000) +#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000) +#define ASCON_HASH_IV WORD_T(0x00400c0000000100) +#define ASCON_XOF_IV WORD_T(0x00400c0000000000) + +#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 WORD_T(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64/printstate.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/opt64/printstate.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64/word.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64/word.h index 5d601bb..9acbd12 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/opt64/word.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64/word.h @@ -3,45 +3,78 @@ #include -#include "config.h" +#include "endian.h" typedef uint64_t 
word_t; #define WORD_T #define UINT64_T - #define U64TOWORD #define WORDTOU64 -#define XOR(a, b) \ - do { \ - (a) ^= (b); \ - } while (0) +__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } -#define AND(a, b) \ - do { \ - (a) &= (b); \ - } while (0) +__forceinline word_t NOT(word_t a) { return ~a; } -__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } +__forceinline word_t XOR(word_t a, word_t b) { return a ^ b; } + +__forceinline word_t AND(word_t a, word_t b) { return a & b; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return lo2hi << 32 | hi2lo >> 32; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint64_t result = a | b; + result |= result >> 32; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { - return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint64_t mask = 0x00ffffffffffffffull >> (n * 8 - 8); + return AND(w, WORD_T(mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int 
n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/ascon.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/ascon.h index aa685d3..c89ec89 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/ascon.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/ascon.h @@ -10,9 +10,11 @@ typedef struct { word_t x0, x1, x2, x3, x4; } state_t; -#define ASCON_AD 0 -#define ASCON_ENC 1 -#define ASCON_DEC 2 +#define ASCON_ABSORB 0x1 +#define ASCON_SQUEEZE 0x2 +#define ASCON_INSERT 0x4 +#define ASCON_ENCRYPT (ASCON_ABSORB | ASCON_SQUEEZE) +#define ASCON_DECRYPT (ASCON_ABSORB | ASCON_SQUEEZE | ASCON_INSERT) void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, uint8_t mode); @@ -21,4 +23,4 @@ void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, const uint8_t* ad, uint64_t adlen, const uint8_t* npub, const uint8_t* k, uint8_t mode); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/config.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/config.h index ec8bd6f..ca59e3b 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/config.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 1 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'H' -#endif - /* make 
sure __forceinline is supported */ #ifndef __forceinline #define __forceinline inline __attribute__((always_inline)) diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/hash.c b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/hash.c index 34dec63..bf77d61 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/hash.c +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/hash.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -26,13 +25,13 @@ int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { /* absorb plaintext */ while (len >= ASCON_RATE) { - XOR(s.x0, LOAD64(in)); + s.x0 = XOR(s.x0, LOAD64(in)); P12(&s); in += ASCON_RATE; len -= ASCON_RATE; } - if (len) XOR(s.x0, LOAD(in, len)); - XOR(s.x0, PAD(len)); + if (len) s.x0 = XOR(s.x0, LOAD(in, len)); + s.x0 = XOR(s.x0, PAD(len)); P12(&s); printstate("absorb plaintext", &s); diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/permutations.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/permutations.h index 6172dd5..66f3cf3 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/permutations.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - 
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) - -#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x80400c0600000000) +#define ASCON_128A_IV WORD_T(0x80800c0800000000) +#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000) +#define ASCON_HASH_IV WORD_T(0x00400c0000000100) +#define ASCON_XOF_IV WORD_T(0x00400c0000000000) + +#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 WORD_T(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/printstate.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/printstate.h index 34bd476..2021f96 100644 
--- a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/printstate.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/round.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/round.h index 077cbfd..cc5cd9f 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/round.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/round.h @@ -19,50 +19,34 @@ __forceinline void PINIT(state_t* s) { } __forceinline void ROUND(state_t* s, uint64_t C) { - state_t t; - s->x2 ^= C; - s->x0 ^= s->x4; - s->x4 ^= s->x3; - s->x2 ^= s->x1; - t.x0 = s->x0; - t.x4 = s->x4; - t.x3 = s->x3; - t.x1 = s->x1; - t.x2 = s->x2; - s->x0 = t.x0 ^ (~t.x1 & t.x2); - s->x2 = t.x2 ^ (~t.x3 & t.x4); - s->x4 = t.x4 ^ (~t.x0 & t.x1); - s->x1 = t.x1 ^ (~t.x2 & t.x3); - s->x3 = t.x3 ^ (~t.x4 & t.x0); - s->x1 ^= s->x0; - t.x1 = s->x1; - s->x1 = ROR64(s->x1, 39); - s->x3 ^= s->x2; - t.x2 = s->x2; - s->x2 = ROR64(s->x2, 1); - t.x4 = s->x4; - t.x2 ^= s->x2; - s->x2 = ROR64(s->x2, 6 - 1); - t.x3 = s->x3; - t.x1 ^= s->x1; - s->x3 = ROR64(s->x3, 10); - s->x0 ^= s->x4; - s->x4 = ROR64(s->x4, 7); - t.x3 ^= s->x3; - s->x2 ^= t.x2; - s->x1 = ROR64(s->x1, 61 - 39); - t.x0 = s->x0; - s->x2 = ~s->x2; - s->x3 = ROR64(s->x3, 17 - 10); - t.x4 ^= s->x4; - s->x4 = ROR64(s->x4, 41 - 7); - s->x3 ^= t.x3; - s->x1 ^= t.x1; - s->x0 = ROR64(s->x0, 19); - s->x4 ^= t.x4; - t.x0 ^= s->x0; - s->x0 = ROR64(s->x0, 28 - 19); - s->x0 ^= t.x0; + word_t tmp; + /* round constant */ + s->x2 = XOR(s->x2, C); + /* s-box layer */ + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = 
XOR(s->x2, AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); + /* linear layer */ + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); printstate(" round output", s); } diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/word.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/word.h index 5d601bb..9acbd12 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/word.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/word.h @@ -3,45 +3,78 @@ #include -#include "config.h" +#include "endian.h" typedef uint64_t word_t; #define WORD_T #define UINT64_T - #define U64TOWORD #define WORDTOU64 -#define XOR(a, b) \ - do { \ - (a) ^= (b); \ - } while (0) +__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } -#define AND(a, b) \ - do { \ - (a) &= (b); \ - } while (0) +__forceinline word_t NOT(word_t a) { return ~a; } -__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } +__forceinline word_t XOR(word_t a, word_t b) { return a ^ b; } + +__forceinline word_t AND(word_t a, word_t b) { return a & b; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return lo2hi << 32 | hi2lo >> 32; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t 
NOTZERO(word_t a, word_t b) { + uint64_t result = a | b; + result |= result >> 32; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { - return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint64_t mask = 0x00ffffffffffffffull >> (n * 8 - 8); + return AND(w, WORD_T(mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt8/api.h b/ascon/Implementations/crypto_hash/asconhashv12/opt8/api.h new file mode 100644 index 0000000..ad0325e --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt8/api.h @@ -0,0 +1,3 @@ +#define CRYPTO_BYTES 32 +#define ASCON_RATE 8 +#define ASCON_HASH diff --git 
a/ascon/Implementations/crypto_hash/asconhashv12/opt8/ascon.h b/ascon/Implementations/crypto_hash/asconhashv12/opt8/ascon.h new file mode 100644 index 0000000..e596d64 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt8/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt8/config.h b/ascon/Implementations/crypto_hash/asconhashv12/opt8/config.h new file mode 100644 index 0000000..ef5eb14 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt8/config.h @@ -0,0 +1,29 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 1 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 0 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt8/endian.h b/ascon/Implementations/crypto_hash/asconhashv12/opt8/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt8/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if 
defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt8/hash.c b/ascon/Implementations/crypto_hash/asconhashv12/opt8/hash.c new file mode 100644 index 0000000..bf77d61 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt8/hash.c @@ -0,0 +1,50 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { + state_t s; + + /* initialization */ +#ifdef ASCON_HASH + s.x0 = ASCON_HASH_IV0; + s.x1 = ASCON_HASH_IV1; + s.x2 = ASCON_HASH_IV2; + s.x3 = ASCON_HASH_IV3; + s.x4 = ASCON_HASH_IV4; +#endif +#ifdef ASCON_XOF + s.x0 = ASCON_XOF_IV0; + s.x1 = ASCON_XOF_IV1; + s.x2 = ASCON_XOF_IV2; + s.x3 = ASCON_XOF_IV3; + s.x4 = ASCON_XOF_IV4; +#endif + printstate("initialization", &s); + + /* absorb plaintext */ + 
while (len >= ASCON_RATE) { + s.x0 = XOR(s.x0, LOAD64(in)); + P12(&s); + in += ASCON_RATE; + len -= ASCON_RATE; + } + if (len) s.x0 = XOR(s.x0, LOAD(in, len)); + s.x0 = XOR(s.x0, PAD(len)); + P12(&s); + printstate("absorb plaintext", &s); + + /* squeeze output */ + len = CRYPTO_BYTES; + while (len > ASCON_RATE) { + STORE64(out, s.x0); + P12(&s); + out += ASCON_RATE; + len -= ASCON_RATE; + } + STORE64(out, s.x0); + printstate("squeeze output", &s); + + return 0; +} diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt8/implementors b/ascon/Implementations/crypto_hash/asconhashv12/opt8/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt8/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt8/permutations.c b/ascon/Implementations/crypto_hash/asconhashv12/opt8/permutations.c new file mode 100644 index 0000000..8d39320 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt8/permutations.c @@ -0,0 +1,35 @@ +#include "permutations.h" + +#include "round.h" + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} +#endif + +#endif diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt8/permutations.h b/ascon/Implementations/crypto_hash/asconhashv12/opt8/permutations.h new file mode 100644 index 0000000..66f3cf3 --- /dev/null +++ 
b/ascon/Implementations/crypto_hash/asconhashv12/opt8/permutations.h @@ -0,0 +1,163 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV WORD_T(0x80400c0600000000) +#define ASCON_128A_IV WORD_T(0x80800c0800000000) +#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000) +#define ASCON_HASH_IV WORD_T(0x00400c0000000100) +#define ASCON_XOF_IV WORD_T(0x00400c0000000000) + +#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 WORD_T(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) ((3 + (n)) << 4 | (12 - (n))) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xf0); + ROUND(s, 0xe1); + ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 
0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt8/printstate.h b/ascon/Implementations/crypto_hash/asconhashv12/opt8/printstate.h new file mode 100644 index 0000000..2021f96 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt8/printstate.h @@ -0,0 +1,32 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) 
+#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" +#include "word.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt8/round.h b/ascon/Implementations/crypto_hash/asconhashv12/opt8/round.h new file mode 100644 index 0000000..15c7795 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt8/round.h @@ -0,0 +1,53 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint64_t C) { + word_t tmp; + /* round constant */ + s->x2 = XOR(s->x2, WORD_T(C)); + /* s-box layer */ + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); + /* linear layer */ + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); 
+ s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt8/word.h b/ascon/Implementations/crypto_hash/asconhashv12/opt8/word.h new file mode 100644 index 0000000..873313d --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt8/word.h @@ -0,0 +1,115 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "endian.h" + +typedef union { + uint64_t w; + uint8_t b[8]; +} word_t; + +#define U64TOWORD WORD_T +#define WORDTOU64 UINT64_T + +#define XMUL(i, x) \ + do { \ + tmp = (uint16_t)a.b[i] * (1 << (x)); \ + b.b[(byte_rol + (i)) & 0x7] ^= (uint8_t)tmp; \ + b.b[(byte_rol + (i) + 1) & 0x7] ^= tmp >> 8; \ + } while (0) + +__forceinline word_t ROR64(word_t a, int n) { + word_t b = {.w = 0ull}; + int bit_rol = (64 - n) & 0x7; + int byte_rol = (64 - n) >> 3; + uint16_t tmp; + XMUL(0, bit_rol); + XMUL(1, bit_rol); + XMUL(2, bit_rol); + XMUL(3, bit_rol); + XMUL(4, bit_rol); + XMUL(5, bit_rol); + XMUL(6, bit_rol); + XMUL(7, bit_rol); + return b; +} + +__forceinline word_t WORD_T(uint64_t x) { return (word_t){.w = x}; } + +__forceinline uint64_t UINT64_T(word_t w) { return w.w; } + +__forceinline word_t NOT(word_t a) { + a.w = ~a.w; + return a; +} + +__forceinline word_t XOR(word_t a, word_t b) { + a.w ^= b.w; + return a; +} + +__forceinline word_t AND(word_t a, word_t b) { + a.w &= b.w; + return a; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + return (word_t){.w = lo2hi.w << 32 | hi2lo.w >> 32}; +} + +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint64_t result = a.w | b.w; + result |= result >> 32; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; +} + +__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + 
return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint64_t mask = 0x00ffffffffffffffull >> (n * 8 - 8); + return AND(w, WORD_T(mask)); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/ref/ascon.h b/ascon/Implementations/crypto_hash/asconhashv12/ref/ascon.h index 8ab0502..c998868 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/ref/ascon.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/ref/ascon.h @@ -3,12 +3,8 @@ #include -#define WORDTOU64 - -typedef uint64_t word_t; - typedef struct { - word_t x0, x1, x2, x3, x4; + uint64_t x0, x1, x2, x3, x4; } state_t; -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/ref/hash.c b/ascon/Implementations/crypto_hash/asconhashv12/ref/hash.c index 61dea5c..6277d7d 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/ref/hash.c +++ b/ascon/Implementations/crypto_hash/asconhashv12/ref/hash.c @@ -1,8 +1,8 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" 
#include "printstate.h" +#include "word.h" int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { state_t s; @@ -18,12 +18,12 @@ int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { /* absorb plaintext */ while (len >= ASCON_128_RATE) { - s.x0 ^= LOAD(in, 8); + s.x0 ^= LOADBYTES(in, 8); P12(&s); in += ASCON_128_RATE; len -= ASCON_128_RATE; } - s.x0 ^= LOAD(in, len); + s.x0 ^= LOADBYTES(in, len); s.x0 ^= PAD(len); P12(&s); printstate("absorb plaintext", &s); @@ -31,12 +31,12 @@ int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { /* squeeze output */ len = CRYPTO_BYTES; while (len > ASCON_128_RATE) { - STORE(out, s.x0, 8); + STOREBYTES(out, s.x0, 8); P12(&s); out += ASCON_128_RATE; len -= ASCON_128_RATE; } - STORE(out, s.x0, 8); + STOREBYTES(out, s.x0, 8); printstate("squeeze output", &s); return 0; diff --git a/ascon/Implementations/crypto_hash/asconhashv12/ref/printstate.h b/ascon/Implementations/crypto_hash/asconhashv12/ref/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/ref/printstate.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/ref/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_hash/asconhashv12/ref/word.h b/ascon/Implementations/crypto_hash/asconhashv12/ref/word.h new file mode 100644 index 0000000..5a1519b --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/ref/word.h @@ -0,0 +1,35 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#define WORDTOU64 +#define U64TOWORD + +typedef uint64_t word_t; + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +/* set padding byte in Ascon 64-bit word */ +#define PAD(i) 
SETBYTE(0x80, i) + +static inline uint64_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) x |= SETBYTE(bytes[i], i); + return x; +} + +static inline void STOREBYTES(uint8_t* bytes, uint64_t x, int n) { + for (int i = 0; i < n; ++i) bytes[i] = GETBYTE(x, i); +} + +static inline uint64_t CLEARBYTES(uint64_t x, int n) { + for (int i = 0; i < n; ++i) x &= ~SETBYTE(0xff, i); + return x; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32/ascon.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32/ascon.h index 10a5b6e..e596d64 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/bi32/ascon.h +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32/ascon.h @@ -16,4 +16,4 @@ void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); void ascon_final(state_t* s, const uint8_t* k); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32/config.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32/config.h index b1b5080..1447e7d 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/bi32/config.h +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 0 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'M' -#endif - /* make sure __forceinline is supported */ #ifndef __forceinline #define __forceinline inline __attribute__((always_inline)) diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32/hash.c b/ascon/Implementations/crypto_hash/asconxofv12/bi32/hash.c index 34dec63..bf77d61 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/bi32/hash.c +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32/hash.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" 
#include "permutations.h" #include "printstate.h" @@ -26,13 +25,13 @@ int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { /* absorb plaintext */ while (len >= ASCON_RATE) { - XOR(s.x0, LOAD64(in)); + s.x0 = XOR(s.x0, LOAD64(in)); P12(&s); in += ASCON_RATE; len -= ASCON_RATE; } - if (len) XOR(s.x0, LOAD(in, len)); - XOR(s.x0, PAD(len)); + if (len) s.x0 = XOR(s.x0, LOAD(in, len)); + s.x0 = XOR(s.x0, PAD(len)); P12(&s); printstate("absorb plaintext", &s); diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32/interleave.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32/interleave.h new file mode 100644 index 0000000..6e2e5c3 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32/interleave.h @@ -0,0 +1,47 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include + +__forceinline uint32_t deinterleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + return x; +} + +__forceinline uint32_t interleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + return x; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t deinterleave32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + lo = deinterleave_uint32(lo); + hi = deinterleave_uint32(hi); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t interleave32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + lo = interleave_uint32(lo); + hi = interleave_uint32(hi); + return (uint64_t)hi << 32 | lo; +} + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32/permutations.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32/permutations.h index ef338f1..49fd52a 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/bi32/permutations.h +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) - -#define 
ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x8021000008220000) +#define ASCON_128A_IV WORD_T(0x8822000000200000) +#define ASCON_80PQ_IV WORD_T(0xc021000008220000) +#define ASCON_HASH_IV WORD_T(0x0020000008020010) +#define ASCON_XOF_IV WORD_T(0x0020000008020000) + +#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7) +#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340) +#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8) +#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98) +#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398) + +#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6) +#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220) +#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04) +#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2) +#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32/printstate.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/bi32/printstate.h +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32/word.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32/word.h index 8ffcaaa..b27c6c9 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/bi32/word.h +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32/word.h @@ -3,55 +3,51 @@ #include -#include "config.h" +#include "endian.h" +#include "interleave.h" typedef struct { uint32_t 
e; uint32_t o; } word_t; +__forceinline uint32_t ROR32(uint32_t x, int n) { + return (n == 0) ? x : x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + __forceinline word_t WORD_T(uint64_t x) { return (word_t){.o = x >> 32, .e = x}; } __forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } -__forceinline uint64_t TOBI32(uint64_t in); - -__forceinline uint64_t FROMBI32(uint64_t in); +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } -__forceinline word_t U64TOWORD(uint64_t x) { - uint64_t w = TOBI32(x); - return (word_t){.o = w >> 32, .e = w}; -} +__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } -__forceinline uint64_t WORDTOU64(word_t w) { - return FROMBI32((uint64_t)w.o << 32 | w.e); +__forceinline word_t NOT(word_t a) { + a.e = ~a.e; + a.o = ~a.o; + return a; } -#define XOR(a, b) \ - do { \ - word_t tb = b; \ - (a).e ^= tb.e; \ - (a).o ^= tb.o; \ - } while (0) - -#define AND(a, b) \ - do { \ - word_t tb = b; \ - (a).e &= tb.e; \ - (a).o &= tb.o; \ - } while (0) - -__forceinline uint32_t ROR32(uint32_t x, int n) { - return x >> n | x << (32 - n); +__forceinline word_t XOR(word_t a, word_t b) { + a.e ^= b.e; + a.o ^= b.o; + return a; } -__forceinline word_t ROR64(word_t x, int n) { - word_t r; - r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); - r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); - return r; +__forceinline word_t AND(word_t a, word_t b) { + a.e &= b.e; + a.o &= b.o; + return a; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { @@ -61,57 +57,58 @@ __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return r; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint32_t result = a.e | a.o | b.e | b.o; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ uint32_t mask = 0x0fffffff >> (n * 4 - 4); - return WORD_T((uint64_t)mask << 32 | mask); -} - -/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ -__forceinline uint64_t TOBI32(uint64_t in) { - uint32_t hi = in >> 32; - uint32_t lo = in; - uint32_t r0, r1; - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r0 = (lo & 0x0000FFFF) | (hi << 16); - r1 = (lo >> 16) | (hi & 0xFFFF0000); - return (uint64_t)r1 << 32 | r0; -} - -/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ -__forceinline uint64_t FROMBI32(uint64_t in) { - uint32_t r0 = in; - uint32_t r1 = in >> 32; - uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); - uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - return (uint64_t)hi << 32 | lo; + return AND(w, WORD_T((uint64_t)mask << 32 | mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/api.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/api.h new file mode 100644 index 0000000..d72f706 
--- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/api.h @@ -0,0 +1,3 @@ +#define CRYPTO_BYTES 32 +#define ASCON_RATE 8 +#define ASCON_XOF diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/ascon.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/ascon.h new file mode 100644 index 0000000..e596d64 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include <stdint.h> + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/config.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/config.h new file mode 100644 index 0000000..0f5a485 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/config.h @@ -0,0 +1,29 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/endian.h
b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/hash.c b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/hash.c new file mode 100644 index 0000000..bf77d61 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/hash.c @@ -0,0 +1,50 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { + state_t s; + + /* initialization */ +#ifdef ASCON_HASH + s.x0 = ASCON_HASH_IV0; + s.x1 = ASCON_HASH_IV1; + s.x2 = ASCON_HASH_IV2; + 
s.x3 = ASCON_HASH_IV3; + s.x4 = ASCON_HASH_IV4; +#endif +#ifdef ASCON_XOF + s.x0 = ASCON_XOF_IV0; + s.x1 = ASCON_XOF_IV1; + s.x2 = ASCON_XOF_IV2; + s.x3 = ASCON_XOF_IV3; + s.x4 = ASCON_XOF_IV4; +#endif + printstate("initialization", &s); + + /* absorb plaintext */ + while (len >= ASCON_RATE) { + s.x0 = XOR(s.x0, LOAD64(in)); + P12(&s); + in += ASCON_RATE; + len -= ASCON_RATE; + } + if (len) s.x0 = XOR(s.x0, LOAD(in, len)); + s.x0 = XOR(s.x0, PAD(len)); + P12(&s); + printstate("absorb plaintext", &s); + + /* squeeze output */ + len = CRYPTO_BYTES; + while (len > ASCON_RATE) { + STORE64(out, s.x0); + P12(&s); + out += ASCON_RATE; + len -= ASCON_RATE; + } + STORE64(out, s.x0); + printstate("squeeze output", &s); + + return 0; +} diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/implementors b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/interleave.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/interleave.h new file mode 100644 index 0000000..6e2e5c3 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/interleave.h @@ -0,0 +1,47 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include <stdint.h> + +__forceinline uint32_t deinterleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + return x; +} + +__forceinline uint32_t interleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 1)) &
0x22222222, x ^= t ^ (t << 1); + return x; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t deinterleave32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + lo = deinterleave_uint32(lo); + hi = deinterleave_uint32(hi); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t interleave32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + lo = interleave_uint32(lo); + hi = interleave_uint32(hi); + return (uint64_t)hi << 32 | lo; +} + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/permutations.c b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/permutations.c new file mode 100644 index 0000000..56273c6 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/permutations.c @@ -0,0 +1,44 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, + {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, + {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { + printstate(" 
permutation input", s); + P6ROUNDS(s); +} +#endif + +#endif diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/permutations.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/permutations.h new file mode 100644 index 0000000..49fd52a --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/permutations.h @@ -0,0 +1,168 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include <stdint.h> + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV WORD_T(0x8021000008220000) +#define ASCON_128A_IV WORD_T(0x8822000000200000) +#define ASCON_80PQ_IV WORD_T(0xc021000008220000) +#define ASCON_HASH_IV WORD_T(0x0020000008020010) +#define ASCON_XOF_IV WORD_T(0x0020000008020000) + +#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7) +#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340) +#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8) +#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98) +#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398) + +#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6) +#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220) +#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04) +#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2) +#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - n) + +#if
ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xc, 0xc); + ROUND(s, 0x9, 0xc); + ROUND(s, 0xc, 0x9); + ROUND(s, 0x9, 0x9); + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint8_t constants[][2]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); 
+ if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/printstate.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/printstate.h new file mode 100644 index 0000000..2021f96 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/printstate.h @@ -0,0 +1,32 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include <inttypes.h> +#include <stdio.h> + +#include "ascon.h" +#include "word.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/round.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/round.h new file mode 100644 index 0000000..06da1ca --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/round.h @@ -0,0 +1,102 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { + uint32_t tmp0, tmp1, tmp2, tmp3; + /* clang-format off */ + __asm__ __volatile__( \ + "eor %[x2_e], %[x2_e], %[C_e]\n\t" \ + "eor %[x2_o], %[x2_o], %[C_o]\n\t" \ + "eor %[x0_e], %[x0_e], %[x4_e]\n\t" \ + "eor %[x0_o], %[x0_o], %[x4_o]\n\t" \ + "eor %[x4_e], %[x4_e], %[x3_e]\n\t" \ + "eor %[x4_o], %[x4_o], %[x3_o]\n\t" \ +
"eor %[x2_e], %[x2_e], %[x1_e]\n\t" \ + "eor %[x2_o], %[x2_o], %[x1_o]\n\t" \ + "bic %[tmp0], %[x0_e], %[x4_e]\n\t" \ + "bic %[tmp1], %[x4_e], %[x3_e]\n\t" \ + "bic %[tmp2], %[x2_e], %[x1_e]\n\t" \ + "bic %[tmp3], %[x1_e], %[x0_e]\n\t" \ + "eor %[x2_e], %[x2_e], %[tmp1]\n\t" \ + "eor %[x0_e], %[x0_e], %[tmp2]\n\t" \ + "eor %[x4_e], %[x4_e], %[tmp3]\n\t" \ + "bic %[tmp3], %[x3_e], %[x2_e]\n\t" \ + "eor %[x3_e], %[x3_e], %[tmp0]\n\t" \ + "bic %[tmp2], %[x0_o], %[x4_o]\n\t" \ + "bic %[tmp0], %[x2_o], %[x1_o]\n\t" \ + "bic %[tmp1], %[x4_o], %[x3_o]\n\t" \ + "eor %[x1_e], %[x1_e], %[tmp3]\n\t" \ + "eor %[x0_o], %[x0_o], %[tmp0]\n\t" \ + "eor %[x2_o], %[x2_o], %[tmp1]\n\t" \ + "bic %[tmp3], %[x1_o], %[x0_o]\n\t" \ + "bic %[tmp0], %[x3_o], %[x2_o]\n\t" \ + "eor %[x3_o], %[x3_o], %[tmp2]\n\t" \ + "eor %[x3_o], %[x3_o], %[x2_o]\n\t" \ + "eor %[x4_o], %[x4_o], %[tmp3]\n\t" \ + "eor %[x1_o], %[x1_o], %[tmp0]\n\t" \ + "eor %[x3_e], %[x3_e], %[x2_e]\n\t" \ + "eor %[x1_e], %[x1_e], %[x0_e]\n\t" \ + "eor %[x1_o], %[x1_o], %[x0_o]\n\t" \ + "eor %[x0_e], %[x0_e], %[x4_e]\n\t" \ + "eor %[x0_o], %[x0_o], %[x4_o]\n\t" \ + "mvn %[x2_e], %[x2_e]\n\t" \ + "mvn %[x2_o], %[x2_o]\n\t" \ + "eor %[tmp0], %[x0_e], %[x0_o], ror #4\n\t" \ + "eor %[tmp1], %[x0_o], %[x0_e], ror #5\n\t" \ + "eor %[tmp2], %[x1_e], %[x1_e], ror #11\n\t" \ + "eor %[tmp3], %[x1_o], %[x1_o], ror #11\n\t" \ + "eor %[x0_e], %[x0_e], %[tmp1], ror #9\n\t" \ + "eor %[x0_o], %[x0_o], %[tmp0], ror #10\n\t" \ + "eor %[x1_e], %[x1_e], %[tmp3], ror #19\n\t" \ + "eor %[x1_o], %[x1_o], %[tmp2], ror #20\n\t" \ + "eor %[tmp0], %[x2_e], %[x2_o], ror #2\n\t" \ + "eor %[tmp1], %[x2_o], %[x2_e], ror #3\n\t" \ + "eor %[tmp2], %[x3_e], %[x3_o], ror #3\n\t" \ + "eor %[tmp3], %[x3_o], %[x3_e], ror #4\n\t" \ + "eor %[x2_e], %[x2_e], %[tmp1]\n\t" \ + "eor %[x2_o], %[x2_o], %[tmp0], ror #1\n\t" \ + "eor %[x3_e], %[x3_e], %[tmp2], ror #5\n\t" \ + "eor %[x3_o], %[x3_o], %[tmp3], ror #5\n\t" \ + "eor %[tmp0], %[x4_e], %[x4_e], ror #17\n\t" \ +
"eor %[tmp1], %[x4_o], %[x4_o], ror #17\n\t" \ + "eor %[x4_e], %[x4_e], %[tmp1], ror #3\n\t" \ + "eor %[x4_o], %[x4_o], %[tmp0], ror #4\n\t" \ + : [ x0_e ] "+r"(s->x0.e), \ + [ x1_e ] "+r"(s->x1.e), \ + [ x2_e ] "+r"(s->x2.e), \ + [ x3_e ] "+r"(s->x3.e), \ + [ x4_e ] "+r"(s->x4.e), \ + [ x0_o ] "+r"(s->x0.o), \ + [ x1_o ] "+r"(s->x1.o), \ + [ x2_o ] "+r"(s->x2.o), \ + [ x3_o ] "+r"(s->x3.o), \ + [ x4_o ] "+r"(s->x4.o), \ + [ tmp0 ] "=r"(tmp0), \ + [ tmp1 ] "=r"(tmp1), \ + [ tmp2 ] "=r"(tmp2), \ + [ tmp3 ] "=r"(tmp3) \ + : [ C_e ] "i"(C_e), \ + [ C_o ] "i"(C_o) \ + : ); + /* clang-format on */ + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/word.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/word.h new file mode 100644 index 0000000..b27c6c9 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_arm/word.h @@ -0,0 +1,114 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include <stdint.h> + +#include "endian.h" +#include "interleave.h" + +typedef struct { + uint32_t e; + uint32_t o; +} word_t; + +__forceinline uint32_t ROR32(uint32_t x, int n) { + return (n == 0) ? x : x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ?
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + +__forceinline word_t WORD_T(uint64_t x) { + return (word_t){.o = x >> 32, .e = x}; +} + +__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } + +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } + +__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } + +__forceinline word_t NOT(word_t a) { + a.e = ~a.e; + a.o = ~a.o; + return a; +} + +__forceinline word_t XOR(word_t a, word_t b) { + a.e ^= b.e; + a.o ^= b.o; + return a; +} + +__forceinline word_t AND(word_t a, word_t b) { + a.e &= b.e; + a.o &= b.o; + return a; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.o = lo2hi.o << 16 | hi2lo.o >> 16; + r.e = lo2hi.e << 16 | hi2lo.e >> 16; + return r; +} + +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint32_t result = a.e | a.o | b.e | b.o; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; +} + +__forceinline word_t PAD(int i) { + return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); +} + +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint32_t mask = 0x0fffffff >> (n * 4 - 4); + return AND(w, WORD_T((uint64_t)mask << 32 | mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int 
n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/api.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/api.h new file mode 100644 index 0000000..d72f706 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/api.h @@ -0,0 +1,3 @@ +#define CRYPTO_BYTES 32 +#define ASCON_RATE 8 +#define ASCON_XOF diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/ascon.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/ascon.h new file mode 100644 index 0000000..e596d64 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include <stdint.h> + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/config.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/config.h new file mode 100644 index 0000000..0f5a485 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/config.h @@ -0,0 +1,29 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif
+ +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/endian.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/hash.c 
b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/hash.c new file mode 100644 index 0000000..bf77d61 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/hash.c @@ -0,0 +1,50 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { + state_t s; + + /* initialization */ +#ifdef ASCON_HASH + s.x0 = ASCON_HASH_IV0; + s.x1 = ASCON_HASH_IV1; + s.x2 = ASCON_HASH_IV2; + s.x3 = ASCON_HASH_IV3; + s.x4 = ASCON_HASH_IV4; +#endif +#ifdef ASCON_XOF + s.x0 = ASCON_XOF_IV0; + s.x1 = ASCON_XOF_IV1; + s.x2 = ASCON_XOF_IV2; + s.x3 = ASCON_XOF_IV3; + s.x4 = ASCON_XOF_IV4; +#endif + printstate("initialization", &s); + + /* absorb plaintext */ + while (len >= ASCON_RATE) { + s.x0 = XOR(s.x0, LOAD64(in)); + P12(&s); + in += ASCON_RATE; + len -= ASCON_RATE; + } + if (len) s.x0 = XOR(s.x0, LOAD(in, len)); + s.x0 = XOR(s.x0, PAD(len)); + P12(&s); + printstate("absorb plaintext", &s); + + /* squeeze output */ + len = CRYPTO_BYTES; + while (len > ASCON_RATE) { + STORE64(out, s.x0); + P12(&s); + out += ASCON_RATE; + len -= ASCON_RATE; + } + STORE64(out, s.x0); + printstate("squeeze output", &s); + + return 0; +} diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/implementors b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/interleave.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/interleave.h new file mode 100644 index 0000000..6e2e5c3 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/interleave.h @@ -0,0 +1,47 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include <stdint.h> + +__forceinline uint32_t
deinterleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + return x; +} + +__forceinline uint32_t interleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + return x; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t deinterleave32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + lo = deinterleave_uint32(lo); + hi = deinterleave_uint32(hi); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t interleave32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + lo = interleave_uint32(lo); + hi = interleave_uint32(hi); + return (uint64_t)hi << 32 | lo; +} + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/permutations.c b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/permutations.c new file mode 100644 index 0000000..56273c6 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/permutations.c @@ -0,0 +1,44 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, + {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, + {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +#endif + +#if ASCON_INLINE_PERM + +#elif 
ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} +#endif + +#endif diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/permutations.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/permutations.h new file mode 100644 index 0000000..49fd52a --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/permutations.h @@ -0,0 +1,168 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include <stdint.h> + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV WORD_T(0x8021000008220000) +#define ASCON_128A_IV WORD_T(0x8822000000200000) +#define ASCON_80PQ_IV WORD_T(0xc021000008220000) +#define ASCON_HASH_IV WORD_T(0x0020000008020010) +#define ASCON_XOF_IV WORD_T(0x0020000008020000) + +#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7) +#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340) +#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8) +#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98) +#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398) + +#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6) +#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220) +#define ASCON_XOF_IV2
WORD_T(0x5dd5ab52a13e3f04) +#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2) +#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - n) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xc, 0xc); + ROUND(s, 0x9, 0xc); + ROUND(s, 0xc, 0x9); + ROUND(s, 0x9, 0x9); + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint8_t constants[][2]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + 
+__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/printstate.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/printstate.h new file mode 100644 index 0000000..2021f96 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/printstate.h @@ -0,0 +1,32 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" +#include "word.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/round.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/round.h new file mode 100644 index 0000000..fa23bf3 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/round.h @@ -0,0 +1,53 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = 
WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { + word_t tmp, C = {.o = C_o, .e = C_e}; + /* round constant */ + s->x2 = XOR(s->x2, C); + /* s-box layer */ + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); + /* linear layer */ + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/word.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/word.h new file mode 100644 index 0000000..b27c6c9 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowreg/word.h @@ -0,0 +1,114 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "endian.h" +#include "interleave.h" + +typedef struct { + uint32_t e; + uint32_t o; +} word_t; + +__forceinline uint32_t ROR32(uint32_t x, int n) { + return (n == 0) ? x : x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + +__forceinline word_t WORD_T(uint64_t x) { + return (word_t){.o = x >> 32, .e = x}; +} + +__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } + +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } + +__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } + +__forceinline word_t NOT(word_t a) { + a.e = ~a.e; + a.o = ~a.o; + return a; +} + +__forceinline word_t XOR(word_t a, word_t b) { + a.e ^= b.e; + a.o ^= b.o; + return a; +} + +__forceinline word_t AND(word_t a, word_t b) { + a.e &= b.e; + a.o &= b.o; + return a; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.o = lo2hi.o << 16 | hi2lo.o >> 16; + r.e = lo2hi.e << 16 | hi2lo.e >> 16; + return r; +} + +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint32_t result = a.e | a.o | b.e | b.o; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; +} + +__forceinline word_t PAD(int i) { + return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); +} + +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint32_t mask = 0x0fffffff >> (n * 4 - 4); + return AND(w, WORD_T((uint64_t)mask << 32 | mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int 
n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/api.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/api.h new file mode 100644 index 0000000..d72f706 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/api.h @@ -0,0 +1,3 @@ +#define CRYPTO_BYTES 32 +#define ASCON_RATE 8 +#define ASCON_XOF diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/ascon.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/ascon.h new file mode 100644 index 0000000..c89ec89 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/ascon.h @@ -0,0 +1,26 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +#define ASCON_ABSORB 0x1 +#define ASCON_SQUEEZE 0x2 +#define ASCON_INSERT 0x4 +#define ASCON_ENCRYPT (ASCON_ABSORB | ASCON_SQUEEZE) +#define ASCON_DECRYPT (ASCON_ABSORB | ASCON_SQUEEZE | ASCON_INSERT) + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode); + +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode); + +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/config.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/config.h new file mode 100644 index 0000000..ca59e3b --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/config.h @@ -0,0 +1,29 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE 
+#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 1 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/endian.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" 
+#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/hash.c b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/hash.c new file mode 100644 index 0000000..bf77d61 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/hash.c @@ -0,0 +1,50 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { + state_t s; + + /* initialization */ +#ifdef ASCON_HASH + s.x0 = ASCON_HASH_IV0; + s.x1 = ASCON_HASH_IV1; + s.x2 = ASCON_HASH_IV2; + s.x3 = ASCON_HASH_IV3; + s.x4 = ASCON_HASH_IV4; +#endif +#ifdef ASCON_XOF + s.x0 = ASCON_XOF_IV0; + s.x1 = ASCON_XOF_IV1; + s.x2 = ASCON_XOF_IV2; + s.x3 = ASCON_XOF_IV3; + s.x4 = ASCON_XOF_IV4; +#endif + printstate("initialization", &s); + + /* absorb plaintext */ + while (len >= ASCON_RATE) { + s.x0 = XOR(s.x0, LOAD64(in)); + P12(&s); + in += ASCON_RATE; + len -= ASCON_RATE; + } + if (len) s.x0 = XOR(s.x0, LOAD(in, len)); + s.x0 = XOR(s.x0, PAD(len)); + P12(&s); + printstate("absorb plaintext", &s); + + /* squeeze output */ + len = CRYPTO_BYTES; + while (len > ASCON_RATE) { + STORE64(out, s.x0); + P12(&s); + out += ASCON_RATE; + len -= ASCON_RATE; + } + STORE64(out, s.x0); + printstate("squeeze output", &s); + + return 0; +} diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/implementors b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/interleave.c b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/interleave.c new file mode 100644 index 0000000..321d0ce --- /dev/null +++ 
b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/interleave.c @@ -0,0 +1,42 @@ +#include "interleave.h" + +static inline uint32_t deinterleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + return x; +} + +static inline uint32_t interleave_uint32(uint32_t x) { + uint32_t t; + t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); + t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); + t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); + t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); + return x; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +uint64_t deinterleave32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + lo = deinterleave_uint32(lo); + hi = deinterleave_uint32(hi); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +uint64_t interleave32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + lo = interleave_uint32(lo); + hi = interleave_uint32(hi); + return (uint64_t)hi << 32 | lo; +} diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/interleave.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/interleave.h new file mode 100644 index 0000000..f6590fb --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/interleave.h @@ -0,0 +1,9 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include + +uint64_t deinterleave32(uint64_t in); +uint64_t interleave32(uint64_t in); + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/permutations.c b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/permutations.c new file mode 100644 index 0000000..56273c6 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/permutations.c @@ -0,0 +1,44 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, + {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, + {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { + 
printstate(" permutation input", s); + P6ROUNDS(s); +} +#endif + +#endif diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/permutations.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/permutations.h new file mode 100644 index 0000000..49fd52a --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/permutations.h @@ -0,0 +1,168 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV WORD_T(0x8021000008220000) +#define ASCON_128A_IV WORD_T(0x8822000000200000) +#define ASCON_80PQ_IV WORD_T(0xc021000008220000) +#define ASCON_HASH_IV WORD_T(0x0020000008020010) +#define ASCON_XOF_IV WORD_T(0x0020000008020000) + +#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7) +#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340) +#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8) +#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98) +#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398) + +#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6) +#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220) +#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04) +#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2) +#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - 
n) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xc, 0xc); + ROUND(s, 0x9, 0xc); + ROUND(s, 0xc, 0x9); + ROUND(s, 0x9, 0x9); + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint8_t constants[][2]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 
8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/printstate.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/printstate.h new file mode 100644 index 0000000..2021f96 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/printstate.h @@ -0,0 +1,32 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" +#include "word.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/round.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/round.h new file mode 100644 index 0000000..fa23bf3 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/round.h @@ -0,0 +1,53 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { + word_t tmp, C = {.o = C_o, .e = C_e}; + /* round constant */ + s->x2 = XOR(s->x2, C); + /* s-box layer */ + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, 
AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); + /* linear layer */ + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/word.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/word.h new file mode 100644 index 0000000..b27c6c9 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32_lowsize/word.h @@ -0,0 +1,114 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "endian.h" +#include "interleave.h" + +typedef struct { + uint32_t e; + uint32_t o; +} word_t; + +__forceinline uint32_t ROR32(uint32_t x, int n) { + return (n == 0) ? x : x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + +__forceinline word_t WORD_T(uint64_t x) { + return (word_t){.o = x >> 32, .e = x}; +} + +__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } + +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } + +__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } + +__forceinline word_t NOT(word_t a) { + a.e = ~a.e; + a.o = ~a.o; + return a; +} + +__forceinline word_t XOR(word_t a, word_t b) { + a.e ^= b.e; + a.o ^= b.o; + return a; +} + +__forceinline word_t AND(word_t a, word_t b) { + a.e &= b.e; + a.o &= b.o; + return a; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.o = lo2hi.o << 16 | hi2lo.o >> 16; + r.e = lo2hi.e << 16 | hi2lo.e >> 16; + return r; +} + +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint32_t result = a.e | a.o | b.e | b.o; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; +} + +__forceinline word_t PAD(int i) { + return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); +} + +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint32_t mask = 0x0fffffff >> (n * 4 - 4); + return AND(w, WORD_T((uint64_t)mask << 32 | mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int 
n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi8/api.h b/ascon/Implementations/crypto_hash/asconxofv12/bi8/api.h new file mode 100644 index 0000000..d72f706 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi8/api.h @@ -0,0 +1,3 @@ +#define CRYPTO_BYTES 32 +#define ASCON_RATE 8 +#define ASCON_XOF diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi8/ascon.h b/ascon/Implementations/crypto_hash/asconxofv12/bi8/ascon.h new file mode 100644 index 0000000..e596d64 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi8/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi8/config.h b/ascon/Implementations/crypto_hash/asconxofv12/bi8/config.h new file mode 100644 index 0000000..ef5eb14 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi8/config.h @@ -0,0 +1,29 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM 
+#define ASCON_SINGLE_PERM 1 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 0 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi8/endian.h b/ascon/Implementations/crypto_hash/asconxofv12/bi8/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi8/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi8/hash.c b/ascon/Implementations/crypto_hash/asconxofv12/bi8/hash.c new file mode 100644 index 0000000..bf77d61 --- /dev/null +++ 
b/ascon/Implementations/crypto_hash/asconxofv12/bi8/hash.c @@ -0,0 +1,50 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { + state_t s; + + /* initialization */ +#ifdef ASCON_HASH + s.x0 = ASCON_HASH_IV0; + s.x1 = ASCON_HASH_IV1; + s.x2 = ASCON_HASH_IV2; + s.x3 = ASCON_HASH_IV3; + s.x4 = ASCON_HASH_IV4; +#endif +#ifdef ASCON_XOF + s.x0 = ASCON_XOF_IV0; + s.x1 = ASCON_XOF_IV1; + s.x2 = ASCON_XOF_IV2; + s.x3 = ASCON_XOF_IV3; + s.x4 = ASCON_XOF_IV4; +#endif + printstate("initialization", &s); + + /* absorb plaintext */ + while (len >= ASCON_RATE) { + s.x0 = XOR(s.x0, LOAD64(in)); + P12(&s); + in += ASCON_RATE; + len -= ASCON_RATE; + } + if (len) s.x0 = XOR(s.x0, LOAD(in, len)); + s.x0 = XOR(s.x0, PAD(len)); + P12(&s); + printstate("absorb plaintext", &s); + + /* squeeze output */ + len = CRYPTO_BYTES; + while (len > ASCON_RATE) { + STORE64(out, s.x0); + P12(&s); + out += ASCON_RATE; + len -= ASCON_RATE; + } + STORE64(out, s.x0); + printstate("squeeze output", &s); + + return 0; +} diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi8/implementors b/ascon/Implementations/crypto_hash/asconxofv12/bi8/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi8/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi8/interleave.c b/ascon/Implementations/crypto_hash/asconxofv12/bi8/interleave.c new file mode 100644 index 0000000..659255b --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi8/interleave.c @@ -0,0 +1,12 @@ +#include "interleave.h" + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +uint64_t interleave8(uint64_t x) { + x = (x & 0xaa55aa55aa55aa55ull) | ((x & 0x00aa00aa00aa00aaull) << 7) | + ((x >> 7) & 0x00aa00aa00aa00aaull); + x = (x & 0xcccc3333cccc3333ull) | ((x & 0x0000cccc0000ccccull) << 14) | + ((x >> 14) & 0x0000cccc0000ccccull); + x = (x & 0xf0f0f0f00f0f0f0full) | ((x & 0x00000000f0f0f0f0ull) << 28) | + ((x >> 28) & 0x00000000f0f0f0f0ull); + return x; +} diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi8/interleave.h b/ascon/Implementations/crypto_hash/asconxofv12/bi8/interleave.h new file mode 100644 index 0000000..62937e0 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi8/interleave.h @@ -0,0 +1,8 @@ +#ifndef INTERLEAVE_H_ +#define INTERLEAVE_H_ + +#include + +uint64_t interleave8(uint64_t x); + +#endif /* INTERLEAVE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi8/permutations.c b/ascon/Implementations/crypto_hash/asconxofv12/bi8/permutations.c new file mode 100644 index 0000000..a0cc038 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi8/permutations.c @@ -0,0 +1,45 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint64_t constants[12] = { + 0x0101010100000000ull, 0x0101010000000001ull, 0x0101000100000100ull, + 0x0101000000000101ull, 0x0100010100010000ull, 0x0100010000010001ull, + 0x0100000100010100ull, 0x0100000000010101ull, 0x0001010101000000ull, + 0x0001010001000001ull, 0x0001000101000100ull, 0x0001000001000101ull}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; ++i) ROUND(s, constants[i]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { + printstate(" 
permutation input", s); + P8ROUNDS(s); +} +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} +#endif + +#endif diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi8/permutations.h b/ascon/Implementations/crypto_hash/asconxofv12/bi8/permutations.h new file mode 100644 index 0000000..ee1b625 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi8/permutations.h @@ -0,0 +1,165 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV WORD_T(0x8040000020301000ull) +#define ASCON_128A_IV WORD_T(0xc000000030200000ull) +#define ASCON_80PQ_IV WORD_T(0x8040800020301000ull) +#define ASCON_HASH_IV WORD_T(0x0040000020200002ull) +#define ASCON_XOF_IV WORD_T(0x0040000020200000ull) + +#define ASCON_HASH_IV0 WORD_T(0xfa8e976bb985dc4dull) +#define ASCON_HASH_IV1 WORD_T(0xc8085072a40ccd94ull) +#define ASCON_HASH_IV2 WORD_T(0xfe1781be5a847314ull) +#define ASCON_HASH_IV3 WORD_T(0x2f871f6c6d0082b2ull) +#define ASCON_HASH_IV4 WORD_T(0x7a1ba68850ec407eull) + +#define ASCON_XOF_IV0 WORD_T(0x8a46f0d354e771b8ull) +#define ASCON_XOF_IV1 WORD_T(0x04489f4084368cd0ull) +#define ASCON_XOF_IV2 WORD_T(0x6c94f2150dbcf66cull) +#define ASCON_XOF_IV3 WORD_T(0x48965294f143b44eull) +#define ASCON_XOF_IV4 WORD_T(0x0788515fe0e5fb8aull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 
&& CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - n) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0x0101010100000000ull); + ROUND(s, 0x0101010000000001ull); + ROUND(s, 0x0101000100000100ull); + ROUND(s, 0x0101000000000101ull); + ROUND(s, 0x0100010100010000ull); + ROUND(s, 0x0100010000010001ull); + ROUND(s, 0x0100000100010100ull); + ROUND(s, 0x0100000000010101ull); + ROUND(s, 0x0001010101000000ull); + ROUND(s, 0x0001010001000001ull); + ROUND(s, 0x0001000101000100ull); + ROUND(s, 0x0001000001000101ull); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x0100010100010000ull); + ROUND(s, 0x0100010000010001ull); + ROUND(s, 0x0100000100010100ull); + ROUND(s, 0x0100000000010101ull); + ROUND(s, 0x0001010101000000ull); + ROUND(s, 0x0001010001000001ull); + ROUND(s, 0x0001000101000100ull); + ROUND(s, 0x0001000001000101ull); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x0100000100010100ull); + ROUND(s, 0x0100000000010101ull); + ROUND(s, 0x0001010101000000ull); + ROUND(s, 0x0001010001000001ull); + ROUND(s, 0x0001000101000100ull); + ROUND(s, 0x0001000001000101ull); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint64_t constants[12]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; ++i) ROUND(s, constants[i]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; ++i) ROUND(s, constants[i]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; ++i) ROUND(s, constants[i]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int 
i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi8/printstate.h b/ascon/Implementations/crypto_hash/asconxofv12/bi8/printstate.h new file mode 100644 index 0000000..2021f96 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi8/printstate.h @@ -0,0 +1,32 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" +#include "word.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi8/round.h b/ascon/Implementations/crypto_hash/asconxofv12/bi8/round.h new file mode 100644 index 0000000..15c7795 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi8/round.h @@ -0,0 +1,53 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + 
+__forceinline void ROUND(state_t* s, uint64_t C) { + word_t tmp; + /* round constant */ + s->x2 = XOR(s->x2, WORD_T(C)); + /* s-box layer */ + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); + /* linear layer */ + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi8/word.h b/ascon/Implementations/crypto_hash/asconxofv12/bi8/word.h new file mode 100644 index 0000000..f1b5cbb --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi8/word.h @@ -0,0 +1,129 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "endian.h" +#include "interleave.h" + +typedef union { + uint64_t w; + uint8_t b[8]; +} word_t; + +__forceinline uint8_t ROR8(uint8_t a, int n) { return a >> n | a << (8 - n); } + +__forceinline word_t ROR64(word_t a, int n) { + word_t b; + b.b[0] = ROR8(a.b[(n + 0) & 0x7], (n + 0) >> 3); + b.b[1] = ROR8(a.b[(n + 1) & 0x7], (n + 1) >> 3); + b.b[2] = ROR8(a.b[(n + 2) & 0x7], (n + 2) >> 3); + b.b[3] = ROR8(a.b[(n + 3) & 0x7], (n + 3) >> 3); + b.b[4] = ROR8(a.b[(n + 4) & 0x7], (n + 4) >> 3); + b.b[5] = ROR8(a.b[(n + 5) & 0x7], (n + 5) >> 3); + b.b[6] = ROR8(a.b[(n + 6) & 0x7], (n + 6) >> 3); + b.b[7] = 
ROR8(a.b[(n + 7) & 0x7], (n + 7) >> 3); + return b; +} + +__forceinline word_t WORD_T(uint64_t x) { + word_t w; + w.w = x; + return w; +} + +__forceinline uint64_t UINT64_T(word_t w) { + uint64_t x; + x = w.w; + return x; +} + +__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(interleave8(x)); } + +__forceinline uint64_t WORDTOU64(word_t w) { return interleave8(UINT64_T(w)); } + +__forceinline word_t NOT(word_t a) { + a.w = ~a.w; + return a; +} + +__forceinline word_t XOR(word_t a, word_t b) { + a.w ^= b.w; + return a; +} + +__forceinline word_t AND(word_t a, word_t b) { + a.w &= b.w; + return a; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t w; + w.w = lo2hi.w << 32 | hi2lo.w >> 32; + return w; +} + +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint64_t result = a.w | b.w; + result |= result >> 32; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; +} + +__forceinline word_t PAD(int i) { return (word_t){.b[7] = 0x80 >> i}; } + +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint8_t m = 0xff >> n; + word_t mask = { + .b[0] = m, + .b[1] = m, + .b[2] = m, + .b[3] = m, + .b[4] = m, + .b[5] = m, + .b[6] = m, + .b[7] = m, + }; + return AND(w, mask); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; 
+ for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64/ascon.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64/ascon.h index 10a5b6e..e596d64 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/opt64/ascon.h +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64/ascon.h @@ -16,4 +16,4 @@ void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); void ascon_final(state_t* s, const uint8_t* k); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64/config.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64/config.h index 8d8a1a0..7dfad92 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/opt64/config.h +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 1 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'H' -#endif - /* make sure __forceinline is supported */ #ifndef __forceinline #define __forceinline inline __attribute__((always_inline)) diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64/hash.c b/ascon/Implementations/crypto_hash/asconxofv12/opt64/hash.c index 34dec63..bf77d61 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/opt64/hash.c +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64/hash.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -26,13 +25,13 @@ int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { /* absorb plaintext */ while 
(len >= ASCON_RATE) { - XOR(s.x0, LOAD64(in)); + s.x0 = XOR(s.x0, LOAD64(in)); P12(&s); in += ASCON_RATE; len -= ASCON_RATE; } - if (len) XOR(s.x0, LOAD(in, len)); - XOR(s.x0, PAD(len)); + if (len) s.x0 = XOR(s.x0, LOAD(in, len)); + s.x0 = XOR(s.x0, PAD(len)); P12(&s); printstate("absorb plaintext", &s); diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64/permutations.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64/permutations.h index 6172dd5..66f3cf3 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/opt64/permutations.h +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) - -#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define 
ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x80400c0600000000) +#define ASCON_128A_IV WORD_T(0x80800c0800000000) +#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000) +#define ASCON_HASH_IV WORD_T(0x00400c0000000100) +#define ASCON_XOF_IV WORD_T(0x00400c0000000000) + +#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 WORD_T(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64/printstate.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/opt64/printstate.h +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64/word.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64/word.h index 5d601bb..9acbd12 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/opt64/word.h +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64/word.h @@ -3,45 +3,78 @@ #include -#include "config.h" +#include "endian.h" typedef uint64_t word_t; #define WORD_T #define UINT64_T - #define 
U64TOWORD #define WORDTOU64 -#define XOR(a, b) \ - do { \ - (a) ^= (b); \ - } while (0) +__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } -#define AND(a, b) \ - do { \ - (a) &= (b); \ - } while (0) +__forceinline word_t NOT(word_t a) { return ~a; } -__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } +__forceinline word_t XOR(word_t a, word_t b) { return a ^ b; } + +__forceinline word_t AND(word_t a, word_t b) { return a & b; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return lo2hi << 32 | hi2lo >> 32; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint64_t result = a | b; + result |= result >> 32; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { - return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint64_t mask = 0x00ffffffffffffffull >> (n * 8 - 8); + return AND(w, WORD_T(mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + 
*(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/ascon.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/ascon.h index aa685d3..c89ec89 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/ascon.h +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/ascon.h @@ -10,9 +10,11 @@ typedef struct { word_t x0, x1, x2, x3, x4; } state_t; -#define ASCON_AD 0 -#define ASCON_ENC 1 -#define ASCON_DEC 2 +#define ASCON_ABSORB 0x1 +#define ASCON_SQUEEZE 0x2 +#define ASCON_INSERT 0x4 +#define ASCON_ENCRYPT (ASCON_ABSORB | ASCON_SQUEEZE) +#define ASCON_DECRYPT (ASCON_ABSORB | ASCON_SQUEEZE | ASCON_INSERT) void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, uint8_t mode); @@ -21,4 +23,4 @@ void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, const uint8_t* ad, uint64_t adlen, const uint8_t* npub, const uint8_t* k, uint8_t mode); -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/config.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/config.h index ec8bd6f..ca59e3b 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/config.h +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/config.h @@ -21,11 +21,6 @@ #define ASCON_UNROLL_LOOPS 1 #endif -/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ -#ifndef ASCON_DATA_ACCESS -#define ASCON_DATA_ACCESS 'H' -#endif - /* make sure __forceinline is supported */ #ifndef 
__forceinline #define __forceinline inline __attribute__((always_inline)) diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/hash.c b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/hash.c index 34dec63..bf77d61 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/hash.c +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/hash.c @@ -1,6 +1,5 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" @@ -26,13 +25,13 @@ int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { /* absorb plaintext */ while (len >= ASCON_RATE) { - XOR(s.x0, LOAD64(in)); + s.x0 = XOR(s.x0, LOAD64(in)); P12(&s); in += ASCON_RATE; len -= ASCON_RATE; } - if (len) XOR(s.x0, LOAD(in, len)); - XOR(s.x0, PAD(len)); + if (len) s.x0 = XOR(s.x0, LOAD(in, len)); + s.x0 = XOR(s.x0, PAD(len)); P12(&s); printstate("absorb plaintext", &s); diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/permutations.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/permutations.h index 6172dd5..66f3cf3 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/permutations.h +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/permutations.h @@ -21,44 +21,23 @@ #define ASCON_HASH_BYTES 32 -#define ASCON_128_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_128A_IV \ - U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) - -#define ASCON_80PQ_IV \ - U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ - ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - 
((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) - -#define ASCON_HASH_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ - ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) - -#define ASCON_XOF_IV \ - U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ - ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) - -#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) -#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) -#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) -#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) -#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) - -#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) -#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) -#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) -#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) -#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +#define ASCON_128_IV WORD_T(0x80400c0600000000) +#define ASCON_128A_IV WORD_T(0x80800c0800000000) +#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000) +#define ASCON_HASH_IV WORD_T(0x00400c0000000100) +#define ASCON_XOF_IV WORD_T(0x00400c0000000000) + +#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 WORD_T(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull) #if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define IV ASCON_128_IV diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/printstate.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/printstate.h index 34bd476..2021f96 100644 --- 
a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/printstate.h +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/round.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/round.h index 077cbfd..cc5cd9f 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/round.h +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/round.h @@ -19,50 +19,34 @@ __forceinline void PINIT(state_t* s) { } __forceinline void ROUND(state_t* s, uint64_t C) { - state_t t; - s->x2 ^= C; - s->x0 ^= s->x4; - s->x4 ^= s->x3; - s->x2 ^= s->x1; - t.x0 = s->x0; - t.x4 = s->x4; - t.x3 = s->x3; - t.x1 = s->x1; - t.x2 = s->x2; - s->x0 = t.x0 ^ (~t.x1 & t.x2); - s->x2 = t.x2 ^ (~t.x3 & t.x4); - s->x4 = t.x4 ^ (~t.x0 & t.x1); - s->x1 = t.x1 ^ (~t.x2 & t.x3); - s->x3 = t.x3 ^ (~t.x4 & t.x0); - s->x1 ^= s->x0; - t.x1 = s->x1; - s->x1 = ROR64(s->x1, 39); - s->x3 ^= s->x2; - t.x2 = s->x2; - s->x2 = ROR64(s->x2, 1); - t.x4 = s->x4; - t.x2 ^= s->x2; - s->x2 = ROR64(s->x2, 6 - 1); - t.x3 = s->x3; - t.x1 ^= s->x1; - s->x3 = ROR64(s->x3, 10); - s->x0 ^= s->x4; - s->x4 = ROR64(s->x4, 7); - t.x3 ^= s->x3; - s->x2 ^= t.x2; - s->x1 = ROR64(s->x1, 61 - 39); - t.x0 = s->x0; - s->x2 = ~s->x2; - s->x3 = ROR64(s->x3, 17 - 10); - t.x4 ^= s->x4; - s->x4 = ROR64(s->x4, 41 - 7); - s->x3 ^= t.x3; - s->x1 ^= t.x1; - s->x0 = ROR64(s->x0, 19); - s->x4 ^= t.x4; - t.x0 ^= s->x0; - s->x0 = ROR64(s->x0, 28 - 19); - s->x0 ^= t.x0; + word_t tmp; + /* round constant */ + s->x2 = XOR(s->x2, C); + /* s-box layer */ + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, 
AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); + /* linear layer */ + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, ROR64(tmp, 7)); + s->x2 = NOT(s->x2); printstate(" round output", s); } diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/word.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/word.h index 5d601bb..9acbd12 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/word.h +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/word.h @@ -3,45 +3,78 @@ #include -#include "config.h" +#include "endian.h" typedef uint64_t word_t; #define WORD_T #define UINT64_T - #define U64TOWORD #define WORDTOU64 -#define XOR(a, b) \ - do { \ - (a) ^= (b); \ - } while (0) +__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } -#define AND(a, b) \ - do { \ - (a) &= (b); \ - } while (0) +__forceinline word_t NOT(word_t a) { return ~a; } -__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } +__forceinline word_t XOR(word_t a, word_t b) { return a ^ b; } + +__forceinline word_t AND(word_t a, word_t b) { return a & b; } __forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { return lo2hi << 32 | hi2lo >> 32; } -__forceinline int NOTZERO(word_t a, word_t b) { - int result = 0; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; - for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; - return result; +__forceinline uint8_t NOTZERO(word_t a, 
word_t b) { + uint64_t result = a | b; + result |= result >> 32; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; } -/* set padding byte in 64-bit Ascon word */ __forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } -/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ -__forceinline word_t XMASK(int n) { - return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 0 */ + uint64_t mask = 0x00ffffffffffffffull >> (n * 8 - 8); + return AND(w, WORD_T(mask)); +} + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; } #endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt8/api.h b/ascon/Implementations/crypto_hash/asconxofv12/opt8/api.h new file mode 100644 index 0000000..d72f706 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt8/api.h @@ -0,0 +1,3 @@ +#define CRYPTO_BYTES 32 +#define ASCON_RATE 8 +#define ASCON_XOF diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt8/ascon.h 
b/ascon/Implementations/crypto_hash/asconxofv12/opt8/ascon.h new file mode 100644 index 0000000..e596d64 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt8/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt8/config.h b/ascon/Implementations/crypto_hash/asconxofv12/opt8/config.h new file mode 100644 index 0000000..ef5eb14 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt8/config.h @@ -0,0 +1,29 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 1 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 0 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt8/endian.h b/ascon/Implementations/crypto_hash/asconxofv12/opt8/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt8/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + 
+/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt8/hash.c b/ascon/Implementations/crypto_hash/asconxofv12/opt8/hash.c new file mode 100644 index 0000000..bf77d61 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt8/hash.c @@ -0,0 +1,50 @@ +#include "api.h" +#include "ascon.h" +#include "permutations.h" +#include "printstate.h" + +int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { + state_t s; + + /* initialization */ +#ifdef ASCON_HASH + s.x0 = ASCON_HASH_IV0; + s.x1 = ASCON_HASH_IV1; + s.x2 = ASCON_HASH_IV2; + s.x3 = ASCON_HASH_IV3; + s.x4 = ASCON_HASH_IV4; +#endif +#ifdef ASCON_XOF + s.x0 = ASCON_XOF_IV0; + s.x1 = ASCON_XOF_IV1; + s.x2 = ASCON_XOF_IV2; + s.x3 = ASCON_XOF_IV3; + s.x4 = ASCON_XOF_IV4; +#endif + printstate("initialization", &s); + + /* absorb plaintext */ + while (len >= ASCON_RATE) { + s.x0 = XOR(s.x0, LOAD64(in)); + P12(&s); + 
in += ASCON_RATE; + len -= ASCON_RATE; + } + if (len) s.x0 = XOR(s.x0, LOAD(in, len)); + s.x0 = XOR(s.x0, PAD(len)); + P12(&s); + printstate("absorb plaintext", &s); + + /* squeeze output */ + len = CRYPTO_BYTES; + while (len > ASCON_RATE) { + STORE64(out, s.x0); + P12(&s); + out += ASCON_RATE; + len -= ASCON_RATE; + } + STORE64(out, s.x0); + printstate("squeeze output", &s); + + return 0; +} diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt8/implementors b/ascon/Implementations/crypto_hash/asconxofv12/opt8/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt8/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt8/permutations.c b/ascon/Implementations/crypto_hash/asconxofv12/opt8/permutations.c new file mode 100644 index 0000000..8d39320 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt8/permutations.c @@ -0,0 +1,35 @@ +#include "permutations.h" + +#include "round.h" + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} +#endif + +#endif diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt8/permutations.h b/ascon/Implementations/crypto_hash/asconxofv12/opt8/permutations.h new file mode 100644 index 0000000..66f3cf3 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt8/permutations.h @@ -0,0 +1,163 @@ +#ifndef 
PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV WORD_T(0x80400c0600000000) +#define ASCON_128A_IV WORD_T(0x80800c0800000000) +#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000) +#define ASCON_HASH_IV WORD_T(0x00400c0000000100) +#define ASCON_XOF_IV WORD_T(0x00400c0000000000) + +#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 WORD_T(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) ((3 + (n)) << 4 | (12 - (n))) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xf0); + ROUND(s, 0xe1); + ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 
0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt8/printstate.h b/ascon/Implementations/crypto_hash/asconxofv12/opt8/printstate.h new file mode 100644 index 0000000..2021f96 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt8/printstate.h @@ -0,0 +1,32 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" +#include "word.h" + 
+__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt8/round.h b/ascon/Implementations/crypto_hash/asconxofv12/opt8/round.h new file mode 100644 index 0000000..15c7795 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt8/round.h @@ -0,0 +1,53 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint64_t C) { + word_t tmp; + /* round constant */ + s->x2 = XOR(s->x2, WORD_T(C)); + /* s-box layer */ + s->x0 = XOR(s->x0, s->x4); + s->x4 = XOR(s->x4, s->x3); + s->x2 = XOR(s->x2, s->x1); + tmp = AND(s->x0, NOT(s->x4)); + s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); + s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); + s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); + s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); + s->x3 = XOR(s->x3, tmp); + s->x1 = XOR(s->x1, s->x0); + s->x3 = XOR(s->x3, s->x2); + s->x0 = XOR(s->x0, s->x4); + /* linear layer */ + tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); + s->x0 = XOR(s->x0, ROR64(tmp, 19)); + tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); + s->x1 = XOR(s->x1, ROR64(tmp, 39)); + tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); + s->x2 = XOR(s->x2, ROR64(tmp, 1)); + tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); + s->x3 = XOR(s->x3, ROR64(tmp, 10)); + tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); + s->x4 = XOR(s->x4, 
ROR64(tmp, 7)); + s->x2 = NOT(s->x2); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt8/word.h b/ascon/Implementations/crypto_hash/asconxofv12/opt8/word.h new file mode 100644 index 0000000..873313d --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt8/word.h @@ -0,0 +1,115 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "endian.h" + +typedef union { + uint64_t w; + uint8_t b[8]; +} word_t; + +#define U64TOWORD WORD_T +#define WORDTOU64 UINT64_T + +#define XMUL(i, x) \ + do { \ + tmp = (uint16_t)a.b[i] * (1 << (x)); \ + b.b[(byte_rol + (i)) & 0x7] ^= (uint8_t)tmp; \ + b.b[(byte_rol + (i) + 1) & 0x7] ^= tmp >> 8; \ + } while (0) + +__forceinline word_t ROR64(word_t a, int n) { + word_t b = {.w = 0ull}; + int bit_rol = (64 - n) & 0x7; + int byte_rol = (64 - n) >> 3; + uint16_t tmp; + XMUL(0, bit_rol); + XMUL(1, bit_rol); + XMUL(2, bit_rol); + XMUL(3, bit_rol); + XMUL(4, bit_rol); + XMUL(5, bit_rol); + XMUL(6, bit_rol); + XMUL(7, bit_rol); + return b; +} + +__forceinline word_t WORD_T(uint64_t x) { return (word_t){.w = x}; } + +__forceinline uint64_t UINT64_T(word_t w) { return w.w; } + +__forceinline word_t NOT(word_t a) { + a.w = ~a.w; + return a; +} + +__forceinline word_t XOR(word_t a, word_t b) { + a.w ^= b.w; + return a; +} + +__forceinline word_t AND(word_t a, word_t b) { + a.w &= b.w; + return a; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + return (word_t){.w = lo2hi.w << 32 | hi2lo.w >> 32}; +} + +__forceinline uint8_t NOTZERO(word_t a, word_t b) { + uint64_t result = a.w | b.w; + result |= result >> 32; + result |= result >> 16; + result |= result >> 8; + return (uint8_t)result; +} + +__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } + +__forceinline uint64_t MASK(int n) { + /* undefined for n == 0 */ + return ~0ull >> (64 - 8 * n); +} + +__forceinline word_t CLEAR(word_t w, int n) { + /* undefined for n == 
0 */ + uint64_t mask = 0x00ffffffffffffffull >> (n * 8 - 8); + return AND(w, WORD_T(mask)); +} + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = *(uint64_t*)bytes; + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = *(uint64_t*)bytes & MASK(n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(n); + *(uint64_t*)bytes |= U64BIG(x); +} + +__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; + return U64TOWORD(x); +} + +__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/ref/ascon.h b/ascon/Implementations/crypto_hash/asconxofv12/ref/ascon.h index 8ab0502..c998868 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/ref/ascon.h +++ b/ascon/Implementations/crypto_hash/asconxofv12/ref/ascon.h @@ -3,12 +3,8 @@ #include -#define WORDTOU64 - -typedef uint64_t word_t; - typedef struct { - word_t x0, x1, x2, x3, x4; + uint64_t x0, x1, x2, x3, x4; } state_t; -#endif // ASCON_H_ +#endif /* ASCON_H */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/ref/hash.c b/ascon/Implementations/crypto_hash/asconxofv12/ref/hash.c index fe008b7..ee58594 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/ref/hash.c +++ b/ascon/Implementations/crypto_hash/asconxofv12/ref/hash.c @@ -1,8 +1,8 @@ #include "api.h" #include "ascon.h" -#include "loadstore.h" #include "permutations.h" #include "printstate.h" +#include "word.h" int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { state_t 
s; @@ -18,12 +18,12 @@ int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { /* absorb plaintext */ while (len >= ASCON_128_RATE) { - s.x0 ^= LOAD(in, 8); + s.x0 ^= LOADBYTES(in, 8); P12(&s); in += ASCON_128_RATE; len -= ASCON_128_RATE; } - s.x0 ^= LOAD(in, len); + s.x0 ^= LOADBYTES(in, len); s.x0 ^= PAD(len); P12(&s); printstate("absorb plaintext", &s); @@ -31,12 +31,12 @@ int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { /* squeeze output */ len = CRYPTO_BYTES; while (len > ASCON_128_RATE) { - STORE(out, s.x0, 8); + STOREBYTES(out, s.x0, 8); P12(&s); out += ASCON_128_RATE; len -= ASCON_128_RATE; } - STORE(out, s.x0, 8); + STOREBYTES(out, s.x0, 8); printstate("squeeze output", &s); return 0; diff --git a/ascon/Implementations/crypto_hash/asconxofv12/ref/printstate.h b/ascon/Implementations/crypto_hash/asconxofv12/ref/printstate.h index 34bd476..2021f96 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/ref/printstate.h +++ b/ascon/Implementations/crypto_hash/asconxofv12/ref/printstate.h @@ -12,6 +12,7 @@ #include #include "ascon.h" +#include "word.h" __forceinline void printword(const char* text, const word_t x) { printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); diff --git a/ascon/Implementations/crypto_hash/asconxofv12/ref/word.h b/ascon/Implementations/crypto_hash/asconxofv12/ref/word.h new file mode 100644 index 0000000..5a1519b --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/ref/word.h @@ -0,0 +1,35 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#define WORDTOU64 +#define U64TOWORD + +typedef uint64_t word_t; + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +/* set padding byte in Ascon 64-bit word */ +#define PAD(i) SETBYTE(0x80, i) + +static inline uint64_t LOADBYTES(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) 
x |= SETBYTE(bytes[i], i); + return x; +} + +static inline void STOREBYTES(uint8_t* bytes, uint64_t x, int n) { + for (int i = 0; i < n; ++i) bytes[i] = GETBYTE(x, i); +} + +static inline uint64_t CLEARBYTES(uint64_t x, int n) { + for (int i = 0; i < n; ++i) x &= ~SETBYTE(0xff, i); + return x; +} + +#endif /* WORD_H_ */