diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Cyclist.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Cyclist.h
new file mode 100644
index 0000000..54522bb
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Cyclist.h
@@ -0,0 +1,66 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _Cyclist_h_
+#define _Cyclist_h_
+
+#include <string.h> /* size_t for the prototypes below, memcpy for Cyclist.inc; uint8_t must come from the includer or align.h */
+#include "align.h"
+
+#define Cyclist_ModeHash 1 /* mode selected by Initialize before any key is absorbed */
+#define Cyclist_ModeKeyed 2 /* mode selected once a key has been absorbed */
+
+#define Cyclist_PhaseDown 1 /* the last state operation was a Down */
+#define Cyclist_PhaseUp 2 /* the last state operation was an Up; also the phase right after Initialize */
+
+#ifdef OUTPUT /* trace build: the instance also carries a shadow state and a FILE pointer */
+
+#include <stdio.h> /* FILE */
+
+#define KCP_DeclareCyclistStructure(prefix, size, alignment) /* declares prefix##_Instance, trace variant */ \
+    ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
+        uint8_t state[size]; /* permutation state, handed to the SnP_* primitives */ \
+        uint8_t stateShadow[size]; /* state as of the last permutation, used to print what was XORed since */ \
+        FILE *file; /* trace destination; nothing is printed while this is NULL */ \
+        unsigned int phase; /* Cyclist_PhaseDown or Cyclist_PhaseUp */ \
+        unsigned int mode; /* Cyclist_ModeHash or Cyclist_ModeKeyed */ \
+        unsigned int Rabsorb; /* maximum number of bytes absorbed per block */ \
+        unsigned int Rsqueeze; /* maximum number of bytes squeezed per block */ \
+    } prefix##_Instance;
+
+#else
+
+#define KCP_DeclareCyclistStructure(prefix, size, alignment) /* declares prefix##_Instance, regular variant */ \
+    ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
+        uint8_t state[size]; /* permutation state, handed to the SnP_* primitives */ \
+        unsigned int phase; /* Cyclist_PhaseDown or Cyclist_PhaseUp */ \
+        unsigned int mode; /* Cyclist_ModeHash or Cyclist_ModeKeyed */ \
+        unsigned int Rabsorb; /* maximum number of bytes absorbed per block */ \
+        unsigned int Rsqueeze; /* maximum number of bytes squeezed per block */ \
+    } prefix##_Instance;
+
+#endif
+
+#define KCP_DeclareCyclistFunctions(prefix) /* prototypes of the public functions that Cyclist.inc defines for this prefix */ \
+    void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \
+    void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \
+    void prefix##_Encrypt(prefix##_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen); \
+    void prefix##_Decrypt(prefix##_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen); \
+    void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \
+    void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \
+    void prefix##_Ratchet(prefix##_Instance *instance);
+
+#endif
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Cyclist.inc b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Cyclist.inc
new file mode 100644
index 0000000..f3d8ce9
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Cyclist.inc
@@ -0,0 +1,336 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#define JOIN0(a, b) a ## b
+#define JOIN(a, b) JOIN0(a, b) /* extra level so that SnP and prefix are macro-expanded before being pasted */
+
+#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize)
+#define SnP_Initialize JOIN(SnP, _Initialize)
+#define SnP_AddBytes JOIN(SnP, _AddBytes)
+#define SnP_AddByte JOIN(SnP, _AddByte)
+#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes)
+#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes)
+#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes)
+
+#define Cyclist_Instance JOIN(prefix, _Instance)
+#define Cyclist_Initialize JOIN(prefix, _Initialize)
+#define Cyclist_Absorb JOIN(prefix, _Absorb)
+#define Cyclist_Encrypt JOIN(prefix, _Encrypt)
+#define Cyclist_Decrypt JOIN(prefix, _Decrypt)
+#define Cyclist_Squeeze JOIN(prefix, _Squeeze)
+#define Cyclist_SqueezeKey JOIN(prefix, _SqueezeKey)
+#define Cyclist_Ratchet JOIN(prefix, _Ratchet)
+
+#define Cyclist_AbsorbAny JOIN(prefix, _AbsorbAny)
+#define Cyclist_AbsorbKey JOIN(prefix, _AbsorbKey)
+#define Cyclist_SqueezeAny JOIN(prefix, _SqueezeAny)
+#define Cyclist_Down JOIN(prefix, _Down)
+#define Cyclist_Up JOIN(prefix, _Up)
+#define Cyclist_Crypt JOIN(prefix, _Crypt)
+
+#define Cyclist_f_bPrime JOIN(prefix, _f_bPrime)
+#define Cyclist_Rhash JOIN(prefix, _Rhash)
+#define Cyclist_Rkin JOIN(prefix, _Rkin)
+#define Cyclist_Rkout JOIN(prefix, _Rkout)
+#define Cyclist_lRatchet JOIN(prefix, _lRatchet)
+
+#if defined(CyclistFullBlocks_supported)
+#define Cyclist_AbsorbKeyedFullBlocks JOIN(prefix, _AbsorbKeyedFullBlocks)
+#define Cyclist_AbsorbHashFullBlocks JOIN(prefix, _AbsorbHashFullBlocks)
+#define Cyclist_SqueezeKeyedFullBlocks JOIN(prefix, _SqueezeKeyedFullBlocks)
+#define Cyclist_SqueezeHashFullBlocks JOIN(prefix, _SqueezeHashFullBlocks)
+#define Cyclist_EncryptFullBlocks JOIN(prefix, _EncryptFullBlocks)
+#define Cyclist_DecryptFullBlocks JOIN(prefix, _DecryptFullBlocks)
+#endif
+
+/* ------- Cyclist internal interfaces ------- */
+
+static void Cyclist_Down(Cyclist_Instance *instance, const uint8_t *Xi, unsigned int XiLen, uint8_t Cd)
+{ /* Down: XOR the XiLen bytes of Xi into the state, then the 0x01 pad byte and the domain byte Cd */
+    SnP_AddBytes(instance->state, Xi, 0, XiLen);
+    SnP_AddByte(instance->state, 0x01, XiLen); /* padding byte right after the data */
+    SnP_AddByte(instance->state, (instance->mode == Cyclist_ModeHash) ? (Cd & 0x01) : Cd, Cyclist_f_bPrime - 1); /* hash mode keeps only bit 0 of Cd */
+    instance->phase = Cyclist_PhaseDown;
+
+}
+
+static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu)
+{ /* Up: add Cu (non-hash modes only), permute the state, then copy its first YiLen bytes to Yi */
+    #if defined(OUTPUT)
+    uint8_t s[Cyclist_f_bPrime];
+    #endif
+
+    if (instance->mode != Cyclist_ModeHash) {
+        SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1);
+    }
+    #if defined(OUTPUT)
+    if (instance->file != NULL) {
+        SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
+        SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime ); /* s = state XOR shadow, i.e. what was added since the last permutation */
+    }
+    #endif
+    SnP_Permute( instance->state );
+    #if defined(OUTPUT)
+    if (instance->file != NULL) {
+        memcpy( instance->stateShadow, instance->state, sizeof(instance->state) );
+        fprintf( instance->file, "Data XORed" );
+        displayByteString( instance->file, "", s, Cyclist_f_bPrime );
+        SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
+        fprintf( instance->file, "After f() ");
+        displayByteString( instance->file, "", s, Cyclist_f_bPrime );
+    }
+    #endif
+    instance->phase = Cyclist_PhaseUp;
+    SnP_ExtractBytes( instance->state, Yi, 0, YiLen ); /* callers in this file pass Yi == NULL only together with YiLen == 0 */
+}
+
+static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd)
+{ /* Absorb X in blocks of at most r bytes; Cd tags the first block only; one Down always happens, even for XLen == 0 */
+    unsigned int splitLen;
+
+    do {
+        if (instance->phase != Cyclist_PhaseUp) {
+            Cyclist_Up(instance, NULL, 0, 0); /* a Down must follow an Up, so insert one without output */
+        }
+        splitLen = (unsigned int)MyMin(XLen, r);
+        Cyclist_Down(instance, X, splitLen, Cd);
+        Cd = 0; /* later blocks carry domain byte 0 */
+        X += splitLen;
+        XLen -= splitLen;
+        #if defined(CyclistFullBlocks_supported)
+        if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) {
+            size_t lenProcessed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen);
+            X += lenProcessed;
+            XLen -= lenProcessed;
+        }
+        else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) {
+            size_t lenProcessed = Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen);
+            X += lenProcessed;
+            XLen -= lenProcessed;
+        }
+        #endif
+    } while ( XLen != 0 );
+}
+
+static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
+{ /* Switch to keyed mode and absorb K || ID || (uint8_t)IDLen with domain byte 0x02, then the optional counter one byte per block */
+    uint8_t KID[Cyclist_Rkin]; /* caller must keep KLen + IDLen <= Cyclist_Rkin - 1; that bound is only asserted when DEBUG is set */
+
+    #if DEBUG
+    assert(instance->mode == Cyclist_ModeHash);
+    assert((KLen + IDLen) <= (Cyclist_Rkin - 1));
+    #endif
+    instance->mode = Cyclist_ModeKeyed;
+    instance->Rabsorb = Cyclist_Rkin;
+    instance->Rsqueeze = Cyclist_Rkout;
+    if (KLen != 0) {
+        memcpy(KID, K, KLen);
+        if (IDLen != 0) { memcpy(KID + KLen, ID, IDLen); } /* skip the call for an empty ID: memcpy with a NULL source is undefined even for length 0 */
+        KID[KLen + IDLen] = (uint8_t)IDLen;
+        Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02);
+        if (counterLen != 0) {
+            Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00);
+        }
+    }
+}
+
+static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu)
+{ /* Squeeze YLen bytes into Y; the first Up uses Cu and always runs, later blocks use an empty Down followed by Up(0) */
+    unsigned int len;
+
+    len = (unsigned int)MyMin(YLen, instance->Rsqueeze );
+    Cyclist_Up(instance, Y, len, Cu);
+    Y += len;
+    YLen -= len;
+    while (YLen != 0) {
+        #if defined(CyclistFullBlocks_supported)
+        if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkin)) {
+            size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen);
+            Y += lenProcessed;
+            YLen -= lenProcessed;
+        }
+        else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) {
+            size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen);
+            Y += lenProcessed;
+            YLen -= lenProcessed;
+        }
+        else
+        #endif
+        {
+            Cyclist_Down(instance, NULL, 0, 0);
+            len = (unsigned int)MyMin(YLen, instance->Rsqueeze );
+            Cyclist_Up(instance, Y, len, 0);
+            Y += len;
+            YLen -= len;
+        }
+    }
+}
+
+static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt)
+{ /* Encrypt (decrypt == 0) or decrypt I into O in blocks of at most Cyclist_Rkout bytes; the plaintext is what Down absorbs in both directions */
+    unsigned int splitLen;
+    uint8_t P[Cyclist_Rkout]; /* plaintext copy, so that encryption still absorbs the plaintext after O has been written */
+    uint8_t Cu = 0x80; /* domain byte of the first Up; reset to 0x00 for every later block */
+
+    do {
+        if (decrypt != 0) {
+            #if defined(CyclistFullBlocks_supported)
+            if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
+                size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen);
+                I += lenProcessed;
+                O += lenProcessed;
+                IOLen -= lenProcessed;
+            }
+            else
+            #endif
+            {
+                splitLen = (unsigned int)MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
+                Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
+                SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
+                Cyclist_Down(instance, O, splitLen, 0x00);
+                I += splitLen;
+                O += splitLen;
+                IOLen -= splitLen;
+            }
+        }
+        else {
+            #if defined(CyclistFullBlocks_supported)
+            if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
+                size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen);
+                I += lenProcessed;
+                O += lenProcessed;
+                IOLen -= lenProcessed;
+            }
+            else
+            #endif
+            {
+                splitLen = (unsigned int)MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
+                if (splitLen != 0) { memcpy(P, I, splitLen); } /* skip the call for an empty message: memcpy with a NULL source is undefined even for length 0 */
+                Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
+                SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
+                Cyclist_Down(instance, P, splitLen, 0x00);
+                I += splitLen;
+                O += splitLen;
+                IOLen -= splitLen;
+            }
+        }
+        Cu = 0x00;
+    } while ( IOLen != 0 );
+}
+
+/* ------- Cyclist interfaces ------- */
+
+void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
+{ /* Reset the instance to hash mode, phase Up; when KLen != 0 switch to keyed mode. ID and counter are ignored when KLen == 0 */
+    SnP_StaticInitialize();
+    SnP_Initialize(instance->state);
+    instance->phase = Cyclist_PhaseUp;
+    instance->mode = Cyclist_ModeHash;
+    instance->Rabsorb = Cyclist_Rhash;
+    instance->Rsqueeze = Cyclist_Rhash;
+    #ifdef OUTPUT
+    instance->file = NULL; /* tracing stays off until a FILE pointer is stored here */
+    SnP_Initialize( instance->stateShadow );
+    #endif
+    if (KLen != 0) {
+        Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, counter, counterLen);
+    }
+}
+
+void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, size_t XLen)
+{ /* Absorb X at the current absorb rate, first block tagged with domain byte 0x03 */
+    Cyclist_AbsorbAny(instance, X, XLen, instance->Rabsorb, 0x03);
+}
+
+void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen)
+{ /* Encrypt PLen bytes of P into C; keyed mode only (asserted when DEBUG is set) */
+    #if DEBUG
+    assert(instance->mode == Cyclist_ModeKeyed);
+    #endif
+    Cyclist_Crypt(instance, P, C, PLen, 0);
+}
+
+void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen)
+{ /* Decrypt CLen bytes of C into P; keyed mode only (asserted when DEBUG is set) */
+    #if DEBUG
+    assert(instance->mode == Cyclist_ModeKeyed);
+    #endif
+    Cyclist_Crypt(instance, C, P, CLen, 1);
+}
+
+void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen)
+{ /* Squeeze YLen bytes into Y, first Up tagged with domain byte 0x40 */
+    Cyclist_SqueezeAny(instance, Y, YLen, 0x40);
+}
+
+void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen)
+{ /* Squeeze KLen bytes into K, first Up tagged with domain byte 0x20; keyed mode only (asserted when DEBUG is set) */
+    #if DEBUG
+    assert(instance->mode == Cyclist_ModeKeyed);
+    #endif
+    Cyclist_SqueezeAny(instance, K, KLen, 0x20);
+}
+
+void Cyclist_Ratchet(Cyclist_Instance *instance)
+{ /* Squeeze Cyclist_lRatchet bytes (domain byte 0x10) and absorb them back; keyed mode only (asserted when DEBUG is set) */
+    uint8_t buffer[Cyclist_lRatchet];
+
+    #if DEBUG
+    assert(instance->mode == Cyclist_ModeKeyed);
+    #endif
+    /* Squeeze then absorb is the same as overwriting with zeros */
+    Cyclist_SqueezeAny(instance, buffer, sizeof(buffer), 0x10);
+    Cyclist_AbsorbAny(instance, buffer, sizeof(buffer), instance->Rabsorb, 0x00);
+}
+
+#undef SnP_StaticInitialize
+#undef SnP_Initialize
+#undef SnP_AddBytes
+#undef SnP_AddByte
+#undef SnP_OverwriteBytes
+#undef SnP_ExtractBytes
+#undef SnP_ExtractAndAddBytes
+
+#undef Cyclist_Instance
+#undef Cyclist_Initialize
+#undef Cyclist_Absorb
+#undef Cyclist_Encrypt
+#undef Cyclist_Decrypt
+#undef Cyclist_Squeeze
+#undef Cyclist_SqueezeKey
+#undef Cyclist_Ratchet
+
+#undef Cyclist_AbsorbAny
+#undef Cyclist_AbsorbKey
+#undef Cyclist_SqueezeAny
+#undef Cyclist_Down
+#undef Cyclist_Up
+#undef Cyclist_Crypt
+
+#undef Cyclist_f_bPrime
+#undef Cyclist_Rhash
+#undef Cyclist_Rkin
+#undef Cyclist_Rkout
+#undef Cyclist_lRatchet
+
+#if defined(CyclistFullBlocks_supported)
+#undef Cyclist_AbsorbKeyedFullBlocks
+#undef Cyclist_AbsorbHashFullBlocks
+#undef Cyclist_SqueezeKeyedFullBlocks
+#undef Cyclist_SqueezeHashFullBlocks
+#undef Cyclist_EncryptFullBlocks
+#undef Cyclist_DecryptFullBlocks
+#endif
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Xoodoo-SnP.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Xoodoo-SnP.h
new file mode 100644
index 0000000..7d0c98b
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Xoodoo-SnP.h
@@ -0,0 +1,55 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _Xoodoo_SnP_h_
+#define _Xoodoo_SnP_h_
+
+#include <stddef.h>
+#include <stdint.h>
+
+/** For the documentation, see SnP-documentation.h.
+ */
+
+#define Xoodoo_implementation "32-bit optimized ARM assembler implementation"
+#define Xoodoo_stateSizeInBytes (3*4*4)
+#define Xoodoo_stateAlignment 4
+
+#define Xoodoo_StaticInitialize()
+void Xoodoo_Initialize(void *state);
+#define Xoodoo_AddByte(argS, argData, argOffset) (((uint8_t*)(argS))[(argOffset)] ^= (argData)) /* arguments and expansion parenthesized so that expression arguments bind as written */
+void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
+void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
+void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount);
+//void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds);
+void Xoodoo_Permute_6rounds(void *state);
+void Xoodoo_Permute_12rounds(void *state);
+void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length);
+void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length);
+
+#define Xoodoo_FastXoofff_supported
+void Xoofff_AddIs(uint8_t *output, const uint8_t *input, size_t bitLen);
+size_t Xoofff_CompressFastLoop(uint8_t *kRoll, uint8_t *xAccu, const uint8_t *input, size_t length);
+size_t Xoofff_ExpandFastLoop(uint8_t *yAccu, const uint8_t *kRoll, uint8_t *output, size_t length);
+
+#define CyclistFullBlocks_supported
+size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen);
+size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen);
+size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen);
+size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen);
+size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);
+size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);
+
+#endif
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Xoodoo-uf-armv6-le-gcc.s b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Xoodoo-uf-armv6-le-gcc.s
new file mode 100644
index 0000000..0baa5db
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Xoodoo-uf-armv6-le-gcc.s
@@ -0,0 +1,726 @@
+@
+@ The eXtended Keccak Code Package (XKCP)
+@ https://github.com/XKCP/XKCP
+@
+@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
+@
+@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+@
+@ For more information, feedback or questions, please refer to the Keccak Team website:
+@ https://keccak.team/
+@
+@ To the extent possible under law, the implementer has waived all copyright
+@ and related or neighboring rights to the source code in this file.
+@ http://creativecommons.org/publicdomain/zero/1.0/
+@
+
+@ WARNING: These functions work only on little endian CPU with ARMv6 architecture (e.g., ARM11).
+
+
+.text
+
+@ ----------------------------------------------------------------------------
+@
+@ void Xoodoo_Initialize(void *state)
+@
+    .align 4
+.global Xoodoo_Initialize
+.type Xoodoo_Initialize, %function;
+Xoodoo_Initialize:
+    movs r1, #0
+    movs r2, #0
+    movs r3, #0
+    movs r12, #0
+    stmia r0!, { r1 - r3, r12 } @ three stores of four zero words clear the 48-byte state
+    stmia r0!, { r1 - r3, r12 }
+    stmia r0!, { r1 - r3, r12 }
+    bx lr
+
+
+@ ----------------------------------------------------------------------------
+@
+@ void Xoodoo_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
+@
+    .align 4
+.global Xoodoo_AddBytes
+.type Xoodoo_AddBytes, %function;
+Xoodoo_AddBytes:
+    push {r4,lr}
+    adds r0, r0, r2 @ state += offset
+    subs r3, r3, #4 @ if length >= 4
+    bcc Xoodoo_AddBytes_Bytes
+Xoodoo_AddBytes_LanesLoop: @ then, perform on lanes
+    ldr r2, [r0]
+    ldr r4, [r1], #4
+    eors r2, r2, r4
+    str r2, [r0], #4
+    subs r3, r3, #4
+    bcs Xoodoo_AddBytes_LanesLoop
+Xoodoo_AddBytes_Bytes:
+    adds r3, r3, #3 @ r3 = remaining bytes - 1; no carry means nothing is left
+    bcc Xoodoo_AddBytes_Exit
+Xoodoo_AddBytes_BytesLoop:
+    ldrb r2, [r0]
+    ldrb r4, [r1], #1
+    eors r2, r2, r4
+    strb r2, [r0], #1
+    subs r3, r3, #1
+    bcs Xoodoo_AddBytes_BytesLoop
+Xoodoo_AddBytes_Exit:
+    pop {r4,pc}
+
+
+@ ----------------------------------------------------------------------------
+@
+@ void Xoodoo_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
+@
+    .align 4
+.global Xoodoo_OverwriteBytes
+.type Xoodoo_OverwriteBytes, %function;
+Xoodoo_OverwriteBytes:
+    adds r0, r0, r2 @ state += offset
+    subs r3, r3, #4 @ if length >= 4
+    bcc Xoodoo_OverwriteBytes_Bytes
+Xoodoo_OverwriteBytes_LanesLoop: @ then, perform on words
+    ldr r2, [r1], #4
+    str r2, [r0], #4
+    subs r3, r3, #4
+    bcs Xoodoo_OverwriteBytes_LanesLoop
+Xoodoo_OverwriteBytes_Bytes:
+    adds r3, r3, #3 @ r3 = remaining bytes - 1; no carry means nothing is left
+    bcc Xoodoo_OverwriteBytes_Exit
+Xoodoo_OverwriteBytes_BytesLoop:
+    ldrb r2, [r1], #1
+    strb r2, [r0], #1
+    subs r3, r3, #1
+    bcs Xoodoo_OverwriteBytes_BytesLoop
+Xoodoo_OverwriteBytes_Exit:
+    bx lr
+
+
+@ ----------------------------------------------------------------------------
+@
+@ void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount)
+@
+    .align 4
+.global Xoodoo_OverwriteWithZeroes
+.type Xoodoo_OverwriteWithZeroes, %function;
+Xoodoo_OverwriteWithZeroes:
+    movs r3, #0
+    lsrs r2, r1, #2 @ r2 = number of whole words
+    beq Xoodoo_OverwriteWithZeroes_Bytes
+Xoodoo_OverwriteWithZeroes_LoopLanes:
+    str r3, [r0], #4
+    subs r2, r2, #1
+    bne Xoodoo_OverwriteWithZeroes_LoopLanes
+Xoodoo_OverwriteWithZeroes_Bytes:
+    ands r1, #3 @ r1 = number of trailing bytes
+    beq Xoodoo_OverwriteWithZeroes_Exit
+Xoodoo_OverwriteWithZeroes_LoopBytes:
+    strb r3, [r0], #1
+    subs r1, r1, #1
+    bne Xoodoo_OverwriteWithZeroes_LoopBytes
+Xoodoo_OverwriteWithZeroes_Exit:
+    bx lr
+
+
+@ ----------------------------------------------------------------------------
+@
+@ void Xoodoo_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
+@
+    .align 4
+.global Xoodoo_ExtractBytes
+.type Xoodoo_ExtractBytes, %function;
+Xoodoo_ExtractBytes:
+    adds r0, r0, r2 @ state += offset
+    subs r3, r3, #4 @ if length >= 4
+    bcc Xoodoo_ExtractBytes_Bytes
+Xoodoo_ExtractBytes_LanesLoop: @ then, handle words
+    ldr r2, [r0], #4
+    str r2, [r1], #4
+    subs r3, r3, #4
+    bcs Xoodoo_ExtractBytes_LanesLoop
+Xoodoo_ExtractBytes_Bytes:
+    adds r3, r3, #3 @ r3 = remaining bytes - 1; no carry means nothing is left
+    bcc Xoodoo_ExtractBytes_Exit
+Xoodoo_ExtractBytes_BytesLoop:
+    ldrb r2, [r0], #1
+    strb r2, [r1], #1
+    subs r3, r3, #1
+    bcs Xoodoo_ExtractBytes_BytesLoop
+Xoodoo_ExtractBytes_Exit:
+    bx lr
+
+
+@ ----------------------------------------------------------------------------
+@
+@ void Xoodoo_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
+@
+    .align 4
+.global Xoodoo_ExtractAndAddBytes
+.type Xoodoo_ExtractAndAddBytes, %function;
+Xoodoo_ExtractAndAddBytes:
+    push {r4,r5}
+    adds r0, r0, r3 @ state += offset (offset register no longer needed, reuse for length)
+    ldr r3, [sp, #8] @ get length argument from stack
+    subs r3, r3, #4 @ if length >= 4
+    bcc Xoodoo_ExtractAndAddBytes_Bytes
+Xoodoo_ExtractAndAddBytes_LanesLoop: @ then, handle words
+    ldr r5, [r0], #4
+    ldr r4, [r1], #4
+    eors r5, r5, r4
+    str r5, [r2], #4
+    subs r3, r3, #4
+    bcs Xoodoo_ExtractAndAddBytes_LanesLoop
+Xoodoo_ExtractAndAddBytes_Bytes:
+    adds r3, r3, #3 @ r3 = remaining bytes - 1; no carry means nothing is left
+    bcc Xoodoo_ExtractAndAddBytes_Exit
+Xoodoo_ExtractAndAddBytes_BytesLoop:
+    ldrb r5, [r0], #1
+    ldrb r4, [r1], #1
+    eors r5, r5, r4
+    strb r5, [r2], #1
+    subs r3, r3, #1
+    bcs Xoodoo_ExtractAndAddBytes_BytesLoop
+Xoodoo_ExtractAndAddBytes_Exit:
+    pop {r4,r5}
+    bx lr
+
+
+@ ----------------------------------------------------------------------------
+
+.equ _r0 , 5
+.equ _r1 , 14
+.equ _t3 , 1
+
+.equ _w1 , 11
+
+.equ _e0 , 2
+.equ _e1 , 8
+
+.equ _rc12 , 0x00000058
+.equ _rc11 , 0x00000038
+.equ _rc10 , 0x000003C0
+.equ _rc9 , 0x000000D0
+.equ _rc8 , 0x00000120
+.equ _rc7 , 0x00000014
+.equ _rc6 , 0x00000060
+.equ _rc5 , 0x0000002C
+.equ _rc4 , 0x00000380
+.equ _rc3 , 0x000000F0
+.equ _rc2 , 0x000001A0
+.equ _rc1 , 0x00000012
+
+.equ _rc6x1, 0x00000003
+.equ _rc5x2, 0x0b000000
+.equ _rc4x3, 0x07000000
+.equ _rc3x4, 0x000f0000
+.equ _rc2x5, 0x0000d000
+.equ _rc1x6, 0x00000048
+
+.equ _rc12x1, 0xc0000002
+.equ _rc11x2, 0x0e000000
+.equ _rc10x3, 0x07800000
+.equ _rc9x4 , 0x000d0000
+.equ _rc8x5 , 0x00009000
+.equ _rc7x6 , 0x00000050
+.equ _rc6x7 , 0x0000000c
+.equ _rc5x8 , 0x2c000000
+.equ _rc4x9 , 0x1c000000
+.equ _rc3x10, 0x003c0000
+.equ _rc2x11, 0x00034000
+.equ _rc1x12, 0x00000120
+
+@ ----------------------------------------------------------------------------
+
+.macro mXor3 ro, a0, a1, a2, rho_e1, rho_e2 @ ro = a0 ^ rotl(a1, rho_e1) ^ rotl(a2, rho_e2)
+    .if ((\rho_e1)%32) == 0
+    eors \ro, \a0, \a1
+    .else
+    eor \ro, \a0, \a1, ROR #(32-(\rho_e1))%32
+    .endif
+    .if ((\rho_e2)%32) == 0
+    eors \ro, \ro, \a2
+    .else
+    eor \ro, \ro, \a2, ROR #(32-(\rho_e2))%32
+    .endif
+    .endm
+
+.macro mRliXor ro, ri, rot @ ro ^= rotl(ri, rot): the rotation is applied to the input operand
+    .if ((\rot)%32) == 0
+    eors \ro, \ro, \ri
+    .else
+    eor \ro, \ro, \ri, ROR #(32-(\rot))%32
+    .endif
+    .endm
+
+.macro mRloXor ro, ri, rot @ ro = ri ^ rotl(ro, rot): the rotation is applied to the old output value
+    .if ((\rot)%32) == 0
+    eors \ro, \ro, \ri
+    .else
+    eor \ro, \ri, \ro, ROR #(32-(\rot))%32
+    .endif
+    .endm
+
+.macro mChi3 a0,a1,a2,r0,r1 @ r0 and r1 are scratch registers
+    bic \r0, \a2, \a1, ROR #_w1
+    eors \a0, \a0, \r0, ROR #32-_w1
+    bic \r1, \a0, \a2, ROR #32-_w1
+    eors \a1, \a1, \r1
+    bic \r1, \a1, \a0
+    eors \a2, \a2, \r1, ROR #_w1
+    .endm
+
+.macro mRound r6i, r7i, r8i, r9i, r6w, r7w, r8w, r9w, r10i, r11i, r12i, lri, rho_e1, rho_we2, rc
+
+    @ Theta: Column Parity Mixer (with late Rho-west, Rho-east bit rotations)
+    mXor3 r0, r5, \r9i, \lri, \rho_e1, \rho_we2
+    mXor3 r1, r2, \r6i, \r10i, \rho_e1, \rho_we2
+    mRliXor r0, r0, _r1-_r0
+    mRloXor r2, r0, 32-_r0
+    mRloXor \r6i, r0, \rho_e1-_r0
+    mRloXor \r10i, r0, \rho_we2-_r0
+
+    mXor3 r0, r3, \r7i, \r11i, \rho_e1, \rho_we2
+    mRliXor r1, r1, _r1-_r0
+    mRloXor r3, r1, 32-_r0
+    mRloXor \r7i, r1, \rho_e1-_r0
+    mRloXor \r11i, r1, \rho_we2-_r0
+
+    mXor3 r1, r4, \r8i, \r12i, \rho_e1, \rho_we2
+    mRliXor r0, r0, _r1-_r0
+    mRloXor r4, r0, 32-_r0
+    mRloXor \r8i, r0, \rho_e1-_r0
+    mRloXor \r12i, r0, \rho_we2-_r0
+
+    mRliXor r1, r1, _r1-_r0
+    mRloXor r5, r1, 32-_r0
+    mRloXor \r9i, r1, \rho_e1-_r0
+    mRloXor \lri, r1, \rho_we2-_r0
+    @ After Theta the whole state is rotated -r0
+    @ from here we must use a1.w instead of a1.i
+
+    @ Iota: round constant
+    .if \rc == 0xc0000002
+    eor r2, r2, #0x00000002
+    eor r2, r2, #0xc0000000
+    .else
+    eor r2, r2, #\rc
+    .endif
+
+    @ Chi: non linear step, on columns
+    mChi3 r2, \r6w, \r10i, r0, r1
+    mChi3 r3, \r7w, \r11i, r0, r1
+    mChi3 r4, \r8w, \r12i, r0, r1
+    mChi3 r5, \r9w, \lri, r0, r1
+    .endm
+
+@ ----------------------------------------------------------------------------
+@
+@ void Xoodoo_Permute_6rounds( void *state )
+@
+    .align 4
+.global Xoodoo_Permute_6rounds
+.type Xoodoo_Permute_6rounds, %function;
+Xoodoo_Permute_6rounds:
+    push {r0,r4-r11,lr}
+    ldmia r0!, {r2-r5}
+    ldmia r0!, {r8-r9} @ state words 4-5 go to r8-r9 and words 6-7 to r6-r7
+    ldmia r0!, {r6-r7}
+    ldmia r0, {r10-r12,lr}
+    mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 32, 32, _rc6x1
+    mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4
+    mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5
+    mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6
+    pop {r0,r1} @ r0 = state pointer, r1 = caller's r4 (the two lowest-numbered registers pushed)
+    ror r2, r2, #32-(6*_r0)%32
+    ror r3, r3, #32-(6*_r0)%32
+    ror r4, r4, #32-(6*_r0)%32
+    ror r5, r5, #32-(6*_r0)%32
+    ror r6, r6, #32-(6*_r0+1)%32
+    ror r7, r7, #32-(6*_r0+1)%32
+    ror r8, r8, #32-(6*_r0+1)%32
+    ror r9, r9, #32-(6*_r0+1)%32
+    ror r10, r10, #32-(6*_r0+_e1+_w1)%32
+    ror r11, r11, #32-(6*_r0+_e1+_w1)%32
+    ror r12, r12, #32-(6*_r0+_e1+_w1)%32
+    ror lr, lr, #32-(6*_r0+_e1+_w1)%32
+    stmia r0, {r2-r12,lr}
+    mov r4, r1 @ restore caller's r4 only after the state has been stored from r2-r12,lr
+    pop {r5-r11,pc}
+
+
+@ 
----------------------------------------------------------------------------
+@
+@ void Xoodoo_Permute_12rounds( void *state )
+@
+    .align 4
+.global Xoodoo_Permute_12rounds
+.type Xoodoo_Permute_12rounds, %function;
+Xoodoo_Permute_12rounds:
+    push {r0,r4-r11,lr}
+    ldmia r0, {r2-r12,lr} @ the twelve state words live in r2-r12 and lr for the whole permutation
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc12x1
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc11x2
+    mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc10x3
+    mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc9x4
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc8x5
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc7x6
+    mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc6x7
+    mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x8
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x9
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x10
+    mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x11
+    mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x12
+    ror r2, r2, #32-(12*_r0)%32
+    ror r3, r3, #32-(12*_r0)%32
+    ror r4, r4, #32-(12*_r0)%32
+    ror r5, r5, #32-(12*_r0)%32
+    ror r6, r6, #32-(12*_r0+1)%32
+    ror r7, r7, #32-(12*_r0+1)%32
+    ror r8, r8, #32-(12*_r0+1)%32
+    ror r9, r9, #32-(12*_r0+1)%32
+    ror r10, r10, #32-(12*_r0+_e1+_w1)%32
+    ror r11, r11, #32-(12*_r0+_e1+_w1)%32
+    ror r12, r12, #32-(12*_r0+_e1+_w1)%32
+    ror lr, lr, #32-(12*_r0+_e1+_w1)%32
+    pop {r0,r1} @ r0 = state pointer, r1 = caller's r4 (the two lowest-numbered registers pushed)
+    stmia r0, {r2-r12,lr}
+    mov r4, r1 @ restore caller's r4 only after the state has been stored from r2-r12,lr
+    pop {r5-r11,pc}
+
+
+.equ Xoofff_BlockSize , 3*4*4
+
+@ ----------------------------------------------------------------------------
+@
+@ void Xoofff_AddIs(BitSequence *output, const BitSequence *input, BitLength bitLen)
+    .align 4
+.global Xoofff_AddIs
+.type Xoofff_AddIs, %function;
+Xoofff_AddIs:
+    push {r4-r10,lr}
+
+    subs r2, r2, #Xoofff_BlockSize*8 @ r2 counts bits; handle whole 48-byte blocks first
+    bcc Xoofff_AddIs_LessThanBlock
+Xoofff_AddIs_BlockLoop:
+    ldr r3, [r0, #0]
+    ldr r4, [r0, #4]
+    ldr r5, [r0, #8]
+    ldr r6, [r0, #12]
+    ldr r7, [r1], #4
+    ldr r8, [r1], #4
+    ldr r9, [r1], #4
+    ldr r10, [r1], #4
+    eor r3, r3, r7
+    eor r4, r4, r8
+    eor r5, r5, r9
+    eor r6, r6, r10
+    str r3, [r0], #4
+    str r4, [r0], #4
+    str r5, [r0], #4
+    str r6, [r0], #4
+
+    ldr r3, [r0, #0]
+    ldr r4, [r0, #4]
+    ldr r5, [r0, #8]
+    ldr r6, [r0, #12]
+    ldr r7, [r1], #4
+    ldr r8, [r1], #4
+    ldr r9, [r1], #4
+    ldr r10, [r1], #4
+    eor r3, r3, r7
+    eor r4, r4, r8
+    eor r5, r5, r9
+    eor r6, r6, r10
+    str r3, [r0], #4
+    str r4, [r0], #4
+    str r5, [r0], #4
+    str r6, [r0], #4
+
+    ldr r3, [r0, #0]
+    ldr r4, [r0, #4]
+    ldr r5, [r0, #8]
+    ldr r6, [r0, #12]
+    ldr r7, [r1], #4
+    ldr r8, [r1], #4
+    ldr r9, [r1], #4
+    ldr r10, [r1], #4
+    eor r3, r3, r7
+    eor r4, r4, r8
+    eor r5, r5, r9
+    eor r6, r6, r10
+    str r3, [r0], #4
+    str r4, [r0], #4
+    str r5, [r0], #4
+    str r6, [r0], #4
+
+    subs r2, r2, #Xoofff_BlockSize*8
+    bcs Xoofff_AddIs_BlockLoop
+Xoofff_AddIs_LessThanBlock:
+    adds r2, r2, #Xoofff_BlockSize*8 @ undo the last subtraction: r2 = remaining bits
+    beq Xoofff_AddIs_Return
+    subs r2, r2, #16*8
+    bcc Xoofff_AddIs_LessThan16
+Xoofff_AddIs_16Loop:
+    ldr r3, [r0, #0]
+    ldr r4, [r0, #4]
+    ldr r5, [r0, #8]
+    ldr r6, [r0, #12]
+    ldr r7, [r1], #4
+    ldr r8, [r1], #4
+    ldr r9, [r1], #4
+    ldr r10, [r1], #4
+    eor r3, r3, r7
+    eor r4, r4, r8
+    eor r5, r5, r9
+    eor r6, r6, r10
+    str r3, [r0], #4
+    str r4, [r0], #4
+    str r5, [r0], #4
+    str r6, [r0], #4
+    subs r2, r2, #16*8
+    bcs Xoofff_AddIs_16Loop
+Xoofff_AddIs_LessThan16:
+    adds r2, r2, #16*8
+    beq Xoofff_AddIs_Return
+    subs r2, r2, #4*8
+    bcc Xoofff_AddIs_LessThan4
+Xoofff_AddIs_4Loop:
+    ldr r3, [r0]
+    ldr r7, [r1], #4
+    eors r3, r3, r7
+    str r3, [r0], #4
+    subs r2, r2, #4*8
+    bcs Xoofff_AddIs_4Loop
+Xoofff_AddIs_LessThan4:
+    adds r2, r2, #4*8
+    beq Xoofff_AddIs_Return
+    subs r2, r2, #8
+    bcc Xoofff_AddIs_LessThan1
+Xoofff_AddIs_1Loop:
+    ldrb r3, [r0]
+    ldrb r7, [r1], #1
+    eors r3, r3, r7
+    strb r3, [r0], #1
+    subs r2, r2, #8
+    bcs Xoofff_AddIs_1Loop
+Xoofff_AddIs_LessThan1:
+    adds r2, r2, #8 @ r2 = number of leftover bits (1..7) or 0
+    beq Xoofff_AddIs_Return
+    ldrb r3, [r0]
+    ldrb r7, [r1]
+    movs r1, #1
+    eors r3, r3, r7
+    lsls r1, r1, r2
+    subs r1, r1, #1 @ r1 = (1 << bits) - 1: keep only the low leftover bits of the last byte
+    ands r3, r3, r1
+    strb r3, [r0]
+Xoofff_AddIs_Return:
+    pop {r4-r10,pc}
+
+
+@ ----------------------------------------------------------------------------
+@
+@ size_t Xoofff_CompressFastLoop(unsigned char *kRoll, unsigned char *xAccu, const unsigned char *input, size_t length)
+@
+.equ Xoofff_Compress_kRoll , 0
+.equ Xoofff_Compress_input , 4
+.equ Xoofff_Compress_xAccu , 8
+.equ Xoofff_Compress_iInput , 12
+.equ Xoofff_Compress_length , 16
+
+    .align 4
+.global Xoofff_CompressFastLoop
+.type Xoofff_CompressFastLoop, %function;
+Xoofff_CompressFastLoop:
+    subs r3, #Xoofff_BlockSize @ length must be at least one block: the first block is processed without a check
+    push {r1-r12,lr} @ saved r1, r2, r3 become the xAccu, iInput and length stack slots
+    push {r0,r2} @ kRoll and the running input pointer go below them
+    ldmia r0, {r2-r12,lr} @ get initial kRoll
+Xoofff_CompressFastLoop_Loop:
+    ldr r0, [sp, #Xoofff_Compress_input] @ add input
+    ldr r1, [r0], #4
+    eors r2, r2, r1
+    ldr r1, [r0], #4
+    eors r3, r3, r1
+    ldr r1, [r0], #4
+    eors r4, r4, r1
+    ldr r1, [r0], #4
+    eors r5, r5, r1
+
+    ldr r1, [r0], #4
+    eors r6, r6, r1
+    ldr r1, [r0], #4
+    eors r7, r7, r1
+    ldr r1, [r0], #4
+    eors r8, r8, r1
+    ldr r1, [r0], #4
+    eors r9, r9, r1
+
+    ldr r1, [r0], #4
+    eors r10, r10, r1
+    ldr r1, [r0], #4
+    eors r11, r11, r1
+    ldr r1, [r0], #4
+    eors r12, r12, r1
+    ldr r1, [r0], #4
+    eors lr, lr, r1
+    str r0, [sp, #Xoofff_Compress_input]
+
+    @ permutation
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc6x1
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2
+    mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3
+    mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6
+
+    @ Extract and add into xAccu
+    ldr r0, [sp, #Xoofff_Compress_xAccu]
+    ldr r1, [r0]
+    mRloXor r2, r1, (6*_r0)%32
+    ldr r1, [r0, #4]
+
+    str r2, [r0], #4
+    mRloXor r3, r1, (6*_r0)%32
+    ldr r1, [r0, #4]
+
+    str r3, [r0], #4
+    mRloXor r4, r1, (6*_r0)%32
+    ldr r1, [r0, #4]
+
+    str r4, [r0], #4
+    mRloXor r5, r1, (6*_r0)%32
+    str r5, [r0], #4
+
+    ldm r0, {r2-r5} @ note that r6-r8 and r7-r9 are swapped
+    mRliXor r2, r8, (6*_r0+1)%32
+    mRliXor r3, r9, (6*_r0+1)%32
+    mRliXor r4, r6, (6*_r0+1)%32
+    mRliXor r5, r7, (6*_r0+1)%32
+    stm r0!, {r2-r5}
+
+    ldm r0, {r2-r5}
+    mRliXor r2, r10, (6*_r0+_e1+_w1)%32
+    mRliXor r3, r11, (6*_r0+_e1+_w1)%32
+    mRliXor r4, r12, (6*_r0+_e1+_w1)%32
+    mRliXor r5, lr, (6*_r0+_e1+_w1)%32
+    stm r0!, {r2-r5}
+
+    @ roll kRoll
+    ldr r0, [sp, #Xoofff_Compress_kRoll]
+    ldr lr, [r0], #4 @ old word 0 goes to lr; old words 1-3 to r10-r12 and words 4-11 to r2-r9
+    ldmia r0!, {r10-r12}
+    ldmia r0!, {r2-r9}
+    eors lr, lr, lr, LSL #13
+    eors lr, lr, r2, ROR #32-3
+    sub r0, #Xoofff_BlockSize
+    stmia r0, {r2-r12,lr} @ write the rolled kRoll back; r2-r12,lr also hold it for the next iteration
+    @ loop management
+    ldr r0, [sp, #Xoofff_Compress_length]
+    subs r0, #Xoofff_BlockSize
+    str r0, [sp, #Xoofff_Compress_length]
+    bcs Xoofff_CompressFastLoop_Loop
+    @ return number of bytes processed
+    ldr r0, [sp, #Xoofff_Compress_input]
+    ldr r1, [sp, #Xoofff_Compress_iInput]
+    sub r0, r0, r1
+    pop {r1,r2}
+    pop {r1-r12,pc}
+
+
+@ ----------------------------------------------------------------------------
+@
+@ size_t Xoofff_ExpandFastLoop(unsigned char *yAccu, const unsigned char *kRoll, unsigned char *output, size_t length)
+@
+.equ Xoofff_Expand_yAccu , 0
+.equ Xoofff_Expand_output , 4
+.equ Xoofff_Expand_kRoll , 8
+.equ Xoofff_Expand_iOutput , 12
+.equ Xoofff_Expand_length , 16
+
+    .align 4
+.global Xoofff_ExpandFastLoop
+.type Xoofff_ExpandFastLoop, %function;
+Xoofff_ExpandFastLoop:
+    subs r3, #Xoofff_BlockSize @ length must be at least one block: the first block is processed without a check
+    push {r1-r12,lr} @ saved r1, r2, r3 become the kRoll, iOutput and length stack slots
+    push {r0,r2} @ yAccu and the running output pointer go below them
+    ldmia r0, {r2-r12,lr} @ get initial yAccu
+Xoofff_ExpandFastLoop_Loop:
+    @ permutation
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc6x1
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2
+    mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3
+    mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6
+
+    @ Add k and extract
+    ldr r0, [sp, #Xoofff_Expand_kRoll]
+    ldr r1, [r0], #4
+    mRloXor r2, r1, (6*_r0)%32
+
+    ldr r1, [sp, #Xoofff_Expand_output]
+    str r2, [r1], #4
+
+    ldr r2, [r0], #4
+    mRloXor r3, r2, (6*_r0)%32
+    ldr r2, [r0], #4
+
+    str r3, [r1], #4
+    mRloXor r4, r2, (6*_r0)%32
+    ldr r2, [r0], #4
+
+    str r4, [r1], #4
+    mRloXor r5, r2, (6*_r0)%32
+    str r5, [r1], #4
+
+    ldm r0!, {r2-r5} @ Note that r6-r8 and r7-r9 are swapped
+    mRliXor r2, r8, (6*_r0+1)%32
+    str r2, [r1], #4
+    mRliXor r3, r9, (6*_r0+1)%32
+    str r3, [r1], #4
+    mRliXor r4, r6, (6*_r0+1)%32
+    str r4, [r1], #4
+    mRliXor r5, r7, (6*_r0+1)%32
+    str r5, [r1], #4
+
+    ldm r0!, {r2-r5}
+    mRliXor r2, r10, (6*_r0+_e1+_w1)%32
+    str r2, [r1], #4
+    mRliXor r3, r11, (6*_r0+_e1+_w1)%32
+    str r3, [r1], #4
+    mRliXor r4, r12, (6*_r0+_e1+_w1)%32
+    str r4, [r1], #4
+    mRliXor r5, lr, (6*_r0+_e1+_w1)%32
+    str r5, [r1], #4
+
+    @ roll-e yAccu
+    ldr r0, [sp, #Xoofff_Expand_yAccu]
+    str r1, [sp, #Xoofff_Expand_output]
+    ldr lr, [r0], #4 @ old word 0 goes to lr; old words 1-3 to r10-r12 and words 4-11 to r2-r9
+    ldmia r0!, {r10-r12}
+    ldmia r0!, {r2-r9}
+    and r1, r6, r2
+    eor lr, r1, lr, ROR #32-5
+    eor lr, lr, r2, ROR #32-13
+    eor lr, lr, #7
+    sub r0, #Xoofff_BlockSize
+    stmia r0, {r2-r12,lr} @ write the rolled yAccu back; r2-r12,lr also hold it for the next iteration
+    @ loop management
+    ldr r0, [sp, #Xoofff_Expand_length]
+    subs r0, #Xoofff_BlockSize
+    str r0, [sp, #Xoofff_Expand_length]
+    bcs Xoofff_ExpandFastLoop_Loop
+    @ return number of bytes processed
+    ldr r0, [sp, #Xoofff_Expand_output]
+    ldr r1, [sp, #Xoofff_Expand_iOutput]
+    sub r0, r0, r1
+    pop {r1,r2}
+    pop {r1-r12,pc}
+
+
diff 
--git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Xoodoo.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Xoodoo.h
new file mode 100644
index 0000000..1b6f1a9
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Xoodoo.h
@@ -0,0 +1,98 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _Xoodoo_h_
+#define _Xoodoo_h_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define MAXROUNDS   12
+#define NROWS       3
+#define NCOLUMS     4
+#define NLANES      (NCOLUMS*NROWS)
+
+/*  Round constants */
+#define _rc12   0x00000058
+#define _rc11   0x00000038
+#define _rc10   0x000003C0
+#define _rc9    0x000000D0
+#define _rc8    0x00000120
+#define _rc7    0x00000014
+#define _rc6    0x00000060
+#define _rc5    0x0000002C
+#define _rc4    0x00000380
+#define _rc3    0x000000F0
+#define _rc2    0x000001A0
+#define _rc1    0x00000012
+
+
+/*
+ * ROTL32(a, offset): rotate the 32-bit value a left by offset bits.
+ * The portable fallback joins the two shifted parts with OR: when offset is
+ * a multiple of 32 both shift counts are 0, and XOR would yield a ^ a == 0
+ * instead of a. The right-shift count is reduced modulo 32 after reducing
+ * offset, and a is parenthesized so the cast covers the whole argument.
+ */
+#if !defined(ROTL32)
+    #if defined (__arm__) && !defined(__GNUC__)
+        #define ROTL32(a, offset)                       __ror(a, (32-(offset))%32)
+    #elif defined(_MSC_VER)
+        #define ROTL32(a, offset)                       _rotl(a, (offset)%32)
+    #else
+        #define ROTL32(a, offset)                       ((((uint32_t)(a)) << ((offset)%32)) | (((uint32_t)(a)) >> ((32-((offset)%32))%32)))
+    #endif
+#endif
+
+/*
+ * READ32_UNALIGNED(argAddress): read a uint32_t located at argAddress.
+ * NOTE(review): apart from the __packed variant this is a plain pointer cast
+ * and dereference; it presumably relies on the target tolerating unaligned
+ * 32-bit loads -- confirm per platform.
+ */
+#if !defined(READ32_UNALIGNED)
+    #if defined (__arm__) && !defined(__GNUC__)
+        #define READ32_UNALIGNED(argAddress)            (*((const __packed uint32_t*)(argAddress)))
+    #elif defined(_MSC_VER)
+        #define READ32_UNALIGNED(argAddress)            (*((const uint32_t*)(argAddress)))
+    #else
+        #define READ32_UNALIGNED(argAddress)            (*((const uint32_t*)(argAddress)))
+    #endif
+#endif
+
+/*
+ * WRITE32_UNALIGNED(argAddress, argData): store argData as a uint32_t at
+ * argAddress; the same pointer-cast caveat as for READ32_UNALIGNED applies.
+ */
+#if !defined(WRITE32_UNALIGNED)
+    #if defined (__arm__) && !defined(__GNUC__)
+        #define WRITE32_UNALIGNED(argAddress, argData)  (*((__packed uint32_t*)(argAddress)) = (argData))
+    #elif defined(_MSC_VER)
+        #define WRITE32_UNALIGNED(argAddress, argData)  (*((uint32_t*)(argAddress)) = (argData))
+    #else
+        #define WRITE32_UNALIGNED(argAddress, argData)  (*((uint32_t*)(argAddress)) = (argData))
+    #endif
+#endif
+
+/* index(x, y): offset of lane (x mod NCOLUMS, y mod NROWS) in a row-major array of NLANES lanes. */
+#if !defined(index)
+    #define    index(__x,__y)    ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS))
+#endif
+
+/* One lane, held as a 32-bit unsigned word. */
+typedef    uint32_t tXoodooLane;
+
+#endif
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Xoodyak-parameters.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Xoodyak-parameters.h
new file mode 100644
index 0000000..a8c34d8
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Xoodyak-parameters.h
@@ -0,0 +1,26 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _Xoodyak_parameters_h_
+#define _Xoodyak_parameters_h_
+
+#define Xoodyak_f_bPrime    48
+#define Xoodyak_Rhash       16
+#define Xoodyak_Rkin        44
+#define Xoodyak_Rkout       24
+#define Xoodyak_lRatchet    16
+
+#endif
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Xoodyak-uf-armv6-le-gcc.s b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Xoodyak-uf-armv6-le-gcc.s
new file mode 100644
index 0000000..68fb7db
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Xoodyak-uf-armv6-le-gcc.s
@@ -0,0 +1,563 @@
+@
+@ The eXtended Keccak Code Package (XKCP)
+@ https://github.com/XKCP/XKCP
+@
+@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
+@
+@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+@
+@ For more information, feedback or questions, please refer to the Keccak Team website:
+@ https://keccak.team/
+@
+@ To the extent possible under law, the implementer has waived all copyright
+@ and related or neighboring rights to the source code in this file.
+@ http://creativecommons.org/publicdomain/zero/1.0/
+@
+
+@ WARNING: These functions work only on little endian CPU with ARMv6 architecture (e.g., ARM11).
+ + +.text + + +@ ---------------------------------------------------------------------------- + +.equ _r0 , 5 +.equ _r1 , 14 +.equ _t3 , 1 + +.equ _w1 , 11 + +.equ _e0 , 2 +.equ _e1 , 8 + +.equ _rc12 , 0x00000058 +.equ _rc11 , 0x00000038 +.equ _rc10 , 0x000003C0 +.equ _rc9 , 0x000000D0 +.equ _rc8 , 0x00000120 +.equ _rc7 , 0x00000014 +.equ _rc6 , 0x00000060 +.equ _rc5 , 0x0000002C +.equ _rc4 , 0x00000380 +.equ _rc3 , 0x000000F0 +.equ _rc2 , 0x000001A0 +.equ _rc1 , 0x00000012 + +.equ _rc6x1 , 0x00000003 +.equ _rc5x2 , 0x0b000000 +.equ _rc4x3 , 0x07000000 +.equ _rc3x4 , 0x000f0000 +.equ _rc2x5 , 0x0000d000 +.equ _rc1x6 , 0x00000048 + +.equ _rc12x1, 0xc0000002 +.equ _rc11x2, 0x0e000000 +.equ _rc10x3, 0x07800000 +.equ _rc9x4 , 0x000d0000 +.equ _rc8x5 , 0x00009000 +.equ _rc7x6 , 0x00000050 +.equ _rc6x7 , 0x0000000c +.equ _rc5x8 , 0x2c000000 +.equ _rc4x9 , 0x1c000000 +.equ _rc3x10, 0x003c0000 +.equ _rc2x11, 0x00034000 +.equ _rc1x12, 0x00000120 + +@ ---------------------------------------------------------------------------- + +.macro mXor3 ro, a0, a1, a2, rho_e1, rho_e2 + .if ((\rho_e1)%32) == 0 + eors \ro, \a0, \a1 + .else + eor \ro, \a0, \a1, ROR #(32-(\rho_e1))%32 + .endif + .if ((\rho_e2)%32) == 0 + eors \ro, \ro, \a2 + .else + eor \ro, \ro, \a2, ROR #(32-(\rho_e2))%32 + .endif + .endm + +.macro mRliXor ro, ri, rot + .if ((\rot)%32) == 0 + eors \ro, \ro, \ri + .else + eor \ro, \ro, \ri, ROR #(32-(\rot))%32 + .endif + .endm + +.macro mRloXor ro, ri, rot + .if ((\rot)%32) == 0 + eors \ro, \ro, \ri + .else + eor \ro, \ri, \ro, ROR #(32-(\rot))%32 + .endif + .endm + +.macro mChi3 a0,a1,a2,r0,r1 + bic \r0, \a2, \a1, ROR #_w1 + eors \a0, \a0, \r0, ROR #32-_w1 + bic \r1, \a0, \a2, ROR #32-_w1 + eors \a1, \a1, \r1 + bic \r1, \a1, \a0 + eors \a2, \a2, \r1, ROR #_w1 + .endm + +.macro mRound r6i, r7i, r8i, r9i, r6w, r7w, r8w, r9w, r10i, r11i, r12i, lri, rho_e1, rho_we2, rc + + @ Theta: Column Parity Mixer (with late Rho-west, Rho-east bit rotations) + mXor3 r0, r5, \r9i, \lri, 
\rho_e1, \rho_we2 + mXor3 r1, r2, \r6i, \r10i, \rho_e1, \rho_we2 + mRliXor r0, r0, _r1-_r0 + mRloXor r2, r0, 32-_r0 + mRloXor \r6i, r0, \rho_e1-_r0 + mRloXor \r10i, r0, \rho_we2-_r0 + + mXor3 r0, r3, \r7i, \r11i, \rho_e1, \rho_we2 + mRliXor r1, r1, _r1-_r0 + mRloXor r3, r1, 32-_r0 + mRloXor \r7i, r1, \rho_e1-_r0 + mRloXor \r11i, r1, \rho_we2-_r0 + + mXor3 r1, r4, \r8i, \r12i, \rho_e1, \rho_we2 + mRliXor r0, r0, _r1-_r0 + mRloXor r4, r0, 32-_r0 + mRloXor \r8i, r0, \rho_e1-_r0 + mRloXor \r12i, r0, \rho_we2-_r0 + + mRliXor r1, r1, _r1-_r0 + mRloXor r5, r1, 32-_r0 + mRloXor \r9i, r1, \rho_e1-_r0 + mRloXor \lri, r1, \rho_we2-_r0 + @ After Theta the whole state is rotated -r0 + @ from here we must use a1.w instead of a1.i + + @ Iota: round constant + .if \rc == 0xc0000002 + eor r2, r2, #0x00000002 + eor r2, r2, #0xc0000000 + .else + eor r2, r2, #\rc + .endif + + @ Chi: non linear step, on colums + mChi3 r2, \r6w, \r10i, r0, r1 + mChi3 r3, \r7w, \r11i, r0, r1 + mChi3 r4, \r8w, \r12i, r0, r1 + mChi3 r5, \r9w, \lri, r0, r1 + .endm + +.equ offsetInstance , 0 +.equ offsetInitialLen , 16 +.equ offsetReturn , 20 + +@ ---------------------------------------------------------------------------- +@ +@ Xoodoo_Permute_12roundsAsm: only callable from asm +@ + .align 4 +.type Xoodoo_Permute_12roundsAsm, %function; +Xoodoo_Permute_12roundsAsm: + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc12x1 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc11x2 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc10x3 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc9x4 + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc8x5 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc7x6 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc6x7 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x8 + mRound r6, r7, r8, 
r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x9 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x10 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x11 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x12 + ror r2, r2, #32-(12*_r0)%32 + ror r3, r3, #32-(12*_r0)%32 + ror r4, r4, #32-(12*_r0)%32 + ror r5, r5, #32-(12*_r0)%32 + ror r6, r6, #32-(12*_r0+1)%32 + ror r7, r7, #32-(12*_r0+1)%32 + ror r8, r8, #32-(12*_r0+1)%32 + ror r9, r9, #32-(12*_r0+1)%32 + ror r10, r10, #32-(12*_r0+_e1+_w1)%32 + ror r11, r11, #32-(12*_r0+_e1+_w1)%32 + ror r12, r12, #32-(12*_r0+_e1+_w1)%32 + ror lr, lr, #32-(12*_r0+_e1+_w1)%32 + ldr pc, [sp, #offsetReturn] + + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen) +@ { +@ size_t initialLength = XLen@ +@ +@ do { +@ SnP_Permute(state )@ /* Xoodyak_Up(instance, NULL, 0, 0)@ */ +@ SnP_AddBytes(state, X, 0, Xoodyak_Rkin)@ /* Xoodyak_Down(instance, X, Xoodyak_Rkin, 0)@ */ +@ SnP_AddByte(state, 0x01, Xoodyak_Rkin)@ +@ X += Xoodyak_Rkin@ +@ XLen -= Xoodyak_Rkin@ +@ } while (XLen >= Xoodyak_Rkin)@ +@ +@ return initialLength - XLen@ +@ } +@ +.equ offsetAbsorbX , 4 +.equ offsetAbsorbXLen , 8 + + .align 4 +.global Xoodyak_AbsorbKeyedFullBlocks +.type Xoodyak_AbsorbKeyedFullBlocks, %function; +Xoodyak_AbsorbKeyedFullBlocks: + push {r4-r12,lr} + mov r4, r2 @ r4 initialLength + subs r2, r2, #44 + ldr r5, =Xoodyak_AbsorbKeyedFullBlocks_Ret + push {r0-r5} + ldmia r0, {r2-r12,lr} +Xoodyak_AbsorbKeyedFullBlocks_Loop: + b Xoodoo_Permute_12roundsAsm +Xoodyak_AbsorbKeyedFullBlocks_Ret: + ldr r0, [sp, #offsetAbsorbX] + ldr r1, [r0], #4 + eors r2, r2, r1 + ldr r1, [r0], #4 + eors r3, r3, r1 + ldr r1, [r0], #4 + eors r4, r4, r1 + ldr r1, [r0], #4 + eors r5, r5, r1 + ldr r1, [r0], #4 + eors r6, r6, r1 + ldr r1, [r0], #4 + eors r7, r7, r1 + ldr r1, [r0], #4 + 
eors r8, r8, r1 + ldr r1, [r0], #4 + eors r9, r9, r1 + ldr r1, [r0], #4 + eors r10, r10, r1 + ldr r1, [r0], #4 + eors r11, r11, r1 + ldr r1, [r0], #4 + eors lr, lr, #1 + eors r12, r12, r1 + ldr r1, [sp, #offsetAbsorbXLen] + str r0, [sp, #offsetAbsorbX] + subs r1, r1, #44 + str r1, [sp, #offsetAbsorbXLen] + bcs Xoodyak_AbsorbKeyedFullBlocks_Loop + ldr r0, [sp, #offsetInstance] + stmia r0, {r2-r12,lr} + pop {r0-r5} + adds r2, r2, #44 + sub r0, r4, r2 + pop {r4-r12,pc} + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen) +@ { +@ size_t initialLength = XLen@ +@ +@ do { +@ SnP_Permute(state )@ /* Xoodyak_Up(instance, NULL, 0, 0)@ */ +@ SnP_AddBytes(state, X, 0, Xoodyak_Rhash)@ /* Xoodyak_Down(instance, X, Xoodyak_Rhash, 0)@ */ +@ SnP_AddByte(state, 0x01, Xoodyak_Rhash)@ +@ X += Xoodyak_Rhash@ +@ XLen -= Xoodyak_Rhash@ +@ } while (XLen >= Xoodyak_Rhash)@ +@ +@ return initialLength - XLen@ +@ } +@ + .align 4 +.global Xoodyak_AbsorbHashFullBlocks +.type Xoodyak_AbsorbHashFullBlocks, %function; +Xoodyak_AbsorbHashFullBlocks: + push {r4-r12,lr} + mov r4, r2 @ r4 initialLength + subs r2, r2, #16 + ldr r5, =Xoodyak_AbsorbHashFullBlocks_Ret + push {r0-r5} + ldmia r0, {r2-r12,lr} +Xoodyak_AbsorbHashFullBlocks_Loop: + b Xoodoo_Permute_12roundsAsm +Xoodyak_AbsorbHashFullBlocks_Ret: + ldr r0, [sp, #offsetAbsorbX] + ldr r1, [r0], #4 + eors r2, r2, r1 + ldr r1, [r0], #4 + eors r3, r3, r1 + ldr r1, [r0], #4 + eors r4, r4, r1 + ldr r1, [r0], #4 + eors r6, r6, #1 + eors r5, r5, r1 + ldr r1, [sp, #offsetAbsorbXLen] + str r0, [sp, #offsetAbsorbX] + subs r1, r1, #16 + str r1, [sp, #offsetAbsorbXLen] + bcs Xoodyak_AbsorbHashFullBlocks_Loop + ldr r0, [sp, #offsetInstance] + stmia r0, {r2-r12,lr} + pop {r0-r5} + adds r2, r2, #16 + sub r0, r4, r2 + pop {r4-r12,pc} + + +@ ---------------------------------------------------------------------------- +@ +@ size_t 
Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen) +@ { +@ size_t initialLength = YLen@ +@ +@ do { +@ SnP_AddByte(state, 0x01, 0)@ /* Xoodyak_Down(instance, NULL, 0, 0)@ */ +@ SnP_Permute(state )@ /* Xoodyak_Up(instance, Y, Xoodyak_Rkout, 0)@ */ +@ SnP_ExtractBytes(state, Y, 0, Xoodyak_Rkout)@ +@ Y += Xoodyak_Rkout@ +@ YLen -= Xoodyak_Rkout@ +@ } while (YLen >= Xoodyak_Rkout)@ +@ +@ return initialLength - YLen@ +@ } +@ +.equ offsetSqueezeY , 4 +.equ offsetSqueezeYLen , 8 + + .align 4 +.global Xoodyak_SqueezeKeyedFullBlocks +.type Xoodyak_SqueezeKeyedFullBlocks, %function; +Xoodyak_SqueezeKeyedFullBlocks: + push {r4-r12,lr} + mov r4, r2 @ r4 initialLength + subs r2, r2, #24 + ldr r5, =Xoodyak_SqueezeKeyedFullBlocks_Ret + push {r0-r5} + ldmia r0, {r2-r12,lr} +Xoodyak_SqueezeKeyedFullBlocks_Loop: + eors r2, r2, #1 + b Xoodoo_Permute_12roundsAsm +Xoodyak_SqueezeKeyedFullBlocks_Ret: + ldr r0, [sp, #offsetSqueezeY] + str r2, [r0], #4 + str r3, [r0], #4 + str r4, [r0], #4 + str r5, [r0], #4 + str r6, [r0], #4 + str r7, [r0], #4 + ldr r1, [sp, #offsetSqueezeYLen] + str r0, [sp, #offsetSqueezeY] + subs r1, r1, #24 + str r1, [sp, #offsetSqueezeYLen] + bcs Xoodyak_SqueezeKeyedFullBlocks_Loop + ldr r0, [sp, #offsetInstance] + stmia r0, {r2-r12,lr} + pop {r0-r5} + adds r2, r2, #24 + sub r0, r4, r2 + pop {r4-r12,pc} + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen) +@ { +@ size_t initialLength = YLen@ +@ +@ do { +@ SnP_AddByte(state, 0x01, 0)@ /* Xoodyak_Down(instance, NULL, 0, 0)@ */ +@ SnP_Permute(state)@ /* Xoodyak_Up(instance, Y, Xoodyak_Rhash, 0)@ */ +@ SnP_ExtractBytes(state, Y, 0, Xoodyak_Rhash)@ +@ Y += Xoodyak_Rhash@ +@ YLen -= Xoodyak_Rhash@ +@ } while (YLen >= Xoodyak_Rhash)@ +@ +@ return initialLength - YLen@ +@ } +@ + .align 4 +.global Xoodyak_SqueezeHashFullBlocks +.type Xoodyak_SqueezeHashFullBlocks, %function; 
+Xoodyak_SqueezeHashFullBlocks: + push {r4-r12,lr} + mov r4, r2 @ r4 initialLength + subs r2, r2, #16 + ldr r5, =Xoodyak_SqueezeHashFullBlocks_Ret + push {r0-r5} + ldmia r0, {r2-r12,lr} +Xoodyak_SqueezeHashFullBlocks_Loop: + eors r2, r2, #1 + b Xoodoo_Permute_12roundsAsm +Xoodyak_SqueezeHashFullBlocks_Ret: + ldr r0, [sp, #offsetSqueezeY] + str r2, [r0], #4 + str r3, [r0], #4 + str r4, [r0], #4 + str r5, [r0], #4 + ldr r1, [sp, #offsetSqueezeYLen] + str r0, [sp, #offsetSqueezeY] + subs r1, r1, #16 + str r1, [sp, #offsetSqueezeYLen] + bcs Xoodyak_SqueezeHashFullBlocks_Loop + ldr r0, [sp, #offsetInstance] + stmia r0, {r2-r12,lr} + pop {r0-r5} + adds r2, r2, #16 + sub r0, r4, r2 + pop {r4-r12,pc} + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen) +@ { +@ size_t initialLength = IOLen@ +@ +@ do { +@ SnP_Permute(state)@ +@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@ +@ SnP_OverwriteBytes(state, O, 0, Xoodyak_Rkout)@ +@ SnP_AddByte(state, 0x01, Xoodyak_Rkout)@ +@ I += Xoodyak_Rkout@ +@ O += Xoodyak_Rkout@ +@ IOLen -= Xoodyak_Rkout@ +@ } while (IOLen >= Xoodyak_Rkout)@ +@ +@ return initialLength - IOLen@ +@ } +@ +.equ offsetCryptI , 4+8 +.equ offsetCryptO , 8+8 +.equ offsetCryptIOLen , 12 + + .align 4 +.global Xoodyak_EncryptFullBlocks +.type Xoodyak_EncryptFullBlocks, %function; +Xoodyak_EncryptFullBlocks: + push {r4-r12,lr} + mov r4, r3 @ r4 initialLength + subs r3, r3, #24 + ldr r5, =Xoodyak_EncryptFullBlocks_Ret + push {r0-r5} + ldmia r0, {r2-r12,lr} +Xoodyak_EncryptFullBlocks_Loop: + b Xoodoo_Permute_12roundsAsm +Xoodyak_EncryptFullBlocks_Ret: + push {r10, r11} + ldr r11, [sp, #offsetCryptI] + ldr r10, [sp, #offsetCryptO] + ldr r0, [r11], #4 + ldr r1, [r11], #4 + eors r2, r2, r0 + str r2, [r10], #4 + eors r3, r3, r1 + ldr r0, [r11], #4 + str r3, [r10], #4 + eors r4, r4, r0 + ldr r1, [r11], #4 + str r4, [r10], #4 + eors r5, r5, r1 + 
ldr r0, [r11], #4 + str r5, [r10], #4 + eors r6, r6, r0 + ldr r1, [r11], #4 + str r6, [r10], #4 + eors r7, r7, r1 + str r7, [r10], #4 + str r10, [sp, #offsetCryptO] + str r11, [sp, #offsetCryptI] + pop {r10, r11} + ldr r0, [sp, #offsetCryptIOLen] + eors r8, r8, #1 + subs r0, r0, #24 + str r0, [sp, #offsetCryptIOLen] + bcs Xoodyak_EncryptFullBlocks_Loop + ldr r0, [sp, #offsetInstance] + stmia r0, {r2-r12,lr} + pop {r0-r5} + adds r3, r3, #24 + sub r0, r4, r3 + pop {r4-r12,pc} + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen) +@ { +@ size_t initialLength = IOLen@ +@ +@ do { +@ SnP_Permute(state)@ +@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@ +@ SnP_AddBytes(state, O, 0, Xoodyak_Rkout)@ +@ SnP_AddByte(state, 0x01, Xoodyak_Rkout)@ +@ I += Xoodyak_Rkout@ +@ O += Xoodyak_Rkout@ +@ IOLen -= Xoodyak_Rkout@ +@ } while (IOLen >= Xoodyak_Rkout)@ +@ +@ return initialLength - IOLen@ +@ } +@ + .align 4 +.global Xoodyak_DecryptFullBlocks +.type Xoodyak_DecryptFullBlocks, %function; +Xoodyak_DecryptFullBlocks: + push {r4-r12,lr} + mov r4, r3 @ r4 initialLength + subs r3, r3, #24 + ldr r5, =Xoodyak_DecryptFullBlocks_Ret + push {r0-r5} + ldmia r0, {r2-r12,lr} +Xoodyak_DecryptFullBlocks_Loop: + b Xoodoo_Permute_12roundsAsm +Xoodyak_DecryptFullBlocks_Ret: + push {r10, r11} + ldr r11, [sp, #offsetCryptI] + ldr r10, [sp, #offsetCryptO] + ldr r0, [r11], #4 + ldr r1, [r11], #4 + eors r2, r2, r0 + str r2, [r10], #4 + mov r2, r0 + eors r3, r3, r1 + ldr r0, [r11], #4 + str r3, [r10], #4 + mov r3, r1 + eors r4, r4, r0 + ldr r1, [r11], #4 + str r4, [r10], #4 + mov r4, r0 + eors r5, r5, r1 + ldr r0, [r11], #4 + str r5, [r10], #4 + mov r5, r1 + eors r6, r6, r0 + ldr r1, [r11], #4 + str r6, [r10], #4 + mov r6, r0 + eors r7, r7, r1 + str r7, [r10], #4 + mov r7, r1 + str r10, [sp, #offsetCryptO] + str r11, [sp, #offsetCryptI] + pop {r10, r11} + ldr r0, [sp, 
#offsetCryptIOLen] + eors r8, r8, #1 + subs r0, r0, #24 + str r0, [sp, #offsetCryptIOLen] + bcs Xoodyak_DecryptFullBlocks_Loop + ldr r0, [sp, #offsetInstance] + stmia r0, {r2-r12,lr} + pop {r0-r5} + adds r3, r3, #24 + sub r0, r4, r3 + pop {r4-r12,pc} + + diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Xoodyak.c b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Xoodyak.c new file mode 100644 index 0000000..c5407dc --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/Xoodyak.c @@ -0,0 +1,55 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifdef XoodooReference + #include "displayIntermediateValues.h" +#endif + +#if DEBUG +#include +#endif +#include +#include "Xoodyak.h" + +#ifdef OUTPUT +#include +#include + +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length); +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length) +{ + unsigned int i; + + fprintf(f, "%s:", synopsis); + for(i=0; i +#include "Cyclist.h" +#include "Xoodoo-SnP.h" +#include "Xoodyak-parameters.h" + +KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment) +KCP_DeclareCyclistFunctions(Xoodyak) + +#else +#error This requires an implementation of Xoodoo +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/align.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/align.h new file mode 100644 index 0000000..82ad2f9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/align.h @@ -0,0 +1,33 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _align_h_ +#define _align_h_ + +/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. 
*/ +#ifdef ALIGN +#undef ALIGN +#endif + +#if defined(__GNUC__) +#define ALIGN(x) __attribute__ ((aligned(x))) +#elif defined(_MSC_VER) +#define ALIGN(x) __declspec(align(x)) +#elif defined(__ARMCC_VERSION) +#define ALIGN(x) __align(x) +#else +#define ALIGN(x) +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/api.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/api.h new file mode 100644 index 0000000..4ceda96 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/brg_endian.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/brg_endian.h new file mode 100644 index 0000000..7c640b9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/brg_endian.h @@ -0,0 +1,143 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The redistribution and use of this software (with or without changes) + is allowed without the payment of fees or royalties provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. 
+ --------------------------------------------------------------------------- + Issue Date: 20/12/2007 + Changes for ARM 9/9/2010 +*/ + +#ifndef _BRG_ENDIAN_H +#define _BRG_ENDIAN_H + +#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ +#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ + +#if 0 +/* Include files where endian defines and byteswap functions may reside */ +#if defined( __sun ) +# include +#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ ) +# include +#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \ + defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ ) +# include +#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ ) +# if !defined( __MINGW32__ ) && !defined( _AIX ) +# include +# if !defined( __BEOS__ ) +# include +# endif +# endif +#endif +#endif + +/* Now attempt to set the define for platform byte order using any */ +/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */ +/* seem to encompass most endian symbol definitions */ + +#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN ) +# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN ) +# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( _BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( _LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( 
__BIG_ENDIAN ) && defined( __LITTLE_ENDIAN ) +# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ ) +# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +/* if the platform byte order could not be determined, then try to */ +/* set this define using common machine defines */ +#if !defined(PLATFORM_BYTE_ORDER) + +#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ + defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ + defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ + defined( vax ) || defined( vms ) || defined( VMS ) || \ + defined( __VMS ) || defined( _M_X64 ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN + +#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ + defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ + defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ + defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ + defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ + defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ + defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \ + defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ ) 
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN + +#elif defined(__arm__) +# ifdef __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# else +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif 1 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#elif 0 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#else +# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order +#endif + +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/config.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/config.h new file mode 100644 index 0000000..7dfc043 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/config.h @@ -0,0 +1,4 @@ +/* File generated by ToTargetConfigFile.xsl */ + +#define XKCP_has_Xoodyak +#define XKCP_has_Xoodoo diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/encrypt.c b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/encrypt.c new file mode 100644 index 0000000..199b719 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6/encrypt.c @@ -0,0 +1,90 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include "crypto_aead.h" +#include "api.h" +#include "Xoodyak.h" +#include <string.h> + +#if !defined(CRYPTO_KEYBYTES) + #define CRYPTO_KEYBYTES 16 +#endif +#if !defined(CRYPTO_NPUBBYTES) + #define CRYPTO_NPUBBYTES 16 +#endif + +#define TAGLEN 16 +/* Encrypt m (mlen bytes) under key k with npub passed as the ID, absorbing ad first: writes mlen ciphertext bytes followed by a TAGLEN-byte tag to c, sets *clen to mlen plus TAGLEN and returns 0. nsec is unused. */ +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k) +{ + Xoodyak_Instance instance; + + (void)nsec; + + Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, npub, CRYPTO_NPUBBYTES, NULL, 0); + Xoodyak_Absorb(&instance, ad, (size_t)adlen); + Xoodyak_Encrypt(&instance, m, c, (size_t)mlen); + Xoodyak_Squeeze(&instance, c + mlen, TAGLEN); + *clen = mlen + TAGLEN; + #if 0 + { + unsigned int i; + for (i = 0; i < *clen; ++i ) + { + printf("\\x%02x", c[i] ); + } + printf("\n"); + } + #endif + return 0; +} +/* Verify and decrypt c (clen bytes, the last TAGLEN being the tag): returns -1 with *mlen set to 0 when clen is shorter than TAGLEN or the tag does not match (m is zeroed in the latter case); otherwise writes clen minus TAGLEN plaintext bytes to m, stores that length in *mlen and returns 0. nsec is unused. */ +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + Xoodyak_Instance instance; + unsigned char tag[TAGLEN]; + unsigned long long mlen_; + + (void)nsec; + + *mlen = 0; + if (clen < TAGLEN) { + return -1; + } + mlen_ = clen - TAGLEN; + Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, npub, CRYPTO_NPUBBYTES, NULL, 0); + Xoodyak_Absorb(&instance, ad, (size_t)adlen); + Xoodyak_Decrypt(&instance, c, m, (size_t)mlen_); + Xoodyak_Squeeze(&instance, tag, TAGLEN); + if (memcmp(tag, c + mlen_, TAGLEN) != 0) { /* NOTE(review): memcmp is not guaranteed to run in constant time; consider a constant-time tag comparison */ + memset(m, 0, (size_t)mlen_); + return -1; + } + *mlen = mlen_; + return 0; +} diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Cyclist.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Cyclist.h new file mode 100644 index
0000000..54522bb --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Cyclist.h @@ -0,0 +1,66 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Cyclist_h_ +#define _Cyclist_h_ + +#include <stdint.h> +#include "align.h" + +#define Cyclist_ModeHash 1 +#define Cyclist_ModeKeyed 2 + +#define Cyclist_PhaseDown 1 +#define Cyclist_PhaseUp 2 + +#ifdef OUTPUT + +#include <stdio.h> +/* Instance layout when OUTPUT is defined: carries a shadow copy of the state and a FILE handle in addition to the regular fields. */ +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + uint8_t stateShadow[size]; \ + FILE *file; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#else +/* Instance layout without the OUTPUT-only fields: state bytes plus phase, mode and the two rates. */ +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#endif + +#define KCP_DeclareCyclistFunctions(prefix) \ + void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \ + void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \ + void prefix##_Encrypt(prefix##_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen); \ + void prefix##_Decrypt(prefix##_Instance *instance,
const uint8_t *C, uint8_t *P, size_t CLen); \ + void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \ + void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \ + void prefix##_Ratchet(prefix##_Instance *instance); + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Cyclist.inc b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Cyclist.inc new file mode 100644 index 0000000..f3d8ce9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Cyclist.inc @@ -0,0 +1,336 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define JOIN0(a, b) a ## b +#define JOIN(a, b) JOIN0(a, b) + +#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize) +#define SnP_Initialize JOIN(SnP, _Initialize) +#define SnP_AddBytes JOIN(SnP, _AddBytes) +#define SnP_AddByte JOIN(SnP, _AddByte) +#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes) +#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes) +#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes) + +#define Cyclist_Instance JOIN(prefix, _Instance) +#define Cyclist_Initialize JOIN(prefix, _Initialize) +#define Cyclist_Absorb JOIN(prefix, _Absorb) +#define Cyclist_Encrypt JOIN(prefix, _Encrypt) +#define Cyclist_Decrypt JOIN(prefix, _Decrypt) +#define Cyclist_Squeeze JOIN(prefix, _Squeeze) +#define Cyclist_SqueezeKey JOIN(prefix, _SqueezeKey) +#define Cyclist_Ratchet JOIN(prefix, _Ratchet) + +#define Cyclist_AbsorbAny JOIN(prefix, _AbsorbAny) +#define Cyclist_AbsorbKey JOIN(prefix, _AbsorbKey) +#define Cyclist_SqueezeAny JOIN(prefix, _SqueezeAny) +#define Cyclist_Down JOIN(prefix, _Down) +#define Cyclist_Up JOIN(prefix, _Up) +#define Cyclist_Crypt JOIN(prefix, _Crypt) + +#define Cyclist_f_bPrime JOIN(prefix, _f_bPrime) +#define Cyclist_Rhash JOIN(prefix, _Rhash) +#define Cyclist_Rkin JOIN(prefix, _Rkin) +#define Cyclist_Rkout JOIN(prefix, _Rkout) +#define Cyclist_lRatchet JOIN(prefix, _lRatchet) + +#if defined(CyclistFullBlocks_supported) +#define Cyclist_AbsorbKeyedFullBlocks JOIN(prefix, _AbsorbKeyedFullBlocks) +#define Cyclist_AbsorbHashFullBlocks JOIN(prefix, _AbsorbHashFullBlocks) +#define Cyclist_SqueezeKeyedFullBlocks JOIN(prefix, _SqueezeKeyedFullBlocks) +#define Cyclist_SqueezeHashFullBlocks JOIN(prefix, _SqueezeHashFullBlocks) +#define Cyclist_EncryptFullBlocks JOIN(prefix, _EncryptFullBlocks) +#define Cyclist_DecryptFullBlocks JOIN(prefix, _DecryptFullBlocks) +#endif + +/* ------- Cyclist internal interfaces ------- */ + +static void Cyclist_Down(Cyclist_Instance *instance, const 
uint8_t *Xi, unsigned int XiLen, uint8_t Cd) +{ + SnP_AddBytes(instance->state, Xi, 0, XiLen); + SnP_AddByte(instance->state, 0x01, XiLen); + SnP_AddByte(instance->state, (instance->mode == Cyclist_ModeHash) ? (Cd & 0x01) : Cd, Cyclist_f_bPrime - 1); + instance->phase = Cyclist_PhaseDown; + +} + +static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu) +{ + #if defined(OUTPUT) + uint8_t s[Cyclist_f_bPrime]; + #endif + + if (instance->mode != Cyclist_ModeHash) { + SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1); + } + #if defined(OUTPUT) + if (instance->file != NULL) { + SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime ); + SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime ); + } + #endif + SnP_Permute( instance->state ); + #if defined(OUTPUT) + if (instance->file != NULL) { + memcpy( instance->stateShadow, instance->state, sizeof(instance->state) ); + fprintf( instance->file, "Data XORed" ); + displayByteString( instance->file, "", s, Cyclist_f_bPrime ); + SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime ); + fprintf( instance->file, "After f() "); + displayByteString( instance->file, "", s, Cyclist_f_bPrime ); + } + #endif + instance->phase = Cyclist_PhaseUp; + SnP_ExtractBytes( instance->state, Yi, 0, YiLen ); +} + +static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd) +{ + unsigned int splitLen; + + do { + if (instance->phase != Cyclist_PhaseUp) { + Cyclist_Up(instance, NULL, 0, 0); + } + splitLen = (unsigned int)MyMin(XLen, r); + Cyclist_Down(instance, X, splitLen, Cd); + Cd = 0; + X += splitLen; + XLen -= splitLen; + #if defined(CyclistFullBlocks_supported) + if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) { + size_t lenProcessed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen); + X += lenProcessed; + XLen -= lenProcessed; + } + else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) { + 
size_t lenProcessed = Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen); + X += lenProcessed; + XLen -= lenProcessed; + } + #endif + } while ( XLen != 0 ); +} + +static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen) +{ + uint8_t KID[Cyclist_Rkin]; + + #if DEBUG + assert(instance->mode == Cyclist_ModeHash); + assert((KLen + IDLen) <= (Cyclist_Rkin - 1)); + #endif + instance->mode = Cyclist_ModeKeyed; + instance->Rabsorb = Cyclist_Rkin; + instance->Rsqueeze = Cyclist_Rkout; + if (KLen != 0) { + memcpy(KID, K, KLen); + memcpy(KID + KLen, ID, IDLen); + KID[KLen + IDLen] = (uint8_t)IDLen; + Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02); + if (counterLen != 0) { + Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00); + } + } +} + +static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu) +{ + unsigned int len; + + len = (unsigned int)MyMin(YLen, instance->Rsqueeze ); + Cyclist_Up(instance, Y, len, Cu); + Y += len; + YLen -= len; + while (YLen != 0) { + #if defined(CyclistFullBlocks_supported) + if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkin)) { + size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen); + Y += lenProcessed; + YLen -= lenProcessed; + } + else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) { + size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen); + Y += lenProcessed; + YLen -= lenProcessed; + } + else + #endif + { + Cyclist_Down(instance, NULL, 0, 0); + len = (unsigned int)MyMin(YLen, instance->Rsqueeze ); + Cyclist_Up(instance, Y, len, 0); + Y += len; + YLen -= len; + } + } +} + +static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt) +{ + unsigned int splitLen; + uint8_t P[Cyclist_Rkout]; + uint8_t Cu = 0x80; + + do { + if 
(decrypt != 0) { + #if defined(CyclistFullBlocks_supported) + if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) { + size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen); + I += lenProcessed; + O += lenProcessed; + IOLen -= lenProcessed; + } + else + #endif + { + splitLen = (unsigned int)MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */ + Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */ + SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */ + Cyclist_Down(instance, O, splitLen, 0x00); + I += splitLen; + O += splitLen; + IOLen -= splitLen; + } + } + else { + #if defined(CyclistFullBlocks_supported) + if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) { + size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen); + I += lenProcessed; + O += lenProcessed; + IOLen -= lenProcessed; + } + else + #endif + { + splitLen = (unsigned int)MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */ + memcpy(P, I, splitLen); + Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */ + SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */ + Cyclist_Down(instance, P, splitLen, 0x00); + I += splitLen; + O += splitLen; + IOLen -= splitLen; + } + } + Cu = 0x00; + } while ( IOLen != 0 ); +} + +/* ------- Cyclist interfaces ------- */ + +void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen) +{ + SnP_StaticInitialize(); + SnP_Initialize(instance->state); + instance->phase = Cyclist_PhaseUp; + instance->mode = Cyclist_ModeHash; + instance->Rabsorb = Cyclist_Rhash; + instance->Rsqueeze = Cyclist_Rhash; + #ifdef OUTPUT + instance->file = 0; + SnP_Initialize( instance->stateShadow ); + #endif + if (KLen != 0) { + Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, 
counter, counterLen); + } +} + +void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, size_t XLen) +{ + Cyclist_AbsorbAny(instance, X, XLen, instance->Rabsorb, 0x03); +} + +void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen) +{ + #if DEBUG + assert(instance->mode == Cyclist_ModeKeyed); + #endif + Cyclist_Crypt(instance, P, C, PLen, 0); +} + +void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen) +{ + #if DEBUG + assert(instance->mode == Cyclist_ModeKeyed); + #endif + Cyclist_Crypt(instance, C, P, CLen, 1); +} + +void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen) +{ + Cyclist_SqueezeAny(instance, Y, YLen, 0x40); +} + +void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen) +{ + #if DEBUG + assert(instance->mode == Cyclist_ModeKeyed); + #endif + Cyclist_SqueezeAny(instance, K, KLen, 0x20); +} + +void Cyclist_Ratchet(Cyclist_Instance *instance) +{ + uint8_t buffer[Cyclist_lRatchet]; + + #if DEBUG + assert(instance->mode == Cyclist_ModeKeyed); + #endif + /* Squeeze then absorb is the same as overwriting with zeros */ + Cyclist_SqueezeAny(instance, buffer, sizeof(buffer), 0x10); + Cyclist_AbsorbAny(instance, buffer, sizeof(buffer), instance->Rabsorb, 0x00); +} + +#undef SnP_StaticInitialize +#undef SnP_Initialize +#undef SnP_AddBytes +#undef SnP_AddByte +#undef SnP_OverwriteBytes +#undef SnP_ExtractBytes +#undef SnP_ExtractAndAddBytes + +#undef Cyclist_Instance +#undef Cyclist_Initialize +#undef Cyclist_Absorb +#undef Cyclist_Encrypt +#undef Cyclist_Decrypt +#undef Cyclist_Squeeze +#undef Cyclist_SqueezeKey +#undef Cyclist_Ratchet + +#undef Cyclist_AbsorbAny +#undef Cyclist_AbsorbKey +#undef Cyclist_SqueezeAny +#undef Cyclist_Down +#undef Cyclist_Up +#undef Cyclist_Crypt + +#undef Cyclist_f_bPrime +#undef Cyclist_Rhash +#undef Cyclist_Rkin +#undef Cyclist_Rkout +#undef Cyclist_lRatchet + +#if defined(CyclistFullBlocks_supported) +#undef 
Cyclist_AbsorbKeyedFullBlocks +#undef Cyclist_AbsorbHashFullBlocks +#undef Cyclist_SqueezeKeyedFullBlocks +#undef Cyclist_SqueezeHashFullBlocks +#undef Cyclist_EncryptFullBlocks +#undef Cyclist_DecryptFullBlocks +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Xoodoo-SnP.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Xoodoo-SnP.h new file mode 100644 index 0000000..7d0c98b --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Xoodoo-SnP.h @@ -0,0 +1,55 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodoo_SnP_h_ +#define _Xoodoo_SnP_h_ + +#include <stddef.h> +#include <stdint.h> + +/** For the documentation, see SnP-documentation.h.
+ */ + +#define Xoodoo_implementation "32-bit optimized ARM assembler implementation" +#define Xoodoo_stateSizeInBytes (3*4*4) +#define Xoodoo_stateAlignment 4 + +#define Xoodoo_StaticInitialize() +void Xoodoo_Initialize(void *state); +#define Xoodoo_AddByte(argS, argData, argOffset) ((uint8_t*)argS)[argOffset] ^= (argData) +void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount); +//void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds); +void Xoodoo_Permute_6rounds(void *state); +void Xoodoo_Permute_12rounds(void *state); +void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length); + +#define Xoodoo_FastXoofff_supported +void Xoofff_AddIs(uint8_t *output, const uint8_t *input, size_t bitLen); +size_t Xoofff_CompressFastLoop(uint8_t *kRoll, uint8_t *xAccu, const uint8_t *input, size_t length); +size_t Xoofff_ExpandFastLoop(uint8_t *yAccu, const uint8_t *kRoll, uint8_t *output, size_t length); + +#define CyclistFullBlocks_supported +size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen); +size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen); +size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen); +size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen); +size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen); +size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen); + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Xoodoo-u1-armv6m-le-gcc.s 
b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Xoodoo-u1-armv6m-le-gcc.s new file mode 100644 index 0000000..91c20c6 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Xoodoo-u1-armv6m-le-gcc.s @@ -0,0 +1,1092 @@ +@ +@ The eXtended Keccak Code Package (XKCP) +@ https://github.com/XKCP/XKCP +@ +@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. +@ +@ Implementation by Ronny Van Keer, hereby denoted as "the implementer". +@ +@ For more information, feedback or questions, please refer to the Keccak Team website: +@ https://keccak.team/ +@ +@ To the extent possible under law, the implementer has waived all copyright +@ and related or neighboring rights to the source code in this file. +@ http://creativecommons.org/publicdomain/zero/1.0/ +@ + +@ WARNING: These functions work only on little endian CPU with ARMv6m architecture (Cortex-M0, ...). + + + .thumb + .syntax unified +.text + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_Initialize(void *state) +@ + .align 4 +.global Xoodoo_Initialize +.type Xoodoo_Initialize, %function; +Xoodoo_Initialize: + movs r1, #0 + movs r2, #0 + movs r3, #0 + stmia r0!, { r1 - r3 } + stmia r0!, { r1 - r3 } + stmia r0!, { r1 - r3 } + stmia r0!, { r1 - r3 } + bx lr + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +@ +.global Xoodoo_AddBytes +.type Xoodoo_AddBytes, %function; +Xoodoo_AddBytes: + push {r4,lr} + adds r0, r0, r2 @ state += offset + subs r3, r3, #4 @ if length >= 4 + bcc Xoodoo_AddBytes_Bytes + movs r2, r0 @ and data pointer and offset both 32-bit aligned + orrs r2, r2, r1 + lsls r2, #30 + bne Xoodoo_AddBytes_Bytes +Xoodoo_AddBytes_LanesLoop: @ then, perform on words + ldr r2, [r0] + ldmia r1!, {r4} + eors r2, r2, r4 +
stmia r0!, {r2} + subs r3, r3, #4 + bcs Xoodoo_AddBytes_LanesLoop +Xoodoo_AddBytes_Bytes: + adds r3, r3, #4 + beq Xoodoo_AddBytes_Exit + subs r3, r3, #1 +Xoodoo_AddBytes_BytesLoop: + ldrb r2, [r0, r3] + ldrb r4, [r1, r3] + eors r2, r2, r4 + strb r2, [r0, r3] + subs r3, r3, #1 + bcs Xoodoo_AddBytes_BytesLoop +Xoodoo_AddBytes_Exit: + pop {r4,pc} + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +@ +.global Xoodoo_OverwriteBytes +.type Xoodoo_OverwriteBytes, %function; +Xoodoo_OverwriteBytes: + adds r0, r0, r2 @ state += offset + subs r3, r3, #4 @ if length >= 4 + bcc Xoodoo_OverwriteBytes_Bytes + movs r2, r0 @ and data pointer and offset both 32-bit aligned + orrs r2, r2, r1 + lsls r2, #30 + bne Xoodoo_OverwriteBytes_Bytes +Xoodoo_OverwriteBytes_LanesLoop: @ then, perform on words + ldmia r1!, {r2} + stmia r0!, {r2} + subs r3, r3, #4 + bcs Xoodoo_OverwriteBytes_LanesLoop +Xoodoo_OverwriteBytes_Bytes: + adds r3, r3, #4 + beq Xoodoo_OverwriteBytes_Exit + subs r3, r3, #1 +Xoodoo_OverwriteBytes_BytesLoop: + ldrb r2, [r1, r3] + strb r2, [r0, r3] + subs r3, r3, #1 + bcs Xoodoo_OverwriteBytes_BytesLoop +Xoodoo_OverwriteBytes_Exit: + bx lr + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount) +@ +.global Xoodoo_OverwriteWithZeroes +.type Xoodoo_OverwriteWithZeroes, %function; +Xoodoo_OverwriteWithZeroes: + movs r3, #0 + lsrs r2, r1, #2 + beq Xoodoo_OverwriteWithZeroes_Bytes +Xoodoo_OverwriteWithZeroes_LoopLanes: + stm r0!, { r3 } + subs r2, r2, #1 + bne Xoodoo_OverwriteWithZeroes_LoopLanes +Xoodoo_OverwriteWithZeroes_Bytes: + lsls r1, r1, #32-2 + beq Xoodoo_OverwriteWithZeroes_Exit + lsrs r1, r1, #32-2 +Xoodoo_OverwriteWithZeroes_LoopBytes: + subs r1, r1, #1 + strb r3, [r0, r1] + bne
Xoodoo_OverwriteWithZeroes_LoopBytes +Xoodoo_OverwriteWithZeroes_Exit: + bx lr + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length) +@ +.global Xoodoo_ExtractBytes +.type Xoodoo_ExtractBytes, %function; +Xoodoo_ExtractBytes: + adds r0, r0, r2 @ state += offset + subs r3, r3, #4 @ if length >= 4 + bcc Xoodoo_ExtractBytes_Bytes + movs r2, r0 @ and data pointer and offset both 32-bit aligned + orrs r2, r2, r1 + lsls r2, #30 + bne Xoodoo_ExtractBytes_Bytes +Xoodoo_ExtractBytes_LanesLoop: @ then, perform on words + ldmia r0!, {r2} + stmia r1!, {r2} + subs r3, r3, #4 + bcs Xoodoo_ExtractBytes_LanesLoop +Xoodoo_ExtractBytes_Bytes: + adds r3, r3, #4 + beq Xoodoo_ExtractBytes_Exit + subs r3, r3, #1 +Xoodoo_ExtractBytes_BytesLoop: + ldrb r2, [r0, r3] + strb r2, [r1, r3] + subs r3, r3, #1 + bcs Xoodoo_ExtractBytes_BytesLoop +Xoodoo_ExtractBytes_Exit: + bx lr + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +@ +.global Xoodoo_ExtractAndAddBytes +.type Xoodoo_ExtractAndAddBytes, %function; +Xoodoo_ExtractAndAddBytes: + push {r4,r5} + adds r0, r0, r3 @ state += offset (offset register no longer needed, reuse for length) + ldr r3, [sp, #8] @ get length argument from stack + subs r3, r3, #4 @ if length >= 4 + bcc Xoodoo_ExtractAndAddBytes_Bytes + movs r5, r0 @ and input/output/state pointer all 32-bit aligned + orrs r5, r5, r1 + orrs r5, r5, r2 + lsls r5, #30 + bne Xoodoo_ExtractAndAddBytes_Bytes +Xoodoo_ExtractAndAddBytes_LanesLoop: @ then, perform on words + ldmia r0!, {r5} + ldmia r1!, {r4} + eors r5, r5, r4 + stmia r2!, {r5} + subs r3, r3, #4 + bcs Xoodoo_ExtractAndAddBytes_LanesLoop +Xoodoo_ExtractAndAddBytes_Bytes: + adds r3, r3,
#4 + beq Xoodoo_ExtractAndAddBytes_Exit + subs r3, r3, #1 +Xoodoo_ExtractAndAddBytes_BytesLoop: + ldrb r5, [r0, r3] + ldrb r4, [r1, r3] + eors r5, r5, r4 + strb r5, [r2, r3] + subs r3, r3, #1 + bcs Xoodoo_ExtractAndAddBytes_BytesLoop +Xoodoo_ExtractAndAddBytes_Exit: + pop {r4,r5} + bx lr + .align 4 + + +@ ---------------------------------------------------------------------------- + +@ offsets in RAM state +.equ _oA00 , 0*4 +.equ _oA01 , 1*4 +.equ _oA02 , 2*4 +.equ _oA03 , 3*4 +.equ _oA10 , 4*4 +.equ _oA11 , 5*4 +.equ _oA12 , 6*4 +.equ _oA13 , 7*4 +.equ _oA20 , 8*4 +.equ _oA21 , 9*4 +.equ _oA22 , 10*4 +.equ _oA23 , 11*4 + +@ possible locations of state lanes +.equ locRegL , 1 +.equ locRegH , 2 +.equ locMem , 3 + +@ ---------------------------------------------------------------------------- + +.equ _r0 , 5 +.equ _r1 , 14 +.equ _r2 , 1 + +.equ _w1 , 11 + +.equ _e0 , 2 +.equ _e1 , 8 + +@ ---------------------------------------------------------------------------- + +.macro mXor3 ro, a0, a1, a2, loc, tt + mov \ro, \a1 + eors \ro, \ro, \a2 + .if \loc == locRegL + eors \ro, \ro, \a0 + .else + .if \loc == locRegH + mov \tt, \a0 + .else + ldr \tt, [sp, #\a0] + .endif + eors \ro, \ro, \tt + .endif + .endm + +.macro mXor ro, ri, tt, loc + .if \loc == locRegL + eors \ro, \ro, \ri + .else + .if \loc == locRegH + mov \tt, \ro + eors \tt, \tt, \ri + mov \ro, \tt + .else + ldr \tt, [sp, #\ro] + eors \tt, \tt, \ri + str \tt, [sp, #\ro] + .endif + .endif + .endm + +.macro mChi3 a0,a1,a2,r0,r1,a0s,loc + mov \r1, \a2 + mov \r0, \a1 + bics \r1, \r1, \r0 + eors \a0, \a0, \r1 + .if \loc != locRegL + .if \loc == locRegH + mov \a0s, \a0 + .else + str \a0, [sp, #\a0s] + .endif + .endif + + mov \r0, \a0 + bics \r0, \r0, \a2 + mov \r1, \a1 + eors \r1, \r1, \r0 + mov \a1, \r1 + + bics \r1, \r1, \a0 + eors \a2, \a2, \r1 + .endm + +.macro mRound offsetRC, offsetA03 + + @ Theta: Column Parity Mixer + mXor3 r0, \offsetA03, lr, r7, locMem, r2 + mov r1, r0 + movs r2, #32-(_r1-_r0) + rors r1, r1, 
r2 + eors r1, r1, r0 + movs r2, #32-_r0 + rors r1, r1, r2 + mXor3 r0, r3, r10, r4, locRegL, r2 + mXor r3, r1, r2, locRegL + mXor r10, r1, r2, locRegH + mXor r4, r1, r2, locRegL + + mov r1, r0 + movs r2, #32-(_r1-_r0) + rors r1, r1, r2 + eors r1, r1, r0 + movs r2, #32-_r0 + rors r1, r1, r2 + mXor3 r0, r8, r11, r5, locRegH, r2 + mXor r8, r1, r2, locRegH + mXor r11, r1, r2, locRegH + mXor r5, r1, r2, locRegL + + mov r1, r0 + movs r2, #32-(_r1-_r0) + rors r1, r1, r2 + eors r1, r1, r0 + movs r2, #32-_r0 + rors r1, r1, r2 + mXor3 r0, r9, r12, r6, locRegH, r2 + mXor r9, r1, r2, locRegH + mXor r12, r1, r2, locRegH + mXor r6, r1, r2, locRegL + + mov r1, r0 + movs r2, #32-(_r1-_r0) + rors r1, r1, r2 + eors r1, r1, r0 + movs r2, #32-_r0 + rors r1, r1, r2 + mXor \offsetA03, r1, r2, locMem + mXor lr, r1, r2, locRegH + mXor r7, r1, r2, locRegL + + @ Rho-west: Plane shift + movs r0, #32-_w1 + rors r4, r4, r0 + rors r5, r5, r0 + rors r6, r6, r0 + rors r7, r7, r0 + mov r0, lr + mov lr, r12 + mov r12, r11 + mov r11, r10 + mov r10, r0 + + @ Iota: round constant + ldr r0, [sp, #\offsetRC] + ldmia r0!, {r1} + str r0, [sp, #\offsetRC] + eors r3, r3, r1 + + @ Chi: non linear step, on colums + mChi3 r3, r10, r4, r0, r1, r3, locRegL + mov r2, r8 + mChi3 r2, r11, r5, r0, r1, r8, locRegH + mov r2, r9 + mChi3 r2, r12, r6, r0, r1, r9, locRegH + ldr r2, [sp, #\offsetA03] + mChi3 r2, lr, r7, r0, r1, \offsetA03, locMem + + @ Rho-east: Plane shift + movs r0, #32-1 + mov r1, r10 + rors r1, r1, r0 + mov r10, r1 + mov r1, r11 + rors r1, r1, r0 + mov r11, r1 + mov r1, r12 + rors r1, r1, r0 + mov r12, r1 + mov r1, lr + rors r1, r1, r0 + mov lr, r1 + + movs r0, #32-_e1 + rors r4, r4, r0 + rors r5, r5, r0 + rors r6, r6, r0 + rors r7, r7, r0 + + mov r0, r4 + mov r4, r6 + mov r6, r0 + mov r0, r5 + mov r5, r7 + mov r7, r0 + + .endm + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds) +@ + +@ offsets on stack 
+.equ Xoodoo_Permute_Nrounds_offsetA03 , 0 +.equ Xoodoo_Permute_Nrounds_offsetRC , 4 +.equ Xoodoo_Permute_Nrounds_SAS , 8 +.equ Xoodoo_Permute_Nrounds_offsetState , Xoodoo_Permute_Nrounds_SAS + +.global Xoodoo_Permute_Nrounds +.type Xoodoo_Permute_Nrounds, %function; +Xoodoo_Permute_Nrounds: + push {r4-r6,lr} + mov r2, r8 + mov r3, r9 + mov r4, r10 + mov r5, r11 + push {r0,r2-r5,r7} + + sub sp, #Xoodoo_Permute_Nrounds_SAS + adr r2, Xoodoo_Permute_RoundConstants12 + lsls r1, r1, #2 + subs r2, r2, r1 + str r2, [sp, #Xoodoo_Permute_Nrounds_offsetRC] + + ldm r0!, {r3,r5,r6,r7} + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoodoo_Permute_Nrounds_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoodoo_Permute_Nrouds_Loop: + mRound Xoodoo_Permute_Nrounds_offsetRC, Xoodoo_Permute_Nrounds_offsetA03 + ldr r0, [sp, #Xoodoo_Permute_Nrounds_offsetRC] + ldr r0, [r0] + cmp r0, #0 + beq Xoodoo_Permute_Nrouds_Done + b Xoodoo_Permute_Nrouds_Loop +Xoodoo_Permute_Nrouds_Done: + ldr r0, [sp, #Xoodoo_Permute_Nrounds_offsetState] + + stm r0!, {r3} + mov r1, r8 + mov r2, r9 + ldr r3, [sp, #Xoodoo_Permute_Nrounds_offsetA03] + stm r0!, {r1,r2,r3} + + mov r1, r10 + mov r2, r11 + mov r3, r12 + stm r0!, {r1,r2,r3} + + mov r1, lr + stm r0!, {r1,r4,r5,r6,r7} + + add sp, #Xoodoo_Permute_Nrounds_SAS + pop {r0-r4,r7} + mov r8, r1 + mov r9, r2 + mov r10, r3 + mov r11, r4 + pop {r4-r6,pc} + .align 4 + + +Xoodoo_Permute_RoundConstants: + .long 0x00000058 + .long 0x00000038 + .long 0x000003C0 + .long 0x000000D0 + .long 0x00000120 + .long 0x00000014 + .long 0x00000060 + .long 0x0000002C + .long 0x00000380 + .long 0x000000F0 + .long 0x000001A0 + .long 0x00000012 +Xoodoo_Permute_RoundConstants12: + .long 0 + .align 4 + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_Permute_6rounds( void *state ) +@ +.global Xoodoo_Permute_6rounds +.type Xoodoo_Permute_6rounds, %function; 
+Xoodoo_Permute_6rounds: + movs r1, #6 + b Xoodoo_Permute_Nrounds + .align 4 + + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_Permute_12rounds( void *state ) +@ +.global Xoodoo_Permute_12rounds +.type Xoodoo_Permute_12rounds, %function; +Xoodoo_Permute_12rounds: + movs r1, #12 + b Xoodoo_Permute_Nrounds + .align 4 + + + +.equ Xoofff_BlockSize , 3*4*4 + +@ ---------------------------------------------------------------------------- +@ +@ void Xoofff_AddIs(BitSequence *output, const BitSequence *input, BitLength bitLen) +.global Xoofff_AddIs +.type Xoofff_AddIs, %function; +Xoofff_AddIs: + push {r4-r6,lr} + movs r3, r0 @ check input and output pointer both 32-bit .align 8ed + orrs r3, r3, r1 + lsls r3, r3, #30 + bne Xoofff_AddIs_Bytes + subs r2, r2, #16*8 + bcc Xoofff_AddIs_LessThan16 +Xoofff_AddIs_16Loop: + ldr r3, [r0, #0] + ldr r4, [r0, #4] + ldmia r1!, {r5,r6} + eors r3, r3, r5 + eors r4, r4, r6 + stmia r0!, {r3,r4} + ldr r3, [r0, #0] + ldr r4, [r0, #4] + ldmia r1!, {r5,r6} + eors r3, r3, r5 + eors r4, r4, r6 + stmia r0!, {r3,r4} + subs r2, r2, #16*8 + bcs Xoofff_AddIs_16Loop +Xoofff_AddIs_LessThan16: + adds r2, r2, #16*8 + beq Xoofff_AddIs_Return + subs r2, r2, #4*8 + bcc Xoofff_AddIs_LessThan4 +Xoofff_AddIs_4Loop: + ldr r3, [r0] + ldmia r1!, {r4} + eors r3, r3, r4 + stmia r0!, {r3} + subs r2, r2, #4*8 + bcs Xoofff_AddIs_4Loop +Xoofff_AddIs_LessThan4: + adds r2, r2, #4*8 + beq Xoofff_AddIs_Return +Xoofff_AddIs_Bytes: + subs r2, r2, #8 + bcc Xoofff_AddIs_LessThan1 +Xoofff_AddIs_1Loop: + ldrb r3, [r0] + ldrb r4, [r1] + adds r1, r1, #1 + eors r3, r3, r4 + strb r3, [r0] + adds r0, r0, #1 + subs r2, r2, #8 + bcs Xoofff_AddIs_1Loop +Xoofff_AddIs_LessThan1: + adds r2, r2, #8 + beq Xoofff_AddIs_Return + ldrb r3, [r0] + ldrb r4, [r1] + movs r1, #1 + eors r3, r3, r4 + lsls r1, r1, r2 + subs r1, r1, #1 + ands r3, r3, r1 + strb r3, [r0] +Xoofff_AddIs_Return: + pop {r4-r6,pc} + .align 4 + + +.macro mLdu rv, ri, tt + ldrb 
\rv, [\ri, #3] + lsls \rv, \rv, #8 + ldrb \tt, [\ri, #2] + orrs \rv, \rv, \tt + lsls \rv, \rv, #8 + ldrb \tt, [\ri, #1] + orrs \rv, \rv, \tt + lsls \rv, \rv, #8 + ldrb \tt, [\ri, #0] + orrs \rv, \rv, \tt + adds \ri, \ri, #4 + .endm + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoofff_CompressFastLoop(unsigned char *kRoll, unsigned char *xAccu, const unsigned char *input, size_t length) +@ + +@ offsets on stack +.equ Xoofff_CompressFastLoop_offsetA03 , 0 +.equ Xoofff_CompressFastLoop_offsetRC , 4 +.equ Xoofff_CompressFastLoop_SAS , 8 +.equ Xoofff_CompressFastLoop_kRoll , Xoofff_CompressFastLoop_SAS+0 +.equ Xoofff_CompressFastLoop_input , Xoofff_CompressFastLoop_SAS+4 +.equ Xoofff_CompressFastLoop_xAccu , Xoofff_CompressFastLoop_SAS+8+16 +.equ Xoofff_CompressFastLoop_iInput , Xoofff_CompressFastLoop_SAS+12+16 +.equ Xoofff_CompressFastLoop_length , Xoofff_CompressFastLoop_SAS+16+16 + +.global Xoofff_CompressFastLoop +.type Xoofff_CompressFastLoop, %function; +Xoofff_CompressFastLoop: + subs r3, #Xoofff_BlockSize @ length must be greater than block size + push {r1-r7,lr} + mov r4, r8 + mov r5, r9 + mov r6, r10 + mov r7, r11 + push {r0,r2,r4-r7} + sub sp, #Xoofff_CompressFastLoop_SAS + ldm r0!, {r3,r5,r6,r7} @ get initial kRoll + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoofff_CompressFastLoop_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoofff_CompressFastLoop_Loop: + adr r1, Xoofff_CompressFastLoop_RoundConstants6 + str r1, [sp, #Xoofff_CompressFastLoop_offsetRC] + + ldr r0, [sp, #Xoofff_CompressFastLoop_input] @ add input + lsls r1, r0, #30 + bne Xoofff_CompressFastLoop_Unaligned + +Xoofff_CompressFastLoop_Aligned: + ldmia r0!, {r1} + eors r3, r3, r1 + ldmia r0!, {r1} + mov r2, r8 + eors r2, r2, r1 + mov r8, r2 + ldmia r0!, {r1} + mov r2, r9 + eors r2, r2, r1 + mov r9, r2 + ldmia r0!, {r1} + ldr r2, [sp, #Xoofff_CompressFastLoop_offsetA03] + eors 
r2, r2, r1 + str r2, [sp, #Xoofff_CompressFastLoop_offsetA03] + + ldmia r0!, {r1} + mov r2, r10 + eors r2, r2, r1 + mov r10, r2 + ldmia r0!, {r1} + mov r2, r11 + eors r2, r2, r1 + mov r11, r2 + ldmia r0!, {r1} + mov r2, r12 + eors r2, r2, r1 + mov r12, r2 + ldmia r0!, {r1} + mov r2, lr + eors r2, r2, r1 + mov lr, r2 + + ldmia r0!, {r1,r2} + eors r4, r4, r1 + eors r5, r5, r2 + ldmia r0!, {r1,r2} + eors r6, r6, r1 + eors r7, r7, r2 + + b Xoofff_CompressFastLoop_Permute + .align 4 +Xoofff_CompressFastLoop_RoundConstants6: + .long 0x00000060 + .long 0x0000002C + .long 0x00000380 + .long 0x000000F0 + .long 0x000001A0 + .long 0x00000012 + .long 0 + +Xoofff_CompressFastLoop_Unaligned: + mLdu r1, r0, r2 + eors r3, r3, r1 + mLdu r1, r0, r2 + mov r2, r8 + eors r2, r2, r1 + mov r8, r2 + mLdu r1, r0, r2 + mov r2, r9 + eors r2, r2, r1 + mov r9, r2 + mLdu r1, r0, r2 + ldr r2, [sp, #Xoofff_CompressFastLoop_offsetA03] + eors r2, r2, r1 + str r2, [sp, #Xoofff_CompressFastLoop_offsetA03] + + mLdu r1, r0, r2 + mov r2, r10 + eors r2, r2, r1 + mov r10, r2 + mLdu r1, r0, r2 + mov r2, r11 + eors r2, r2, r1 + mov r11, r2 + mLdu r1, r0, r2 + mov r2, r12 + eors r2, r2, r1 + mov r12, r2 + mLdu r1, r0, r2 + mov r2, lr + eors r2, r2, r1 + mov lr, r2 + + mLdu r1, r0, r2 + eors r4, r4, r1 + mLdu r1, r0, r2 + eors r5, r5, r1 + mLdu r1, r0, r2 + eors r6, r6, r1 + mLdu r1, r0, r2 + eors r7, r7, r1 + +Xoofff_CompressFastLoop_Permute: + str r0, [sp, #Xoofff_CompressFastLoop_input] +Xoofff_CompressFastLoop_PermuteLoop: + mRound Xoofff_CompressFastLoop_offsetRC, Xoofff_CompressFastLoop_offsetA03 + ldr r0, [sp, #Xoofff_CompressFastLoop_offsetRC] + ldr r0, [r0] + cmp r0, #0 + beq Xoofff_CompressFastLoop_PermuteDone + b Xoofff_CompressFastLoop_PermuteLoop +Xoofff_CompressFastLoop_PermuteDone: + + @ Extract and add into xAccu + ldr r0, [sp, #Xoofff_CompressFastLoop_xAccu] + + ldr r1, [r0] + eors r1, r1, r3 + stmia r0!, {r1} + + ldr r1, [r0] + mov r2, r8 + eors r1, r1, r2 + stmia r0!, {r1} + + ldr r1, [r0] 
+ mov r2, r9 + eors r1, r1, r2 + stmia r0!, {r1} + + ldr r1, [r0] + ldr r2, [sp, #Xoofff_CompressFastLoop_offsetA03] + eors r1, r1, r2 + stmia r0!, {r1} + + + ldr r1, [r0] + mov r2, r10 + eors r1, r1, r2 + stmia r0!, {r1} + ldr r1, [r0] + mov r2, r11 + eors r1, r1, r2 + stmia r0!, {r1} + ldr r1, [r0] + mov r2, r12 + eors r1, r1, r2 + stmia r0!, {r1} + ldr r1, [r0] + mov r2, lr + eors r1, r1, r2 + stmia r0!, {r1} + + ldr r1, [r0, #0] + ldr r2, [r0, #4] + ldr r3, [r0, #8] + eors r1, r1, r4 + ldr r4, [r0, #12] + eors r2, r2, r5 + eors r3, r3, r6 + eors r4, r4, r7 + stm r0!, {r1,r2,r3,r4} + + @roll kRoll-c + ldr r0, [sp, #Xoofff_CompressFastLoop_kRoll] + ldmia r0!, {r7} + ldmia r0!, {r4-r6} + ldmia r0!, {r3} + ldmia r0!, {r1,r2} + mov r8, r1 + mov r9, r2 + ldmia r0!, {r1,r2} + str r1, [sp, #Xoofff_CompressFastLoop_offsetA03] + mov r10, r2 + ldmia r0!, {r1,r2} + mov r11, r1 + mov r12, r2 + ldmia r0!, {r1} + mov lr, r1 + + lsls r1, r7, #13 + eors r7, r7, r1 + mov r1, r3 + movs r2, #32-3 + rors r1, r1, r2 + eors r7, r7, r1 + + subs r0, r0, #Xoofff_BlockSize + stmia r0!, {r3} + mov r1, r8 + mov r2, r9 + stmia r0!, {r1,r2} + ldr r1, [sp, #Xoofff_CompressFastLoop_offsetA03] + mov r2, r10 + stmia r0!, {r1,r2} + mov r1, r11 + mov r2, r12 + stmia r0!, {r1,r2} + mov r1, lr + stmia r0!, {r1,r4-r7} + + @ loop management + ldr r0, [sp, #Xoofff_CompressFastLoop_length] + subs r0, #Xoofff_BlockSize + str r0, [sp, #Xoofff_CompressFastLoop_length] + bcc Xoofff_CompressFastLoop_Done + b Xoofff_CompressFastLoop_Loop +Xoofff_CompressFastLoop_Done: + @ return number of bytes processed + ldr r0, [sp, #Xoofff_CompressFastLoop_input] + ldr r1, [sp, #Xoofff_CompressFastLoop_iInput] + subs r0, r0, r1 + add sp, #Xoofff_CompressFastLoop_SAS+8 + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r1-r7,pc} + .align 4 + + +.macro mStu rv, ro + strb \rv, [\ro, #0] + lsrs \rv, \rv, #8 + strb \rv, [\ro, #1] + lsrs \rv, \rv, #8 + strb \rv, [\ro, #2] + lsrs \rv, \rv, #8 + strb \rv, 
[\ro, #3] + adds \ro, \ro, #4 + .endm + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoofff_ExpandFastLoop(unsigned char *yAccu, const unsigned char *kRoll, unsigned char *output, size_t length) +@ + +@ offsets on stack +.equ Xoofff_ExpandFastLoop_offsetA03, 0 +.equ Xoofff_ExpandFastLoop_offsetRC , 4 +.equ Xoofff_ExpandFastLoop_SAS , 8 +.equ Xoofff_ExpandFastLoop_yAccu , Xoofff_ExpandFastLoop_SAS+0 +.equ Xoofff_ExpandFastLoop_output , Xoofff_ExpandFastLoop_SAS+4 +.equ Xoofff_ExpandFastLoop_kRoll , Xoofff_ExpandFastLoop_SAS+8+16 +.equ Xoofff_ExpandFastLoop_iOutput , Xoofff_ExpandFastLoop_SAS+12+16 +.equ Xoofff_ExpandFastLoop_length , Xoofff_ExpandFastLoop_SAS+16+16 + +.global Xoofff_ExpandFastLoop +.type Xoofff_ExpandFastLoop, %function; +Xoofff_ExpandFastLoop: + subs r3, #Xoofff_BlockSize @ length must be greater than block size + push {r1-r7,lr} + mov r4, r8 + mov r5, r9 + mov r6, r10 + mov r7, r11 + push {r0,r2,r4-r7} + sub sp, #Xoofff_ExpandFastLoop_SAS + + ldm r0!, {r3,r5,r6,r7} @ get initial yAccu + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoofff_ExpandFastLoop_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoofff_ExpandFastLoop_Loop: + adr r1, Xoofff_ExpandFastLoop_RoundConstants6 + str r1, [sp, #Xoofff_ExpandFastLoop_offsetRC] +Xoofff_ExpandFastLoop_PermuteLoop: + mRound Xoofff_ExpandFastLoop_offsetRC, Xoofff_ExpandFastLoop_offsetA03 + ldr r0, [sp, #Xoofff_ExpandFastLoop_offsetRC] + ldr r0, [r0] + cmp r0, #0 + beq Xoofff_ExpandFastLoop_PermuteDone + b Xoofff_ExpandFastLoop_PermuteLoop +Xoofff_ExpandFastLoop_RoundConstants6: + .long 0x00000060 + .long 0x0000002C + .long 0x00000380 + .long 0x000000F0 + .long 0x000001A0 + .long 0x00000012 + .long 0 +Xoofff_ExpandFastLoop_PermuteDone: + @ Add k and extract + ldr r0, [sp, #Xoofff_ExpandFastLoop_kRoll] + ldr r1, [sp, #Xoofff_ExpandFastLoop_output] @ add input + lsls r2, r1, #30 + bne 
Xoofff_ExpandFastLoop_Unaligned +Xoofff_ExpandFastLoop_Aligned: + ldmia r0!, {r2} + eors r2, r2, r3 + stmia r1!, {r2} + ldmia r0!, {r2} + mov r3, r8 + eors r2, r2, r3 + stmia r1!, {r2} + ldmia r0!, {r2} + mov r3, r9 + eors r2, r2, r3 + stmia r1!, {r2} + ldmia r0!, {r2} + ldr r3, [sp, #Xoofff_ExpandFastLoop_offsetA03] + eors r2, r2, r3 + stmia r1!, {r2} + + ldmia r0!, {r2} + mov r3, r10 + eors r2, r2, r3 + stmia r1!, {r2} + ldmia r0!, {r2} + mov r3, r11 + eors r2, r2, r3 + stmia r1!, {r2} + ldmia r0!, {r2} + mov r3, r12 + eors r2, r2, r3 + stmia r1!, {r2} + ldmia r0!, {r2} + mov r3, lr + eors r2, r2, r3 + stmia r1!, {r2} + + ldmia r0!, {r2,r3} + eors r2, r2, r4 + eors r3, r3, r5 + stmia r1!, {r2,r3} + ldmia r0!, {r2,r3} + eors r2, r2, r6 + eors r3, r3, r7 + stmia r1!, {r2,r3} + b Xoofff_ExpandFastLoop_ExtractDone + +Xoofff_ExpandFastLoop_Unaligned: + ldmia r0!, {r2} + eors r2, r2, r3 + mStu r2, r1 + ldmia r0!, {r2} + mov r3, r8 + eors r2, r2, r3 + mStu r2, r1 + ldmia r0!, {r2} + mov r3, r9 + eors r2, r2, r3 + mStu r2, r1 + ldmia r0!, {r2} + ldr r3, [sp, #Xoofff_ExpandFastLoop_offsetA03] + eors r2, r2, r3 + mStu r2, r1 + + ldmia r0!, {r2} + mov r3, r10 + eors r2, r2, r3 + mStu r2, r1 + ldmia r0!, {r2} + mov r3, r11 + eors r2, r2, r3 + mStu r2, r1 + ldmia r0!, {r2} + mov r3, r12 + eors r2, r2, r3 + mStu r2, r1 + ldmia r0!, {r2} + mov r3, lr + eors r2, r2, r3 + mStu r2, r1 + + ldmia r0!, {r2,r3} + eors r2, r2, r4 + mStu r2, r1 + eors r3, r3, r5 + mStu r3, r1 + ldmia r0!, {r2,r3} + eors r2, r2, r6 + mStu r2, r1 + eors r3, r3, r7 + mStu r3, r1 + +Xoofff_ExpandFastLoop_ExtractDone: + str r1, [sp, #Xoofff_ExpandFastLoop_output] + + @ roll-e yAccu + ldr r0, [sp, #Xoofff_ExpandFastLoop_yAccu] + ldmia r0!, {r7} + ldmia r0!, {r4-r6} + ldmia r0!, {r3} + ldmia r0!, {r1,r2} + mov r8, r1 + mov r9, r2 + ldmia r0!, {r1,r2} + str r1, [sp, #Xoofff_ExpandFastLoop_offsetA03] + mov r10, r2 + ldmia r0!, {r1,r2} + mov r11, r1 + mov r12, r2 + ldmia r0!, {r1} + mov lr, r1 + + mov r1, r10 + 
ands r1, r1, r3 + movs r2, #32-5 + rors r7, r7, r2 + eors r7, r7, r1 + movs r2, #32-13 + mov r1, r3 + rors r1, r1, r2 + eors r7, r7, r1 + movs r1, #7 + eors r7, r7, r1 + + subs r0, r0, #Xoofff_BlockSize + stmia r0!, {r3} + mov r1, r8 + mov r2, r9 + stmia r0!, {r1,r2} + ldr r1, [sp, #Xoofff_ExpandFastLoop_offsetA03] + mov r2, r10 + stmia r0!, {r1,r2} + mov r1, r11 + mov r2, r12 + stmia r0!, {r1,r2} + mov r1, lr + stmia r0!, {r1,r4-r7} + + @ loop management + ldr r0, [sp, #Xoofff_ExpandFastLoop_length] + subs r0, #Xoofff_BlockSize + str r0, [sp, #Xoofff_ExpandFastLoop_length] + bcc Xoofff_ExpandFastLoop_Done + b Xoofff_ExpandFastLoop_Loop +Xoofff_ExpandFastLoop_Done: + @ return number of bytes processed + ldr r0, [sp, #Xoofff_ExpandFastLoop_output] + ldr r1, [sp, #Xoofff_ExpandFastLoop_iOutput] + subs r0, r0, r1 + add sp, #Xoofff_ExpandFastLoop_SAS+8 + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r1-r7,pc} + .align 4 + + diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Xoodoo.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Xoodoo.h new file mode 100644 index 0000000..1b6f1a9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Xoodoo.h @@ -0,0 +1,79 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodoo_h_ +#define _Xoodoo_h_ + +#include +#include + +#define MAXROUNDS 12 +#define NROWS 3 +#define NCOLUMS 4 +#define NLANES (NCOLUMS*NROWS) + +/* Round constants */ +#define _rc12 0x00000058 +#define _rc11 0x00000038 +#define _rc10 0x000003C0 +#define _rc9 0x000000D0 +#define _rc8 0x00000120 +#define _rc7 0x00000014 +#define _rc6 0x00000060 +#define _rc5 0x0000002C +#define _rc4 0x00000380 +#define _rc3 0x000000F0 +#define _rc2 0x000001A0 +#define _rc1 0x00000012 + + +#if !defined(ROTL32) + #if defined (__arm__) && !defined(__GNUC__) + #define ROTL32(a, offset) __ror(a, (32-(offset))%32) + #elif defined(_MSC_VER) + #define ROTL32(a, offset) _rotl(a, (offset)%32) + #else + #define ROTL32(a, offset) ((((uint32_t)a) << ((offset)%32)) ^ (((uint32_t)a) >> ((32-(offset))%32))) + #endif +#endif + +#if !defined(READ32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define READ32_UNALIGNED(argAddress) (*((const __packed uint32_t*)(argAddress))) + #elif defined(_MSC_VER) + #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress))) + #else + #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress))) + #endif +#endif + +#if !defined(WRITE32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define WRITE32_UNALIGNED(argAddress, argData) (*((__packed uint32_t*)(argAddress)) = (argData)) + #elif defined(_MSC_VER) + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #else + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #endif +#endif + +#if !defined(index) + #define index(__x,__y) ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS)) +#endif + +typedef uint32_t tXoodooLane; + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Xoodyak-parameters.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Xoodyak-parameters.h new file mode 100644 index 
0000000..a8c34d8 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Xoodyak-parameters.h @@ -0,0 +1,26 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodyak_parameters_h_ +#define _Xoodyak_parameters_h_ + +#define Xoodyak_f_bPrime 48 +#define Xoodyak_Rhash 16 +#define Xoodyak_Rkin 44 +#define Xoodyak_Rkout 24 +#define Xoodyak_lRatchet 16 + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Xoodyak-u1-armv6m-le-gcc.s b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Xoodyak-u1-armv6m-le-gcc.s new file mode 100644 index 0000000..91ab5a2 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Xoodyak-u1-armv6m-le-gcc.s @@ -0,0 +1,1165 @@ +@ +@ The eXtended Keccak Code Package (XKCP) +@ https://github.com/XKCP/XKCP +@ +@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. +@ +@ Implementation by Ronny Van Keer, hereby denoted as "the implementer". +@ +@ For more information, feedback or questions, please refer to the Keccak Team website: +@ https://keccak.team/ +@ +@ To the extent possible under law, the implementer has waived all copyright +@ and related or neighboring rights to the source code in this file. +@ http://creativecommons.org/publicdomain/zero/1.0/ +@ + +@ WARNING: These functions work only on little endian CPU with@ ARMv6m architecture (e.g., Cortex-M0). 
+ + + .thumb + .syntax unified +.text + +@ ---------------------------------------------------------------------------- + +@ offsets in RAM state +.equ _oA00 , 0*4 +.equ _oA01 , 1*4 +.equ _oA02 , 2*4 +.equ _oA03 , 3*4 +.equ _oA10 , 4*4 +.equ _oA11 , 5*4 +.equ _oA12 , 6*4 +.equ _oA13 , 7*4 +.equ _oA20 , 8*4 +.equ _oA21 , 9*4 +.equ _oA22 , 10*4 +.equ _oA23 , 11*4 + +@ possible locations of state lanes +.equ locRegL , 1 +.equ locRegH , 2 +.equ locMem , 3 + +@ ---------------------------------------------------------------------------- + +.equ _r0 , 5 +.equ _r1 , 14 +.equ _r2 , 1 + +.equ _w1 , 11 + +.equ _e0 , 2 +.equ _e1 , 8 + +@ ---------------------------------------------------------------------------- + +.macro mLoadU r, p, o, t + ldrb \r, [\p, #\o+0] + ldrb \t, [\p, #\o+1] + lsls \t, \t, #8 + orrs \r, \r, \t + ldrb \t, [\p, #\o+2] + lsls \t, \t, #16 + orrs \r, \r, \t + ldrb \t, [\p, #\o+3] + lsls \t, \t, #24 + orrs \r, \r, \t + .endm + +.macro mStoreU p, o, s, t, loc + .if \loc == locRegL + strb \s, [\p, #\o+0] + lsrs \t, \s, #8 + .else + mov \t, \s + strb \t, [\p, #\o+0] + lsrs \t, \t, #8 + .endif + strb \t, [\p, #\o+1] + lsrs \t, \t, #8 + strb \t, [\p, #\o+2] + lsrs \t, \t, #8 + strb \t, [\p, #\o+3] + .endm + +.macro mXor3 ro, a0, a1, a2, loc, tt + mov \ro, \a1 + eors \ro, \ro, \a2 + .if \loc == locRegL + eors \ro, \ro, \a0 + .else + .if \loc == locRegH + mov \tt, \a0 + .else + ldr \tt, [sp, #\a0] + .endif + eors \ro, \ro, \tt + .endif + .endm + +.macro mXor ro, ri, tt, loc + .if \loc == locRegL + eors \ro, \ro, \ri + .else + .if \loc == locRegH + mov \tt, \ro + eors \tt, \tt, \ri + mov \ro, \tt + .else + ldr \tt, [sp, #\ro] + eors \tt, \tt, \ri + str \tt, [sp, #\ro] + .endif + .endif + .endm + +.macro mChi3 a0,a1,a2,r0,r1,a0s,loc + mov \r1, \a2 + mov \r0, \a1 + bics \r1, \r1, \r0 + eors \a0, \a0, \r1 + .if \loc != locRegL + .if \loc == locRegH + mov \a0s, \a0 + .else + str \a0, [sp, #\a0s] + .endif + .endif + + mov \r0, \a0 + bics \r0, \r0, \a2 + mov \r1, \a1 
+ eors \r1, \r1, \r0 + mov \a1, \r1 + + bics \r1, \r1, \a0 + eors \a2, \a2, \r1 + .endm + +.macro mRound offsetRC, offsetA03 + + @ Theta: Column Parity Mixer + mXor3 r0, \offsetA03, lr, r7, locMem, r2 + mov r1, r0 + movs r2, #32-(_r1-_r0) + rors r1, r1, r2 + eors r1, r1, r0 + movs r2, #32-_r0 + rors r1, r1, r2 + mXor3 r0, r3, r10, r4, locRegL, r2 + mXor r3, r1, r2, locRegL + mXor r10, r1, r2, locRegH + mXor r4, r1, r2, locRegL + + mov r1, r0 + movs r2, #32-(_r1-_r0) + rors r1, r1, r2 + eors r1, r1, r0 + movs r2, #32-_r0 + rors r1, r1, r2 + mXor3 r0, r8, r11, r5, locRegH, r2 + mXor r8, r1, r2, locRegH + mXor r11, r1, r2, locRegH + mXor r5, r1, r2, locRegL + + mov r1, r0 + movs r2, #32-(_r1-_r0) + rors r1, r1, r2 + eors r1, r1, r0 + movs r2, #32-_r0 + rors r1, r1, r2 + mXor3 r0, r9, r12, r6, locRegH, r2 + mXor r9, r1, r2, locRegH + mXor r12, r1, r2, locRegH + mXor r6, r1, r2, locRegL + + mov r1, r0 + movs r2, #32-(_r1-_r0) + rors r1, r1, r2 + eors r1, r1, r0 + movs r2, #32-_r0 + rors r1, r1, r2 + mXor \offsetA03, r1, r2, locMem + mXor lr, r1, r2, locRegH + mXor r7, r1, r2, locRegL + + @ Rho-west: Plane shift + movs r0, #32-_w1 + rors r4, r4, r0 + rors r5, r5, r0 + rors r6, r6, r0 + rors r7, r7, r0 + mov r0, lr + mov lr, r12 + mov r12, r11 + mov r11, r10 + mov r10, r0 + + @ Iota: round constant + ldr r0, [sp, #\offsetRC] + ldmia r0!, {r1} + str r0, [sp, #\offsetRC] + eors r3, r3, r1 + + @ Chi: non linear step, on colums + mChi3 r3, r10, r4, r0, r1, r3, locRegL + mov r2, r8 + mChi3 r2, r11, r5, r0, r1, r8, locRegH + mov r2, r9 + mChi3 r2, r12, r6, r0, r1, r9, locRegH + ldr r2, [sp, #\offsetA03] + mChi3 r2, lr, r7, r0, r1, \offsetA03, locMem + + @ Rho-east: Plane shift + movs r0, #32-1 + mov r1, r10 + rors r1, r1, r0 + mov r10, r1 + mov r1, r11 + rors r1, r1, r0 + mov r11, r1 + mov r1, r12 + rors r1, r1, r0 + mov r12, r1 + mov r1, lr + rors r1, r1, r0 + mov lr, r1 + + movs r0, #32-_e1 + rors r4, r4, r0 + rors r5, r5, r0 + rors r6, r6, r0 + rors r7, r7, r0 + + mov r0, r4 
+ mov r4, r6 + mov r6, r0 + mov r0, r5 + mov r5, r7 + mov r7, r0 + + .endm + +@ ---------------------------------------------------------------------------- +@ +@ Xoodoo_Permute_12roundsAsm +@ + +@ offsets on stack +.equ Xoodoo_Permute_12rounds_offsetA03 , 0 +.equ Xoodoo_Permute_12rounds_offsetRC , 4 +.equ Xoodoo_Permute_12rounds_offsetReturn, 8 +.equ Xoodoo_Permute_12rounds_SAS , 12 + + .align 4 +.type Xoodoo_Permute_12roundsAsm, %function; +Xoodoo_Permute_12roundsAsm: + adr r2, Xoodoo_Permute_RoundConstants12 + str r2, [sp, #Xoodoo_Permute_12rounds_offsetRC] +Xoodoo_Permute_12rounds_Loop: + mRound Xoodoo_Permute_12rounds_offsetRC, Xoodoo_Permute_12rounds_offsetA03 + ldr r0, [sp, #Xoodoo_Permute_12rounds_offsetRC] + ldr r0, [r0] + cmp r0, #0 + beq Xoodoo_Permute_12rounds_Done + b Xoodoo_Permute_12rounds_Loop +Xoodoo_Permute_12rounds_Done: + ldr r0, [sp, #Xoodoo_Permute_12rounds_offsetReturn] + bx r0 + .align 4 +Xoodoo_Permute_RoundConstants12: + .long 0x00000058 + .long 0x00000038 + .long 0x000003C0 + .long 0x000000D0 + .long 0x00000120 + .long 0x00000014 + .long 0x00000060 + .long 0x0000002C + .long 0x00000380 + .long 0x000000F0 + .long 0x000001A0 + .long 0x00000012 + .long 0 + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen) +@ { +@ size_t initialLength = XLen@ +@ +@ do { +@ SnP_Permute(state )@ /* Xoodyak_Up(instance, NULL, 0, 0)@ */ +@ SnP_AddBytes(state, X, 0, Xoodyak_Rkin)@ /* Xoodyak_Down(instance, X, Xoodyak_Rkin, 0)@ */ +@ SnP_AddByte(state, 0x01, Xoodyak_Rkin)@ +@ X += Xoodyak_Rkin@ +@ XLen -= Xoodyak_Rkin@ +@ } while (XLen >= Xoodyak_Rkin)@ +@ return initialLength - XLen@ +@ } +@ +.equ XoodyakAbsorb_offsetState , (Xoodoo_Permute_12rounds_SAS+0) +.equ XoodyakAbsorb_offsetX , (Xoodoo_Permute_12rounds_SAS+4) +.equ XoodyakAbsorb_offsetXLen , (Xoodoo_Permute_12rounds_SAS+8) +.equ XoodyakAbsorb_offsetInitialLen , 
(Xoodoo_Permute_12rounds_SAS+12) + +.equ XoodyakAbsorb_SAS , (Xoodoo_Permute_12rounds_SAS+20) + + .align 4 +.global Xoodyak_AbsorbKeyedFullBlocks +.type Xoodyak_AbsorbKeyedFullBlocks, %function; +Xoodyak_AbsorbKeyedFullBlocks: + push {r3-r7,lr} + mov r4, r8 + mov r5, r9 + mov r6, r10 + mov r7, r11 + push {r4-r7} + + sub sp, #XoodyakAbsorb_SAS + str r0, [sp, #XoodyakAbsorb_offsetState] @ setup variables on stack + str r1, [sp, #XoodyakAbsorb_offsetX] + str r2, [sp, #XoodyakAbsorb_offsetInitialLen] + subs r2, r2, #44 + str r2, [sp, #XoodyakAbsorb_offsetXLen] + ldr r5, =Xoodyak_AbsorbKeyedFullBlocks_Ret+1 + str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn] + + ldm r0!, {r3,r5,r6,r7} @ state in registers + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoodyak_AbsorbKeyedFullBlocks_Loop: + ldr r0, =Xoodoo_Permute_12roundsAsm + bx r0 + .align 4 + .ltorg +Xoodyak_AbsorbKeyedFullBlocks_Ret: + ldr r0, [sp, #XoodyakAbsorb_offsetX] + lsls r1, r0, #30 + bne Xoodyak_AbsorbKeyedFullBlocks_Unaligned +Xoodyak_AbsorbKeyedFullBlocks_Aligned: + ldmia r0!, {r1} + eors r3, r3, r1 + ldmia r0!, {r1} + mov r2, r8 + eors r2, r2, r1 + mov r8, r2 + ldmia r0!, {r1} + mov r2, r9 + eors r2, r2, r1 + mov r9, r2 + ldmia r0!, {r1} + ldr r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] + eors r2, r2, r1 + str r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] + + ldmia r0!, {r1} + mov r2, r10 + eors r2, r2, r1 + mov r10, r2 + ldmia r0!, {r1} + mov r2, r11 + eors r2, r2, r1 + mov r11, r2 + ldmia r0!, {r1} + mov r2, r12 + eors r2, r2, r1 + mov r12, r2 + ldmia r0!, {r1} + mov r2, lr + eors r2, r2, r1 + mov lr, r2 + + ldmia r0!, {r1} + eors r4, r4, r1 + ldmia r0!, {r1} + eors r5, r5, r1 + ldmia r0!, {r1} + eors r6, r6, r1 +Xoodyak_AbsorbKeyedFullBlocks_EndLoop: + str r0, [sp, #XoodyakAbsorb_offsetX] + movs r2, #1 + eors r7, r7, r2 + ldr r1, [sp, #XoodyakAbsorb_offsetXLen] + 
subs r1, r1, #44 + str r1, [sp, #XoodyakAbsorb_offsetXLen] + bcs Xoodyak_AbsorbKeyedFullBlocks_Loop + ldr r0, [sp, #XoodyakAbsorb_offsetState] + stm r0!, {r3} + mov r1, r8 + mov r2, r9 + ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03] + stm r0!, {r1,r2,r3} + mov r1, r10 + mov r2, r11 + mov r3, r12 + stm r0!, {r1,r2,r3} + mov r1, lr + stm r0!, {r1,r4,r5,r6,r7} + + ldr r0, [sp, #XoodyakAbsorb_offsetInitialLen] + ldr r2, [sp, #XoodyakAbsorb_offsetXLen] + adds r2, r2, #44 + subs r0, r0, r2 + + add sp, #XoodyakAbsorb_SAS + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r3-r7,pc} +Xoodyak_AbsorbKeyedFullBlocks_Unaligned: + mLoadU r1, r0, 0, r2 + eors r3, r3, r1 + + mLoadU r1, r0, 4, r2 + mov r2, r8 + eors r2, r2, r1 + mov r8, r2 + + mLoadU r1, r0, 8, r2 + mov r2, r9 + eors r2, r2, r1 + mov r9, r2 + + mLoadU r1, r0, 12, r2 + ldr r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] + eors r2, r2, r1 + str r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] + + mLoadU r1, r0, 16, r2 + mov r2, r10 + eors r2, r2, r1 + mov r10, r2 + + mLoadU r1, r0, 20, r2 + mov r2, r11 + eors r2, r2, r1 + mov r11, r2 + + mLoadU r1, r0, 24, r2 + mov r2, r12 + eors r2, r2, r1 + mov r12, r2 + + mLoadU r1, r0, 28, r2 + mov r2, lr + eors r2, r2, r1 + mov lr, r2 + + adds r0, r0, #32 + mLoadU r1, r0, 0, r2 + eors r4, r4, r1 + mLoadU r1, r0, 4, r2 + eors r5, r5, r1 + mLoadU r1, r0, 8, r2 + eors r6, r6, r1 + adds r0, r0, #12 + b Xoodyak_AbsorbKeyedFullBlocks_EndLoop + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen) +@ { +@ size_t initialLength = XLen@ +@ +@ do { +@ SnP_Permute(state )@ /* Xoodyak_Up(instance, NULL, 0, 0)@ */ +@ SnP_AddBytes(state, X, 0, Xoodyak_Rhash)@ /* Xoodyak_Down(instance, X, Xoodyak_Rhash, 0)@ */ +@ SnP_AddByte(state, 0x01, Xoodyak_Rhash)@ +@ X += Xoodyak_Rhash@ +@ XLen -= Xoodyak_Rhash@ +@ } while (XLen >= Xoodyak_Rhash)@ +@ return initialLength - 
XLen@ +@ } +@ + .align 4 +.global Xoodyak_AbsorbHashFullBlocks +.type Xoodyak_AbsorbHashFullBlocks, %function; +Xoodyak_AbsorbHashFullBlocks: + push {r3-r7,lr} + mov r4, r8 + mov r5, r9 + mov r6, r10 + mov r7, r11 + push {r4-r7} + + sub sp, #XoodyakAbsorb_SAS + str r0, [sp, #XoodyakAbsorb_offsetState] @ setup variables on stack + str r1, [sp, #XoodyakAbsorb_offsetX] + str r2, [sp, #XoodyakAbsorb_offsetInitialLen] + subs r2, r2, #16 + str r2, [sp, #XoodyakAbsorb_offsetXLen] + ldr r5, =Xoodyak_AbsorbHashFullBlocks_Ret+1 + str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn] + + ldm r0!, {r3,r5,r6,r7} @ state in registers + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoodyak_AbsorbHashFullBlocks_Loop: + ldr r0, =Xoodoo_Permute_12roundsAsm + bx r0 + .align 4 + .ltorg +Xoodyak_AbsorbHashFullBlocks_Ret: + ldr r0, [sp, #XoodyakAbsorb_offsetX] + lsls r1, r0, #30 + bne Xoodyak_AbsorbHashFullBlocks_Unaligned +Xoodyak_AbsorbHashFullBlocks_Aligned: + ldmia r0!, {r1} + eors r3, r3, r1 + ldmia r0!, {r1} + mov r2, r8 + eors r2, r2, r1 + mov r8, r2 + ldmia r0!, {r1} + mov r2, r9 + eors r2, r2, r1 + mov r9, r2 + ldmia r0!, {r1} + ldr r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] + eors r2, r2, r1 + str r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] +Xoodyak_AbsorbHashFullBlocks_EndLoop: + str r0, [sp, #XoodyakAbsorb_offsetX] + movs r2, #1 + mov r1, r10 + eors r1, r1, r2 + mov r10, r1 + ldr r1, [sp, #XoodyakAbsorb_offsetXLen] + subs r1, r1, #16 + str r1, [sp, #XoodyakAbsorb_offsetXLen] + bcs Xoodyak_AbsorbHashFullBlocks_Loop + ldr r0, [sp, #XoodyakAbsorb_offsetState] + + stm r0!, {r3} + mov r1, r8 + mov r2, r9 + ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03] + stm r0!, {r1,r2,r3} + mov r1, r10 + mov r2, r11 + mov r3, r12 + stm r0!, {r1,r2,r3} + mov r1, lr + stm r0!, {r1,r4,r5,r6,r7} + + ldr r0, [sp, #XoodyakAbsorb_offsetInitialLen] + ldr r2, [sp, 
#XoodyakAbsorb_offsetXLen] + adds r2, r2, #16 + subs r0, r0, r2 + + add sp, #XoodyakAbsorb_SAS + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r3-r7,pc} +Xoodyak_AbsorbHashFullBlocks_Unaligned: + mLoadU r1, r0, 0, r2 + eors r3, r3, r1 + mLoadU r1, r0, 4, r2 + mov r2, r8 + eors r2, r2, r1 + mov r8, r2 + mLoadU r1, r0, 8, r2 + mov r2, r9 + eors r2, r2, r1 + mov r9, r2 + mLoadU r1, r0, 12, r2 + ldr r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] + eors r2, r2, r1 + str r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] + adds r0, r0, #16 + b Xoodyak_AbsorbHashFullBlocks_EndLoop + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen) +@ { +@ size_t initialLength = YLen@ +@ +@ do { +@ SnP_AddByte(state, 0x01, 0)@ /* Xoodyak_Down(instance, NULL, 0, 0)@ */ +@ SnP_Permute(state )@ /* Xoodyak_Up(instance, Y, Xoodyak_Rkout, 0)@ */ +@ SnP_ExtractBytes(state, Y, 0, Xoodyak_Rkout)@ +@ Y += Xoodyak_Rkout@ +@ YLen -= Xoodyak_Rkout@ +@ } while (YLen >= Xoodyak_Rkout)@ +@ return initialLength - YLen@ +@ } +@ +.equ XoodyakSqueeze_offsetState , (Xoodoo_Permute_12rounds_SAS+0) +.equ XoodyakSqueeze_offsetY , (Xoodoo_Permute_12rounds_SAS+4) +.equ XoodyakSqueeze_offsetYLen , (Xoodoo_Permute_12rounds_SAS+8) +.equ XoodyakSqueeze_offsetInitialLen , (Xoodoo_Permute_12rounds_SAS+12) + +.equ XoodyakSqueeze_SAS , (Xoodoo_Permute_12rounds_SAS+20) + + .align 4 +.global Xoodyak_SqueezeKeyedFullBlocks +.type Xoodyak_SqueezeKeyedFullBlocks, %function; +Xoodyak_SqueezeKeyedFullBlocks: + push {r3-r7,lr} + mov r4, r8 + mov r5, r9 + mov r6, r10 + mov r7, r11 + push {r4-r7} + + sub sp, #XoodyakSqueeze_SAS + str r0, [sp, #XoodyakSqueeze_offsetState] @ setup variables on stack + str r1, [sp, #XoodyakSqueeze_offsetY] + str r2, [sp, #XoodyakSqueeze_offsetInitialLen] + subs r2, r2, #24 + str r2, [sp, #XoodyakSqueeze_offsetYLen] + ldr r5, =Xoodyak_SqueezeKeyedFullBlocks_Ret+1 + str 
r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn] + + ldm r0!, {r3,r5,r6,r7} @ state in registers + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoodyak_SqueezeKeyedFullBlocks_Loop: + movs r0, #1 + eors r3, r3, r0 + ldr r0, =Xoodoo_Permute_12roundsAsm + bx r0 + .align 4 + .ltorg +Xoodyak_SqueezeKeyedFullBlocks_Ret: + ldr r0, [sp, #XoodyakSqueeze_offsetY] + lsls r1, r0, #30 + bne Xoodyak_SqueezeKeyedFullBlocks_Unaligned +Xoodyak_SqueezeKeyedFullBlocks_Aligned: + stmia r0!, {r3} + mov r1, r8 + mov r2, r9 + stmia r0!, {r1, r2} + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03] + mov r2, r10 + stmia r0!, {r1, r2} + mov r1, r11 + stmia r0!, {r1} +Xoodyak_SqueezeKeyedFullBlocks_EndLoop: + str r0, [sp, #XoodyakSqueeze_offsetY] + ldr r1, [sp, #XoodyakSqueeze_offsetYLen] + subs r1, r1, #24 + str r1, [sp, #XoodyakSqueeze_offsetYLen] + bcs Xoodyak_SqueezeKeyedFullBlocks_Loop + ldr r0, [sp, #XoodyakSqueeze_offsetState] @ Save state + stm r0!, {r3} + mov r1, r8 + mov r2, r9 + ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03] + stm r0!, {r1,r2,r3} + mov r1, r10 + mov r2, r11 + mov r3, r12 + stm r0!, {r1,r2,r3} + mov r1, lr + stm r0!, {r1,r4,r5,r6,r7} + ldr r0, [sp, #XoodyakSqueeze_offsetInitialLen] @ Compute processed length + ldr r2, [sp, #XoodyakSqueeze_offsetYLen] + adds r2, r2, #24 + subs r0, r0, r2 + add sp, #XoodyakSqueeze_SAS @ Free stack and pop + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r3-r7,pc} +Xoodyak_SqueezeKeyedFullBlocks_Unaligned: + mStoreU r0, 0, r3, r2, locRegL + mStoreU r0, 4, r8, r2, locRegH + mStoreU r0, 8, r9, r2, locRegH + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03] + mStoreU r0, 12, r1, r2, locRegL + mStoreU r0, 16, r10, r2, locRegH + mStoreU r0, 20, r11, r2, locRegH + adds r0, r0, #24 + b Xoodyak_SqueezeKeyedFullBlocks_EndLoop + + +@ 
---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen) +@ { +@ size_t initialLength = YLen@ +@ +@ do { +@ SnP_AddByte(state, 0x01, 0)@ /* Xoodyak_Down(instance, NULL, 0, 0)@ */ +@ SnP_Permute(state)@ /* Xoodyak_Up(instance, Y, Xoodyak_Rhash, 0)@ */ +@ SnP_ExtractBytes(state, Y, 0, Xoodyak_Rhash)@ +@ Y += Xoodyak_Rhash@ +@ YLen -= Xoodyak_Rhash@ +@ } while (YLen >= Xoodyak_Rhash)@ +@ return initialLength - YLen@ +@ } +@ + .align 4 +.global Xoodyak_SqueezeHashFullBlocks +.type Xoodyak_SqueezeHashFullBlocks, %function; +Xoodyak_SqueezeHashFullBlocks: + push {r3-r7,lr} + mov r4, r8 + mov r5, r9 + mov r6, r10 + mov r7, r11 + push {r4-r7} + + sub sp, #XoodyakSqueeze_SAS + str r0, [sp, #XoodyakSqueeze_offsetState] @ setup variables on stack + str r1, [sp, #XoodyakSqueeze_offsetY] + str r2, [sp, #XoodyakSqueeze_offsetInitialLen] + subs r2, r2, #16 + str r2, [sp, #XoodyakSqueeze_offsetYLen] + ldr r5, =Xoodyak_SqueezeHashFullBlocks_Ret+1 + str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn] + + ldm r0!, {r3,r5,r6,r7} @ state in registers + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoodyak_SqueezeHashFullBlocks_Loop: + movs r0, #1 + eors r3, r3, r0 + ldr r0, =Xoodoo_Permute_12roundsAsm + bx r0 + .align 4 + .ltorg +Xoodyak_SqueezeHashFullBlocks_Ret: + ldr r0, [sp, #XoodyakSqueeze_offsetY] + lsls r1, r0, #30 + bne Xoodyak_SqueezeHashFullBlocks_Unaligned +Xoodyak_SqueezeHashFullBlocks_Aligned: + stmia r0!, {r3} + mov r1, r8 + mov r2, r9 + stmia r0!, {r1, r2} + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03] + stmia r0!, {r1} +Xoodyak_SqueezeHashFullBlocks_EndLoop: + str r0, [sp, #XoodyakSqueeze_offsetY] + ldr r1, [sp, #XoodyakSqueeze_offsetYLen] + subs r1, r1, #16 + str r1, [sp, #XoodyakSqueeze_offsetYLen] + bcs 
Xoodyak_SqueezeHashFullBlocks_Loop + ldr r0, [sp, #XoodyakSqueeze_offsetState] @ Save state + stm r0!, {r3} + mov r1, r8 + mov r2, r9 + ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03] + stm r0!, {r1,r2,r3} + mov r1, r10 + mov r2, r11 + mov r3, r12 + stm r0!, {r1,r2,r3} + mov r1, lr + stm r0!, {r1,r4,r5,r6,r7} + ldr r0, [sp, #XoodyakSqueeze_offsetInitialLen] @ Compute processed length + ldr r2, [sp, #XoodyakSqueeze_offsetYLen] + adds r2, r2, #16 + subs r0, r0, r2 + add sp, #XoodyakSqueeze_SAS @ Free stack and pop + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r3-r7,pc} +Xoodyak_SqueezeHashFullBlocks_Unaligned: + mStoreU r0, 0, r3, r2, locRegL + mStoreU r0, 4, r8, r2, locRegH + mStoreU r0, 8, r9, r2, locRegH + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03] + mStoreU r0, 12, r1, r2, locRegL + adds r0, r0, #16 + b Xoodyak_SqueezeHashFullBlocks_EndLoop + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen) +@ { +@ size_t initialLength = IOLen@ +@ +@ do { +@ SnP_Permute(state)@ +@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@ +@ SnP_OverwriteBytes(state, O, 0, Xoodyak_Rkout)@ +@ SnP_AddByte(state, 0x01, Xoodyak_Rkout)@ +@ I += Xoodyak_Rkout@ +@ O += Xoodyak_Rkout@ +@ IOLen -= Xoodyak_Rkout@ +@ } while (IOLen >= Xoodyak_Rkout)@ +@ return initialLength - IOLen@ +@ } +@ +.equ XoodyakCrypt_offsetState , (Xoodoo_Permute_12rounds_SAS+0) +.equ XoodyakCrypt_offsetI , (Xoodoo_Permute_12rounds_SAS+4) +.equ XoodyakCrypt_offsetO , (Xoodoo_Permute_12rounds_SAS+8) +.equ XoodyakCrypt_offsetIOLen , (Xoodoo_Permute_12rounds_SAS+12) +.equ XoodyakCrypt_offsetInitialLen , (Xoodoo_Permute_12rounds_SAS+16) +.equ XoodyakCrypt_SAS , (Xoodoo_Permute_12rounds_SAS+20) + + .align 4 +.global Xoodyak_EncryptFullBlocks +.type Xoodyak_EncryptFullBlocks, %function; +Xoodyak_EncryptFullBlocks: + push {r3-r7,lr} + mov r4, r8 + mov r5, r9 + mov 
r6, r10 + mov r7, r11 + push {r4-r7} + + sub sp, #XoodyakCrypt_SAS + str r0, [sp, #XoodyakCrypt_offsetState] @ setup variables on stack + str r1, [sp, #XoodyakCrypt_offsetI] + str r2, [sp, #XoodyakCrypt_offsetO] + str r3, [sp, #XoodyakCrypt_offsetInitialLen] + subs r3, r3, #24 + str r3, [sp, #XoodyakCrypt_offsetIOLen] + ldr r5, =Xoodyak_EncryptFullBlocks_Ret+1 + str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn] + + ldm r0!, {r3,r5,r6,r7} @ state in registers + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoodyak_EncryptFullBlocks_Loop: + ldr r0, =Xoodoo_Permute_12roundsAsm + bx r0 + .align 4 + .ltorg +Xoodyak_EncryptFullBlocks_Ret: + push {r4, r5} + ldr r5, [sp, #XoodyakCrypt_offsetI+8] + ldr r4, [sp, #XoodyakCrypt_offsetO+8] + mov r0, r4 + ands r0, r0, r5 + lsls r0, r0, #30 + bne Xoodyak_EncryptFullBlocks_Unaligned +Xoodyak_EncryptFullBlocks_Aligned: + ldmia r5!, {r0} + eors r3, r3, r0 + stmia r4!, {r3} + + ldmia r5!, {r0} + mov r1, r8 + eors r1, r1, r0 + stmia r4!, {r1} + mov r8, r1 + + ldmia r5!, {r0} + mov r1, r9 + eors r1, r1, r0 + stmia r4!, {r1} + mov r9, r1 + + ldmia r5!, {r0} + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + eors r1, r1, r0 + stmia r4!, {r1} + str r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + + ldmia r5!, {r0} + mov r1, r10 + eors r1, r1, r0 + stmia r4!, {r1} + mov r10, r1 + + ldmia r5!, {r0} + mov r1, r11 + eors r1, r1, r0 + stmia r4!, {r1} + mov r11, r1 +Xoodyak_EncryptFullBlocks_EndLoop: + movs r0, #1 + mov r1, r12 + eors r1, r1, r0 + mov r12, r1 + str r5, [sp, #XoodyakCrypt_offsetI+8] + str r4, [sp, #XoodyakCrypt_offsetO+8] + pop {r4, r5} + ldr r1, [sp, #XoodyakCrypt_offsetIOLen] + subs r1, r1, #24 + str r1, [sp, #XoodyakCrypt_offsetIOLen] + bcs Xoodyak_EncryptFullBlocks_Loop + ldr r0, [sp, #XoodyakCrypt_offsetState] @ Save state + stm r0!, {r3} + mov r1, r8 + mov r2, r9 + ldr r3, [sp, 
#Xoodoo_Permute_12rounds_offsetA03] + stm r0!, {r1,r2,r3} + mov r1, r10 + mov r2, r11 + mov r3, r12 + stm r0!, {r1,r2,r3} + mov r1, lr + stm r0!, {r1,r4,r5,r6,r7} + ldr r0, [sp, #XoodyakCrypt_offsetInitialLen] @ Compute processed length + ldr r2, [sp, #XoodyakCrypt_offsetIOLen] + adds r2, r2, #24 + subs r0, r0, r2 + add sp, #XoodyakCrypt_SAS @ Free stack and pop + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r3-r7,pc} +Xoodyak_EncryptFullBlocks_Unaligned: + mLoadU r0, r5, 0, r2 + eors r3, r3, r0 + mStoreU r4, 0, r3, r2, locRegL + + mLoadU r0, r5, 4, r2 + mov r1, r8 + eors r1, r1, r0 + mStoreU r4, 4, r1, r2, locRegL + mov r8, r1 + + mLoadU r0, r5, 8, r2 + mov r1, r9 + eors r1, r1, r0 + mStoreU r4, 8, r1, r2, locRegL + mov r9, r1 + + mLoadU r0, r5, 12, r2 + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + eors r1, r1, r0 + mStoreU r4, 12, r1, r2, locRegL + str r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + + mLoadU r0, r5, 16, r2 + mov r1, r10 + eors r1, r1, r0 + mStoreU r4, 16, r1, r2, locRegL + mov r10, r1 + + mLoadU r0, r5, 20, r2 + mov r1, r11 + eors r1, r1, r0 + mStoreU r4, 20, r1, r2, locRegL + mov r11, r1 + + adds r4, r4, #24 + adds r5, r5, #24 + b Xoodyak_EncryptFullBlocks_EndLoop + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen) +@ { +@ size_t initialLength = IOLen@ +@ +@ do { +@ SnP_Permute(state)@ +@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@ +@ SnP_AddBytes(state, O, 0, Xoodyak_Rkout)@ +@ SnP_AddByte(state, 0x01, Xoodyak_Rkout)@ +@ I += Xoodyak_Rkout@ +@ O += Xoodyak_Rkout@ +@ IOLen -= Xoodyak_Rkout@ +@ } while (IOLen >= Xoodyak_Rkout)@ +@ return initialLength - IOLen@ +@ } +@ + .align 4 +.global Xoodyak_DecryptFullBlocks +.type Xoodyak_DecryptFullBlocks, %function; +Xoodyak_DecryptFullBlocks: + push {r3-r7,lr} + mov r4, r8 + mov r5, r9 + mov r6, r10 + mov r7, r11 + push {r4-r7} + + 
sub sp, #XoodyakCrypt_SAS + str r0, [sp, #XoodyakCrypt_offsetState] @ setup variables on stack + str r1, [sp, #XoodyakCrypt_offsetI] + str r2, [sp, #XoodyakCrypt_offsetO] + str r3, [sp, #XoodyakCrypt_offsetInitialLen] + subs r3, r3, #24 + str r3, [sp, #XoodyakCrypt_offsetIOLen] + ldr r5, =Xoodyak_DecryptFullBlocks_Ret+1 + str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn] + + ldm r0!, {r3,r5,r6,r7} @ state in registers + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoodyak_DecryptFullBlocks_Loop: + ldr r0, =Xoodoo_Permute_12roundsAsm + bx r0 + .align 4 + .ltorg +Xoodyak_DecryptFullBlocks_Ret: + push {r4, r5} + ldr r5, [sp, #XoodyakCrypt_offsetI+8] + ldr r4, [sp, #XoodyakCrypt_offsetO+8] + mov r0, r4 + ands r0, r0, r5 + lsls r0, r0, #30 + bne Xoodyak_DecryptFullBlocks_Unaligned +Xoodyak_DecryptFullBlocks_Aligned: + ldmia r5!, {r0} + eors r3, r3, r0 + stmia r4!, {r3} + mov r3, r0 + + ldmia r5!, {r0} + mov r1, r8 + eors r1, r1, r0 + stmia r4!, {r1} + mov r8, r0 + + ldmia r5!, {r0} + mov r1, r9 + eors r1, r1, r0 + stmia r4!, {r1} + mov r9, r0 + + ldmia r5!, {r0} + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + eors r1, r1, r0 + stmia r4!, {r1} + str r0, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + + ldmia r5!, {r0} + mov r1, r10 + eors r1, r1, r0 + stmia r4!, {r1} + mov r10, r0 + + ldmia r5!, {r0} + mov r1, r11 + eors r1, r1, r0 + stmia r4!, {r1} + mov r11, r0 +Xoodyak_DecryptFullBlocks_EndLoop: + movs r0, #1 + mov r1, r12 + eors r1, r1, r0 + mov r12, r1 + str r5, [sp, #XoodyakCrypt_offsetI+8] + str r4, [sp, #XoodyakCrypt_offsetO+8] + pop {r4, r5} + ldr r1, [sp, #XoodyakCrypt_offsetIOLen] + subs r1, r1, #24 + str r1, [sp, #XoodyakCrypt_offsetIOLen] + bcs Xoodyak_DecryptFullBlocks_Loop + ldr r0, [sp, #XoodyakCrypt_offsetState] @ Save state + stm r0!, {r3} + mov r1, r8 + mov r2, r9 + ldr r3, [sp, 
#Xoodoo_Permute_12rounds_offsetA03] + stm r0!, {r1,r2,r3} + mov r1, r10 + mov r2, r11 + mov r3, r12 + stm r0!, {r1,r2,r3} + mov r1, lr + stm r0!, {r1,r4,r5,r6,r7} + ldr r0, [sp, #XoodyakCrypt_offsetInitialLen] @ Compute processed length + ldr r2, [sp, #XoodyakCrypt_offsetIOLen] + adds r2, r2, #24 + subs r0, r0, r2 + add sp, #XoodyakCrypt_SAS @ Free stack and pop + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r3-r7,pc} +Xoodyak_DecryptFullBlocks_Unaligned: + mLoadU r0, r5, 0, r2 + eors r3, r3, r0 + mStoreU r4, 0, r3, r2, locRegL + mov r3, r0 + + mLoadU r0, r5, 4, r2 + mov r1, r8 + eors r1, r1, r0 + mStoreU r4, 4, r1, r2, locRegL + mov r8, r0 + + mLoadU r0, r5, 8, r2 + mov r1, r9 + eors r1, r1, r0 + mStoreU r4, 8, r1, r2, locRegL + mov r9, r0 + + mLoadU r0, r5, 12, r2 + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + eors r1, r1, r0 + mStoreU r4, 12, r1, r2, locRegL + str r0, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + + mLoadU r0, r5, 16, r2 + mov r1, r10 + eors r1, r1, r0 + mStoreU r4, 16, r1, r2, locRegL + mov r10, r0 + + mLoadU r0, r5, 20, r2 + mov r1, r11 + eors r1, r1, r0 + mStoreU r4, 20, r1, r2, locRegL + mov r11, r0 + + adds r4, r4, #24 + adds r5, r5, #24 + b Xoodyak_DecryptFullBlocks_EndLoop + + diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Xoodyak.c b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Xoodyak.c new file mode 100644 index 0000000..c5407dc --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/Xoodyak.c @@ -0,0 +1,55 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". 
+ +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifdef XoodooReference + #include "displayIntermediateValues.h" +#endif + +#if DEBUG +#include +#endif +#include +#include "Xoodyak.h" + +#ifdef OUTPUT +#include +#include + +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length); +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length) +{ + unsigned int i; + + fprintf(f, "%s:", synopsis); + for(i=0; i +#include "Cyclist.h" +#include "Xoodoo-SnP.h" +#include "Xoodyak-parameters.h" + +KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment) +KCP_DeclareCyclistFunctions(Xoodyak) + +#else +#error This requires an implementation of Xoodoo +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/align.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/align.h new file mode 100644 index 0000000..82ad2f9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/align.h @@ -0,0 +1,33 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _align_h_ +#define _align_h_ + +/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. 
*/ +#ifdef ALIGN +#undef ALIGN +#endif + +#if defined(__GNUC__) +#define ALIGN(x) __attribute__ ((aligned(x))) +#elif defined(_MSC_VER) +#define ALIGN(x) __declspec(align(x)) +#elif defined(__ARMCC_VERSION) +#define ALIGN(x) __align(x) +#else +#define ALIGN(x) +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/api.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/api.h new file mode 100644 index 0000000..4ceda96 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/brg_endian.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/brg_endian.h new file mode 100644 index 0000000..7c640b9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/brg_endian.h @@ -0,0 +1,143 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The redistribution and use of this software (with or without changes) + is allowed without the payment of fees or royalties provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. 
+ --------------------------------------------------------------------------- + Issue Date: 20/12/2007 + Changes for ARM 9/9/2010 +*/ + +#ifndef _BRG_ENDIAN_H +#define _BRG_ENDIAN_H + +#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ +#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ + +#if 0 +/* Include files where endian defines and byteswap functions may reside */ +#if defined( __sun ) +# include +#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ ) +# include +#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \ + defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ ) +# include +#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ ) +# if !defined( __MINGW32__ ) && !defined( _AIX ) +# include +# if !defined( __BEOS__ ) +# include +# endif +# endif +#endif +#endif + +/* Now attempt to set the define for platform byte order using any */ +/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */ +/* seem to encompass most endian symbol definitions */ + +#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN ) +# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN ) +# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( _BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( _LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( 
__BIG_ENDIAN ) && defined( __LITTLE_ENDIAN ) +# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ ) +# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +/* if the platform byte order could not be determined, then try to */ +/* set this define using common machine defines */ +#if !defined(PLATFORM_BYTE_ORDER) + +#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ + defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ + defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ + defined( vax ) || defined( vms ) || defined( VMS ) || \ + defined( __VMS ) || defined( _M_X64 ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN + +#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ + defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ + defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ + defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ + defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ + defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ + defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \ + defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ ) 
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN + +#elif defined(__arm__) +# ifdef __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# else +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif 1 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#elif 0 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#else +# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order +#endif + +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/config.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/config.h new file mode 100644 index 0000000..7dfc043 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/config.h @@ -0,0 +1,4 @@ +/* File generated by ToTargetConfigFile.xsl */ + +#define XKCP_has_Xoodyak +#define XKCP_has_Xoodoo diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/encrypt.c b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/encrypt.c new file mode 100644 index 0000000..199b719 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv6M/encrypt.c @@ -0,0 +1,90 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include "crypto_aead.h" +#include "api.h" +#include "Xoodyak.h" +#include + +#if !defined(CRYPTO_KEYBYTES) + #define CRYPTO_KEYBYTES 16 +#endif +#if !defined(CRYPTO_NPUBBYTES) + #define CRYPTO_NPUBBYTES 16 +#endif + +#define TAGLEN 16 + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k) +{ + Xoodyak_Instance instance; + + (void)nsec; + + Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, npub, CRYPTO_NPUBBYTES, NULL, 0); + Xoodyak_Absorb(&instance, ad, (size_t)adlen); + Xoodyak_Encrypt(&instance, m, c, (size_t)mlen); + Xoodyak_Squeeze(&instance, c + mlen, TAGLEN); + *clen = mlen + TAGLEN; + #if 0 + { + unsigned int i; + for (i = 0; i < *clen; ++i ) + { + printf("\\x%02x", c[i] ); + } + printf("\n"); + } + #endif + return 0; +} + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + Xoodyak_Instance instance; + unsigned char tag[TAGLEN]; + unsigned long long mlen_; + + (void)nsec; + + *mlen = 0; + if (clen < TAGLEN) { + return -1; + } + mlen_ = clen - TAGLEN; + Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, npub, CRYPTO_NPUBBYTES, NULL, 0); + Xoodyak_Absorb(&instance, ad, (size_t)adlen); + Xoodyak_Decrypt(&instance, c, m, (size_t)mlen_); + Xoodyak_Squeeze(&instance, tag, TAGLEN); + if (memcmp(tag, c + mlen_, TAGLEN) != 0) { + memset(m, 0, (size_t)mlen_); + return -1; + } + *mlen = mlen_; + return 0; +} diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Cyclist.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Cyclist.h new file mode 100644 index 
0000000..54522bb --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Cyclist.h @@ -0,0 +1,66 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Cyclist_h_ +#define _Cyclist_h_ + +#include +#include "align.h" + +#define Cyclist_ModeHash 1 +#define Cyclist_ModeKeyed 2 + +#define Cyclist_PhaseDown 1 +#define Cyclist_PhaseUp 2 + +#ifdef OUTPUT + +#include + +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + uint8_t stateShadow[size]; \ + FILE *file; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#else + +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#endif + +#define KCP_DeclareCyclistFunctions(prefix) \ + void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \ + void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \ + void prefix##_Encrypt(prefix##_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen); \ + void prefix##_Decrypt(prefix##_Instance *instance, 
const uint8_t *C, uint8_t *P, size_t CLen); \ + void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \ + void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \ + void prefix##_Ratchet(prefix##_Instance *instance); + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Cyclist.inc b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Cyclist.inc new file mode 100644 index 0000000..f3d8ce9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Cyclist.inc @@ -0,0 +1,336 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define JOIN0(a, b) a ## b +#define JOIN(a, b) JOIN0(a, b) + +#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize) +#define SnP_Initialize JOIN(SnP, _Initialize) +#define SnP_AddBytes JOIN(SnP, _AddBytes) +#define SnP_AddByte JOIN(SnP, _AddByte) +#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes) +#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes) +#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes) + +#define Cyclist_Instance JOIN(prefix, _Instance) +#define Cyclist_Initialize JOIN(prefix, _Initialize) +#define Cyclist_Absorb JOIN(prefix, _Absorb) +#define Cyclist_Encrypt JOIN(prefix, _Encrypt) +#define Cyclist_Decrypt JOIN(prefix, _Decrypt) +#define Cyclist_Squeeze JOIN(prefix, _Squeeze) +#define Cyclist_SqueezeKey JOIN(prefix, _SqueezeKey) +#define Cyclist_Ratchet JOIN(prefix, _Ratchet) + +#define Cyclist_AbsorbAny JOIN(prefix, _AbsorbAny) +#define Cyclist_AbsorbKey JOIN(prefix, _AbsorbKey) +#define Cyclist_SqueezeAny JOIN(prefix, _SqueezeAny) +#define Cyclist_Down JOIN(prefix, _Down) +#define Cyclist_Up JOIN(prefix, _Up) +#define Cyclist_Crypt JOIN(prefix, _Crypt) + +#define Cyclist_f_bPrime JOIN(prefix, _f_bPrime) +#define Cyclist_Rhash JOIN(prefix, _Rhash) +#define Cyclist_Rkin JOIN(prefix, _Rkin) +#define Cyclist_Rkout JOIN(prefix, _Rkout) +#define Cyclist_lRatchet JOIN(prefix, _lRatchet) + +#if defined(CyclistFullBlocks_supported) +#define Cyclist_AbsorbKeyedFullBlocks JOIN(prefix, _AbsorbKeyedFullBlocks) +#define Cyclist_AbsorbHashFullBlocks JOIN(prefix, _AbsorbHashFullBlocks) +#define Cyclist_SqueezeKeyedFullBlocks JOIN(prefix, _SqueezeKeyedFullBlocks) +#define Cyclist_SqueezeHashFullBlocks JOIN(prefix, _SqueezeHashFullBlocks) +#define Cyclist_EncryptFullBlocks JOIN(prefix, _EncryptFullBlocks) +#define Cyclist_DecryptFullBlocks JOIN(prefix, _DecryptFullBlocks) +#endif + +/* ------- Cyclist internal interfaces ------- */ + +static void Cyclist_Down(Cyclist_Instance *instance, const 
uint8_t *Xi, unsigned int XiLen, uint8_t Cd) +{ + SnP_AddBytes(instance->state, Xi, 0, XiLen); + SnP_AddByte(instance->state, 0x01, XiLen); + SnP_AddByte(instance->state, (instance->mode == Cyclist_ModeHash) ? (Cd & 0x01) : Cd, Cyclist_f_bPrime - 1); + instance->phase = Cyclist_PhaseDown; + +} + +static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu) +{ + #if defined(OUTPUT) + uint8_t s[Cyclist_f_bPrime]; + #endif + + if (instance->mode != Cyclist_ModeHash) { + SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1); + } + #if defined(OUTPUT) + if (instance->file != NULL) { + SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime ); + SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime ); + } + #endif + SnP_Permute( instance->state ); + #if defined(OUTPUT) + if (instance->file != NULL) { + memcpy( instance->stateShadow, instance->state, sizeof(instance->state) ); + fprintf( instance->file, "Data XORed" ); + displayByteString( instance->file, "", s, Cyclist_f_bPrime ); + SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime ); + fprintf( instance->file, "After f() "); + displayByteString( instance->file, "", s, Cyclist_f_bPrime ); + } + #endif + instance->phase = Cyclist_PhaseUp; + SnP_ExtractBytes( instance->state, Yi, 0, YiLen ); +} + +static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd) +{ + unsigned int splitLen; + + do { + if (instance->phase != Cyclist_PhaseUp) { + Cyclist_Up(instance, NULL, 0, 0); + } + splitLen = (unsigned int)MyMin(XLen, r); + Cyclist_Down(instance, X, splitLen, Cd); + Cd = 0; + X += splitLen; + XLen -= splitLen; + #if defined(CyclistFullBlocks_supported) + if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) { + size_t lenProcessed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen); + X += lenProcessed; + XLen -= lenProcessed; + } + else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) { + 
size_t lenProcessed = Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen); + X += lenProcessed; + XLen -= lenProcessed; + } + #endif + } while ( XLen != 0 ); +} + +static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen) +{ + uint8_t KID[Cyclist_Rkin]; + + #if DEBUG + assert(instance->mode == Cyclist_ModeHash); + assert((KLen + IDLen) <= (Cyclist_Rkin - 1)); + #endif + instance->mode = Cyclist_ModeKeyed; + instance->Rabsorb = Cyclist_Rkin; + instance->Rsqueeze = Cyclist_Rkout; + if (KLen != 0) { + memcpy(KID, K, KLen); + memcpy(KID + KLen, ID, IDLen); + KID[KLen + IDLen] = (uint8_t)IDLen; + Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02); + if (counterLen != 0) { + Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00); + } + } +} + +static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu) +{ + unsigned int len; + + len = (unsigned int)MyMin(YLen, instance->Rsqueeze ); + Cyclist_Up(instance, Y, len, Cu); + Y += len; + YLen -= len; + while (YLen != 0) { + #if defined(CyclistFullBlocks_supported) + if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkin)) { + size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen); + Y += lenProcessed; + YLen -= lenProcessed; + } + else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) { + size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen); + Y += lenProcessed; + YLen -= lenProcessed; + } + else + #endif + { + Cyclist_Down(instance, NULL, 0, 0); + len = (unsigned int)MyMin(YLen, instance->Rsqueeze ); + Cyclist_Up(instance, Y, len, 0); + Y += len; + YLen -= len; + } + } +} + +static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt) +{ + unsigned int splitLen; + uint8_t P[Cyclist_Rkout]; + uint8_t Cu = 0x80; + + do { + if 
(decrypt != 0) { + #if defined(CyclistFullBlocks_supported) + if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) { + size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen); + I += lenProcessed; + O += lenProcessed; + IOLen -= lenProcessed; + } + else + #endif + { + splitLen = (unsigned int)MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */ + Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */ + SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */ + Cyclist_Down(instance, O, splitLen, 0x00); + I += splitLen; + O += splitLen; + IOLen -= splitLen; + } + } + else { + #if defined(CyclistFullBlocks_supported) + if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) { + size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen); + I += lenProcessed; + O += lenProcessed; + IOLen -= lenProcessed; + } + else + #endif + { + splitLen = (unsigned int)MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */ + memcpy(P, I, splitLen); + Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */ + SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */ + Cyclist_Down(instance, P, splitLen, 0x00); + I += splitLen; + O += splitLen; + IOLen -= splitLen; + } + } + Cu = 0x00; + } while ( IOLen != 0 ); +} + +/* ------- Cyclist interfaces ------- */ + +void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen) +{ + SnP_StaticInitialize(); + SnP_Initialize(instance->state); + instance->phase = Cyclist_PhaseUp; + instance->mode = Cyclist_ModeHash; + instance->Rabsorb = Cyclist_Rhash; + instance->Rsqueeze = Cyclist_Rhash; + #ifdef OUTPUT + instance->file = 0; + SnP_Initialize( instance->stateShadow ); + #endif + if (KLen != 0) { + Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, 
counter, counterLen); + } +} +/* Cyclist_Absorb: absorbs XLen bytes of X by calling Cyclist_AbsorbAny with the instance's current Rabsorb and the constant 0x03 as last argument. */ +void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, size_t XLen) +{ + Cyclist_AbsorbAny(instance, X, XLen, instance->Rabsorb, 0x03); +} +/* Cyclist_Encrypt: encrypts PLen bytes of P into C through Cyclist_Crypt with decrypt = 0. Keyed mode is required; this is only checked by an assert when DEBUG is set. */ +void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen) +{ + #if DEBUG + assert(instance->mode == Cyclist_ModeKeyed); + #endif + Cyclist_Crypt(instance, P, C, PLen, 0); +} +/* Cyclist_Decrypt: decrypts CLen bytes of C into P through Cyclist_Crypt with decrypt = 1. Keyed mode is required; this is only checked by an assert when DEBUG is set. */ +void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen) +{ + #if DEBUG + assert(instance->mode == Cyclist_ModeKeyed); + #endif + Cyclist_Crypt(instance, C, P, CLen, 1); +} +/* Cyclist_Squeeze: writes YLen output bytes to Y through Cyclist_SqueezeAny, passing 0x40 as the Cu byte of the first Up. */ +void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen) +{ + Cyclist_SqueezeAny(instance, Y, YLen, 0x40); +} +/* Cyclist_SqueezeKey: same as Cyclist_Squeeze but with Cu = 0x20 and writing to K; keyed mode is asserted when DEBUG is set. */ +void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen) +{ + #if DEBUG + assert(instance->mode == Cyclist_ModeKeyed); + #endif + Cyclist_SqueezeAny(instance, K, KLen, 0x20); +} +/* Cyclist_Ratchet: squeezes Cyclist_lRatchet bytes (Cu = 0x10) into a stack buffer and absorbs that buffer back with constant 0x00; keyed mode is asserted when DEBUG is set. The buffer is not cleared before returning. */ +void Cyclist_Ratchet(Cyclist_Instance *instance) +{ + uint8_t buffer[Cyclist_lRatchet]; + + #if DEBUG + assert(instance->mode == Cyclist_ModeKeyed); + #endif + /* Squeeze then absorb is the same as overwriting with zeros */ + Cyclist_SqueezeAny(instance, buffer, sizeof(buffer), 0x10); + Cyclist_AbsorbAny(instance, buffer, sizeof(buffer), instance->Rabsorb, 0x00); +} + +#undef SnP_StaticInitialize +#undef SnP_Initialize +#undef SnP_AddBytes +#undef SnP_AddByte +#undef SnP_OverwriteBytes +#undef SnP_ExtractBytes +#undef SnP_ExtractAndAddBytes + +#undef Cyclist_Instance +#undef Cyclist_Initialize +#undef Cyclist_Absorb +#undef Cyclist_Encrypt +#undef Cyclist_Decrypt +#undef Cyclist_Squeeze +#undef Cyclist_SqueezeKey +#undef Cyclist_Ratchet + +#undef Cyclist_AbsorbAny +#undef Cyclist_AbsorbKey +#undef Cyclist_SqueezeAny +#undef Cyclist_Down +#undef Cyclist_Up +#undef Cyclist_Crypt + +#undef Cyclist_f_bPrime +#undef Cyclist_Rhash +#undef Cyclist_Rkin +#undef Cyclist_Rkout +#undef Cyclist_lRatchet + +#if defined(CyclistFullBlocks_supported) +#undef
Cyclist_AbsorbKeyedFullBlocks +#undef Cyclist_AbsorbHashFullBlocks +#undef Cyclist_SqueezeKeyedFullBlocks +#undef Cyclist_SqueezeHashFullBlocks +#undef Cyclist_EncryptFullBlocks +#undef Cyclist_DecryptFullBlocks +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Xoodoo-SnP.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Xoodoo-SnP.h new file mode 100644 index 0000000..7d0c98b --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Xoodoo-SnP.h @@ -0,0 +1,55 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodoo_SnP_h_ +#define _Xoodoo_SnP_h_ + +#include <stddef.h> +#include <stdint.h> + +/** For the documentation, see SnP-documentation.h.
+ */ + +#define Xoodoo_implementation "32-bit optimized ARM assembler implementation" +#define Xoodoo_stateSizeInBytes (3*4*4) +#define Xoodoo_stateAlignment 4 + +#define Xoodoo_StaticInitialize() +void Xoodoo_Initialize(void *state); /* writes zero to all 12 state words (48 bytes) */ +#define Xoodoo_AddByte(argS, argData, argOffset) ((uint8_t*)argS)[argOffset] ^= (argData) +void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length); /* XORs length bytes of data into the state, starting at byte offset */ +void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length); /* copies length bytes of data over the state, starting at byte offset */ +void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount); /* zeroes the first byteCount bytes of the state */ +//void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds); +void Xoodoo_Permute_6rounds(void *state); /* applies 6 Xoodoo rounds to the state in place */ +void Xoodoo_Permute_12rounds(void *state); /* applies 12 Xoodoo rounds to the state in place */ +void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length); /* copies length state bytes, starting at byte offset, into data */ +void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length); /* output[i] = state[offset + i] ^ input[i] for i < length; the state is not modified */ + +#define Xoodoo_FastXoofff_supported +void Xoofff_AddIs(uint8_t *output, const uint8_t *input, size_t bitLen); /* XORs bitLen bits of input into output; in a trailing partial byte only the low bitLen % 8 bits are kept, the upper bits of that output byte are cleared */ +size_t Xoofff_CompressFastLoop(uint8_t *kRoll, uint8_t *xAccu, const uint8_t *input, size_t length); /* consumes whole 48-byte blocks and returns the number of input bytes processed; always runs at least one block, so length must be at least one block */ +size_t Xoofff_ExpandFastLoop(uint8_t *yAccu, const uint8_t *kRoll, uint8_t *output, size_t length); /* produces whole 48-byte blocks and returns the number of output bytes written; always runs at least one block, so length must be at least one block */ + +#define CyclistFullBlocks_supported +size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen); /* absorbs 44-byte blocks (permute, XOR block, XOR 0x01 at byte 44) and returns the bytes consumed; processes at least one block, so call only with XLen >= 44 */ +size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen); /* absorbs 16-byte blocks (permute, XOR block, XOR 0x01 at byte 16) and returns the bytes consumed; processes at least one block, so call only with XLen >= 16 */ +size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen); +size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen); +size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen); +size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen); + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Xoodoo-uf-armv7m-le-gcc.s
b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Xoodoo-uf-armv7m-le-gcc.s new file mode 100644 index 0000000..0b72ec8 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Xoodoo-uf-armv7m-le-gcc.s @@ -0,0 +1,729 @@ +@ +@ The eXtended Keccak Code Package (XKCP) +@ https://github.com/XKCP/XKCP +@ +@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. +@ +@ Implementation by Ronny Van Keer, hereby denoted as "the implementer". +@ +@ For more information, feedback or questions, please refer to the Keccak Team website: +@ https://keccak.team/ +@ +@ To the extent possible under law, the implementer has waived all copyright +@ and related or neighboring rights to the source code in this file. +@ http://creativecommons.org/publicdomain/zero/1.0/ +@ + +@ WARNING: These functions work only on little endian CPU with@ ARMv7m architecture (Cortex-M3, ...). + + + .thumb + .syntax unified +.text + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_Initialize(void *state) +@ + .align 4 +.global Xoodoo_Initialize +.type Xoodoo_Initialize, %function; +Xoodoo_Initialize: + movs r1, #0 + movs r2, #0 + movs r3, #0 + movs r12, #0 + stmia r0!, { r1 - r3, r12 } + stmia r0!, { r1 - r3, r12 } + stmia r0!, { r1 - r3, r12 } + bx lr + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +@ +.global Xoodoo_AddBytes +.type Xoodoo_AddBytes, %function; +Xoodoo_AddBytes: + push {r4,lr} + adds r0, r0, r2 @ state += offset + subs r3, r3, #4 @ .if length >= 4 + bcc Xoodoo_AddBytes_Bytes +Xoodoo_AddBytes_LanesLoop: @ then, perform on lanes + ldr r2, [r0] + ldr r4, [r1], #4 + eors r2, r2, r4 + str r2, [r0], #4 + subs r3, r3, #4 + bcs Xoodoo_AddBytes_LanesLoop +Xoodoo_AddBytes_Bytes: + adds r3, r3, #3 + bcc 
Xoodoo_AddBytes_Exit +Xoodoo_AddBytes_BytesLoop: + ldrb r2, [r0] + ldrb r4, [r1], #1 + eors r2, r2, r4 + strb r2, [r0], #1 + subs r3, r3, #1 + bcs Xoodoo_AddBytes_BytesLoop +Xoodoo_AddBytes_Exit: + pop {r4,pc} + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +@ +.global Xoodoo_OverwriteBytes +.type Xoodoo_OverwriteBytes, %function; +Xoodoo_OverwriteBytes: + adds r0, r0, r2 @ state += offset + subs r3, r3, #4 @ .if length >= 4 + bcc Xoodoo_OverwriteBytes_Bytes +Xoodoo_OverwriteBytes_LanesLoop: @ then, perform on words + ldr r2, [r1], #4 + str r2, [r0], #4 + subs r3, r3, #4 + bcs Xoodoo_OverwriteBytes_LanesLoop +Xoodoo_OverwriteBytes_Bytes: + adds r3, r3, #3 + bcc Xoodoo_OverwriteBytes_Exit +Xoodoo_OverwriteBytes_BytesLoop: + ldrb r2, [r1], #1 + strb r2, [r0], #1 + subs r3, r3, #1 + bcs Xoodoo_OverwriteBytes_BytesLoop +Xoodoo_OverwriteBytes_Exit: + bx lr + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount) +@ +.global Xoodoo_OverwriteWithZeroes +.type Xoodoo_OverwriteWithZeroes, %function; +Xoodoo_OverwriteWithZeroes: + movs r3, #0 + lsrs r2, r1, #2 + beq Xoodoo_OverwriteWithZeroes_Bytes +Xoodoo_OverwriteWithZeroes_LoopLanes: + str r3, [r0], #4 + subs r2, r2, #1 + bne Xoodoo_OverwriteWithZeroes_LoopLanes +Xoodoo_OverwriteWithZeroes_Bytes: + ands r1, #3 + beq Xoodoo_OverwriteWithZeroes_Exit +Xoodoo_OverwriteWithZeroes_LoopBytes: + strb r3, [r0], #1 + subs r1, r1, #1 + bne Xoodoo_OverwriteWithZeroes_LoopBytes +Xoodoo_OverwriteWithZeroes_Exit: + bx lr + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_ExtractBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +@ +.global Xoodoo_ExtractBytes +.type 
Xoodoo_ExtractBytes, %function; +Xoodoo_ExtractBytes: + adds r0, r0, r2 @ state += offset + subs r3, r3, #4 @ .if length >= 4 + bcc Xoodoo_ExtractBytes_Bytes +Xoodoo_ExtractBytes_LanesLoop: @ then, handle words + ldr r2, [r0], #4 + str r2, [r1], #4 + subs r3, r3, #4 + bcs Xoodoo_ExtractBytes_LanesLoop +Xoodoo_ExtractBytes_Bytes: + adds r3, r3, #3 + bcc Xoodoo_ExtractBytes_Exit +Xoodoo_ExtractBytes_BytesLoop: + ldrb r2, [r0], #1 + strb r2, [r1], #1 + subs r3, r3, #1 + bcs Xoodoo_ExtractBytes_BytesLoop +Xoodoo_ExtractBytes_Exit: + bx lr + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_ExtractAndAddBytes(void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +@ +.global Xoodoo_ExtractAndAddBytes +.type Xoodoo_ExtractAndAddBytes, %function; +Xoodoo_ExtractAndAddBytes: + push {r4,r5} + adds r0, r0, r3 @ state += offset (offset register no longer needed, reuse for length) + ldr r3, [sp, #8] @ get length argument from stack + subs r3, r3, #4 @ .if length >= 4 + bcc Xoodoo_ExtractAndAddBytes_Bytes +Xoodoo_ExtractAndAddBytes_LanesLoop: @ then, handle words + ldr r5, [r0], #4 + ldr r4, [r1], #4 + eors r5, r5, r4 + str r5, [r2], #4 + subs r3, r3, #4 + bcs Xoodoo_ExtractAndAddBytes_LanesLoop +Xoodoo_ExtractAndAddBytes_Bytes: + adds r3, r3, #3 + bcc Xoodoo_ExtractAndAddBytes_Exit +Xoodoo_ExtractAndAddBytes_BytesLoop: + ldrb r5, [r0], #1 + ldrb r4, [r1], #1 + eors r5, r5, r4 + strb r5, [r2], #1 + subs r3, r3, #1 + bcs Xoodoo_ExtractAndAddBytes_BytesLoop +Xoodoo_ExtractAndAddBytes_Exit: + pop {r4,r5} + bx lr + .align 4 + + +@ ---------------------------------------------------------------------------- + +.equ _r0 , 5 +.equ _r1 , 14 +.equ _t3 , 1 + +.equ _w1 , 11 + +.equ _e0 , 2 +.equ _e1 , 8 + +.equ _rc12 , 0x00000058 +.equ _rc11 , 0x00000038 +.equ _rc10 , 0x000003C0 +.equ _rc9 , 0x000000D0 +.equ _rc8 , 0x00000120 +.equ _rc7 , 0x00000014 +.equ _rc6 , 0x00000060 +.equ 
_rc5 , 0x0000002C +.equ _rc4 , 0x00000380 +.equ _rc3 , 0x000000F0 +.equ _rc2 , 0x000001A0 +.equ _rc1 , 0x00000012 + +.equ _rc6x1, 0x00000003 +.equ _rc5x2, 0x0b000000 +.equ _rc4x3, 0x07000000 +.equ _rc3x4, 0x000f0000 +.equ _rc2x5, 0x0000d000 +.equ _rc1x6, 0x00000048 + +.equ _rc12x1, 0xc0000002 +.equ _rc11x2, 0x0e000000 +.equ _rc10x3, 0x07800000 +.equ _rc9x4 , 0x000d0000 +.equ _rc8x5 , 0x00009000 +.equ _rc7x6 , 0x00000050 +.equ _rc6x7 , 0x0000000c +.equ _rc5x8 , 0x2c000000 +.equ _rc4x9 , 0x1c000000 +.equ _rc3x10, 0x003c0000 +.equ _rc2x11, 0x00034000 +.equ _rc1x12, 0x00000120 + +@ ---------------------------------------------------------------------------- + +.macro mXor3 ro, a0, a1, a2, rho_e1, rho_e2 + .if ((\rho_e1)%32) == 0 + eors \ro, \a0, \a1 + .else + eor \ro, \a0, \a1, ROR #(32-(\rho_e1))%32 + .endif + .if ((\rho_e2)%32) == 0 + eors \ro, \ro, \a2 + .else + eor \ro, \ro, \a2, ROR #(32-(\rho_e2))%32 + .endif + .endm + +.macro mRliXor ro, ri, rot + .if ((\rot)%32) == 0 + eors \ro, \ro, \ri + .else + eor \ro, \ro, \ri, ROR #(32-(\rot))%32 + .endif + .endm + +.macro mRloXor ro, ri, rot + .if ((\rot)%32) == 0 + eors \ro, \ro, \ri + .else + eor \ro, \ri, \ro, ROR #(32-(\rot))%32 + .endif + .endm + +.macro mChi3 a0,a1,a2,r0,r1 + bic \r0, \a2, \a1, ROR #_w1 + eors \a0, \a0, \r0, ROR #32-_w1 + bic \r1, \a0, \a2, ROR #32-_w1 + eors \a1, \a1, \r1 + bic \r1, \a1, \a0 + eors \a2, \a2, \r1, ROR #_w1 + .endm + +.macro mRound r6i, r7i, r8i, r9i, r6w, r7w, r8w, r9w, r10i, r11i, r12i, lri, rho_e1, rho_we2, rc + + @ Theta: Column Parity Mixer (with late Rho-west, Rho-east bit rotations) + mXor3 r0, r5, \r9i, \lri, \rho_e1, \rho_we2 + mXor3 r1, r2, \r6i, \r10i, \rho_e1, \rho_we2 + mRliXor r0, r0, _r1-_r0 + mRloXor r2, r0, 32-_r0 + mRloXor \r6i, r0, \rho_e1-_r0 + mRloXor \r10i, r0, \rho_we2-_r0 + + mXor3 r0, r3, \r7i, \r11i, \rho_e1, \rho_we2 + mRliXor r1, r1, _r1-_r0 + mRloXor r3, r1, 32-_r0 + mRloXor \r7i, r1, \rho_e1-_r0 + mRloXor \r11i, r1, \rho_we2-_r0 + + mXor3 r1, r4, \r8i, 
\r12i, \rho_e1, \rho_we2 + mRliXor r0, r0, _r1-_r0 + mRloXor r4, r0, 32-_r0 + mRloXor \r8i, r0, \rho_e1-_r0 + mRloXor \r12i, r0, \rho_we2-_r0 + + mRliXor r1, r1, _r1-_r0 + mRloXor r5, r1, 32-_r0 + mRloXor \r9i, r1, \rho_e1-_r0 + mRloXor \lri, r1, \rho_we2-_r0 + @ After Theta the whole state is rotated -r0 + @ from here we must use a1.w instead of a1.i + + @ Iota: round constant + .if \rc == 0xc0000002 + eor r2, r2, #0x00000002 + eor r2, r2, #0xc0000000 + .else + eor r2, r2, #\rc + .endif + + @ Chi: non linear step, on colums + mChi3 r2, \r6w, \r10i, r0, r1 + mChi3 r3, \r7w, \r11i, r0, r1 + mChi3 r4, \r8w, \r12i, r0, r1 + mChi3 r5, \r9w, \lri, r0, r1 + .endm + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_Permute_6rounds( void *state ) +@ +.global Xoodoo_Permute_6rounds +.type Xoodoo_Permute_6rounds, %function; +Xoodoo_Permute_6rounds: + push {r0,r4-r11,lr} + ldmia r0!, {r2-r5} + ldmia r0!, {r8-r9} + ldmia r0!, {r6-r7} + ldmia r0, {r10-r12,lr} + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 32, 32, _rc6x1 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2 + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6 + pop {r0,r1} + ror r2, r2, #32-(6*_r0)%32 + ror r3, r3, #32-(6*_r0)%32 + ror r4, r4, #32-(6*_r0)%32 + ror r5, r5, #32-(6*_r0)%32 + ror r6, r6, #32-(6*_r0+1)%32 + ror r7, r7, #32-(6*_r0+1)%32 + ror r8, r8, #32-(6*_r0+1)%32 + ror r9, r9, #32-(6*_r0+1)%32 + ror r10, r10, #32-(6*_r0+_e1+_w1)%32 + ror r11, r11, #32-(6*_r0+_e1+_w1)%32 + ror r12, r12, #32-(6*_r0+_e1+_w1)%32 + ror lr, lr, #32-(6*_r0+_e1+_w1)%32 + stmia r0, {r2-r12,lr} + mov r4, r1 + pop {r5-r11,pc} + .align 4 + + +@ 
---------------------------------------------------------------------------- +@ +@ void Xoodoo_Permute_12rounds( void *state ) +@ +.global Xoodoo_Permute_12rounds +.type Xoodoo_Permute_12rounds, %function; +Xoodoo_Permute_12rounds: + push {r0,r4-r11,lr} + ldmia r0, {r2-r12,lr} + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc12x1 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc11x2 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc10x3 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc9x4 + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc8x5 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc7x6 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc6x7 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x8 + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x9 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x10 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x11 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x12 + ror r2, r2, #32-(12*_r0)%32 + ror r3, r3, #32-(12*_r0)%32 + ror r4, r4, #32-(12*_r0)%32 + ror r5, r5, #32-(12*_r0)%32 + ror r6, r6, #32-(12*_r0+1)%32 + ror r7, r7, #32-(12*_r0+1)%32 + ror r8, r8, #32-(12*_r0+1)%32 + ror r9, r9, #32-(12*_r0+1)%32 + ror r10, r10, #32-(12*_r0+_e1+_w1)%32 + ror r11, r11, #32-(12*_r0+_e1+_w1)%32 + ror r12, r12, #32-(12*_r0+_e1+_w1)%32 + ror lr, lr, #32-(12*_r0+_e1+_w1)%32 + pop {r0,r1} + stmia r0, {r2-r12,lr} + mov r4, r1 + pop {r5-r11,pc} + .align 4 + + +.equ Xoofff_BlockSize , 3*4*4 + +@ ---------------------------------------------------------------------------- +@ +@ void Xoofff_AddIs(BitSequence *output, const BitSequence *input, BitLength bitLen) +.global Xoofff_AddIs +.type Xoofff_AddIs, %function; +Xoofff_AddIs: + push 
{r4-r10,lr} + + subs r2, r2, #Xoofff_BlockSize*8 + bcc Xoofff_AddIs_LessThanBlock +Xoofff_AddIs_BlockLoop: + ldr r3, [r0, #0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + ldr r7, [r1], #4 + ldr r8, [r1], #4 + ldr r9, [r1], #4 + ldr r10, [r1], #4 + eor r3, r3, r7 + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + str r3, [r0], #4 + str r4, [r0], #4 + str r5, [r0], #4 + str r6, [r0], #4 + + ldr r3, [r0, #0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + ldr r7, [r1], #4 + ldr r8, [r1], #4 + ldr r9, [r1], #4 + ldr r10, [r1], #4 + eor r3, r3, r7 + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + str r3, [r0], #4 + str r4, [r0], #4 + str r5, [r0], #4 + str r6, [r0], #4 + + ldr r3, [r0, #0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + ldr r7, [r1], #4 + ldr r8, [r1], #4 + ldr r9, [r1], #4 + ldr r10, [r1], #4 + eor r3, r3, r7 + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + str r3, [r0], #4 + str r4, [r0], #4 + str r5, [r0], #4 + str r6, [r0], #4 + + subs r2, r2, #Xoofff_BlockSize*8 + bcs Xoofff_AddIs_BlockLoop +Xoofff_AddIs_LessThanBlock: + adds r2, r2, #Xoofff_BlockSize*8 + beq Xoofff_AddIs_Return + subs r2, r2, #16*8 + bcc Xoofff_AddIs_LessThan16 +Xoofff_AddIs_16Loop: + ldr r3, [r0, #0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + ldr r7, [r1], #4 + ldr r8, [r1], #4 + ldr r9, [r1], #4 + ldr r10, [r1], #4 + eor r3, r3, r7 + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + str r3, [r0], #4 + str r4, [r0], #4 + str r5, [r0], #4 + str r6, [r0], #4 + subs r2, r2, #16*8 + bcs Xoofff_AddIs_16Loop +Xoofff_AddIs_LessThan16: + adds r2, r2, #16*8 + beq Xoofff_AddIs_Return + subs r2, r2, #4*8 + bcc Xoofff_AddIs_LessThan4 +Xoofff_AddIs_4Loop: + ldr r3, [r0] + ldr r7, [r1], #4 + eors r3, r3, r7 + str r3, [r0], #4 + subs r2, r2, #4*8 + bcs Xoofff_AddIs_4Loop +Xoofff_AddIs_LessThan4: + adds r2, r2, #4*8 + beq Xoofff_AddIs_Return + subs r2, r2, #8 + bcc Xoofff_AddIs_LessThan1 +Xoofff_AddIs_1Loop: + ldrb r3, [r0] + 
ldrb r7, [r1], #1 + eors r3, r3, r7 + strb r3, [r0], #1 + subs r2, r2, #8 + bcs Xoofff_AddIs_1Loop +Xoofff_AddIs_LessThan1: + adds r2, r2, #8 + beq Xoofff_AddIs_Return + ldrb r3, [r0] + ldrb r7, [r1] + movs r1, #1 + eors r3, r3, r7 + lsls r1, r1, r2 + subs r1, r1, #1 + ands r3, r3, r1 + strb r3, [r0] +Xoofff_AddIs_Return: + pop {r4-r10,pc} + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoofff_CompressFastLoop(unsigned char *kRoll, unsigned char *xAccu, const unsigned char *input, size_t length) +@ +.equ Xoofff_Compress_kRoll , 0 +.equ Xoofff_Compress_input , 4 +.equ Xoofff_Compress_xAccu , 8 +.equ Xoofff_Compress_iInput , 12 +.equ Xoofff_Compress_length , 16 + +.global Xoofff_CompressFastLoop +.type Xoofff_CompressFastLoop, %function; +Xoofff_CompressFastLoop: + subs r3, #Xoofff_BlockSize @ length must be greater than block size + push {r1-r12,lr} + push {r0,r2} + ldmia r0, {r2-r12,lr} @ get initial kRoll +Xoofff_CompressFastLoop_Loop: + ldr r0, [sp, #Xoofff_Compress_input] @ add input + ldr r1, [r0], #4 + eors r2, r2, r1 + ldr r1, [r0], #4 + eors r3, r3, r1 + ldr r1, [r0], #4 + eors r4, r4, r1 + ldr r1, [r0], #4 + eors r5, r5, r1 + + ldr r1, [r0], #4 + eors r6, r6, r1 + ldr r1, [r0], #4 + eors r7, r7, r1 + ldr r1, [r0], #4 + eors r8, r8, r1 + ldr r1, [r0], #4 + eors r9, r9, r1 + + ldr r1, [r0], #4 + eors r10, r10, r1 + ldr r1, [r0], #4 + eors r11, r11, r1 + ldr r1, [r0], #4 + eors r12, r12, r1 + ldr r1, [r0], #4 + eors lr, lr, r1 + str r0, [sp, #Xoofff_Compress_input] + + @ permutation + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc6x1 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4 + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5 + mRound r9, r6, r7, r8, r8, r9, 
r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6 + + @ Extract and add into xAccu + ldr r0, [sp, #Xoofff_Compress_xAccu] + ldr r1, [r0] + mRloXor r2, r1, (6*_r0)%32 + ldr r1, [r0, #4] + + str r2, [r0], #4 + mRloXor r3, r1, (6*_r0)%32 + ldr r1, [r0, #4] + + str r3, [r0], #4 + mRloXor r4, r1, (6*_r0)%32 + ldr r1, [r0, #4] + + str r4, [r0], #4 + mRloXor r5, r1, (6*_r0)%32 + str r5, [r0], #4 + + ldm r0, {r2-r5} @ note that r6-r8 and r7-r9 are swapped + mRliXor r2, r8, (6*_r0+1)%32 + mRliXor r3, r9, (6*_r0+1)%32 + mRliXor r4, r6, (6*_r0+1)%32 + mRliXor r5, r7, (6*_r0+1)%32 + stm r0!, {r2-r5} + + ldm r0, {r2-r5} + mRliXor r2, r10, (6*_r0+_e1+_w1)%32 + mRliXor r3, r11, (6*_r0+_e1+_w1)%32 + mRliXor r4, r12, (6*_r0+_e1+_w1)%32 + mRliXor r5, lr, (6*_r0+_e1+_w1)%32 + stm r0!, {r2-r5} + + @roll kRoll + ldr r0, [sp, #Xoofff_Compress_kRoll] + ldr lr, [r0], #4 + ldmia r0!, {r10-r12} + ldmia r0!, {r2-r9} + eors lr, lr, lr, LSL #13 + eors lr, lr, r2, ROR #32-3 + sub r0, #Xoofff_BlockSize + stmia r0, {r2-r12,lr} + @ loop management + ldr r0, [sp, #Xoofff_Compress_length] + subs r0, #Xoofff_BlockSize + str r0, [sp, #Xoofff_Compress_length] + bcs Xoofff_CompressFastLoop_Loop + @ return number of bytes processed + ldr r0, [sp, #Xoofff_Compress_input] + ldr r1, [sp, #Xoofff_Compress_iInput] + sub r0, r0, r1 + pop {r1,r2} + pop {r1-r12,pc} + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoofff_ExpandFastLoop(unsigned char *yAccu, const unsigned char *kRoll, unsigned char *output, size_t length) +@ +.equ Xoofff_Expand_yAccu , 0 +.equ Xoofff_Expand_output , 4 +.equ Xoofff_Expand_kRoll , 8 +.equ Xoofff_Expand_iOutput , 12 +.equ Xoofff_Expand_length , 16 + +.global Xoofff_ExpandFastLoop +.type Xoofff_ExpandFastLoop, %function; +Xoofff_ExpandFastLoop: + subs r3, #Xoofff_BlockSize @ length must be greater than block size + push {r1-r12,lr} + push {r0,r2} + ldmia r0, {r2-r12,lr} @ get initial yAccu +Xoofff_ExpandFastLoop_Loop: + @ 
permutation + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc6x1 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4 + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6 + + @ Add k and extract + ldr r0, [sp, #Xoofff_Expand_kRoll] + ldr r1, [r0], #4 + mRloXor r2, r1, (6*_r0)%32 + + ldr r1, [sp, #Xoofff_Expand_output] + str r2, [r1], #4 + + ldr r2, [r0], #4 + mRloXor r3, r2, (6*_r0)%32 + ldr r2, [r0], #4 + + str r3, [r1], #4 + mRloXor r4, r2, (6*_r0)%32 + ldr r2, [r0], #4 + + str r4, [r1], #4 + mRloXor r5, r2, (6*_r0)%32 + str r5, [r1], #4 + + ldm r0!, {r2-r5} @ Note that r6-r8 and r7-r9 are swapped + mRliXor r2, r8, (6*_r0+1)%32 + str r2, [r1], #4 + mRliXor r3, r9, (6*_r0+1)%32 + str r3, [r1], #4 + mRliXor r4, r6, (6*_r0+1)%32 + str r4, [r1], #4 + mRliXor r5, r7, (6*_r0+1)%32 + str r5, [r1], #4 + + ldm r0!, {r2-r5} + mRliXor r2, r10, (6*_r0+_e1+_w1)%32 + str r2, [r1], #4 + mRliXor r3, r11, (6*_r0+_e1+_w1)%32 + str r3, [r1], #4 + mRliXor r4, r12, (6*_r0+_e1+_w1)%32 + str r4, [r1], #4 + mRliXor r5, lr, (6*_r0+_e1+_w1)%32 + str r5, [r1], #4 + + @ roll-e yAccu + ldr r0, [sp, #Xoofff_Expand_yAccu] + str r1, [sp, #Xoofff_Expand_output] + ldr lr, [r0], #4 + ldmia r0!, {r10-r12} + ldmia r0!, {r2-r9} + and r1, r6, r2 + eor lr, r1, lr, ROR #32-5 + eor lr, lr, r2, ROR #32-13 + eor lr, lr, #7 + sub r0, #Xoofff_BlockSize + stmia r0, {r2-r12,lr} + @ loop management + ldr r0, [sp, #Xoofff_Expand_length] + subs r0, #Xoofff_BlockSize + str r0, [sp, #Xoofff_Expand_length] + bcs Xoofff_ExpandFastLoop_Loop + @ return number of bytes processed + ldr r0, [sp, #Xoofff_Expand_output] + ldr r1, [sp, #Xoofff_Expand_iOutput] + sub r0, r0, r1 + pop {r1,r2} + pop {r1-r12,pc} + .align 4 + 
+ diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Xoodoo.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Xoodoo.h new file mode 100644 index 0000000..1b6f1a9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Xoodoo.h @@ -0,0 +1,79 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodoo_h_ +#define _Xoodoo_h_ + +#include +#include + +#define MAXROUNDS 12 +#define NROWS 3 +#define NCOLUMS 4 +#define NLANES (NCOLUMS*NROWS) + +/* Round constants */ +#define _rc12 0x00000058 +#define _rc11 0x00000038 +#define _rc10 0x000003C0 +#define _rc9 0x000000D0 +#define _rc8 0x00000120 +#define _rc7 0x00000014 +#define _rc6 0x00000060 +#define _rc5 0x0000002C +#define _rc4 0x00000380 +#define _rc3 0x000000F0 +#define _rc2 0x000001A0 +#define _rc1 0x00000012 + +/* ROTL32(a, offset): rotate the 32-bit value a left by (offset mod 32) bits; only defined here when the build has not supplied its own. The portable fallback parenthesizes a before casting and joins the two shifts with |, so an offset that is a multiple of 32 returns a unchanged (both shifts are then by 0; joined with ^ the two identical halves would cancel to 0). For every other offset the shifted halves share no bits, so | and ^ give the same result. */ +#if !defined(ROTL32) + #if defined (__arm__) && !defined(__GNUC__) + #define ROTL32(a, offset) __ror(a, (32-(offset))%32) + #elif defined(_MSC_VER) + #define ROTL32(a, offset) _rotl(a, (offset)%32) + #else + #define ROTL32(a, offset) ((((uint32_t)(a)) << ((offset)%32)) | (((uint32_t)(a)) >> ((32-(offset))%32))) + #endif +#endif + +#if !defined(READ32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define READ32_UNALIGNED(argAddress) (*((const __packed uint32_t*)(argAddress))) + #elif defined(_MSC_VER) + #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress))) + #else + #define
READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress))) + #endif +#endif + +#if !defined(WRITE32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define WRITE32_UNALIGNED(argAddress, argData) (*((__packed uint32_t*)(argAddress)) = (argData)) + #elif defined(_MSC_VER) + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #else + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #endif +#endif + +#if !defined(index) + #define index(__x,__y) ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS)) +#endif + +typedef uint32_t tXoodooLane; + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Xoodyak-parameters.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Xoodyak-parameters.h new file mode 100644 index 0000000..a8c34d8 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Xoodyak-parameters.h @@ -0,0 +1,26 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodyak_parameters_h_ +#define _Xoodyak_parameters_h_ + +#define Xoodyak_f_bPrime 48 +#define Xoodyak_Rhash 16 +#define Xoodyak_Rkin 44 +#define Xoodyak_Rkout 24 +#define Xoodyak_lRatchet 16 + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Xoodyak-uf-armv7m-le-gcc.s b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Xoodyak-uf-armv7m-le-gcc.s new file mode 100644 index 0000000..1249039 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Xoodyak-uf-armv7m-le-gcc.s @@ -0,0 +1,565 @@ +@ +@ The eXtended Keccak Code Package (XKCP) +@ https://github.com/XKCP/XKCP +@ +@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. +@ +@ Implementation by Ronny Van Keer, hereby denoted as "the implementer". +@ +@ For more information, feedback or questions, please refer to the Keccak Team website: +@ https://keccak.team/ +@ +@ To the extent possible under law, the implementer has waived all copyright +@ and related or neighboring rights to the source code in this file. +@ http://creativecommons.org/publicdomain/zero/1.0/ +@ + +@ WARNING: These functions work only on little endian CPU with@ ARMv7m architecture (Cortex-M3, ...). 
+ + + .thumb + .syntax unified +.text + + +@ ---------------------------------------------------------------------------- + +.equ _r0 , 5 +.equ _r1 , 14 +.equ _t3 , 1 + +.equ _w1 , 11 + +.equ _e0 , 2 +.equ _e1 , 8 + +.equ _rc12 , 0x00000058 +.equ _rc11 , 0x00000038 +.equ _rc10 , 0x000003C0 +.equ _rc9 , 0x000000D0 +.equ _rc8 , 0x00000120 +.equ _rc7 , 0x00000014 +.equ _rc6 , 0x00000060 +.equ _rc5 , 0x0000002C +.equ _rc4 , 0x00000380 +.equ _rc3 , 0x000000F0 +.equ _rc2 , 0x000001A0 +.equ _rc1 , 0x00000012 + +.equ _rc6x1 , 0x00000003 +.equ _rc5x2 , 0x0b000000 +.equ _rc4x3 , 0x07000000 +.equ _rc3x4 , 0x000f0000 +.equ _rc2x5 , 0x0000d000 +.equ _rc1x6 , 0x00000048 + +.equ _rc12x1, 0xc0000002 +.equ _rc11x2, 0x0e000000 +.equ _rc10x3, 0x07800000 +.equ _rc9x4 , 0x000d0000 +.equ _rc8x5 , 0x00009000 +.equ _rc7x6 , 0x00000050 +.equ _rc6x7 , 0x0000000c +.equ _rc5x8 , 0x2c000000 +.equ _rc4x9 , 0x1c000000 +.equ _rc3x10, 0x003c0000 +.equ _rc2x11, 0x00034000 +.equ _rc1x12, 0x00000120 + +@ ---------------------------------------------------------------------------- + +.macro mXor3 ro, a0, a1, a2, rho_e1, rho_e2 + .if ((\rho_e1)%32) == 0 + eors \ro, \a0, \a1 + .else + eor \ro, \a0, \a1, ROR #(32-(\rho_e1))%32 + .endif + .if ((\rho_e2)%32) == 0 + eors \ro, \ro, \a2 + .else + eor \ro, \ro, \a2, ROR #(32-(\rho_e2))%32 + .endif + .endm + +.macro mRliXor ro, ri, rot + .if ((\rot)%32) == 0 + eors \ro, \ro, \ri + .else + eor \ro, \ro, \ri, ROR #(32-(\rot))%32 + .endif + .endm + +.macro mRloXor ro, ri, rot + .if ((\rot)%32) == 0 + eors \ro, \ro, \ri + .else + eor \ro, \ri, \ro, ROR #(32-(\rot))%32 + .endif + .endm + +.macro mChi3 a0,a1,a2,r0,r1 + bic \r0, \a2, \a1, ROR #_w1 + eors \a0, \a0, \r0, ROR #32-_w1 + bic \r1, \a0, \a2, ROR #32-_w1 + eors \a1, \a1, \r1 + bic \r1, \a1, \a0 + eors \a2, \a2, \r1, ROR #_w1 + .endm + +.macro mRound r6i, r7i, r8i, r9i, r6w, r7w, r8w, r9w, r10i, r11i, r12i, lri, rho_e1, rho_we2, rc + + @ Theta: Column Parity Mixer (with late Rho-west, Rho-east bit rotations) + 
mXor3 r0, r5, \r9i, \lri, \rho_e1, \rho_we2 + mXor3 r1, r2, \r6i, \r10i, \rho_e1, \rho_we2 + mRliXor r0, r0, _r1-_r0 + mRloXor r2, r0, 32-_r0 + mRloXor \r6i, r0, \rho_e1-_r0 + mRloXor \r10i, r0, \rho_we2-_r0 + + mXor3 r0, r3, \r7i, \r11i, \rho_e1, \rho_we2 + mRliXor r1, r1, _r1-_r0 + mRloXor r3, r1, 32-_r0 + mRloXor \r7i, r1, \rho_e1-_r0 + mRloXor \r11i, r1, \rho_we2-_r0 + + mXor3 r1, r4, \r8i, \r12i, \rho_e1, \rho_we2 + mRliXor r0, r0, _r1-_r0 + mRloXor r4, r0, 32-_r0 + mRloXor \r8i, r0, \rho_e1-_r0 + mRloXor \r12i, r0, \rho_we2-_r0 + + mRliXor r1, r1, _r1-_r0 + mRloXor r5, r1, 32-_r0 + mRloXor \r9i, r1, \rho_e1-_r0 + mRloXor \lri, r1, \rho_we2-_r0 + @ After Theta the whole state is rotated -r0 + @ from here we must use a1.w instead of a1.i + + @ Iota: round constant + .if \rc == 0xc0000002 + eor r2, r2, #0x00000002 + eor r2, r2, #0xc0000000 + .else + eor r2, r2, #\rc + .endif + + @ Chi: non linear step, on colums + mChi3 r2, \r6w, \r10i, r0, r1 + mChi3 r3, \r7w, \r11i, r0, r1 + mChi3 r4, \r8w, \r12i, r0, r1 + mChi3 r5, \r9w, \lri, r0, r1 + .endm + +.equ offsetInstance , 0 +.equ offsetInitialLen , 16 +.equ offsetReturn , 20 + +@ ---------------------------------------------------------------------------- +@ +@ Xoodoo_Permute_12roundsAsm: only callable from asm +@ +.type Xoodoo_Permute_12roundsAsm, %function; +Xoodoo_Permute_12roundsAsm: + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc12x1 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc11x2 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc10x3 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc9x4 + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc8x5 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc7x6 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc6x7 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x8 + mRound 
r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x9 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x10 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x11 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x12 + ror r2, r2, #32-(12*_r0)%32 + ror r3, r3, #32-(12*_r0)%32 + ror r4, r4, #32-(12*_r0)%32 + ror r5, r5, #32-(12*_r0)%32 + ror r6, r6, #32-(12*_r0+1)%32 + ror r7, r7, #32-(12*_r0+1)%32 + ror r8, r8, #32-(12*_r0+1)%32 + ror r9, r9, #32-(12*_r0+1)%32 + ror r10, r10, #32-(12*_r0+_e1+_w1)%32 + ror r11, r11, #32-(12*_r0+_e1+_w1)%32 + ror r12, r12, #32-(12*_r0+_e1+_w1)%32 + ror lr, lr, #32-(12*_r0+_e1+_w1)%32 + ldr pc, [sp, #offsetReturn] + .align 4 + + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen) +@ { +@ size_t initialLength = XLen@ +@ +@ do { +@ SnP_Permute(state )@ /* Xoodyak_Up(instance, NULL, 0, 0)@ */ +@ SnP_AddBytes(state, X, 0, Xoodyak_Rkin)@ /* Xoodyak_Down(instance, X, Xoodyak_Rkin, 0)@ */ +@ SnP_AddByte(state, 0x01, Xoodyak_Rkin)@ +@ X += Xoodyak_Rkin@ +@ XLen -= Xoodyak_Rkin@ +@ } while (XLen >= Xoodyak_Rkin)@ +@ +@ return initialLength - XLen@ +@ } +@ +.equ offsetAbsorbX , 4 +.equ offsetAbsorbXLen , 8 + +.global Xoodyak_AbsorbKeyedFullBlocks +.type Xoodyak_AbsorbKeyedFullBlocks, %function; +Xoodyak_AbsorbKeyedFullBlocks: + push {r4-r12,lr} + mov r4, r2 @ r4 initialLength + subs r2, r2, #44 + ldr r5, =Xoodyak_AbsorbKeyedFullBlocks_Ret+1 + push {r0-r5} + ldmia r0, {r2-r12,lr} +Xoodyak_AbsorbKeyedFullBlocks_Loop: + b.w Xoodoo_Permute_12roundsAsm +Xoodyak_AbsorbKeyedFullBlocks_Ret: + ldr r0, [sp, #offsetAbsorbX] + ldr r1, [r0], #4 + eors r2, r2, r1 + ldr r1, [r0], #4 + eors r3, r3, r1 + ldr r1, [r0], #4 + eors r4, r4, r1 + ldr r1, [r0], #4 + eors r5, r5, r1 + ldr r1, [r0], #4 + eors r6, r6, r1 + ldr r1, [r0], #4 + eors r7, r7, r1 + ldr 
r1, [r0], #4
+    eors    r8, r8, r1
+    ldr     r1, [r0], #4
+    eors    r9, r9, r1
+    ldr     r1, [r0], #4
+    eors    r10, r10, r1
+    ldr     r1, [r0], #4
+    eors    r11, r11, r1
+    ldr     r1, [r0], #4
+    eors    lr, lr, #1
+    eors    r12, r12, r1
+    ldr     r1, [sp, #offsetAbsorbXLen]
+    str     r0, [sp, #offsetAbsorbX]
+    subs    r1, r1, #44
+    str     r1, [sp, #offsetAbsorbXLen]
+    bcs     Xoodyak_AbsorbKeyedFullBlocks_Loop
+    ldr     r0, [sp, #offsetInstance]
+    stmia   r0, {r2-r12,lr}
+    pop     {r0-r5}
+    adds    r2, r2, #44
+    sub     r0, r4, r2
+    pop     {r4-r12,pc}
+    .align  4
+
+
+@ ----------------------------------------------------------------------------
+@
+@ size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen)
+@ {
+@     size_t  initialLength = XLen@
+@
+@     do {
+@         SnP_Permute(state )@                      /* Xoodyak_Up(instance, NULL, 0, 0)@ */
+@         SnP_AddBytes(state, X, 0, Xoodyak_Rhash)@ /* Xoodyak_Down(instance, X, Xoodyak_Rhash, 0)@ */
+@         SnP_AddByte(state, 0x01, Xoodyak_Rhash)@
+@         X       += Xoodyak_Rhash@
+@         XLen    -= Xoodyak_Rhash@
+@     } while (XLen >= Xoodyak_Rhash)@
+@
+@     return initialLength - XLen@
+@ }
+@
+@ Register use inside the loop: r2-r12 and lr hold the twelve state words,
+@ r0/r1 are scratch. The block size is the literal 16 used below.
+@ Frame built by "push {r0-r5}" (offsets from sp, matching the .equ values):
+@     offsetInstance   (0)   state pointer
+@     offsetAbsorbX    (4)   current input pointer
+@     offsetAbsorbXLen (8)   remaining length minus 16
+@     offsetInitialLen (16)  initialLength
+@     offsetReturn     (20)  address the permutation jumps back to
+@ The routine is a do/while: it always absorbs at least one block, so the
+@ caller must pass XLen >= 16.
+@ NOTE(review): X is read with word loads; confirm that the target allows
+@ unaligned LDR if X is not guaranteed to be 4-byte aligned.
+@
+.global Xoodyak_AbsorbHashFullBlocks
+.type Xoodyak_AbsorbHashFullBlocks, %function;
+Xoodyak_AbsorbHashFullBlocks:
+    push    {r4-r12,lr}                         @ save r4-r12 and the return address
+    mov     r4, r2                              @ r4 initialLength
+    subs    r2, r2, #16                         @ bias the length by one block so the loop can test the carry flag
+    ldr     r5, =Xoodyak_AbsorbHashFullBlocks_Ret+1   @ continuation address for the permutation (+1: presumably the Thumb bit)
+    push    {r0-r5}                             @ build the frame described above
+    ldmia   r0, {r2-r12,lr}                     @ load the 12 state words
+Xoodyak_AbsorbHashFullBlocks_Loop:
+    b.w     Xoodoo_Permute_12roundsAsm          @ comes back through "ldr pc, [sp, #offsetReturn]"
+Xoodyak_AbsorbHashFullBlocks_Ret:
+    ldr     r0, [sp, #offsetAbsorbX]            @ r0 = current input pointer
+    ldr     r1, [r0], #4
+    eors    r2, r2, r1                          @ state word 0 ^= X[0..3]
+    ldr     r1, [r0], #4
+    eors    r3, r3, r1                          @ state word 1 ^= X[4..7]
+    ldr     r1, [r0], #4
+    eors    r4, r4, r1                          @ state word 2 ^= X[8..11]
+    ldr     r1, [r0], #4
+    eors    r6, r6, #1                          @ the SnP_AddByte(state, 0x01, Xoodyak_Rhash) step of the pseudo-code
+    eors    r5, r5, r1                          @ state word 3 ^= X[12..15]
+    ldr     r1, [sp, #offsetAbsorbXLen]
+    str     r0, [sp, #offsetAbsorbX]            @ save the advanced input pointer
+    subs    r1, r1, #16                         @ one more block consumed
+    str     r1, [sp, #offsetAbsorbXLen]
+    bcs     Xoodyak_AbsorbHashFullBlocks_Loop   @ no borrow: at least one more full block remains
+    ldr     r0, [sp, #offsetInstance]
+    stmia   r0, {r2-r12,lr}                     @ write the state back
+    pop     {r0-r5}                             @ r2 = biased remaining length, r4 = initialLength
+    adds    r2, r2, #16                         @ undo the bias: r2 = bytes left unprocessed
+    sub     r0, r4, r2                          @ return value: number of bytes absorbed
+    pop     {r4-r12,pc}
+    .align  4
+
+
+@ ----------------------------------------------------------------------------
+@
+@ size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen)
+@ {
+@     size_t  initialLength = YLen@
+@
+@     do {
+@         SnP_AddByte(state, 0x01, 0)@  /* Xoodyak_Down(instance, NULL, 0, 0)@ */
+@         SnP_Permute(state )@          /* Xoodyak_Up(instance, Y, Xoodyak_Rkout, 0)@ */
+@         SnP_ExtractBytes(state, Y, 0, Xoodyak_Rkout)@
+@         Y       += Xoodyak_Rkout@
+@         YLen    -= Xoodyak_Rkout@
+@     } while (YLen >= Xoodyak_Rkout)@
+@
+@     return initialLength - YLen@
+@ }
+@
+@ Register use inside the loop: r2-r12 and lr hold the twelve state words,
+@ r0/r1 are scratch. The block size is the literal 24 used below (six words).
+@ "push {r0-r5}" leaves the state pointer at offsetInstance (0), Y at
+@ offsetSqueezeY (4), the remaining length minus 24 at offsetSqueezeYLen (8),
+@ initialLength at offsetInitialLen (16) and the permutation's continuation
+@ address at offsetReturn (20).
+@ The routine is a do/while: it always emits at least one block, so the
+@ caller must pass YLen >= 24.
+@ NOTE(review): Y is written with word stores; confirm that the target allows
+@ unaligned STR if Y is not guaranteed to be 4-byte aligned.
+@
+.equ offsetSqueezeY     , 4
+.equ offsetSqueezeYLen  , 8
+
+.global Xoodyak_SqueezeKeyedFullBlocks
+.type Xoodyak_SqueezeKeyedFullBlocks, %function;
+Xoodyak_SqueezeKeyedFullBlocks:
+    push    {r4-r12,lr}                         @ save r4-r12 and the return address
+    mov     r4, r2                              @ r4 initialLength
+    subs    r2, r2, #24                         @ bias the length by one block so the loop can test the carry flag
+    ldr     r5, =Xoodyak_SqueezeKeyedFullBlocks_Ret+1 @ continuation address for the permutation (+1: presumably the Thumb bit)
+    push    {r0-r5}                             @ build the frame described above
+    ldmia   r0, {r2-r12,lr}                     @ load the 12 state words
+Xoodyak_SqueezeKeyedFullBlocks_Loop:
+    eors    r2, r2, #1                          @ the SnP_AddByte(state, 0x01, 0) step of the pseudo-code
+    b.w     Xoodoo_Permute_12roundsAsm          @ comes back through "ldr pc, [sp, #offsetReturn]"
+Xoodyak_SqueezeKeyedFullBlocks_Ret:
+    ldr     r0, [sp, #offsetSqueezeY]           @ r0 = current output pointer
+    str     r2, [r0], #4                        @ copy the first six state words (24 bytes) to Y
+    str     r3, [r0], #4
+    str     r4, [r0], #4
+    str     r5, [r0], #4
+    str     r6, [r0], #4
+    str     r7, [r0], #4
+    ldr     r1, [sp, #offsetSqueezeYLen]
+    str     r0, [sp, #offsetSqueezeY]           @ save the advanced output pointer
+    subs    r1, r1, #24                         @ one more block produced
+    str     r1, [sp, #offsetSqueezeYLen]
+    bcs     Xoodyak_SqueezeKeyedFullBlocks_Loop @ no borrow: at least one more full block is wanted
+    ldr     r0, [sp, #offsetInstance]
+    stmia   r0, {r2-r12,lr}                     @ write the state back
+    pop     {r0-r5}                             @ r2 = biased remaining length, r4 = initialLength
+    adds    r2, r2, #24                         @ undo the bias: r2 = bytes still to produce
+    sub     r0, r4, r2                          @ return value: number of bytes squeezed
+    pop     {r4-r12,pc}
+    .align  4
+
+
+@ ----------------------------------------------------------------------------
+@
+@ size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen)
+@ {
+@     size_t  initialLength = YLen@
+@
+@     do {
+@         SnP_AddByte(state, 0x01, 0)@  /* Xoodyak_Down(instance, NULL, 0, 0)@ */
+@         SnP_Permute(state)@           /* Xoodyak_Up(instance, Y, Xoodyak_Rhash, 0)@ */
+@         SnP_ExtractBytes(state, Y, 0, Xoodyak_Rhash)@
+@         Y       += Xoodyak_Rhash@
+@         YLen    -= Xoodyak_Rhash@
+@     } while (YLen >= Xoodyak_Rhash)@
+@
+@     return initialLength - YLen@
+@ }
+@
+.global Xoodyak_SqueezeHashFullBlocks
+.type Xoodyak_SqueezeHashFullBlocks, %function;
+Xoodyak_SqueezeHashFullBlocks: + push {r4-r12,lr} + mov r4, r2 @ r4 initialLength + subs r2, r2, #16 + ldr r5, =Xoodyak_SqueezeHashFullBlocks_Ret+1 + push {r0-r5} + ldmia r0, {r2-r12,lr} +Xoodyak_SqueezeHashFullBlocks_Loop: + eors r2, r2, #1 + b.w Xoodoo_Permute_12roundsAsm +Xoodyak_SqueezeHashFullBlocks_Ret: + ldr r0, [sp, #offsetSqueezeY] + str r2, [r0], #4 + str r3, [r0], #4 + str r4, [r0], #4 + str r5, [r0], #4 + ldr r1, [sp, #offsetSqueezeYLen] + str r0, [sp, #offsetSqueezeY] + subs r1, r1, #16 + str r1, [sp, #offsetSqueezeYLen] + bcs Xoodyak_SqueezeHashFullBlocks_Loop + ldr r0, [sp, #offsetInstance] + stmia r0, {r2-r12,lr} + pop {r0-r5} + adds r2, r2, #16 + sub r0, r4, r2 + pop {r4-r12,pc} + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen) +@ { +@ size_t initialLength = IOLen@ +@ +@ do { +@ SnP_Permute(state)@ +@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@ +@ SnP_OverwriteBytes(state, O, 0, Xoodyak_Rkout)@ +@ SnP_AddByte(state, 0x01, Xoodyak_Rkout)@ +@ I += Xoodyak_Rkout@ +@ O += Xoodyak_Rkout@ +@ IOLen -= Xoodyak_Rkout@ +@ } while (IOLen >= Xoodyak_Rkout)@ +@ +@ return initialLength - IOLen@ +@ } +@ +.equ offsetCryptI , 4+8 +.equ offsetCryptO , 8+8 +.equ offsetCryptIOLen , 12 + +.global Xoodyak_EncryptFullBlocks +.type Xoodyak_EncryptFullBlocks, %function; +Xoodyak_EncryptFullBlocks: + push {r4-r12,lr} + mov r4, r3 @ r4 initialLength + subs r3, r3, #24 + ldr r5, =Xoodyak_EncryptFullBlocks_Ret+1 + push {r0-r5} + ldmia r0, {r2-r12,lr} +Xoodyak_EncryptFullBlocks_Loop: + b.w Xoodoo_Permute_12roundsAsm +Xoodyak_EncryptFullBlocks_Ret: + push {r10, r11} + ldr r11, [sp, #offsetCryptI] + ldr r10, [sp, #offsetCryptO] + ldr r0, [r11], #4 + ldr r1, [r11], #4 + eors r2, r2, r0 + str r2, [r10], #4 + eors r3, r3, r1 + ldr r0, [r11], #4 + str r3, [r10], #4 + eors r4, r4, r0 + ldr r1, [r11], #4 + str r4, [r10], #4 + eors r5, 
r5, r1 + ldr r0, [r11], #4 + str r5, [r10], #4 + eors r6, r6, r0 + ldr r1, [r11], #4 + str r6, [r10], #4 + eors r7, r7, r1 + str r7, [r10], #4 + str r10, [sp, #offsetCryptO] + str r11, [sp, #offsetCryptI] + pop {r10, r11} + ldr r0, [sp, #offsetCryptIOLen] + eors r8, r8, #1 + subs r0, r0, #24 + str r0, [sp, #offsetCryptIOLen] + bcs Xoodyak_EncryptFullBlocks_Loop + ldr r0, [sp, #offsetInstance] + stmia r0, {r2-r12,lr} + pop {r0-r5} + adds r3, r3, #24 + sub r0, r4, r3 + pop {r4-r12,pc} + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen) +@ { +@ size_t initialLength = IOLen@ +@ +@ do { +@ SnP_Permute(state)@ +@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@ +@ SnP_AddBytes(state, O, 0, Xoodyak_Rkout)@ +@ SnP_AddByte(state, 0x01, Xoodyak_Rkout)@ +@ I += Xoodyak_Rkout@ +@ O += Xoodyak_Rkout@ +@ IOLen -= Xoodyak_Rkout@ +@ } while (IOLen >= Xoodyak_Rkout)@ +@ +@ return initialLength - IOLen@ +@ } +@ +.global Xoodyak_DecryptFullBlocks +.type Xoodyak_DecryptFullBlocks, %function; +Xoodyak_DecryptFullBlocks: + push {r4-r12,lr} + mov r4, r3 @ r4 initialLength + subs r3, r3, #24 + ldr r5, =Xoodyak_DecryptFullBlocks_Ret+1 + push {r0-r5} + ldmia r0, {r2-r12,lr} +Xoodyak_DecryptFullBlocks_Loop: + b.w Xoodoo_Permute_12roundsAsm +Xoodyak_DecryptFullBlocks_Ret: + push {r10, r11} + ldr r11, [sp, #offsetCryptI] + ldr r10, [sp, #offsetCryptO] + ldr r0, [r11], #4 + ldr r1, [r11], #4 + eors r2, r2, r0 + str r2, [r10], #4 + mov r2, r0 + eors r3, r3, r1 + ldr r0, [r11], #4 + str r3, [r10], #4 + mov r3, r1 + eors r4, r4, r0 + ldr r1, [r11], #4 + str r4, [r10], #4 + mov r4, r0 + eors r5, r5, r1 + ldr r0, [r11], #4 + str r5, [r10], #4 + mov r5, r1 + eors r6, r6, r0 + ldr r1, [r11], #4 + str r6, [r10], #4 + mov r6, r0 + eors r7, r7, r1 + str r7, [r10], #4 + mov r7, r1 + str r10, [sp, #offsetCryptO] + str r11, [sp, #offsetCryptI] + pop {r10, r11} + 
ldr r0, [sp, #offsetCryptIOLen] + eors r8, r8, #1 + subs r0, r0, #24 + str r0, [sp, #offsetCryptIOLen] + bcs Xoodyak_DecryptFullBlocks_Loop + ldr r0, [sp, #offsetInstance] + stmia r0, {r2-r12,lr} + pop {r0-r5} + adds r3, r3, #24 + sub r0, r4, r3 + pop {r4-r12,pc} + .align 4 + + diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Xoodyak.c b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Xoodyak.c new file mode 100644 index 0000000..c5407dc --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/Xoodyak.c @@ -0,0 +1,55 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifdef XoodooReference + #include "displayIntermediateValues.h" +#endif + +#if DEBUG +#include +#endif +#include +#include "Xoodyak.h" + +#ifdef OUTPUT +#include +#include + +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length); +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length) +{ + unsigned int i; + + fprintf(f, "%s:", synopsis); + for(i=0; i +#include "Cyclist.h" +#include "Xoodoo-SnP.h" +#include "Xoodyak-parameters.h" + +KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment) +KCP_DeclareCyclistFunctions(Xoodyak) + +#else +#error This requires an implementation of Xoodoo +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/align.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/align.h new file mode 100644 index 0000000..82ad2f9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/align.h @@ -0,0 +1,33 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _align_h_ +#define _align_h_ + +/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. 
*/ +#ifdef ALIGN +#undef ALIGN +#endif + +#if defined(__GNUC__) +#define ALIGN(x) __attribute__ ((aligned(x))) +#elif defined(_MSC_VER) +#define ALIGN(x) __declspec(align(x)) +#elif defined(__ARMCC_VERSION) +#define ALIGN(x) __align(x) +#else +#define ALIGN(x) +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/api.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/api.h new file mode 100644 index 0000000..4ceda96 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/brg_endian.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/brg_endian.h new file mode 100644 index 0000000..7c640b9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/brg_endian.h @@ -0,0 +1,143 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The redistribution and use of this software (with or without changes) + is allowed without the payment of fees or royalties provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. 
+ --------------------------------------------------------------------------- + Issue Date: 20/12/2007 + Changes for ARM 9/9/2010 +*/ + +#ifndef _BRG_ENDIAN_H +#define _BRG_ENDIAN_H + +#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ +#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ + +#if 0 +/* Include files where endian defines and byteswap functions may reside */ +#if defined( __sun ) +# include +#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ ) +# include +#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \ + defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ ) +# include +#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ ) +# if !defined( __MINGW32__ ) && !defined( _AIX ) +# include +# if !defined( __BEOS__ ) +# include +# endif +# endif +#endif +#endif + +/* Now attempt to set the define for platform byte order using any */ +/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */ +/* seem to encompass most endian symbol definitions */ + +#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN ) +# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN ) +# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( _BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( _LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( 
__BIG_ENDIAN ) && defined( __LITTLE_ENDIAN ) +# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ ) +# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +/* if the platform byte order could not be determined, then try to */ +/* set this define using common machine defines */ +#if !defined(PLATFORM_BYTE_ORDER) + +#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ + defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ + defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ + defined( vax ) || defined( vms ) || defined( VMS ) || \ + defined( __VMS ) || defined( _M_X64 ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN + +#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ + defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ + defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ + defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ + defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ + defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ + defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \ + defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ ) 
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN + +#elif defined(__arm__) +# ifdef __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# else +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif 1 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#elif 0 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#else +# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order +#endif + +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/config.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/config.h new file mode 100644 index 0000000..7dfc043 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/config.h @@ -0,0 +1,4 @@ +/* File generated by ToTargetConfigFile.xsl */ + +#define XKCP_has_Xoodyak +#define XKCP_has_Xoodoo diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/encrypt.c b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/encrypt.c new file mode 100644 index 0000000..199b719 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-ARMv7M/encrypt.c @@ -0,0 +1,90 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include "crypto_aead.h" +#include "api.h" +#include "Xoodyak.h" +#include + +#if !defined(CRYPTO_KEYBYTES) + #define CRYPTO_KEYBYTES 16 +#endif +#if !defined(CRYPTO_NPUBBYTES) + #define CRYPTO_NPUBBYTES 16 +#endif + +#define TAGLEN 16 + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k) +{ + Xoodyak_Instance instance; + + (void)nsec; + + Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, npub, CRYPTO_NPUBBYTES, NULL, 0); + Xoodyak_Absorb(&instance, ad, (size_t)adlen); + Xoodyak_Encrypt(&instance, m, c, (size_t)mlen); + Xoodyak_Squeeze(&instance, c + mlen, TAGLEN); + *clen = mlen + TAGLEN; + #if 0 + { + unsigned int i; + for (i = 0; i < *clen; ++i ) + { + printf("\\x%02x", c[i] ); + } + printf("\n"); + } + #endif + return 0; +} + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + Xoodyak_Instance instance; + unsigned char tag[TAGLEN]; + unsigned long long mlen_; + + (void)nsec; + + *mlen = 0; + if (clen < TAGLEN) { + return -1; + } + mlen_ = clen - TAGLEN; + Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, npub, CRYPTO_NPUBBYTES, NULL, 0); + Xoodyak_Absorb(&instance, ad, (size_t)adlen); + Xoodyak_Decrypt(&instance, c, m, (size_t)mlen_); + Xoodyak_Squeeze(&instance, tag, TAGLEN); + if (memcmp(tag, c + mlen_, TAGLEN) != 0) { + memset(m, 0, (size_t)mlen_); + return -1; + } + *mlen = mlen_; + return 0; +} diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/Cyclist.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/Cyclist.h new file mode 100644 index 0000000..54522bb 
--- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/Cyclist.h @@ -0,0 +1,66 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Cyclist_h_ +#define _Cyclist_h_ + +#include +#include "align.h" + +#define Cyclist_ModeHash 1 +#define Cyclist_ModeKeyed 2 + +#define Cyclist_PhaseDown 1 +#define Cyclist_PhaseUp 2 + +#ifdef OUTPUT + +#include + +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + uint8_t stateShadow[size]; \ + FILE *file; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#else + +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#endif + +#define KCP_DeclareCyclistFunctions(prefix) \ + void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \ + void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \ + void prefix##_Encrypt(prefix##_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen); \ + void prefix##_Decrypt(prefix##_Instance *instance, const uint8_t *C, 
uint8_t *P, size_t CLen); \ + void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \ + void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \ + void prefix##_Ratchet(prefix##_Instance *instance); + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/Cyclist.inc b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/Cyclist.inc new file mode 100644 index 0000000..f3d8ce9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/Cyclist.inc @@ -0,0 +1,336 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define JOIN0(a, b) a ## b +#define JOIN(a, b) JOIN0(a, b) + +#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize) +#define SnP_Initialize JOIN(SnP, _Initialize) +#define SnP_AddBytes JOIN(SnP, _AddBytes) +#define SnP_AddByte JOIN(SnP, _AddByte) +#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes) +#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes) +#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes) + +#define Cyclist_Instance JOIN(prefix, _Instance) +#define Cyclist_Initialize JOIN(prefix, _Initialize) +#define Cyclist_Absorb JOIN(prefix, _Absorb) +#define Cyclist_Encrypt JOIN(prefix, _Encrypt) +#define Cyclist_Decrypt JOIN(prefix, _Decrypt) +#define Cyclist_Squeeze JOIN(prefix, _Squeeze) +#define Cyclist_SqueezeKey JOIN(prefix, _SqueezeKey) +#define Cyclist_Ratchet JOIN(prefix, _Ratchet) + +#define Cyclist_AbsorbAny JOIN(prefix, _AbsorbAny) +#define Cyclist_AbsorbKey JOIN(prefix, _AbsorbKey) +#define Cyclist_SqueezeAny JOIN(prefix, _SqueezeAny) +#define Cyclist_Down JOIN(prefix, _Down) +#define Cyclist_Up JOIN(prefix, _Up) +#define Cyclist_Crypt JOIN(prefix, _Crypt) + +#define Cyclist_f_bPrime JOIN(prefix, _f_bPrime) +#define Cyclist_Rhash JOIN(prefix, _Rhash) +#define Cyclist_Rkin JOIN(prefix, _Rkin) +#define Cyclist_Rkout JOIN(prefix, _Rkout) +#define Cyclist_lRatchet JOIN(prefix, _lRatchet) + +#if defined(CyclistFullBlocks_supported) +#define Cyclist_AbsorbKeyedFullBlocks JOIN(prefix, _AbsorbKeyedFullBlocks) +#define Cyclist_AbsorbHashFullBlocks JOIN(prefix, _AbsorbHashFullBlocks) +#define Cyclist_SqueezeKeyedFullBlocks JOIN(prefix, _SqueezeKeyedFullBlocks) +#define Cyclist_SqueezeHashFullBlocks JOIN(prefix, _SqueezeHashFullBlocks) +#define Cyclist_EncryptFullBlocks JOIN(prefix, _EncryptFullBlocks) +#define Cyclist_DecryptFullBlocks JOIN(prefix, _DecryptFullBlocks) +#endif + +/* ------- Cyclist internal interfaces ------- */ + +static void Cyclist_Down(Cyclist_Instance *instance, const 
uint8_t *Xi, unsigned int XiLen, uint8_t Cd)
+{
+    SnP_AddBytes(instance->state, Xi, 0, XiLen); /* add the XiLen input bytes starting at offset 0 */
+    SnP_AddByte(instance->state, 0x01, XiLen); /* add 0x01 at the offset just past the input */
+    SnP_AddByte(instance->state, (instance->mode == Cyclist_ModeHash) ? (Cd & 0x01) : Cd, Cyclist_f_bPrime - 1); /* add Cd at offset Cyclist_f_bPrime - 1; hash mode keeps only its lowest bit */
+    instance->phase = Cyclist_PhaseDown;
+
+}
+/*
+ * Cyclist_Up: in every mode except Cyclist_ModeHash, add Cu at offset
+ * Cyclist_f_bPrime - 1; then permute the state, set the phase to
+ * Cyclist_PhaseUp and extract the first YiLen state bytes into Yi.
+ * Callers in this file pass Yi == NULL together with YiLen == 0 when no
+ * output is wanted.
+ * When OUTPUT is defined and instance->file is not NULL, two byte strings
+ * labelled "Data XORed" and "After f() " are also written to that file;
+ * the first is built from stateShadow and state before the permutation
+ * (presumably the data added since the previous permutation), the second
+ * from the copy of the state taken after it.
+ */
+static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu)
+{
+    #if defined(OUTPUT)
+    uint8_t   s[Cyclist_f_bPrime];
+    #endif
+
+    if (instance->mode != Cyclist_ModeHash) {
+        SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1);
+    }
+    #if defined(OUTPUT)
+    if (instance->file != NULL) {
+        SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
+        SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime );
+    }
+    #endif
+    SnP_Permute( instance->state );
+    #if defined(OUTPUT)
+    if (instance->file != NULL) {
+        memcpy( instance->stateShadow, instance->state, sizeof(instance->state) ); /* remember the post-permutation state for the next call */
+        fprintf( instance->file, "Data XORed" );
+        displayByteString( instance->file, "", s, Cyclist_f_bPrime );
+        SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
+        fprintf( instance->file, "After f() ");
+        displayByteString( instance->file, "", s, Cyclist_f_bPrime );
+    }
+    #endif
+    instance->phase = Cyclist_PhaseUp;
+    SnP_ExtractBytes( instance->state, Yi, 0, YiLen );
+}
+/*
+ * Cyclist_AbsorbAny: absorb XLen bytes of X in pieces of at most r bytes.
+ * Each iteration first calls Cyclist_Up (without output) unless the phase is
+ * already Cyclist_PhaseUp, then Cyclist_Down on the next piece. Cd is used
+ * for the first piece only; later pieces use 0.
+ * The do/while body runs once even when XLen is 0.
+ * With CyclistFullBlocks_supported, remaining whole blocks are handed to the
+ * AbsorbKeyed/AbsorbHash FullBlocks routines, which return the number of
+ * bytes they consumed.
+ */
+static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd)
+{
+    unsigned int splitLen;
+
+    do {
+        if (instance->phase != Cyclist_PhaseUp) {
+            Cyclist_Up(instance, NULL, 0, 0);
+        }
+        splitLen = (unsigned int)MyMin(XLen, r);
+        Cyclist_Down(instance, X, splitLen, Cd);
+        Cd = 0;
+        X       += splitLen;
+        XLen    -= splitLen;
+        #if defined(CyclistFullBlocks_supported)
+        if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) {
+            size_t lenProcessed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen);
+            X       += lenProcessed;
+            XLen    -= lenProcessed;
+        }
+        else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) {
+            
size_t lenProcessed = Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen);
+            X       += lenProcessed;
+            XLen    -= lenProcessed;
+        }
+        #endif
+    } while ( XLen != 0 );
+}
+/*
+ * Cyclist_AbsorbKey: switch the instance to Cyclist_ModeKeyed, with
+ * Rabsorb = Cyclist_Rkin and Rsqueeze = Cyclist_Rkout. When KLen is
+ * non-zero, absorb the KLen + IDLen + 1 bytes K || ID || (uint8_t)IDLen with
+ * Cd = 0x02; then, if counterLen is non-zero, absorb the counter with r = 1,
+ * i.e. one byte per block.
+ * NOTE(review): the bound KLen + IDLen <= Cyclist_Rkin - 1 is checked only by
+ * the assert compiled under DEBUG. Without it, a longer key/ID pair would
+ * write past the end of KID, so callers must guarantee the bound.
+ */
+static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
+{
+    uint8_t KID[Cyclist_Rkin];
+
+    #if DEBUG
+    assert(instance->mode == Cyclist_ModeHash);
+    assert((KLen + IDLen) <= (Cyclist_Rkin - 1));
+    #endif
+    instance->mode      = Cyclist_ModeKeyed;
+    instance->Rabsorb   = Cyclist_Rkin;
+    instance->Rsqueeze  = Cyclist_Rkout;
+    if (KLen != 0) {
+        memcpy(KID, K, KLen);
+        memcpy(KID + KLen, ID, IDLen);
+        KID[KLen + IDLen] = (uint8_t)IDLen; /* trailing byte records the ID length */
+        Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02);
+        if (counterLen != 0) {
+            Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00);
+        }
+    }
+}
+/*
+ * Cyclist_SqueezeAny: write YLen output bytes to Y.
+ * The first chunk, at most instance->Rsqueeze bytes, comes from Cyclist_Up
+ * called with Cu; this call happens even when YLen is 0. Every further chunk
+ * is produced by an empty Cyclist_Down followed by Cyclist_Up with Cu = 0.
+ * With CyclistFullBlocks_supported, long remainders are handed to the
+ * SqueezeKeyed/SqueezeHash FullBlocks routines, which return the number of
+ * bytes they produced.
+ * NOTE(review): the keyed fast path is gated on YLen >= Cyclist_Rkin while
+ * the slow path works in Rsqueeze-sized chunks; confirm the gate is intended.
+ */
+static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu)
+{
+    unsigned int len;
+
+    len = (unsigned int)MyMin(YLen, instance->Rsqueeze );
+    Cyclist_Up(instance, Y, len, Cu);
+    Y += len;
+    YLen -= len;
+    while (YLen != 0) {
+        #if defined(CyclistFullBlocks_supported)
+        if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkin)) {
+            size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen);
+            Y += lenProcessed;
+            YLen -= lenProcessed;
+        }
+        else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) {
+            size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen);
+            Y += lenProcessed;
+            YLen -= lenProcessed;
+        }
+        else
+        #endif
+        {
+            Cyclist_Down(instance, NULL, 0, 0);
+            len = (unsigned int)MyMin(YLen, instance->Rsqueeze );
+            Cyclist_Up(instance, Y, len, 0);
+            Y += len;
+            YLen -= len;
+        }
+    }
+}
+/*
+ * Cyclist_Crypt: loop shared by encryption and decryption; a non-zero
+ * decrypt selects decryption. It moves IOLen bytes from I to O in chunks of
+ * at most Cyclist_Rkout bytes. The first Cyclist_Up uses Cu = 0x80 and all
+ * later ones 0x00. The do/while body runs once even when IOLen is 0.
+ * P is a per-chunk copy of the input used on the encryption path.
+ */
+static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt)
+{
+    unsigned int splitLen;
+    uint8_t      P[Cyclist_Rkout];
+    uint8_t      Cu = 0x80;
+
+    do {
+        if 
(decrypt != 0) { + #if defined(CyclistFullBlocks_supported) + if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) { + size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen); + I += lenProcessed; + O += lenProcessed; + IOLen -= lenProcessed; + } + else + #endif + { + splitLen = (unsigned int)MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */ + Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */ + SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */ + Cyclist_Down(instance, O, splitLen, 0x00); + I += splitLen; + O += splitLen; + IOLen -= splitLen; + } + } + else { + #if defined(CyclistFullBlocks_supported) + if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) { + size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen); + I += lenProcessed; + O += lenProcessed; + IOLen -= lenProcessed; + } + else + #endif + { + splitLen = (unsigned int)MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */ + memcpy(P, I, splitLen); + Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */ + SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */ + Cyclist_Down(instance, P, splitLen, 0x00); + I += splitLen; + O += splitLen; + IOLen -= splitLen; + } + } + Cu = 0x00; + } while ( IOLen != 0 ); +} + +/* ------- Cyclist interfaces ------- */ + +void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen) +{ + SnP_StaticInitialize(); + SnP_Initialize(instance->state); + instance->phase = Cyclist_PhaseUp; + instance->mode = Cyclist_ModeHash; + instance->Rabsorb = Cyclist_Rhash; + instance->Rsqueeze = Cyclist_Rhash; + #ifdef OUTPUT + instance->file = 0; + SnP_Initialize( instance->stateShadow ); + #endif + if (KLen != 0) { + Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, 
counter, counterLen); + } +} + +void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, size_t XLen) +{ + Cyclist_AbsorbAny(instance, X, XLen, instance->Rabsorb, 0x03); +} + +void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen) +{ + #if DEBUG + assert(instance->mode == Cyclist_ModeKeyed); + #endif + Cyclist_Crypt(instance, P, C, PLen, 0); +} + +void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen) +{ + #if DEBUG + assert(instance->mode == Cyclist_ModeKeyed); + #endif + Cyclist_Crypt(instance, C, P, CLen, 1); +} + +void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen) +{ + Cyclist_SqueezeAny(instance, Y, YLen, 0x40); +} + +void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen) +{ + #if DEBUG + assert(instance->mode == Cyclist_ModeKeyed); + #endif + Cyclist_SqueezeAny(instance, K, KLen, 0x20); +} + +void Cyclist_Ratchet(Cyclist_Instance *instance) +{ + uint8_t buffer[Cyclist_lRatchet]; + + #if DEBUG + assert(instance->mode == Cyclist_ModeKeyed); + #endif + /* Squeeze then absorb is the same as overwriting with zeros */ + Cyclist_SqueezeAny(instance, buffer, sizeof(buffer), 0x10); + Cyclist_AbsorbAny(instance, buffer, sizeof(buffer), instance->Rabsorb, 0x00); +} + +#undef SnP_StaticInitialize +#undef SnP_Initialize +#undef SnP_AddBytes +#undef SnP_AddByte +#undef SnP_OverwriteBytes +#undef SnP_ExtractBytes +#undef SnP_ExtractAndAddBytes + +#undef Cyclist_Instance +#undef Cyclist_Initialize +#undef Cyclist_Absorb +#undef Cyclist_Encrypt +#undef Cyclist_Decrypt +#undef Cyclist_Squeeze +#undef Cyclist_SqueezeKey +#undef Cyclist_Ratchet + +#undef Cyclist_AbsorbAny +#undef Cyclist_AbsorbKey +#undef Cyclist_SqueezeAny +#undef Cyclist_Down +#undef Cyclist_Up +#undef Cyclist_Crypt + +#undef Cyclist_f_bPrime +#undef Cyclist_Rhash +#undef Cyclist_Rkin +#undef Cyclist_Rkout +#undef Cyclist_lRatchet + +#if defined(CyclistFullBlocks_supported) +#undef 
Cyclist_AbsorbKeyedFullBlocks
+#undef Cyclist_AbsorbHashFullBlocks
+#undef Cyclist_SqueezeKeyedFullBlocks
+#undef Cyclist_SqueezeHashFullBlocks
+#undef Cyclist_EncryptFullBlocks
+#undef Cyclist_DecryptFullBlocks
+#endif
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/Xoodoo-SnP.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/Xoodoo-SnP.h
new file mode 100644
index 0000000..cf1b74a
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/Xoodoo-SnP.h
@@ -0,0 +1,43 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _Xoodoo_SnP_h_
+#define _Xoodoo_SnP_h_
+
+#include <stddef.h> /* NOTE(review): header name was missing after #include; <stddef.h> is assumed -- confirm against upstream */
+#include <stdint.h> /* needed for uint8_t below; header name was missing after #include */
+
+/** For the documentation, see SnP-documentation.h.
+ */
+
+#define Xoodoo_implementation "AVR8 optimized implementation"
+#define Xoodoo_stateSizeInBytes (3*4*4) /* 48 bytes */
+#define Xoodoo_stateAlignment 1
+#define Xoodoo_HasNround /* Xoodoo_Permute_Nrounds() is available */
+
+#define Xoodoo_StaticInitialize() /* expands to nothing */
+void Xoodoo_Initialize(void *state); /* set all state bytes to zero */
+#define Xoodoo_AddByte(argS, argData, argOffset) ((uint8_t*)(argS))[(argOffset)] ^= (argData) /* XOR one byte into the state; every argument is parenthesised so pointer expressions such as p + 1 are cast as a whole */
+void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length); /* XOR length bytes of data into the state starting at byte offset */
+void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length); /* copy length bytes of data over the state starting at byte offset */
+void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount); /* zero the first byteCount state bytes */
+void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds); /* apply the last nrounds rounds of the 12-round schedule */
+void Xoodoo_Permute_6rounds(void *state);
+void Xoodoo_Permute_12rounds(void *state);
+void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length); /* copy length state bytes starting at offset into data */
+void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length); /* output[i] = input[i] ^ state[offset + i] */
+
+#endif
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/Xoodoo-avr8-u1.s b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/Xoodoo-avr8-u1.s
new file mode 100644
index 0000000..7f83e3d
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/Xoodoo-avr8-u1.s
@@ -0,0 +1,1341 @@
+;
+; The eXtended Keccak Code Package (XKCP)
+; https://github.com/XKCP/XKCP
+;
+; The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
+;
+; Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+;
+; For more information, feedback or questions, please refer to the Keccak Team website:
+; https://keccak.team/
+;
+; To the extent possible under law, the implementer has waived all copyright
+; and related or neighboring rights to the source code in this file.
+; http://creativecommons.org/publicdomain/zero/1.0/
+;
+; ---
+;
+; This file implements Xoodoo in a SnP-compatible way.
+; Please refer to SnP-documentation.h for more details.
+;
+; This implementation comes with Xoodoo-SnP.h in the same folder.
+; Please refer to LowLevel.build for the exact list of other files it must be combined with.
+;
+
+; INFO: Tested on ATmega1280 simulator
+
+; Registers used in all routines
+#define zero 1
+#define rpState 24
+#define rX 26
+#define rY 28
+#define rZ 30
+#define sp 0x3D
+
+;----------------------------------------------------------------------------
+;
+; void Xoodoo_StaticInitialize( void )
+; (no work to do: the label sits on the final ret of Xoodoo_Initialize below)
+.global Xoodoo_StaticInitialize
+
+;----------------------------------------------------------------------------
+;
+; void Xoodoo_Initialize(void *state)
+; Stores the zero register into the state: 6 loop passes of 8 byte stores each.
+; argument state is passed in r24:r25
+;
+.global Xoodoo_Initialize
+Xoodoo_Initialize:
+    movw rZ, r24
+    ldi r23, 3*4/2 ; clear state (8 bytes / 2 lanes) per iteration
+Xoodoo_Initialize_Loop:
+    st z+, zero
+    st z+, zero
+    st z+, zero
+    st z+, zero
+    st z+, zero
+    st z+, zero
+    st z+, zero
+    st z+, zero
+    dec r23
+    brne Xoodoo_Initialize_Loop
+Xoodoo_StaticInitialize:
+    ret
+
+;----------------------------------------------------------------------------
+;
+; void Xoodoo_AddByte(void *state, unsigned char data, unsigned int offset)
+; XORs data into the state byte at the given offset.
+; argument state is passed in r24:r25
+; argument data is passed in r22:r23, only LSB (r22) is used
+; argument offset is passed in r20:r21, only LSB (r20) is used
+;
+.global Xoodoo_AddByte
+Xoodoo_AddByte:
+    movw rZ, r24
+    add rZ, r20
+    adc rZ+1, zero
+    ld r0, Z
+    eor r0, r22
+    st Z, r0
+    ret
+
+;----------------------------------------------------------------------------
+;
+; void Xoodoo_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
+; XORs length bytes from data into the state starting at offset: 8 bytes per pass, then the remainder byte by byte.
+; argument state is passed in r24:r25
+; argument data is passed in r22:r23
+; argument offset is passed in r20:r21, only LSB (r20) is used
+; argument length is passed in r18:r19, only LSB (r18) is used
+;
+.global Xoodoo_AddBytes
+Xoodoo_AddBytes:
+    movw rZ, r24
+    add rZ, r20
+    adc rZ+1, zero
+    movw rX, r22
+    subi r18, 8
+    brcs Xoodoo_AddBytes_Byte ; fewer than 8 bytes: skip the unrolled loop
+    ;do 8 bytes per iteration
+Xoodoo_AddBytes_Loop8:
+    ld r21, X+
+    ld r0, Z
+    eor r0, r21
+    st Z+, r0
+    ld r21, X+
+    ld r0, Z
+    eor r0, r21
+    st Z+, r0
+    ld r21, X+
+    ld r0, Z
+    eor r0, r21
+    st Z+, r0
+    ld r21, X+
+    ld r0, Z
+    eor r0, r21
+    st Z+, r0
+    ld r21, X+
+    ld r0, Z
+    eor r0, r21
+    st Z+, r0
+    ld r21, X+
+    ld r0, Z
+    eor r0, r21
+    st Z+, r0
+    ld r21, X+
+    ld r0, Z
+    eor r0, r21
+    st Z+, r0
+    ld r21, X+
+    ld r0, Z
+    eor r0, r21
+    st Z+, r0
+    subi r18, 8
+    brcc Xoodoo_AddBytes_Loop8
+Xoodoo_AddBytes_Byte:
+    ldi r19, 8
+    add r18, r19 ; undo the last subtraction: r18 = remaining byte count (0..7)
+    breq Xoodoo_AddBytes_End
+Xoodoo_AddBytes_Loop1:
+    ld r21, X+
+    ld r0, Z
+    eor r0, r21
+    st Z+, r0
+    dec r18
+    brne Xoodoo_AddBytes_Loop1
+Xoodoo_AddBytes_End:
+    ret
+
+
+;----------------------------------------------------------------------------
+;
+; void Xoodoo_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
+; Copies length bytes from data over the state starting at offset (same loop structure as Xoodoo_AddBytes).
+; argument state is passed in r24:r25
+; argument data is passed in r22:r23
+; argument offset is passed in r20:r21, only LSB (r20) is used
+; argument length is passed in r18:r19, only LSB (r18) is used
+;
+.global Xoodoo_OverwriteBytes
+Xoodoo_OverwriteBytes:
+    movw rZ, r24
+    add rZ, r20
+    adc rZ+1, zero
+    movw rX, r22
+    subi r18, 8
+    brcs Xoodoo_OverwriteBytes_Byte
+    ;do 8 bytes per iteration
+Xoodoo_OverwriteBytes_Loop8:
+    ld r0, X+
+    st Z+, r0
+    ld r0, X+
+    st Z+, r0
+    ld r0, X+
+    st Z+, r0
+    ld r0, X+
+    st Z+, r0
+    ld r0, X+
+    st Z+, r0
+    ld r0, X+
+    st Z+, r0
+    ld r0, X+
+    st Z+, r0
+    ld r0, X+
+    st Z+, r0
+    subi r18, 8
+    brcc Xoodoo_OverwriteBytes_Loop8
+Xoodoo_OverwriteBytes_Byte:
+    ldi r19, 8
+    add r18, r19
+    breq Xoodoo_OverwriteBytes_End
+Xoodoo_OverwriteBytes_Loop1:
+    ld r0, X+
+    st Z+, r0
+    dec r18
+    brne Xoodoo_OverwriteBytes_Loop1
+Xoodoo_OverwriteBytes_End:
+    ret
+
+;----------------------------------------------------------------------------
+;
+; void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount)
+; Zeroes the first byteCount state bytes: byteCount/8 passes of 8 stores, then byteCount%8 single stores.
+; argument state is passed in r24:r25
+; argument byteCount is passed in r22:r23, only LSB (r22) is used
+;
+.global Xoodoo_OverwriteWithZeroes
+Xoodoo_OverwriteWithZeroes:
+    movw rZ, r24 ; rZ = state
+    mov r23, r22
+    lsr r23
+    lsr r23
+    lsr r23
+    breq Xoodoo_OverwriteWithZeroes_Bytes
+Xoodoo_OverwriteWithZeroes_LoopLanes:
+    st Z+, r1
+    st Z+, r1
+    st Z+, r1
+    st Z+, r1
+    st Z+, r1
+    st Z+, r1
+    st Z+, r1
+    st Z+, r1
+    dec r23
+    brne Xoodoo_OverwriteWithZeroes_LoopLanes
+Xoodoo_OverwriteWithZeroes_Bytes:
+    andi r22, 7
+    breq Xoodoo_OverwriteWithZeroes_End
+Xoodoo_OverwriteWithZeroes_LoopBytes:
+    st Z+, r1
+    dec r22
+    brne Xoodoo_OverwriteWithZeroes_LoopBytes
+Xoodoo_OverwriteWithZeroes_End:
+    ret
+
+;----------------------------------------------------------------------------
+;
+; void Xoodoo_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
+; Copies length state bytes starting at offset into data (the state is only read, data is written).
+; argument state is passed in r24:r25
+; argument data is passed in r22:r23
+; argument offset is passed in r20:r21, only LSB (r20) is used
+; argument length is passed in r18:r19, only LSB (r18) is used
+;
+.global Xoodoo_ExtractBytes
+Xoodoo_ExtractBytes:
+    movw rZ, r24
+    add rZ, r20
+    adc rZ+1, zero
+    movw rX, r22
+    subi r18, 8
+    brcs Xoodoo_ExtractBytes_Byte
+    ;do 8 bytes per iteration
+Xoodoo_ExtractBytes_Loop8:
+    ld r0, Z+
+    st X+, r0
+    ld r0, Z+
+    st X+, r0
+    ld r0, Z+
+    st X+, r0
+    ld r0, Z+
+    st X+, r0
+    ld r0, Z+
+    st X+, r0
+    ld r0, Z+
+    st X+, r0
+    ld r0, Z+
+    st X+, r0
+    ld r0, Z+
+    st X+, r0
+    subi r18, 8
+    brcc Xoodoo_ExtractBytes_Loop8
+Xoodoo_ExtractBytes_Byte:
+    ldi r19, 8
+    add r18, r19
+    breq Xoodoo_ExtractBytes_End
+Xoodoo_ExtractBytes_Loop1:
+    ld r0, Z+
+    st X+, r0
+    dec r18
+    brne Xoodoo_ExtractBytes_Loop1
+Xoodoo_ExtractBytes_End:
+    ret
+
+;----------------------------------------------------------------------------
+;
+; void Xoodoo_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
+; Writes input[i] XOR state[offset+i] to output[i] for length bytes; returns at once when length (r16) is 0.
+; argument state is passed in r24:r25
+; argument input is passed in r22:r23
+; argument output is passed in r20:r21
+; argument offset is passed in r18:r19, only LSB (r18) is used
+; argument length is passed in r16:r17, only LSB (r16) is used
+;
+.global Xoodoo_ExtractAndAddBytes
+Xoodoo_ExtractAndAddBytes:
+    tst r16
+    breq Xoodoo_ExtractAndAddBytes_End
+    push r16 ; r16 and Y (r28:r29) are modified below and restored at Xoodoo_ExtractAndAddBytes_Done
+    push r28
+    push r29
+    movw rZ, r24
+    add rZ, r18
+    adc rZ+1, zero
+    movw rX, r22
+    movw rY, r20
+    subi r16, 8
+    brcs Xoodoo_ExtractAndAddBytes_Byte
+Xoodoo_ExtractAndAddBytes_LoopLane:
+    ld r21, Z+
+    ld r0, X+
+    eor r0, r21
+    st Y+, r0
+    ld r21, Z+
+    ld r0, X+
+    eor r0, r21
+    st Y+, r0
+    ld r21, Z+
+    ld r0, X+
+    eor r0, r21
+    st Y+, r0
+    ld r21, Z+
+    ld r0, X+
+    eor r0, r21
+    st Y+, r0
+    ld r21, Z+
+    ld r0, X+
+    eor r0, r21
+    st Y+, r0
+    ld r21, Z+
+    ld r0, X+
+    eor r0, r21
+    st Y+, r0
+    ld r21, Z+
+    ld r0, X+
+    eor r0, r21
+    st Y+, r0
+    ld r21, Z+
+    ld r0, X+
+    eor r0, r21
+    st Y+, r0
+    subi r16, 8
+    brcc Xoodoo_ExtractAndAddBytes_LoopLane
+Xoodoo_ExtractAndAddBytes_Byte:
+    ldi r19, 8
+    add r16, r19
+    breq Xoodoo_ExtractAndAddBytes_Done
+Xoodoo_ExtractAndAddBytes_Loop1:
+    ld r21, Z+
+    ld r0, X+
+    eor r0, r21
+    st Y+, r0
+    dec r16
+    brne Xoodoo_ExtractAndAddBytes_Loop1
+Xoodoo_ExtractAndAddBytes_Done:
+    pop r29
+    pop r28
+    pop r16
+Xoodoo_ExtractAndAddBytes_End:
+    ret
+
+Xoodoo_RoundConstants_12: ; two bytes per round, low byte first; read with lpm by the round loop
+    .BYTE 0x58, 0x00
+    .BYTE 0x38, 0x00
+    .BYTE 0xC0, 0x03
+    .BYTE 0xD0, 0x00
+    .BYTE 0x20, 0x01
+    .BYTE 0x14, 0x00
+Xoodoo_RoundConstants_6: ; entry point for the last 6 rounds
+    .BYTE 0x60, 0x00
+    .BYTE 0x2C, 0x00
+    .BYTE 0x80, 0x03
+    .BYTE 0xF0, 0x00
+    .BYTE 0xA0, 0x01
+    .BYTE 0x12, 0x00
+Xoodoo_RoundConstants_0: ; Xoodoo_Permute_Nrounds counts back from here by 2*nrounds bytes
+    .BYTE 0xFF, 0 ; terminator
+
+    .text
+
+; Register variables used in permutation
+#define rC0 2 // 4 regs (2-5)
+#define rC1 6 // 4 regs (6-9)
+#define rC2 10 // 4 regs (10-13)
+#define rC3 14 // 4 regs (14-17)
+#define rVv 18 // 4 regs (18-21)
+#define rTt
22 // 4 regs (22-25)
+// r26-27 free
+#define a00 0
+#define a01 4
+#define a02 8
+#define a03 12
+#define a10 16
+#define a11 20
+#define a12 24
+#define a13 28
+#define a20 32
+#define a21 36
+#define a22 40
+#define a23 44
+
+;----------------------------------------------------------------------------
+;
+; void Xoodoo_Permute_Nrounds( void *state, unsigned int nrounds )
+; Points Z at Xoodoo_RoundConstants_0 - 2*nrounds and jumps to the shared round loop.
+; argument state is passed in r24:r25
+; argument nrounds is passed in r22:r23 (only LSB (r22) is used)
+;
+.global Xoodoo_Permute_Nrounds
+Xoodoo_Permute_Nrounds:
+    mov r26, r22
+    ldi rZ+0, lo8(Xoodoo_RoundConstants_0)
+    ldi rZ+1, hi8(Xoodoo_RoundConstants_0)
+    lsl r26 ; two table bytes per round
+    sub rZ, r26
+    sbc rZ+1, zero
+    rjmp Xoodoo_Permute
+
+;----------------------------------------------------------------------------
+;
+; void Xoodoo_Permute_6rounds( void *state )
+; Starts the shared round loop at Xoodoo_RoundConstants_6.
+; argument state is passed in r24:r25
+;
+.global Xoodoo_Permute_6rounds
+Xoodoo_Permute_6rounds:
+    ldi rZ+0, lo8(Xoodoo_RoundConstants_6)
+    ldi rZ+1, hi8(Xoodoo_RoundConstants_6)
+    rjmp Xoodoo_Permute
+
+;----------------------------------------------------------------------------
+;
+; void Xoodoo_Permute_12rounds( void *state )
+; Starts the shared round loop at Xoodoo_RoundConstants_12 and falls through into it.
+; argument state is passed in r24:r25
+;
+.global Xoodoo_Permute_12rounds
+Xoodoo_Permute_12rounds:
+    ldi rZ+0, lo8(Xoodoo_RoundConstants_12)
+    ldi rZ+1, hi8(Xoodoo_RoundConstants_12)
+Xoodoo_Permute: ; shared body: Z = first round constant to use, r24:r25 = state; runs rounds until the 0xFF terminator
+    push r2
+    push r3
+    push r4
+    push r5
+    push r6
+    push r7
+    push r8
+    push r9
+    push r10
+    push r11
+    push r12
+    push r13
+    push r14
+    push r15
+    push r16
+    push r17
+    push r28
+    push r29
+
+    ; Initial Prepare Theta
+    movw rY, rpState
+    ld rC0+0, Y+ ; a00
+    ld rC0+1, Y+
+    ld rC0+2, Y+
+    ld rC0+3, Y+
+    ld rC1+0, Y+ ; a01
+    ld rC1+1, Y+
+    ld rC1+2, Y+
+    ld rC1+3, Y+
+    ld rC2+0, Y+ ; a02
+    ld rC2+1, Y+
+    ld rC2+2, Y+
+    ld rC2+3, Y+
+    ld rC3+0, Y+ ; a03
+    ld rC3+1, Y+
+    ld rC3+2, Y+
+    ld rC3+3, Y+
+
+    ld r0, Y+ ; a10
+    eor rC0+0, r0
+    ld r0, Y+
+    eor rC0+1, r0
+    ld r0, Y+
+    eor rC0+2, r0
+    ld r0, Y+
+    eor rC0+3, r0
+    ld r0, Y+ ; a11
+    eor rC1+0, r0
+    ld r0, Y+
+    eor rC1+1, r0
+    ld r0, Y+
+    eor rC1+2, r0
+    ld r0, Y+
+    eor rC1+3, r0
+    ld r0, Y+ ; a12
+    eor rC2+0, r0
+    ld r0, Y+
+    eor rC2+1, r0
+    ld r0, Y+
+    eor rC2+2, r0
+    ld r0, Y+
+    eor rC2+3, r0
+    ld r0, Y+ ; a13
+    eor rC3+0, r0
+    ld r0, Y+
+    eor rC3+1, r0
+    ld r0, Y+
+    eor rC3+2, r0
+    ld r0, Y+
+    eor rC3+3, r0
+
+    ld r0, Y+ ; a20
+    eor rC0+0, r0
+    ld r0, Y+
+    eor rC0+1, r0
+    ld r0, Y+
+    eor rC0+2, r0
+    ld r0, Y+
+    eor rC0+3, r0
+    ld r0, Y+ ; a21
+    eor rC1+0, r0
+    ld r0, Y+
+    eor rC1+1, r0
+    ld r0, Y+
+    eor rC1+2, r0
+    ld r0, Y+
+    eor rC1+3, r0
+    ld r0, Y+ ; a22
+    eor rC2+0, r0
+    ld r0, Y+
+    eor rC2+1, r0
+    ld r0, Y+
+    eor rC2+2, r0
+    ld r0, Y+
+    eor rC2+3, r0
+    ld r0, Y+ ; a23
+    eor rC3+0, r0
+    ld r0, Y+
+    eor rC3+1, r0
+    ld r0, Y+
+    eor rC3+2, r0
+    ld r0, Y+
+    eor rC3+3, r0
+    sbiw rY, 48 ; Y back to the first state byte after reading all 48
+
+Xoodoo_RoundLoop:
+    ; Theta + Rho west
+    ; c0 = ROTL32(c0 ^ ROTL32(c0, 9), 5);
+    mov rVv+1, rC0+0 ; rol 9
+    mov rVv+2, rC0+1
+    mov rVv+3, rC0+2
+    mov rVv+0, rC0+3
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    eor rVv+0, rC0+0
+    eor rVv+1, rC0+1
+    eor rVv+2, rC0+2
+    eor rVv+3, rC0+3
+    bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    bst rVv, 0
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    bst rVv, 0
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    mov rC0+0, rVv+3
+    mov rC0+1, rVv+0
+    mov rC0+2, rVv+1
+    mov rC0+3, rVv+2
+
+    ; c1 = ROTL32(c1 ^ ROTL32(c1, 9), 5);
+    mov rVv+1, rC1+0 ; rol 9
+    mov rVv+2, rC1+1
+    mov rVv+3, rC1+2
+    mov rVv+0, rC1+3
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    eor rVv+0, rC1+0
+    eor rVv+1, rC1+1
+    eor rVv+2, rC1+2
+    eor rVv+3, rC1+3
+    bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    bst rVv, 0
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    bst rVv, 0
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    mov rC1+0, rVv+3
+    mov rC1+1, rVv+0
+    mov rC1+2, rVv+1
+    mov rC1+3, rVv+2
+
+    ; c2 = ROTL32(c2 ^ ROTL32(c2, 9), 5);
+    mov rVv+1, rC2+0 ; rol 9
+    mov rVv+2, rC2+1
+    mov rVv+3, rC2+2
+    mov rVv+0, rC2+3
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    eor rVv+0, rC2+0
+    eor rVv+1, rC2+1
+    eor rVv+2, rC2+2
+    eor rVv+3, rC2+3
+    bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    bst rVv, 0
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    bst rVv, 0
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    mov rC2+0, rVv+3
+    mov rC2+1, rVv+0
+    mov rC2+2, rVv+1
+    mov rC2+3, rVv+2
+
+    ; c3 = ROTL32(c3 ^ ROTL32(c3, 9), 5);
+    mov rVv+1, rC3+0 ; rol 9
+    mov rVv+2, rC3+1
+    mov rVv+3, rC3+2
+    mov rVv+0, rC3+3
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    eor rVv+0, rC3+0
+    eor rVv+1, rC3+1
+    eor rVv+2, rC3+2
+    eor rVv+3, rC3+3
+    bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    bst rVv, 0
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    bst rVv, 0
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    mov rC3+0, rVv+3
+    mov rC3+1, rVv+0
+    mov rC3+2, rVv+1
+    mov rC3+3, rVv+2
+
+    ; v1 = a13;
+    ldd rVv+0, Y+a13+0
+    ldd rVv+1, Y+a13+1
+    ldd rVv+2, Y+a13+2
+    ldd rVv+3, Y+a13+3
+
+    ; a13 = a12 ^ c1;
+    ldd r0, Y+a12+0
+    eor r0, rC1+0
+    std Y+a13+0, r0
+    ldd r0, Y+a12+1
+    eor r0, rC1+1
+    std Y+a13+1, r0
+    ldd r0, Y+a12+2
+    eor r0, rC1+2
+    std Y+a13+2, r0
+    ldd r0, Y+a12+3
+    eor r0, rC1+3
+    std Y+a13+3, r0
+
+    ; a12 = a11 ^ c0;
+    ldd r0, Y+a11+0
+    eor r0, rC0+0
+    std Y+a12+0, r0
+    ldd r0, Y+a11+1
+    eor r0, rC0+1
+    std Y+a12+1, r0
+    ldd r0, Y+a11+2
+    eor r0, rC0+2
+    std Y+a12+2, r0
+    ldd r0, Y+a11+3
+    eor r0, rC0+3
+    std Y+a12+3, r0
+
+    ; a11 = a10 ^ c3;
+    ldd r0, Y+a10+0
+    eor r0, rC3+0
+    std Y+a11+0, r0
+    ldd r0, Y+a10+1
+    eor r0, rC3+1
+    std Y+a11+1, r0
+    ldd r0, Y+a10+2
+    eor r0, rC3+2
+    std Y+a11+2, r0
+    ldd r0, Y+a10+3
+    eor r0, rC3+3
+    std Y+a11+3, r0
+
+    ; a10 = v1 ^ c2;
+    eor rVv+0, rC2+0
+    std Y+a10+0, rVv+0
+    eor rVv+1, rC2+1
+    std Y+a10+1, rVv+1
+    eor rVv+2, rC2+2
+    std Y+a10+2, rVv+2
+    eor rVv+3, rC2+3
+    std Y+a10+3, rVv+3
+
+    ; a20 = ROTL32(a20 ^ c3, 11);
+    ldd rVv+0, Y+a20+3
+    eor rVv+0, rC3+3
+    ldd rVv+1, Y+a20+0
+    eor rVv+1, rC3+0
+    ldd rVv+2, Y+a20+1
+    eor rVv+2, rC3+1
+    ldd rVv+3, Y+a20+2
+    eor rVv+3, rC3+2
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    std Y+a20+0, rVv+0
+    std Y+a20+1, rVv+1
+    std Y+a20+2, rVv+2
+    std Y+a20+3, rVv+3
+
+    ; a21 = ROTL32(a21 ^ c0, 11);
+    ldd rVv+0, Y+a21+3
+    eor rVv+0, rC0+3
+    ldd rVv+1, Y+a21+0
+    eor rVv+1, rC0+0
+    ldd rVv+2, Y+a21+1
+    eor rVv+2, rC0+1
+    ldd rVv+3, Y+a21+2
+    eor rVv+3, rC0+2
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    std Y+a21+0, rVv+0
+    std Y+a21+1, rVv+1
+    std Y+a21+2, rVv+2
+    std Y+a21+3, rVv+3
+
+    ; a22 = ROTL32(a22 ^ c1, 11);
+    ldd rVv+0, Y+a22+3
+    eor rVv+0, rC1+3
+    ldd rVv+1, Y+a22+0
+    eor rVv+1, rC1+0
+    ldd rVv+2, Y+a22+1
+    eor rVv+2, rC1+1
+    ldd rVv+3, Y+a22+2
+    eor rVv+3, rC1+2
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    std Y+a22+0, rVv+0
+    std Y+a22+1, rVv+1
+    std Y+a22+2, rVv+2
+    std Y+a22+3, rVv+3
+
+    ; a23 = ROTL32(a23 ^ c2, 11);
+    ldd rVv+0, Y+a23+3
+    eor rVv+0, rC2+3
+    ldd rVv+1, Y+a23+0
+    eor rVv+1, rC2+0
+    ldd rVv+2, Y+a23+1
+    eor rVv+2, rC2+1
+    ldd rVv+3, Y+a23+2
+    eor rVv+3, rC2+2
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    std Y+a23+0, rVv+0
+    std Y+a23+1, rVv+1
+    std Y+a23+2, rVv+2
+    std Y+a23+3, rVv+3
+
+    ; v1 = c3;
+    movw rVv+0, rC3+0
+    movw rVv+2, rC3+2
+
+    ; c3 = a03 ^ c2; /* a03 resides in c3 */
+    ldd rC3+0, Y+a03+0
+    eor rC3+0, rC2+0
+    ldd rC3+1, Y+a03+1
+    eor rC3+1, rC2+1
+    ldd rC3+2, Y+a03+2
+    eor rC3+2, rC2+2
+    ldd rC3+3, Y+a03+3
+    eor rC3+3, rC2+3
+
+    ; c2 = a02 ^ c1; /* a02 resides in c2 */
+    ldd rC2+0, Y+a02+0
+    eor rC2+0, rC1+0
+    ldd rC2+1, Y+a02+1
+    eor rC2+1, rC1+1
+    ldd rC2+2, Y+a02+2
+    eor rC2+2, rC1+2
+    ldd rC2+3, Y+a02+3
+    eor rC2+3, rC1+3
+
+    ; c1 = a01 ^ c0; /* a01 resides in c1 */
+    ldd rC1+0, Y+a01+0
+    eor rC1+0, rC0+0
+    ldd rC1+1, Y+a01+1
+    eor rC1+1, rC0+1
+    ldd rC1+2, Y+a01+2
+    eor rC1+2, rC0+2
+    ldd rC1+3, Y+a01+3
+    eor rC1+3, rC0+3
+
+    ; c0 = a00 ^ v1; /* a00 resides in c0 */
+    ldd rC0+0, Y+a00+0
+    eor rC0+0, rVv+0
+    ldd rC0+1, Y+a00+1
+    eor rC0+1, rVv+1
+    ldd rC0+2, Y+a00+2
+    eor rC0+2, rVv+2
+    ldd rC0+3, Y+a00+3
+    eor rC0+3, rVv+3
+
+    ; c0 ^= __rc; /* +Iota */
+    lpm rVv+0, Z+ ; round constant is two bytes, so only the low 16 bits of c0 change
+    lpm rVv+1, Z+
+    eor rC0+0, rVv+0
+    eor rC0+1, rVv+1
+
+    ; Chi + Rho east + Early Theta
+    ; a00 = c0 ^= ~a10 & a20;
+    ldd r0, Y+a10+0
+    com r0
+    ldd rTt+0, Y+a20+0 ; a20 in rTt
+    and r0, rTt+0
+    eor rC0+0, r0
+    std Y+a00+0, rC0+0
+    ldd r0, Y+a10+1
+    com r0
+    ldd rTt+1, Y+a20+1
+    and r0, rTt+1
+    eor rC0+1, r0
+    std Y+a00+1, rC0+1
+    ldd r0, Y+a10+2
+    com r0
+    ldd rTt+2, Y+a20+2
+    and r0, rTt+2
+    eor rC0+2, r0
+    std Y+a00+2, rC0+2
+    ldd r0, Y+a10+3
+    com r0
+    ldd rTt+3, Y+a20+3
+    and r0, rTt+3
+    eor rC0+3, r0
+    std Y+a00+3, rC0+3
+
+    ; a10 ^= ~a20 & c0;
+    com rTt+0
+    and rTt+0, rC0+0
+    ldd r0, Y+a10+0
+    eor rTt+0, r0 ; new a10 in rTt
+    std Y+a10+0, rTt+0
+    com rTt+1
+    and rTt+1, rC0+1
+    ldd r0, Y+a10+1
+    eor rTt+1, r0
+    std Y+a10+1, rTt+1
+    com rTt+2
+    and rTt+2, rC0+2
+    ldd r0, Y+a10+2
+    eor rTt+2, r0
+    std Y+a10+2, rTt+2
+    com rTt+3
+    and rTt+3, rC0+3
+    ldd r0, Y+a10+3
+    eor rTt+3, r0
+    std Y+a10+3, rTt+3
+
+    ; v1(a20) = ROTL32(a20 ^ ~c0 & a10, 8);
+    movw rVv+0, rTt+0 ; a10 in rVv
+    movw rVv+2, rTt+2
+    mov r0, rC0+0
+    com r0
+    and rTt+0, r0
+    ldd r0, Y+a20+0
+    eor rTt+0, r0
+
+    mov r0, rC0+1
+    com r0
+    and rTt+1, r0
+    ldd r0, Y+a20+1
+    eor rTt+1, r0
+
+    mov r0, rC0+2
+    com r0
+    and rTt+2, r0
+    ldd r0, Y+a20+2
+    eor rTt+2, r0
+
+    mov r0, rC0+3
+    com r0
+    and rTt+3, r0
+    ldd r0, Y+a20+3
+    eor rTt+3, r0
+    std Y+a20+0, rTt+3 ; bytes stored one position up: this is the rotate by 8
+    std Y+a20+1, rTt+0
+    std Y+a20+2, rTt+1
+    std Y+a20+3, rTt+2
+
+    ; c0 ^= a10 = ROTL32(a10, 1);
+    lsl rVv+0
+    rol rVv+1
+    std Y+a10+1, rVv+1
+    eor rC0+1, rVv+1
+    rol rVv+2
+    std Y+a10+2, rVv+2
+    eor rC0+2, rVv+2
+    rol rVv+3
+    std Y+a10+3, rVv+3
+    eor rC0+3, rVv+3
+    adc rVv+0, zero
+    std Y+a10+0, rVv+0
+    eor rC0+0, rVv+0
+
+    ; a02 = c2 ^= ~a12 & a22;
+    ldd r0, Y+a12+0
+    com r0
+    ldd rVv+0, Y+a22+0 ; a22 in rVv
+    and r0, rVv+0
+    eor rC2+0, r0
+    std Y+a02+0, rC2+0
+    ldd r0, Y+a12+1
+    com r0
+    ldd rVv+1, Y+a22+1
+    and r0, rVv+1
+    eor rC2+1, r0
+    std Y+a02+1, rC2+1
+    ldd r0, Y+a12+2
+    com r0
+    ldd rVv+2, Y+a22+2
+    and r0, rVv+2
+    eor rC2+2, r0
+    std Y+a02+2, rC2+2
+    ldd r0, Y+a12+3
+    com r0
+    ldd rVv+3, Y+a22+3
+    and r0, rVv+3
+    eor rC2+3, r0
+    std Y+a02+3, rC2+3
+
+    ; a12 ^= ~a22 & c2;
+    mov r0, rVv+0 ; a12 in rTt
+    com r0
+    and r0, rC2+0
+    ldd rTt+0, Y+a12+0
+    eor rTt+0, r0
+    std Y+a12+0, rTt+0
+    mov r0, rVv+1
+    com r0
+    and r0, rC2+1
+    ldd rTt+1, Y+a12+1
+    eor rTt+1, r0
+    std Y+a12+1, rTt+1
+    mov r0, rVv+2
+    com r0
+    and r0, rC2+2
+    ldd rTt+2, Y+a12+2
+    eor rTt+2, r0
+    std Y+a12+2, rTt+2
+    mov r0, rVv+3
+    com r0
+    and r0, rC2+3
+    ldd rTt+3, Y+a12+3
+    eor rTt+3, r0
+    std Y+a12+3, rTt+3
+
+    ; c0 ^= a20 = ROTL32(a22 ^ ~c2 & a12, 8);
+    mov r0, rC2+0
+    com r0
+    and r0, rTt+0
+    eor r0, rVv+0
+    ldd rVv+0, Y+a20+1 ; rVv = a22
+    std Y+a20+1, r0
+    eor rC0+1, r0
+    mov r0, rC2+1
+    com r0
+    and r0, rTt+1
+    eor r0, rVv+1
+    ldd rVv+1, Y+a20+2
+    std Y+a20+2, r0
+    eor rC0+2, r0
+    mov r0, rC2+2
+    com r0
+    and r0, rTt+2
+    eor r0, rVv+2
+    ldd rVv+2, Y+a20+3
+    std Y+a20+3, r0
+    eor rC0+3, r0
+    mov r0, rC2+3
+    com r0
+    and r0, rTt+3
+    eor r0, rVv+3
+    ldd rVv+3, Y+a20+0
+    std Y+a20+0, r0
+    eor rC0+0, r0
+
+    ; c2 ^= a12 = ROTL32(a12, 1);
+    lsl rTt+0
+    rol rTt+1
+    eor rC2+1, rTt+1
+    std Y+a12+1, rTt+1
+    rol rTt+2
+    eor rC2+2, rTt+2
+    std Y+a12+2, rTt+2
+    rol rTt+3
+    eor rC2+3, rTt+3
+    std Y+a12+3, rTt+3
+    adc rTt+0, zero
+    eor rC2+0, rTt+0
+    std Y+a12+0, rTt+0
+
+    ; a22 = v1;
+    std Y+a22+0, rVv+3
+    std Y+a22+1, rVv+0
+    std Y+a22+2, rVv+1
+    std Y+a22+3, rVv+2
+
+    ; c2 ^= v1;
+    eor rC2+0, rVv+3
+    eor rC2+1, rVv+0
+    eor rC2+2, rVv+1
+    eor rC2+3, rVv+2
+
+    ; a01 = c1 ^= ~a11 & a21;
+    ldd rTt+0, Y+a11+0 ;rTt holds a11
+    mov r0, rTt+0
+    com r0
+    ldd rVv+0, Y+a21+0 ;rVv holds a21
+    and r0, rVv+0
+    eor rC1+0, r0
+    std Y+a01+0, rC1+0
+    ldd rTt+1, Y+a11+1
+    mov r0, rTt+1
+    com r0
+    ldd rVv+1, Y+a21+1
+    and r0, rVv+1
+    eor rC1+1, r0
+    std Y+a01+1, rC1+1
+    ldd rTt+2, Y+a11+2
+    mov r0, rTt+2
+    com r0
+    ldd rVv+2, Y+a21+2
+    and r0, rVv+2
+    eor rC1+2, r0
+    std Y+a01+2, rC1+2
+    ldd rTt+3, Y+a11+3
+    mov r0, rTt+3
+    com r0
+    ldd rVv+3, Y+a21+3
+    and r0, rVv+3
+    eor rC1+3, r0
+    std Y+a01+3, rC1+3
+
+    ; a11 ^= ~a21 & c1;
+    mov r0, rVv+0
+    com r0
+    and r0, rC1+0
+    eor rTt+0, r0
+    std Y+a11+0, rTt+0
+    mov r0, rVv+1
+    com r0
+    and r0, rC1+1
+    eor rTt+1, r0
+    std Y+a11+1, rTt+1
+    mov r0, rVv+2
+    com r0
+    and r0, rC1+2
+    eor rTt+2, r0
+    std Y+a11+2, rTt+2
+    mov r0, rVv+3
+    com r0
+    and r0, rC1+3
+    eor rTt+3, r0
+    std Y+a11+3, rTt+3
+
+    ; v1 = ROTL32(a21 ^ ~c1 & a11, 8);
+    mov r0, rC1+0
+    com r0
+    and r0, rTt+0
+    eor rVv+0, r0 ; v1 not yet ROTL32'ed(8)
+    mov r0, rC1+1
+    com r0
+    and r0, rTt+1
+    eor rVv+1, r0
+    mov r0, rC1+2
+    com r0
+    and r0, rTt+2
+    eor rVv+2, r0
+    mov r0, rC1+3
+    com r0
+    and r0, rTt+3
+    eor rVv+3, r0
+
+    ; c1 ^= a11 = ROTL32(a11, 1);
+    lsl rTt+0
+    rol rTt+1
+    eor rC1+1, rTt+1
+    std Y+a11+1, rTt+1
+    rol rTt+2
+    eor rC1+2, rTt+2
+    std Y+a11+2, rTt+2
+    rol rTt+3
+    eor rC1+3, rTt+3
+    std Y+a11+3, rTt+3
+    adc rTt+0, zero
+    eor rC1+0, rTt+0
+    std Y+a11+0, rTt+0
+
+    ; a03 = c3 ^= ~a13 & a23;
+    ldd r0, Y+a13+0
+    com r0
+    ldd rTt+0, Y+a23+0 ; a23 in rTt
+    and r0, rTt+0
+    eor rC3+0, r0
+    std Y+a03+0, rC3+0
+    ldd r0, Y+a13+1
+    com r0
+    ldd rTt+1, Y+a23+1
+    and r0, rTt+1
+    eor rC3+1, r0
+    std Y+a03+1, rC3+1
+    ldd r0, Y+a13+2
+    com r0
+    ldd rTt+2, Y+a23+2
+    and r0, rTt+2
+    eor rC3+2, r0
+    std Y+a03+2, rC3+2
+    ldd r0, Y+a13+3
+    com r0
+    ldd rTt+3, Y+a23+3
+    and r0, rTt+3
+    eor rC3+3, r0
+    std Y+a03+3, rC3+3
+
+    ; a13 ^= ~a23 & c3;
+    mov r0, rTt+0
+    com r0
+    and r0, rC3+0
+    ldd rTt+0, Y+a13+0 ; a13 in rTt
+    eor rTt+0, r0
+    mov r0, rTt+1
+    com r0
+    and r0, rC3+1
+    ldd rTt+1, Y+a13+1
+    eor rTt+1, r0
+    mov r0, rTt+2
+    com r0
+    and r0, rC3+2
+    ldd rTt+2, Y+a13+2
+    eor rTt+2, r0
+    mov r0, rTt+3
+    com r0
+    and r0, rC3+3
+    ldd rTt+3, Y+a13+3
+    eor rTt+3, r0
+
+    ; c1 ^= a21 = ROTL32(a23 ^ ~c3 & a13, 8);
+    push rVv ; rVv+0 is borrowed as scratch below and restored by the pop
+    mov r0, rC3+0
+    com r0
+    and r0, rTt+0
+    ldd rVv, Y+a23+0
+    eor r0, rVv
+    eor rC1+1, r0
+    std Y+a21+1, r0
+    mov r0, rC3+1
+    com r0
+    and r0, rTt+1
+    ldd rVv, Y+a23+1
+    eor r0, rVv
+    eor rC1+2, r0
+    std Y+a21+2, r0
+    mov r0, rC3+2
+    com r0
+    and r0, rTt+2
+    ldd rVv, Y+a23+2
+    eor r0, rVv
+    eor rC1+3, r0
+    std Y+a21+3, r0
+    mov r0, rC3+3
+    com r0
+    and r0, rTt+3
+    ldd rVv, Y+a23+3
+    eor r0, rVv
+    eor rC1+0, r0
+    std Y+a21+0, r0
+    pop rVv
+
+    ; a23 = v1;
+    std Y+a23+0, rVv+3 ; rol8(rVv)
+    std Y+a23+1, rVv+0
+    std Y+a23+2, rVv+1
+    std Y+a23+3, rVv+2
+
+    ; c3 ^= v1;
+    eor rC3+0, rVv+3
+    eor rC3+1, rVv+0
+    eor rC3+2, rVv+1
+    eor rC3+3, rVv+2
+
+    ; c3 ^= a13 = ROTL32(a13, 1);
+    lsl rTt+0
+    rol rTt+1
+    std Y+a13+1, rTt+1
+    eor rC3+1, rTt+1
+    rol rTt+2
+    std Y+a13+2, rTt+2
+    eor rC3+2, rTt+2
+    rol rTt+3
+    std Y+a13+3, rTt+3
+    eor rC3+3, rTt+3
+    adc rTt+0, zero
+    std Y+a13+0, rTt+0
+    eor rC3+0, rTt+0
+
+    ; Check for terminator
+    lpm r0, Z ; peek at the next table byte without advancing Z
+    inc r0 ; 0xFF + 1 wraps to 0, so Z is set only on the terminator entry
+    breq Xoodoo_Done
+    rjmp Xoodoo_RoundLoop
+Xoodoo_Done:
+    pop r29
+    pop r28
+    pop r17
+    pop r16
+    pop r15
+    pop r14
+    pop r13
+    pop r12
+    pop r11
+    pop r10
+    pop r9
+    pop r8
+    pop r7
+    pop r6
+    pop r5
+
pop r4
+    pop r3
+    pop r2
+    ret
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/Xoodoo.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/Xoodoo.h
new file mode 100644
index 0000000..1b6f1a9
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/Xoodoo.h
@@ -0,0 +1,79 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _Xoodoo_h_
+#define _Xoodoo_h_
+
+#include <stddef.h> /* NOTE(review): header name was missing after #include; <stddef.h> is assumed -- confirm against upstream */
+#include <stdint.h> /* needed for uint32_t below; header name was missing after #include */
+
+#define MAXROUNDS 12
+#define NROWS 3
+#define NCOLUMS 4 /* (sic) spelling kept because the name is used by NLANES and index() below */
+#define NLANES (NCOLUMS*NROWS)
+
+/* Round constants */
+#define _rc12 0x00000058
+#define _rc11 0x00000038
+#define _rc10 0x000003C0
+#define _rc9 0x000000D0
+#define _rc8 0x00000120
+#define _rc7 0x00000014
+#define _rc6 0x00000060
+#define _rc5 0x0000002C
+#define _rc4 0x00000380
+#define _rc3 0x000000F0
+#define _rc2 0x000001A0
+#define _rc1 0x00000012
+
+/* ROTL32(a, offset): rotate the 32-bit value a left by offset bits (offset is reduced modulo 32). */
+#if !defined(ROTL32)
+    #if defined (__arm__) && !defined(__GNUC__)
+        #define ROTL32(a, offset) __ror(a, (32-(offset))%32)
+    #elif defined(_MSC_VER)
+        #define ROTL32(a, offset) _rotl(a, (offset)%32)
+    #else
+        #define ROTL32(a, offset) ((((uint32_t)(a)) << ((offset)%32)) | (((uint32_t)(a)) >> ((32-(offset))%32))) /* halves are ORed so that offset % 32 == 0 yields a (XOR gave 0); a is parenthesised before the cast */
+    #endif
+#endif
+/* READ32_UNALIGNED(addr): load a uint32_t through a cast pointer. NOTE(review): the non-ARM variants are plain dereferences and rely on the target tolerating unaligned access. */
+#if !defined(READ32_UNALIGNED)
+    #if defined (__arm__) && !defined(__GNUC__)
+        #define READ32_UNALIGNED(argAddress) (*((const __packed uint32_t*)(argAddress)))
+    #elif defined(_MSC_VER)
+        #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress)))
+    #else
+        #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress)))
+    #endif
+#endif
+/* WRITE32_UNALIGNED(addr, data): store a uint32_t through a cast pointer; same caveat as READ32_UNALIGNED. */
+#if !defined(WRITE32_UNALIGNED)
+    #if defined (__arm__) && !defined(__GNUC__)
+        #define WRITE32_UNALIGNED(argAddress, argData) (*((__packed uint32_t*)(argAddress)) = (argData))
+    #elif defined(_MSC_VER)
+        #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData))
+    #else
+        #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData))
+    #endif
+#endif
+/* index(x, y): position of lane (x, y) in a row-major array of NLANES lanes; both coordinates wrap. */
+#if !defined(index)
+    #define index(__x,__y) ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS))
+#endif
+
+typedef uint32_t tXoodooLane;
+
+#endif
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/Xoodyak-parameters.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/Xoodyak-parameters.h
new file mode 100644
index 0000000..a8c34d8
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/Xoodyak-parameters.h
@@ -0,0 +1,26 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodyak_parameters_h_ +#define _Xoodyak_parameters_h_ + +#define Xoodyak_f_bPrime 48 /* bytes; the Cyclist template sizes its scratch buffer with this value and writes the domain byte at offset f_bPrime - 1 */ +#define Xoodyak_Rhash 16 /* bytes; absorb and squeeze rate the Cyclist template installs in hash mode */ +#define Xoodyak_Rkin 44 /* bytes; absorb rate the Cyclist template installs in keyed mode */ +#define Xoodyak_Rkout 24 /* bytes; squeeze rate in keyed mode, also the block size of the encrypt/decrypt loop */ +#define Xoodyak_lRatchet 16 /* bytes squeezed and absorbed again by the Ratchet operation */ + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/Xoodyak.c b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/Xoodyak.c new file mode 100644 index 0000000..c5407dc --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/Xoodyak.c @@ -0,0 +1,55 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifdef XoodooReference + #include "displayIntermediateValues.h" +#endif + +#if DEBUG +#include +#endif +#include +#include "Xoodyak.h" + +#ifdef OUTPUT +#include +#include + +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length); +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length) +{ + unsigned int i; + + fprintf(f, "%s:", synopsis); + for(i=0; i +#include "Cyclist.h" +#include "Xoodoo-SnP.h" +#include "Xoodyak-parameters.h" + +KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment) +KCP_DeclareCyclistFunctions(Xoodyak) + +#else +#error This requires an implementation of Xoodoo +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/align.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/align.h new file mode 100644 index 0000000..82ad2f9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/align.h @@ -0,0 +1,33 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _align_h_ +#define _align_h_ + +/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. 
*/ +#ifdef ALIGN +#undef ALIGN +#endif + +#if defined(__GNUC__) +#define ALIGN(x) __attribute__ ((aligned(x))) /* GCC-compatible attribute syntax */ +#elif defined(_MSC_VER) +#define ALIGN(x) __declspec(align(x)) /* Microsoft compiler syntax */ +#elif defined(__ARMCC_VERSION) +#define ALIGN(x) __align(x) /* ARM compiler syntax */ +#else +#define ALIGN(x) /* unrecognized compiler: expands to nothing, so no alignment is requested */ +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/api.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/api.h new file mode 100644 index 0000000..4ceda96 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 /* key length in bytes; encrypt.c passes it to Xoodyak_Initialize as the key length */ +#define CRYPTO_NSECBYTES 0 /* no secret message number; encrypt.c ignores its nsec argument */ +#define CRYPTO_NPUBBYTES 16 /* nonce length in bytes; encrypt.c passes it to Xoodyak_Initialize as the ID length */ +#define CRYPTO_ABYTES 16 /* presumably the ciphertext overhead in bytes; encrypt.c appends TAGLEN (16) bytes - TODO confirm the two are meant to match */ +#define CRYPTO_NOOVERLAP 1 /* NOTE(review): not referenced by the code visible here; meaning to be confirmed against the benchmarking framework */ diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/brg_endian.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/brg_endian.h new file mode 100644 index 0000000..7c640b9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-AVR8/brg_endian.h @@ -0,0 +1,143 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The redistribution and use of this software (with or without changes) + is allowed without the payment of fees or royalties provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. 
/*
 ---------------------------------------------------------------------------
 Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.

 LICENSE TERMS

 The redistribution and use of this software (with or without changes)
 is allowed without the payment of fees or royalties provided that:

  1. source code distributions include the above copyright notice, this
     list of conditions and the following disclaimer;

  2. binary distributions include the above copyright notice, this list
     of conditions and the following disclaimer in their documentation;

  3. the name of the copyright holder is not used to endorse products
     built using this software without specific written permission.

 DISCLAIMER

 This software is provided 'as is' with no explicit or implied warranties
 in respect of its properties, including, but not limited to, correctness
 and/or fitness for purpose.
 ---------------------------------------------------------------------------
 Issue Date: 20/12/2007
 Changes for ARM 9/9/2010
*/

/*
 * Defines PLATFORM_BYTE_ORDER as IS_LITTLE_ENDIAN or IS_BIG_ENDIAN, using
 * only macros the compiler or earlier headers have already defined.
 */
#ifndef _BRG_ENDIAN_H
#define _BRG_ENDIAN_H

#define IS_BIG_ENDIAN      4321 /* byte 0 is most significant (mc68k) */
#define IS_LITTLE_ENDIAN   1234 /* byte 0 is least significant (i386) */

#if 0
/* Include files where endian defines and byteswap functions may reside */
#if defined( __sun )
#  include <sys/isa_defs.h>
#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
#  include <sys/endian.h>
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
      defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
#  include <machine/endian.h>
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
#  if !defined( __MINGW32__ ) && !defined( _AIX )
#    include <endian.h>
#    if !defined( __BEOS__ )
#      include <byteswap.h>
#    endif
#  endif
#endif
#endif

/* Now attempt to set the define for platform byte order using any  */
/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which  */
/* seem to encompass most endian symbol definitions                 */

#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
#  if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#  elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN
#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#  endif
#elif defined( BIG_ENDIAN )
#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( LITTLE_ENDIAN )
#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif

#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
#  if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#  elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#  endif
#elif defined( _BIG_ENDIAN )
#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( _LITTLE_ENDIAN )
#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif

#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
#  if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#  elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#  endif
#elif defined( __BIG_ENDIAN )
#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( __LITTLE_ENDIAN )
#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif

#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
#  if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#  elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#  endif
#elif defined( __BIG_ENDIAN__ )
#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( __LITTLE_ENDIAN__ )
#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif

/* GCC-compatible compilers describe the target with __BYTE_ORDER__ and   */
/* the __ORDER_*_ENDIAN__ constants; consult them before falling back to  */
/* the machine lists below, whose last resort assumes little-endian       */
#if !defined( PLATFORM_BYTE_ORDER ) && defined( __BYTE_ORDER__ ) && \
    defined( __ORDER_BIG_ENDIAN__ ) && defined( __ORDER_LITTLE_ENDIAN__ )
#  if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#  elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#  endif
#endif

/*  if the platform byte order could not be determined, then try to */
/*  set this define using common machine defines                    */
#if !defined(PLATFORM_BYTE_ORDER)

#if   defined( __alpha__ ) || defined( __alpha ) || defined( i386 )       || \
      defined( __i386__ )  || defined( _M_I86 )  || defined( _M_IX86 )    || \
      defined( __OS2__ )   || defined( sun386 )  || defined( __TURBOC__ ) || \
      defined( vax )       || defined( vms )     || defined( VMS )        || \
      defined( __VMS )     || defined( _M_X64 )
#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN

#elif defined( AMIGA )   || defined( applec )    || defined( __AS400__ )  || \
      defined( _CRAY )   || defined( __hppa )    || defined( __hp9000 )   || \
      defined( ibm370 )  || defined( mc68000 )   || defined( m68k )       || \
      defined( __MRC__ ) || defined( __MVS__ )   || defined( __MWERKS__ ) || \
      defined( sparc )   || defined( __sparc)    || defined( SYMANTEC_C ) || \
      defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM )   || \
      defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX )       || \
      defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ )
#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN

#elif defined(__arm__)
# ifdef __BIG_ENDIAN
#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# else
#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif 1     /* **** EDIT HERE IF NECESSARY **** */
#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#elif 0     /* **** EDIT HERE IF NECESSARY **** */
#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#else
#  error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order
#endif

#endif

#endif

/* ---- config.h ---- */
/* File generated by ToTargetConfigFile.xsl */

#define XKCP_has_Xoodyak
#define XKCP_has_Xoodoo
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include "crypto_aead.h" +#include "api.h" +#include "Xoodyak.h" +#include + +#if !defined(CRYPTO_KEYBYTES) + #define CRYPTO_KEYBYTES 16 +#endif +#if !defined(CRYPTO_NPUBBYTES) + #define CRYPTO_NPUBBYTES 16 +#endif + +#define TAGLEN 16 + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k) +{ + Xoodyak_Instance instance; + + (void)nsec; + + Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, npub, CRYPTO_NPUBBYTES, NULL, 0); + Xoodyak_Absorb(&instance, ad, (size_t)adlen); + Xoodyak_Encrypt(&instance, m, c, (size_t)mlen); + Xoodyak_Squeeze(&instance, c + mlen, TAGLEN); + *clen = mlen + TAGLEN; + #if 0 + { + unsigned int i; + for (i = 0; i < *clen; ++i ) + { + printf("\\x%02x", c[i] ); + } + printf("\n"); + } + #endif + return 0; +} + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + Xoodyak_Instance instance; + unsigned char tag[TAGLEN]; + unsigned long long mlen_; + + (void)nsec; + + *mlen = 0; + if (clen < TAGLEN) { + return -1; + } + mlen_ = clen - TAGLEN; + Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, npub, CRYPTO_NPUBBYTES, NULL, 0); + Xoodyak_Absorb(&instance, ad, (size_t)adlen); + Xoodyak_Decrypt(&instance, c, m, (size_t)mlen_); + Xoodyak_Squeeze(&instance, tag, TAGLEN); + if (memcmp(tag, c + mlen_, TAGLEN) != 0) { + memset(m, 0, (size_t)mlen_); + return -1; + } + *mlen = mlen_; + return 0; +} diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Cyclist.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Cyclist.h new file mode 100644 index 
0000000..54522bb --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Cyclist.h @@ -0,0 +1,66 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Cyclist_h_ +#define _Cyclist_h_ + +#include +#include "align.h" + +#define Cyclist_ModeHash 1 +#define Cyclist_ModeKeyed 2 + +#define Cyclist_PhaseDown 1 +#define Cyclist_PhaseUp 2 + +#ifdef OUTPUT + +#include + +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + uint8_t stateShadow[size]; \ + FILE *file; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#else + +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#endif + +#define KCP_DeclareCyclistFunctions(prefix) \ + void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \ + void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \ + void prefix##_Encrypt(prefix##_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen); \ + void prefix##_Decrypt(prefix##_Instance 
*instance, const uint8_t *C, uint8_t *P, size_t CLen); \ + void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \ + void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \ + void prefix##_Ratchet(prefix##_Instance *instance); + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Cyclist.inc b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Cyclist.inc new file mode 100644 index 0000000..f3d8ce9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Cyclist.inc @@ -0,0 +1,336 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define JOIN0(a, b) a ## b +#define JOIN(a, b) JOIN0(a, b) + +#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize) +#define SnP_Initialize JOIN(SnP, _Initialize) +#define SnP_AddBytes JOIN(SnP, _AddBytes) +#define SnP_AddByte JOIN(SnP, _AddByte) +#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes) +#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes) +#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes) + +#define Cyclist_Instance JOIN(prefix, _Instance) +#define Cyclist_Initialize JOIN(prefix, _Initialize) +#define Cyclist_Absorb JOIN(prefix, _Absorb) +#define Cyclist_Encrypt JOIN(prefix, _Encrypt) +#define Cyclist_Decrypt JOIN(prefix, _Decrypt) +#define Cyclist_Squeeze JOIN(prefix, _Squeeze) +#define Cyclist_SqueezeKey JOIN(prefix, _SqueezeKey) +#define Cyclist_Ratchet JOIN(prefix, _Ratchet) + +#define Cyclist_AbsorbAny JOIN(prefix, _AbsorbAny) +#define Cyclist_AbsorbKey JOIN(prefix, _AbsorbKey) +#define Cyclist_SqueezeAny JOIN(prefix, _SqueezeAny) +#define Cyclist_Down JOIN(prefix, _Down) +#define Cyclist_Up JOIN(prefix, _Up) +#define Cyclist_Crypt JOIN(prefix, _Crypt) + +#define Cyclist_f_bPrime JOIN(prefix, _f_bPrime) +#define Cyclist_Rhash JOIN(prefix, _Rhash) +#define Cyclist_Rkin JOIN(prefix, _Rkin) +#define Cyclist_Rkout JOIN(prefix, _Rkout) +#define Cyclist_lRatchet JOIN(prefix, _lRatchet) + +#if defined(CyclistFullBlocks_supported) +#define Cyclist_AbsorbKeyedFullBlocks JOIN(prefix, _AbsorbKeyedFullBlocks) +#define Cyclist_AbsorbHashFullBlocks JOIN(prefix, _AbsorbHashFullBlocks) +#define Cyclist_SqueezeKeyedFullBlocks JOIN(prefix, _SqueezeKeyedFullBlocks) +#define Cyclist_SqueezeHashFullBlocks JOIN(prefix, _SqueezeHashFullBlocks) +#define Cyclist_EncryptFullBlocks JOIN(prefix, _EncryptFullBlocks) +#define Cyclist_DecryptFullBlocks JOIN(prefix, _DecryptFullBlocks) +#endif + +/* ------- Cyclist internal interfaces ------- */ + +static void Cyclist_Down(Cyclist_Instance *instance, const 
uint8_t *Xi, unsigned int XiLen, uint8_t Cd) +{ + SnP_AddBytes(instance->state, Xi, 0, XiLen); + SnP_AddByte(instance->state, 0x01, XiLen); + SnP_AddByte(instance->state, (instance->mode == Cyclist_ModeHash) ? (Cd & 0x01) : Cd, Cyclist_f_bPrime - 1); + instance->phase = Cyclist_PhaseDown; + +} + +static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu) +{ + #if defined(OUTPUT) + uint8_t s[Cyclist_f_bPrime]; + #endif + + if (instance->mode != Cyclist_ModeHash) { + SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1); + } + #if defined(OUTPUT) + if (instance->file != NULL) { + SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime ); + SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime ); + } + #endif + SnP_Permute( instance->state ); + #if defined(OUTPUT) + if (instance->file != NULL) { + memcpy( instance->stateShadow, instance->state, sizeof(instance->state) ); + fprintf( instance->file, "Data XORed" ); + displayByteString( instance->file, "", s, Cyclist_f_bPrime ); + SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime ); + fprintf( instance->file, "After f() "); + displayByteString( instance->file, "", s, Cyclist_f_bPrime ); + } + #endif + instance->phase = Cyclist_PhaseUp; + SnP_ExtractBytes( instance->state, Yi, 0, YiLen ); +} + +static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd) +{ + unsigned int splitLen; + + do { + if (instance->phase != Cyclist_PhaseUp) { + Cyclist_Up(instance, NULL, 0, 0); + } + splitLen = (unsigned int)MyMin(XLen, r); + Cyclist_Down(instance, X, splitLen, Cd); + Cd = 0; + X += splitLen; + XLen -= splitLen; + #if defined(CyclistFullBlocks_supported) + if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) { + size_t lenProcessed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen); + X += lenProcessed; + XLen -= lenProcessed; + } + else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) { + 
size_t lenProcessed = Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen); + X += lenProcessed; + XLen -= lenProcessed; + } + #endif + } while ( XLen != 0 ); +} + +static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen) +{ + uint8_t KID[Cyclist_Rkin]; + + #if DEBUG + assert(instance->mode == Cyclist_ModeHash); + assert((KLen + IDLen) <= (Cyclist_Rkin - 1)); + #endif + instance->mode = Cyclist_ModeKeyed; + instance->Rabsorb = Cyclist_Rkin; + instance->Rsqueeze = Cyclist_Rkout; + if (KLen != 0) { + memcpy(KID, K, KLen); + memcpy(KID + KLen, ID, IDLen); + KID[KLen + IDLen] = (uint8_t)IDLen; + Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02); + if (counterLen != 0) { + Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00); + } + } +} + +static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu) +{ + unsigned int len; + + len = (unsigned int)MyMin(YLen, instance->Rsqueeze ); + Cyclist_Up(instance, Y, len, Cu); + Y += len; + YLen -= len; + while (YLen != 0) { + #if defined(CyclistFullBlocks_supported) + if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkin)) { + size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen); + Y += lenProcessed; + YLen -= lenProcessed; + } + else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) { + size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen); + Y += lenProcessed; + YLen -= lenProcessed; + } + else + #endif + { + Cyclist_Down(instance, NULL, 0, 0); + len = (unsigned int)MyMin(YLen, instance->Rsqueeze ); + Cyclist_Up(instance, Y, len, 0); + Y += len; + YLen -= len; + } + } +} + +static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt) +{ + unsigned int splitLen; + uint8_t P[Cyclist_Rkout]; + uint8_t Cu = 0x80; + + do { + if 
(decrypt != 0) { + #if defined(CyclistFullBlocks_supported) + if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) { + size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen); + I += lenProcessed; + O += lenProcessed; + IOLen -= lenProcessed; + } + else + #endif + { + splitLen = (unsigned int)MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */ + Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */ + SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */ + Cyclist_Down(instance, O, splitLen, 0x00); + I += splitLen; + O += splitLen; + IOLen -= splitLen; + } + } + else { + #if defined(CyclistFullBlocks_supported) + if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) { + size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen); + I += lenProcessed; + O += lenProcessed; + IOLen -= lenProcessed; + } + else + #endif + { + splitLen = (unsigned int)MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */ + memcpy(P, I, splitLen); + Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */ + SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */ + Cyclist_Down(instance, P, splitLen, 0x00); + I += splitLen; + O += splitLen; + IOLen -= splitLen; + } + } + Cu = 0x00; + } while ( IOLen != 0 ); +} + +/* ------- Cyclist interfaces ------- */ + +void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen) +{ + SnP_StaticInitialize(); + SnP_Initialize(instance->state); + instance->phase = Cyclist_PhaseUp; + instance->mode = Cyclist_ModeHash; + instance->Rabsorb = Cyclist_Rhash; + instance->Rsqueeze = Cyclist_Rhash; + #ifdef OUTPUT + instance->file = 0; + SnP_Initialize( instance->stateShadow ); + #endif + if (KLen != 0) { + Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, 
counter, counterLen); + } +} + +void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, size_t XLen) +{ + Cyclist_AbsorbAny(instance, X, XLen, instance->Rabsorb, 0x03); +} + +void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen) +{ + #if DEBUG + assert(instance->mode == Cyclist_ModeKeyed); + #endif + Cyclist_Crypt(instance, P, C, PLen, 0); +} + +void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen) +{ + #if DEBUG + assert(instance->mode == Cyclist_ModeKeyed); + #endif + Cyclist_Crypt(instance, C, P, CLen, 1); +} + +void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen) +{ + Cyclist_SqueezeAny(instance, Y, YLen, 0x40); +} + +void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen) +{ + #if DEBUG + assert(instance->mode == Cyclist_ModeKeyed); + #endif + Cyclist_SqueezeAny(instance, K, KLen, 0x20); +} + +void Cyclist_Ratchet(Cyclist_Instance *instance) +{ + uint8_t buffer[Cyclist_lRatchet]; + + #if DEBUG + assert(instance->mode == Cyclist_ModeKeyed); + #endif + /* Squeeze then absorb is the same as overwriting with zeros */ + Cyclist_SqueezeAny(instance, buffer, sizeof(buffer), 0x10); + Cyclist_AbsorbAny(instance, buffer, sizeof(buffer), instance->Rabsorb, 0x00); +} + +#undef SnP_StaticInitialize +#undef SnP_Initialize +#undef SnP_AddBytes +#undef SnP_AddByte +#undef SnP_OverwriteBytes +#undef SnP_ExtractBytes +#undef SnP_ExtractAndAddBytes + +#undef Cyclist_Instance +#undef Cyclist_Initialize +#undef Cyclist_Absorb +#undef Cyclist_Encrypt +#undef Cyclist_Decrypt +#undef Cyclist_Squeeze +#undef Cyclist_SqueezeKey +#undef Cyclist_Ratchet + +#undef Cyclist_AbsorbAny +#undef Cyclist_AbsorbKey +#undef Cyclist_SqueezeAny +#undef Cyclist_Down +#undef Cyclist_Up +#undef Cyclist_Crypt + +#undef Cyclist_f_bPrime +#undef Cyclist_Rhash +#undef Cyclist_Rkin +#undef Cyclist_Rkout +#undef Cyclist_lRatchet + +#if defined(CyclistFullBlocks_supported) +#undef 
Cyclist_AbsorbKeyedFullBlocks +#undef Cyclist_AbsorbHashFullBlocks +#undef Cyclist_SqueezeKeyedFullBlocks +#undef Cyclist_SqueezeHashFullBlocks +#undef Cyclist_EncryptFullBlocks +#undef Cyclist_DecryptFullBlocks +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Xoodoo-SnP.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Xoodoo-SnP.h new file mode 100644 index 0000000..2927919 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Xoodoo-SnP.h @@ -0,0 +1,56 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodoo_SnP_h_ +#define _Xoodoo_SnP_h_ + +#include +#include + +/** For the documentation, see SnP-documentation.h. 
+ */ + +#define Xoodoo_implementation "32-bit optimized implementation" +#define Xoodoo_stateSizeInBytes (3*4*4) +#define Xoodoo_stateAlignment 4 +#define Xoodoo_HasNround + +#define Xoodoo_StaticInitialize() +void Xoodoo_Initialize(void *state); +#define Xoodoo_AddByte(argS, argData, argOffset) ((uint8_t*)argS)[argOffset] ^= (argData) +void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount); +void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds); +void Xoodoo_Permute_6rounds(void *state); +void Xoodoo_Permute_12rounds(void *state); +void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length); + +//#define Xoodoo_FastXoofff_supported +//void Xoofff_AddIs( uint8_t *output, const uint8_t *input, size_t bitLen); +//size_t Xoofff_CompressFastLoop(uint8_t *k, uint8_t *xAccu, const uint8_t *input, size_t length); +//size_t Xoofff_ExpandFastLoop(uint8_t *yAccu, const uint8_t *kRoll, uint8_t *output, size_t length); + +#define CyclistFullBlocks_supported +size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen); +size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen); +size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen); +size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen); +size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen); +size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen); + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Xoodoo-optimized.c 
b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Xoodoo-optimized.c new file mode 100644 index 0000000..ce86971 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Xoodoo-optimized.c @@ -0,0 +1,399 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include +#include +#include "Xoodoo.h" + +#define VERBOSE 0 + +#if (VERBOSE > 0) + #define Dump(__t) printf(__t "\n"); \ + printf("a00 %08x, a01 %08x, a02 %08x, a03 %08x\n", a00, a01, a02, a03 ); \ + printf("a10 %08x, a11 %08x, a12 %08x, a13 %08x\n", a10, a11, a12, a13 ); \ + printf("a20 %08x, a21 %08x, a22 %08x, a23 %08x\n\n", a20, a21, a22, a23 ); +#else + #define Dump(__t) +#endif + +#if (VERBOSE >= 1) + #define Dump1(__t) Dump(__t) +#else + #define Dump1(__t) +#endif + +#if (VERBOSE >= 2) + #define Dump2(__t) Dump(__t) +#else + #define Dump2(__t) +#endif + +#if (VERBOSE >= 3) + #define Dump3(__t) Dump(__t) +#else + #define Dump3(__t) +#endif + +/* ---------------------------------------------------------------- */ + +void Xoodoo_Initialize(void *state) +{ + memset(state, 0, NLANES*sizeof(tXoodooLane)); +} + +/* ---------------------------------------------------------------- */ + +void Xoodoo_AddBytes(void *argState, const unsigned char *argdata, unsigned int offset, unsigned int length) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + if (length == (3*4*4)) { + uint32_t *state = (uint32_t *)argState; + uint32_t *data = (uint32_t *)argdata; + state[0] ^= 
data[0]; + state[1] ^= data[1]; + state[2] ^= data[2]; + state[3] ^= data[3]; + state[4] ^= data[4]; + state[5] ^= data[5]; + state[6] ^= data[6]; + state[7] ^= data[7]; + state[8] ^= data[8]; + state[9] ^= data[9]; + state[10] ^= data[10]; + state[11] ^= data[11]; + } + else { + unsigned int sizeLeft = length; + unsigned int lanePosition = offset/4; + unsigned int offsetInLane = offset%4; + const unsigned char *curData = argdata; + uint32_t *state = (uint32_t*)argState; + + state += lanePosition; + if ((sizeLeft > 0) && (offsetInLane != 0)) { + unsigned int bytesInLane = 4 - offsetInLane; + uint32_t lane = 0; + if (bytesInLane > sizeLeft) + bytesInLane = sizeLeft; + memcpy((unsigned char*)&lane + offsetInLane, curData, bytesInLane); + *state++ ^= lane; + sizeLeft -= bytesInLane; + curData += bytesInLane; + } + + while(sizeLeft >= 4) { + *state++ ^= READ32_UNALIGNED( curData ); + sizeLeft -= 4; + curData += 4; + } + + if (sizeLeft > 0) { + uint32_t lane = 0; + memcpy(&lane, curData, sizeLeft); + *state ^= lane; + } + } +#else + #error "Not yet implemented" +#endif +} + +/* ---------------------------------------------------------------- */ + +void Xoodoo_OverwriteBytes(void *argstate, const unsigned char *argdata, unsigned int offset, unsigned int length) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + if (length == (3*4*4)) { + uint32_t *state = (uint32_t *)argstate; + uint32_t *data = (uint32_t *)argdata; + state[0] = data[0]; + state[1] = data[1]; + state[2] = data[2]; + state[3] = data[3]; + state[4] = data[4]; + state[5] = data[5]; + state[6] = data[6]; + state[7] = data[7]; + state[8] = data[8]; + state[9] = data[9]; + state[10] = data[10]; + state[11] = data[11]; + } + else + memcpy((unsigned char*)argstate+offset, argdata, length); +#else + #error "Not yet implemented" +#endif +} + +/* ---------------------------------------------------------------- */ + +void Xoodoo_OverwriteWithZeroes(void *argstate, unsigned int byteCount) +{ +#if 
(PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + memset(argstate, 0, byteCount); +#else + #error "Not yet implemented" +#endif +} + +/* ---------------------------------------------------------------- */ + +void Xoodoo_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + memcpy(data, (unsigned char*)state+offset, length); +#else + #error "Not yet implemented" +#endif +} + +/* ---------------------------------------------------------------- */ + +void Xoodoo_ExtractAndAddBytes(const void *argState, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + if (length == (3*4*4)) { + uint32_t *state = (uint32_t *)argState; + const uint32_t *ii = (const uint32_t *)input; + uint32_t *oo = (uint32_t *)output; + + oo[0] = state[0] ^ ii[0]; + oo[1] = state[1] ^ ii[1]; + oo[2] = state[2] ^ ii[2]; + oo[3] = state[3] ^ ii[3]; + oo[4] = state[4] ^ ii[4]; + oo[5] = state[5] ^ ii[5]; + oo[6] = state[6] ^ ii[6]; + oo[7] = state[7] ^ ii[7]; + oo[8] = state[8] ^ ii[8]; + oo[9] = state[9] ^ ii[9]; + oo[10] = state[10] ^ ii[10]; + oo[11] = state[11] ^ ii[11]; + } + else { + unsigned int sizeLeft = length; + unsigned int lanePosition = offset/4; + unsigned int offsetInLane = offset%4; + const unsigned char *curInput = input; + unsigned char *curOutput = output; + const uint32_t *state = (const uint32_t*)argState; + + state += lanePosition; + if ((sizeLeft > 0) && (offsetInLane != 0)) { + unsigned int bytesInLane = 4 - offsetInLane; + uint32_t lane = *state++ >> (offsetInLane * 8); + if (bytesInLane > sizeLeft) + bytesInLane = sizeLeft; + sizeLeft -= bytesInLane; + do { + *curOutput++ = (*curInput++) ^ (unsigned char)lane; + lane >>= 8; + } + while ( --bytesInLane != 0); + } + + while(sizeLeft >= 4) { + WRITE32_UNALIGNED( curOutput, READ32_UNALIGNED( curInput ) ^ *state++ ); + sizeLeft -= 4; + curInput += 4; + 
curOutput += 4; + } + + if (sizeLeft > 0) { + uint32_t lane = *state; + do { + *curOutput++ = (*curInput++) ^ (unsigned char)lane; + lane >>= 8; + } + while ( --sizeLeft != 0 ); + } + } +#else + #error "Not yet implemented" +#endif +} + +/* ---------------------------------------------------------------- */ + +/* +** DeclareVars: working copy of the 12 lanes plus two scratch words (v1, v2). +** Lane aRC mirrors state[4*R + C], as loaded by State2Vars below. +*/ +#define DeclareVars uint32_t a00, a01, a02, a03; \ + uint32_t a10, a11, a12, a13; \ + uint32_t a20, a21, a22, a23; \ + uint32_t v1, v2 + +/* +** State2Vars: load the lanes from the `state` array in scope at the expansion site. +*/ +#define State2Vars a00 = state[0+0], a01 = state[0+1], a02 = state[0+2], a03 = state[0+3]; \ + a10 = state[4+0], a11 = state[4+1], a12 = state[4+2], a13 = state[4+3]; \ + a20 = state[8+0], a21 = state[8+1], a22 = state[8+2], a23 = state[8+3] + +/* +** Vars2State: store the lanes back into the same `state` array. +*/ +#define Vars2State state[0+0] = a00, state[0+1] = a01, state[0+2] = a02, state[0+3] = a03; \ + state[4+0] = a10, state[4+1] = a11, state[4+2] = a12, state[4+3] = a13; \ + state[8+0] = a20, state[8+1] = a21, state[8+2] = a22, state[8+3] = a23 + +/* +** Theta: Column Parity Mixer +** Each column x is XORed with ROTL32(p, 5) ^ ROTL32(p, 14), where p is the +** XOR of the three lanes of column x-1 (column 3 for x = 0). +*/ +#define Theta() \ + v1 = a03 ^ a13 ^ a23; \ + v2 = a00 ^ a10 ^ a20; \ + v1 = ROTL32(v1, 5) ^ ROTL32(v1, 14); \ + a00 ^= v1; \ + a10 ^= v1; \ + a20 ^= v1; \ + v1 = a01 ^ a11 ^ a21; \ + v2 = ROTL32(v2, 5) ^ ROTL32(v2, 14); \ + a01 ^= v2; \ + a11 ^= v2; \ + a21 ^= v2; \ + v2 = a02 ^ a12 ^ a22; \ + v1 = ROTL32(v1, 5) ^ ROTL32(v1, 14); \ + a02 ^= v1; \ + a12 ^= v1; \ + a22 ^= v1; \ + v2 = ROTL32(v2, 5) ^ ROTL32(v2, 14); \ + a03 ^= v2; \ + a13 ^= v2; \ + a23 ^= v2 + +/* +** Rho-west: Plane shift +** Plane 1 moves by one column (a1x takes the old a1(x-1), a10 takes a13); +** every lane of plane 2 is rotated by 11 bits with ROTL32. +*/ +#define Rho_west() \ + a20 = ROTL32(a20, 11); \ + a21 = ROTL32(a21, 11); \ + a22 = ROTL32(a22, 11); \ + a23 = ROTL32(a23, 11); \ + v1 = a13; \ + a13 = a12; \ + a12 = a11; \ + a11 = a10; \ + a10 = v1 + +/* +** Iota: Round constants +** The round constant is XORed into lane a00 only. +*/ +#define Iota(__rc) a00 ^= __rc + +/* +** Chi: Non linear step, on columns +** The three updates of a column run in sequence, so the second and third +** read lanes that were rewritten just before them. +*/ +#define Chi() \ + a00 ^= ~a10 & a20; \ + a10 ^= ~a20 & a00; \ + a20 ^= ~a00 & a10; \ + \ + a01 ^= ~a11 & a21; \ + a11 ^= ~a21 & a01; \ + a21 ^= ~a01 & a11; \ + \ + a02 ^= ~a12 & a22; \ + a12 ^= ~a22 & a02; 
\ + a22 ^= ~a02 & a12; \ + \ + a03 ^= ~a13 & a23; \ + a13 ^= ~a23 & a03; \ + a23 ^= ~a03 & a13 + +/* +** Rho-east: Plane shift +** Every lane of plane 1 is rotated by 1 bit; the lanes of plane 2 are rotated +** by 8 bits and exchanged pairwise (a20 with a22, a21 with a23). +*/ +#define Rho_east() \ + a10 = ROTL32(a10, 1); \ + a11 = ROTL32(a11, 1); \ + a12 = ROTL32(a12, 1); \ + a13 = ROTL32(a13, 1); \ + v1 = ROTL32(a23, 8); \ + a23 = ROTL32(a21, 8); \ + a21 = v1; \ + v1 = ROTL32(a22, 8); \ + a22 = ROTL32(a20, 8); \ + a20 = v1 + +/* +** Round: one full round (Theta, Rho-west, Iota, Chi, Rho-east). +** Dump3 prints the lanes after each step only when VERBOSE >= 3. +*/ +#define Round(__rc) \ + Theta(); \ + Dump3("Theta"); \ + Rho_west(); \ + Dump3("Rho-west"); \ + Iota(__rc); \ + Dump3("Iota"); \ + Chi(); \ + Dump3("Chi"); \ + Rho_east(); \ + Dump3("Rho-east") + +/* Round constants in the order they are applied: RC[0] is _rc12, RC[MAXROUNDS-1] is _rc1. */ +static const uint32_t RC[MAXROUNDS] = { + _rc12, + _rc11, + _rc10, + _rc9, + _rc8, + _rc7, + _rc6, + _rc5, + _rc4, + _rc3, + _rc2, + _rc1 +}; + +/* +** Applies the last nr rounds of the schedule, RC[MAXROUNDS - nr] up to +** RC[MAXROUNDS - 1], to the 12 lanes of state in place. +** NOTE(review): nr is not range-checked. For nr > MAXROUNDS the start index +** appears to wrap around, so the loop is skipped and state is written back +** unchanged -- confirm that no caller passes more than MAXROUNDS. +*/ +void Xoodoo_Permute_Nrounds( uint32_t * state, uint32_t nr ) +{ + DeclareVars; + uint32_t i; + + State2Vars; + for (i = MAXROUNDS - nr; i < MAXROUNDS; ++i ) { + Round(RC[i]); + Dump2("Round"); + } + Dump1("Permutation"); + Vars2State; +} + +/* Unrolled permutation using the six constants _rc6 down to _rc1, in place on state. */ +void Xoodoo_Permute_6rounds( uint32_t * state) +{ + DeclareVars; + + State2Vars; + Round(_rc6); + Round(_rc5); + Round(_rc4); + Round(_rc3); + Round(_rc2); + Round(_rc1); + Dump1("Permutation"); + Vars2State; +} + +/* Unrolled permutation using all twelve constants _rc12 down to _rc1, in place on state. */ +void Xoodoo_Permute_12rounds( uint32_t * state) +{ + DeclareVars; + + State2Vars; + Round(_rc12); + Round(_rc11); + Round(_rc10); + Round(_rc9); + Round(_rc8); + Round(_rc7); + Round(_rc6); + Round(_rc5); + Round(_rc4); + Round(_rc3); + Round(_rc2); + Round(_rc1); + Dump1("Permutation"); + Vars2State; +} diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Xoodoo.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Xoodoo.h new file mode 100644 index 0000000..1b6f1a9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Xoodoo.h @@ -0,0 +1,79 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. 
+ +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodoo_h_ +#define _Xoodoo_h_ + +#include +#include + +#define MAXROUNDS 12 +#define NROWS 3 +#define NCOLUMS 4 +#define NLANES (NCOLUMS*NROWS) + +/* Round constants */ +#define _rc12 0x00000058 +#define _rc11 0x00000038 +#define _rc10 0x000003C0 +#define _rc9 0x000000D0 +#define _rc8 0x00000120 +#define _rc7 0x00000014 +#define _rc6 0x00000060 +#define _rc5 0x0000002C +#define _rc4 0x00000380 +#define _rc3 0x000000F0 +#define _rc2 0x000001A0 +#define _rc1 0x00000012 + + +#if !defined(ROTL32) + #if defined (__arm__) && !defined(__GNUC__) + #define ROTL32(a, offset) __ror(a, (32-(offset))%32) + #elif defined(_MSC_VER) + #define ROTL32(a, offset) _rotl(a, (offset)%32) + #else + #define ROTL32(a, offset) ((((uint32_t)a) << ((offset)%32)) ^ (((uint32_t)a) >> ((32-(offset))%32))) + #endif +#endif + +#if !defined(READ32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define READ32_UNALIGNED(argAddress) (*((const __packed uint32_t*)(argAddress))) + #elif defined(_MSC_VER) + #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress))) + #else + #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress))) + #endif +#endif + +#if !defined(WRITE32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define WRITE32_UNALIGNED(argAddress, argData) (*((__packed uint32_t*)(argAddress)) = (argData)) + #elif defined(_MSC_VER) + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #else + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #endif +#endif + +#if !defined(index) + 
#define index(__x,__y) ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS)) +#endif + +typedef uint32_t tXoodooLane; + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Xoodyak-full-blocks.c b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Xoodyak-full-blocks.c new file mode 100644 index 0000000..99a62ea --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Xoodyak-full-blocks.c @@ -0,0 +1,127 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include +#include +#include +#include "Xoodoo-SnP.h" +#include "Xoodyak-parameters.h" + +#ifdef OUTPUT +#include +#endif + +#define SnP_AddByte Xoodoo_AddByte +#define SnP_AddBytes Xoodoo_AddBytes +#define SnP_ExtractBytes Xoodoo_ExtractBytes +#define SnP_ExtractAndAddBytes Xoodoo_ExtractAndAddBytes +#define SnP_Permute Xoodoo_Permute_12rounds +#define SnP_OverwriteBytes Xoodoo_OverwriteBytes + +/* +** Absorbs whole Xoodyak_Rkin-byte blocks of X in keyed mode: each iteration +** permutes the state, adds one block at offset 0 and adds the byte 0x01 at +** offset Xoodyak_Rkin. The do/while body runs before XLen is tested, so XLen +** must be at least one block on entry. Returns the number of bytes consumed. +*/ +size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen) +{ + size_t initialLength = XLen; + + do { + SnP_Permute(state); /* Xoodyak_Up(instance, NULL, 0, 0); */ + SnP_AddBytes(state, X, 0, Xoodyak_Rkin); /* Xoodyak_Down(instance, X, Xoodyak_Rkin, 0); */ + SnP_AddByte(state, 0x01, Xoodyak_Rkin); + X += Xoodyak_Rkin; + XLen -= Xoodyak_Rkin; + } while (XLen >= Xoodyak_Rkin); + + return initialLength - XLen; +} + +/* +** Same loop as Xoodyak_AbsorbKeyedFullBlocks but with the hash-mode block +** size Xoodyak_Rhash. XLen must be at least one block on entry; returns the +** number of bytes consumed. +*/ +size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen) +{ + size_t initialLength = XLen; 
+ + do { + SnP_Permute(state); /* Xoodyak_Up(instance, NULL, 0, 0); */ + SnP_AddBytes(state, X, 0, Xoodyak_Rhash); /* Xoodyak_Down(instance, X, Xoodyak_Rhash, 0); */ + SnP_AddByte(state, 0x01, Xoodyak_Rhash); + X += Xoodyak_Rhash; + XLen -= Xoodyak_Rhash; + } while (XLen >= Xoodyak_Rhash); + + return initialLength - XLen; +} + + +/* +** Squeezes whole Xoodyak_Rkout-byte blocks into Y: each iteration adds the +** byte 0x01 at offset 0, permutes the state and copies one block out. The +** do/while body runs before YLen is tested, so YLen must be at least one +** block on entry. Returns the number of bytes written. +*/ +size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen) +{ + size_t initialLength = YLen; + + do { + SnP_AddByte(state, 0x01, 0); /* Xoodyak_Down(instance, NULL, 0, 0); */ + SnP_Permute(state); /* Xoodyak_Up(instance, Y, Xoodyak_Rkout, 0); */ + SnP_ExtractBytes(state, Y, 0, Xoodyak_Rkout); + Y += Xoodyak_Rkout; + YLen -= Xoodyak_Rkout; + } while (YLen >= Xoodyak_Rkout); + + return initialLength - YLen; +} + +/* +** Same loop as Xoodyak_SqueezeKeyedFullBlocks but with the hash-mode block +** size Xoodyak_Rhash. YLen must be at least one block on entry; returns the +** number of bytes written. +*/ +size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen) +{ + size_t initialLength = YLen; + + do { + SnP_AddByte(state, 0x01, 0); /* Xoodyak_Down(instance, NULL, 0, 0); */ + SnP_Permute(state); /* Xoodyak_Up(instance, Y, Xoodyak_Rhash, 0); */ + SnP_ExtractBytes(state, Y, 0, Xoodyak_Rhash); + Y += Xoodyak_Rhash; + YLen -= Xoodyak_Rhash; + } while (YLen >= Xoodyak_Rhash); + + return initialLength - YLen; +} + +/* +** Encrypts whole Xoodyak_Rkout-byte blocks from I into O: each iteration +** permutes the state, writes input XOR state to O, overwrites the first +** Xoodyak_Rkout state bytes with that output block and adds the byte 0x01 at +** offset Xoodyak_Rkout. IOLen must be at least one block on entry (do/while); +** returns the number of bytes processed. +*/ +size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen) +{ + size_t initialLength = IOLen; + + do { + SnP_Permute(state); + SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout); + SnP_OverwriteBytes(state, O, 0, Xoodyak_Rkout); + SnP_AddByte(state, 0x01, Xoodyak_Rkout); + I += Xoodyak_Rkout; + O += Xoodyak_Rkout; + IOLen -= Xoodyak_Rkout; + } while (IOLen >= Xoodyak_Rkout); + + return initialLength - IOLen; +} + +/* +** Decrypts whole Xoodyak_Rkout-byte blocks from I into O. It differs from the +** encrypt loop only in the state update: the output block is added into the +** state with SnP_AddBytes instead of overwriting it. IOLen must be at least +** one block on entry (do/while); returns the number of bytes processed. +*/ +size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen) +{ + size_t initialLength = IOLen; + + do { + SnP_Permute(state); + SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout); + SnP_AddBytes(state, O, 0, Xoodyak_Rkout); + SnP_AddByte(state, 0x01, Xoodyak_Rkout); + I += Xoodyak_Rkout; + O += Xoodyak_Rkout; + IOLen 
-= Xoodyak_Rkout; + } while (IOLen >= Xoodyak_Rkout); + + return initialLength - IOLen; +} diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Xoodyak-parameters.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Xoodyak-parameters.h new file mode 100644 index 0000000..a8c34d8 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Xoodyak-parameters.h @@ -0,0 +1,26 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodyak_parameters_h_ +#define _Xoodyak_parameters_h_ + +#define Xoodyak_f_bPrime 48 +#define Xoodyak_Rhash 16 +#define Xoodyak_Rkin 44 +#define Xoodyak_Rkout 24 +#define Xoodyak_lRatchet 16 + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Xoodyak.c b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Xoodyak.c new file mode 100644 index 0000000..c5407dc --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/Xoodyak.c @@ -0,0 +1,55 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". 
+ +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifdef XoodooReference + #include "displayIntermediateValues.h" +#endif + +#if DEBUG +#include +#endif +#include +#include "Xoodyak.h" + +#ifdef OUTPUT +#include +#include + +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length); +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length) +{ + unsigned int i; + + fprintf(f, "%s:", synopsis); + for(i=0; i +#include "Cyclist.h" +#include "Xoodoo-SnP.h" +#include "Xoodyak-parameters.h" + +KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment) +KCP_DeclareCyclistFunctions(Xoodyak) + +#else +#error This requires an implementation of Xoodoo +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/align.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/align.h new file mode 100644 index 0000000..82ad2f9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/align.h @@ -0,0 +1,33 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _align_h_ +#define _align_h_ + +/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. 
*/ +#ifdef ALIGN +#undef ALIGN +#endif + +#if defined(__GNUC__) +#define ALIGN(x) __attribute__ ((aligned(x))) +#elif defined(_MSC_VER) +#define ALIGN(x) __declspec(align(x)) +#elif defined(__ARMCC_VERSION) +#define ALIGN(x) __align(x) +#else +#define ALIGN(x) +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/api.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/api.h new file mode 100644 index 0000000..4ceda96 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/brg_endian.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/brg_endian.h new file mode 100644 index 0000000..7c640b9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/brg_endian.h @@ -0,0 +1,143 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The redistribution and use of this software (with or without changes) + is allowed without the payment of fees or royalties provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. 
+ --------------------------------------------------------------------------- + Issue Date: 20/12/2007 + Changes for ARM 9/9/2010 +*/ + +#ifndef _BRG_ENDIAN_H +#define _BRG_ENDIAN_H + +#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ +#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ + +#if 0 +/* Include files where endian defines and byteswap functions may reside */ +#if defined( __sun ) +# include +#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ ) +# include +#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \ + defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ ) +# include +#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ ) +# if !defined( __MINGW32__ ) && !defined( _AIX ) +# include +# if !defined( __BEOS__ ) +# include +# endif +# endif +#endif +#endif + +/* Now attempt to set the define for platform byte order using any */ +/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */ +/* seem to encompass most endian symbol definitions */ + +#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN ) +# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN ) +# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( _BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( _LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( 
__BIG_ENDIAN ) && defined( __LITTLE_ENDIAN ) +# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ ) +# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +/* if the platform byte order could not be determined, then try to */ +/* set this define using common machine defines */ +#if !defined(PLATFORM_BYTE_ORDER) + +#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ + defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ + defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ + defined( vax ) || defined( vms ) || defined( VMS ) || \ + defined( __VMS ) || defined( _M_X64 ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN + +#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ + defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ + defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ + defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ + defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ + defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ + defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \ + defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ ) 
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN + +#elif defined(__arm__) +# ifdef __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# else +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif 1 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#elif 0 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#else +# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order +#endif + +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/config.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/config.h new file mode 100644 index 0000000..7dfc043 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/config.h @@ -0,0 +1,4 @@ +/* File generated by ToTargetConfigFile.xsl */ + +#define XKCP_has_Xoodyak +#define XKCP_has_Xoodoo diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/encrypt.c b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/encrypt.c new file mode 100644 index 0000000..199b719 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/XKCP-plain-ua/encrypt.c @@ -0,0 +1,90 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include "crypto_aead.h" +#include "api.h" +#include "Xoodyak.h" +#include + +#if !defined(CRYPTO_KEYBYTES) + #define CRYPTO_KEYBYTES 16 +#endif +#if !defined(CRYPTO_NPUBBYTES) + #define CRYPTO_NPUBBYTES 16 +#endif + +#define TAGLEN 16 + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k) +{ + Xoodyak_Instance instance; + + (void)nsec; + + Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, npub, CRYPTO_NPUBBYTES, NULL, 0); + Xoodyak_Absorb(&instance, ad, (size_t)adlen); + Xoodyak_Encrypt(&instance, m, c, (size_t)mlen); + Xoodyak_Squeeze(&instance, c + mlen, TAGLEN); + *clen = mlen + TAGLEN; + #if 0 + { + unsigned int i; + for (i = 0; i < *clen; ++i ) + { + printf("\\x%02x", c[i] ); + } + printf("\n"); + } + #endif + return 0; +} + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + Xoodyak_Instance instance; + unsigned char tag[TAGLEN]; + unsigned long long mlen_; + + (void)nsec; + + *mlen = 0; + if (clen < TAGLEN) { + return -1; + } + mlen_ = clen - TAGLEN; + Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, npub, CRYPTO_NPUBBYTES, NULL, 0); + Xoodyak_Absorb(&instance, ad, (size_t)adlen); + Xoodyak_Decrypt(&instance, c, m, (size_t)mlen_); + Xoodyak_Squeeze(&instance, tag, TAGLEN); + if (memcmp(tag, c + mlen_, TAGLEN) != 0) { + memset(m, 0, (size_t)mlen_); + return -1; + } + *mlen = mlen_; + return 0; +} diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/aead-common.c b/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/aead-common.c new file mode 100644 index 0000000..84fc53a --- 
/dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/aead-common.c @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2020 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
/*
 * Compares two tags of "size" bytes without data-dependent branches.
 *
 * Returns -1 (all bits set) if the tags match, or 0 if they do not.
 * The mask is derived with unsigned arithmetic only: right-shifting a
 * negative signed value is implementation-defined in C.
 */
static int aead_tag_match_mask
    (const unsigned char *tag1, const unsigned char *tag2, unsigned size)
{
    unsigned diff = 0;
    while (size > 0) {
        diff |= (unsigned)(*tag1++ ^ *tag2++);
        --size;
    }
    /* diff is in 0..255, so bit 8 of (diff - 1) is set only when diff == 0 */
    return -(int)(((diff - 1U) >> 8) & 1U);
}

/*
 * ANDs every plaintext byte with the low 8 bits of "mask": a mask of -1
 * leaves the buffer untouched, a mask of 0 clears it.
 */
static void aead_mask_plaintext
    (unsigned char *plaintext, unsigned long long plaintext_len, int mask)
{
    unsigned char keep = (unsigned char)mask;
    while (plaintext_len > 0) {
        *plaintext++ &= keep;
        --plaintext_len;
    }
}

/*
 * Checks an authentication tag and destroys the plaintext on mismatch.
 *
 * plaintext/plaintext_len: decrypted data, zeroed in place if the tags differ.
 * tag1, tag2: the two tags to compare, each "size" bytes long.
 *
 * Returns 0 if the tags match and -1 if they do not.
 */
int aead_check_tag
    (unsigned char *plaintext, unsigned long long plaintext_len,
     const unsigned char *tag1, const unsigned char *tag2,
     unsigned size)
{
    /* "mask" is -1 if the tags match, or 0 if they don't match */
    int mask = aead_tag_match_mask(tag1, tag2, size);

    /* Destroy the plaintext if the tag match failed */
    aead_mask_plaintext(plaintext, plaintext_len, mask);

    /* If "mask" is 0, return -1, otherwise return 0 */
    return ~mask;
}

/*
 * Same as aead_check_tag(), but the match mask is additionally ANDed with
 * "precheck" before it is applied and returned.
 *
 * NOTE(review): callers presumably pass -1 when earlier checks passed and 0
 * when they failed, so that a failed precheck also wipes the plaintext and
 * yields -1 -- confirm against the declaration in aead-common.h.
 */
int aead_check_tag_precheck
    (unsigned char *plaintext, unsigned long long plaintext_len,
     const unsigned char *tag1, const unsigned char *tag2,
     unsigned size, int precheck)
{
    /* "mask" is -1 if the tags match and precheck is -1, 0 if either failed */
    int mask = aead_tag_match_mask(tag1, tag2, size) & precheck;

    /* Destroy the plaintext if the tag match failed */
    aead_mask_plaintext(plaintext, plaintext_len, mask);

    /* If "mask" is 0, return -1, otherwise return 0 */
    return ~mask;
}
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef LWCRYPTO_AEAD_COMMON_H +#define LWCRYPTO_AEAD_COMMON_H + +#include + +/** + * \file aead-common.h + * \brief Definitions that are common across AEAD schemes. + * + * AEAD stands for "Authenticated Encryption with Associated Data". + * It is a standard API pattern for securely encrypting and + * authenticating packets of data. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * \brief Encrypts and authenticates a packet with an AEAD scheme. + * + * \param c Buffer to receive the output. + * \param clen On exit, set to the length of the output which includes + * the ciphertext and the authentication tag. + * \param m Buffer that contains the plaintext message to encrypt. + * \param mlen Length of the plaintext message in bytes. + * \param ad Buffer that contains associated data to authenticate + * along with the packet but which does not need to be encrypted. 
+ * \param adlen Length of the associated data in bytes. + * \param nsec Secret nonce - normally not used by AEAD schemes. + * \param npub Points to the public nonce for the packet. + * \param k Points to the key to use to encrypt the packet. + * + * \return 0 on success, or a negative value if there was an error in + * the parameters. + */ +typedef int (*aead_cipher_encrypt_t) + (unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k); + +/** + * \brief Decrypts and authenticates a packet with an AEAD scheme. + * + * \param m Buffer to receive the plaintext message on output. + * \param mlen Receives the length of the plaintext message on output. + * \param nsec Secret nonce - normally not used by AEAD schemes. + * \param c Buffer that contains the ciphertext and authentication + * tag to decrypt. + * \param clen Length of the input data in bytes, which includes the + * ciphertext and the authentication tag. + * \param ad Buffer that contains associated data to authenticate + * along with the packet but which does not need to be encrypted. + * \param adlen Length of the associated data in bytes. + * \param npub Points to the public nonce for the packet. + * \param k Points to the key to use to decrypt the packet. + * + * \return 0 on success, -1 if the authentication tag was incorrect, + * or some other negative number if there was an error in the parameters. + */ +typedef int (*aead_cipher_decrypt_t) + (unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k); + +/** + * \brief Hashes a block of input data. + * + * \param out Buffer to receive the hash output. + * \param in Points to the input data to be hashed. 
+ * \param inlen Length of the input data in bytes. + * + * \return Returns zero on success or -1 if there was an error in the + * parameters. + */ +typedef int (*aead_hash_t) + (unsigned char *out, const unsigned char *in, unsigned long long inlen); + +/** + * \brief Initializes the state for a hashing operation. + * + * \param state Hash state to be initialized. + */ +typedef void (*aead_hash_init_t)(void *state); + +/** + * \brief Updates a hash state with more input data. + * + * \param state Hash state to be updated. + * \param in Points to the input data to be incorporated into the state. + * \param inlen Length of the input data to be incorporated into the state. + */ +typedef void (*aead_hash_update_t) + (void *state, const unsigned char *in, unsigned long long inlen); + +/** + * \brief Returns the final hash value from a hashing operation. + * + * \param state Hash state to be finalized. + * \param out Points to the output buffer to receive the hash value. + */ +typedef void (*aead_hash_finalize_t)(void *state, unsigned char *out); + +/** + * \brief Absorbs more input data into an XOF state. + * + * \param state XOF state to be updated. + * \param in Points to the input data to be absorbed into the state. + * \param inlen Length of the input data to be absorbed into the state. + * + * \sa ascon_xof_init(), ascon_xof_squeeze() + */ +typedef void (*aead_xof_absorb_t) + (void *state, const unsigned char *in, unsigned long long inlen); + +/** + * \brief Squeezes output data from an XOF state. + * + * \param state XOF state to squeeze the output data from. + * \param out Points to the output buffer to receive the squeezed data. + * \param outlen Number of bytes of data to squeeze out of the state. + */ +typedef void (*aead_xof_squeeze_t) + (void *state, unsigned char *out, unsigned long long outlen); + +/** + * \brief No special AEAD features. + */ +#define AEAD_FLAG_NONE 0x0000 + +/** + * \brief The natural byte order of the AEAD cipher is little-endian. 
+ * + * If this flag is not present, then the natural byte order of the + * AEAD cipher should be assumed to be big-endian. + * + * The natural byte order may be useful when formatting packet sequence + * numbers as nonces. The application needs to know whether the sequence + * number should be packed into the leading or trailing bytes of the nonce. + */ +#define AEAD_FLAG_LITTLE_ENDIAN 0x0001 + +/** + * \brief Meta-information about an AEAD cipher. + */ +typedef struct +{ + const char *name; /**< Name of the cipher */ + unsigned key_len; /**< Length of the key in bytes */ + unsigned nonce_len; /**< Length of the nonce in bytes */ + unsigned tag_len; /**< Length of the tag in bytes */ + unsigned flags; /**< Flags for extra features */ + aead_cipher_encrypt_t encrypt; /**< AEAD encryption function */ + aead_cipher_decrypt_t decrypt; /**< AEAD decryption function */ + +} aead_cipher_t; + +/** + * \brief Meta-information about a hash algorithm that is related to an AEAD. + * + * Regular hash algorithms should provide the "hash", "init", "update", + * and "finalize" functions. Extensible Output Functions (XOFs) should + * provide the "hash", "init", "absorb", and "squeeze" functions. + */ +typedef struct +{ + const char *name; /**< Name of the hash algorithm */ + size_t state_size; /**< Size of the incremental state structure */ + unsigned hash_len; /**< Length of the hash in bytes */ + unsigned flags; /**< Flags for extra features */ + aead_hash_t hash; /**< All in one hashing function */ + aead_hash_init_t init; /**< Incremental hash/XOF init function */ + aead_hash_update_t update; /**< Incremental hash update function */ + aead_hash_finalize_t finalize; /**< Incremental hash finalize function */ + aead_xof_absorb_t absorb; /**< Incremental XOF absorb function */ + aead_xof_squeeze_t squeeze; /**< Incremental XOF squeeze function */ + +} aead_hash_algorithm_t; + +/** + * \brief Check an authentication tag in constant time. 
+ * + * \param plaintext Points to the plaintext data. + * \param plaintext_len Length of the plaintext in bytes. + * \param tag1 First tag to compare. + * \param tag2 Second tag to compare. + * \param tag_len Length of the tags in bytes. + * + * \return Returns -1 if the tag check failed or 0 if the check succeeded. + * + * If the tag check fails, then the \a plaintext will also be zeroed to + * prevent it from being used accidentally by the application when the + * ciphertext was invalid. + */ +int aead_check_tag + (unsigned char *plaintext, unsigned long long plaintext_len, + const unsigned char *tag1, const unsigned char *tag2, + unsigned tag_len); + +/** + * \brief Check an authentication tag in constant time with a previous check. + * + * \param plaintext Points to the plaintext data. + * \param plaintext_len Length of the plaintext in bytes. + * \param tag1 First tag to compare. + * \param tag2 Second tag to compare. + * \param tag_len Length of the tags in bytes. + * \param precheck Set to -1 if previous check succeeded or 0 if it failed. + * + * \return Returns -1 if the tag check failed or 0 if the check succeeded. + * + * If the tag check fails, then the \a plaintext will also be zeroed to + * prevent it from being used accidentally by the application when the + * ciphertext was invalid. + * + * This version can be used to incorporate other information about the + * correctness of the plaintext into the final result. 
+ */ +int aead_check_tag_precheck + (unsigned char *plaintext, unsigned long long plaintext_len, + const unsigned char *tag1, const unsigned char *tag2, + unsigned tag_len, int precheck); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/api.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/api.h new file mode 100644 index 0000000..b2f8a36 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/encrypt.c b/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/encrypt.c new file mode 100644 index 0000000..f7bb1b4 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/encrypt.c @@ -0,0 +1,26 @@ + +#include "xoodyak.h" + +int crypto_aead_encrypt + (unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k) +{ + return xoodyak_aead_encrypt + (c, clen, m, mlen, ad, adlen, nsec, npub, k); +} + +int crypto_aead_decrypt + (unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + return xoodyak_aead_decrypt + (m, mlen, nsec, c, clen, ad, adlen, npub, k); +} diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/internal-util.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/internal-util.h new file mode 100644 index 0000000..e30166d --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/internal-util.h @@ -0,0 +1,702 @@ +/* + * Copyright (C) 2020 Southern 
Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef LW_INTERNAL_UTIL_H +#define LW_INTERNAL_UTIL_H + +#include + +/* Figure out how to inline functions using this C compiler */ +#if defined(__STDC__) && __STDC_VERSION__ >= 199901L +#define STATIC_INLINE static inline +#elif defined(__GNUC__) || defined(__clang__) +#define STATIC_INLINE static __inline__ +#else +#define STATIC_INLINE static +#endif + +/* Try to figure out whether the CPU is little-endian or big-endian. + * May need to modify this to include new compiler-specific defines. 
+ * Alternatively, define __LITTLE_ENDIAN__ or __BIG_ENDIAN__ in your + * compiler flags when you compile this library */ +#if defined(__x86_64) || defined(__x86_64__) || \ + defined(__i386) || defined(__i386__) || \ + defined(__AVR__) || defined(__arm) || defined(__arm__) || \ + defined(_M_AMD64) || defined(_M_X64) || defined(_M_IX86) || \ + defined(_M_IA64) || defined(_M_ARM) || defined(_M_ARM_FP) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == 1234) || \ + defined(__LITTLE_ENDIAN__) +#define LW_UTIL_LITTLE_ENDIAN 1 +#elif (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == 4321) || \ + defined(__BIG_ENDIAN__) +/* Big endian */ +#else +#error "Cannot determine the endianess of this platform" +#endif + +/* Helper macros to load and store values while converting endian-ness */ + +/* Load a big-endian 32-bit word from a byte buffer */ +#define be_load_word32(ptr) \ + ((((uint32_t)((ptr)[0])) << 24) | \ + (((uint32_t)((ptr)[1])) << 16) | \ + (((uint32_t)((ptr)[2])) << 8) | \ + ((uint32_t)((ptr)[3]))) + +/* Store a big-endian 32-bit word into a byte buffer */ +#define be_store_word32(ptr, x) \ + do { \ + uint32_t _x = (x); \ + (ptr)[0] = (uint8_t)(_x >> 24); \ + (ptr)[1] = (uint8_t)(_x >> 16); \ + (ptr)[2] = (uint8_t)(_x >> 8); \ + (ptr)[3] = (uint8_t)_x; \ + } while (0) + +/* Load a little-endian 32-bit word from a byte buffer */ +#define le_load_word32(ptr) \ + ((((uint32_t)((ptr)[3])) << 24) | \ + (((uint32_t)((ptr)[2])) << 16) | \ + (((uint32_t)((ptr)[1])) << 8) | \ + ((uint32_t)((ptr)[0]))) + +/* Store a little-endian 32-bit word into a byte buffer */ +#define le_store_word32(ptr, x) \ + do { \ + uint32_t _x = (x); \ + (ptr)[0] = (uint8_t)_x; \ + (ptr)[1] = (uint8_t)(_x >> 8); \ + (ptr)[2] = (uint8_t)(_x >> 16); \ + (ptr)[3] = (uint8_t)(_x >> 24); \ + } while (0) + +/* Load a big-endian 64-bit word from a byte buffer */ +#define be_load_word64(ptr) \ + ((((uint64_t)((ptr)[0])) << 56) | \ + (((uint64_t)((ptr)[1])) << 48) | \ + (((uint64_t)((ptr)[2])) << 40) | \ + 
(((uint64_t)((ptr)[3])) << 32) | \ + (((uint64_t)((ptr)[4])) << 24) | \ + (((uint64_t)((ptr)[5])) << 16) | \ + (((uint64_t)((ptr)[6])) << 8) | \ + ((uint64_t)((ptr)[7]))) + +/* Store a big-endian 64-bit word into a byte buffer */ +#define be_store_word64(ptr, x) \ + do { \ + uint64_t _x = (x); \ + (ptr)[0] = (uint8_t)(_x >> 56); \ + (ptr)[1] = (uint8_t)(_x >> 48); \ + (ptr)[2] = (uint8_t)(_x >> 40); \ + (ptr)[3] = (uint8_t)(_x >> 32); \ + (ptr)[4] = (uint8_t)(_x >> 24); \ + (ptr)[5] = (uint8_t)(_x >> 16); \ + (ptr)[6] = (uint8_t)(_x >> 8); \ + (ptr)[7] = (uint8_t)_x; \ + } while (0) + +/* Load a little-endian 64-bit word from a byte buffer */ +#define le_load_word64(ptr) \ + ((((uint64_t)((ptr)[7])) << 56) | \ + (((uint64_t)((ptr)[6])) << 48) | \ + (((uint64_t)((ptr)[5])) << 40) | \ + (((uint64_t)((ptr)[4])) << 32) | \ + (((uint64_t)((ptr)[3])) << 24) | \ + (((uint64_t)((ptr)[2])) << 16) | \ + (((uint64_t)((ptr)[1])) << 8) | \ + ((uint64_t)((ptr)[0]))) + +/* Store a little-endian 64-bit word into a byte buffer */ +#define le_store_word64(ptr, x) \ + do { \ + uint64_t _x = (x); \ + (ptr)[0] = (uint8_t)_x; \ + (ptr)[1] = (uint8_t)(_x >> 8); \ + (ptr)[2] = (uint8_t)(_x >> 16); \ + (ptr)[3] = (uint8_t)(_x >> 24); \ + (ptr)[4] = (uint8_t)(_x >> 32); \ + (ptr)[5] = (uint8_t)(_x >> 40); \ + (ptr)[6] = (uint8_t)(_x >> 48); \ + (ptr)[7] = (uint8_t)(_x >> 56); \ + } while (0) + +/* Load a big-endian 16-bit word from a byte buffer */ +#define be_load_word16(ptr) \ + ((((uint16_t)((ptr)[0])) << 8) | \ + ((uint16_t)((ptr)[1]))) + +/* Store a big-endian 16-bit word into a byte buffer */ +#define be_store_word16(ptr, x) \ + do { \ + uint16_t _x = (x); \ + (ptr)[0] = (uint8_t)(_x >> 8); \ + (ptr)[1] = (uint8_t)_x; \ + } while (0) + +/* Load a little-endian 16-bit word from a byte buffer */ +#define le_load_word16(ptr) \ + ((((uint16_t)((ptr)[1])) << 8) | \ + ((uint16_t)((ptr)[0]))) + +/* Store a little-endian 16-bit word into a byte buffer */ +#define le_store_word16(ptr, x) \ + 
do { \ + uint16_t _x = (x); \ + (ptr)[0] = (uint8_t)_x; \ + (ptr)[1] = (uint8_t)(_x >> 8); \ + } while (0) + +/* XOR a source byte buffer against a destination */ +#define lw_xor_block(dest, src, len) \ + do { \ + unsigned char *_dest = (dest); \ + const unsigned char *_src = (src); \ + unsigned _len = (len); \ + while (_len > 0) { \ + *_dest++ ^= *_src++; \ + --_len; \ + } \ + } while (0) + +/* XOR two source byte buffers and put the result in a destination buffer */ +#define lw_xor_block_2_src(dest, src1, src2, len) \ + do { \ + unsigned char *_dest = (dest); \ + const unsigned char *_src1 = (src1); \ + const unsigned char *_src2 = (src2); \ + unsigned _len = (len); \ + while (_len > 0) { \ + *_dest++ = *_src1++ ^ *_src2++; \ + --_len; \ + } \ + } while (0) + +/* XOR a source byte buffer against a destination and write to another + * destination at the same time */ +#define lw_xor_block_2_dest(dest2, dest, src, len) \ + do { \ + unsigned char *_dest2 = (dest2); \ + unsigned char *_dest = (dest); \ + const unsigned char *_src = (src); \ + unsigned _len = (len); \ + while (_len > 0) { \ + *_dest2++ = (*_dest++ ^= *_src++); \ + --_len; \ + } \ + } while (0) + +/* XOR two byte buffers and write to a destination while at the same + * time copying the contents of src2 to dest2 */ +#define lw_xor_block_copy_src(dest2, dest, src1, src2, len) \ + do { \ + unsigned char *_dest2 = (dest2); \ + unsigned char *_dest = (dest); \ + const unsigned char *_src1 = (src1); \ + const unsigned char *_src2 = (src2); \ + unsigned _len = (len); \ + while (_len > 0) { \ + unsigned char _temp = *_src2++; \ + *_dest2++ = _temp; \ + *_dest++ = *_src1++ ^ _temp; \ + --_len; \ + } \ + } while (0) + +/* XOR a source byte buffer against a destination and write to another + * destination at the same time. 
This version swaps the source value + * into the "dest" buffer */ +#define lw_xor_block_swap(dest2, dest, src, len) \ + do { \ + unsigned char *_dest2 = (dest2); \ + unsigned char *_dest = (dest); \ + const unsigned char *_src = (src); \ + unsigned _len = (len); \ + while (_len > 0) { \ + unsigned char _temp = *_src++; \ + *_dest2++ = *_dest ^ _temp; \ + *_dest++ = _temp; \ + --_len; \ + } \ + } while (0) + +/* Rotation functions need to be optimised for best performance on AVR. + * The most efficient rotations are where the number of bits is 1 or a + * multiple of 8, so we compose the efficient rotations to produce all + * other rotation counts of interest. */ + +#if defined(__AVR__) +#define LW_CRYPTO_ROTATE32_COMPOSED 1 +#else +#define LW_CRYPTO_ROTATE32_COMPOSED 0 +#endif + +/* Rotation macros for 32-bit arguments */ + +/* Generic left rotate of a 32-bit value; bits must be 1..31 (bits == 0 would shift right by 32, which is undefined) */ +#define leftRotate(a, bits) \ + (__extension__ ({ \ + uint32_t _temp = (a); \ + (_temp << (bits)) | (_temp >> (32 - (bits))); \ + })) + +/* Generic right rotate of a 32-bit value; bits must be 1..31 (bits == 0 would shift left by 32, which is undefined) */ +#define rightRotate(a, bits) \ + (__extension__ ({ \ + uint32_t _temp = (a); \ + (_temp >> (bits)) | (_temp << (32 - (bits))); \ + })) + +#if !LW_CRYPTO_ROTATE32_COMPOSED + +/* Left rotate by a specific number of bits. 
These macros may be replaced + * with more efficient ones on platforms that lack a barrel shifter */ +#define leftRotate1(a) (leftRotate((a), 1)) +#define leftRotate2(a) (leftRotate((a), 2)) +#define leftRotate3(a) (leftRotate((a), 3)) +#define leftRotate4(a) (leftRotate((a), 4)) +#define leftRotate5(a) (leftRotate((a), 5)) +#define leftRotate6(a) (leftRotate((a), 6)) +#define leftRotate7(a) (leftRotate((a), 7)) +#define leftRotate8(a) (leftRotate((a), 8)) +#define leftRotate9(a) (leftRotate((a), 9)) +#define leftRotate10(a) (leftRotate((a), 10)) +#define leftRotate11(a) (leftRotate((a), 11)) +#define leftRotate12(a) (leftRotate((a), 12)) +#define leftRotate13(a) (leftRotate((a), 13)) +#define leftRotate14(a) (leftRotate((a), 14)) +#define leftRotate15(a) (leftRotate((a), 15)) +#define leftRotate16(a) (leftRotate((a), 16)) +#define leftRotate17(a) (leftRotate((a), 17)) +#define leftRotate18(a) (leftRotate((a), 18)) +#define leftRotate19(a) (leftRotate((a), 19)) +#define leftRotate20(a) (leftRotate((a), 20)) +#define leftRotate21(a) (leftRotate((a), 21)) +#define leftRotate22(a) (leftRotate((a), 22)) +#define leftRotate23(a) (leftRotate((a), 23)) +#define leftRotate24(a) (leftRotate((a), 24)) +#define leftRotate25(a) (leftRotate((a), 25)) +#define leftRotate26(a) (leftRotate((a), 26)) +#define leftRotate27(a) (leftRotate((a), 27)) +#define leftRotate28(a) (leftRotate((a), 28)) +#define leftRotate29(a) (leftRotate((a), 29)) +#define leftRotate30(a) (leftRotate((a), 30)) +#define leftRotate31(a) (leftRotate((a), 31)) + +/* Right rotate by a specific number of bits. 
These macros may be replaced + * with more efficient ones on platforms that lack a barrel shifter */ +#define rightRotate1(a) (rightRotate((a), 1)) +#define rightRotate2(a) (rightRotate((a), 2)) +#define rightRotate3(a) (rightRotate((a), 3)) +#define rightRotate4(a) (rightRotate((a), 4)) +#define rightRotate5(a) (rightRotate((a), 5)) +#define rightRotate6(a) (rightRotate((a), 6)) +#define rightRotate7(a) (rightRotate((a), 7)) +#define rightRotate8(a) (rightRotate((a), 8)) +#define rightRotate9(a) (rightRotate((a), 9)) +#define rightRotate10(a) (rightRotate((a), 10)) +#define rightRotate11(a) (rightRotate((a), 11)) +#define rightRotate12(a) (rightRotate((a), 12)) +#define rightRotate13(a) (rightRotate((a), 13)) +#define rightRotate14(a) (rightRotate((a), 14)) +#define rightRotate15(a) (rightRotate((a), 15)) +#define rightRotate16(a) (rightRotate((a), 16)) +#define rightRotate17(a) (rightRotate((a), 17)) +#define rightRotate18(a) (rightRotate((a), 18)) +#define rightRotate19(a) (rightRotate((a), 19)) +#define rightRotate20(a) (rightRotate((a), 20)) +#define rightRotate21(a) (rightRotate((a), 21)) +#define rightRotate22(a) (rightRotate((a), 22)) +#define rightRotate23(a) (rightRotate((a), 23)) +#define rightRotate24(a) (rightRotate((a), 24)) +#define rightRotate25(a) (rightRotate((a), 25)) +#define rightRotate26(a) (rightRotate((a), 26)) +#define rightRotate27(a) (rightRotate((a), 27)) +#define rightRotate28(a) (rightRotate((a), 28)) +#define rightRotate29(a) (rightRotate((a), 29)) +#define rightRotate30(a) (rightRotate((a), 30)) +#define rightRotate31(a) (rightRotate((a), 31)) + +#else /* LW_CRYPTO_ROTATE32_COMPOSED */ + +/* Composed rotation macros where 1 and 8 are fast, but others are slow */ + +/* Left rotate by 1 */ +#define leftRotate1(a) (leftRotate((a), 1)) + +/* Left rotate by 2 */ +#define leftRotate2(a) (leftRotate(leftRotate((a), 1), 1)) + +/* Left rotate by 3 */ +#define leftRotate3(a) (leftRotate(leftRotate(leftRotate((a), 1), 1), 1)) + +/* Left rotate 
by 4 */ +#define leftRotate4(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 1), 1), 1), 1)) + +/* Left rotate by 5: Rotate left by 8, then right by 3 */ +#define leftRotate5(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 8), 1), 1), 1)) + +/* Left rotate by 6: Rotate left by 8, then right by 2 */ +#define leftRotate6(a) (rightRotate(rightRotate(leftRotate((a), 8), 1), 1)) + +/* Left rotate by 7: Rotate left by 8, then right by 1 */ +#define leftRotate7(a) (rightRotate(leftRotate((a), 8), 1)) + +/* Left rotate by 8 */ +#define leftRotate8(a) (leftRotate((a), 8)) + +/* Left rotate by 9: Rotate left by 8, then left by 1 */ +#define leftRotate9(a) (leftRotate(leftRotate((a), 8), 1)) + +/* Left rotate by 10: Rotate left by 8, then left by 2 */ +#define leftRotate10(a) (leftRotate(leftRotate(leftRotate((a), 8), 1), 1)) + +/* Left rotate by 11: Rotate left by 8, then left by 3 */ +#define leftRotate11(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 8), 1), 1), 1)) + +/* Left rotate by 12: Rotate left by 16, then right by 4 */ +#define leftRotate12(a) (rightRotate(rightRotate(rightRotate(rightRotate(leftRotate((a), 16), 1), 1), 1), 1)) + +/* Left rotate by 13: Rotate left by 16, then right by 3 */ +#define leftRotate13(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 16), 1), 1), 1)) + +/* Left rotate by 14: Rotate left by 16, then right by 2 */ +#define leftRotate14(a) (rightRotate(rightRotate(leftRotate((a), 16), 1), 1)) + +/* Left rotate by 15: Rotate left by 16, then right by 1 */ +#define leftRotate15(a) (rightRotate(leftRotate((a), 16), 1)) + +/* Left rotate by 16 */ +#define leftRotate16(a) (leftRotate((a), 16)) + +/* Left rotate by 17: Rotate left by 16, then left by 1 */ +#define leftRotate17(a) (leftRotate(leftRotate((a), 16), 1)) + +/* Left rotate by 18: Rotate left by 16, then left by 2 */ +#define leftRotate18(a) (leftRotate(leftRotate(leftRotate((a), 16), 1), 1)) + +/* Left rotate by 19: Rotate left by 16, then left by 3 */ +#define 
leftRotate19(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 16), 1), 1), 1)) + +/* Left rotate by 20: Rotate left by 16, then left by 4 */ +#define leftRotate20(a) (leftRotate(leftRotate(leftRotate(leftRotate(leftRotate((a), 16), 1), 1), 1), 1)) + +/* Left rotate by 21: Rotate left by 24, then right by 3 */ +#define leftRotate21(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 24), 1), 1), 1)) + +/* Left rotate by 22: Rotate left by 24, then right by 2 */ +#define leftRotate22(a) (rightRotate(rightRotate(leftRotate((a), 24), 1), 1)) + +/* Left rotate by 23: Rotate left by 24, then right by 1 */ +#define leftRotate23(a) (rightRotate(leftRotate((a), 24), 1)) + +/* Left rotate by 24 */ +#define leftRotate24(a) (leftRotate((a), 24)) + +/* Left rotate by 25: Rotate left by 24, then left by 1 */ +#define leftRotate25(a) (leftRotate(leftRotate((a), 24), 1)) + +/* Left rotate by 26: Rotate left by 24, then left by 2 */ +#define leftRotate26(a) (leftRotate(leftRotate(leftRotate((a), 24), 1), 1)) + +/* Left rotate by 27: Rotate left by 24, then left by 3 */ +#define leftRotate27(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 24), 1), 1), 1)) + +/* Left rotate by 28: Rotate right by 4 */ +#define leftRotate28(a) (rightRotate(rightRotate(rightRotate(rightRotate((a), 1), 1), 1), 1)) + +/* Left rotate by 29: Rotate right by 3 */ +#define leftRotate29(a) (rightRotate(rightRotate(rightRotate((a), 1), 1), 1)) + +/* Left rotate by 30: Rotate right by 2 */ +#define leftRotate30(a) (rightRotate(rightRotate((a), 1), 1)) + +/* Left rotate by 31: Rotate right by 1 */ +#define leftRotate31(a) (rightRotate((a), 1)) + +/* Define the 32-bit right rotations in terms of left rotations */ +#define rightRotate1(a) (leftRotate31((a))) +#define rightRotate2(a) (leftRotate30((a))) +#define rightRotate3(a) (leftRotate29((a))) +#define rightRotate4(a) (leftRotate28((a))) +#define rightRotate5(a) (leftRotate27((a))) +#define rightRotate6(a) (leftRotate26((a))) +#define 
rightRotate7(a) (leftRotate25((a))) +#define rightRotate8(a) (leftRotate24((a))) +#define rightRotate9(a) (leftRotate23((a))) +#define rightRotate10(a) (leftRotate22((a))) +#define rightRotate11(a) (leftRotate21((a))) +#define rightRotate12(a) (leftRotate20((a))) +#define rightRotate13(a) (leftRotate19((a))) +#define rightRotate14(a) (leftRotate18((a))) +#define rightRotate15(a) (leftRotate17((a))) +#define rightRotate16(a) (leftRotate16((a))) +#define rightRotate17(a) (leftRotate15((a))) +#define rightRotate18(a) (leftRotate14((a))) +#define rightRotate19(a) (leftRotate13((a))) +#define rightRotate20(a) (leftRotate12((a))) +#define rightRotate21(a) (leftRotate11((a))) +#define rightRotate22(a) (leftRotate10((a))) +#define rightRotate23(a) (leftRotate9((a))) +#define rightRotate24(a) (leftRotate8((a))) +#define rightRotate25(a) (leftRotate7((a))) +#define rightRotate26(a) (leftRotate6((a))) +#define rightRotate27(a) (leftRotate5((a))) +#define rightRotate28(a) (leftRotate4((a))) +#define rightRotate29(a) (leftRotate3((a))) +#define rightRotate30(a) (leftRotate2((a))) +#define rightRotate31(a) (leftRotate1((a))) + +#endif /* LW_CRYPTO_ROTATE32_COMPOSED */ + +/* Rotation macros for 64-bit arguments */ + +/* Generic left rotate */ +#define leftRotate_64(a, bits) \ + (__extension__ ({ \ + uint64_t _temp = (a); \ + (_temp << (bits)) | (_temp >> (64 - (bits))); \ + })) + +/* Generic right rotate */ +#define rightRotate_64(a, bits) \ + (__extension__ ({ \ + uint64_t _temp = (a); \ + (_temp >> (bits)) | (_temp << (64 - (bits))); \ + })) + +/* Left rotate by a specific number of bits. 
These macros may be replaced + * with more efficient ones on platforms that lack a barrel shifter */ +#define leftRotate1_64(a) (leftRotate_64((a), 1)) +#define leftRotate2_64(a) (leftRotate_64((a), 2)) +#define leftRotate3_64(a) (leftRotate_64((a), 3)) +#define leftRotate4_64(a) (leftRotate_64((a), 4)) +#define leftRotate5_64(a) (leftRotate_64((a), 5)) +#define leftRotate6_64(a) (leftRotate_64((a), 6)) +#define leftRotate7_64(a) (leftRotate_64((a), 7)) +#define leftRotate8_64(a) (leftRotate_64((a), 8)) +#define leftRotate9_64(a) (leftRotate_64((a), 9)) +#define leftRotate10_64(a) (leftRotate_64((a), 10)) +#define leftRotate11_64(a) (leftRotate_64((a), 11)) +#define leftRotate12_64(a) (leftRotate_64((a), 12)) +#define leftRotate13_64(a) (leftRotate_64((a), 13)) +#define leftRotate14_64(a) (leftRotate_64((a), 14)) +#define leftRotate15_64(a) (leftRotate_64((a), 15)) +#define leftRotate16_64(a) (leftRotate_64((a), 16)) +#define leftRotate17_64(a) (leftRotate_64((a), 17)) +#define leftRotate18_64(a) (leftRotate_64((a), 18)) +#define leftRotate19_64(a) (leftRotate_64((a), 19)) +#define leftRotate20_64(a) (leftRotate_64((a), 20)) +#define leftRotate21_64(a) (leftRotate_64((a), 21)) +#define leftRotate22_64(a) (leftRotate_64((a), 22)) +#define leftRotate23_64(a) (leftRotate_64((a), 23)) +#define leftRotate24_64(a) (leftRotate_64((a), 24)) +#define leftRotate25_64(a) (leftRotate_64((a), 25)) +#define leftRotate26_64(a) (leftRotate_64((a), 26)) +#define leftRotate27_64(a) (leftRotate_64((a), 27)) +#define leftRotate28_64(a) (leftRotate_64((a), 28)) +#define leftRotate29_64(a) (leftRotate_64((a), 29)) +#define leftRotate30_64(a) (leftRotate_64((a), 30)) +#define leftRotate31_64(a) (leftRotate_64((a), 31)) +#define leftRotate32_64(a) (leftRotate_64((a), 32)) +#define leftRotate33_64(a) (leftRotate_64((a), 33)) +#define leftRotate34_64(a) (leftRotate_64((a), 34)) +#define leftRotate35_64(a) (leftRotate_64((a), 35)) +#define leftRotate36_64(a) (leftRotate_64((a), 36)) +#define 
leftRotate37_64(a) (leftRotate_64((a), 37)) +#define leftRotate38_64(a) (leftRotate_64((a), 38)) +#define leftRotate39_64(a) (leftRotate_64((a), 39)) +#define leftRotate40_64(a) (leftRotate_64((a), 40)) +#define leftRotate41_64(a) (leftRotate_64((a), 41)) +#define leftRotate42_64(a) (leftRotate_64((a), 42)) +#define leftRotate43_64(a) (leftRotate_64((a), 43)) +#define leftRotate44_64(a) (leftRotate_64((a), 44)) +#define leftRotate45_64(a) (leftRotate_64((a), 45)) +#define leftRotate46_64(a) (leftRotate_64((a), 46)) +#define leftRotate47_64(a) (leftRotate_64((a), 47)) +#define leftRotate48_64(a) (leftRotate_64((a), 48)) +#define leftRotate49_64(a) (leftRotate_64((a), 49)) +#define leftRotate50_64(a) (leftRotate_64((a), 50)) +#define leftRotate51_64(a) (leftRotate_64((a), 51)) +#define leftRotate52_64(a) (leftRotate_64((a), 52)) +#define leftRotate53_64(a) (leftRotate_64((a), 53)) +#define leftRotate54_64(a) (leftRotate_64((a), 54)) +#define leftRotate55_64(a) (leftRotate_64((a), 55)) +#define leftRotate56_64(a) (leftRotate_64((a), 56)) +#define leftRotate57_64(a) (leftRotate_64((a), 57)) +#define leftRotate58_64(a) (leftRotate_64((a), 58)) +#define leftRotate59_64(a) (leftRotate_64((a), 59)) +#define leftRotate60_64(a) (leftRotate_64((a), 60)) +#define leftRotate61_64(a) (leftRotate_64((a), 61)) +#define leftRotate62_64(a) (leftRotate_64((a), 62)) +#define leftRotate63_64(a) (leftRotate_64((a), 63)) + +/* Right rotate by a specific number of bits. 
These macros may be replaced + * with more efficient ones on platforms that lack a barrel shifter */ +#define rightRotate1_64(a) (rightRotate_64((a), 1)) +#define rightRotate2_64(a) (rightRotate_64((a), 2)) +#define rightRotate3_64(a) (rightRotate_64((a), 3)) +#define rightRotate4_64(a) (rightRotate_64((a), 4)) +#define rightRotate5_64(a) (rightRotate_64((a), 5)) +#define rightRotate6_64(a) (rightRotate_64((a), 6)) +#define rightRotate7_64(a) (rightRotate_64((a), 7)) +#define rightRotate8_64(a) (rightRotate_64((a), 8)) +#define rightRotate9_64(a) (rightRotate_64((a), 9)) +#define rightRotate10_64(a) (rightRotate_64((a), 10)) +#define rightRotate11_64(a) (rightRotate_64((a), 11)) +#define rightRotate12_64(a) (rightRotate_64((a), 12)) +#define rightRotate13_64(a) (rightRotate_64((a), 13)) +#define rightRotate14_64(a) (rightRotate_64((a), 14)) +#define rightRotate15_64(a) (rightRotate_64((a), 15)) +#define rightRotate16_64(a) (rightRotate_64((a), 16)) +#define rightRotate17_64(a) (rightRotate_64((a), 17)) +#define rightRotate18_64(a) (rightRotate_64((a), 18)) +#define rightRotate19_64(a) (rightRotate_64((a), 19)) +#define rightRotate20_64(a) (rightRotate_64((a), 20)) +#define rightRotate21_64(a) (rightRotate_64((a), 21)) +#define rightRotate22_64(a) (rightRotate_64((a), 22)) +#define rightRotate23_64(a) (rightRotate_64((a), 23)) +#define rightRotate24_64(a) (rightRotate_64((a), 24)) +#define rightRotate25_64(a) (rightRotate_64((a), 25)) +#define rightRotate26_64(a) (rightRotate_64((a), 26)) +#define rightRotate27_64(a) (rightRotate_64((a), 27)) +#define rightRotate28_64(a) (rightRotate_64((a), 28)) +#define rightRotate29_64(a) (rightRotate_64((a), 29)) +#define rightRotate30_64(a) (rightRotate_64((a), 30)) +#define rightRotate31_64(a) (rightRotate_64((a), 31)) +#define rightRotate32_64(a) (rightRotate_64((a), 32)) +#define rightRotate33_64(a) (rightRotate_64((a), 33)) +#define rightRotate34_64(a) (rightRotate_64((a), 34)) +#define rightRotate35_64(a) 
(rightRotate_64((a), 35)) +#define rightRotate36_64(a) (rightRotate_64((a), 36)) +#define rightRotate37_64(a) (rightRotate_64((a), 37)) +#define rightRotate38_64(a) (rightRotate_64((a), 38)) +#define rightRotate39_64(a) (rightRotate_64((a), 39)) +#define rightRotate40_64(a) (rightRotate_64((a), 40)) +#define rightRotate41_64(a) (rightRotate_64((a), 41)) +#define rightRotate42_64(a) (rightRotate_64((a), 42)) +#define rightRotate43_64(a) (rightRotate_64((a), 43)) +#define rightRotate44_64(a) (rightRotate_64((a), 44)) +#define rightRotate45_64(a) (rightRotate_64((a), 45)) +#define rightRotate46_64(a) (rightRotate_64((a), 46)) +#define rightRotate47_64(a) (rightRotate_64((a), 47)) +#define rightRotate48_64(a) (rightRotate_64((a), 48)) +#define rightRotate49_64(a) (rightRotate_64((a), 49)) +#define rightRotate50_64(a) (rightRotate_64((a), 50)) +#define rightRotate51_64(a) (rightRotate_64((a), 51)) +#define rightRotate52_64(a) (rightRotate_64((a), 52)) +#define rightRotate53_64(a) (rightRotate_64((a), 53)) +#define rightRotate54_64(a) (rightRotate_64((a), 54)) +#define rightRotate55_64(a) (rightRotate_64((a), 55)) +#define rightRotate56_64(a) (rightRotate_64((a), 56)) +#define rightRotate57_64(a) (rightRotate_64((a), 57)) +#define rightRotate58_64(a) (rightRotate_64((a), 58)) +#define rightRotate59_64(a) (rightRotate_64((a), 59)) +#define rightRotate60_64(a) (rightRotate_64((a), 60)) +#define rightRotate61_64(a) (rightRotate_64((a), 61)) +#define rightRotate62_64(a) (rightRotate_64((a), 62)) +#define rightRotate63_64(a) (rightRotate_64((a), 63)) + +/* Rotate a 16-bit value left by a number of bits */ +#define leftRotate_16(a, bits) \ + (__extension__ ({ \ + uint16_t _temp = (a); \ + (_temp << (bits)) | (_temp >> (16 - (bits))); \ + })) + +/* Rotate a 16-bit value right by a number of bits */ +#define rightRotate_16(a, bits) \ + (__extension__ ({ \ + uint16_t _temp = (a); \ + (_temp >> (bits)) | (_temp << (16 - (bits))); \ + })) + +/* Left rotate by a specific number of 
bits. These macros may be replaced + * with more efficient ones on platforms that lack a barrel shifter */ +#define leftRotate1_16(a) (leftRotate_16((a), 1)) +#define leftRotate2_16(a) (leftRotate_16((a), 2)) +#define leftRotate3_16(a) (leftRotate_16((a), 3)) +#define leftRotate4_16(a) (leftRotate_16((a), 4)) +#define leftRotate5_16(a) (leftRotate_16((a), 5)) +#define leftRotate6_16(a) (leftRotate_16((a), 6)) +#define leftRotate7_16(a) (leftRotate_16((a), 7)) +#define leftRotate8_16(a) (leftRotate_16((a), 8)) +#define leftRotate9_16(a) (leftRotate_16((a), 9)) +#define leftRotate10_16(a) (leftRotate_16((a), 10)) +#define leftRotate11_16(a) (leftRotate_16((a), 11)) +#define leftRotate12_16(a) (leftRotate_16((a), 12)) +#define leftRotate13_16(a) (leftRotate_16((a), 13)) +#define leftRotate14_16(a) (leftRotate_16((a), 14)) +#define leftRotate15_16(a) (leftRotate_16((a), 15)) + +/* Right rotate by a specific number of bits. These macros may be replaced + * with more efficient ones on platforms that lack a barrel shifter */ +#define rightRotate1_16(a) (rightRotate_16((a), 1)) +#define rightRotate2_16(a) (rightRotate_16((a), 2)) +#define rightRotate3_16(a) (rightRotate_16((a), 3)) +#define rightRotate4_16(a) (rightRotate_16((a), 4)) +#define rightRotate5_16(a) (rightRotate_16((a), 5)) +#define rightRotate6_16(a) (rightRotate_16((a), 6)) +#define rightRotate7_16(a) (rightRotate_16((a), 7)) +#define rightRotate8_16(a) (rightRotate_16((a), 8)) +#define rightRotate9_16(a) (rightRotate_16((a), 9)) +#define rightRotate10_16(a) (rightRotate_16((a), 10)) +#define rightRotate11_16(a) (rightRotate_16((a), 11)) +#define rightRotate12_16(a) (rightRotate_16((a), 12)) +#define rightRotate13_16(a) (rightRotate_16((a), 13)) +#define rightRotate14_16(a) (rightRotate_16((a), 14)) +#define rightRotate15_16(a) (rightRotate_16((a), 15)) + +/* Rotate an 8-bit value left by a number of bits. + * The uint8_t operand is promoted to int before it is shifted, so the + * result is cast back to uint8_t to discard bits carried above bit 7. */ +#define leftRotate_8(a, bits) \ + (__extension__ ({ \ + uint8_t _temp = (a); \ + (uint8_t)((_temp << (bits)) |
(_temp >> (8 - (bits)))); \ + })) + +/* Rotate an 8-bit value right by a number of bits. + * The uint8_t operand is promoted to int before it is shifted, so the + * result is cast back to uint8_t to discard bits carried above bit 7. */ +#define rightRotate_8(a, bits) \ + (__extension__ ({ \ + uint8_t _temp = (a); \ + (uint8_t)((_temp >> (bits)) | (_temp << (8 - (bits)))); \ + })) + +/* Left rotate by a specific number of bits. These macros may be replaced + * with more efficient ones on platforms that lack a barrel shifter */ +#define leftRotate1_8(a) (leftRotate_8((a), 1)) +#define leftRotate2_8(a) (leftRotate_8((a), 2)) +#define leftRotate3_8(a) (leftRotate_8((a), 3)) +#define leftRotate4_8(a) (leftRotate_8((a), 4)) +#define leftRotate5_8(a) (leftRotate_8((a), 5)) +#define leftRotate6_8(a) (leftRotate_8((a), 6)) +#define leftRotate7_8(a) (leftRotate_8((a), 7)) + +/* Right rotate by a specific number of bits. These macros may be replaced + * with more efficient ones on platforms that lack a barrel shifter */ +#define rightRotate1_8(a) (rightRotate_8((a), 1)) +#define rightRotate2_8(a) (rightRotate_8((a), 2)) +#define rightRotate3_8(a) (rightRotate_8((a), 3)) +#define rightRotate4_8(a) (rightRotate_8((a), 4)) +#define rightRotate5_8(a) (rightRotate_8((a), 5)) +#define rightRotate6_8(a) (rightRotate_8((a), 6)) +#define rightRotate7_8(a) (rightRotate_8((a), 7)) + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/internal-xoodoo-avr.S b/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/internal-xoodoo-avr.S new file mode 100644 index 0000000..629c19d --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/internal-xoodoo-avr.S @@ -0,0 +1,935 @@ +#if defined(__AVR__) +#include <avr/io.h> +/* Automatically generated - do not edit */ + + .text +.global xoodoo_permute + .type xoodoo_permute, @function +xoodoo_permute: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + movw r30,r24 +.L__stack_usage = 16 + ldi r18,88 + mov r19,r1 + rcall 34f + ldi r18,56 + rcall 34f + ldi
r18,192 + ldi r19,3 + rcall 34f + ldi r18,208 + mov r19,r1 + rcall 34f + ldi r18,32 + ldi r19,1 + rcall 34f + ldi r18,20 + mov r19,r1 + rcall 34f + ldi r18,96 + rcall 34f + ldi r18,44 + rcall 34f + ldi r18,128 + ldi r19,3 + rcall 34f + ldi r18,240 + mov r19,r1 + rcall 34f + ldi r18,160 + ldi r19,1 + rcall 34f + ldi r18,18 + mov r19,r1 + rcall 34f + rjmp 888f +34: + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r0,Z+28 + eor r6,r0 + ldd r0,Z+29 + eor r7,r0 + ldd r0,Z+30 + eor r8,r0 + ldd r0,Z+31 + eor r9,r0 + ldd r0,Z+44 + eor r6,r0 + ldd r0,Z+45 + eor r7,r0 + ldd r0,Z+46 + eor r8,r0 + ldd r0,Z+47 + eor r9,r0 + ld r20,Z + ldd r21,Z+1 + ldd r22,Z+2 + ldd r23,Z+3 + ldd r26,Z+16 + ldd r27,Z+17 + ldd r28,Z+18 + ldd r29,Z+19 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + movw r10,r20 + movw r12,r22 + eor r10,r26 + eor r11,r27 + eor r12,r28 + eor r13,r29 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r14,r6 + movw r24,r8 + mov r0,r1 + lsr r9 + ror r8 + ror r7 + ror r6 + ror r0 + lsr r9 + ror r8 + ror r7 + ror r6 + ror r0 + lsr r9 + ror r8 + ror r7 + ror r6 + ror r0 + or r9,r0 + mov r0,r1 + lsr r25 + ror r24 + ror r15 + ror r14 + ror r0 + lsr r25 + ror r24 + ror r15 + ror r14 + ror r0 + or r25,r0 + eor r9,r24 + eor r6,r25 + eor r7,r14 + eor r8,r15 + movw r14,r10 + movw r24,r12 + mov r0,r1 + lsr r13 + ror r12 + ror r11 + ror r10 + ror r0 + lsr r13 + ror r12 + ror r11 + ror r10 + ror r0 + lsr r13 + ror r12 + ror r11 + ror r10 + ror r0 + or r13,r0 + mov r0,r1 + lsr r25 + ror r24 + ror r15 + ror r14 + ror r0 + lsr r25 + ror r24 + ror r15 + ror r14 + ror r0 + or r25,r0 + eor r13,r24 + eor r10,r25 + eor r11,r14 + eor r12,r15 + eor r20,r9 + eor r21,r6 + eor r22,r7 + eor r23,r8 + eor r26,r9 + eor r27,r6 + eor r28,r7 + eor r29,r8 + eor r2,r9 + eor r3,r6 + eor r4,r7 + eor r5,r8 + st Z,r20 + std Z+1,r21 + std Z+2,r22 + std Z+3,r23 + std Z+16,r26 + std Z+17,r27 + std Z+18,r28 + std Z+19,r29 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + 
ldd r20,Z+4 + ldd r21,Z+5 + ldd r22,Z+6 + ldd r23,Z+7 + ldd r26,Z+20 + ldd r27,Z+21 + ldd r28,Z+22 + ldd r29,Z+23 + ldd r2,Z+36 + ldd r3,Z+37 + ldd r4,Z+38 + ldd r5,Z+39 + movw r6,r20 + movw r8,r22 + eor r6,r26 + eor r7,r27 + eor r8,r28 + eor r9,r29 + eor r6,r2 + eor r7,r3 + eor r8,r4 + eor r9,r5 + movw r14,r6 + movw r24,r8 + mov r0,r1 + lsr r9 + ror r8 + ror r7 + ror r6 + ror r0 + lsr r9 + ror r8 + ror r7 + ror r6 + ror r0 + lsr r9 + ror r8 + ror r7 + ror r6 + ror r0 + or r9,r0 + mov r0,r1 + lsr r25 + ror r24 + ror r15 + ror r14 + ror r0 + lsr r25 + ror r24 + ror r15 + ror r14 + ror r0 + or r25,r0 + eor r9,r24 + eor r6,r25 + eor r7,r14 + eor r8,r15 + eor r20,r13 + eor r21,r10 + eor r22,r11 + eor r23,r12 + eor r26,r13 + eor r27,r10 + eor r28,r11 + eor r29,r12 + eor r2,r13 + eor r3,r10 + eor r4,r11 + eor r5,r12 + std Z+4,r20 + std Z+5,r21 + std Z+6,r22 + std Z+7,r23 + std Z+20,r26 + std Z+21,r27 + std Z+22,r28 + std Z+23,r29 + std Z+36,r2 + std Z+37,r3 + std Z+38,r4 + std Z+39,r5 + ldd r20,Z+8 + ldd r21,Z+9 + ldd r22,Z+10 + ldd r23,Z+11 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r28,Z+26 + ldd r29,Z+27 + ldd r2,Z+40 + ldd r3,Z+41 + ldd r4,Z+42 + ldd r5,Z+43 + movw r10,r20 + movw r12,r22 + eor r10,r26 + eor r11,r27 + eor r12,r28 + eor r13,r29 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r14,r10 + movw r24,r12 + mov r0,r1 + lsr r13 + ror r12 + ror r11 + ror r10 + ror r0 + lsr r13 + ror r12 + ror r11 + ror r10 + ror r0 + lsr r13 + ror r12 + ror r11 + ror r10 + ror r0 + or r13,r0 + mov r0,r1 + lsr r25 + ror r24 + ror r15 + ror r14 + ror r0 + lsr r25 + ror r24 + ror r15 + ror r14 + ror r0 + or r25,r0 + eor r13,r24 + eor r10,r25 + eor r11,r14 + eor r12,r15 + eor r20,r9 + eor r21,r6 + eor r22,r7 + eor r23,r8 + eor r26,r9 + eor r27,r6 + eor r28,r7 + eor r29,r8 + eor r2,r9 + eor r3,r6 + eor r4,r7 + eor r5,r8 + std Z+8,r20 + std Z+9,r21 + std Z+10,r22 + std Z+11,r23 + std Z+24,r26 + std Z+25,r27 + std Z+26,r28 + std Z+27,r29 + std Z+40,r2 + std Z+41,r3 + std Z+42,r4 + 
std Z+43,r5 + ldd r0,Z+12 + eor r0,r13 + std Z+12,r0 + ldd r0,Z+13 + eor r0,r10 + std Z+13,r0 + ldd r0,Z+14 + eor r0,r11 + std Z+14,r0 + ldd r0,Z+15 + eor r0,r12 + std Z+15,r0 + ldd r6,Z+28 + ldd r7,Z+29 + ldd r8,Z+30 + ldd r9,Z+31 + eor r6,r13 + eor r7,r10 + eor r8,r11 + eor r9,r12 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + eor r14,r13 + eor r15,r10 + eor r24,r11 + eor r25,r12 + ldd r10,Z+24 + ldd r11,Z+25 + ldd r12,Z+26 + ldd r13,Z+27 + std Z+28,r10 + std Z+29,r11 + std Z+30,r12 + std Z+31,r13 + ldd r10,Z+20 + ldd r11,Z+21 + ldd r12,Z+22 + ldd r13,Z+23 + std Z+24,r10 + std Z+25,r11 + std Z+26,r12 + std Z+27,r13 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + std Z+20,r10 + std Z+21,r11 + std Z+22,r12 + std Z+23,r13 + std Z+16,r6 + std Z+17,r7 + std Z+18,r8 + std Z+19,r9 + ldd r6,Z+32 + ldd r7,Z+33 + ldd r8,Z+34 + ldd r9,Z+35 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + rol r8 + rol r9 + adc r6,r1 + lsl r6 + rol r7 + rol r8 + rol r9 + adc r6,r1 + lsl r6 + rol r7 + rol r8 + rol r9 + adc r6,r1 + std Z+32,r6 + std Z+33,r7 + std Z+34,r8 + std Z+35,r9 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + rol r8 + rol r9 + adc r6,r1 + lsl r6 + rol r7 + rol r8 + rol r9 + adc r6,r1 + lsl r6 + rol r7 + rol r8 + rol r9 + adc r6,r1 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + ldd r6,Z+40 + ldd r7,Z+41 + ldd r8,Z+42 + ldd r9,Z+43 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + rol r8 + rol r9 + adc r6,r1 + lsl r6 + rol r7 + rol r8 + rol r9 + adc r6,r1 + lsl r6 + rol r7 + rol r8 + rol r9 + adc r6,r1 + std Z+40,r6 + std Z+41,r7 + std Z+42,r8 + std Z+43,r9 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + lsl r14 + rol r15 + rol r24 + rol r25 + adc r14,r1 + lsl r14 + rol r15 + rol r24 + rol r25 + adc r14,r1 + lsl r14 + rol r15 + rol r24 + rol r25 + adc r14,r1 + std 
Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ld r20,Z + ldd r21,Z+1 + ldd r22,Z+2 + ldd r23,Z+3 + eor r20,r18 + eor r21,r19 + ldd r26,Z+16 + ldd r27,Z+17 + ldd r28,Z+18 + ldd r29,Z+19 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + movw r6,r2 + movw r8,r4 + mov r0,r26 + com r0 + and r6,r0 + mov r0,r27 + com r0 + and r7,r0 + mov r0,r28 + com r0 + and r8,r0 + mov r0,r29 + com r0 + and r9,r0 + eor r20,r6 + eor r21,r7 + eor r22,r8 + eor r23,r9 + st Z,r20 + std Z+1,r21 + std Z+2,r22 + std Z+3,r23 + movw r6,r20 + movw r8,r22 + mov r0,r2 + com r0 + and r6,r0 + mov r0,r3 + com r0 + and r7,r0 + mov r0,r4 + com r0 + and r8,r0 + mov r0,r5 + com r0 + and r9,r0 + eor r26,r6 + eor r27,r7 + eor r28,r8 + eor r29,r9 + std Z+16,r26 + std Z+17,r27 + std Z+18,r28 + std Z+19,r29 + mov r0,r20 + com r0 + and r26,r0 + mov r0,r21 + com r0 + and r27,r0 + mov r0,r22 + com r0 + and r28,r0 + mov r0,r23 + com r0 + and r29,r0 + eor r2,r26 + eor r3,r27 + eor r4,r28 + eor r5,r29 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + ldd r20,Z+4 + ldd r21,Z+5 + ldd r22,Z+6 + ldd r23,Z+7 + ldd r26,Z+20 + ldd r27,Z+21 + ldd r28,Z+22 + ldd r29,Z+23 + ldd r2,Z+36 + ldd r3,Z+37 + ldd r4,Z+38 + ldd r5,Z+39 + movw r6,r2 + movw r8,r4 + mov r0,r26 + com r0 + and r6,r0 + mov r0,r27 + com r0 + and r7,r0 + mov r0,r28 + com r0 + and r8,r0 + mov r0,r29 + com r0 + and r9,r0 + eor r20,r6 + eor r21,r7 + eor r22,r8 + eor r23,r9 + std Z+4,r20 + std Z+5,r21 + std Z+6,r22 + std Z+7,r23 + movw r6,r20 + movw r8,r22 + mov r0,r2 + com r0 + and r6,r0 + mov r0,r3 + com r0 + and r7,r0 + mov r0,r4 + com r0 + and r8,r0 + mov r0,r5 + com r0 + and r9,r0 + eor r26,r6 + eor r27,r7 + eor r28,r8 + eor r29,r9 + std Z+20,r26 + std Z+21,r27 + std Z+22,r28 + std Z+23,r29 + mov r0,r20 + com r0 + and r26,r0 + mov r0,r21 + com r0 + and r27,r0 + mov r0,r22 + com r0 + and r28,r0 + mov r0,r23 + com r0 + and r29,r0 + eor r2,r26 + eor r3,r27 + eor r4,r28 + eor r5,r29 + std Z+36,r2 + std Z+37,r3 + std Z+38,r4 + std Z+39,r5 
+ ldd r20,Z+8 + ldd r21,Z+9 + ldd r22,Z+10 + ldd r23,Z+11 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r28,Z+26 + ldd r29,Z+27 + ldd r2,Z+40 + ldd r3,Z+41 + ldd r4,Z+42 + ldd r5,Z+43 + movw r6,r2 + movw r8,r4 + mov r0,r26 + com r0 + and r6,r0 + mov r0,r27 + com r0 + and r7,r0 + mov r0,r28 + com r0 + and r8,r0 + mov r0,r29 + com r0 + and r9,r0 + eor r20,r6 + eor r21,r7 + eor r22,r8 + eor r23,r9 + std Z+8,r20 + std Z+9,r21 + std Z+10,r22 + std Z+11,r23 + movw r6,r20 + movw r8,r22 + mov r0,r2 + com r0 + and r6,r0 + mov r0,r3 + com r0 + and r7,r0 + mov r0,r4 + com r0 + and r8,r0 + mov r0,r5 + com r0 + and r9,r0 + eor r26,r6 + eor r27,r7 + eor r28,r8 + eor r29,r9 + std Z+24,r26 + std Z+25,r27 + std Z+26,r28 + std Z+27,r29 + mov r0,r20 + com r0 + and r26,r0 + mov r0,r21 + com r0 + and r27,r0 + mov r0,r22 + com r0 + and r28,r0 + mov r0,r23 + com r0 + and r29,r0 + eor r2,r26 + eor r3,r27 + eor r4,r28 + eor r5,r29 + std Z+40,r2 + std Z+41,r3 + std Z+42,r4 + std Z+43,r5 + ldd r20,Z+12 + ldd r21,Z+13 + ldd r22,Z+14 + ldd r23,Z+15 + ldd r26,Z+28 + ldd r27,Z+29 + ldd r28,Z+30 + ldd r29,Z+31 + ldd r2,Z+44 + ldd r3,Z+45 + ldd r4,Z+46 + ldd r5,Z+47 + movw r6,r2 + movw r8,r4 + mov r0,r26 + com r0 + and r6,r0 + mov r0,r27 + com r0 + and r7,r0 + mov r0,r28 + com r0 + and r8,r0 + mov r0,r29 + com r0 + and r9,r0 + eor r20,r6 + eor r21,r7 + eor r22,r8 + eor r23,r9 + std Z+12,r20 + std Z+13,r21 + std Z+14,r22 + std Z+15,r23 + movw r6,r20 + movw r8,r22 + mov r0,r2 + com r0 + and r6,r0 + mov r0,r3 + com r0 + and r7,r0 + mov r0,r4 + com r0 + and r8,r0 + mov r0,r5 + com r0 + and r9,r0 + eor r26,r6 + eor r27,r7 + eor r28,r8 + eor r29,r9 + std Z+28,r26 + std Z+29,r27 + std Z+30,r28 + std Z+31,r29 + mov r0,r20 + com r0 + and r26,r0 + mov r0,r21 + com r0 + and r27,r0 + mov r0,r22 + com r0 + and r28,r0 + mov r0,r23 + com r0 + and r29,r0 + eor r2,r26 + eor r3,r27 + eor r4,r28 + eor r5,r29 + std Z+44,r2 + std Z+45,r3 + std Z+46,r4 + std Z+47,r5 + ldd r6,Z+16 + ldd r7,Z+17 + ldd r8,Z+18 + ldd r9,Z+19 + lsl 
r6 + rol r7 + rol r8 + rol r9 + adc r6,r1 + std Z+16,r6 + std Z+17,r7 + std Z+18,r8 + std Z+19,r9 + ldd r6,Z+20 + ldd r7,Z+21 + ldd r8,Z+22 + ldd r9,Z+23 + lsl r6 + rol r7 + rol r8 + rol r9 + adc r6,r1 + std Z+20,r6 + std Z+21,r7 + std Z+22,r8 + std Z+23,r9 + ldd r6,Z+24 + ldd r7,Z+25 + ldd r8,Z+26 + ldd r9,Z+27 + lsl r6 + rol r7 + rol r8 + rol r9 + adc r6,r1 + std Z+24,r6 + std Z+25,r7 + std Z+26,r8 + std Z+27,r9 + ldd r6,Z+28 + ldd r7,Z+29 + ldd r8,Z+30 + ldd r9,Z+31 + lsl r6 + rol r7 + rol r8 + rol r9 + adc r6,r1 + std Z+28,r6 + std Z+29,r7 + std Z+30,r8 + std Z+31,r9 + ldd r6,Z+40 + ldd r7,Z+41 + ldd r8,Z+42 + ldd r9,Z+43 + ldd r10,Z+44 + ldd r11,Z+45 + ldd r12,Z+46 + ldd r13,Z+47 + ldd r14,Z+32 + ldd r15,Z+33 + ldd r24,Z+34 + ldd r25,Z+35 + std Z+40,r25 + std Z+41,r14 + std Z+42,r15 + std Z+43,r24 + ldd r14,Z+36 + ldd r15,Z+37 + ldd r24,Z+38 + ldd r25,Z+39 + std Z+44,r25 + std Z+45,r14 + std Z+46,r15 + std Z+47,r24 + std Z+32,r9 + std Z+33,r6 + std Z+34,r7 + std Z+35,r8 + std Z+36,r13 + std Z+37,r10 + std Z+38,r11 + std Z+39,r12 + ret +888: + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size xoodoo_permute, .-xoodoo_permute + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/internal-xoodoo.c b/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/internal-xoodoo.c new file mode 100644 index 0000000..59bb8bf --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/internal-xoodoo.c @@ -0,0 +1,166 @@ +/* + * Copyright (C) 2020 Southern Storm Software, Pty Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "internal-xoodoo.h" + +#if !defined(__AVR__) +/* Portable C implementation of the Xoodoo permutation; this function is + * compiled only when __AVR__ is not defined. + * + * The twelve 32-bit state words are copied into locals named xRC, where + * R is the row and C the column of state->S, processed for XOODOO_ROUNDS + * rounds, and then written back to the same state object. */ +void xoodoo_permute(xoodoo_state_t *state) +{ + static uint16_t const rc[XOODOO_ROUNDS] = { + 0x0058, 0x0038, 0x03C0, 0x00D0, 0x0120, 0x0014, + 0x0060, 0x002C, 0x0380, 0x00F0, 0x01A0, 0x0012 + }; /* one constant per round, consumed by the iota step */ + uint8_t round; + uint32_t x00, x01, x02, x03; + uint32_t x10, x11, x12, x13; + uint32_t x20, x21, x22, x23; + uint32_t t1, t2; /* scratch words for column parities and word moves */ + + /* Load the state and convert from little-endian byte order */ +#if defined(LW_UTIL_LITTLE_ENDIAN) + x00 = state->S[0][0]; + x01 = state->S[0][1]; + x02 = state->S[0][2]; + x03 = state->S[0][3]; + x10 = state->S[1][0]; + x11 = state->S[1][1]; + x12 = state->S[1][2]; + x13 = state->S[1][3]; + x20 = state->S[2][0]; + x21 = state->S[2][1]; + x22 = state->S[2][2]; + x23 = state->S[2][3]; +#else + x00 = le_load_word32(state->B); + x01 = le_load_word32(state->B + 4); + x02 = le_load_word32(state->B + 8); + x03 = le_load_word32(state->B + 12); + x10 = le_load_word32(state->B + 16); + x11 = le_load_word32(state->B + 20); + x12 = le_load_word32(state->B + 24); + x13 = le_load_word32(state->B + 28); + x20 = le_load_word32(state->B + 32); + x21 = le_load_word32(state->B + 36); + x22 = le_load_word32(state->B + 40); + x23 = le_load_word32(state->B + 44); +#endif + + /* Perform all permutation rounds */ + for (round = 0; round < XOODOO_ROUNDS; ++round) { + /* Optimization ideas from the Xoodoo implementation here: + * https://github.com/XKCP/XKCP/tree/master/lib/low/Xoodoo/Optimized */ + + /* Step theta: Mix column parity. + * Each column is XORed with leftRotate5(p) ^ leftRotate14(p), where p + * is the parity of the column before it; column 3 feeds column 0. + * Every parity is taken before its own column is modified. */ + t1 = x03 ^ x13 ^ x23; + t2 = x00 ^ x10 ^ x20; + t1 = leftRotate5(t1) ^ leftRotate14(t1); + t2 = leftRotate5(t2) ^ leftRotate14(t2); + x00 ^= t1; + x10 ^= t1; + x20 ^= t1; + t1 = x01 ^ x11 ^ x21; + t1 = leftRotate5(t1) ^ leftRotate14(t1); + x01 ^= t2; + x11 ^= t2; + x21 ^= t2; + t2 = x02 ^ x12 ^ x22; + t2 = leftRotate5(t2) ^ leftRotate14(t2); + x02 ^= t1; + x12 ^= t1; + x22 ^= t1; + x03 ^= t2; + x13 ^= t2; + x23 ^= t2; + + /* Step rho-west: Plane shift. + * Row 1 moves one column over (x10 takes the old x13) and every + * word of row 2 is rotated left by 11 bits. */ + t1 = x13; +
x13 = x12; + x12 = x11; + x11 = x10; + x10 = t1; + x20 = leftRotate11(x20); + x21 = leftRotate11(x21); + x22 = leftRotate11(x22); + x23 = leftRotate11(x23); + + /* Step iota: Add the round constant to the state */ + x00 ^= rc[round]; /* only word x00 receives the constant */ + + /* Step chi: Non-linear layer. + * The three words of each column are updated in sequence, so each + * statement uses the values written by the statements before it. */ + x00 ^= (~x10) & x20; + x10 ^= (~x20) & x00; + x20 ^= (~x00) & x10; + x01 ^= (~x11) & x21; + x11 ^= (~x21) & x01; + x21 ^= (~x01) & x11; + x02 ^= (~x12) & x22; + x12 ^= (~x22) & x02; + x22 ^= (~x02) & x12; + x03 ^= (~x13) & x23; + x13 ^= (~x23) & x03; + x23 ^= (~x03) & x13; + + /* Step rho-east: Plane shift. + * Row 1 words are rotated left by 1 bit; row 2 words are rotated + * left by 8 bits and exchanged two columns apart. */ + x10 = leftRotate1(x10); + x11 = leftRotate1(x11); + x12 = leftRotate1(x12); + x13 = leftRotate1(x13); + t1 = leftRotate8(x22); + t2 = leftRotate8(x23); + x22 = leftRotate8(x20); + x23 = leftRotate8(x21); + x20 = t1; + x21 = t2; + } + + /* Convert back into little-endian and store to the output state */ +#if defined(LW_UTIL_LITTLE_ENDIAN) + state->S[0][0] = x00; + state->S[0][1] = x01; + state->S[0][2] = x02; + state->S[0][3] = x03; + state->S[1][0] = x10; + state->S[1][1] = x11; + state->S[1][2] = x12; + state->S[1][3] = x13; + state->S[2][0] = x20; + state->S[2][1] = x21; + state->S[2][2] = x22; + state->S[2][3] = x23; +#else + le_store_word32(state->B, x00); + le_store_word32(state->B + 4, x01); + le_store_word32(state->B + 8, x02); + le_store_word32(state->B + 12, x03); + le_store_word32(state->B + 16, x10); + le_store_word32(state->B + 20, x11); + le_store_word32(state->B + 24, x12); + le_store_word32(state->B + 28, x13); + le_store_word32(state->B + 32, x20); + le_store_word32(state->B + 36, x21); + le_store_word32(state->B + 40, x22); + le_store_word32(state->B + 44, x23); +#endif +} + +#endif /* !__AVR__ */ diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/internal-xoodoo.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/internal-xoodoo.h new file mode 100644 index 0000000..f6eddd8 --- /dev/null +++
b/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/internal-xoodoo.h @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2020 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef LW_INTERNAL_XOODOO_H +#define LW_INTERNAL_XOODOO_H + +#include "internal-util.h" + +/** + * \file internal-xoodoo.h + * \brief Internal implementation of the Xoodoo permutation. + * + * References: https://keccak.team/xoodyak.html + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * \brief Number of rows in the Xoodoo state. + */ +#define XOODOO_ROWS 3 + +/** + * \brief Number of columns in the Xoodoo state. + */ +#define XOODOO_COLS 4 + +/** + * \brief Number of rounds for the Xoodoo permutation. + */ +#define XOODOO_ROUNDS 12 + +/** + * \brief State information for the Xoodoo permutation. 
+ */ +typedef union +{ + /** Words of the state */ + uint32_t S[XOODOO_ROWS][XOODOO_COLS]; + + /** Bytes of the state */ + uint8_t B[XOODOO_ROWS * XOODOO_COLS * sizeof(uint32_t)]; + +} xoodoo_state_t; + +/** + * \brief Permutes the Xoodoo state. + * + * \param state The Xoodoo state. + * + * The state will be in little-endian before and after the operation. + */ +void xoodoo_permute(xoodoo_state_t *state); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/xoodyak.c b/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/xoodyak.c new file mode 100644 index 0000000..1be285b --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/xoodyak.c @@ -0,0 +1,323 @@ +/* + * Copyright (C) 2020 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "xoodyak.h" +#include "internal-xoodoo.h" +#include <string.h> + +/* Descriptor for the Xoodyak AEAD cipher, initialised with its name, the + * key, nonce and tag sizes, a flags word, and the encrypt and decrypt + * entry points defined in this file. */ +aead_cipher_t const xoodyak_cipher = { + "Xoodyak", + XOODYAK_KEY_SIZE, + XOODYAK_NONCE_SIZE, + XOODYAK_TAG_SIZE, + AEAD_FLAG_LITTLE_ENDIAN, + xoodyak_aead_encrypt, + xoodyak_aead_decrypt +}; + +/* Descriptor for the Xoodyak hash, initialised with its name, the size of + * the incremental state, the digest size, a flags word, and the one-shot, + * incremental and XOF entry points defined in this file. The absorb + * function serves as both the hash update and the XOF absorb callback. */ +aead_hash_algorithm_t const xoodyak_hash_algorithm = { + "Xoodyak-Hash", + sizeof(xoodyak_hash_state_t), + XOODYAK_HASH_SIZE, + AEAD_FLAG_LITTLE_ENDIAN, + xoodyak_hash, + (aead_hash_init_t)xoodyak_hash_init, + (aead_hash_update_t)xoodyak_hash_absorb, + (aead_hash_finalize_t)xoodyak_hash_finalize, + (aead_xof_absorb_t)xoodyak_hash_absorb, + (aead_xof_squeeze_t)xoodyak_hash_squeeze +}; + +/** + * \brief Rate in bytes for absorbing data into the sponge state. + */ +#define XOODYAK_ABSORB_RATE 44 + +/** + * \brief Rate in bytes for squeezing data out of the sponge. + */ +#define XOODYAK_SQUEEZE_RATE 24 + +/** + * \brief Rate in bytes for absorbing and squeezing in hashing mode. + */ +#define XOODYAK_HASH_RATE 16 + +/** + * \brief Phase identifier for "up" mode, which indicates that a block + * permutation has just been performed. + */ +#define XOODYAK_PHASE_UP 0 + +/** + * \brief Phase identifier for "down" mode, which indicates that data has + * been absorbed but that a block permutation has not been done yet. + */ +#define XOODYAK_PHASE_DOWN 1 + +/** + * \brief Absorbs data into the Xoodoo permutation state. + * + * \param state Xoodoo permutation state. + * \param phase Points to the current phase, up or down. + * \param data Points to the data to be absorbed. + * \param len Length of the data to be absorbed.
+ */ +static void xoodyak_absorb + (xoodoo_state_t *state, uint8_t *phase, + const unsigned char *data, unsigned long long len) +{ + uint8_t domain = 0x03; /* XORed into the last state byte for the first block only */ + unsigned temp; + while (len > XOODYAK_ABSORB_RATE) { /* strictly greater, so the final block is left for the tail code */ + if (*phase != XOODYAK_PHASE_UP) + xoodoo_permute(state); /* permute unless the state was just permuted */ + lw_xor_block(state->B, data, XOODYAK_ABSORB_RATE); + state->B[XOODYAK_ABSORB_RATE] ^= 0x01; /* Padding */ + state->B[sizeof(state->B) - 1] ^= domain; + data += XOODYAK_ABSORB_RATE; + len -= XOODYAK_ABSORB_RATE; + domain = 0x00; /* later blocks carry no domain bits */ + *phase = XOODYAK_PHASE_DOWN; + } + temp = (unsigned)len; /* at most XOODYAK_ABSORB_RATE here, possibly zero */ + if (*phase != XOODYAK_PHASE_UP) + xoodoo_permute(state); + lw_xor_block(state->B, data, temp); + state->B[temp] ^= 0x01; /* Padding */ + state->B[sizeof(state->B) - 1] ^= domain; + *phase = XOODYAK_PHASE_DOWN; +} +/* Encrypts mlen bytes of m into c and appends a XOODYAK_TAG_SIZE byte tag; + * clen receives mlen + XOODYAK_TAG_SIZE. The state is seeded from the key k + * and nonce npub, then the associated data ad is absorbed before the + * message is processed. nsec is unused. Always returns 0. */ +int xoodyak_aead_encrypt + (unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k) +{ + xoodoo_state_t state; + uint8_t phase, domain; + unsigned temp; + (void)nsec; /* unused */ + + /* Set the length of the returned ciphertext */ + *clen = mlen + XOODYAK_TAG_SIZE; + + /* Initialize the state with the key and the nonce */ + memcpy(state.B, k, XOODYAK_KEY_SIZE); + memcpy(state.B + XOODYAK_KEY_SIZE, npub, XOODYAK_NONCE_SIZE); + state.B[XOODYAK_KEY_SIZE + XOODYAK_NONCE_SIZE] = XOODYAK_NONCE_SIZE; /* nonce length byte follows key and nonce */ + state.B[XOODYAK_KEY_SIZE + XOODYAK_NONCE_SIZE + 1] = 0x01; /* Padding */ + memset(state.B + XOODYAK_KEY_SIZE + XOODYAK_NONCE_SIZE + 2, 0, sizeof(state.B) - (XOODYAK_KEY_SIZE + XOODYAK_NONCE_SIZE + 2)); + state.B[sizeof(state.B) - 1] = 0x02; /* Domain separation */ + phase = XOODYAK_PHASE_DOWN; + + /* Absorb the associated data */ + xoodyak_absorb(&state, &phase, ad, adlen); + + /* Encrypt the plaintext to produce the ciphertext */ + domain = 0x80; /* applied before the first block only; cleared inside the loop */ + while (mlen > XOODYAK_SQUEEZE_RATE) { + state.B[sizeof(state.B) - 1] ^= domain; +
xoodoo_permute(&state); + lw_xor_block_2_dest(c, state.B, m, XOODYAK_SQUEEZE_RATE); /* NOTE(review): helper defined elsewhere; presumably writes state XOR m to both c and the state - confirm */ + state.B[XOODYAK_SQUEEZE_RATE] ^= 0x01; /* Padding */ + c += XOODYAK_SQUEEZE_RATE; + m += XOODYAK_SQUEEZE_RATE; + mlen -= XOODYAK_SQUEEZE_RATE; + domain = 0; + } + state.B[sizeof(state.B) - 1] ^= domain; + xoodoo_permute(&state); + temp = (unsigned)mlen; /* remaining bytes, at most XOODYAK_SQUEEZE_RATE */ + lw_xor_block_2_dest(c, state.B, m, temp); + state.B[temp] ^= 0x01; /* Padding */ + c += temp; + + /* Generate the authentication tag */ + state.B[sizeof(state.B) - 1] ^= 0x40; /* Domain separation */ + xoodoo_permute(&state); + memcpy(c, state.B, XOODYAK_TAG_SIZE); /* tag is written directly after the ciphertext */ + return 0; +} +/* Decrypts clen - XOODYAK_TAG_SIZE bytes of c into m and verifies the tag + * that follows the ciphertext; mlen receives the plaintext length. + * Returns -1 when clen is shorter than the tag, otherwise the result of + * aead_check_tag. nsec is unused. + * NOTE(review): aead_check_tag is defined elsewhere; presumably it returns + * 0 on a match and clears the plaintext on a mismatch - confirm. */ +int xoodyak_aead_decrypt + (unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + xoodoo_state_t state; + uint8_t phase, domain; + unsigned temp; + unsigned char *mtemp = m; /* start of the plaintext buffer, passed to aead_check_tag */ + (void)nsec; + + /* Validate the ciphertext length and set the return "mlen" value */ + if (clen < XOODYAK_TAG_SIZE) + return -1; + *mlen = clen - XOODYAK_TAG_SIZE; + + /* Initialize the state with the key and the nonce */ + memcpy(state.B, k, XOODYAK_KEY_SIZE); + memcpy(state.B + XOODYAK_KEY_SIZE, npub, XOODYAK_NONCE_SIZE); + state.B[XOODYAK_KEY_SIZE + XOODYAK_NONCE_SIZE] = XOODYAK_NONCE_SIZE; + state.B[XOODYAK_KEY_SIZE + XOODYAK_NONCE_SIZE + 1] = 0x01; /* Padding */ + memset(state.B + XOODYAK_KEY_SIZE + XOODYAK_NONCE_SIZE + 2, 0, sizeof(state.B) - (XOODYAK_KEY_SIZE + XOODYAK_NONCE_SIZE + 2)); + state.B[sizeof(state.B) - 1] = 0x02; /* Domain separation */ + phase = XOODYAK_PHASE_DOWN; + + /* Absorb the associated data */ + xoodyak_absorb(&state, &phase, ad, adlen); + + /* Decrypt the ciphertext to produce the plaintext */ + domain = 0x80; + clen -= XOODYAK_TAG_SIZE; /* from here on clen counts ciphertext bytes only */ + while (clen > XOODYAK_SQUEEZE_RATE) { + state.B[sizeof(state.B) - 1] ^= domain; + xoodoo_permute(&state); + lw_xor_block_swap(m, state.B,
c, XOODYAK_SQUEEZE_RATE); /* NOTE(review): helper defined elsewhere; presumably writes state XOR c to m and leaves the ciphertext bytes in the state - confirm */ + state.B[XOODYAK_SQUEEZE_RATE] ^= 0x01; /* Padding */ + c += XOODYAK_SQUEEZE_RATE; + m += XOODYAK_SQUEEZE_RATE; + clen -= XOODYAK_SQUEEZE_RATE; + domain = 0; + } + state.B[sizeof(state.B) - 1] ^= domain; + xoodoo_permute(&state); + temp = (unsigned)clen; /* remaining bytes, at most XOODYAK_SQUEEZE_RATE */ + lw_xor_block_swap(m, state.B, c, temp); + state.B[temp] ^= 0x01; /* Padding */ + c += temp; /* c now points at the received tag */ + + /* Check the authentication tag */ + state.B[sizeof(state.B) - 1] ^= 0x40; /* Domain separation */ + xoodoo_permute(&state); + return aead_check_tag(mtemp, *mlen, state.B, c, XOODYAK_TAG_SIZE); +} +/* One-shot hash: initialises a local state, absorbs inlen bytes of in and + * squeezes XOODYAK_HASH_SIZE bytes into out. Always returns 0. */ +int xoodyak_hash + (unsigned char *out, const unsigned char *in, unsigned long long inlen) +{ + xoodyak_hash_state_t state; + xoodyak_hash_init(&state); + xoodyak_hash_absorb(&state, in, inlen); + xoodyak_hash_squeeze(&state, out, XOODYAK_HASH_SIZE); + return 0; +} +/* Values stored in state->s.mode by the incremental hashing functions */ +#define XOODYAK_HASH_MODE_INIT_ABSORB 0 +#define XOODYAK_HASH_MODE_ABSORB 1 +#define XOODYAK_HASH_MODE_SQUEEZE 2 +/* Runs the permutation on the hash state buffer by viewing it as a + * xoodoo_state_t */ +#define xoodoo_hash_permute(state) \ + xoodoo_permute((xoodoo_state_t *)((state)->s.state)) +/* Clears the whole hash state and selects the initial absorb mode */ +void xoodyak_hash_init(xoodyak_hash_state_t *state) +{ + memset(state, 0, sizeof(xoodyak_hash_state_t)); + state->s.mode = XOODYAK_HASH_MODE_INIT_ABSORB; +} +/* Absorbs inlen bytes of in into the hash state. A full block is only + * padded and permuted when further input arrives, so the last block stays + * pending in the state with its byte count in state->s.count. */ +void xoodyak_hash_absorb + (xoodyak_hash_state_t *state, const unsigned char *in, + unsigned long long inlen) +{ + uint8_t domain; + unsigned temp; + + /* If we were squeezing, then restart the absorb phase */ + if (state->s.mode == XOODYAK_HASH_MODE_SQUEEZE) { + xoodoo_hash_permute(state); + state->s.mode = XOODYAK_HASH_MODE_INIT_ABSORB; + state->s.count = 0; + } + + /* The first block needs a different domain separator to the others */ + domain = (state->s.mode == XOODYAK_HASH_MODE_INIT_ABSORB) ?
0x01 : 0x00; + + /* Absorb the input data into the state */ + while (inlen > 0) { + if (state->s.count >= XOODYAK_HASH_RATE) { /* pending block is full: pad, permute and start a new one */ + state->s.state[XOODYAK_HASH_RATE] ^= 0x01; /* Padding */ + state->s.state[sizeof(state->s.state) - 1] ^= domain; + xoodoo_hash_permute(state); + state->s.mode = XOODYAK_HASH_MODE_ABSORB; + state->s.count = 0; + domain = 0x00; /* only the first block carries the domain bit */ + } + temp = XOODYAK_HASH_RATE - state->s.count; /* room left in the current block */ + if (temp > inlen) + temp = (unsigned)inlen; + lw_xor_block(state->s.state + state->s.count, in, temp); + state->s.count += temp; + in += temp; + inlen -= temp; + } +} +/* Writes outlen bytes of hash output to out. If the state is still in an + * absorb mode, the pending block is padded and permuted first. Output is + * produced XOODYAK_HASH_RATE bytes per permutation, and state->s.count + * tracks how much of the current block has already been handed out, so + * the function can be called repeatedly to continue the output stream. */ +void xoodyak_hash_squeeze + (xoodyak_hash_state_t *state, unsigned char *out, + unsigned long long outlen) +{ + uint8_t domain; + unsigned temp; + + /* If we were absorbing, then terminate the absorb phase */ + if (state->s.mode != XOODYAK_HASH_MODE_SQUEEZE) { + domain = (state->s.mode == XOODYAK_HASH_MODE_INIT_ABSORB) ? 0x01 : 0x00; /* domain bit is still owed if no block was flushed yet */ + state->s.state[state->s.count] ^= 0x01; /* Padding */ + state->s.state[sizeof(state->s.state) - 1] ^= domain; + xoodoo_hash_permute(state); + state->s.mode = XOODYAK_HASH_MODE_SQUEEZE; + state->s.count = 0; + } + + /* Squeeze data out of the state */ + while (outlen > 0) { + if (state->s.count >= XOODYAK_HASH_RATE) { + /* Padding is always at index 0 for squeezing subsequent + * blocks because the number of bytes we have absorbed + * since the previous block was squeezed out is zero */ + state->s.state[0] ^= 0x01; + xoodoo_hash_permute(state); + state->s.count = 0; + } + temp = XOODYAK_HASH_RATE - state->s.count; /* bytes still available in the current block */ + if (temp > outlen) + temp = (unsigned)outlen; + memcpy(out, state->s.state + state->s.count, temp); + state->s.count += temp; + out += temp; + outlen -= temp; + } +} +/* Produces the fixed-size digest: squeezes XOODYAK_HASH_SIZE bytes to out */ +void xoodyak_hash_finalize + (xoodyak_hash_state_t *state, unsigned char *out) +{ + xoodyak_hash_squeeze(state, out, XOODYAK_HASH_SIZE); +} diff --git a/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/xoodyak.h b/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/xoodyak.h new file
mode 100644 index 0000000..f4777d5 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakround3/rhys/xoodyak.h @@ -0,0 +1,226 @@ +/* + * Copyright (C) 2020 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef LWCRYPTO_XOODYAK_H +#define LWCRYPTO_XOODYAK_H + +#include "aead-common.h" + +/** + * \file xoodyak.h + * \brief Xoodyak authenticated encryption algorithm. + * + * Xoodyak is an authenticated encryption and hash algorithm pair based + * around the 384-bit Xoodoo permutation that is similar in structure to + * Keccak but is more efficient than Keccak on 32-bit embedded devices. + * The Cyclist mode of operation is used to convert the permutation + * into a sponge for the higher-level algorithms. + * + * The Xoodyak encryption mode has a 128-bit key, a 128-bit nonce, + * and a 128-bit authentication tag. 
The Xoodyak hashing mode has a + * 256-bit fixed hash output and can also be used as an extensible + * output function (XOF). + * + * The Xoodyak specification describes a re-keying mechanism where the + * key for one packet is used to derive the key to use on the next packet. + * This provides some resistance against side channel attacks by making + * the session key a moving target. This library does not currently + * implement re-keying. + * + * References: https://keccak.team/xoodyak.html + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * \brief Size of the key for Xoodyak in bytes. + */ +#define XOODYAK_KEY_SIZE 16 + +/** + * \brief Size of the authentication tag for Xoodyak in bytes. + */ +#define XOODYAK_TAG_SIZE 16 + +/** + * \brief Size of the nonce for Xoodyak in bytes. + */ +#define XOODYAK_NONCE_SIZE 16 + +/** + * \brief Size of the hash output for Xoodyak in bytes. + */ +#define XOODYAK_HASH_SIZE 32 + +/** + * \brief State information for Xoodyak incremental hashing modes. + */ +typedef union +{ + struct { + unsigned char state[48]; /**< Current hash state (48 bytes = 384 bits) */ + unsigned char count; /**< Number of bytes in the current block */ + unsigned char mode; /**< Hash mode: initial absorb, absorb, or squeeze */ + } s; /**< State */ + unsigned long long align; /**< For alignment of this structure */ + +} xoodyak_hash_state_t; + +/** + * \brief Meta-information block for the Xoodyak cipher. + */ +extern aead_cipher_t const xoodyak_cipher; + +/** + * \brief Meta-information block for the Xoodyak hash algorithm. + */ +extern aead_hash_algorithm_t const xoodyak_hash_algorithm; + +/** + * \brief Encrypts and authenticates a packet with Xoodyak. + * + * \param c Buffer to receive the output. + * \param clen On exit, set to the length of the output which includes + * the ciphertext and the 16 byte authentication tag. + * \param m Buffer that contains the plaintext message to encrypt. + * \param mlen Length of the plaintext message in bytes. 
+ * \param ad Buffer that contains associated data to authenticate + * along with the packet but which does not need to be encrypted. + * \param adlen Length of the associated data in bytes. + * \param nsec Secret nonce - not used by this algorithm. + * \param npub Points to the public nonce for the packet which must + * be 16 bytes in length. + * \param k Points to the 16 bytes of the key to use to encrypt the packet. + * + * \return 0 on success, or a negative value if there was an error in + * the parameters. + * + * \sa xoodyak_aead_decrypt() + */ +int xoodyak_aead_encrypt + (unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k); + +/** + * \brief Decrypts and authenticates a packet with Xoodyak. + * + * \param m Buffer to receive the plaintext message on output. + * \param mlen Receives the length of the plaintext message on output. + * \param nsec Secret nonce - not used by this algorithm. + * \param c Buffer that contains the ciphertext and authentication + * tag to decrypt. + * \param clen Length of the input data in bytes, which includes the + * ciphertext and the 16 byte authentication tag. + * \param ad Buffer that contains associated data to authenticate + * along with the packet but which does not need to be encrypted. + * \param adlen Length of the associated data in bytes. + * \param npub Points to the public nonce for the packet which must + * be 16 bytes in length. + * \param k Points to the 16 bytes of the key to use to decrypt the packet. + * + * \return 0 on success, -1 if the authentication tag was incorrect, + * or some other negative number if there was an error in the parameters. 
+ * + * \sa xoodyak_aead_encrypt() + */ +int xoodyak_aead_decrypt + (unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k); + +/** + * \brief Hashes a block of input data with Xoodyak to generate a hash value. + * + * \param out Buffer to receive the hash output which must be at least + * XOODYAK_HASH_SIZE bytes in length. + * \param in Points to the input data to be hashed. + * \param inlen Length of the input data in bytes. + * + * \return Returns zero on success or -1 if there was an error in the + * parameters. + */ +int xoodyak_hash + (unsigned char *out, const unsigned char *in, unsigned long long inlen); + +/** + * \brief Initializes the state for a Xoodyak hashing operation. + * + * \param state Hash state to be initialized. + * + * \sa xoodyak_hash_absorb(), xoodyak_hash_squeeze(), xoodyak_hash() + */ +void xoodyak_hash_init(xoodyak_hash_state_t *state); + +/** + * \brief Absorbs more input data into a Xoodyak hashing state. + * + * \param state Hash state to be updated. + * \param in Points to the input data to be absorbed into the state. + * \param inlen Length of the input data to be absorbed into the state, in bytes. + * + * \sa xoodyak_hash_init(), xoodyak_hash_squeeze() + */ +void xoodyak_hash_absorb + (xoodyak_hash_state_t *state, const unsigned char *in, + unsigned long long inlen); + +/** + * \brief Squeezes output data from a Xoodyak hashing state. + * + * \param state Hash state to squeeze the output data from. + * \param out Points to the output buffer to receive the squeezed data. + * \param outlen Number of bytes of data to squeeze out of the state. 
+ * + * \sa xoodyak_hash_init(), xoodyak_hash_absorb() + */ +void xoodyak_hash_squeeze + (xoodyak_hash_state_t *state, unsigned char *out, + unsigned long long outlen); + +/** + * \brief Returns the final hash value from a Xoodyak hashing operation. + * + * \param state Hash state to be finalized. + * \param out Points to the output buffer to receive the hash value, at least XOODYAK_HASH_SIZE bytes long. + * + * \note This is a wrapper around xoodyak_hash_squeeze() for a fixed length + * of XOODYAK_HASH_SIZE bytes. + * + * \sa xoodyak_hash_init(), xoodyak_hash_absorb(), xoodyak_hash_squeeze() + */ +void xoodyak_hash_finalize + (xoodyak_hash_state_t *state, unsigned char *out); + +#ifdef __cplusplus +} +#endif + +#endif