diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Cyclist.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Cyclist.h
new file mode 100644
index 0000000..54522bb
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Cyclist.h
@@ -0,0 +1,66 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _Cyclist_h_
+#define _Cyclist_h_
+
+#include <string.h>
+#include "align.h"
+
+#define Cyclist_ModeHash    1   /* unkeyed mode; set by <prefix>_Initialize */
+#define Cyclist_ModeKeyed   2   /* set once a key has been absorbed */
+
+#define Cyclist_PhaseDown   1   /* last step added input to the state */
+#define Cyclist_PhaseUp     2   /* last step applied the permutation */
+
+#ifdef OUTPUT /* instance additionally carries a shadow state and a log FILE */
+
+#include <stdio.h>
+
+#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
+    ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
+        uint8_t         state[size]; \
+        uint8_t         stateShadow[size]; \
+        FILE            *file; \
+        unsigned int    phase; \
+        unsigned int    mode; \
+        unsigned int    Rabsorb; \
+        unsigned int    Rsqueeze; \
+    } prefix##_Instance;
+
+#else
+
+#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
+    ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
+        uint8_t         state[size]; \
+        unsigned int    phase; \
+        unsigned int    mode; \
+        unsigned int    Rabsorb; \
+        unsigned int    Rsqueeze; \
+    } prefix##_Instance;
+
+#endif
+
+#define KCP_DeclareCyclistFunctions(prefix) \
+    void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \
+    void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \
+    void prefix##_Encrypt(prefix##_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen); \
+    void prefix##_Decrypt(prefix##_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen); \
+    void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \
+    void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \
+    void prefix##_Ratchet(prefix##_Instance *instance);
+
+#endif
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Cyclist.inc b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Cyclist.inc
new file mode 100644
index 0000000..ba7a156
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Cyclist.inc
@@ -0,0 +1,327 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#define JOIN0(a, b)                     a ## b
+#define JOIN(a, b)                      JOIN0(a, b)
+
+#define SnP_StaticInitialize            JOIN(SnP, _StaticInitialize)
+#define SnP_Initialize                  JOIN(SnP, _Initialize)
+#define SnP_AddBytes                    JOIN(SnP, _AddBytes)
+#define SnP_AddByte                     JOIN(SnP, _AddByte)
+#define SnP_OverwriteBytes              JOIN(SnP, _OverwriteBytes)
+#define SnP_ExtractBytes                JOIN(SnP, _ExtractBytes)
+#define SnP_ExtractAndAddBytes          JOIN(SnP, _ExtractAndAddBytes)
+
+#define Cyclist_Instance                JOIN(prefix, _Instance)
+#define Cyclist_Initialize              JOIN(prefix, _Initialize)
+#define Cyclist_Absorb                  JOIN(prefix, _Absorb)
+#define Cyclist_Encrypt                 JOIN(prefix, _Encrypt)
+#define Cyclist_Decrypt                 JOIN(prefix, _Decrypt)
+#define Cyclist_Squeeze                 JOIN(prefix, _Squeeze)
+#define Cyclist_SqueezeKey              JOIN(prefix, _SqueezeKey)
+#define Cyclist_Ratchet                 JOIN(prefix, _Ratchet)
+
+#define Cyclist_AbsorbAny               JOIN(prefix, _AbsorbAny)
+#define Cyclist_AbsorbKey               JOIN(prefix, _AbsorbKey)
+#define Cyclist_SqueezeAny              JOIN(prefix, _SqueezeAny)
+#define Cyclist_Down                    JOIN(prefix, _Down)
+#define Cyclist_Up                      JOIN(prefix, _Up)
+#define Cyclist_Crypt                   JOIN(prefix, _Crypt)
+
+#define Cyclist_f_bPrime                JOIN(prefix, _f_bPrime)
+#define Cyclist_Rhash                   JOIN(prefix, _Rhash)
+#define Cyclist_Rkin                    JOIN(prefix, _Rkin)
+#define Cyclist_Rkout                   JOIN(prefix, _Rkout)
+#define Cyclist_lRatchet                JOIN(prefix, _lRatchet)
+
+#if defined(CyclistFullBlocks_supported)
+#define Cyclist_AbsorbKeyedFullBlocks   JOIN(prefix, _AbsorbKeyedFullBlocks)
+#define Cyclist_AbsorbHashFullBlocks    JOIN(prefix, _AbsorbHashFullBlocks)
+#define Cyclist_SqueezeKeyedFullBlocks  JOIN(prefix, _SqueezeKeyedFullBlocks)
+#define Cyclist_SqueezeHashFullBlocks   JOIN(prefix, _SqueezeHashFullBlocks)
+#define Cyclist_EncryptFullBlocks       JOIN(prefix, _EncryptFullBlocks)
+#define Cyclist_DecryptFullBlocks       JOIN(prefix, _DecryptFullBlocks)
+#endif
+
+/* ------- Cyclist internal interfaces ------- */
+/* Down: XOR XiLen bytes of Xi into the state, add pad byte 0x01 after them and Cd (only its low bit in hash mode) into the last byte. */
+static void Cyclist_Down(Cyclist_Instance *instance, const uint8_t *Xi, unsigned int XiLen, uint8_t Cd)
+{
+    SnP_AddBytes(instance->state, Xi, 0, XiLen);
+    SnP_AddByte(instance->state, 0x01, XiLen);
+    SnP_AddByte(instance->state, (instance->mode == Cyclist_ModeHash) ? (Cd & 0x01) : Cd, Cyclist_f_bPrime - 1);
+    instance->phase = Cyclist_PhaseDown;
+
+}
+/* Up: add Cu into the last state byte (keyed mode only), run the permutation, then copy YiLen state bytes to Yi. */
+static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu)
+{
+    #if defined(OUTPUT)
+    uint8_t s[Cyclist_f_bPrime];
+    #endif
+
+    if (instance->mode != Cyclist_ModeHash) {
+        SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1);
+    }
+    #if defined(OUTPUT)
+    if (instance->file != NULL) {
+        SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
+        SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime );
+    }
+    #endif
+    SnP_Permute( instance->state );
+    #if defined(OUTPUT)
+    if (instance->file != NULL) {
+        memcpy( instance->stateShadow, instance->state, sizeof(instance->state) );
+        fprintf( instance->file, "Data XORed" );
+        displayByteString( instance->file, "", s, Cyclist_f_bPrime );
+        SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
+        fprintf( instance->file, "After f() ");
+        displayByteString( instance->file, "", s, Cyclist_f_bPrime );
+    }
+    #endif
+    instance->phase = Cyclist_PhaseUp;
+    SnP_ExtractBytes( instance->state, Yi, 0, YiLen );
+}
+/* Absorb X in blocks of at most r bytes; Cd is applied to the first block only. Runs at least one Down even when XLen == 0. */
+static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd)
+{
+    unsigned int splitLen;
+
+    do {
+        if (instance->phase != Cyclist_PhaseUp) {
+            Cyclist_Up(instance, NULL, 0, 0);
+        }
+        splitLen = MyMin(XLen, r);
+        Cyclist_Down(instance, X, splitLen, Cd);
+        Cd = 0;
+        X       += splitLen;
+        XLen    -= splitLen;
+        #if defined(CyclistFullBlocks_supported)
+        if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) {
+            size_t lenProcessed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen);
+            X       += lenProcessed;
+            XLen    -= lenProcessed;
+        }
+        else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) {
+            size_t lenProcessed = Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen);
+            X       += lenProcessed;
+            XLen    -= lenProcessed;
+        }
+        #endif
+    } while ( XLen != 0 );
+}
+/* Switch to keyed mode and absorb K || ID || (uint8_t)IDLen (Cd 0x02), then the counter one byte per block. */
+static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
+{
+    uint8_t KID[Cyclist_Rkin];
+
+    assert(instance->mode == Cyclist_ModeHash);
+    assert((KLen + IDLen) <= (Cyclist_Rkin - 1)); /* only bound on KID: compiled out under NDEBUG, so callers must guarantee it */
+
+    instance->mode      = Cyclist_ModeKeyed;
+    instance->Rabsorb   = Cyclist_Rkin;
+    instance->Rsqueeze  = Cyclist_Rkout;
+    if (KLen != 0) {
+        memcpy(KID, K, KLen);
+        memcpy(KID + KLen, ID, IDLen);
+        KID[KLen + IDLen] = (uint8_t)IDLen;
+        Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02);
+        if (counterLen != 0) {
+            Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00);
+        }
+    }
+}
+/* Squeeze YLen bytes into Y: one Up with Cu, then Down/Up pairs (or the full-block fast paths) for the remainder. */
+static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu)
+{
+    unsigned int len;
+
+    len = MyMin(YLen, instance->Rsqueeze );
+    Cyclist_Up(instance, Y, len, Cu);
+    Y += len;
+    YLen -= len;
+    while (YLen != 0) {
+        #if defined(CyclistFullBlocks_supported)
+        if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkin)) {
+            size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen);
+            Y       += lenProcessed;
+            YLen    -= lenProcessed;
+        }
+        else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) {
+            size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen);
+            Y       += lenProcessed;
+            YLen    -= lenProcessed;
+        }
+        else
+        #endif
+        {
+            Cyclist_Down(instance, NULL, 0, 0);
+            len = MyMin(YLen, instance->Rsqueeze );
+            Cyclist_Up(instance, Y, len, 0);
+            Y += len;
+            YLen -= len;
+        }
+    }
+}
+/* Per block of at most Rkout bytes: Up (Cu 0x80 on the first block), O = state XOR I, then Down with the plaintext block. */
+static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt)
+{
+    unsigned int splitLen;
+    uint8_t      P[Cyclist_Rkout];
+    uint8_t      Cu = 0x80;
+
+    do {
+        if (decrypt != 0) {
+            #if defined(CyclistFullBlocks_supported)
+            if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
+                size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen);
+                I       += lenProcessed;
+                O       += lenProcessed;
+                IOLen   -= lenProcessed;
+            }
+            else
+            #endif
+            {
+                splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
+                Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
+                SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
+                Cyclist_Down(instance, O, splitLen, 0x00);
+                I       += splitLen;
+                O       += splitLen;
+                IOLen   -= splitLen;
+            }
+        }
+        else {
+            #if defined(CyclistFullBlocks_supported)
+            if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
+                size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen);
+                I       += lenProcessed;
+                O       += lenProcessed;
+                IOLen   -= lenProcessed;
+            }
+            else
+            #endif
+            {
+                splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
+                memcpy(P, I, splitLen);
+                Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
+                SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
+                Cyclist_Down(instance, P, splitLen, 0x00);
+                I       += splitLen;
+                O       += splitLen;
+                IOLen   -= splitLen;
+            }
+        }
+        Cu = 0x00;
+    } while ( IOLen != 0 );
+}
+
+/* ------- Cyclist interfaces ------- */
+/* Reset the instance to hash mode (phase Up, both rates Rhash); when KLen != 0, absorb K, ID and counter to enter keyed mode. */
+void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
+{
+    SnP_StaticInitialize();
+    SnP_Initialize(instance->state);
+    instance->phase     = Cyclist_PhaseUp;
+    instance->mode      = Cyclist_ModeHash;
+    instance->Rabsorb   = Cyclist_Rhash;
+    instance->Rsqueeze  = Cyclist_Rhash;
+    #ifdef OUTPUT
+    instance->file = NULL;
+    SnP_Initialize( instance->stateShadow );
+    #endif
+    if (KLen != 0) {
+        Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, counter, counterLen);
+    }
+}
+/* Absorb XLen bytes of X at the instance's absorb rate (Cd 0x03 on the first block). */
+void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, size_t XLen)
+{
+    Cyclist_AbsorbAny(instance, X, XLen, instance->Rabsorb, 0x03);
+}
+/* Encrypt PLen bytes of P into C; keyed mode is checked by assert only. */
+void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen)
+{
+    assert(instance->mode == Cyclist_ModeKeyed);
+    Cyclist_Crypt(instance, P, C, PLen, 0);
+}
+/* Decrypt CLen bytes of C into P; keyed mode is checked by assert only. */
+void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen)
+{
+    assert(instance->mode == Cyclist_ModeKeyed);
+    Cyclist_Crypt(instance, C, P, CLen, 1);
+}
+/* Squeeze YLen output bytes into Y (Cu 0x40). */
+void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen)
+{
+    Cyclist_SqueezeAny(instance, Y, YLen, 0x40);
+}
+/* Squeeze KLen bytes into K (Cu 0x20); keyed mode is checked by assert only. */
+void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen)
+{
+    assert(instance->mode == Cyclist_ModeKeyed);
+    Cyclist_SqueezeAny(instance, K, KLen, 0x20);
+}
+/* Squeeze Cyclist_lRatchet bytes (Cu 0x10) and absorb them straight back; keyed mode is checked by assert only. */
+void Cyclist_Ratchet(Cyclist_Instance *instance)
+{
+    uint8_t buffer[Cyclist_lRatchet];
+
+    assert(instance->mode == Cyclist_ModeKeyed);
+    /* Squeeze then absorb is the same as overwriting with zeros */
+    Cyclist_SqueezeAny(instance, buffer, sizeof(buffer), 0x10);
+    Cyclist_AbsorbAny(instance, buffer, sizeof(buffer), instance->Rabsorb, 0x00);
+}
+
+#undef SnP_StaticInitialize
+#undef SnP_Initialize
+#undef SnP_AddBytes
+#undef SnP_AddByte
+#undef SnP_OverwriteBytes
+#undef SnP_ExtractBytes
+#undef SnP_ExtractAndAddBytes
+
+#undef Cyclist_Instance
+#undef Cyclist_Initialize
+#undef Cyclist_Absorb
+#undef Cyclist_Encrypt
+#undef Cyclist_Decrypt
+#undef Cyclist_Squeeze
+#undef Cyclist_SqueezeKey
+#undef Cyclist_Ratchet
+
+#undef Cyclist_AbsorbAny
+#undef Cyclist_AbsorbKey
+#undef Cyclist_SqueezeAny
+#undef Cyclist_Down
+#undef Cyclist_Up
+#undef Cyclist_Crypt
+
+#undef Cyclist_f_bPrime
+#undef Cyclist_Rhash
+#undef Cyclist_Rkin
+#undef Cyclist_Rkout
+#undef Cyclist_lRatchet
+
+#if defined(CyclistFullBlocks_supported)
+#undef Cyclist_AbsorbKeyedFullBlocks
+#undef Cyclist_AbsorbHashFullBlocks
+#undef Cyclist_SqueezeKeyedFullBlocks
+#undef Cyclist_SqueezeHashFullBlocks
+#undef Cyclist_EncryptFullBlocks
+#undef Cyclist_DecryptFullBlocks +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Xoodoo-SnP.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Xoodoo-SnP.h new file mode 100644 index 0000000..7d0c98b --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Xoodoo-SnP.h @@ -0,0 +1,55 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodoo_SnP_h_ +#define _Xoodoo_SnP_h_ + +#include +#include + +/** For the documentation, see SnP-documentation.h. 
+ */ + +#define Xoodoo_implementation "32-bit optimized ARM assembler implementation" +#define Xoodoo_stateSizeInBytes (3*4*4) +#define Xoodoo_stateAlignment 4 + +#define Xoodoo_StaticInitialize() +void Xoodoo_Initialize(void *state); +#define Xoodoo_AddByte(argS, argData, argOffset) ((uint8_t*)argS)[argOffset] ^= (argData) +void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount); +//void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds); +void Xoodoo_Permute_6rounds(void *state); +void Xoodoo_Permute_12rounds(void *state); +void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length); + +#define Xoodoo_FastXoofff_supported +void Xoofff_AddIs(uint8_t *output, const uint8_t *input, size_t bitLen); +size_t Xoofff_CompressFastLoop(uint8_t *kRoll, uint8_t *xAccu, const uint8_t *input, size_t length); +size_t Xoofff_ExpandFastLoop(uint8_t *yAccu, const uint8_t *kRoll, uint8_t *output, size_t length); + +#define CyclistFullBlocks_supported +size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen); +size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen); +size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen); +size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen); +size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen); +size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen); + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Xoodoo-uf-armv6-le-gcc.s 
b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Xoodoo-uf-armv6-le-gcc.s new file mode 100644 index 0000000..0baa5db --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Xoodoo-uf-armv6-le-gcc.s @@ -0,0 +1,726 @@ +@ +@ The eXtended Keccak Code Package (XKCP) +@ https://github.com/XKCP/XKCP +@ +@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. +@ +@ Implementation by Ronny Van Keer, hereby denoted as "the implementer". +@ +@ For more information, feedback or questions, please refer to the Keccak Team website: +@ https://keccak.team/ +@ +@ To the extent possible under law, the implementer has waived all copyright +@ and related or neighboring rights to the source code in this file. +@ http://creativecommons.org/publicdomain/zero/1.0/ +@ + +@ WARNING: These functions work only on little endian CPU with@ ARMv6 architecture (e.g.,@ ARM11). + + +.text + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_Initialize(void *state) +@ + .align 4 +.global Xoodoo_Initialize +.type Xoodoo_Initialize, %function; +Xoodoo_Initialize: + movs r1, #0 + movs r2, #0 + movs r3, #0 + movs r12, #0 + stmia r0!, { r1 - r3, r12 } + stmia r0!, { r1 - r3, r12 } + stmia r0!, { r1 - r3, r12 } + bx lr + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +@ + .align 4 +.global Xoodoo_AddBytes +.type Xoodoo_AddBytes, %function; +Xoodoo_AddBytes: + push {r4,lr} + adds r0, r0, r2 @ state += offset + subs r3, r3, #4 @ .if length >= 4 + bcc Xoodoo_AddBytes_Bytes +Xoodoo_AddBytes_LanesLoop: @ then, perform on lanes + ldr r2, [r0] + ldr r4, [r1], #4 + eors r2, r2, r4 + str r2, [r0], #4 + subs r3, r3, #4 + bcs Xoodoo_AddBytes_LanesLoop +Xoodoo_AddBytes_Bytes: + adds r3, r3, #3 + bcc Xoodoo_AddBytes_Exit +Xoodoo_AddBytes_BytesLoop: + 
ldrb r2, [r0] + ldrb r4, [r1], #1 + eors r2, r2, r4 + strb r2, [r0], #1 + subs r3, r3, #1 + bcs Xoodoo_AddBytes_BytesLoop +Xoodoo_AddBytes_Exit: + pop {r4,pc} + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +@ + .align 4 +.global Xoodoo_OverwriteBytes +.type Xoodoo_OverwriteBytes, %function; +Xoodoo_OverwriteBytes: + adds r0, r0, r2 @ state += offset + subs r3, r3, #4 @ .if length >= 4 + bcc Xoodoo_OverwriteBytes_Bytes +Xoodoo_OverwriteBytes_LanesLoop: @ then, perform on words + ldr r2, [r1], #4 + str r2, [r0], #4 + subs r3, r3, #4 + bcs Xoodoo_OverwriteBytes_LanesLoop +Xoodoo_OverwriteBytes_Bytes: + adds r3, r3, #3 + bcc Xoodoo_OverwriteBytes_Exit +Xoodoo_OverwriteBytes_BytesLoop: + ldrb r2, [r1], #1 + strb r2, [r0], #1 + subs r3, r3, #1 + bcs Xoodoo_OverwriteBytes_BytesLoop +Xoodoo_OverwriteBytes_Exit: + bx lr + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount) +@ + .align 4 +.global Xoodoo_OverwriteWithZeroes +.type Xoodoo_OverwriteWithZeroes, %function; +Xoodoo_OverwriteWithZeroes: + movs r3, #0 + lsrs r2, r1, #2 + beq Xoodoo_OverwriteWithZeroes_Bytes +Xoodoo_OverwriteWithZeroes_LoopLanes: + str r3, [r0], #4 + subs r2, r2, #1 + bne Xoodoo_OverwriteWithZeroes_LoopLanes +Xoodoo_OverwriteWithZeroes_Bytes: + ands r1, #3 + beq Xoodoo_OverwriteWithZeroes_Exit +Xoodoo_OverwriteWithZeroes_LoopBytes: + strb r3, [r0], #1 + subs r1, r1, #1 + bne Xoodoo_OverwriteWithZeroes_LoopBytes +Xoodoo_OverwriteWithZeroes_Exit: + bx lr + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_ExtractBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +@ + .align 4 +.global Xoodoo_ExtractBytes +.type Xoodoo_ExtractBytes, %function; +Xoodoo_ExtractBytes: 
+ adds r0, r0, r2 @ state += offset + subs r3, r3, #4 @ .if length >= 4 + bcc Xoodoo_ExtractBytes_Bytes +Xoodoo_ExtractBytes_LanesLoop: @ then, handle words + ldr r2, [r0], #4 + str r2, [r1], #4 + subs r3, r3, #4 + bcs Xoodoo_ExtractBytes_LanesLoop +Xoodoo_ExtractBytes_Bytes: + adds r3, r3, #3 + bcc Xoodoo_ExtractBytes_Exit +Xoodoo_ExtractBytes_BytesLoop: + ldrb r2, [r0], #1 + strb r2, [r1], #1 + subs r3, r3, #1 + bcs Xoodoo_ExtractBytes_BytesLoop +Xoodoo_ExtractBytes_Exit: + bx lr + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_ExtractAndAddBytes(void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +@ + .align 4 +.global Xoodoo_ExtractAndAddBytes +.type Xoodoo_ExtractAndAddBytes, %function; +Xoodoo_ExtractAndAddBytes: + push {r4,r5} + adds r0, r0, r3 @ state += offset (offset register no longer needed, reuse for length) + ldr r3, [sp, #8] @ get length argument from stack + subs r3, r3, #4 @ .if length >= 4 + bcc Xoodoo_ExtractAndAddBytes_Bytes +Xoodoo_ExtractAndAddBytes_LanesLoop: @ then, handle words + ldr r5, [r0], #4 + ldr r4, [r1], #4 + eors r5, r5, r4 + str r5, [r2], #4 + subs r3, r3, #4 + bcs Xoodoo_ExtractAndAddBytes_LanesLoop +Xoodoo_ExtractAndAddBytes_Bytes: + adds r3, r3, #3 + bcc Xoodoo_ExtractAndAddBytes_Exit +Xoodoo_ExtractAndAddBytes_BytesLoop: + ldrb r5, [r0], #1 + ldrb r4, [r1], #1 + eors r5, r5, r4 + strb r5, [r2], #1 + subs r3, r3, #1 + bcs Xoodoo_ExtractAndAddBytes_BytesLoop +Xoodoo_ExtractAndAddBytes_Exit: + pop {r4,r5} + bx lr + + +@ ---------------------------------------------------------------------------- + +.equ _r0 , 5 +.equ _r1 , 14 +.equ _t3 , 1 + +.equ _w1 , 11 + +.equ _e0 , 2 +.equ _e1 , 8 + +.equ _rc12 , 0x00000058 +.equ _rc11 , 0x00000038 +.equ _rc10 , 0x000003C0 +.equ _rc9 , 0x000000D0 +.equ _rc8 , 0x00000120 +.equ _rc7 , 0x00000014 +.equ _rc6 , 0x00000060 +.equ _rc5 , 0x0000002C +.equ _rc4 , 0x00000380 +.equ _rc3 , 
0x000000F0 +.equ _rc2 , 0x000001A0 +.equ _rc1 , 0x00000012 + +.equ _rc6x1, 0x00000003 +.equ _rc5x2, 0x0b000000 +.equ _rc4x3, 0x07000000 +.equ _rc3x4, 0x000f0000 +.equ _rc2x5, 0x0000d000 +.equ _rc1x6, 0x00000048 + +.equ _rc12x1, 0xc0000002 +.equ _rc11x2, 0x0e000000 +.equ _rc10x3, 0x07800000 +.equ _rc9x4 , 0x000d0000 +.equ _rc8x5 , 0x00009000 +.equ _rc7x6 , 0x00000050 +.equ _rc6x7 , 0x0000000c +.equ _rc5x8 , 0x2c000000 +.equ _rc4x9 , 0x1c000000 +.equ _rc3x10, 0x003c0000 +.equ _rc2x11, 0x00034000 +.equ _rc1x12, 0x00000120 + +@ ---------------------------------------------------------------------------- + +.macro mXor3 ro, a0, a1, a2, rho_e1, rho_e2 + .if ((\rho_e1)%32) == 0 + eors \ro, \a0, \a1 + .else + eor \ro, \a0, \a1, ROR #(32-(\rho_e1))%32 + .endif + .if ((\rho_e2)%32) == 0 + eors \ro, \ro, \a2 + .else + eor \ro, \ro, \a2, ROR #(32-(\rho_e2))%32 + .endif + .endm + +.macro mRliXor ro, ri, rot + .if ((\rot)%32) == 0 + eors \ro, \ro, \ri + .else + eor \ro, \ro, \ri, ROR #(32-(\rot))%32 + .endif + .endm + +.macro mRloXor ro, ri, rot + .if ((\rot)%32) == 0 + eors \ro, \ro, \ri + .else + eor \ro, \ri, \ro, ROR #(32-(\rot))%32 + .endif + .endm + +.macro mChi3 a0,a1,a2,r0,r1 + bic \r0, \a2, \a1, ROR #_w1 + eors \a0, \a0, \r0, ROR #32-_w1 + bic \r1, \a0, \a2, ROR #32-_w1 + eors \a1, \a1, \r1 + bic \r1, \a1, \a0 + eors \a2, \a2, \r1, ROR #_w1 + .endm + +.macro mRound r6i, r7i, r8i, r9i, r6w, r7w, r8w, r9w, r10i, r11i, r12i, lri, rho_e1, rho_we2, rc + + @ Theta: Column Parity Mixer (with late Rho-west, Rho-east bit rotations) + mXor3 r0, r5, \r9i, \lri, \rho_e1, \rho_we2 + mXor3 r1, r2, \r6i, \r10i, \rho_e1, \rho_we2 + mRliXor r0, r0, _r1-_r0 + mRloXor r2, r0, 32-_r0 + mRloXor \r6i, r0, \rho_e1-_r0 + mRloXor \r10i, r0, \rho_we2-_r0 + + mXor3 r0, r3, \r7i, \r11i, \rho_e1, \rho_we2 + mRliXor r1, r1, _r1-_r0 + mRloXor r3, r1, 32-_r0 + mRloXor \r7i, r1, \rho_e1-_r0 + mRloXor \r11i, r1, \rho_we2-_r0 + + mXor3 r1, r4, \r8i, \r12i, \rho_e1, \rho_we2 + mRliXor r0, r0, _r1-_r0 + 
mRloXor r4, r0, 32-_r0 + mRloXor \r8i, r0, \rho_e1-_r0 + mRloXor \r12i, r0, \rho_we2-_r0 + + mRliXor r1, r1, _r1-_r0 + mRloXor r5, r1, 32-_r0 + mRloXor \r9i, r1, \rho_e1-_r0 + mRloXor \lri, r1, \rho_we2-_r0 + @ After Theta the whole state is rotated -r0 + @ from here we must use a1.w instead of a1.i + + @ Iota: round constant + .if \rc == 0xc0000002 + eor r2, r2, #0x00000002 + eor r2, r2, #0xc0000000 + .else + eor r2, r2, #\rc + .endif + + @ Chi: non linear step, on colums + mChi3 r2, \r6w, \r10i, r0, r1 + mChi3 r3, \r7w, \r11i, r0, r1 + mChi3 r4, \r8w, \r12i, r0, r1 + mChi3 r5, \r9w, \lri, r0, r1 + .endm + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_Permute_6rounds( void *state ) +@ + .align 4 +.global Xoodoo_Permute_6rounds +.type Xoodoo_Permute_6rounds, %function; +Xoodoo_Permute_6rounds: + push {r0,r4-r11,lr} + ldmia r0!, {r2-r5} + ldmia r0!, {r8-r9} + ldmia r0!, {r6-r7} + ldmia r0, {r10-r12,lr} + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 32, 32, _rc6x1 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2 + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6 + pop {r0,r1} + ror r2, r2, #32-(6*_r0)%32 + ror r3, r3, #32-(6*_r0)%32 + ror r4, r4, #32-(6*_r0)%32 + ror r5, r5, #32-(6*_r0)%32 + ror r6, r6, #32-(6*_r0+1)%32 + ror r7, r7, #32-(6*_r0+1)%32 + ror r8, r8, #32-(6*_r0+1)%32 + ror r9, r9, #32-(6*_r0+1)%32 + ror r10, r10, #32-(6*_r0+_e1+_w1)%32 + ror r11, r11, #32-(6*_r0+_e1+_w1)%32 + ror r12, r12, #32-(6*_r0+_e1+_w1)%32 + ror lr, lr, #32-(6*_r0+_e1+_w1)%32 + stmia r0, {r2-r12,lr} + mov r4, r1 + pop {r5-r11,pc} + + +@ ---------------------------------------------------------------------------- +@ +@ void 
Xoodoo_Permute_12rounds( void *state ) +@ + .align 4 +.global Xoodoo_Permute_12rounds +.type Xoodoo_Permute_12rounds, %function; +Xoodoo_Permute_12rounds: + push {r0,r4-r11,lr} + ldmia r0, {r2-r12,lr} + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc12x1 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc11x2 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc10x3 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc9x4 + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc8x5 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc7x6 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc6x7 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x8 + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x9 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x10 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x11 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x12 + ror r2, r2, #32-(12*_r0)%32 + ror r3, r3, #32-(12*_r0)%32 + ror r4, r4, #32-(12*_r0)%32 + ror r5, r5, #32-(12*_r0)%32 + ror r6, r6, #32-(12*_r0+1)%32 + ror r7, r7, #32-(12*_r0+1)%32 + ror r8, r8, #32-(12*_r0+1)%32 + ror r9, r9, #32-(12*_r0+1)%32 + ror r10, r10, #32-(12*_r0+_e1+_w1)%32 + ror r11, r11, #32-(12*_r0+_e1+_w1)%32 + ror r12, r12, #32-(12*_r0+_e1+_w1)%32 + ror lr, lr, #32-(12*_r0+_e1+_w1)%32 + pop {r0,r1} + stmia r0, {r2-r12,lr} + mov r4, r1 + pop {r5-r11,pc} + + +.equ Xoofff_BlockSize , 3*4*4 + +@ ---------------------------------------------------------------------------- +@ +@ void Xoofff_AddIs(BitSequence *output, const BitSequence *input, BitLength bitLen) + .align 4 +.global Xoofff_AddIs +.type Xoofff_AddIs, %function; +Xoofff_AddIs: + push {r4-r10,lr} + + subs r2, r2, #Xoofff_BlockSize*8 + bcc 
Xoofff_AddIs_LessThanBlock +Xoofff_AddIs_BlockLoop: + ldr r3, [r0, #0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + ldr r7, [r1], #4 + ldr r8, [r1], #4 + ldr r9, [r1], #4 + ldr r10, [r1], #4 + eor r3, r3, r7 + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + str r3, [r0], #4 + str r4, [r0], #4 + str r5, [r0], #4 + str r6, [r0], #4 + + ldr r3, [r0, #0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + ldr r7, [r1], #4 + ldr r8, [r1], #4 + ldr r9, [r1], #4 + ldr r10, [r1], #4 + eor r3, r3, r7 + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + str r3, [r0], #4 + str r4, [r0], #4 + str r5, [r0], #4 + str r6, [r0], #4 + + ldr r3, [r0, #0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + ldr r7, [r1], #4 + ldr r8, [r1], #4 + ldr r9, [r1], #4 + ldr r10, [r1], #4 + eor r3, r3, r7 + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + str r3, [r0], #4 + str r4, [r0], #4 + str r5, [r0], #4 + str r6, [r0], #4 + + subs r2, r2, #Xoofff_BlockSize*8 + bcs Xoofff_AddIs_BlockLoop +Xoofff_AddIs_LessThanBlock: + adds r2, r2, #Xoofff_BlockSize*8 + beq Xoofff_AddIs_Return + subs r2, r2, #16*8 + bcc Xoofff_AddIs_LessThan16 +Xoofff_AddIs_16Loop: + ldr r3, [r0, #0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + ldr r7, [r1], #4 + ldr r8, [r1], #4 + ldr r9, [r1], #4 + ldr r10, [r1], #4 + eor r3, r3, r7 + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + str r3, [r0], #4 + str r4, [r0], #4 + str r5, [r0], #4 + str r6, [r0], #4 + subs r2, r2, #16*8 + bcs Xoofff_AddIs_16Loop +Xoofff_AddIs_LessThan16: + adds r2, r2, #16*8 + beq Xoofff_AddIs_Return + subs r2, r2, #4*8 + bcc Xoofff_AddIs_LessThan4 +Xoofff_AddIs_4Loop: + ldr r3, [r0] + ldr r7, [r1], #4 + eors r3, r3, r7 + str r3, [r0], #4 + subs r2, r2, #4*8 + bcs Xoofff_AddIs_4Loop +Xoofff_AddIs_LessThan4: + adds r2, r2, #4*8 + beq Xoofff_AddIs_Return + subs r2, r2, #8 + bcc Xoofff_AddIs_LessThan1 +Xoofff_AddIs_1Loop: + ldrb r3, [r0] + ldrb r7, [r1], #1 + eors r3, r3, r7 + strb r3, [r0], #1 
+    subs r2, r2, #8
+    bcs Xoofff_AddIs_1Loop
+Xoofff_AddIs_LessThan1:
+    adds r2, r2, #8
+    beq Xoofff_AddIs_Return
+    ldrb r3, [r0]
+    ldrb r7, [r1]
+    movs r1, #1
+    eors r3, r3, r7
+    lsls r1, r1, r2
+    subs r1, r1, #1
+    ands r3, r3, r1
+    strb r3, [r0]
+Xoofff_AddIs_Return:
+    pop {r4-r10,pc}
+
+
+@ ----------------------------------------------------------------------------
+@ Per iteration: XOR one input block into the register copy of kRoll, run 6 Xoodoo rounds, XOR the result into xAccu, then roll kRoll in memory.
+@ size_t Xoofff_CompressFastLoop(unsigned char *kRoll, unsigned char *xAccu, const unsigned char *input, size_t length)
+@ Returns the number of input bytes consumed. One block is always processed before the length is tested, so the caller must supply at least one full block.
+.equ Xoofff_Compress_kRoll , 0 @ r0 as saved by push {r0,r2}
+.equ Xoofff_Compress_input , 4 @ r2 as saved by push {r0,r2}; advanced after every block
+.equ Xoofff_Compress_xAccu , 8 @ r1 as saved by push {r1-r12,lr}
+.equ Xoofff_Compress_iInput , 12 @ r2 as saved by push {r1-r12,lr}; the initial input pointer, never updated
+.equ Xoofff_Compress_length , 16 @ r3 as saved after the initial subs: remaining length minus one block
+
+    .align 4
+.global Xoofff_CompressFastLoop
+.type Xoofff_CompressFastLoop, %function;
+Xoofff_CompressFastLoop:
+    subs r3, #Xoofff_BlockSize @ length must be greater than block size
+    push {r1-r12,lr}
+    push {r0,r2}
+    ldmia r0, {r2-r12,lr} @ get initial kRoll
+Xoofff_CompressFastLoop_Loop:
+    ldr r0, [sp, #Xoofff_Compress_input] @ add input
+    ldr r1, [r0], #4
+    eors r2, r2, r1
+    ldr r1, [r0], #4
+    eors r3, r3, r1
+    ldr r1, [r0], #4
+    eors r4, r4, r1
+    ldr r1, [r0], #4
+    eors r5, r5, r1
+
+    ldr r1, [r0], #4
+    eors r6, r6, r1
+    ldr r1, [r0], #4
+    eors r7, r7, r1
+    ldr r1, [r0], #4
+    eors r8, r8, r1
+    ldr r1, [r0], #4
+    eors r9, r9, r1
+
+    ldr r1, [r0], #4
+    eors r10, r10, r1
+    ldr r1, [r0], #4
+    eors r11, r11, r1
+    ldr r1, [r0], #4
+    eors r12, r12, r1
+    ldr r1, [r0], #4
+    eors lr, lr, r1
+    str r0, [sp, #Xoofff_Compress_input] @ save the advanced input pointer
+
+    @ permutation
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc6x1
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2
+    mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3
+    mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6
+
+    @ Extract and add into xAccu
+    ldr r0, [sp, #Xoofff_Compress_xAccu]
+    ldr r1, [r0]
+    mRloXor r2, r1, (6*_r0)%32
+    ldr r1, [r0, #4]
+
+    str r2, [r0], #4
+    mRloXor r3, r1, (6*_r0)%32
+    ldr r1, [r0, #4]
+
+    str r3, [r0], #4
+    mRloXor r4, r1, (6*_r0)%32
+    ldr r1, [r0, #4]
+
+    str r4, [r0], #4
+    mRloXor r5, r1, (6*_r0)%32
+    str r5, [r0], #4
+
+    ldm r0, {r2-r5} @ note that r6-r8 and r7-r9 are swapped
+    mRliXor r2, r8, (6*_r0+1)%32
+    mRliXor r3, r9, (6*_r0+1)%32
+    mRliXor r4, r6, (6*_r0+1)%32
+    mRliXor r5, r7, (6*_r0+1)%32
+    stm r0!, {r2-r5}
+
+    ldm r0, {r2-r5}
+    mRliXor r2, r10, (6*_r0+_e1+_w1)%32
+    mRliXor r3, r11, (6*_r0+_e1+_w1)%32
+    mRliXor r4, r12, (6*_r0+_e1+_w1)%32
+    mRliXor r5, lr, (6*_r0+_e1+_w1)%32
+    stm r0!, {r2-r5}
+
+    @roll kRoll
+    ldr r0, [sp, #Xoofff_Compress_kRoll]
+    ldr lr, [r0], #4 @ lr = word 0
+    ldmia r0!, {r10-r12} @ r10-r12 = words 1-3
+    ldmia r0!, {r2-r9} @ r2-r9 = words 4-11
+    eors lr, lr, lr, LSL #13 @ lr ^= lr << 13
+    eors lr, lr, r2, ROR #32-3 @ lr ^= word 4 rotated left by 3
+    sub r0, #Xoofff_BlockSize
+    stmia r0, {r2-r12,lr} @ written back as words 4-11, words 1-3, new word; the registers now hold the rolled kRoll for the next block
+    @ loop management
+    ldr r0, [sp, #Xoofff_Compress_length]
+    subs r0, #Xoofff_BlockSize
+    str r0, [sp, #Xoofff_Compress_length]
+    bcs Xoofff_CompressFastLoop_Loop @ no borrow: another full block is available
+    @ return number of bytes processed
+    ldr r0, [sp, #Xoofff_Compress_input]
+    ldr r1, [sp, #Xoofff_Compress_iInput]
+    sub r0, r0, r1
+    pop {r1,r2} @ discard the two words pushed by push {r0,r2}
+    pop {r1-r12,pc}
+
+
+@ ----------------------------------------------------------------------------
+@ Per iteration: run 6 Xoodoo rounds on the register copy of yAccu, XOR kRoll into the result and store it to output, then roll yAccu in memory.
+@ size_t Xoofff_ExpandFastLoop(unsigned char *yAccu, const unsigned char *kRoll, unsigned char *output, size_t length)
+@ Returns the number of output bytes written. One block is always produced before the length is tested, so the caller must request at least one full block.
+.equ Xoofff_Expand_yAccu , 0 @ r0 as saved by push {r0,r2}
+.equ Xoofff_Expand_output , 4 @ r2 as saved by push {r0,r2}; advanced after every block
+.equ Xoofff_Expand_kRoll , 8 @ r1 as saved by push {r1-r12,lr}
+.equ Xoofff_Expand_iOutput , 12 @ r2 as saved by push {r1-r12,lr}; the initial output pointer, never updated
+.equ Xoofff_Expand_length , 16 @ r3 as saved after the initial subs: remaining length minus one block
+
+    .align 4
+.global Xoofff_ExpandFastLoop
+.type Xoofff_ExpandFastLoop, %function;
+Xoofff_ExpandFastLoop:
+    subs r3, #Xoofff_BlockSize @ length must be greater than block size
+    push {r1-r12,lr}
+    push {r0,r2}
+    ldmia r0, {r2-r12,lr} @ get initial yAccu
+Xoofff_ExpandFastLoop_Loop:
+    @ permutation
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc6x1
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2
+    mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3
+    mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6
+
+    @ Add k and extract
+    ldr r0, [sp, #Xoofff_Expand_kRoll]
+    ldr r1, [r0], #4
+    mRloXor r2, r1, (6*_r0)%32
+
+    ldr r1, [sp, #Xoofff_Expand_output]
+    str r2, [r1], #4
+
+    ldr r2, [r0], #4
+    mRloXor r3, r2, (6*_r0)%32
+    ldr r2, [r0], #4
+
+    str r3, [r1], #4
+    mRloXor r4, r2, (6*_r0)%32
+    ldr r2, [r0], #4
+
+    str r4, [r1], #4
+    mRloXor r5, r2, (6*_r0)%32
+    str r5, [r1], #4
+
+    ldm r0!, {r2-r5} @ Note that r6-r8 and r7-r9 are swapped
+    mRliXor r2, r8, (6*_r0+1)%32
+    str r2, [r1], #4
+    mRliXor r3, r9, (6*_r0+1)%32
+    str r3, [r1], #4
+    mRliXor r4, r6, (6*_r0+1)%32
+    str r4, [r1], #4
+    mRliXor r5, r7, (6*_r0+1)%32
+    str r5, [r1], #4
+
+    ldm r0!, {r2-r5}
+    mRliXor r2, r10, (6*_r0+_e1+_w1)%32
+    str r2, [r1], #4
+    mRliXor r3, r11, (6*_r0+_e1+_w1)%32
+    str r3, [r1], #4
+    mRliXor r4, r12, (6*_r0+_e1+_w1)%32
+    str r4, [r1], #4
+    mRliXor r5, lr, (6*_r0+_e1+_w1)%32
+    str r5, [r1], #4
+
+    @ roll-e yAccu
+    ldr r0, [sp, #Xoofff_Expand_yAccu]
+    str r1, [sp, #Xoofff_Expand_output] @ save the advanced output pointer
+    ldr lr, [r0], #4 @ lr = word 0
+    ldmia r0!, {r10-r12} @ r10-r12 = words 1-3
+    ldmia r0!, {r2-r9} @ r2-r9 = words 4-11
+    and r1, r6, r2 @ r1 = word 8 & word 4
+    eor lr, r1, lr, ROR #32-5 @ lr = r1 ^ (word 0 rotated left by 5)
+    eor lr, lr, r2, ROR #32-13 @ lr ^= word 4 rotated left by 13
+    eor lr, lr, #7
+    sub r0, #Xoofff_BlockSize
+    stmia r0, {r2-r12,lr} @ written back as words 4-11, words 1-3, new word; the registers now hold the rolled yAccu for the next block
+    @ loop management
+    ldr r0, [sp, #Xoofff_Expand_length]
+    subs r0, #Xoofff_BlockSize
+    str r0, [sp, #Xoofff_Expand_length]
+    bcs Xoofff_ExpandFastLoop_Loop @ no borrow: another full block is requested
+    @ return number of bytes processed
+    ldr r0, [sp, #Xoofff_Expand_output]
+    ldr r1, [sp, #Xoofff_Expand_iOutput]
+    sub r0, r0, r1
+    pop {r1,r2} @ discard the two words pushed by push {r0,r2}
+    pop {r1-r12,pc}
+
+
diff --git
a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Xoodoo.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Xoodoo.h new file mode 100644 index 0000000..1b6f1a9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Xoodoo.h @@ -0,0 +1,79 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodoo_h_ +#define _Xoodoo_h_ + +#include +#include + +#define MAXROUNDS 12 +#define NROWS 3 +#define NCOLUMS 4 +#define NLANES (NCOLUMS*NROWS) + +/* Round constants */ +#define _rc12 0x00000058 +#define _rc11 0x00000038 +#define _rc10 0x000003C0 +#define _rc9 0x000000D0 +#define _rc8 0x00000120 +#define _rc7 0x00000014 +#define _rc6 0x00000060 +#define _rc5 0x0000002C +#define _rc4 0x00000380 +#define _rc3 0x000000F0 +#define _rc2 0x000001A0 +#define _rc1 0x00000012 + + +#if !defined(ROTL32) + #if defined (__arm__) && !defined(__GNUC__) + #define ROTL32(a, offset) __ror(a, (32-(offset))%32) + #elif defined(_MSC_VER) + #define ROTL32(a, offset) _rotl(a, (offset)%32) + #else + #define ROTL32(a, offset) ((((uint32_t)a) << ((offset)%32)) ^ (((uint32_t)a) >> ((32-(offset))%32))) + #endif +#endif + +#if !defined(READ32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define READ32_UNALIGNED(argAddress) (*((const __packed uint32_t*)(argAddress))) + #elif defined(_MSC_VER) + #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress))) + #else + #define READ32_UNALIGNED(argAddress) (*((const 
uint32_t*)(argAddress))) + #endif +#endif + +#if !defined(WRITE32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define WRITE32_UNALIGNED(argAddress, argData) (*((__packed uint32_t*)(argAddress)) = (argData)) + #elif defined(_MSC_VER) + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #else + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #endif +#endif + +#if !defined(index) + #define index(__x,__y) ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS)) +#endif + +typedef uint32_t tXoodooLane; + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Xoodyak-parameters.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Xoodyak-parameters.h new file mode 100644 index 0000000..a8c34d8 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Xoodyak-parameters.h @@ -0,0 +1,26 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _Xoodyak_parameters_h_
+#define _Xoodyak_parameters_h_
+/* Sizes in bytes. The *FullBlocks assembly routines in this directory hard-code 16, 44 and 24 as their block sizes. */
+#define Xoodyak_f_bPrime 48 /* presumably the Xoodoo state size: the assembly moves the state as 12 32-bit registers */
+#define Xoodyak_Rhash 16 /* block size of Xoodyak_AbsorbHashFullBlocks / Xoodyak_SqueezeHashFullBlocks */
+#define Xoodyak_Rkin 44 /* block size of Xoodyak_AbsorbKeyedFullBlocks */
+#define Xoodyak_Rkout 24 /* block size of Xoodyak_SqueezeKeyedFullBlocks and of the Encrypt/Decrypt full-block loops */
+#define Xoodyak_lRatchet 16 /* NOTE(review): not referenced by the code visible here; presumably the ratchet length - confirm */
+
+#endif
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Xoodyak-uf-armv6-le-gcc.s b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Xoodyak-uf-armv6-le-gcc.s
new file mode 100644
index 0000000..68fb7db
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Xoodyak-uf-armv6-le-gcc.s
@@ -0,0 +1,563 @@
+@
+@ The eXtended Keccak Code Package (XKCP)
+@ https://github.com/XKCP/XKCP
+@
+@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
+@
+@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+@
+@ For more information, feedback or questions, please refer to the Keccak Team website:
+@ https://keccak.team/
+@
+@ To the extent possible under law, the implementer has waived all copyright
+@ and related or neighboring rights to the source code in this file.
+@ http://creativecommons.org/publicdomain/zero/1.0/
+@
+
+@ WARNING: These functions work only on little-endian CPUs with the ARMv6 architecture (e.g., ARM11).
+
+
+.text
+
+
+@ ----------------------------------------------------------------------------
+@ Rotation amounts and round constants used by the macros below.
+.equ _r0 , 5
+.equ _r1 , 14
+.equ _t3 , 1
+
+.equ _w1 , 11
+
+.equ _e0 , 2
+.equ _e1 , 8
+
+.equ _rc12 , 0x00000058
+.equ _rc11 , 0x00000038
+.equ _rc10 , 0x000003C0
+.equ _rc9 , 0x000000D0
+.equ _rc8 , 0x00000120
+.equ _rc7 , 0x00000014
+.equ _rc6 , 0x00000060
+.equ _rc5 , 0x0000002C
+.equ _rc4 , 0x00000380
+.equ _rc3 , 0x000000F0
+.equ _rc2 , 0x000001A0
+.equ _rc1 , 0x00000012
+@ _rcNxK is _rcN rotated right by (K*_r0) mod 32 bits, for use as the K-th round of a 6-round sequence.
+.equ _rc6x1 , 0x00000003
+.equ _rc5x2 , 0x0b000000
+.equ _rc4x3 , 0x07000000
+.equ _rc3x4 , 0x000f0000
+.equ _rc2x5 , 0x0000d000
+.equ _rc1x6 , 0x00000048
+@ Same pre-rotation scheme for the K-th round of the 12-round sequence.
+.equ _rc12x1, 0xc0000002
+.equ _rc11x2, 0x0e000000
+.equ _rc10x3, 0x07800000
+.equ _rc9x4 , 0x000d0000
+.equ _rc8x5 , 0x00009000
+.equ _rc7x6 , 0x00000050
+.equ _rc6x7 , 0x0000000c
+.equ _rc5x8 , 0x2c000000
+.equ _rc4x9 , 0x1c000000
+.equ _rc3x10, 0x003c0000
+.equ _rc2x11, 0x00034000
+.equ _rc1x12, 0x00000120
+
+@ ----------------------------------------------------------------------------
+@ mXor3: ro = a0 ^ (a1 rotated left by rho_e1) ^ (a2 rotated left by rho_e2); a rotation of 0 mod 32 uses a plain eors.
+.macro mXor3 ro, a0, a1, a2, rho_e1, rho_e2
+    .if ((\rho_e1)%32) == 0
+    eors \ro, \a0, \a1
+    .else
+    eor \ro, \a0, \a1, ROR #(32-(\rho_e1))%32
+    .endif
+    .if ((\rho_e2)%32) == 0
+    eors \ro, \ro, \a2
+    .else
+    eor \ro, \ro, \a2, ROR #(32-(\rho_e2))%32
+    .endif
+    .endm
+@ mRliXor ("rotate input"): ro = ro ^ (ri rotated left by rot).
+.macro mRliXor ro, ri, rot
+    .if ((\rot)%32) == 0
+    eors \ro, \ro, \ri
+    .else
+    eor \ro, \ro, \ri, ROR #(32-(\rot))%32
+    .endif
+    .endm
+@ mRloXor ("rotate output"): ro = ri ^ (ro rotated left by rot).
+.macro mRloXor ro, ri, rot
+    .if ((\rot)%32) == 0
+    eors \ro, \ro, \ri
+    .else
+    eor \ro, \ri, \ro, ROR #(32-(\rot))%32
+    .endif
+    .endm
+@ mChi3: three bic/eors pairs that update a0, a1 and a2 in turn; r0 and r1 are scratch registers.
+.macro mChi3 a0,a1,a2,r0,r1
+    bic \r0, \a2, \a1, ROR #_w1
+    eors \a0, \a0, \r0, ROR #32-_w1
+    bic \r1, \a0, \a2, ROR #32-_w1
+    eors \a1, \a1, \r1
+    bic \r1, \a1, \a0
+    eors \a2, \a2, \r1, ROR #_w1
+    .endm
+@ mRound: one round on the state in r2-r12,lr. The i/w arguments name the registers to use for the r6-r9 and r10-r12,lr words before (i) and after (w) Theta; rho_e1 and rho_we2 are rotation amounts folded into Theta; rc is the pre-rotated round constant. r0 and r1 are scratch.
+.macro mRound r6i, r7i, r8i, r9i, r6w, r7w, r8w, r9w, r10i, r11i, r12i, lri, rho_e1, rho_we2, rc
+
+    @ Theta: Column Parity Mixer (with late Rho-west, Rho-east bit rotations)
+    mXor3 r0, r5, \r9i, \lri, \rho_e1, \rho_we2
+    mXor3 r1, r2, \r6i, \r10i, \rho_e1, \rho_we2
+    mRliXor r0, r0, _r1-_r0
+    mRloXor r2, r0, 32-_r0
+    mRloXor \r6i, r0, \rho_e1-_r0
+    mRloXor \r10i, r0, \rho_we2-_r0
+
+    mXor3 r0, r3, \r7i, \r11i, \rho_e1, \rho_we2
+    mRliXor r1, r1, _r1-_r0
+    mRloXor r3, r1, 32-_r0
+    mRloXor \r7i, r1, \rho_e1-_r0
+    mRloXor \r11i, r1, \rho_we2-_r0
+
+    mXor3 r1, r4, \r8i, \r12i, \rho_e1, \rho_we2
+    mRliXor r0, r0, _r1-_r0
+    mRloXor r4, r0, 32-_r0
+    mRloXor \r8i, r0, \rho_e1-_r0
+    mRloXor \r12i, r0, \rho_we2-_r0
+
+    mRliXor r1, r1, _r1-_r0
+    mRloXor r5, r1, 32-_r0
+    mRloXor \r9i, r1, \rho_e1-_r0
+    mRloXor \lri, r1, \rho_we2-_r0
+    @ After Theta the whole state is rotated -r0
+    @ from here we must use a1.w instead of a1.i
+
+    @ Iota: round constant
+    .if \rc == 0xc0000002
+    eor r2, r2, #0x00000002
+    eor r2, r2, #0xc0000000
+    .else
+    eor r2, r2, #\rc
+    .endif
+
+    @ Chi: non linear step, on columns
+    mChi3 r2, \r6w, \r10i, r0, r1
+    mChi3 r3, \r7w, \r11i, r0, r1
+    mChi3 r4, \r8w, \r12i, r0, r1
+    mChi3 r5, \r9w, \lri, r0, r1
+    .endm
+@ Stack slots shared by the *FullBlocks routines after their "push {r0-r5}".
+.equ offsetInstance , 0 @ r0: state pointer
+.equ offsetInitialLen , 16 @ r4: initial length
+.equ offsetReturn , 20 @ r5: address at which Xoodoo_Permute_12roundsAsm resumes the caller
+
+@ ----------------------------------------------------------------------------
+@ Runs 12 rounds on the state held in r2-r12,lr, rotates every word back into place, then jumps to the address stored at [sp, #offsetReturn].
+@ Xoodoo_Permute_12roundsAsm: only callable from asm
+@ It is entered with a plain branch, so the caller must have stored its resume address in that stack slot; r0 and r1 are clobbered.
+    .align 4
+.type Xoodoo_Permute_12roundsAsm, %function;
+Xoodoo_Permute_12roundsAsm:
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc12x1
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc11x2
+    mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc10x3
+    mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc9x4
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc8x5
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc7x6
+    mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc6x7
+    mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x8
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x9
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x10
+    mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x11
+    mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x12
+    ror r2, r2, #32-(12*_r0)%32
+    ror r3, r3, #32-(12*_r0)%32
+    ror r4, r4, #32-(12*_r0)%32
+    ror r5, r5, #32-(12*_r0)%32
+    ror r6, r6, #32-(12*_r0+1)%32
+    ror r7, r7, #32-(12*_r0+1)%32
+    ror r8, r8, #32-(12*_r0+1)%32
+    ror r9, r9, #32-(12*_r0+1)%32
+    ror r10, r10, #32-(12*_r0+_e1+_w1)%32
+    ror r11, r11, #32-(12*_r0+_e1+_w1)%32
+    ror r12, r12, #32-(12*_r0+_e1+_w1)%32
+    ror lr, lr, #32-(12*_r0+_e1+_w1)%32
+    ldr pc, [sp, #offsetReturn] @ resume the caller at its saved address
+
+
+
+@ ----------------------------------------------------------------------------
+@ Assembly version of the C loop sketched below (in the sketch, '@' stands where C has ';'). The caller must pass XLen >= 44.
+@ size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen)
+@ {
+@     size_t initialLength = XLen@
+@
+@     do {
+@         SnP_Permute(state )@ /* Xoodyak_Up(instance, NULL, 0, 0)@ */
+@         SnP_AddBytes(state, X, 0, Xoodyak_Rkin)@ /* Xoodyak_Down(instance, X, Xoodyak_Rkin, 0)@ */
+@         SnP_AddByte(state, 0x01, Xoodyak_Rkin)@
+@         X += Xoodyak_Rkin@
+@         XLen -= Xoodyak_Rkin@
+@     } while (XLen >= Xoodyak_Rkin)@
+@
+@     return initialLength - XLen@
+@ }
+@ The state stays in r2-r12,lr for the whole loop and is written back to memory once, after the last block.
+.equ offsetAbsorbX , 4 @ r1 slot: current input pointer
+.equ offsetAbsorbXLen , 8 @ r2 slot: remaining length minus one block
+
+    .align 4
+.global Xoodyak_AbsorbKeyedFullBlocks
+.type Xoodyak_AbsorbKeyedFullBlocks, %function;
+Xoodyak_AbsorbKeyedFullBlocks:
+    push {r4-r12,lr}
+    mov r4, r2 @ r4 initialLength
+    subs r2, r2, #44
+    ldr r5, =Xoodyak_AbsorbKeyedFullBlocks_Ret @ resume address for the permutation
+    push {r0-r5} @ frame: state, X, XLen-44, r3, initial length, resume address
+    ldmia r0, {r2-r12,lr}
+Xoodyak_AbsorbKeyedFullBlocks_Loop:
+    b Xoodoo_Permute_12roundsAsm @ comes back at _Ret through [sp, #offsetReturn]
+Xoodyak_AbsorbKeyedFullBlocks_Ret:
+    ldr r0, [sp, #offsetAbsorbX]
+    ldr r1, [r0], #4
+    eors r2, r2, r1
+    ldr r1, [r0], #4
+    eors r3, r3, r1
+    ldr r1, [r0], #4
+    eors r4, r4, r1
+    ldr r1, [r0], #4
+    eors r5, r5, r1
+    ldr r1, [r0], #4
+    eors r6, r6, r1
+    ldr r1, [r0], #4
+    eors r7, r7, r1
+    ldr r1, [r0], #4
+    eors r8, r8, r1
+    ldr r1, [r0], #4
+    eors r9, r9, r1
+    ldr r1, [r0], #4
+    eors r10, r10, r1
+    ldr r1, [r0], #4
+    eors r11, r11, r1
+    ldr r1, [r0], #4
+    eors lr, lr, #1 @ SnP_AddByte(state, 0x01, 44): byte 44 is the low byte of word 11, held in lr
+    eors r12, r12, r1
+    ldr r1, [sp, #offsetAbsorbXLen]
+    str r0, [sp, #offsetAbsorbX]
+    subs r1, r1, #44
+    str r1, [sp, #offsetAbsorbXLen]
+    bcs Xoodyak_AbsorbKeyedFullBlocks_Loop @ no borrow: another full block remains
+    ldr r0, [sp, #offsetInstance]
+    stmia r0, {r2-r12,lr}
+    pop {r0-r5}
+    adds r2, r2, #44 @ r2 = bytes left unprocessed
+    sub r0, r4, r2 @ return initialLength - remaining
+    pop {r4-r12,pc}
+
+
+@ ----------------------------------------------------------------------------
+@ Assembly version of the C loop sketched below (in the sketch, '@' stands where C has ';'). The caller must pass XLen >= 16.
+@ size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen)
+@ {
+@     size_t initialLength = XLen@
+@
+@     do {
+@         SnP_Permute(state )@ /* Xoodyak_Up(instance, NULL, 0, 0)@ */
+@         SnP_AddBytes(state, X, 0, Xoodyak_Rhash)@ /* Xoodyak_Down(instance, X, Xoodyak_Rhash, 0)@ */
+@         SnP_AddByte(state, 0x01, Xoodyak_Rhash)@
+@         X += Xoodyak_Rhash@
+@         XLen -= Xoodyak_Rhash@
+@     } while (XLen >= Xoodyak_Rhash)@
+@
+@     return initialLength - XLen@
+@ }
+@ Same frame layout and loop structure as Xoodyak_AbsorbKeyedFullBlocks, with 16-byte blocks.
+    .align 4
+.global Xoodyak_AbsorbHashFullBlocks
+.type Xoodyak_AbsorbHashFullBlocks, %function;
+Xoodyak_AbsorbHashFullBlocks:
+    push {r4-r12,lr}
+    mov r4, r2 @ r4 initialLength
+    subs r2, r2, #16
+    ldr r5, =Xoodyak_AbsorbHashFullBlocks_Ret @ resume address for the permutation
+    push {r0-r5} @ frame: state, X, XLen-16, r3, initial length, resume address
+    ldmia r0, {r2-r12,lr}
+Xoodyak_AbsorbHashFullBlocks_Loop:
+    b Xoodoo_Permute_12roundsAsm @ comes back at _Ret through [sp, #offsetReturn]
+Xoodyak_AbsorbHashFullBlocks_Ret:
+    ldr r0, [sp, #offsetAbsorbX]
+    ldr r1, [r0], #4
+    eors r2, r2, r1
+    ldr r1, [r0], #4
+    eors r3, r3, r1
+    ldr r1, [r0], #4
+    eors r4, r4, r1
+    ldr r1, [r0], #4
+    eors r6, r6, #1 @ SnP_AddByte(state, 0x01, 16): byte 16 is the low byte of word 4, held in r6
+    eors r5, r5, r1
+    ldr r1, [sp, #offsetAbsorbXLen]
+    str r0, [sp, #offsetAbsorbX]
+    subs r1, r1, #16
+    str r1, [sp, #offsetAbsorbXLen]
+    bcs Xoodyak_AbsorbHashFullBlocks_Loop @ no borrow: another full block remains
+    ldr r0, [sp, #offsetInstance]
+    stmia r0, {r2-r12,lr}
+    pop {r0-r5}
+    adds r2, r2, #16 @ r2 = bytes left unprocessed
+    sub r0, r4, r2 @ return initialLength - remaining
+    pop {r4-r12,pc}
+
+
+@ ----------------------------------------------------------------------------
+@
+@ size_t
Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen)
+@ {
+@     size_t initialLength = YLen@
+@
+@     do {
+@         SnP_AddByte(state, 0x01, 0)@ /* Xoodyak_Down(instance, NULL, 0, 0)@ */
+@         SnP_Permute(state )@ /* Xoodyak_Up(instance, Y, Xoodyak_Rkout, 0)@ */
+@         SnP_ExtractBytes(state, Y, 0, Xoodyak_Rkout)@
+@         Y += Xoodyak_Rkout@
+@         YLen -= Xoodyak_Rkout@
+@     } while (YLen >= Xoodyak_Rkout)@
+@
+@     return initialLength - YLen@
+@ }
+@ In the sketch above '@' stands where C has ';'. The caller must pass YLen >= 24; the state stays in r2-r12,lr and is written back once, after the last block.
+.equ offsetSqueezeY , 4 @ r1 slot: current output pointer
+.equ offsetSqueezeYLen , 8 @ r2 slot: remaining length minus one block
+
+    .align 4
+.global Xoodyak_SqueezeKeyedFullBlocks
+.type Xoodyak_SqueezeKeyedFullBlocks, %function;
+Xoodyak_SqueezeKeyedFullBlocks:
+    push {r4-r12,lr}
+    mov r4, r2 @ r4 initialLength
+    subs r2, r2, #24
+    ldr r5, =Xoodyak_SqueezeKeyedFullBlocks_Ret @ resume address for the permutation
+    push {r0-r5} @ frame: state, Y, YLen-24, r3, initial length, resume address
+    ldmia r0, {r2-r12,lr}
+Xoodyak_SqueezeKeyedFullBlocks_Loop:
+    eors r2, r2, #1 @ SnP_AddByte(state, 0x01, 0): low byte of word 0
+    b Xoodoo_Permute_12roundsAsm @ comes back at _Ret through [sp, #offsetReturn]
+Xoodyak_SqueezeKeyedFullBlocks_Ret:
+    ldr r0, [sp, #offsetSqueezeY]
+    str r2, [r0], #4
+    str r3, [r0], #4
+    str r4, [r0], #4
+    str r5, [r0], #4
+    str r6, [r0], #4
+    str r7, [r0], #4
+    ldr r1, [sp, #offsetSqueezeYLen]
+    str r0, [sp, #offsetSqueezeY]
+    subs r1, r1, #24
+    str r1, [sp, #offsetSqueezeYLen]
+    bcs Xoodyak_SqueezeKeyedFullBlocks_Loop @ no borrow: another full block is requested
+    ldr r0, [sp, #offsetInstance]
+    stmia r0, {r2-r12,lr}
+    pop {r0-r5}
+    adds r2, r2, #24 @ r2 = bytes still to produce
+    sub r0, r4, r2 @ return initialLength - remaining
+    pop {r4-r12,pc}
+
+
+@ ----------------------------------------------------------------------------
+@ Assembly version of the C loop sketched below (in the sketch, '@' stands where C has ';'). The caller must pass YLen >= 16.
+@ size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen)
+@ {
+@     size_t initialLength = YLen@
+@
+@     do {
+@         SnP_AddByte(state, 0x01, 0)@ /* Xoodyak_Down(instance, NULL, 0, 0)@ */
+@         SnP_Permute(state)@ /* Xoodyak_Up(instance, Y, Xoodyak_Rhash, 0)@ */
+@         SnP_ExtractBytes(state, Y, 0, Xoodyak_Rhash)@
+@         Y += Xoodyak_Rhash@
+@         YLen -= Xoodyak_Rhash@
+@     } while (YLen >= Xoodyak_Rhash)@
+@
+@     return initialLength - YLen@
+@ }
+@ Same frame layout and loop structure as Xoodyak_SqueezeKeyedFullBlocks, with 16-byte blocks.
+    .align 4
+.global Xoodyak_SqueezeHashFullBlocks
+.type Xoodyak_SqueezeHashFullBlocks, %function;
+Xoodyak_SqueezeHashFullBlocks:
+    push {r4-r12,lr}
+    mov r4, r2 @ r4 initialLength
+    subs r2, r2, #16
+    ldr r5, =Xoodyak_SqueezeHashFullBlocks_Ret @ resume address for the permutation
+    push {r0-r5} @ frame: state, Y, YLen-16, r3, initial length, resume address
+    ldmia r0, {r2-r12,lr}
+Xoodyak_SqueezeHashFullBlocks_Loop:
+    eors r2, r2, #1 @ SnP_AddByte(state, 0x01, 0): low byte of word 0
+    b Xoodoo_Permute_12roundsAsm @ comes back at _Ret through [sp, #offsetReturn]
+Xoodyak_SqueezeHashFullBlocks_Ret:
+    ldr r0, [sp, #offsetSqueezeY]
+    str r2, [r0], #4
+    str r3, [r0], #4
+    str r4, [r0], #4
+    str r5, [r0], #4
+    ldr r1, [sp, #offsetSqueezeYLen]
+    str r0, [sp, #offsetSqueezeY]
+    subs r1, r1, #16
+    str r1, [sp, #offsetSqueezeYLen]
+    bcs Xoodyak_SqueezeHashFullBlocks_Loop @ no borrow: another full block is requested
+    ldr r0, [sp, #offsetInstance]
+    stmia r0, {r2-r12,lr}
+    pop {r0-r5}
+    adds r2, r2, #16 @ r2 = bytes still to produce
+    sub r0, r4, r2 @ return initialLength - remaining
+    pop {r4-r12,pc}
+
+
+@ ----------------------------------------------------------------------------
+@ Assembly version of the C loop sketched below (in the sketch, '@' stands where C has ';'). The caller must pass IOLen >= 24.
+@ size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen)
+@ {
+@     size_t initialLength = IOLen@
+@
+@     do {
+@         SnP_Permute(state)@
+@         SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@
+@         SnP_OverwriteBytes(state, O, 0, Xoodyak_Rkout)@
+@         SnP_AddByte(state, 0x01, Xoodyak_Rkout)@
+@         I += Xoodyak_Rkout@
+@         O += Xoodyak_Rkout@
+@         IOLen -= Xoodyak_Rkout@
+@     } while (IOLen >= Xoodyak_Rkout)@
+@
+@     return initialLength - IOLen@
+@ }
+@ The I and O offsets below include the 8 bytes of the temporary "push {r10, r11}"; the IOLen offset is used after those are popped.
+.equ offsetCryptI , 4+8 @ r1 slot: current input pointer
+.equ offsetCryptO , 8+8 @ r2 slot: current output pointer
+.equ offsetCryptIOLen , 12 @ r3 slot: remaining length minus one block
+
+    .align 4
+.global Xoodyak_EncryptFullBlocks
+.type Xoodyak_EncryptFullBlocks, %function;
+Xoodyak_EncryptFullBlocks:
+    push {r4-r12,lr}
+    mov r4, r3 @ r4 initialLength
+    subs r3, r3, #24
+    ldr r5, =Xoodyak_EncryptFullBlocks_Ret @ resume address for the permutation
+    push {r0-r5} @ frame: state, I, O, IOLen-24, initial length, resume address
+    ldmia r0, {r2-r12,lr}
+Xoodyak_EncryptFullBlocks_Loop:
+    b Xoodoo_Permute_12roundsAsm @ comes back at _Ret through [sp, #offsetReturn]
+Xoodyak_EncryptFullBlocks_Ret:
+    push {r10, r11} @ borrow r10/r11 as the output/input pointers
+    ldr r11, [sp, #offsetCryptI]
+    ldr r10, [sp, #offsetCryptO]
+    ldr r0, [r11], #4
+    ldr r1, [r11], #4
+    eors r2, r2, r0 @ state word ^= input word; the result is both the output word and the new state word
+    str r2, [r10], #4
+    eors r3, r3, r1
+    ldr r0, [r11], #4
+    str r3, [r10], #4
+    eors r4, r4, r0
+    ldr r1, [r11], #4
+    str r4, [r10], #4
+    eors r5, r5, r1
+    ldr r0, [r11], #4
+    str r5, [r10], #4
+    eors r6, r6, r0
+    ldr r1, [r11], #4
+    str r6, [r10], #4
+    eors r7, r7, r1
+    str r7, [r10], #4
+    str r10, [sp, #offsetCryptO]
+    str r11, [sp, #offsetCryptI]
+    pop {r10, r11} @ restore the two state words
+    ldr r0, [sp, #offsetCryptIOLen]
+    eors r8, r8, #1 @ SnP_AddByte(state, 0x01, 24): byte 24 is the low byte of word 6, held in r8
+    subs r0, r0, #24
+    str r0, [sp, #offsetCryptIOLen]
+    bcs Xoodyak_EncryptFullBlocks_Loop @ no borrow: another full block remains
+    ldr r0, [sp, #offsetInstance]
+    stmia r0, {r2-r12,lr}
+    pop {r0-r5}
+    adds r3, r3, #24 @ r3 = bytes left unprocessed
+    sub r0, r4, r3 @ return initialLength - remaining
+    pop {r4-r12,pc}
+
+
+@ ----------------------------------------------------------------------------
+@ Assembly version of the C loop sketched below (in the sketch, '@' stands where C has ';'). The caller must pass IOLen >= 24.
+@ size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen)
+@ {
+@     size_t initialLength = IOLen@
+@
+@     do {
+@         SnP_Permute(state)@
+@         SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@
+@         SnP_AddBytes(state, O, 0, Xoodyak_Rkout)@
+@         SnP_AddByte(state, 0x01, Xoodyak_Rkout)@
+@         I += Xoodyak_Rkout@
+@         O += Xoodyak_Rkout@
+@         IOLen -= Xoodyak_Rkout@
+@     } while (IOLen >= Xoodyak_Rkout)@
+@
+@     return initialLength - IOLen@
+@ }
+@ Uses the same frame layout and stack offsets as Xoodyak_EncryptFullBlocks.
+    .align 4
+.global Xoodyak_DecryptFullBlocks
+.type Xoodyak_DecryptFullBlocks, %function;
+Xoodyak_DecryptFullBlocks:
+    push {r4-r12,lr}
+    mov r4, r3 @ r4 initialLength
+    subs r3, r3, #24
+    ldr r5, =Xoodyak_DecryptFullBlocks_Ret @ resume address for the permutation
+    push {r0-r5} @ frame: state, I, O, IOLen-24, initial length, resume address
+    ldmia r0, {r2-r12,lr}
+Xoodyak_DecryptFullBlocks_Loop:
+    b Xoodoo_Permute_12roundsAsm @ comes back at _Ret through [sp, #offsetReturn]
+Xoodyak_DecryptFullBlocks_Ret:
+    push {r10, r11} @ borrow r10/r11 as the output/input pointers
+    ldr r11, [sp, #offsetCryptI]
+    ldr r10, [sp, #offsetCryptO]
+    ldr r0, [r11], #4
+    ldr r1, [r11], #4
+    eors r2, r2, r0 @ output word = state word ^ input word
+    str r2, [r10], #4
+    mov r2, r0 @ the new state word is the input word itself
+    eors r3, r3, r1
+    ldr r0, [r11], #4
+    str r3, [r10], #4
+    mov r3, r1
+    eors r4, r4, r0
+    ldr r1, [r11], #4
+    str r4, [r10], #4
+    mov r4, r0
+    eors r5, r5, r1
+    ldr r0, [r11], #4
+    str r5, [r10], #4
+    mov r5, r1
+    eors r6, r6, r0
+    ldr r1, [r11], #4
+    str r6, [r10], #4
+    mov r6, r0
+    eors r7, r7, r1
+    str r7, [r10], #4
+    mov r7, r1
+    str r10, [sp, #offsetCryptO]
+    str r11, [sp, #offsetCryptI]
+    pop {r10, r11} @ restore the two state words
+    ldr r0, [sp, #offsetCryptIOLen]
+    eors r8, r8, #1 @ SnP_AddByte(state, 0x01, 24): byte 24 is the low byte of word 6, held in r8
+    subs r0, r0, #24
+    str r0, [sp, #offsetCryptIOLen]
+    bcs Xoodyak_DecryptFullBlocks_Loop @ no borrow: another full block remains
+    ldr r0, [sp, #offsetInstance]
+    stmia r0, {r2-r12,lr}
+    pop {r0-r5}
+    adds r3, r3, #24 @ r3 = bytes left unprocessed
+    sub r0, r4, r3 @ return initialLength - remaining
+    pop {r4-r12,pc}
+
+
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Xoodyak.c b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Xoodyak.c
new file mode 100644
index 0000000..e0b67b5
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/Xoodyak.c
@@ -0,0 +1,53 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifdef XoodooReference + #include "displayIntermediateValues.h" +#endif + +#include +#include +#include "Xoodyak.h" + +#ifdef OUTPUT +#include +#include + +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length); +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length) +{ + unsigned int i; + + fprintf(f, "%s:", synopsis); + for(i=0; i +#include "Cyclist.h" +#include "Xoodoo-SnP.h" +#include "Xoodyak-parameters.h" + +KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment) +KCP_DeclareCyclistFunctions(Xoodyak) + +#else +#error This requires an implementation of Xoodoo +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/align.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/align.h new file mode 100644 index 0000000..82ad2f9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/align.h @@ -0,0 +1,33 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _align_h_ +#define _align_h_ + +/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. 
*/
+#ifdef ALIGN
+#undef ALIGN
+#endif
+/* ALIGN(x): request x-byte alignment using the compiler's own syntax; expands to nothing on unrecognized compilers. */
+#if defined(__GNUC__)
+#define ALIGN(x) __attribute__ ((aligned(x)))
+#elif defined(_MSC_VER)
+#define ALIGN(x) __declspec(align(x))
+#elif defined(__ARMCC_VERSION)
+#define ALIGN(x) __align(x)
+#else
+#define ALIGN(x) /* no alignment is requested on this compiler */
+#endif
+
+#endif
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/api.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/api.h
new file mode 100644
index 0000000..4ceda96
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/api.h
@@ -0,0 +1,5 @@
+#define CRYPTO_KEYBYTES 16 /* key length in bytes; encrypt.c passes it as the key length to Xoodyak_Initialize */
+#define CRYPTO_NSECBYTES 0 /* encrypt.c ignores its nsec argument */
+#define CRYPTO_NPUBBYTES 16 /* nonce length in bytes; encrypt.c absorbs this many bytes of npub */
+#define CRYPTO_ABYTES 16 /* presumably the tag length; note that encrypt.c uses its own TAGLEN (also 16) - keep them in sync */
+#define CRYPTO_NOOVERLAP 1 /* NOTE(review): not referenced by the code visible here; presumably tells the harness that buffers must not overlap - confirm */
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/brg_endian.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/brg_endian.h
new file mode 100644
index 0000000..7c640b9
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/brg_endian.h
@@ -0,0 +1,143 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The redistribution and use of this software (with or without changes)
+ is allowed without the payment of fees or royalties provided that:
+
+ 1. source code distributions include the above copyright notice, this
+ list of conditions and the following disclaimer;
+
+ 2. binary distributions include the above copyright notice, this list
+ of conditions and the following disclaimer in their documentation;
+
+ 3. the name of the copyright holder is not used to endorse products
+ built using this software without specific written permission.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue Date: 20/12/2007
+ Changes for ARM 9/9/2010
+*/
+/* Defines PLATFORM_BYTE_ORDER as IS_BIG_ENDIAN or IS_LITTLE_ENDIAN from whatever endianness macros the compiler or platform provides. */
+#ifndef _BRG_ENDIAN_H
+#define _BRG_ENDIAN_H
+
+#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
+#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
+
+#if 0 /* disabled: nothing in this block is compiled */
+/* Include files where endian defines and byteswap functions may reside (NOTE(review): the header names after each include are missing in this copy; restore them before enabling this block) */
+#if defined( __sun )
+#  include
+#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
+#  include
+#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
+      defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
+#  include
+#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
+#  if !defined( __MINGW32__ ) && !defined( _AIX )
+#    include
+#    if !defined( __BEOS__ )
+#      include
+#    endif
+#  endif
+#endif
+#endif
+
+/* Now attempt to set the define for platform byte order using any */
+/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
+/* seem to encompass most endian symbol definitions */
+/* Each of the four ladders below may define PLATFORM_BYTE_ORDER. */
+#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
+#  if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#  elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#  endif
+#elif defined( BIG_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( LITTLE_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
+#  if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#  elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#  endif
+#elif defined( _BIG_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( _LITTLE_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
+#  if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#  elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#  endif
+#elif defined( __BIG_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( __LITTLE_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
+#  if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
+#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#  elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
+#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#  endif
+#elif defined( __BIG_ENDIAN__ )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( __LITTLE_ENDIAN__ )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+/* if the platform byte order could not be determined, then try to */
+/* set this define using common machine defines */
+#if !defined(PLATFORM_BYTE_ORDER)
+
+#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
+    defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
+    defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
+    defined( vax ) || defined( vms ) || defined( VMS ) || \
+    defined( __VMS ) || defined( _M_X64 )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+
+#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
+      defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
+      defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
+      defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
+      defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
+      defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
+      defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \
+      defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+/* ARM targets reaching this point: big-endian only if __BIG_ENDIAN is defined, little-endian otherwise. */
+#elif defined(__arm__)
+#  ifdef __BIG_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#  else
+#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#  endif
+#elif 1 /* **** EDIT HERE IF NECESSARY **** */
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN /* default for every platform not matched above */
+#elif 0 /* **** EDIT HERE IF NECESSARY **** */
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#else
+#  error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order
+#endif
+
+#endif
+
+#endif
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/config.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/config.h
new file mode 100644
index 0000000..7dfc043
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/config.h
@@ -0,0 +1,4 @@
+/* File generated by ToTargetConfigFile.xsl */
+
+#define XKCP_has_Xoodyak
+#define XKCP_has_Xoodoo /* NOTE(review): presumably the switch tested before Xoodyak.h's "#error This requires an implementation of Xoodoo" - confirm */
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/encrypt.c b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/encrypt.c
new file mode 100644
index 0000000..3090334
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6/encrypt.c
@@ -0,0 +1,92 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include "crypto_aead.h" +#include "api.h" +#include "Xoodyak.h" +#include + +#if !defined(CRYPTO_KEYBYTES) + #define CRYPTO_KEYBYTES 16 +#endif +#if !defined(CRYPTO_NPUBBYTES) + #define CRYPTO_NPUBBYTES 16 +#endif + +#define TAGLEN 16 + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k) +{ + Xoodyak_Instance instance; + + (void)nsec; + + Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, NULL, 0, NULL, 0); + Xoodyak_Absorb(&instance, npub, CRYPTO_NPUBBYTES); + Xoodyak_Absorb(&instance, ad, (size_t)adlen); + Xoodyak_Encrypt(&instance, m, c, (size_t)mlen); + Xoodyak_Squeeze(&instance, c + mlen, TAGLEN); + *clen = mlen + TAGLEN; + #if 0 + { + unsigned int i; + for (i = 0; i < *clen; ++i ) + { + printf("\\x%02x", c[i] ); + } + printf("\n"); + } + #endif + return 0; +} + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + Xoodyak_Instance instance; + unsigned char tag[TAGLEN]; + unsigned long long mlen_; + + (void)nsec; + + *mlen = 0; + if (clen < TAGLEN) { + return -1; + } + mlen_ = clen - TAGLEN; + Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, NULL, 0, NULL, 0); + Xoodyak_Absorb(&instance, npub, CRYPTO_NPUBBYTES); + Xoodyak_Absorb(&instance, ad, (size_t)adlen); + Xoodyak_Decrypt(&instance, c, m, (size_t)mlen_); + Xoodyak_Squeeze(&instance, tag, TAGLEN); + if (memcmp(tag, c + mlen_, TAGLEN) != 0) { + memset(m, 0, (size_t)mlen_); + return -1; + } + *mlen = mlen_; + return 0; +} diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/Cyclist.h 
b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/Cyclist.h new file mode 100644 index 0000000..54522bb --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/Cyclist.h @@ -0,0 +1,66 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Cyclist_h_ +#define _Cyclist_h_ + +#include +#include "align.h" + +#define Cyclist_ModeHash 1 +#define Cyclist_ModeKeyed 2 + +#define Cyclist_PhaseDown 1 +#define Cyclist_PhaseUp 2 + +#ifdef OUTPUT + +#include + +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + uint8_t stateShadow[size]; \ + FILE *file; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#else + +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#endif + +#define KCP_DeclareCyclistFunctions(prefix) \ + void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \ + void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \ + void prefix##_Encrypt(prefix##_Instance *instance, const 
uint8_t *P, uint8_t *C, size_t PLen); \ + void prefix##_Decrypt(prefix##_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen); \ + void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \ + void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \ + void prefix##_Ratchet(prefix##_Instance *instance); + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/Cyclist.inc b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/Cyclist.inc new file mode 100644 index 0000000..ba7a156 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/Cyclist.inc @@ -0,0 +1,327 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define JOIN0(a, b) a ## b +#define JOIN(a, b) JOIN0(a, b) + +#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize) +#define SnP_Initialize JOIN(SnP, _Initialize) +#define SnP_AddBytes JOIN(SnP, _AddBytes) +#define SnP_AddByte JOIN(SnP, _AddByte) +#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes) +#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes) +#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes) + +#define Cyclist_Instance JOIN(prefix, _Instance) +#define Cyclist_Initialize JOIN(prefix, _Initialize) +#define Cyclist_Absorb JOIN(prefix, _Absorb) +#define Cyclist_Encrypt JOIN(prefix, _Encrypt) +#define Cyclist_Decrypt JOIN(prefix, _Decrypt) +#define Cyclist_Squeeze JOIN(prefix, _Squeeze) +#define Cyclist_SqueezeKey JOIN(prefix, _SqueezeKey) +#define Cyclist_Ratchet JOIN(prefix, _Ratchet) + +#define Cyclist_AbsorbAny JOIN(prefix, _AbsorbAny) +#define Cyclist_AbsorbKey JOIN(prefix, _AbsorbKey) +#define Cyclist_SqueezeAny JOIN(prefix, _SqueezeAny) +#define Cyclist_Down JOIN(prefix, _Down) +#define Cyclist_Up JOIN(prefix, _Up) +#define Cyclist_Crypt JOIN(prefix, _Crypt) + +#define Cyclist_f_bPrime JOIN(prefix, _f_bPrime) +#define Cyclist_Rhash JOIN(prefix, _Rhash) +#define Cyclist_Rkin JOIN(prefix, _Rkin) +#define Cyclist_Rkout JOIN(prefix, _Rkout) +#define Cyclist_lRatchet JOIN(prefix, _lRatchet) + +#if defined(CyclistFullBlocks_supported) +#define Cyclist_AbsorbKeyedFullBlocks JOIN(prefix, _AbsorbKeyedFullBlocks) +#define Cyclist_AbsorbHashFullBlocks JOIN(prefix, _AbsorbHashFullBlocks) +#define Cyclist_SqueezeKeyedFullBlocks JOIN(prefix, _SqueezeKeyedFullBlocks) +#define Cyclist_SqueezeHashFullBlocks JOIN(prefix, _SqueezeHashFullBlocks) +#define Cyclist_EncryptFullBlocks JOIN(prefix, _EncryptFullBlocks) +#define Cyclist_DecryptFullBlocks JOIN(prefix, _DecryptFullBlocks) +#endif + +/* ------- Cyclist internal interfaces ------- */ + +static void Cyclist_Down(Cyclist_Instance *instance, const 
uint8_t *Xi, unsigned int XiLen, uint8_t Cd) +{ + SnP_AddBytes(instance->state, Xi, 0, XiLen); + SnP_AddByte(instance->state, 0x01, XiLen); + SnP_AddByte(instance->state, (instance->mode == Cyclist_ModeHash) ? (Cd & 0x01) : Cd, Cyclist_f_bPrime - 1); + instance->phase = Cyclist_PhaseDown; + +} + +static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu) +{ + #if defined(OUTPUT) + uint8_t s[Cyclist_f_bPrime]; + #endif + + if (instance->mode != Cyclist_ModeHash) { + SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1); + } + #if defined(OUTPUT) + if (instance->file != NULL) { + SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime ); + SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime ); + } + #endif + SnP_Permute( instance->state ); + #if defined(OUTPUT) + if (instance->file != NULL) { + memcpy( instance->stateShadow, instance->state, sizeof(instance->state) ); + fprintf( instance->file, "Data XORed" ); + displayByteString( instance->file, "", s, Cyclist_f_bPrime ); + SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime ); + fprintf( instance->file, "After f() "); + displayByteString( instance->file, "", s, Cyclist_f_bPrime ); + } + #endif + instance->phase = Cyclist_PhaseUp; + SnP_ExtractBytes( instance->state, Yi, 0, YiLen ); +} + +static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd) +{ + unsigned int splitLen; + + do { + if (instance->phase != Cyclist_PhaseUp) { + Cyclist_Up(instance, NULL, 0, 0); + } + splitLen = MyMin(XLen, r); + Cyclist_Down(instance, X, splitLen, Cd); + Cd = 0; + X += splitLen; + XLen -= splitLen; + #if defined(CyclistFullBlocks_supported) + if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) { + size_t lenProcessed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen); + X += lenProcessed; + XLen -= lenProcessed; + } + else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) { + size_t lenProcessed 
= Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen); + X += lenProcessed; + XLen -= lenProcessed; + } + #endif + } while ( XLen != 0 ); +} + +static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen) +{ + uint8_t KID[Cyclist_Rkin]; + + assert(instance->mode == Cyclist_ModeHash); + assert((KLen + IDLen) <= (Cyclist_Rkin - 1)); + + instance->mode = Cyclist_ModeKeyed; + instance->Rabsorb = Cyclist_Rkin; + instance->Rsqueeze = Cyclist_Rkout; + if (KLen != 0) { + memcpy(KID, K, KLen); + memcpy(KID + KLen, ID, IDLen); + KID[KLen + IDLen] = (uint8_t)IDLen; + Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02); + if (counterLen != 0) { + Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00); + } + } +} + +static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu) +{ + unsigned int len; + + len = MyMin(YLen, instance->Rsqueeze ); + Cyclist_Up(instance, Y, len, Cu); + Y += len; + YLen -= len; + while (YLen != 0) { + #if defined(CyclistFullBlocks_supported) + if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkin)) { + size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen); + Y += lenProcessed; + YLen -= lenProcessed; + } + else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) { + size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen); + Y += lenProcessed; + YLen -= lenProcessed; + } + else + #endif + { + Cyclist_Down(instance, NULL, 0, 0); + len = MyMin(YLen, instance->Rsqueeze ); + Cyclist_Up(instance, Y, len, 0); + Y += len; + YLen -= len; + } + } +} + +static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt) +{ + unsigned int splitLen; + uint8_t P[Cyclist_Rkout]; + uint8_t Cu = 0x80; + + do { + if (decrypt != 0) { + #if defined(CyclistFullBlocks_supported) + if ((Cu 
== 0) && (IOLen >= Cyclist_Rkout)) { + size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen); + I += lenProcessed; + O += lenProcessed; + IOLen -= lenProcessed; + } + else + #endif + { + splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */ + Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */ + Xoodoo_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */ + Cyclist_Down(instance, O, splitLen, 0x00); + I += splitLen; + O += splitLen; + IOLen -= splitLen; + } + } + else { + #if defined(CyclistFullBlocks_supported) + if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) { + size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen); + I += lenProcessed; + O += lenProcessed; + IOLen -= lenProcessed; + } + else + #endif + { + splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */ + memcpy(P, I, splitLen); + Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */ + Xoodoo_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */ + Cyclist_Down(instance, P, splitLen, 0x00); + I += splitLen; + O += splitLen; + IOLen -= splitLen; + } + } + Cu = 0x00; + } while ( IOLen != 0 ); +} + +/* ------- Cyclist interfaces ------- */ + +void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen) +{ + SnP_StaticInitialize(); + SnP_Initialize(instance->state); + instance->phase = Cyclist_PhaseUp; + instance->mode = Cyclist_ModeHash; + instance->Rabsorb = Cyclist_Rhash; + instance->Rsqueeze = Cyclist_Rhash; + #ifdef OUTPUT + instance->file = 0; + SnP_Initialize( instance->stateShadow ); + #endif + if (KLen != 0) { + Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, counter, counterLen); + } +} + +void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, 
size_t XLen) +{ + Cyclist_AbsorbAny(instance, X, XLen, instance->Rabsorb, 0x03); +} + +void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen) +{ + assert(instance->mode == Cyclist_ModeKeyed); + Cyclist_Crypt(instance, P, C, PLen, 0); +} + +void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen) +{ + assert(instance->mode == Cyclist_ModeKeyed); + Cyclist_Crypt(instance, C, P, CLen, 1); +} + +void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen) +{ + Cyclist_SqueezeAny(instance, Y, YLen, 0x40); +} + +void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen) +{ + assert(instance->mode == Cyclist_ModeKeyed); + Cyclist_SqueezeAny(instance, K, KLen, 0x20); +} + +void Cyclist_Ratchet(Cyclist_Instance *instance) +{ + uint8_t buffer[Cyclist_lRatchet]; + + assert(instance->mode == Cyclist_ModeKeyed); + /* Squeeze then absorb is the same as overwriting with zeros */ + Cyclist_SqueezeAny(instance, buffer, sizeof(buffer), 0x10); + Cyclist_AbsorbAny(instance, buffer, sizeof(buffer), instance->Rabsorb, 0x00); +} + +#undef SnP_StaticInitialize +#undef SnP_Initialize +#undef SnP_AddBytes +#undef SnP_AddByte +#undef SnP_OverwriteBytes +#undef SnP_ExtractBytes +#undef SnP_ExtractAndAddBytes + +#undef Cyclist_Instance +#undef Cyclist_Initialize +#undef Cyclist_Absorb +#undef Cyclist_Encrypt +#undef Cyclist_Decrypt +#undef Cyclist_Squeeze +#undef Cyclist_SqueezeKey +#undef Cyclist_Ratchet + +#undef Cyclist_AbsorbAny +#undef Cyclist_AbsorbKey +#undef Cyclist_SqueezeAny +#undef Cyclist_Down +#undef Cyclist_Up +#undef Cyclist_Crypt + +#undef Cyclist_f_bPrime +#undef Cyclist_Rhash +#undef Cyclist_Rkin +#undef Cyclist_Rkout +#undef Cyclist_lRatchet + +#if defined(CyclistFullBlocks_supported) +#undef Cyclist_AbsorbKeyedFullBlocks +#undef Cyclist_AbsorbHashFullBlocks +#undef Cyclist_SqueezeKeyedFullBlocks +#undef Cyclist_SqueezeHashFullBlocks +#undef Cyclist_EncryptFullBlocks 
+#undef Cyclist_DecryptFullBlocks +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/Xoodoo-SnP.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/Xoodoo-SnP.h new file mode 100644 index 0000000..7d0c98b --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/Xoodoo-SnP.h @@ -0,0 +1,55 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodoo_SnP_h_ +#define _Xoodoo_SnP_h_ + +#include +#include + +/** For the documentation, see SnP-documentation.h. 
#ifndef _Xoodoo_SnP_h_
#define _Xoodoo_SnP_h_

#include <stddef.h>
#include <stdint.h>

/** For the documentation, see SnP-documentation.h.
 */

#define Xoodoo_implementation      "32-bit optimized ARM assembler implementation"
#define Xoodoo_stateSizeInBytes    (3*4*4)
#define Xoodoo_stateAlignment      4

/* No static tables to set up in this implementation: expands to nothing. */
#define Xoodoo_StaticInitialize()
void Xoodoo_Initialize(void *state);
/*
 * XOR the byte argData into the state byte at index argOffset.
 * All arguments and the whole expansion are parenthesized so that pointer
 * expressions such as `p + 1` are cast as a unit and the macro can be used
 * inside larger expressions.
 */
#define Xoodoo_AddByte(argS, argData, argOffset)    (((uint8_t*)(argS))[(argOffset)] ^= (argData))
void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount);
/* Prototype deliberately left disabled, as in the original header:
 * void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds); */
void Xoodoo_Permute_6rounds(void *state);
void Xoodoo_Permute_12rounds(void *state);
void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length);
void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length);

/* Optimized Xoofff inner loops provided by the assembler file. */
#define Xoodoo_FastXoofff_supported
void Xoofff_AddIs(uint8_t *output, const uint8_t *input, size_t bitLen);
size_t Xoofff_CompressFastLoop(uint8_t *kRoll, uint8_t *xAccu, const uint8_t *input, size_t length);
size_t Xoofff_ExpandFastLoop(uint8_t *yAccu, const uint8_t *kRoll, uint8_t *output, size_t length);

/* Bulk Cyclist routines; each returns the number of bytes it processed. */
#define CyclistFullBlocks_supported
size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen);
size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen);
size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen);
size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen);
size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);
size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);

#endif
@
@ The eXtended Keccak Code Package (XKCP)
@ https://github.com/XKCP/XKCP
@
@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
@
@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
@
@ For more information, feedback or questions, please refer to the Keccak Team website:
@ https://keccak.team/
@
@ To the extent possible under law, the implementer has waived all copyright
@ and related or neighboring rights to the source code in this file.
@ http://creativecommons.org/publicdomain/zero/1.0/
@

@ WARNING: These functions work only on little endian CPU with ARMv6m architecture (Cortex-M0, ...).


    .thumb
    .syntax unified
.text

@ ----------------------------------------------------------------------------
@
@ void Xoodoo_Initialize(void *state)
@
@ Zeroes the state: four stores of three zero words each (48 bytes) starting at r0.
@
    .align  4
.global   Xoodoo_Initialize
.type   Xoodoo_Initialize, %function;
Xoodoo_Initialize:
    movs    r1, #0
    movs    r2, #0
    movs    r3, #0
    stmia   r0!, { r1 - r3 }
    stmia   r0!, { r1 - r3 }
    stmia   r0!, { r1 - r3 }
    stmia   r0!, { r1 - r3 }
    bx      lr
    .align  4


@ ----------------------------------------------------------------------------
@
@ void Xoodoo_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
@ XORs length bytes of data into the state starting at byte offset.
@ In: r0 = state, r1 = data, r2 = offset, r3 = length.
@ Word-at-a-time when length >= 4 and both pointers are word aligned; the
@ remaining bytes (or everything, otherwise) are handled bytewise, from the
@ highest index down to 0.
@
.global Xoodoo_AddBytes
.type   Xoodoo_AddBytes, %function;
Xoodoo_AddBytes:
    push    {r4,lr}
    adds    r0, r0, r2                              @ state += offset
    subs    r3, r3, #4                              @ if length >= 4
    bcc     Xoodoo_AddBytes_Bytes
    movs    r2, r0                                  @ and state+offset and data pointer both 32-bit aligned
    orrs    r2, r2, r1
    lsls    r2, #30                                 @ keep only the two low address bits
    bne     Xoodoo_AddBytes_Bytes
Xoodoo_AddBytes_LanesLoop:                          @ then, perform on words
    ldr     r2, [r0]
    ldmia   r1!, {r4}
    eors    r2, r2, r4
    stmia   r0!, {r2}
    subs    r3, r3, #4
    bcs     Xoodoo_AddBytes_LanesLoop
Xoodoo_AddBytes_Bytes:
    adds    r3, r3, #4                              @ undo the bias: r3 = bytes left
    beq     Xoodoo_AddBytes_Exit
    subs    r3, r3, #1                              @ r3 = index of last remaining byte
Xoodoo_AddBytes_BytesLoop:
    ldrb    r2, [r0, r3]
    ldrb    r4, [r1, r3]
    eors    r2, r2, r4
    strb    r2, [r0, r3]
    subs    r3, r3, #1
    bcs     Xoodoo_AddBytes_BytesLoop               @ loop until the index borrows below 0
Xoodoo_AddBytes_Exit:
    pop     {r4,pc}
    .align  4


@ ----------------------------------------------------------------------------
@
@ void Xoodoo_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
@ Copies length bytes of data over the state starting at byte offset.
@ Same structure as Xoodoo_AddBytes, without the XOR.
@
.global Xoodoo_OverwriteBytes
.type   Xoodoo_OverwriteBytes, %function;
Xoodoo_OverwriteBytes:
    adds    r0, r0, r2                              @ state += offset
    subs    r3, r3, #4                              @ if length >= 4
    bcc     Xoodoo_OverwriteBytes_Bytes
    movs    r2, r0                                  @ and state+offset and data pointer both 32-bit aligned
    orrs    r2, r2, r1
    lsls    r2, #30
    bne     Xoodoo_OverwriteBytes_Bytes
Xoodoo_OverwriteBytes_LanesLoop:                    @ then, perform on words
    ldmia   r1!, {r2}
    stmia   r0!, {r2}
    subs    r3, r3, #4
    bcs     Xoodoo_OverwriteBytes_LanesLoop
Xoodoo_OverwriteBytes_Bytes:
    adds    r3, r3, #4                              @ r3 = bytes left
    beq     Xoodoo_OverwriteBytes_Exit
    subs    r3, r3, #1
Xoodoo_OverwriteBytes_BytesLoop:
    ldrb    r2, [r1, r3]
    strb    r2, [r0, r3]
    subs    r3, r3, #1
    bcs     Xoodoo_OverwriteBytes_BytesLoop
Xoodoo_OverwriteBytes_Exit:
    bx      lr
    .align  4


@ ----------------------------------------------------------------------------
@
@ void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount)
@
@ Zeroes the first byteCount bytes of the state: byteCount/4 word stores,
@ then byteCount%4 byte stores.
@
.global Xoodoo_OverwriteWithZeroes
.type   Xoodoo_OverwriteWithZeroes, %function;
Xoodoo_OverwriteWithZeroes:
    movs    r3, #0
    lsrs    r2, r1, #2                              @ r2 = number of whole words
    beq     Xoodoo_OverwriteWithZeroes_Bytes
Xoodoo_OverwriteWithZeroes_LoopLanes:
    stm     r0!, { r3 }
    subs    r2, r2, #1
    bne     Xoodoo_OverwriteWithZeroes_LoopLanes
Xoodoo_OverwriteWithZeroes_Bytes:
    lsls    r1, r1, #32-2                           @ isolate byteCount & 3
    beq     Xoodoo_OverwriteWithZeroes_Exit
    lsrs    r1, r1, #32-2
Xoodoo_OverwriteWithZeroes_LoopBytes:
    subs    r1, r1, #1
    strb    r3, [r0, r1]
    bne     Xoodoo_OverwriteWithZeroes_LoopBytes
Xoodoo_OverwriteWithZeroes_Exit:
    bx      lr
    .align  4


@ ----------------------------------------------------------------------------
@
@ void Xoodoo_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
@
@ Copies length bytes of the state, starting at byte offset, to data.
@
.global Xoodoo_ExtractBytes
.type   Xoodoo_ExtractBytes, %function;
Xoodoo_ExtractBytes:
    adds    r0, r0, r2                              @ state += offset
    subs    r3, r3, #4                              @ if length >= 4
    bcc     Xoodoo_ExtractBytes_Bytes
    movs    r2, r0                                  @ and state+offset and data pointer both 32-bit aligned
    orrs    r2, r2, r1
    lsls    r2, #30
    bne     Xoodoo_ExtractBytes_Bytes
Xoodoo_ExtractBytes_LanesLoop:                      @ then, perform on words
    ldmia   r0!, {r2}
    stmia   r1!, {r2}
    subs    r3, r3, #4
    bcs     Xoodoo_ExtractBytes_LanesLoop
Xoodoo_ExtractBytes_Bytes:
    adds    r3, r3, #4                              @ r3 = bytes left
    beq     Xoodoo_ExtractBytes_Exit
    subs    r3, r3, #1
Xoodoo_ExtractBytes_BytesLoop:
    ldrb    r2, [r0, r3]
    strb    r2, [r1, r3]
    subs    r3, r3, #1
    bcs     Xoodoo_ExtractBytes_BytesLoop
Xoodoo_ExtractBytes_Exit:
    bx      lr
    .align  4


@ ----------------------------------------------------------------------------
@
@ void Xoodoo_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
@
@ output[i] = state[offset + i] XOR input[i] for i in 0..length-1.
@ In: r0 = state, r1 = input, r2 = output, r3 = offset, length on the stack.
@
.global Xoodoo_ExtractAndAddBytes
.type   Xoodoo_ExtractAndAddBytes, %function;
Xoodoo_ExtractAndAddBytes:
    push    {r4,r5}
    adds    r0, r0, r3                              @ state += offset (offset register no longer needed, reuse for length)
    ldr     r3, [sp, #8]                            @ get length argument from stack (above the two pushed registers)
    subs    r3, r3, #4                              @ if length >= 4
    bcc     Xoodoo_ExtractAndAddBytes_Bytes
    movs    r5, r0                                  @ and input/output/state pointer all 32-bit aligned
    orrs    r5, r5, r1
    orrs    r5, r5, r2
    lsls    r5, #30
    bne     Xoodoo_ExtractAndAddBytes_Bytes
Xoodoo_ExtractAndAddBytes_LanesLoop:                @ then, perform on words
    ldmia   r0!, {r5}
    ldmia   r1!, {r4}
    eors    r5, r5, r4
    stmia   r2!, {r5}
    subs    r3, r3, #4
    bcs     Xoodoo_ExtractAndAddBytes_LanesLoop
Xoodoo_ExtractAndAddBytes_Bytes:
    adds    r3, r3, #4                              @ r3 = bytes left
    beq     Xoodoo_ExtractAndAddBytes_Exit
    subs    r3, r3, #1
Xoodoo_ExtractAndAddBytes_BytesLoop:
    ldrb    r5, [r0, r3]
    ldrb    r4, [r1, r3]
    eors    r5, r5, r4
    strb    r5, [r2, r3]
    subs    r3, r3, #1
    bcs     Xoodoo_ExtractAndAddBytes_BytesLoop
Xoodoo_ExtractAndAddBytes_Exit:
    pop     {r4,r5}
    bx      lr
    .align  4


@ ----------------------------------------------------------------------------

@ offsets in RAM state
.equ _oA00       , 0*4
.equ _oA01       , 1*4
.equ _oA02       , 2*4
.equ _oA03       , 3*4
.equ _oA10       , 4*4
.equ _oA11       , 5*4
.equ _oA12       , 6*4
.equ _oA13       , 7*4
.equ _oA20       , 8*4
.equ _oA21       , 9*4
.equ _oA22       , 10*4
.equ _oA23       , 11*4

@ possible locations of state lanes
@ (as used by the macros below: locRegL lanes are operated on directly,
@  locRegH lanes go through a scratch register, locMem lanes are stack slots)
.equ locRegL     , 1
.equ locRegH     , 2
.equ locMem      , 3

@ ----------------------------------------------------------------------------

@ rotation amounts used by mRound
.equ _r0         , 5
.equ _r1         , 14
.equ _r2         , 1

.equ _w1         , 11

.equ _e0         , 2
.equ _e1         , 8

@ ----------------------------------------------------------------------------

@ mXor3: ro = a0 ^ a1 ^ a2.
@ loc tells where a0 lives: a register usable directly (locRegL), a register
@ first copied to tt (locRegH), or a stack slot at offset a0 loaded into tt (locMem).
.macro  mXor3       ro, a0, a1, a2, loc, tt
    mov     \ro, \a1
    eors    \ro, \ro, \a2
    .if     \loc == locRegL
    eors    \ro, \ro, \a0
    .else
    .if     \loc == locRegH
    mov     \tt, \a0
    .else
    ldr     \tt, [sp, #\a0]
    .endif
    eors    \ro, \ro, \tt
    .endif
    .endm

@ mXor: ro ^= ri, where ro is a register operated on directly (locRegL),
@ a register updated through tt (locRegH), or a stack slot at offset ro (locMem).
.macro  mXor        ro, ri, tt, loc
    .if     \loc == locRegL
    eors    \ro, \ro, \ri
    .else
    .if     \loc == locRegH
    mov     \tt, \ro
    eors    \tt, \tt, \ri
    mov     \ro, \tt
    .else
    ldr     \tt, [sp, #\ro]
    eors    \tt, \tt, \ri
    str     \tt, [sp, #\ro]
    .endif
    .endif
    .endm

@ mChi3: three-lane non-linear step on one column, using scratch r0/r1:
@   a0 ^= a2 & ~a1 ; a1 ^= a0 & ~a2 ; a2 ^= a1 & ~a0   (each using the updated values)
@ The new a0 is additionally written back to a0s (register or stack slot)
@ when loc is not locRegL.
.macro  mChi3       a0,a1,a2,r0,r1,a0s,loc
    mov     \r1, \a2
    mov     \r0, \a1
    bics    \r1, \r1, \r0
    eors    \a0, \a0, \r1
    .if     \loc != locRegL
    .if     \loc == locRegH
    mov     \a0s, \a0
    .else
    str     \a0, [sp, #\a0s]
    .endif
    .endif

    mov     \r0, \a0
    bics    \r0, \r0, \a2
    mov     \r1, \a1
    eors    \r1, \r1, \r0
    mov     \a1, \r1

    bics    \r1, \r1, \a0
    eors    \a2, \a2, \r1
    .endm

@ mRound: one round on the state held in registers and one stack slot.
@ offsetRC  : stack offset of the pointer to the next round constant
@             (the pointer is advanced by one word per round)
@ offsetA03 : stack offset of the one lane kept in memory
@ r0-r2 are scratch.
.macro  mRound      offsetRC, offsetA03

    @ Theta: Column Parity Mixer
    mXor3   r0, \offsetA03, lr, r7, locMem, r2
    mov     r1, r0
    movs    r2, #32-(_r1-_r0)
    rors    r1, r1, r2
    eors    r1, r1, r0
    movs    r2, #32-_r0
    rors    r1, r1, r2
    mXor3   r0, r3, r10, r4, locRegL, r2
    mXor    r3, r1, r2, locRegL
    mXor    r10, r1, r2, locRegH
    mXor    r4, r1, r2, locRegL

    mov     r1, r0
    movs    r2, #32-(_r1-_r0)
    rors    r1, r1, r2
    eors    r1, r1, r0
    movs    r2, #32-_r0
    rors    r1, r1, r2
    mXor3   r0, r8, r11, r5, locRegH, r2
    mXor    r8, r1, r2, locRegH
    mXor    r11, r1, r2, locRegH
    mXor    r5, r1, r2, locRegL

    mov     r1, r0
    movs    r2, #32-(_r1-_r0)
    rors    r1, r1, r2
    eors    r1, r1, r0
    movs    r2, #32-_r0
    rors    r1, r1, r2
    mXor3   r0, r9, r12, r6, locRegH, r2
    mXor    r9, r1, r2, locRegH
    mXor    r12, r1, r2, locRegH
    mXor    r6, r1, r2, locRegL

    mov     r1, r0
    movs    r2, #32-(_r1-_r0)
    rors    r1, r1, r2
    eors    r1, r1, r0
    movs    r2, #32-_r0
    rors    r1, r1, r2
    mXor    \offsetA03, r1, r2, locMem
    mXor    lr, r1, r2, locRegH
    mXor    r7, r1, r2, locRegL

    @ Rho-west: Plane shift
    movs    r0, #32-_w1
    rors    r4, r4, r0
    rors    r5, r5, r0
    rors    r6, r6, r0
    rors    r7, r7, r0
    mov     r0, lr
    mov     lr, r12
    mov     r12, r11
    mov     r11, r10
    mov     r10, r0

    @ Iota: round constant
    ldr     r0, [sp, #\offsetRC]
    ldmia   r0!, {r1}
    str     r0, [sp, #\offsetRC]
    eors    r3, r3, r1

    @ Chi: non linear step, on columns
    mChi3   r3, r10, r4, r0, r1, r3, locRegL
    mov     r2, r8
    mChi3   r2, r11, r5, r0, r1, r8, locRegH
    mov     r2, r9
    mChi3   r2, r12, r6, r0, r1, r9, locRegH
    ldr     r2, [sp, #\offsetA03]
    mChi3   r2, lr, r7, r0, r1, \offsetA03, locMem

    @ Rho-east: Plane shift
    movs    r0, #32-1
    mov     r1, r10
    rors    r1, r1, r0
    mov     r10, r1
    mov     r1, r11
    rors    r1, r1, r0
    mov     r11, r1
    mov     r1, r12
    rors    r1, r1, r0
    mov     r12, r1
    mov     r1, lr
    rors    r1, r1, r0
    mov     lr, r1

    movs    r0, #32-_e1
    rors    r4, r4, r0
    rors    r5, r5, r0
    rors    r6, r6, r0
    rors    r7, r7, r0

    mov     r0, r4
    mov     r4, r6
    mov     r6, r0
    mov     r0, r5
    mov     r5, r7
    mov     r7, r0

    .endm

@ ----------------------------------------------------------------------------
@
@ void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds)
@

@ offsets on stack
@ Frame below the saved registers (Xoodoo_Permute_Nrounds_SAS bytes):
@   [sp + offsetA03]   spill slot for the fourth state word
@   [sp + offsetRC]    pointer to the next round constant
@   [sp + offsetState] saved state pointer (the r0 pushed just before the frame)
.equ Xoodoo_Permute_Nrounds_offsetA03   , 0
.equ Xoodoo_Permute_Nrounds_offsetRC    , 4
.equ Xoodoo_Permute_Nrounds_SAS         , 8
.equ Xoodoo_Permute_Nrounds_offsetState , Xoodoo_Permute_Nrounds_SAS

@ In: r0 = state, r1 = nrounds.
@ The round-constant pointer starts nrounds words before the zero terminator
@ at Xoodoo_Permute_RoundConstants12, so the last nrounds table entries are
@ used; the loop stops when the next constant to be used is that zero word.
@ NOTE(review): the loop body always runs at least once, so nrounds is
@ presumably expected to be in 1..12 - confirm against callers.
.global Xoodoo_Permute_Nrounds
.type   Xoodoo_Permute_Nrounds, %function;
Xoodoo_Permute_Nrounds:
    push    {r4-r6,lr}
    mov     r2, r8                                  @ save high registers r8-r11 through low ones
    mov     r3, r9
    mov     r4, r10
    mov     r5, r11
    push    {r0,r2-r5,r7}                           @ also keeps the state pointer for the store-back

    sub     sp, #Xoodoo_Permute_Nrounds_SAS
    adr     r2, Xoodoo_Permute_RoundConstants12
    lsls    r1, r1, #2                              @ nrounds * 4 bytes per constant
    subs    r2, r2, r1
    str     r2, [sp, #Xoodoo_Permute_Nrounds_offsetRC]

    @ Load the 12 state words: word 0 -> r3, words 1-2 -> r8-r9, word 3 -> stack slot,
    @ words 4-7 -> r10, r11, r12, lr, words 8-11 -> r4-r7.
    ldm     r0!, {r3,r5,r6,r7}
    mov     r8, r5
    mov     r9, r6
    str     r7, [sp, #Xoodoo_Permute_Nrounds_offsetA03]
    ldm     r0!, {r4,r5,r6,r7}
    mov     r10, r4
    mov     r11, r5
    mov     r12, r6
    mov     lr, r7
    ldm     r0!, {r4,r5,r6,r7}
Xoodoo_Permute_Nrouds_Loop:
    mRound  Xoodoo_Permute_Nrounds_offsetRC, Xoodoo_Permute_Nrounds_offsetA03
    ldr     r0, [sp, #Xoodoo_Permute_Nrounds_offsetRC]
    ldr     r0, [r0]                                @ peek at the next round constant
    cmp     r0, #0
    beq     Xoodoo_Permute_Nrouds_Done              @ zero terminator reached
    b       Xoodoo_Permute_Nrouds_Loop
Xoodoo_Permute_Nrouds_Done:
    ldr     r0, [sp, #Xoodoo_Permute_Nrounds_offsetState]

    @ Store the state back in the same word order it was loaded.
    stm     r0!, {r3}
    mov     r1, r8
    mov     r2, r9
    ldr     r3, [sp, #Xoodoo_Permute_Nrounds_offsetA03]
    stm     r0!, {r1,r2,r3}

    mov     r1, r10
    mov     r2, r11
    mov     r3, r12
    stm     r0!, {r1,r2,r3}

    mov     r1, lr
    stm     r0!, {r1,r4,r5,r6,r7}

    add     sp, #Xoodoo_Permute_Nrounds_SAS
    pop     {r0-r4,r7}                              @ r1-r4 receive the saved r8-r11
    mov     r8, r1
    mov     r9, r2
    mov     r10, r3
    mov     r11, r4
    pop     {r4-r6,pc}
    .align  4


@ Round constants, one word per round, followed by a zero terminator.
Xoodoo_Permute_RoundConstants:
    .long   0x00000058
    .long   0x00000038
    .long   0x000003C0
    .long   0x000000D0
    .long   0x00000120
    .long   0x00000014
    .long   0x00000060
    .long   0x0000002C
    .long   0x00000380
    .long   0x000000F0
    .long   0x000001A0
    .long   0x00000012
Xoodoo_Permute_RoundConstants12:
    .long   0
    .align  4

@ ----------------------------------------------------------------------------
@
@ void Xoodoo_Permute_6rounds( void *state )
@
.global Xoodoo_Permute_6rounds
.type   Xoodoo_Permute_6rounds, %function;
+Xoodoo_Permute_6rounds: + movs r1, #6 + b Xoodoo_Permute_Nrounds + .align 4 + + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_Permute_12rounds( void *state ) +@ +.global Xoodoo_Permute_12rounds +.type Xoodoo_Permute_12rounds, %function; +Xoodoo_Permute_12rounds: + movs r1, #12 + b Xoodoo_Permute_Nrounds + .align 4 + + + +.equ Xoofff_BlockSize , 3*4*4 + +@ ---------------------------------------------------------------------------- +@ +@ void Xoofff_AddIs(BitSequence *output, const BitSequence *input, BitLength bitLen) +.global Xoofff_AddIs +.type Xoofff_AddIs, %function; +Xoofff_AddIs: + push {r4-r6,lr} + movs r3, r0 @ check input and output pointer both 32-bit .align 8ed + orrs r3, r3, r1 + lsls r3, r3, #30 + bne Xoofff_AddIs_Bytes + subs r2, r2, #16*8 + bcc Xoofff_AddIs_LessThan16 +Xoofff_AddIs_16Loop: + ldr r3, [r0, #0] + ldr r4, [r0, #4] + ldmia r1!, {r5,r6} + eors r3, r3, r5 + eors r4, r4, r6 + stmia r0!, {r3,r4} + ldr r3, [r0, #0] + ldr r4, [r0, #4] + ldmia r1!, {r5,r6} + eors r3, r3, r5 + eors r4, r4, r6 + stmia r0!, {r3,r4} + subs r2, r2, #16*8 + bcs Xoofff_AddIs_16Loop +Xoofff_AddIs_LessThan16: + adds r2, r2, #16*8 + beq Xoofff_AddIs_Return + subs r2, r2, #4*8 + bcc Xoofff_AddIs_LessThan4 +Xoofff_AddIs_4Loop: + ldr r3, [r0] + ldmia r1!, {r4} + eors r3, r3, r4 + stmia r0!, {r3} + subs r2, r2, #4*8 + bcs Xoofff_AddIs_4Loop +Xoofff_AddIs_LessThan4: + adds r2, r2, #4*8 + beq Xoofff_AddIs_Return +Xoofff_AddIs_Bytes: + subs r2, r2, #8 + bcc Xoofff_AddIs_LessThan1 +Xoofff_AddIs_1Loop: + ldrb r3, [r0] + ldrb r4, [r1] + adds r1, r1, #1 + eors r3, r3, r4 + strb r3, [r0] + adds r0, r0, #1 + subs r2, r2, #8 + bcs Xoofff_AddIs_1Loop +Xoofff_AddIs_LessThan1: + adds r2, r2, #8 + beq Xoofff_AddIs_Return + ldrb r3, [r0] + ldrb r4, [r1] + movs r1, #1 + eors r3, r3, r4 + lsls r1, r1, r2 + subs r1, r1, #1 + ands r3, r3, r1 + strb r3, [r0] +Xoofff_AddIs_Return: + pop {r4-r6,pc} + .align 4 + + +.macro mLdu rv, ri, tt + ldrb 
\rv, [\ri, #3] + lsls \rv, \rv, #8 + ldrb \tt, [\ri, #2] + orrs \rv, \rv, \tt + lsls \rv, \rv, #8 + ldrb \tt, [\ri, #1] + orrs \rv, \rv, \tt + lsls \rv, \rv, #8 + ldrb \tt, [\ri, #0] + orrs \rv, \rv, \tt + adds \ri, \ri, #4 + .endm + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoofff_CompressFastLoop(unsigned char *kRoll, unsigned char *xAccu, const unsigned char *input, size_t length) +@ + +@ offsets on stack +.equ Xoofff_CompressFastLoop_offsetA03 , 0 +.equ Xoofff_CompressFastLoop_offsetRC , 4 +.equ Xoofff_CompressFastLoop_SAS , 8 +.equ Xoofff_CompressFastLoop_kRoll , Xoofff_CompressFastLoop_SAS+0 +.equ Xoofff_CompressFastLoop_input , Xoofff_CompressFastLoop_SAS+4 +.equ Xoofff_CompressFastLoop_xAccu , Xoofff_CompressFastLoop_SAS+8+16 +.equ Xoofff_CompressFastLoop_iInput , Xoofff_CompressFastLoop_SAS+12+16 +.equ Xoofff_CompressFastLoop_length , Xoofff_CompressFastLoop_SAS+16+16 + +.global Xoofff_CompressFastLoop +.type Xoofff_CompressFastLoop, %function; +Xoofff_CompressFastLoop: + subs r3, #Xoofff_BlockSize @ length must be greater than block size + push {r1-r7,lr} + mov r4, r8 + mov r5, r9 + mov r6, r10 + mov r7, r11 + push {r0,r2,r4-r7} + sub sp, #Xoofff_CompressFastLoop_SAS + ldm r0!, {r3,r5,r6,r7} @ get initial kRoll + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoofff_CompressFastLoop_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoofff_CompressFastLoop_Loop: + adr r1, Xoofff_CompressFastLoop_RoundConstants6 + str r1, [sp, #Xoofff_CompressFastLoop_offsetRC] + + ldr r0, [sp, #Xoofff_CompressFastLoop_input] @ add input + lsls r1, r0, #30 + bne Xoofff_CompressFastLoop_Unaligned + +Xoofff_CompressFastLoop_Aligned: + ldmia r0!, {r1} + eors r3, r3, r1 + ldmia r0!, {r1} + mov r2, r8 + eors r2, r2, r1 + mov r8, r2 + ldmia r0!, {r1} + mov r2, r9 + eors r2, r2, r1 + mov r9, r2 + ldmia r0!, {r1} + ldr r2, [sp, #Xoofff_CompressFastLoop_offsetA03] + eors 
r2, r2, r1 + str r2, [sp, #Xoofff_CompressFastLoop_offsetA03] + + ldmia r0!, {r1} + mov r2, r10 + eors r2, r2, r1 + mov r10, r2 + ldmia r0!, {r1} + mov r2, r11 + eors r2, r2, r1 + mov r11, r2 + ldmia r0!, {r1} + mov r2, r12 + eors r2, r2, r1 + mov r12, r2 + ldmia r0!, {r1} + mov r2, lr + eors r2, r2, r1 + mov lr, r2 + + ldmia r0!, {r1,r2} + eors r4, r4, r1 + eors r5, r5, r2 + ldmia r0!, {r1,r2} + eors r6, r6, r1 + eors r7, r7, r2 + + b Xoofff_CompressFastLoop_Permute + .align 4 +Xoofff_CompressFastLoop_RoundConstants6: + .long 0x00000060 + .long 0x0000002C + .long 0x00000380 + .long 0x000000F0 + .long 0x000001A0 + .long 0x00000012 + .long 0 + +Xoofff_CompressFastLoop_Unaligned: + mLdu r1, r0, r2 + eors r3, r3, r1 + mLdu r1, r0, r2 + mov r2, r8 + eors r2, r2, r1 + mov r8, r2 + mLdu r1, r0, r2 + mov r2, r9 + eors r2, r2, r1 + mov r9, r2 + mLdu r1, r0, r2 + ldr r2, [sp, #Xoofff_CompressFastLoop_offsetA03] + eors r2, r2, r1 + str r2, [sp, #Xoofff_CompressFastLoop_offsetA03] + + mLdu r1, r0, r2 + mov r2, r10 + eors r2, r2, r1 + mov r10, r2 + mLdu r1, r0, r2 + mov r2, r11 + eors r2, r2, r1 + mov r11, r2 + mLdu r1, r0, r2 + mov r2, r12 + eors r2, r2, r1 + mov r12, r2 + mLdu r1, r0, r2 + mov r2, lr + eors r2, r2, r1 + mov lr, r2 + + mLdu r1, r0, r2 + eors r4, r4, r1 + mLdu r1, r0, r2 + eors r5, r5, r1 + mLdu r1, r0, r2 + eors r6, r6, r1 + mLdu r1, r0, r2 + eors r7, r7, r1 + +Xoofff_CompressFastLoop_Permute: + str r0, [sp, #Xoofff_CompressFastLoop_input] +Xoofff_CompressFastLoop_PermuteLoop: + mRound Xoofff_CompressFastLoop_offsetRC, Xoofff_CompressFastLoop_offsetA03 + ldr r0, [sp, #Xoofff_CompressFastLoop_offsetRC] + ldr r0, [r0] + cmp r0, #0 + beq Xoofff_CompressFastLoop_PermuteDone + b Xoofff_CompressFastLoop_PermuteLoop +Xoofff_CompressFastLoop_PermuteDone: + + @ Extract and add into xAccu + ldr r0, [sp, #Xoofff_CompressFastLoop_xAccu] + + ldr r1, [r0] + eors r1, r1, r3 + stmia r0!, {r1} + + ldr r1, [r0] + mov r2, r8 + eors r1, r1, r2 + stmia r0!, {r1} + + ldr r1, [r0] 
+ mov r2, r9 + eors r1, r1, r2 + stmia r0!, {r1} + + ldr r1, [r0] + ldr r2, [sp, #Xoofff_CompressFastLoop_offsetA03] + eors r1, r1, r2 + stmia r0!, {r1} + + + ldr r1, [r0] + mov r2, r10 + eors r1, r1, r2 + stmia r0!, {r1} + ldr r1, [r0] + mov r2, r11 + eors r1, r1, r2 + stmia r0!, {r1} + ldr r1, [r0] + mov r2, r12 + eors r1, r1, r2 + stmia r0!, {r1} + ldr r1, [r0] + mov r2, lr + eors r1, r1, r2 + stmia r0!, {r1} + + ldr r1, [r0, #0] + ldr r2, [r0, #4] + ldr r3, [r0, #8] + eors r1, r1, r4 + ldr r4, [r0, #12] + eors r2, r2, r5 + eors r3, r3, r6 + eors r4, r4, r7 + stm r0!, {r1,r2,r3,r4} + + @roll kRoll-c + ldr r0, [sp, #Xoofff_CompressFastLoop_kRoll] + ldmia r0!, {r7} + ldmia r0!, {r4-r6} + ldmia r0!, {r3} + ldmia r0!, {r1,r2} + mov r8, r1 + mov r9, r2 + ldmia r0!, {r1,r2} + str r1, [sp, #Xoofff_CompressFastLoop_offsetA03] + mov r10, r2 + ldmia r0!, {r1,r2} + mov r11, r1 + mov r12, r2 + ldmia r0!, {r1} + mov lr, r1 + + lsls r1, r7, #13 + eors r7, r7, r1 + mov r1, r3 + movs r2, #32-3 + rors r1, r1, r2 + eors r7, r7, r1 + + subs r0, r0, #Xoofff_BlockSize + stmia r0!, {r3} + mov r1, r8 + mov r2, r9 + stmia r0!, {r1,r2} + ldr r1, [sp, #Xoofff_CompressFastLoop_offsetA03] + mov r2, r10 + stmia r0!, {r1,r2} + mov r1, r11 + mov r2, r12 + stmia r0!, {r1,r2} + mov r1, lr + stmia r0!, {r1,r4-r7} + + @ loop management + ldr r0, [sp, #Xoofff_CompressFastLoop_length] + subs r0, #Xoofff_BlockSize + str r0, [sp, #Xoofff_CompressFastLoop_length] + bcc Xoofff_CompressFastLoop_Done + b Xoofff_CompressFastLoop_Loop +Xoofff_CompressFastLoop_Done: + @ return number of bytes processed + ldr r0, [sp, #Xoofff_CompressFastLoop_input] + ldr r1, [sp, #Xoofff_CompressFastLoop_iInput] + subs r0, r0, r1 + add sp, #Xoofff_CompressFastLoop_SAS+8 + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r1-r7,pc} + .align 4 + + +.macro mStu rv, ro + strb \rv, [\ro, #0] + lsrs \rv, \rv, #8 + strb \rv, [\ro, #1] + lsrs \rv, \rv, #8 + strb \rv, [\ro, #2] + lsrs \rv, \rv, #8 + strb \rv, 
[\ro, #3] + adds \ro, \ro, #4 + .endm + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoofff_ExpandFastLoop(unsigned char *yAccu, const unsigned char *kRoll, unsigned char *output, size_t length) +@ + +@ offsets on stack +.equ Xoofff_ExpandFastLoop_offsetA03, 0 +.equ Xoofff_ExpandFastLoop_offsetRC , 4 +.equ Xoofff_ExpandFastLoop_SAS , 8 +.equ Xoofff_ExpandFastLoop_yAccu , Xoofff_ExpandFastLoop_SAS+0 +.equ Xoofff_ExpandFastLoop_output , Xoofff_ExpandFastLoop_SAS+4 +.equ Xoofff_ExpandFastLoop_kRoll , Xoofff_ExpandFastLoop_SAS+8+16 +.equ Xoofff_ExpandFastLoop_iOutput , Xoofff_ExpandFastLoop_SAS+12+16 +.equ Xoofff_ExpandFastLoop_length , Xoofff_ExpandFastLoop_SAS+16+16 + +.global Xoofff_ExpandFastLoop +.type Xoofff_ExpandFastLoop, %function; +Xoofff_ExpandFastLoop: + subs r3, #Xoofff_BlockSize @ length must be greater than block size + push {r1-r7,lr} + mov r4, r8 + mov r5, r9 + mov r6, r10 + mov r7, r11 + push {r0,r2,r4-r7} + sub sp, #Xoofff_ExpandFastLoop_SAS + + ldm r0!, {r3,r5,r6,r7} @ get initial yAccu + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoofff_ExpandFastLoop_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoofff_ExpandFastLoop_Loop: + adr r1, Xoofff_ExpandFastLoop_RoundConstants6 + str r1, [sp, #Xoofff_ExpandFastLoop_offsetRC] +Xoofff_ExpandFastLoop_PermuteLoop: + mRound Xoofff_ExpandFastLoop_offsetRC, Xoofff_ExpandFastLoop_offsetA03 + ldr r0, [sp, #Xoofff_ExpandFastLoop_offsetRC] + ldr r0, [r0] + cmp r0, #0 + beq Xoofff_ExpandFastLoop_PermuteDone + b Xoofff_ExpandFastLoop_PermuteLoop +Xoofff_ExpandFastLoop_RoundConstants6: + .long 0x00000060 + .long 0x0000002C + .long 0x00000380 + .long 0x000000F0 + .long 0x000001A0 + .long 0x00000012 + .long 0 +Xoofff_ExpandFastLoop_PermuteDone: + @ Add k and extract + ldr r0, [sp, #Xoofff_ExpandFastLoop_kRoll] + ldr r1, [sp, #Xoofff_ExpandFastLoop_output] @ add input + lsls r2, r1, #30 + bne 
Xoofff_ExpandFastLoop_Unaligned +Xoofff_ExpandFastLoop_Aligned: + ldmia r0!, {r2} + eors r2, r2, r3 + stmia r1!, {r2} + ldmia r0!, {r2} + mov r3, r8 + eors r2, r2, r3 + stmia r1!, {r2} + ldmia r0!, {r2} + mov r3, r9 + eors r2, r2, r3 + stmia r1!, {r2} + ldmia r0!, {r2} + ldr r3, [sp, #Xoofff_ExpandFastLoop_offsetA03] + eors r2, r2, r3 + stmia r1!, {r2} + + ldmia r0!, {r2} + mov r3, r10 + eors r2, r2, r3 + stmia r1!, {r2} + ldmia r0!, {r2} + mov r3, r11 + eors r2, r2, r3 + stmia r1!, {r2} + ldmia r0!, {r2} + mov r3, r12 + eors r2, r2, r3 + stmia r1!, {r2} + ldmia r0!, {r2} + mov r3, lr + eors r2, r2, r3 + stmia r1!, {r2} + + ldmia r0!, {r2,r3} + eors r2, r2, r4 + eors r3, r3, r5 + stmia r1!, {r2,r3} + ldmia r0!, {r2,r3} + eors r2, r2, r6 + eors r3, r3, r7 + stmia r1!, {r2,r3} + b Xoofff_ExpandFastLoop_ExtractDone + +Xoofff_ExpandFastLoop_Unaligned: + ldmia r0!, {r2} + eors r2, r2, r3 + mStu r2, r1 + ldmia r0!, {r2} + mov r3, r8 + eors r2, r2, r3 + mStu r2, r1 + ldmia r0!, {r2} + mov r3, r9 + eors r2, r2, r3 + mStu r2, r1 + ldmia r0!, {r2} + ldr r3, [sp, #Xoofff_ExpandFastLoop_offsetA03] + eors r2, r2, r3 + mStu r2, r1 + + ldmia r0!, {r2} + mov r3, r10 + eors r2, r2, r3 + mStu r2, r1 + ldmia r0!, {r2} + mov r3, r11 + eors r2, r2, r3 + mStu r2, r1 + ldmia r0!, {r2} + mov r3, r12 + eors r2, r2, r3 + mStu r2, r1 + ldmia r0!, {r2} + mov r3, lr + eors r2, r2, r3 + mStu r2, r1 + + ldmia r0!, {r2,r3} + eors r2, r2, r4 + mStu r2, r1 + eors r3, r3, r5 + mStu r3, r1 + ldmia r0!, {r2,r3} + eors r2, r2, r6 + mStu r2, r1 + eors r3, r3, r7 + mStu r3, r1 + +Xoofff_ExpandFastLoop_ExtractDone: + str r1, [sp, #Xoofff_ExpandFastLoop_output] + + @ roll-e yAccu + ldr r0, [sp, #Xoofff_ExpandFastLoop_yAccu] + ldmia r0!, {r7} + ldmia r0!, {r4-r6} + ldmia r0!, {r3} + ldmia r0!, {r1,r2} + mov r8, r1 + mov r9, r2 + ldmia r0!, {r1,r2} + str r1, [sp, #Xoofff_ExpandFastLoop_offsetA03] + mov r10, r2 + ldmia r0!, {r1,r2} + mov r11, r1 + mov r12, r2 + ldmia r0!, {r1} + mov lr, r1 + + mov r1, r10 + 
ands r1, r1, r3 + movs r2, #32-5 + rors r7, r7, r2 + eors r7, r7, r1 + movs r2, #32-13 + mov r1, r3 + rors r1, r1, r2 + eors r7, r7, r1 + movs r1, #7 + eors r7, r7, r1 + + subs r0, r0, #Xoofff_BlockSize + stmia r0!, {r3} + mov r1, r8 + mov r2, r9 + stmia r0!, {r1,r2} + ldr r1, [sp, #Xoofff_ExpandFastLoop_offsetA03] + mov r2, r10 + stmia r0!, {r1,r2} + mov r1, r11 + mov r2, r12 + stmia r0!, {r1,r2} + mov r1, lr + stmia r0!, {r1,r4-r7} + + @ loop management + ldr r0, [sp, #Xoofff_ExpandFastLoop_length] + subs r0, #Xoofff_BlockSize + str r0, [sp, #Xoofff_ExpandFastLoop_length] + bcc Xoofff_ExpandFastLoop_Done + b Xoofff_ExpandFastLoop_Loop +Xoofff_ExpandFastLoop_Done: + @ return number of bytes processed + ldr r0, [sp, #Xoofff_ExpandFastLoop_output] + ldr r1, [sp, #Xoofff_ExpandFastLoop_iOutput] + subs r0, r0, r1 + add sp, #Xoofff_ExpandFastLoop_SAS+8 + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r1-r7,pc} + .align 4 + + diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/Xoodoo.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/Xoodoo.h new file mode 100644 index 0000000..1b6f1a9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/Xoodoo.h @@ -0,0 +1,79 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodoo_h_ +#define _Xoodoo_h_ + +#include +#include + +#define MAXROUNDS 12 /* matches the number of _rc constants listed below */ +#define NROWS 3 +#define NCOLUMS 4 +#define NLANES (NCOLUMS*NROWS) /* 12 lanes, each a tXoodooLane */ + +/* Round constants; _rc12 down to _rc1 hold the same values, in the same order, as the Xoodoo_Permute_RoundConstants12 table in the ARMv6-M assembly */ +#define _rc12 0x00000058 +#define _rc11 0x00000038 +#define _rc10 0x000003C0 +#define _rc9 0x000000D0 +#define _rc8 0x00000120 +#define _rc7 0x00000014 +#define _rc6 0x00000060 +#define _rc5 0x0000002C +#define _rc4 0x00000380 +#define _rc3 0x000000F0 +#define _rc2 0x000001A0 +#define _rc1 0x00000012 + + +#if !defined(ROTL32) + #if defined (__arm__) && !defined(__GNUC__) + #define ROTL32(a, offset) __ror(a, (32-(offset))%32) + #elif defined(_MSC_VER) + #define ROTL32(a, offset) _rotl(a, (offset)%32) + #else /* portable form: the two halves are ORed, so a rotation by 0 or by a multiple of 32 returns a unchanged instead of cancelling to 0, and a is parenthesized before the cast */ + #define ROTL32(a, offset) ((((uint32_t)(a)) << ((offset)%32)) | (((uint32_t)(a)) >> ((32-(offset))%32))) + #endif +#endif + +#if !defined(READ32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define READ32_UNALIGNED(argAddress) (*((const __packed uint32_t*)(argAddress))) + #elif defined(_MSC_VER) + #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress))) + #else /* NOTE(review): plain uint32_t dereference; nothing in this branch makes a misaligned address safe despite the macro name */ + #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress))) + #endif +#endif + +#if !defined(WRITE32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define WRITE32_UNALIGNED(argAddress, argData) (*((__packed uint32_t*)(argAddress)) = (argData)) + #elif defined(_MSC_VER) + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #else /* NOTE(review): plain uint32_t store, same caveat as READ32_UNALIGNED */ + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #endif +#endif + +#if !defined(index) /* flat lane index for column __x and row __y, each reduced modulo its dimension */ + #define index(__x,__y) ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS)) +#endif + +typedef uint32_t tXoodooLane; + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/Xoodyak-parameters.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/Xoodyak-parameters.h new file mode 100644 index 
0000000..a8c34d8 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/Xoodyak-parameters.h @@ -0,0 +1,26 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodyak_parameters_h_ +#define _Xoodyak_parameters_h_ + +#define Xoodyak_f_bPrime 48 /* presumably the permutation state size in bytes (12 lanes of 4 bytes) - TODO confirm */ +#define Xoodyak_Rhash 16 /* block size in bytes for hash mode; the ARMv6-M hash absorb and squeeze loops step by 16 */ +#define Xoodyak_Rkin 44 /* block size in bytes for keyed absorb; the ARMv6-M keyed absorb loop steps by 44 */ +#define Xoodyak_Rkout 24 /* block size in bytes for keyed squeeze; the ARMv6-M keyed squeeze loop steps by 24 */ +#define Xoodyak_lRatchet 16 /* not referenced in the visible sources; presumably the ratchet length in bytes - TODO confirm */ + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/Xoodyak-u1-armv6m-le-gcc.s b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/Xoodyak-u1-armv6m-le-gcc.s new file mode 100644 index 0000000..91ab5a2 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/Xoodyak-u1-armv6m-le-gcc.s @@ -0,0 +1,1165 @@ +@ +@ The eXtended Keccak Code Package (XKCP) +@ https://github.com/XKCP/XKCP +@ +@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. +@ +@ Implementation by Ronny Van Keer, hereby denoted as "the implementer". +@ +@ For more information, feedback or questions, please refer to the Keccak Team website: +@ https://keccak.team/ +@ +@ To the extent possible under law, the implementer has waived all copyright +@ and related or neighboring rights to the source code in this file. +@ http://creativecommons.org/publicdomain/zero/1.0/ +@ + +@ WARNING: These functions work only on little endian CPU with ARMv6m architecture (e.g., Cortex-M0). 
+ + + .thumb + .syntax unified +.text + +@ ---------------------------------------------------------------------------- + +@ offsets in RAM state +.equ _oA00 , 0*4 +.equ _oA01 , 1*4 +.equ _oA02 , 2*4 +.equ _oA03 , 3*4 +.equ _oA10 , 4*4 +.equ _oA11 , 5*4 +.equ _oA12 , 6*4 +.equ _oA13 , 7*4 +.equ _oA20 , 8*4 +.equ _oA21 , 9*4 +.equ _oA22 , 10*4 +.equ _oA23 , 11*4 + +@ possible locations of state lanes +.equ locRegL , 1 @ the macros below use the operand directly as an instruction register +.equ locRegH , 2 @ the macros below copy the operand through a scratch register with mov +.equ locMem , 3 @ the macros below treat the operand as an sp-relative offset for ldr and str + +@ ---------------------------------------------------------------------------- + +.equ _r0 , 5 @ _r0 and _r1 set the rors amounts in the Theta step of mRound +.equ _r1 , 14 +.equ _r2 , 1 + +.equ _w1 , 11 @ sets the rors amount in the Rho-west step of mRound + +.equ _e0 , 2 +.equ _e1 , 8 @ sets the rors amount in the Rho-east step of mRound + +@ ---------------------------------------------------------------------------- + +.macro mLoadU r, p, o, t + ldrb \r, [\p, #\o+0] @ builds a little-endian 32-bit value in r from the 4 bytes at p offset o, using t as scratch + ldrb \t, [\p, #\o+1] + lsls \t, \t, #8 + orrs \r, \r, \t + ldrb \t, [\p, #\o+2] + lsls \t, \t, #16 + orrs \r, \r, \t + ldrb \t, [\p, #\o+3] + lsls \t, \t, #24 + orrs \r, \r, \t + .endm + +.macro mStoreU p, o, s, t, loc + .if \loc == locRegL @ writes s as 4 bytes, lowest byte first, at p offset o; t is scratch and s is left unchanged + strb \s, [\p, #\o+0] + lsrs \t, \s, #8 + .else @ any other loc: s is first copied into t + mov \t, \s + strb \t, [\p, #\o+0] + lsrs \t, \t, #8 + .endif + strb \t, [\p, #\o+1] + lsrs \t, \t, #8 + strb \t, [\p, #\o+2] + lsrs \t, \t, #8 + strb \t, [\p, #\o+3] + .endm + +.macro mXor3 ro, a0, a1, a2, loc, tt + mov \ro, \a1 @ ro = a0 xor a1 xor a2, with a0 fetched according to loc and tt as scratch + eors \ro, \ro, \a2 + .if \loc == locRegL + eors \ro, \ro, \a0 + .else + .if \loc == locRegH + mov \tt, \a0 + .else + ldr \tt, [sp, #\a0] + .endif + eors \ro, \ro, \tt + .endif + .endm + +.macro mXor ro, ri, tt, loc + .if \loc == locRegL @ ro = ro xor ri, with ro read and written according to loc and tt as scratch + eors \ro, \ro, \ri + .else + .if \loc == locRegH + mov \tt, \ro + eors \tt, \tt, \ri + mov \ro, \tt + .else + ldr \tt, [sp, #\ro] + eors \tt, \tt, \ri + str \tt, [sp, #\ro] + .endif + .endif + .endm + +.macro mChi3 a0,a1,a2,r0,r1,a0s,loc + mov \r1, \a2 + mov \r0, \a1 + bics \r1, \r1, \r0 + eors \a0, \a0, \r1 + .if \loc != locRegL + .if \loc == locRegH + mov \a0s, \a0 + .else + str \a0, [sp, #\a0s] + .endif + .endif + + mov \r0, \a0 + bics \r0, \r0, \a2 + mov \r1, \a1 
+ eors \r1, \r1, \r0 + mov \a1, \r1 + + bics \r1, \r1, \a0 + eors \a2, \a2, \r1 + .endm + +.macro mRound offsetRC, offsetA03 + + @ Theta: Column Parity Mixer + mXor3 r0, \offsetA03, lr, r7, locMem, r2 + mov r1, r0 + movs r2, #32-(_r1-_r0) + rors r1, r1, r2 + eors r1, r1, r0 + movs r2, #32-_r0 + rors r1, r1, r2 + mXor3 r0, r3, r10, r4, locRegL, r2 + mXor r3, r1, r2, locRegL + mXor r10, r1, r2, locRegH + mXor r4, r1, r2, locRegL + + mov r1, r0 + movs r2, #32-(_r1-_r0) + rors r1, r1, r2 + eors r1, r1, r0 + movs r2, #32-_r0 + rors r1, r1, r2 + mXor3 r0, r8, r11, r5, locRegH, r2 + mXor r8, r1, r2, locRegH + mXor r11, r1, r2, locRegH + mXor r5, r1, r2, locRegL + + mov r1, r0 + movs r2, #32-(_r1-_r0) + rors r1, r1, r2 + eors r1, r1, r0 + movs r2, #32-_r0 + rors r1, r1, r2 + mXor3 r0, r9, r12, r6, locRegH, r2 + mXor r9, r1, r2, locRegH + mXor r12, r1, r2, locRegH + mXor r6, r1, r2, locRegL + + mov r1, r0 + movs r2, #32-(_r1-_r0) + rors r1, r1, r2 + eors r1, r1, r0 + movs r2, #32-_r0 + rors r1, r1, r2 + mXor \offsetA03, r1, r2, locMem + mXor lr, r1, r2, locRegH + mXor r7, r1, r2, locRegL + + @ Rho-west: Plane shift + movs r0, #32-_w1 + rors r4, r4, r0 + rors r5, r5, r0 + rors r6, r6, r0 + rors r7, r7, r0 + mov r0, lr + mov lr, r12 + mov r12, r11 + mov r11, r10 + mov r10, r0 + + @ Iota: round constant + ldr r0, [sp, #\offsetRC] + ldmia r0!, {r1} + str r0, [sp, #\offsetRC] + eors r3, r3, r1 + + @ Chi: non linear step, on colums + mChi3 r3, r10, r4, r0, r1, r3, locRegL + mov r2, r8 + mChi3 r2, r11, r5, r0, r1, r8, locRegH + mov r2, r9 + mChi3 r2, r12, r6, r0, r1, r9, locRegH + ldr r2, [sp, #\offsetA03] + mChi3 r2, lr, r7, r0, r1, \offsetA03, locMem + + @ Rho-east: Plane shift + movs r0, #32-1 + mov r1, r10 + rors r1, r1, r0 + mov r10, r1 + mov r1, r11 + rors r1, r1, r0 + mov r11, r1 + mov r1, r12 + rors r1, r1, r0 + mov r12, r1 + mov r1, lr + rors r1, r1, r0 + mov lr, r1 + + movs r0, #32-_e1 + rors r4, r4, r0 + rors r5, r5, r0 + rors r6, r6, r0 + rors r7, r7, r0 + + mov r0, r4 
+ mov r4, r6 + mov r6, r0 + mov r0, r5 + mov r5, r7 + mov r7, r0 + + .endm + +@ ---------------------------------------------------------------------------- +@ +@ Xoodoo_Permute_12roundsAsm +@ File-local routine (no .global): applies mRound once per table entry to the state held in registers and in the stack slot at offsetA03, then jumps to the address stored in the stack slot at offsetReturn. + +@ offsets on stack +.equ Xoodoo_Permute_12rounds_offsetA03 , 0 @ stack slot holding one state lane that mRound reads and writes through sp +.equ Xoodoo_Permute_12rounds_offsetRC , 4 @ stack slot holding the pointer to the next round constant +.equ Xoodoo_Permute_12rounds_offsetReturn, 8 @ stack slot holding the address to continue at after the last round +.equ Xoodoo_Permute_12rounds_SAS , 12 @ total size of the three slots above + + .align 4 +.type Xoodoo_Permute_12roundsAsm, %function; +Xoodoo_Permute_12roundsAsm: + adr r2, Xoodoo_Permute_RoundConstants12 @ start at the first round constant + str r2, [sp, #Xoodoo_Permute_12rounds_offsetRC] +Xoodoo_Permute_12rounds_Loop: + mRound Xoodoo_Permute_12rounds_offsetRC, Xoodoo_Permute_12rounds_offsetA03 + ldr r0, [sp, #Xoodoo_Permute_12rounds_offsetRC] + ldr r0, [r0] @ peek at the next table entry; the table ends with 0 + cmp r0, #0 + beq Xoodoo_Permute_12rounds_Done + b Xoodoo_Permute_12rounds_Loop +Xoodoo_Permute_12rounds_Done: + ldr r0, [sp, #Xoodoo_Permute_12rounds_offsetReturn] + bx r0 @ continue at the address the caller stored in the offsetReturn slot + .align 4 +Xoodoo_Permute_RoundConstants12: + .long 0x00000058 + .long 0x00000038 + .long 0x000003C0 + .long 0x000000D0 + .long 0x00000120 + .long 0x00000014 + .long 0x00000060 + .long 0x0000002C + .long 0x00000380 + .long 0x000000F0 + .long 0x000001A0 + .long 0x00000012 + .long 0 + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen) +@ { +@ size_t initialLength = XLen; +@ +@ do { +@ SnP_Permute(state ); /* Xoodyak_Up(instance, NULL, 0, 0); */ +@ SnP_AddBytes(state, X, 0, Xoodyak_Rkin); /* Xoodyak_Down(instance, X, Xoodyak_Rkin, 0); */ +@ SnP_AddByte(state, 0x01, Xoodyak_Rkin); +@ X += Xoodyak_Rkin; +@ XLen -= Xoodyak_Rkin; +@ } while (XLen >= Xoodyak_Rkin); +@ return initialLength - XLen; +@ } +@ +.equ XoodyakAbsorb_offsetState , (Xoodoo_Permute_12rounds_SAS+0) +.equ XoodyakAbsorb_offsetX , (Xoodoo_Permute_12rounds_SAS+4) +.equ XoodyakAbsorb_offsetXLen , (Xoodoo_Permute_12rounds_SAS+8) +.equ XoodyakAbsorb_offsetInitialLen , 
(Xoodoo_Permute_12rounds_SAS+12) + +.equ XoodyakAbsorb_SAS , (Xoodoo_Permute_12rounds_SAS+20) + + .align 4 +.global Xoodyak_AbsorbKeyedFullBlocks +.type Xoodyak_AbsorbKeyedFullBlocks, %function; +Xoodyak_AbsorbKeyedFullBlocks: + push {r3-r7,lr} + mov r4, r8 + mov r5, r9 + mov r6, r10 + mov r7, r11 + push {r4-r7} + + sub sp, #XoodyakAbsorb_SAS + str r0, [sp, #XoodyakAbsorb_offsetState] @ setup variables on stack + str r1, [sp, #XoodyakAbsorb_offsetX] + str r2, [sp, #XoodyakAbsorb_offsetInitialLen] + subs r2, r2, #44 + str r2, [sp, #XoodyakAbsorb_offsetXLen] + ldr r5, =Xoodyak_AbsorbKeyedFullBlocks_Ret+1 + str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn] + + ldm r0!, {r3,r5,r6,r7} @ state in registers + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoodyak_AbsorbKeyedFullBlocks_Loop: + ldr r0, =Xoodoo_Permute_12roundsAsm + bx r0 + .align 4 + .ltorg +Xoodyak_AbsorbKeyedFullBlocks_Ret: + ldr r0, [sp, #XoodyakAbsorb_offsetX] + lsls r1, r0, #30 + bne Xoodyak_AbsorbKeyedFullBlocks_Unaligned +Xoodyak_AbsorbKeyedFullBlocks_Aligned: + ldmia r0!, {r1} + eors r3, r3, r1 + ldmia r0!, {r1} + mov r2, r8 + eors r2, r2, r1 + mov r8, r2 + ldmia r0!, {r1} + mov r2, r9 + eors r2, r2, r1 + mov r9, r2 + ldmia r0!, {r1} + ldr r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] + eors r2, r2, r1 + str r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] + + ldmia r0!, {r1} + mov r2, r10 + eors r2, r2, r1 + mov r10, r2 + ldmia r0!, {r1} + mov r2, r11 + eors r2, r2, r1 + mov r11, r2 + ldmia r0!, {r1} + mov r2, r12 + eors r2, r2, r1 + mov r12, r2 + ldmia r0!, {r1} + mov r2, lr + eors r2, r2, r1 + mov lr, r2 + + ldmia r0!, {r1} + eors r4, r4, r1 + ldmia r0!, {r1} + eors r5, r5, r1 + ldmia r0!, {r1} + eors r6, r6, r1 +Xoodyak_AbsorbKeyedFullBlocks_EndLoop: + str r0, [sp, #XoodyakAbsorb_offsetX] + movs r2, #1 + eors r7, r7, r2 + ldr r1, [sp, #XoodyakAbsorb_offsetXLen] + 
subs r1, r1, #44 + str r1, [sp, #XoodyakAbsorb_offsetXLen] + bcs Xoodyak_AbsorbKeyedFullBlocks_Loop + ldr r0, [sp, #XoodyakAbsorb_offsetState] + stm r0!, {r3} + mov r1, r8 + mov r2, r9 + ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03] + stm r0!, {r1,r2,r3} + mov r1, r10 + mov r2, r11 + mov r3, r12 + stm r0!, {r1,r2,r3} + mov r1, lr + stm r0!, {r1,r4,r5,r6,r7} + + ldr r0, [sp, #XoodyakAbsorb_offsetInitialLen] + ldr r2, [sp, #XoodyakAbsorb_offsetXLen] + adds r2, r2, #44 + subs r0, r0, r2 + + add sp, #XoodyakAbsorb_SAS + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r3-r7,pc} +Xoodyak_AbsorbKeyedFullBlocks_Unaligned: + mLoadU r1, r0, 0, r2 + eors r3, r3, r1 + + mLoadU r1, r0, 4, r2 + mov r2, r8 + eors r2, r2, r1 + mov r8, r2 + + mLoadU r1, r0, 8, r2 + mov r2, r9 + eors r2, r2, r1 + mov r9, r2 + + mLoadU r1, r0, 12, r2 + ldr r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] + eors r2, r2, r1 + str r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] + + mLoadU r1, r0, 16, r2 + mov r2, r10 + eors r2, r2, r1 + mov r10, r2 + + mLoadU r1, r0, 20, r2 + mov r2, r11 + eors r2, r2, r1 + mov r11, r2 + + mLoadU r1, r0, 24, r2 + mov r2, r12 + eors r2, r2, r1 + mov r12, r2 + + mLoadU r1, r0, 28, r2 + mov r2, lr + eors r2, r2, r1 + mov lr, r2 + + adds r0, r0, #32 + mLoadU r1, r0, 0, r2 + eors r4, r4, r1 + mLoadU r1, r0, 4, r2 + eors r5, r5, r1 + mLoadU r1, r0, 8, r2 + eors r6, r6, r1 + adds r0, r0, #12 + b Xoodyak_AbsorbKeyedFullBlocks_EndLoop + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen) +@ { +@ size_t initialLength = XLen@ +@ +@ do { +@ SnP_Permute(state )@ /* Xoodyak_Up(instance, NULL, 0, 0)@ */ +@ SnP_AddBytes(state, X, 0, Xoodyak_Rhash)@ /* Xoodyak_Down(instance, X, Xoodyak_Rhash, 0)@ */ +@ SnP_AddByte(state, 0x01, Xoodyak_Rhash)@ +@ X += Xoodyak_Rhash@ +@ XLen -= Xoodyak_Rhash@ +@ } while (XLen >= Xoodyak_Rhash)@ +@ return initialLength - 
XLen@ +@ } +@ + .align 4 +.global Xoodyak_AbsorbHashFullBlocks +.type Xoodyak_AbsorbHashFullBlocks, %function; +Xoodyak_AbsorbHashFullBlocks: + push {r3-r7,lr} + mov r4, r8 + mov r5, r9 + mov r6, r10 + mov r7, r11 + push {r4-r7} + + sub sp, #XoodyakAbsorb_SAS + str r0, [sp, #XoodyakAbsorb_offsetState] @ setup variables on stack + str r1, [sp, #XoodyakAbsorb_offsetX] + str r2, [sp, #XoodyakAbsorb_offsetInitialLen] + subs r2, r2, #16 + str r2, [sp, #XoodyakAbsorb_offsetXLen] + ldr r5, =Xoodyak_AbsorbHashFullBlocks_Ret+1 + str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn] + + ldm r0!, {r3,r5,r6,r7} @ state in registers + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoodyak_AbsorbHashFullBlocks_Loop: + ldr r0, =Xoodoo_Permute_12roundsAsm + bx r0 + .align 4 + .ltorg +Xoodyak_AbsorbHashFullBlocks_Ret: + ldr r0, [sp, #XoodyakAbsorb_offsetX] + lsls r1, r0, #30 + bne Xoodyak_AbsorbHashFullBlocks_Unaligned +Xoodyak_AbsorbHashFullBlocks_Aligned: + ldmia r0!, {r1} + eors r3, r3, r1 + ldmia r0!, {r1} + mov r2, r8 + eors r2, r2, r1 + mov r8, r2 + ldmia r0!, {r1} + mov r2, r9 + eors r2, r2, r1 + mov r9, r2 + ldmia r0!, {r1} + ldr r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] + eors r2, r2, r1 + str r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] +Xoodyak_AbsorbHashFullBlocks_EndLoop: + str r0, [sp, #XoodyakAbsorb_offsetX] + movs r2, #1 + mov r1, r10 + eors r1, r1, r2 + mov r10, r1 + ldr r1, [sp, #XoodyakAbsorb_offsetXLen] + subs r1, r1, #16 + str r1, [sp, #XoodyakAbsorb_offsetXLen] + bcs Xoodyak_AbsorbHashFullBlocks_Loop + ldr r0, [sp, #XoodyakAbsorb_offsetState] + + stm r0!, {r3} + mov r1, r8 + mov r2, r9 + ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03] + stm r0!, {r1,r2,r3} + mov r1, r10 + mov r2, r11 + mov r3, r12 + stm r0!, {r1,r2,r3} + mov r1, lr + stm r0!, {r1,r4,r5,r6,r7} + + ldr r0, [sp, #XoodyakAbsorb_offsetInitialLen] + ldr r2, [sp, 
#XoodyakAbsorb_offsetXLen] + adds r2, r2, #16 + subs r0, r0, r2 + + add sp, #XoodyakAbsorb_SAS + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r3-r7,pc} +Xoodyak_AbsorbHashFullBlocks_Unaligned: + mLoadU r1, r0, 0, r2 + eors r3, r3, r1 + mLoadU r1, r0, 4, r2 + mov r2, r8 + eors r2, r2, r1 + mov r8, r2 + mLoadU r1, r0, 8, r2 + mov r2, r9 + eors r2, r2, r1 + mov r9, r2 + mLoadU r1, r0, 12, r2 + ldr r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] + eors r2, r2, r1 + str r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] + adds r0, r0, #16 + b Xoodyak_AbsorbHashFullBlocks_EndLoop + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen) +@ { +@ size_t initialLength = YLen@ +@ +@ do { +@ SnP_AddByte(state, 0x01, 0)@ /* Xoodyak_Down(instance, NULL, 0, 0)@ */ +@ SnP_Permute(state )@ /* Xoodyak_Up(instance, Y, Xoodyak_Rkout, 0)@ */ +@ SnP_ExtractBytes(state, Y, 0, Xoodyak_Rkout)@ +@ Y += Xoodyak_Rkout@ +@ YLen -= Xoodyak_Rkout@ +@ } while (YLen >= Xoodyak_Rkout)@ +@ return initialLength - YLen@ +@ } +@ +.equ XoodyakSqueeze_offsetState , (Xoodoo_Permute_12rounds_SAS+0) +.equ XoodyakSqueeze_offsetY , (Xoodoo_Permute_12rounds_SAS+4) +.equ XoodyakSqueeze_offsetYLen , (Xoodoo_Permute_12rounds_SAS+8) +.equ XoodyakSqueeze_offsetInitialLen , (Xoodoo_Permute_12rounds_SAS+12) + +.equ XoodyakSqueeze_SAS , (Xoodoo_Permute_12rounds_SAS+20) + + .align 4 +.global Xoodyak_SqueezeKeyedFullBlocks +.type Xoodyak_SqueezeKeyedFullBlocks, %function; +Xoodyak_SqueezeKeyedFullBlocks: + push {r3-r7,lr} + mov r4, r8 + mov r5, r9 + mov r6, r10 + mov r7, r11 + push {r4-r7} + + sub sp, #XoodyakSqueeze_SAS + str r0, [sp, #XoodyakSqueeze_offsetState] @ setup variables on stack + str r1, [sp, #XoodyakSqueeze_offsetY] + str r2, [sp, #XoodyakSqueeze_offsetInitialLen] + subs r2, r2, #24 + str r2, [sp, #XoodyakSqueeze_offsetYLen] + ldr r5, =Xoodyak_SqueezeKeyedFullBlocks_Ret+1 + str 
r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn] + + ldm r0!, {r3,r5,r6,r7} @ state in registers + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoodyak_SqueezeKeyedFullBlocks_Loop: + movs r0, #1 + eors r3, r3, r0 + ldr r0, =Xoodoo_Permute_12roundsAsm + bx r0 + .align 4 + .ltorg +Xoodyak_SqueezeKeyedFullBlocks_Ret: + ldr r0, [sp, #XoodyakSqueeze_offsetY] + lsls r1, r0, #30 + bne Xoodyak_SqueezeKeyedFullBlocks_Unaligned +Xoodyak_SqueezeKeyedFullBlocks_Aligned: + stmia r0!, {r3} + mov r1, r8 + mov r2, r9 + stmia r0!, {r1, r2} + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03] + mov r2, r10 + stmia r0!, {r1, r2} + mov r1, r11 + stmia r0!, {r1} +Xoodyak_SqueezeKeyedFullBlocks_EndLoop: + str r0, [sp, #XoodyakSqueeze_offsetY] + ldr r1, [sp, #XoodyakSqueeze_offsetYLen] + subs r1, r1, #24 + str r1, [sp, #XoodyakSqueeze_offsetYLen] + bcs Xoodyak_SqueezeKeyedFullBlocks_Loop + ldr r0, [sp, #XoodyakSqueeze_offsetState] @ Save state + stm r0!, {r3} + mov r1, r8 + mov r2, r9 + ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03] + stm r0!, {r1,r2,r3} + mov r1, r10 + mov r2, r11 + mov r3, r12 + stm r0!, {r1,r2,r3} + mov r1, lr + stm r0!, {r1,r4,r5,r6,r7} + ldr r0, [sp, #XoodyakSqueeze_offsetInitialLen] @ Compute processed length + ldr r2, [sp, #XoodyakSqueeze_offsetYLen] + adds r2, r2, #24 + subs r0, r0, r2 + add sp, #XoodyakSqueeze_SAS @ Free stack and pop + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r3-r7,pc} +Xoodyak_SqueezeKeyedFullBlocks_Unaligned: + mStoreU r0, 0, r3, r2, locRegL + mStoreU r0, 4, r8, r2, locRegH + mStoreU r0, 8, r9, r2, locRegH + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03] + mStoreU r0, 12, r1, r2, locRegL + mStoreU r0, 16, r10, r2, locRegH + mStoreU r0, 20, r11, r2, locRegH + adds r0, r0, #24 + b Xoodyak_SqueezeKeyedFullBlocks_EndLoop + + +@ 
---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen) +@ { +@ size_t initialLength = YLen@ +@ +@ do { +@ SnP_AddByte(state, 0x01, 0)@ /* Xoodyak_Down(instance, NULL, 0, 0)@ */ +@ SnP_Permute(state)@ /* Xoodyak_Up(instance, Y, Xoodyak_Rhash, 0)@ */ +@ SnP_ExtractBytes(state, Y, 0, Xoodyak_Rhash)@ +@ Y += Xoodyak_Rhash@ +@ YLen -= Xoodyak_Rhash@ +@ } while (YLen >= Xoodyak_Rhash)@ +@ return initialLength - YLen@ +@ } +@ + .align 4 +.global Xoodyak_SqueezeHashFullBlocks +.type Xoodyak_SqueezeHashFullBlocks, %function; +Xoodyak_SqueezeHashFullBlocks: + push {r3-r7,lr} + mov r4, r8 + mov r5, r9 + mov r6, r10 + mov r7, r11 + push {r4-r7} + + sub sp, #XoodyakSqueeze_SAS + str r0, [sp, #XoodyakSqueeze_offsetState] @ setup variables on stack + str r1, [sp, #XoodyakSqueeze_offsetY] + str r2, [sp, #XoodyakSqueeze_offsetInitialLen] + subs r2, r2, #16 + str r2, [sp, #XoodyakSqueeze_offsetYLen] + ldr r5, =Xoodyak_SqueezeHashFullBlocks_Ret+1 + str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn] + + ldm r0!, {r3,r5,r6,r7} @ state in registers + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoodyak_SqueezeHashFullBlocks_Loop: + movs r0, #1 + eors r3, r3, r0 + ldr r0, =Xoodoo_Permute_12roundsAsm + bx r0 + .align 4 + .ltorg +Xoodyak_SqueezeHashFullBlocks_Ret: + ldr r0, [sp, #XoodyakSqueeze_offsetY] + lsls r1, r0, #30 + bne Xoodyak_SqueezeHashFullBlocks_Unaligned +Xoodyak_SqueezeHashFullBlocks_Aligned: + stmia r0!, {r3} + mov r1, r8 + mov r2, r9 + stmia r0!, {r1, r2} + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03] + stmia r0!, {r1} +Xoodyak_SqueezeHashFullBlocks_EndLoop: + str r0, [sp, #XoodyakSqueeze_offsetY] + ldr r1, [sp, #XoodyakSqueeze_offsetYLen] + subs r1, r1, #16 + str r1, [sp, #XoodyakSqueeze_offsetYLen] + bcs 
Xoodyak_SqueezeHashFullBlocks_Loop + ldr r0, [sp, #XoodyakSqueeze_offsetState] @ Save state + stm r0!, {r3} + mov r1, r8 + mov r2, r9 + ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03] + stm r0!, {r1,r2,r3} + mov r1, r10 + mov r2, r11 + mov r3, r12 + stm r0!, {r1,r2,r3} + mov r1, lr + stm r0!, {r1,r4,r5,r6,r7} + ldr r0, [sp, #XoodyakSqueeze_offsetInitialLen] @ Compute processed length + ldr r2, [sp, #XoodyakSqueeze_offsetYLen] + adds r2, r2, #16 + subs r0, r0, r2 + add sp, #XoodyakSqueeze_SAS @ Free stack and pop + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r3-r7,pc} +Xoodyak_SqueezeHashFullBlocks_Unaligned: + mStoreU r0, 0, r3, r2, locRegL + mStoreU r0, 4, r8, r2, locRegH + mStoreU r0, 8, r9, r2, locRegH + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03] + mStoreU r0, 12, r1, r2, locRegL + adds r0, r0, #16 + b Xoodyak_SqueezeHashFullBlocks_EndLoop + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen) +@ { +@ size_t initialLength = IOLen@ +@ +@ do { +@ SnP_Permute(state)@ +@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@ +@ SnP_OverwriteBytes(state, O, 0, Xoodyak_Rkout)@ +@ SnP_AddByte(state, 0x01, Xoodyak_Rkout)@ +@ I += Xoodyak_Rkout@ +@ O += Xoodyak_Rkout@ +@ IOLen -= Xoodyak_Rkout@ +@ } while (IOLen >= Xoodyak_Rkout)@ +@ return initialLength - IOLen@ +@ } +@ +.equ XoodyakCrypt_offsetState , (Xoodoo_Permute_12rounds_SAS+0) +.equ XoodyakCrypt_offsetI , (Xoodoo_Permute_12rounds_SAS+4) +.equ XoodyakCrypt_offsetO , (Xoodoo_Permute_12rounds_SAS+8) +.equ XoodyakCrypt_offsetIOLen , (Xoodoo_Permute_12rounds_SAS+12) +.equ XoodyakCrypt_offsetInitialLen , (Xoodoo_Permute_12rounds_SAS+16) +.equ XoodyakCrypt_SAS , (Xoodoo_Permute_12rounds_SAS+20) + + .align 4 +.global Xoodyak_EncryptFullBlocks +.type Xoodyak_EncryptFullBlocks, %function; +Xoodyak_EncryptFullBlocks: + push {r3-r7,lr} + mov r4, r8 + mov r5, r9 + mov 
r6, r10 + mov r7, r11 + push {r4-r7} + + sub sp, #XoodyakCrypt_SAS + str r0, [sp, #XoodyakCrypt_offsetState] @ setup variables on stack + str r1, [sp, #XoodyakCrypt_offsetI] + str r2, [sp, #XoodyakCrypt_offsetO] + str r3, [sp, #XoodyakCrypt_offsetInitialLen] + subs r3, r3, #24 @ IOLen is kept biased by one 24-byte block so the loop test is a plain borrow check + str r3, [sp, #XoodyakCrypt_offsetIOLen] + ldr r5, =Xoodyak_EncryptFullBlocks_Ret+1 + str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn] + + ldm r0!, {r3,r5,r6,r7} @ state in registers + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoodyak_EncryptFullBlocks_Loop: + ldr r0, =Xoodoo_Permute_12roundsAsm + bx r0 + .align 4 + .ltorg +Xoodyak_EncryptFullBlocks_Ret: + push {r4, r5} @ two extra words on the stack: every sp offset below carries +8 + ldr r5, [sp, #XoodyakCrypt_offsetI+8] + ldr r4, [sp, #XoodyakCrypt_offsetO+8] + mov r0, r4 + orrs r0, r0, r5 @ OR, not AND: the word-wise path needs BOTH I and O 4-byte aligned + lsls r0, r0, #30 @ keep only the two low address bits + bne Xoodyak_EncryptFullBlocks_Unaligned +Xoodyak_EncryptFullBlocks_Aligned: + ldmia r5!, {r0} + eors r3, r3, r0 + stmia r4!, {r3} + + ldmia r5!, {r0} + mov r1, r8 + eors r1, r1, r0 + stmia r4!, {r1} + mov r8, r1 + + ldmia r5!, {r0} + mov r1, r9 + eors r1, r1, r0 + stmia r4!, {r1} + mov r9, r1 + + ldmia r5!, {r0} + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + eors r1, r1, r0 + stmia r4!, {r1} + str r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + + ldmia r5!, {r0} + mov r1, r10 + eors r1, r1, r0 + stmia r4!, {r1} + mov r10, r1 + + ldmia r5!, {r0} + mov r1, r11 + eors r1, r1, r0 + stmia r4!, {r1} + mov r11, r1 +Xoodyak_EncryptFullBlocks_EndLoop: + movs r0, #1 @ flip bit 0 of state word 6 (byte offset 24), held in r12 + mov r1, r12 + eors r1, r1, r0 + mov r12, r1 + str r5, [sp, #XoodyakCrypt_offsetI+8] + str r4, [sp, #XoodyakCrypt_offsetO+8] + pop {r4, r5} + ldr r1, [sp, #XoodyakCrypt_offsetIOLen] + subs r1, r1, #24 + str r1, [sp, #XoodyakCrypt_offsetIOLen] + bcs Xoodyak_EncryptFullBlocks_Loop + ldr r0, [sp, #XoodyakCrypt_offsetState] @ Save state + stm r0!, {r3} + mov r1, r8 + mov r2, r9 + ldr r3, [sp,
#Xoodoo_Permute_12rounds_offsetA03] + stm r0!, {r1,r2,r3} + mov r1, r10 + mov r2, r11 + mov r3, r12 + stm r0!, {r1,r2,r3} + mov r1, lr + stm r0!, {r1,r4,r5,r6,r7} + ldr r0, [sp, #XoodyakCrypt_offsetInitialLen] @ Compute processed length + ldr r2, [sp, #XoodyakCrypt_offsetIOLen] + adds r2, r2, #24 + subs r0, r0, r2 + add sp, #XoodyakCrypt_SAS @ Free stack and pop + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r3-r7,pc} +Xoodyak_EncryptFullBlocks_Unaligned: + mLoadU r0, r5, 0, r2 + eors r3, r3, r0 + mStoreU r4, 0, r3, r2, locRegL + + mLoadU r0, r5, 4, r2 + mov r1, r8 + eors r1, r1, r0 + mStoreU r4, 4, r1, r2, locRegL + mov r8, r1 + + mLoadU r0, r5, 8, r2 + mov r1, r9 + eors r1, r1, r0 + mStoreU r4, 8, r1, r2, locRegL + mov r9, r1 + + mLoadU r0, r5, 12, r2 + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + eors r1, r1, r0 + mStoreU r4, 12, r1, r2, locRegL + str r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + + mLoadU r0, r5, 16, r2 + mov r1, r10 + eors r1, r1, r0 + mStoreU r4, 16, r1, r2, locRegL + mov r10, r1 + + mLoadU r0, r5, 20, r2 + mov r1, r11 + eors r1, r1, r0 + mStoreU r4, 20, r1, r2, locRegL + mov r11, r1 + + adds r4, r4, #24 + adds r5, r5, #24 + b Xoodyak_EncryptFullBlocks_EndLoop + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen) +@ { +@ size_t initialLength = IOLen@ +@ +@ do { +@ SnP_Permute(state)@ +@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@ +@ SnP_AddBytes(state, O, 0, Xoodyak_Rkout)@ +@ SnP_AddByte(state, 0x01, Xoodyak_Rkout)@ +@ I += Xoodyak_Rkout@ +@ O += Xoodyak_Rkout@ +@ IOLen -= Xoodyak_Rkout@ +@ } while (IOLen >= Xoodyak_Rkout)@ +@ return initialLength - IOLen@ +@ } +@ + .align 4 +.global Xoodyak_DecryptFullBlocks +.type Xoodyak_DecryptFullBlocks, %function; +Xoodyak_DecryptFullBlocks: + push {r3-r7,lr} + mov r4, r8 + mov r5, r9 + mov r6, r10 + mov r7, r11 + push {r4-r7} + + 
sub sp, #XoodyakCrypt_SAS + str r0, [sp, #XoodyakCrypt_offsetState] @ setup variables on stack + str r1, [sp, #XoodyakCrypt_offsetI] + str r2, [sp, #XoodyakCrypt_offsetO] + str r3, [sp, #XoodyakCrypt_offsetInitialLen] + subs r3, r3, #24 @ IOLen is kept biased by one 24-byte block so the loop test is a plain borrow check + str r3, [sp, #XoodyakCrypt_offsetIOLen] + ldr r5, =Xoodyak_DecryptFullBlocks_Ret+1 + str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn] + + ldm r0!, {r3,r5,r6,r7} @ state in registers + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoodyak_DecryptFullBlocks_Loop: + ldr r0, =Xoodoo_Permute_12roundsAsm + bx r0 + .align 4 + .ltorg +Xoodyak_DecryptFullBlocks_Ret: + push {r4, r5} @ two extra words on the stack: every sp offset below carries +8 + ldr r5, [sp, #XoodyakCrypt_offsetI+8] + ldr r4, [sp, #XoodyakCrypt_offsetO+8] + mov r0, r4 + orrs r0, r0, r5 @ OR, not AND: the word-wise path needs BOTH I and O 4-byte aligned + lsls r0, r0, #30 @ keep only the two low address bits + bne Xoodyak_DecryptFullBlocks_Unaligned +Xoodyak_DecryptFullBlocks_Aligned: + ldmia r5!, {r0} + eors r3, r3, r0 + stmia r4!, {r3} + mov r3, r0 @ the state word becomes the input word just read + + ldmia r5!, {r0} + mov r1, r8 + eors r1, r1, r0 + stmia r4!, {r1} + mov r8, r0 + + ldmia r5!, {r0} + mov r1, r9 + eors r1, r1, r0 + stmia r4!, {r1} + mov r9, r0 + + ldmia r5!, {r0} + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + eors r1, r1, r0 + stmia r4!, {r1} + str r0, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + + ldmia r5!, {r0} + mov r1, r10 + eors r1, r1, r0 + stmia r4!, {r1} + mov r10, r0 + + ldmia r5!, {r0} + mov r1, r11 + eors r1, r1, r0 + stmia r4!, {r1} + mov r11, r0 +Xoodyak_DecryptFullBlocks_EndLoop: + movs r0, #1 @ flip bit 0 of state word 6 (byte offset 24), held in r12 + mov r1, r12 + eors r1, r1, r0 + mov r12, r1 + str r5, [sp, #XoodyakCrypt_offsetI+8] + str r4, [sp, #XoodyakCrypt_offsetO+8] + pop {r4, r5} + ldr r1, [sp, #XoodyakCrypt_offsetIOLen] + subs r1, r1, #24 + str r1, [sp, #XoodyakCrypt_offsetIOLen] + bcs Xoodyak_DecryptFullBlocks_Loop + ldr r0, [sp, #XoodyakCrypt_offsetState] @ Save state + stm r0!, {r3} + mov r1, r8 + mov r2, r9 + ldr r3, [sp,
#Xoodoo_Permute_12rounds_offsetA03] + stm r0!, {r1,r2,r3} + mov r1, r10 + mov r2, r11 + mov r3, r12 + stm r0!, {r1,r2,r3} + mov r1, lr + stm r0!, {r1,r4,r5,r6,r7} + ldr r0, [sp, #XoodyakCrypt_offsetInitialLen] @ Compute processed length + ldr r2, [sp, #XoodyakCrypt_offsetIOLen] + adds r2, r2, #24 + subs r0, r0, r2 + add sp, #XoodyakCrypt_SAS @ Free stack and pop + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r3-r7,pc} +Xoodyak_DecryptFullBlocks_Unaligned: + mLoadU r0, r5, 0, r2 + eors r3, r3, r0 + mStoreU r4, 0, r3, r2, locRegL + mov r3, r0 + + mLoadU r0, r5, 4, r2 + mov r1, r8 + eors r1, r1, r0 + mStoreU r4, 4, r1, r2, locRegL + mov r8, r0 + + mLoadU r0, r5, 8, r2 + mov r1, r9 + eors r1, r1, r0 + mStoreU r4, 8, r1, r2, locRegL + mov r9, r0 + + mLoadU r0, r5, 12, r2 + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + eors r1, r1, r0 + mStoreU r4, 12, r1, r2, locRegL + str r0, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + + mLoadU r0, r5, 16, r2 + mov r1, r10 + eors r1, r1, r0 + mStoreU r4, 16, r1, r2, locRegL + mov r10, r0 + + mLoadU r0, r5, 20, r2 + mov r1, r11 + eors r1, r1, r0 + mStoreU r4, 20, r1, r2, locRegL + mov r11, r0 + + adds r4, r4, #24 + adds r5, r5, #24 + b Xoodyak_DecryptFullBlocks_EndLoop + + diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/Xoodyak.c b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/Xoodyak.c new file mode 100644 index 0000000..e0b67b5 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/Xoodyak.c @@ -0,0 +1,53 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". 
+ +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifdef XoodooReference + #include "displayIntermediateValues.h" +#endif + +#include +#include +#include "Xoodyak.h" + +#ifdef OUTPUT +#include +#include + +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length); +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length) +{ + unsigned int i; + + fprintf(f, "%s:", synopsis); + for(i=0; i +#include "Cyclist.h" +#include "Xoodoo-SnP.h" +#include "Xoodyak-parameters.h" + +KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment) +KCP_DeclareCyclistFunctions(Xoodyak) + +#else +#error This requires an implementation of Xoodoo +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/align.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/align.h new file mode 100644 index 0000000..82ad2f9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/align.h @@ -0,0 +1,33 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _align_h_ +#define _align_h_ + +/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. 
*/ +#ifdef ALIGN +#undef ALIGN +#endif + +#if defined(__GNUC__) +#define ALIGN(x) __attribute__ ((aligned(x))) +#elif defined(_MSC_VER) +#define ALIGN(x) __declspec(align(x)) +#elif defined(__ARMCC_VERSION) +#define ALIGN(x) __align(x) +#else +#define ALIGN(x) +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/api.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/api.h new file mode 100644 index 0000000..4ceda96 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/brg_endian.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/brg_endian.h new file mode 100644 index 0000000..7c640b9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/brg_endian.h @@ -0,0 +1,143 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The redistribution and use of this software (with or without changes) + is allowed without the payment of fees or royalties provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. 
+ --------------------------------------------------------------------------- + Issue Date: 20/12/2007 + Changes for ARM 9/9/2010 +*/ + +#ifndef _BRG_ENDIAN_H +#define _BRG_ENDIAN_H + +#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ +#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ + +#if 0 +/* Include files where endian defines and byteswap functions may reside */ +#if defined( __sun ) +# include +#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ ) +# include +#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \ + defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ ) +# include +#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ ) +# if !defined( __MINGW32__ ) && !defined( _AIX ) +# include +# if !defined( __BEOS__ ) +# include +# endif +# endif +#endif +#endif + +/* Now attempt to set the define for platform byte order using any */ +/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */ +/* seem to encompass most endian symbol definitions */ + +#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN ) +# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN ) +# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( _BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( _LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( 
__BIG_ENDIAN ) && defined( __LITTLE_ENDIAN ) +# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ ) +# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +/* if the platform byte order could not be determined, then try to */ +/* set this define using common machine defines */ +#if !defined(PLATFORM_BYTE_ORDER) + +#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ + defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ + defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ + defined( vax ) || defined( vms ) || defined( VMS ) || \ + defined( __VMS ) || defined( _M_X64 ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN + +#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ + defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ + defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ + defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ + defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ + defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ + defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \ + defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ ) 
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN + +#elif defined(__arm__) +# ifdef __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# else +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif 1 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#elif 0 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#else +# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order +#endif + +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/config.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/config.h new file mode 100644 index 0000000..7dfc043 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/config.h @@ -0,0 +1,4 @@ +/* File generated by ToTargetConfigFile.xsl */ + +#define XKCP_has_Xoodyak +#define XKCP_has_Xoodoo diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/encrypt.c b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/encrypt.c new file mode 100644 index 0000000..3090334 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv6M/encrypt.c @@ -0,0 +1,92 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include "crypto_aead.h" +#include "api.h" +#include "Xoodyak.h" +#include <string.h> + +#if !defined(CRYPTO_KEYBYTES) + #define CRYPTO_KEYBYTES 16 +#endif +#if !defined(CRYPTO_NPUBBYTES) + #define CRYPTO_NPUBBYTES 16 +#endif + +#define TAGLEN 16 + +/* Encrypts mlen bytes of m into c and appends a TAGLEN-byte tag, so c must hold mlen + TAGLEN bytes. */ +/* The key k, then the nonce npub, then the associated data ad are absorbed first; nsec is unused. */ +/* Stores mlen + TAGLEN in *clen and always returns 0. */ +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k) +{ + Xoodyak_Instance instance; + + (void)nsec; + + Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, NULL, 0, NULL, 0); + Xoodyak_Absorb(&instance, npub, CRYPTO_NPUBBYTES); + Xoodyak_Absorb(&instance, ad, (size_t)adlen); + Xoodyak_Encrypt(&instance, m, c, (size_t)mlen); + Xoodyak_Squeeze(&instance, c + mlen, TAGLEN); + *clen = mlen + TAGLEN; + #if 0 + { + unsigned int i; + for (i = 0; i < *clen; ++i ) + { + printf("\\x%02x", c[i] ); + } + printf("\n"); + } + #endif + return 0; +} + +/* Decrypts the first clen - TAGLEN bytes of c into m and checks the trailing TAGLEN-byte tag. */ +/* Returns 0 and stores the plaintext length in *mlen on success. */ +/* Returns -1 with *mlen == 0 when clen < TAGLEN or the tag does not match; on a mismatch m is zeroed. nsec is unused. */ +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + Xoodyak_Instance instance; + unsigned char tag[TAGLEN]; + unsigned char tagDiff = 0; + unsigned long long mlen_; + unsigned int i; + + (void)nsec; + + *mlen = 0; + if (clen < TAGLEN) { + return -1; + } + mlen_ = clen - TAGLEN; + Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, NULL, 0, NULL, 0); + Xoodyak_Absorb(&instance, npub, CRYPTO_NPUBBYTES); + Xoodyak_Absorb(&instance, ad, (size_t)adlen); + Xoodyak_Decrypt(&instance, c, m, (size_t)mlen_); + Xoodyak_Squeeze(&instance, tag, TAGLEN); + /* Constant-time tag check: every byte is always visited, so the run time does not reveal where the first mismatch is (memcmp may stop early). */ + for (i = 0; i < TAGLEN; ++i) { + tagDiff |= (unsigned char)(tag[i] ^ c[mlen_ + i]); + } + if (tagDiff != 0) { + memset(m, 0, (size_t)mlen_); /* never hand unauthenticated plaintext back to the caller */ + return -1; + } + *mlen = mlen_; + return 0; +} diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Cyclist.h
b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Cyclist.h new file mode 100644 index 0000000..54522bb --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Cyclist.h @@ -0,0 +1,66 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Cyclist_h_ +#define _Cyclist_h_ + +#include +#include "align.h" + +#define Cyclist_ModeHash 1 +#define Cyclist_ModeKeyed 2 + +#define Cyclist_PhaseDown 1 +#define Cyclist_PhaseUp 2 + +#ifdef OUTPUT + +#include + +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + uint8_t stateShadow[size]; \ + FILE *file; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#else + +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#endif + +#define KCP_DeclareCyclistFunctions(prefix) \ + void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \ + void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \ + void prefix##_Encrypt(prefix##_Instance *instance, const 
uint8_t *P, uint8_t *C, size_t PLen); \ + void prefix##_Decrypt(prefix##_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen); \ + void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \ + void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \ + void prefix##_Ratchet(prefix##_Instance *instance); + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Cyclist.inc b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Cyclist.inc new file mode 100644 index 0000000..ba7a156 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Cyclist.inc @@ -0,0 +1,327 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define JOIN0(a, b) a ## b +#define JOIN(a, b) JOIN0(a, b) + +#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize) +#define SnP_Initialize JOIN(SnP, _Initialize) +#define SnP_AddBytes JOIN(SnP, _AddBytes) +#define SnP_AddByte JOIN(SnP, _AddByte) +#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes) +#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes) +#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes) + +#define Cyclist_Instance JOIN(prefix, _Instance) +#define Cyclist_Initialize JOIN(prefix, _Initialize) +#define Cyclist_Absorb JOIN(prefix, _Absorb) +#define Cyclist_Encrypt JOIN(prefix, _Encrypt) +#define Cyclist_Decrypt JOIN(prefix, _Decrypt) +#define Cyclist_Squeeze JOIN(prefix, _Squeeze) +#define Cyclist_SqueezeKey JOIN(prefix, _SqueezeKey) +#define Cyclist_Ratchet JOIN(prefix, _Ratchet) + +#define Cyclist_AbsorbAny JOIN(prefix, _AbsorbAny) +#define Cyclist_AbsorbKey JOIN(prefix, _AbsorbKey) +#define Cyclist_SqueezeAny JOIN(prefix, _SqueezeAny) +#define Cyclist_Down JOIN(prefix, _Down) +#define Cyclist_Up JOIN(prefix, _Up) +#define Cyclist_Crypt JOIN(prefix, _Crypt) + +#define Cyclist_f_bPrime JOIN(prefix, _f_bPrime) +#define Cyclist_Rhash JOIN(prefix, _Rhash) +#define Cyclist_Rkin JOIN(prefix, _Rkin) +#define Cyclist_Rkout JOIN(prefix, _Rkout) +#define Cyclist_lRatchet JOIN(prefix, _lRatchet) + +#if defined(CyclistFullBlocks_supported) +#define Cyclist_AbsorbKeyedFullBlocks JOIN(prefix, _AbsorbKeyedFullBlocks) +#define Cyclist_AbsorbHashFullBlocks JOIN(prefix, _AbsorbHashFullBlocks) +#define Cyclist_SqueezeKeyedFullBlocks JOIN(prefix, _SqueezeKeyedFullBlocks) +#define Cyclist_SqueezeHashFullBlocks JOIN(prefix, _SqueezeHashFullBlocks) +#define Cyclist_EncryptFullBlocks JOIN(prefix, _EncryptFullBlocks) +#define Cyclist_DecryptFullBlocks JOIN(prefix, _DecryptFullBlocks) +#endif + +/* ------- Cyclist internal interfaces ------- */ + +static void Cyclist_Down(Cyclist_Instance *instance, const 
uint8_t *Xi, unsigned int XiLen, uint8_t Cd) +{ + SnP_AddBytes(instance->state, Xi, 0, XiLen); + SnP_AddByte(instance->state, 0x01, XiLen); + SnP_AddByte(instance->state, (instance->mode == Cyclist_ModeHash) ? (Cd & 0x01) : Cd, Cyclist_f_bPrime - 1); + instance->phase = Cyclist_PhaseDown; + +} + +static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu) +{ + #if defined(OUTPUT) + uint8_t s[Cyclist_f_bPrime]; + #endif + + if (instance->mode != Cyclist_ModeHash) { + SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1); + } + #if defined(OUTPUT) + if (instance->file != NULL) { + SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime ); + SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime ); + } + #endif + SnP_Permute( instance->state ); + #if defined(OUTPUT) + if (instance->file != NULL) { + memcpy( instance->stateShadow, instance->state, sizeof(instance->state) ); + fprintf( instance->file, "Data XORed" ); + displayByteString( instance->file, "", s, Cyclist_f_bPrime ); + SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime ); + fprintf( instance->file, "After f() "); + displayByteString( instance->file, "", s, Cyclist_f_bPrime ); + } + #endif + instance->phase = Cyclist_PhaseUp; + SnP_ExtractBytes( instance->state, Yi, 0, YiLen ); +} + +static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd) +{ + unsigned int splitLen; + + do { + if (instance->phase != Cyclist_PhaseUp) { + Cyclist_Up(instance, NULL, 0, 0); + } + splitLen = MyMin(XLen, r); + Cyclist_Down(instance, X, splitLen, Cd); + Cd = 0; + X += splitLen; + XLen -= splitLen; + #if defined(CyclistFullBlocks_supported) + if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) { + size_t lenProcessed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen); + X += lenProcessed; + XLen -= lenProcessed; + } + else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) { + size_t lenProcessed 
= Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen); + X += lenProcessed; + XLen -= lenProcessed; + } + #endif + } while ( XLen != 0 ); +} + +/* Switches the instance to keyed mode (Rabsorb = Rkin, Rsqueeze = Rkout). */ +/* When KLen != 0 it absorbs K || ID || (uint8_t)IDLen with domain byte 0x02 and then, if counterLen != 0, the counter one byte per block. */ +/* KLen + IDLen must not exceed Rkin - 1; this is only checked by assert. */ +static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen) +{ + uint8_t KID[Cyclist_Rkin]; + + assert(instance->mode == Cyclist_ModeHash); + assert((KLen + IDLen) <= (Cyclist_Rkin - 1)); + + instance->mode = Cyclist_ModeKeyed; + instance->Rabsorb = Cyclist_Rkin; + instance->Rsqueeze = Cyclist_Rkout; + if (KLen != 0) { + memcpy(KID, K, KLen); + /* ID may be NULL when IDLen is 0; memcpy must not be given a null pointer even for a zero length. */ + if (IDLen != 0) { + memcpy(KID + KLen, ID, IDLen); + } + KID[KLen + IDLen] = (uint8_t)IDLen; + Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02); + if (counterLen != 0) { + Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00); + } + } +} + +/* Squeezes YLen bytes into Y, at most Rsqueeze bytes per block. */ +/* The first block is produced by an Up with domain byte Cu; every further block is preceded by an empty Down and uses domain byte 0. */ +static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu) +{ + unsigned int len; + + len = MyMin(YLen, instance->Rsqueeze ); + Cyclist_Up(instance, Y, len, Cu); + Y += len; + YLen -= len; + while (YLen != 0) { + #if defined(CyclistFullBlocks_supported) + /* NOTE(review): the keyed fast path is gated on Rkin although keyed squeezing runs at Rsqueeze = Rkout; confirm whether Rkout was intended here. */ + if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkin)) { + size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen); + Y += lenProcessed; + YLen -= lenProcessed; + } + else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) { + size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen); + Y += lenProcessed; + YLen -= lenProcessed; + } + else + #endif + { + Cyclist_Down(instance, NULL, 0, 0); + len = MyMin(YLen, instance->Rsqueeze ); + Cyclist_Up(instance, Y, len, 0); + Y += len; + YLen -= len; + } + } +} + +static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt) +{ + unsigned int splitLen; + uint8_t P[Cyclist_Rkout]; + uint8_t Cu = 0x80; + + do { + if (decrypt != 0) { + #if defined(CyclistFullBlocks_supported) + if ((Cu
== 0) && (IOLen >= Cyclist_Rkout)) { + size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen); + I += lenProcessed; + O += lenProcessed; + IOLen -= lenProcessed; + } + else + #endif + { + splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */ + Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */ + Xoodoo_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */ + Cyclist_Down(instance, O, splitLen, 0x00); + I += splitLen; + O += splitLen; + IOLen -= splitLen; + } + } + else { + #if defined(CyclistFullBlocks_supported) + if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) { + size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen); + I += lenProcessed; + O += lenProcessed; + IOLen -= lenProcessed; + } + else + #endif + { + splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */ + memcpy(P, I, splitLen); + Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */ + Xoodoo_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */ + Cyclist_Down(instance, P, splitLen, 0x00); + I += splitLen; + O += splitLen; + IOLen -= splitLen; + } + } + Cu = 0x00; + } while ( IOLen != 0 ); +} + +/* ------- Cyclist interfaces ------- */ + +void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen) +{ + SnP_StaticInitialize(); + SnP_Initialize(instance->state); + instance->phase = Cyclist_PhaseUp; + instance->mode = Cyclist_ModeHash; + instance->Rabsorb = Cyclist_Rhash; + instance->Rsqueeze = Cyclist_Rhash; + #ifdef OUTPUT + instance->file = 0; + SnP_Initialize( instance->stateShadow ); + #endif + if (KLen != 0) { + Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, counter, counterLen); + } +} + +void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, 
size_t XLen) +{ + Cyclist_AbsorbAny(instance, X, XLen, instance->Rabsorb, 0x03); +} + +void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen) +{ + assert(instance->mode == Cyclist_ModeKeyed); + Cyclist_Crypt(instance, P, C, PLen, 0); +} + +void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen) +{ + assert(instance->mode == Cyclist_ModeKeyed); + Cyclist_Crypt(instance, C, P, CLen, 1); +} + +void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen) +{ + Cyclist_SqueezeAny(instance, Y, YLen, 0x40); +} + +void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen) +{ + assert(instance->mode == Cyclist_ModeKeyed); + Cyclist_SqueezeAny(instance, K, KLen, 0x20); +} + +void Cyclist_Ratchet(Cyclist_Instance *instance) +{ + uint8_t buffer[Cyclist_lRatchet]; + + assert(instance->mode == Cyclist_ModeKeyed); + /* Squeeze then absorb is the same as overwriting with zeros */ + Cyclist_SqueezeAny(instance, buffer, sizeof(buffer), 0x10); + Cyclist_AbsorbAny(instance, buffer, sizeof(buffer), instance->Rabsorb, 0x00); +} + +#undef SnP_StaticInitialize +#undef SnP_Initialize +#undef SnP_AddBytes +#undef SnP_AddByte +#undef SnP_OverwriteBytes +#undef SnP_ExtractBytes +#undef SnP_ExtractAndAddBytes + +#undef Cyclist_Instance +#undef Cyclist_Initialize +#undef Cyclist_Absorb +#undef Cyclist_Encrypt +#undef Cyclist_Decrypt +#undef Cyclist_Squeeze +#undef Cyclist_SqueezeKey +#undef Cyclist_Ratchet + +#undef Cyclist_AbsorbAny +#undef Cyclist_AbsorbKey +#undef Cyclist_SqueezeAny +#undef Cyclist_Down +#undef Cyclist_Up +#undef Cyclist_Crypt + +#undef Cyclist_f_bPrime +#undef Cyclist_Rhash +#undef Cyclist_Rkin +#undef Cyclist_Rkout +#undef Cyclist_lRatchet + +#if defined(CyclistFullBlocks_supported) +#undef Cyclist_AbsorbKeyedFullBlocks +#undef Cyclist_AbsorbHashFullBlocks +#undef Cyclist_SqueezeKeyedFullBlocks +#undef Cyclist_SqueezeHashFullBlocks +#undef Cyclist_EncryptFullBlocks 
+#undef Cyclist_DecryptFullBlocks
+#endif
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Xoodoo-SnP.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Xoodoo-SnP.h
new file mode 100644
index 0000000..7d0c98b
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Xoodoo-SnP.h
@@ -0,0 +1,55 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _Xoodoo_SnP_h_
+#define _Xoodoo_SnP_h_
+
+#include <stddef.h>   /* size_t, used by the Xoofff_* and Xoodyak_* prototypes below */
+#include <stdint.h>   /* uint8_t */
+
+/** For the documentation, see SnP-documentation.h.
+ */
+
+#define Xoodoo_implementation      "32-bit optimized ARM assembler implementation"
+#define Xoodoo_stateSizeInBytes    (3*4*4)
+#define Xoodoo_stateAlignment      4
+
+#define Xoodoo_StaticInitialize()   /* expands to nothing */
+void Xoodoo_Initialize(void *state);   /* zeroes the state */
+#define Xoodoo_AddByte(argS, argData, argOffset)    (((uint8_t*)(argS))[(argOffset)] ^= (argData))   /* XOR one byte into the state at byte offset argOffset */
+void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);   /* state[offset..] ^= data[0..length) */
+void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);   /* state[offset..] = data[0..length) */
+void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount);   /* clears the first byteCount bytes of the state */
+//void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds);
+void Xoodoo_Permute_6rounds(void *state);
+void Xoodoo_Permute_12rounds(void *state);
+void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length);   /* data[0..length) = state[offset..] */
+void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length);   /* output = input ^ state[offset..] */
+
+#define Xoodoo_FastXoofff_supported
+void Xoofff_AddIs(uint8_t *output, const uint8_t *input, size_t bitLen);
+size_t Xoofff_CompressFastLoop(uint8_t *kRoll, uint8_t *xAccu, const uint8_t *input, size_t length);
+size_t Xoofff_ExpandFastLoop(uint8_t *yAccu, const uint8_t *kRoll, uint8_t *output, size_t length);
+
+#define CyclistFullBlocks_supported   /* makes the Cyclist code use the full-block helpers declared below */
+size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen);
+size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen);
+size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen);
+size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen);
+size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);
+size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);
+
+#endif
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Xoodoo-uf-armv7m-le-gcc.s
b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Xoodoo-uf-armv7m-le-gcc.s
new file mode 100644
index 0000000..0b72ec8
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Xoodoo-uf-armv7m-le-gcc.s
@@ -0,0 +1,729 @@
+@
+@ The eXtended Keccak Code Package (XKCP)
+@ https://github.com/XKCP/XKCP
+@
+@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
+@
+@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+@
+@ For more information, feedback or questions, please refer to the Keccak Team website:
+@ https://keccak.team/
+@
+@ To the extent possible under law, the implementer has waived all copyright
+@ and related or neighboring rights to the source code in this file.
+@ http://creativecommons.org/publicdomain/zero/1.0/
+@
+
+@ WARNING: These functions work only on a little-endian CPU with the ARMv7-M architecture (Cortex-M3, ...).
+
+
+    .thumb
+    .syntax unified
+.text
+
+@ ----------------------------------------------------------------------------
+@
+@ void Xoodoo_Initialize(void *state)
+@ Clears the 48-byte state: three stores of four zeroed registers.
+    .align 4
+.global Xoodoo_Initialize
+.type Xoodoo_Initialize, %function;
+Xoodoo_Initialize:
+    movs    r1, #0
+    movs    r2, #0
+    movs    r3, #0
+    movs    r12, #0
+    stmia   r0!, { r1 - r3, r12 }
+    stmia   r0!, { r1 - r3, r12 }
+    stmia   r0!, { r1 - r3, r12 }
+    bx      lr
+    .align 4
+
+
+@ ----------------------------------------------------------------------------
+@
+@ void Xoodoo_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
+@ XORs length bytes of data into the state starting at byte offset: whole words first, then single bytes.
+.global Xoodoo_AddBytes
+.type Xoodoo_AddBytes, %function;
+Xoodoo_AddBytes:
+    push    {r4,lr}
+    adds    r0, r0, r2                              @ state += offset
+    subs    r3, r3, #4                              @ .if length >= 4
+    bcc     Xoodoo_AddBytes_Bytes
+Xoodoo_AddBytes_LanesLoop:                          @ then, perform on lanes
+    ldr     r2, [r0]
+    ldr     r4, [r1], #4
+    eors    r2, r2, r4
+    str     r2, [r0], #4
+    subs    r3, r3, #4
+    bcs     Xoodoo_AddBytes_LanesLoop
+Xoodoo_AddBytes_Bytes:
+    adds    r3, r3, #3                              @ r3 = remaining bytes - 1; carry clear means none left
+    bcc     Xoodoo_AddBytes_Exit
+Xoodoo_AddBytes_BytesLoop:
+    ldrb    r2, [r0]
+    ldrb    r4, [r1], #1
+    eors    r2, r2, r4
+    strb    r2, [r0], #1
+    subs    r3, r3, #1
+    bcs     Xoodoo_AddBytes_BytesLoop
+Xoodoo_AddBytes_Exit:
+    pop     {r4,pc}
+    .align 4
+
+
+@ ----------------------------------------------------------------------------
+@
+@ void Xoodoo_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
+@ Copies length bytes of data over the state starting at byte offset.
+.global Xoodoo_OverwriteBytes
+.type Xoodoo_OverwriteBytes, %function;
+Xoodoo_OverwriteBytes:
+    adds    r0, r0, r2                              @ state += offset
+    subs    r3, r3, #4                              @ .if length >= 4
+    bcc     Xoodoo_OverwriteBytes_Bytes
+Xoodoo_OverwriteBytes_LanesLoop:                    @ then, perform on words
+    ldr     r2, [r1], #4
+    str     r2, [r0], #4
+    subs    r3, r3, #4
+    bcs     Xoodoo_OverwriteBytes_LanesLoop
+Xoodoo_OverwriteBytes_Bytes:
+    adds    r3, r3, #3
+    bcc     Xoodoo_OverwriteBytes_Exit
+Xoodoo_OverwriteBytes_BytesLoop:
+    ldrb    r2, [r1], #1
+    strb    r2, [r0], #1
+    subs    r3, r3, #1
+    bcs     Xoodoo_OverwriteBytes_BytesLoop
+Xoodoo_OverwriteBytes_Exit:
+    bx      lr
+    .align 4
+
+
+@ ----------------------------------------------------------------------------
+@
+@ void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount)
+@ Stores zero over the first byteCount bytes of the state: byteCount/4 words, then byteCount%4 bytes.
+.global Xoodoo_OverwriteWithZeroes
+.type Xoodoo_OverwriteWithZeroes, %function;
+Xoodoo_OverwriteWithZeroes:
+    movs    r3, #0
+    lsrs    r2, r1, #2                              @ r2 = number of whole words
+    beq     Xoodoo_OverwriteWithZeroes_Bytes
+Xoodoo_OverwriteWithZeroes_LoopLanes:
+    str     r3, [r0], #4
+    subs    r2, r2, #1
+    bne     Xoodoo_OverwriteWithZeroes_LoopLanes
+Xoodoo_OverwriteWithZeroes_Bytes:
+    ands    r1, #3                                  @ r1 = number of trailing bytes
+    beq     Xoodoo_OverwriteWithZeroes_Exit
+Xoodoo_OverwriteWithZeroes_LoopBytes:
+    strb    r3, [r0], #1
+    subs    r1, r1, #1
+    bne     Xoodoo_OverwriteWithZeroes_LoopBytes
+Xoodoo_OverwriteWithZeroes_Exit:
+    bx      lr
+    .align 4
+
+
+@ ----------------------------------------------------------------------------
+@
+@ void Xoodoo_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
+@ Copies length bytes of the state, starting at byte offset, to data (data is the destination).
+.global Xoodoo_ExtractBytes
+.type Xoodoo_ExtractBytes, %function;
+Xoodoo_ExtractBytes:
+    adds    r0, r0, r2                              @ state += offset
+    subs    r3, r3, #4                              @ .if length >= 4
+    bcc     Xoodoo_ExtractBytes_Bytes
+Xoodoo_ExtractBytes_LanesLoop:                      @ then, handle words
+    ldr     r2, [r0], #4
+    str     r2, [r1], #4
+    subs    r3, r3, #4
+    bcs     Xoodoo_ExtractBytes_LanesLoop
+Xoodoo_ExtractBytes_Bytes:
+    adds    r3, r3, #3
+    bcc     Xoodoo_ExtractBytes_Exit
+Xoodoo_ExtractBytes_BytesLoop:
+    ldrb    r2, [r0], #1
+    strb    r2, [r1], #1
+    subs    r3, r3, #1
+    bcs     Xoodoo_ExtractBytes_BytesLoop
+Xoodoo_ExtractBytes_Exit:
+    bx      lr
+    .align 4
+
+
+@ ----------------------------------------------------------------------------
+@
+@ void Xoodoo_ExtractAndAddBytes(void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
+@ Writes input XOR state[offset..] to output for length bytes; the state itself is only read.
+.global Xoodoo_ExtractAndAddBytes
+.type Xoodoo_ExtractAndAddBytes, %function;
+Xoodoo_ExtractAndAddBytes:
+    push    {r4,r5}
+    adds    r0, r0, r3                              @ state += offset (offset register no longer needed, reuse for length)
+    ldr     r3, [sp, #8]                            @ get length argument from stack
+    subs    r3, r3, #4                              @ .if length >= 4
+    bcc     Xoodoo_ExtractAndAddBytes_Bytes
+Xoodoo_ExtractAndAddBytes_LanesLoop:                @ then, handle words
+    ldr     r5, [r0], #4
+    ldr     r4, [r1], #4
+    eors    r5, r5, r4
+    str     r5, [r2], #4
+    subs    r3, r3, #4
+    bcs     Xoodoo_ExtractAndAddBytes_LanesLoop
+Xoodoo_ExtractAndAddBytes_Bytes:
+    adds    r3, r3, #3
+    bcc     Xoodoo_ExtractAndAddBytes_Exit
+Xoodoo_ExtractAndAddBytes_BytesLoop:
+    ldrb    r5, [r0], #1
+    ldrb    r4, [r1], #1
+    eors    r5, r5, r4
+    strb    r5, [r2], #1
+    subs    r3, r3, #1
+    bcs     Xoodoo_ExtractAndAddBytes_BytesLoop
+Xoodoo_ExtractAndAddBytes_Exit:
+    pop     {r4,r5}
+    bx      lr
+    .align 4
+
+
+@ ----------------------------------------------------------------------------
+
+.equ _r0    , 5
+.equ _r1    , 14
+.equ _t3    , 1
+
+.equ _w1    , 11
+
+.equ _e0    , 2
+.equ _e1    , 8
+
+.equ _rc12  , 0x00000058
+.equ _rc11  , 0x00000038
+.equ _rc10  , 0x000003C0
+.equ _rc9   , 0x000000D0
+.equ _rc8   , 0x00000120
+.equ _rc7   , 0x00000014
+.equ _rc6   , 0x00000060
+.equ _rc5   , 0x0000002C
+.equ _rc4   , 0x00000380
+.equ _rc3   , 0x000000F0
+.equ _rc2   , 0x000001A0
+.equ _rc1   , 0x00000012
+
+.equ _rc6x1, 0x00000003
+.equ _rc5x2, 0x0b000000
+.equ _rc4x3, 0x07000000
+.equ _rc3x4, 0x000f0000
+.equ _rc2x5, 0x0000d000
+.equ _rc1x6, 0x00000048
+
+.equ _rc12x1, 0xc0000002
+.equ _rc11x2, 0x0e000000
+.equ _rc10x3, 0x07800000
+.equ _rc9x4 , 0x000d0000
+.equ _rc8x5 , 0x00009000
+.equ _rc7x6 , 0x00000050
+.equ _rc6x7 , 0x0000000c
+.equ _rc5x8 , 0x2c000000
+.equ _rc4x9 , 0x1c000000
+.equ _rc3x10, 0x003c0000
+.equ _rc2x11, 0x00034000
+.equ _rc1x12, 0x00000120
+
+@ ----------------------------------------------------------------------------
+
+.macro mXor3 ro, a0, a1, a2, rho_e1, rho_e2       @ ro = a0 ^ rotl(a1, rho_e1) ^ rotl(a2, rho_e2); plain XOR when a rotation is a multiple of 32
+    .if ((\rho_e1)%32) == 0
+    eors    \ro, \a0, \a1
+    .else
+    eor     \ro, \a0, \a1, ROR #(32-(\rho_e1))%32
+    .endif
+    .if ((\rho_e2)%32) == 0
+    eors    \ro, \ro, \a2
+    .else
+    eor     \ro, \ro, \a2, ROR #(32-(\rho_e2))%32
+    .endif
+    .endm
+
+.macro mRliXor ro, ri, rot                         @ ro ^= rotl(ri, rot): the rotation is applied to the input operand
+    .if ((\rot)%32) == 0
+    eors    \ro, \ro, \ri
+    .else
+    eor     \ro, \ro, \ri, ROR #(32-(\rot))%32
+    .endif
+    .endm
+
+.macro mRloXor ro, ri, rot                         @ ro = ri ^ rotl(ro, rot): the rotation is applied to the previous value of the output
+    .if ((\rot)%32) == 0
+    eors    \ro, \ro, \ri
+    .else
+    eor     \ro, \ri, \ro, ROR #(32-(\rot))%32
+    .endif
+    .endm
+
+.macro mChi3 a0,a1,a2,r0,r1
+    bic     \r0, \a2, \a1, ROR #_w1
+    eors    \a0, \a0, \r0, ROR #32-_w1
+    bic     \r1, \a0, \a2, ROR #32-_w1
+    eors    \a1, \a1, \r1
+    bic     \r1, \a1, \a0
+    eors    \a2, \a2, \r1, ROR #_w1
+    .endm
+
+.macro mRound r6i, r7i, r8i, r9i, r6w, r7w, r8w, r9w, r10i, r11i, r12i, lri, rho_e1, rho_we2, rc
+
+    @ Theta: Column Parity Mixer (with late Rho-west, Rho-east bit rotations)
+    mXor3   r0, r5, \r9i, \lri, \rho_e1, \rho_we2
+    mXor3   r1, r2, \r6i, \r10i, \rho_e1, \rho_we2
+    mRliXor r0, r0, _r1-_r0
+    mRloXor r2, r0, 32-_r0
+    mRloXor \r6i, r0, \rho_e1-_r0
+    mRloXor \r10i, r0, \rho_we2-_r0
+
+    mXor3   r0, r3, \r7i, \r11i, \rho_e1, \rho_we2
+    mRliXor r1, r1, _r1-_r0
+    mRloXor r3, r1, 32-_r0
+    mRloXor \r7i, r1, \rho_e1-_r0
+    mRloXor \r11i, r1, \rho_we2-_r0
+
+    mXor3   r1, r4, \r8i, \r12i, \rho_e1, \rho_we2
+    mRliXor r0, r0, _r1-_r0
+    mRloXor r4, r0, 32-_r0
+    mRloXor \r8i, r0, \rho_e1-_r0
+    mRloXor \r12i, r0, \rho_we2-_r0
+
+    mRliXor r1, r1, _r1-_r0
+    mRloXor r5, r1, 32-_r0
+    mRloXor \r9i, r1, \rho_e1-_r0
+    mRloXor \lri, r1, \rho_we2-_r0
+    @ After Theta the whole state is rotated -r0
+    @ from here we must use a1.w instead of a1.i
+
+    @ Iota: round constant
+    .if     \rc == 0xc0000002
+    eor     r2, r2, #0x00000002
+    eor     r2, r2, #0xc0000000
+    .else
+    eor     r2, r2, #\rc
+    .endif
+
+    @ Chi: non-linear step, on columns
+    mChi3   r2, \r6w, \r10i, r0, r1
+    mChi3   r3, \r7w, \r11i, r0, r1
+    mChi3   r4, \r8w, \r12i, r0, r1
+    mChi3   r5, \r9w, \lri, r0, r1
+    .endm
+
+@ ----------------------------------------------------------------------------
+@
+@ void Xoodoo_Permute_6rounds( void *state )
+@ Six mRound steps on the state held in r2-r12,lr, then the deferred rotations are undone before storing back.
+.global Xoodoo_Permute_6rounds
+.type Xoodoo_Permute_6rounds, %function;
+Xoodoo_Permute_6rounds:
+    push    {r0,r4-r11,lr}
+    ldmia   r0!, {r2-r5}
+    ldmia   r0!, {r8-r9}                            @ words 4-5 go to r8-r9 and words 6-7 to r6-r7 (swapped relative to the 12-round entry)
+    ldmia   r0!, {r6-r7}
+    ldmia   r0, {r10-r12,lr}
+    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 32, 32, _rc6x1
+    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2
+    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3
+    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4
+    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5
+    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6
+    pop     {r0,r1}                                 @ r0 = saved state pointer, r1 = caller's r4
+    ror     r2, r2, #32-(6*_r0)%32
+    ror     r3, r3, #32-(6*_r0)%32
+    ror     r4, r4, #32-(6*_r0)%32
+    ror     r5, r5, #32-(6*_r0)%32
+    ror     r6, r6, #32-(6*_r0+1)%32
+    ror     r7, r7, #32-(6*_r0+1)%32
+    ror     r8, r8, #32-(6*_r0+1)%32
+    ror     r9, r9, #32-(6*_r0+1)%32
+    ror     r10, r10, #32-(6*_r0+_e1+_w1)%32
+    ror     r11, r11, #32-(6*_r0+_e1+_w1)%32
+    ror     r12, r12, #32-(6*_r0+_e1+_w1)%32
+    ror     lr, lr, #32-(6*_r0+_e1+_w1)%32
+    stmia   r0, {r2-r12,lr}
+    mov     r4, r1                                  @ restore caller's r4
+    pop     {r5-r11,pc}
+    .align 4
+
+
+@ ----------------------------------------------------------------------------
+@
+@ void Xoodoo_Permute_12rounds( void *state )
+@ Twelve mRound steps on the state held in r2-r12,lr, then the deferred rotations are undone before storing back.
+.global Xoodoo_Permute_12rounds
+.type Xoodoo_Permute_12rounds, %function;
+Xoodoo_Permute_12rounds:
+    push    {r0,r4-r11,lr}
+    ldmia   r0, {r2-r12,lr}
+    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc12x1
+    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc11x2
+    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc10x3
+    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc9x4
+    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc8x5
+    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc7x6
+    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc6x7
+    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x8
+    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x9
+    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x10
+    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x11
+    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x12
+    ror     r2, r2, #32-(12*_r0)%32
+    ror     r3, r3, #32-(12*_r0)%32
+    ror     r4, r4, #32-(12*_r0)%32
+    ror     r5, r5, #32-(12*_r0)%32
+    ror     r6, r6, #32-(12*_r0+1)%32
+    ror     r7, r7, #32-(12*_r0+1)%32
+    ror     r8, r8, #32-(12*_r0+1)%32
+    ror     r9, r9, #32-(12*_r0+1)%32
+    ror     r10, r10, #32-(12*_r0+_e1+_w1)%32
+    ror     r11, r11, #32-(12*_r0+_e1+_w1)%32
+    ror     r12, r12, #32-(12*_r0+_e1+_w1)%32
+    ror     lr, lr, #32-(12*_r0+_e1+_w1)%32
+    pop     {r0,r1}                                 @ r0 = saved state pointer, r1 = caller's r4
+    stmia   r0, {r2-r12,lr}
+    mov     r4, r1                                  @ restore caller's r4
+    pop     {r5-r11,pc}
+    .align 4
+
+
+.equ Xoofff_BlockSize   , 3*4*4
+
+@ ----------------------------------------------------------------------------
+@
+@ void Xoofff_AddIs(BitSequence *output, const BitSequence *input, BitLength bitLen)
+.global Xoofff_AddIs
+.type Xoofff_AddIs, %function;
+Xoofff_AddIs:
+    push    {r4-r10,lr}
+
+    subs    r2, r2, #Xoofff_BlockSize*8             @ r2 counts bits, hence the *8 on every byte size below
+    bcc     Xoofff_AddIs_LessThanBlock
+Xoofff_AddIs_BlockLoop:
+    ldr     r3, [r0, #0]
+    ldr     r4, [r0, #4]
+    ldr     r5, [r0, #8]
+    ldr     r6, [r0, #12]
+    ldr     r7, [r1], #4
+    ldr     r8, [r1], #4
+    ldr     r9, [r1], #4
+    ldr     r10, [r1], #4
+    eor     r3, r3, r7
+    eor     r4, r4, r8
+    eor     r5, r5, r9
+    eor     r6, r6, r10
+    str     r3, [r0], #4
+    str     r4, [r0], #4
+    str     r5, [r0], #4
+    str     r6, [r0], #4
+
+    ldr     r3, [r0, #0]
+    ldr     r4, [r0, #4]
+    ldr     r5, [r0, #8]
+    ldr     r6, [r0, #12]
+    ldr     r7, [r1], #4
+    ldr     r8, [r1], #4
+    ldr     r9, [r1], #4
+    ldr     r10, [r1], #4
+    eor     r3, r3, r7
+    eor     r4, r4, r8
+    eor     r5, r5, r9
+    eor     r6, r6, r10
+    str     r3, [r0], #4
+    str     r4, [r0], #4
+    str     r5, [r0], #4
+    str     r6, [r0], #4
+
+    ldr     r3, [r0, #0]
+    ldr     r4, [r0, #4]
+    ldr     r5, [r0, #8]
+    ldr     r6, [r0, #12]
+    ldr     r7, [r1], #4
+    ldr     r8, [r1], #4
+    ldr     r9, [r1], #4
+    ldr     r10, [r1], #4
+    eor     r3, r3, r7
+    eor     r4, r4, r8
+    eor     r5, r5, r9
+    eor     r6, r6, r10
+    str     r3, [r0], #4
+    str     r4, [r0], #4
+    str     r5, [r0], #4
+    str     r6, [r0], #4
+
+    subs    r2, r2, #Xoofff_BlockSize*8
+    bcs     Xoofff_AddIs_BlockLoop
+Xoofff_AddIs_LessThanBlock:
+    adds    r2, r2, #Xoofff_BlockSize*8
+    beq     Xoofff_AddIs_Return
+    subs    r2, r2, #16*8
+    bcc     Xoofff_AddIs_LessThan16
+Xoofff_AddIs_16Loop:
+    ldr     r3, [r0, #0]
+    ldr     r4, [r0, #4]
+    ldr     r5, [r0, #8]
+    ldr     r6, [r0, #12]
+    ldr     r7, [r1], #4
+    ldr     r8, [r1], #4
+    ldr     r9, [r1], #4
+    ldr     r10, [r1], #4
+    eor     r3, r3, r7
+    eor     r4, r4, r8
+    eor     r5, r5, r9
+    eor     r6, r6, r10
+    str     r3, [r0], #4
+    str     r4, [r0], #4
+    str     r5, [r0], #4
+    str     r6, [r0], #4
+    subs    r2, r2, #16*8
+    bcs     Xoofff_AddIs_16Loop
+Xoofff_AddIs_LessThan16:
+    adds    r2, r2, #16*8
+    beq     Xoofff_AddIs_Return
+    subs    r2, r2, #4*8
+    bcc     Xoofff_AddIs_LessThan4
+Xoofff_AddIs_4Loop:
+    ldr     r3, [r0]
+    ldr     r7, [r1], #4
+    eors    r3, r3, r7
+    str     r3, [r0], #4
+    subs    r2, r2, #4*8
+    bcs     Xoofff_AddIs_4Loop
+Xoofff_AddIs_LessThan4:
+    adds    r2, r2, #4*8
+    beq     Xoofff_AddIs_Return
+    subs    r2, r2, #8
+    bcc     Xoofff_AddIs_LessThan1
+Xoofff_AddIs_1Loop:
+    ldrb    r3, [r0]
+    ldrb    r7, [r1], #1
+    eors    r3, r3, r7
+    strb    r3, [r0], #1
+    subs    r2, r2, #8
+    bcs     Xoofff_AddIs_1Loop
+Xoofff_AddIs_LessThan1:
+    adds    r2, r2, #8                              @ r2 = number of leftover bits (1..7), or 0
+    beq     Xoofff_AddIs_Return
+    ldrb    r3, [r0]
+    ldrb    r7, [r1]
+    movs    r1, #1
+    eors    r3, r3, r7
+    lsls    r1, r1, r2
+    subs    r1, r1, #1                              @ r1 = (1 << bits) - 1, mask keeping only the low leftover bits
+    ands    r3, r3, r1
+    strb    r3, [r0]
+Xoofff_AddIs_Return:
+    pop     {r4-r10,pc}
+    .align 4
+
+
+@ ----------------------------------------------------------------------------
+@
+@ size_t Xoofff_CompressFastLoop(unsigned char *kRoll, unsigned char *xAccu, const unsigned char *input, size_t length)
+@ The offsets below index the frame built by the two pushes at function entry.
+.equ Xoofff_Compress_kRoll  , 0
+.equ Xoofff_Compress_input  , 4
+.equ Xoofff_Compress_xAccu  , 8
+.equ Xoofff_Compress_iInput , 12
+.equ Xoofff_Compress_length , 16
+
+.global Xoofff_CompressFastLoop
+.type Xoofff_CompressFastLoop, %function;
+Xoofff_CompressFastLoop:
+    subs    r3, #Xoofff_BlockSize                   @ length must be greater than block size
+    push    {r1-r12,lr}                             @ r1 = xAccu, r2 = initial input, r3 = length - block size land at sp+8, +12, +16 after the next push
+    push    {r0,r2}                                 @ sp+0 = kRoll, sp+4 = running input pointer
+    ldmia   r0, {r2-r12,lr}                         @ get initial kRoll
+Xoofff_CompressFastLoop_Loop:
+    ldr     r0, [sp, #Xoofff_Compress_input]        @ add input
+    ldr     r1, [r0], #4
+    eors    r2, r2, r1
+    ldr     r1, [r0], #4
+    eors    r3, r3, r1
+    ldr     r1, [r0], #4
+    eors    r4, r4, r1
+    ldr     r1, [r0], #4
+    eors    r5, r5, r1
+
+    ldr     r1, [r0], #4
+    eors    r6, r6, r1
+    ldr     r1, [r0], #4
+    eors    r7, r7, r1
+    ldr     r1, [r0], #4
+    eors    r8, r8, r1
+    ldr     r1, [r0], #4
+    eors    r9, r9, r1
+
+    ldr     r1, [r0], #4
+    eors    r10, r10, r1
+    ldr     r1, [r0], #4
+    eors    r11, r11, r1
+    ldr     r1, [r0], #4
+    eors    r12, r12, r1
+    ldr     r1, [r0], #4
+    eors    lr, lr, r1
+    str     r0, [sp, #Xoofff_Compress_input]
+
+    @ permutation
+    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc6x1
+    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2
+    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3
+    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4
+    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5
+    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6
+
+    @ Extract and add into xAccu
+    ldr     r0, [sp, #Xoofff_Compress_xAccu]
+    ldr     r1, [r0]
+    mRloXor r2, r1, (6*_r0)%32
+    ldr     r1, [r0, #4]
+
+    str     r2, [r0], #4
+    mRloXor r3, r1, (6*_r0)%32
+    ldr     r1, [r0, #4]
+
+    str     r3, [r0], #4
+    mRloXor r4, r1, (6*_r0)%32
+    ldr     r1, [r0, #4]
+
+    str     r4, [r0], #4
+    mRloXor r5, r1, (6*_r0)%32
+    str     r5, [r0], #4
+
+    ldm     r0, {r2-r5}                             @ note that r6-r8 and r7-r9 are swapped
+    mRliXor r2, r8, (6*_r0+1)%32
+    mRliXor r3, r9, (6*_r0+1)%32
+    mRliXor r4, r6, (6*_r0+1)%32
+    mRliXor r5, r7, (6*_r0+1)%32
+    stm     r0!, {r2-r5}
+
+    ldm     r0, {r2-r5}
+    mRliXor r2, r10, (6*_r0+_e1+_w1)%32
+    mRliXor r3, r11, (6*_r0+_e1+_w1)%32
+    mRliXor r4, r12, (6*_r0+_e1+_w1)%32
+    mRliXor r5, lr, (6*_r0+_e1+_w1)%32
+    stm     r0!, {r2-r5}
+
+    @ roll kRoll
+    ldr     r0, [sp, #Xoofff_Compress_kRoll]
+    ldr     lr, [r0], #4
+    ldmia   r0!, {r10-r12}
+    ldmia   r0!, {r2-r9}
+    eors    lr, lr, lr, LSL #13
+    eors    lr, lr, r2, ROR #32-3
+    sub     r0, #Xoofff_BlockSize
+    stmia   r0, {r2-r12,lr}
+    @ loop management
+    ldr     r0, [sp, #Xoofff_Compress_length]
+    subs    r0, #Xoofff_BlockSize
+    str     r0, [sp, #Xoofff_Compress_length]
+    bcs     Xoofff_CompressFastLoop_Loop
+    @ return number of bytes processed
+    ldr     r0, [sp, #Xoofff_Compress_input]
+    ldr     r1, [sp, #Xoofff_Compress_iInput]
+    sub     r0, r0, r1
+    pop     {r1,r2}
+    pop     {r1-r12,pc}
+    .align 4
+
+
+@ ----------------------------------------------------------------------------
+@
+@ size_t Xoofff_ExpandFastLoop(unsigned char *yAccu, const unsigned char *kRoll, unsigned char *output, size_t length)
+@ The offsets below index the frame built by the two pushes at function entry.
+.equ Xoofff_Expand_yAccu    , 0
+.equ Xoofff_Expand_output   , 4
+.equ Xoofff_Expand_kRoll    , 8
+.equ Xoofff_Expand_iOutput  , 12
+.equ Xoofff_Expand_length   , 16
+
+.global Xoofff_ExpandFastLoop
+.type Xoofff_ExpandFastLoop, %function;
+Xoofff_ExpandFastLoop:
+    subs    r3, #Xoofff_BlockSize                   @ length must be greater than block size
+    push    {r1-r12,lr}                             @ r1 = kRoll, r2 = initial output, r3 = length - block size land at sp+8, +12, +16 after the next push
+    push    {r0,r2}                                 @ sp+0 = yAccu, sp+4 = running output pointer
+    ldmia   r0, {r2-r12,lr}                         @ get initial yAccu
+Xoofff_ExpandFastLoop_Loop:
+    @ permutation
+    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc6x1
+    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2
+    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3
+    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4
+    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5
+    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6
+
+    @ Add k and extract
+    ldr     r0, [sp, #Xoofff_Expand_kRoll]
+    ldr     r1, [r0], #4
+    mRloXor r2, r1, (6*_r0)%32
+
+    ldr     r1, [sp, #Xoofff_Expand_output]
+    str     r2, [r1], #4
+
+    ldr     r2, [r0], #4
+    mRloXor r3, r2, (6*_r0)%32
+    ldr     r2, [r0], #4
+
+    str     r3, [r1], #4
+    mRloXor r4, r2, (6*_r0)%32
+    ldr     r2, [r0], #4
+
+    str     r4, [r1], #4
+    mRloXor r5, r2, (6*_r0)%32
+    str     r5, [r1], #4
+
+    ldm     r0!, {r2-r5}                            @ Note that r6-r8 and r7-r9 are swapped
+    mRliXor r2, r8, (6*_r0+1)%32
+    str     r2, [r1], #4
+    mRliXor r3, r9, (6*_r0+1)%32
+    str     r3, [r1], #4
+    mRliXor r4, r6, (6*_r0+1)%32
+    str     r4, [r1], #4
+    mRliXor r5, r7, (6*_r0+1)%32
+    str     r5, [r1], #4
+
+    ldm     r0!, {r2-r5}
+    mRliXor r2, r10, (6*_r0+_e1+_w1)%32
+    str     r2, [r1], #4
+    mRliXor r3, r11, (6*_r0+_e1+_w1)%32
+    str     r3, [r1], #4
+    mRliXor r4, r12, (6*_r0+_e1+_w1)%32
+    str     r4, [r1], #4
+    mRliXor r5, lr, (6*_r0+_e1+_w1)%32
+    str     r5, [r1], #4
+
+    @ roll-e yAccu
+    ldr     r0, [sp, #Xoofff_Expand_yAccu]
+    str     r1, [sp, #Xoofff_Expand_output]
+    ldr     lr, [r0], #4
+    ldmia   r0!, {r10-r12}
+    ldmia   r0!, {r2-r9}
+    and     r1, r6, r2
+    eor     lr, r1, lr, ROR #32-5
+    eor     lr, lr, r2, ROR #32-13
+    eor     lr, lr, #7
+    sub     r0, #Xoofff_BlockSize
+    stmia   r0, {r2-r12,lr}
+    @ loop management
+    ldr     r0, [sp, #Xoofff_Expand_length]
+    subs    r0, #Xoofff_BlockSize
+    str     r0, [sp, #Xoofff_Expand_length]
+    bcs     Xoofff_ExpandFastLoop_Loop
+    @ return number of bytes processed
+    ldr     r0, [sp, #Xoofff_Expand_output]
+    ldr     r1, [sp, #Xoofff_Expand_iOutput]
+    sub     r0, r0, r1
+    pop     {r1,r2}
+    pop     {r1-r12,pc}
+    .align 4
+
+ diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Xoodoo.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Xoodoo.h new file mode 100644 index 0000000..1b6f1a9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Xoodoo.h @@ -0,0 +1,79 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodoo_h_ +#define _Xoodoo_h_ + +#include +#include + +#define MAXROUNDS 12 +#define NROWS 3 +#define NCOLUMS 4 +#define NLANES (NCOLUMS*NROWS) + +/* Round constants */ +#define _rc12 0x00000058 +#define _rc11 0x00000038 +#define _rc10 0x000003C0 +#define _rc9 0x000000D0 +#define _rc8 0x00000120 +#define _rc7 0x00000014 +#define _rc6 0x00000060 +#define _rc5 0x0000002C +#define _rc4 0x00000380 +#define _rc3 0x000000F0 +#define _rc2 0x000001A0 +#define _rc1 0x00000012 + + +#if !defined(ROTL32) + #if defined (__arm__) && !defined(__GNUC__) + #define ROTL32(a, offset) __ror(a, (32-(offset))%32) + #elif defined(_MSC_VER) + #define ROTL32(a, offset) _rotl(a, (offset)%32) + #else + #define ROTL32(a, offset) ((((uint32_t)a) << ((offset)%32)) ^ (((uint32_t)a) >> ((32-(offset))%32))) + #endif +#endif + +#if !defined(READ32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define READ32_UNALIGNED(argAddress) (*((const __packed uint32_t*)(argAddress))) + #elif defined(_MSC_VER) + #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress))) + #else + #define READ32_UNALIGNED(argAddress) (*((const 
uint32_t*)(argAddress))) + #endif +#endif + +#if !defined(WRITE32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define WRITE32_UNALIGNED(argAddress, argData) (*((__packed uint32_t*)(argAddress)) = (argData)) + #elif defined(_MSC_VER) + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #else + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #endif +#endif + +#if !defined(index) + #define index(__x,__y) ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS)) +#endif + +typedef uint32_t tXoodooLane; + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Xoodyak-parameters.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Xoodyak-parameters.h new file mode 100644 index 0000000..a8c34d8 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Xoodyak-parameters.h @@ -0,0 +1,26 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodyak_parameters_h_ +#define _Xoodyak_parameters_h_ + +#define Xoodyak_f_bPrime 48 +#define Xoodyak_Rhash 16 +#define Xoodyak_Rkin 44 +#define Xoodyak_Rkout 24 +#define Xoodyak_lRatchet 16 + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Xoodyak-uf-armv7m-le-gcc.s b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Xoodyak-uf-armv7m-le-gcc.s new file mode 100644 index 0000000..1249039 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Xoodyak-uf-armv7m-le-gcc.s @@ -0,0 +1,565 @@ +@ +@ The eXtended Keccak Code Package (XKCP) +@ https://github.com/XKCP/XKCP +@ +@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. +@ +@ Implementation by Ronny Van Keer, hereby denoted as "the implementer". +@ +@ For more information, feedback or questions, please refer to the Keccak Team website: +@ https://keccak.team/ +@ +@ To the extent possible under law, the implementer has waived all copyright +@ and related or neighboring rights to the source code in this file. +@ http://creativecommons.org/publicdomain/zero/1.0/ +@ + +@ WARNING: These functions work only on little endian CPU with@ ARMv7m architecture (Cortex-M3, ...). 
+
+
+    .thumb
+    .syntax unified
+.text
+
+
+@ ----------------------------------------------------------------------------
+
+.equ _r0    , 5
+.equ _r1    , 14
+.equ _t3    , 1
+
+.equ _w1    , 11
+
+.equ _e0    , 2
+.equ _e1    , 8
+
+.equ _rc12  , 0x00000058
+.equ _rc11  , 0x00000038
+.equ _rc10  , 0x000003C0
+.equ _rc9   , 0x000000D0
+.equ _rc8   , 0x00000120
+.equ _rc7   , 0x00000014
+.equ _rc6   , 0x00000060
+.equ _rc5   , 0x0000002C
+.equ _rc4   , 0x00000380
+.equ _rc3   , 0x000000F0
+.equ _rc2   , 0x000001A0
+.equ _rc1   , 0x00000012
+
+.equ _rc6x1 , 0x00000003
+.equ _rc5x2 , 0x0b000000
+.equ _rc4x3 , 0x07000000
+.equ _rc3x4 , 0x000f0000
+.equ _rc2x5 , 0x0000d000
+.equ _rc1x6 , 0x00000048
+
+.equ _rc12x1, 0xc0000002
+.equ _rc11x2, 0x0e000000
+.equ _rc10x3, 0x07800000
+.equ _rc9x4 , 0x000d0000
+.equ _rc8x5 , 0x00009000
+.equ _rc7x6 , 0x00000050
+.equ _rc6x7 , 0x0000000c
+.equ _rc5x8 , 0x2c000000
+.equ _rc4x9 , 0x1c000000
+.equ _rc3x10, 0x003c0000
+.equ _rc2x11, 0x00034000
+.equ _rc1x12, 0x00000120
+
+@ ----------------------------------------------------------------------------
+
+.macro mXor3 ro, a0, a1, a2, rho_e1, rho_e2       @ ro = a0 ^ rotl(a1, rho_e1) ^ rotl(a2, rho_e2); plain XOR when a rotation is a multiple of 32
+    .if ((\rho_e1)%32) == 0
+    eors    \ro, \a0, \a1
+    .else
+    eor     \ro, \a0, \a1, ROR #(32-(\rho_e1))%32
+    .endif
+    .if ((\rho_e2)%32) == 0
+    eors    \ro, \ro, \a2
+    .else
+    eor     \ro, \ro, \a2, ROR #(32-(\rho_e2))%32
+    .endif
+    .endm
+
+.macro mRliXor ro, ri, rot                         @ ro ^= rotl(ri, rot): the rotation is applied to the input operand
+    .if ((\rot)%32) == 0
+    eors    \ro, \ro, \ri
+    .else
+    eor     \ro, \ro, \ri, ROR #(32-(\rot))%32
+    .endif
+    .endm
+
+.macro mRloXor ro, ri, rot                         @ ro = ri ^ rotl(ro, rot): the rotation is applied to the previous value of the output
+    .if ((\rot)%32) == 0
+    eors    \ro, \ro, \ri
+    .else
+    eor     \ro, \ri, \ro, ROR #(32-(\rot))%32
+    .endif
+    .endm
+
+.macro mChi3 a0,a1,a2,r0,r1
+    bic     \r0, \a2, \a1, ROR #_w1
+    eors    \a0, \a0, \r0, ROR #32-_w1
+    bic     \r1, \a0, \a2, ROR #32-_w1
+    eors    \a1, \a1, \r1
+    bic     \r1, \a1, \a0
+    eors    \a2, \a2, \r1, ROR #_w1
+    .endm
+
+.macro mRound r6i, r7i, r8i, r9i, r6w, r7w, r8w, r9w, r10i, r11i, r12i, lri, rho_e1, rho_we2, rc
+
+    @ Theta: Column Parity Mixer (with late Rho-west, Rho-east bit rotations)
+    mXor3   r0, r5, \r9i, \lri, \rho_e1, \rho_we2
+    mXor3   r1, r2, \r6i, \r10i, \rho_e1, \rho_we2
+    mRliXor r0, r0, _r1-_r0
+    mRloXor r2, r0, 32-_r0
+    mRloXor \r6i, r0, \rho_e1-_r0
+    mRloXor \r10i, r0, \rho_we2-_r0
+
+    mXor3   r0, r3, \r7i, \r11i, \rho_e1, \rho_we2
+    mRliXor r1, r1, _r1-_r0
+    mRloXor r3, r1, 32-_r0
+    mRloXor \r7i, r1, \rho_e1-_r0
+    mRloXor \r11i, r1, \rho_we2-_r0
+
+    mXor3   r1, r4, \r8i, \r12i, \rho_e1, \rho_we2
+    mRliXor r0, r0, _r1-_r0
+    mRloXor r4, r0, 32-_r0
+    mRloXor \r8i, r0, \rho_e1-_r0
+    mRloXor \r12i, r0, \rho_we2-_r0
+
+    mRliXor r1, r1, _r1-_r0
+    mRloXor r5, r1, 32-_r0
+    mRloXor \r9i, r1, \rho_e1-_r0
+    mRloXor \lri, r1, \rho_we2-_r0
+    @ After Theta the whole state is rotated -r0
+    @ from here we must use a1.w instead of a1.i
+
+    @ Iota: round constant
+    .if     \rc == 0xc0000002
+    eor     r2, r2, #0x00000002
+    eor     r2, r2, #0xc0000000
+    .else
+    eor     r2, r2, #\rc
+    .endif
+
+    @ Chi: non-linear step, on columns
+    mChi3   r2, \r6w, \r10i, r0, r1
+    mChi3   r3, \r7w, \r11i, r0, r1
+    mChi3   r4, \r8w, \r12i, r0, r1
+    mChi3   r5, \r9w, \lri, r0, r1
+    .endm
+
+.equ offsetInstance     , 0                        @ offsets into the frame built by "push {r0-r5}" in the callers below
+.equ offsetInitialLen   , 16
+.equ offsetReturn       , 20
+
+@ ----------------------------------------------------------------------------
+@
+@ Xoodoo_Permute_12roundsAsm: only callable from asm
+@ Operates on the state held in r2-r12,lr and returns by loading pc from [sp, #offsetReturn].
+.type Xoodoo_Permute_12roundsAsm, %function;
+Xoodoo_Permute_12roundsAsm:
+    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc12x1
+    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc11x2
+    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc10x3
+    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc9x4
+    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc8x5
+    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc7x6
+    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc6x7
+    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x8
+    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x9
+    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x10
+    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x11
+    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x12
+    ror     r2, r2, #32-(12*_r0)%32
+    ror     r3, r3, #32-(12*_r0)%32
+    ror     r4, r4, #32-(12*_r0)%32
+    ror     r5, r5, #32-(12*_r0)%32
+    ror     r6, r6, #32-(12*_r0+1)%32
+    ror     r7, r7, #32-(12*_r0+1)%32
+    ror     r8, r8, #32-(12*_r0+1)%32
+    ror     r9, r9, #32-(12*_r0+1)%32
+    ror     r10, r10, #32-(12*_r0+_e1+_w1)%32
+    ror     r11, r11, #32-(12*_r0+_e1+_w1)%32
+    ror     r12, r12, #32-(12*_r0+_e1+_w1)%32
+    ror     lr, lr, #32-(12*_r0+_e1+_w1)%32
+    ldr     pc, [sp, #offsetReturn]                 @ lr holds a state word, so return through the address saved in the frame
+    .align 4
+
+
+
+@ ----------------------------------------------------------------------------
+@
+@ size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen)
+@ {
+@     size_t  initialLength = XLen;
+@
+@     do {
+@         SnP_Permute(state );                      /* Xoodyak_Up(instance, NULL, 0, 0); */
+@         SnP_AddBytes(state, X, 0, Xoodyak_Rkin);  /* Xoodyak_Down(instance, X, Xoodyak_Rkin, 0); */
+@         SnP_AddByte(state, 0x01, Xoodyak_Rkin);
+@         X       += Xoodyak_Rkin;
+@         XLen    -= Xoodyak_Rkin;
+@     } while (XLen >= Xoodyak_Rkin);
+@
+@     return initialLength - XLen;
+@ }
+@
+.equ offsetAbsorbX      , 4
+.equ offsetAbsorbXLen   , 8
+
+.global Xoodyak_AbsorbKeyedFullBlocks
+.type Xoodyak_AbsorbKeyedFullBlocks, %function;
+Xoodyak_AbsorbKeyedFullBlocks:
+    push    {r4-r12,lr}
+    mov     r4, r2                                  @ r4 initialLength
+    subs    r2, r2, #44                             @ 44 = Xoodyak_Rkin; the stored length is biased by one block
+    ldr     r5, =Xoodyak_AbsorbKeyedFullBlocks_Ret+1    @ return address for the permutation; +1 sets the Thumb bit
+    push    {r0-r5}                                 @ frame: sp+0 state, sp+4 X, sp+8 XLen-44, sp+16 initialLength, sp+20 return address
+    ldmia   r0, {r2-r12,lr}
+Xoodyak_AbsorbKeyedFullBlocks_Loop:
+    b.w     Xoodoo_Permute_12roundsAsm
+Xoodyak_AbsorbKeyedFullBlocks_Ret:
+    ldr     r0, [sp, #offsetAbsorbX]
+    ldr     r1, [r0], #4
+    eors    r2, r2, r1
+    ldr     r1, [r0], #4
+    eors    r3, r3, r1
+    ldr     r1, [r0], #4
+    eors    r4, r4, r1
+    ldr     r1, [r0], #4
+    eors    r5, r5, r1
+    ldr     r1, [r0], #4
+    eors    r6, r6, r1
+    ldr     r1, [r0], #4
+    eors    r7, r7, r1
+    ldr     r1, [r0], #4
+    eors    r8, r8, r1
+    ldr     r1, [r0], #4
+    eors    r9, r9, r1
+    ldr     r1, [r0], #4
+    eors    r10, r10, r1
+    ldr     r1, [r0], #4
+    eors    r11, r11, r1
+    ldr     r1, [r0], #4
+    eors    lr, lr, #1                              @ SnP_AddByte(state, 0x01, 44): lr holds state bytes 44..47
+    eors    r12, r12, r1
+    ldr     r1, [sp, #offsetAbsorbXLen]
+    str     r0, [sp, #offsetAbsorbX]
+    subs    r1, r1, #44
+    str     r1, [sp, #offsetAbsorbXLen]
+    bcs     Xoodyak_AbsorbKeyedFullBlocks_Loop
+    ldr     r0, [sp, #offsetInstance]
+    stmia   r0, {r2-r12,lr}
+    pop     {r0-r5}
+    adds    r2, r2, #44                             @ undo the bias: r2 = bytes left unprocessed
+    sub     r0, r4, r2                              @ return initialLength - remaining
+    pop     {r4-r12,pc}
+    .align 4
+
+
+@ ----------------------------------------------------------------------------
+@
+@ size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen)
+@ {
+@     size_t  initialLength = XLen;
+@
+@     do {
+@         SnP_Permute(state );                      /* Xoodyak_Up(instance, NULL, 0, 0); */
+@         SnP_AddBytes(state, X, 0, Xoodyak_Rhash); /* Xoodyak_Down(instance, X, Xoodyak_Rhash, 0); */
+@         SnP_AddByte(state, 0x01, Xoodyak_Rhash);
+@         X       += Xoodyak_Rhash;
+@         XLen    -= Xoodyak_Rhash;
+@     } while (XLen >= Xoodyak_Rhash);
+@
+@     return initialLength - XLen;
+@ }
+@
+.global Xoodyak_AbsorbHashFullBlocks
+.type Xoodyak_AbsorbHashFullBlocks, %function;
+Xoodyak_AbsorbHashFullBlocks:
+    push    {r4-r12,lr}
+    mov     r4, r2                                  @ r4 initialLength
+    subs    r2, r2, #16                             @ 16 = Xoodyak_Rhash; the stored length is biased by one block
+    ldr     r5, =Xoodyak_AbsorbHashFullBlocks_Ret+1     @ return address for the permutation; +1 sets the Thumb bit
+    push    {r0-r5}                                 @ frame: sp+0 state, sp+4 X, sp+8 XLen-16, sp+16 initialLength, sp+20 return address
+    ldmia   r0, {r2-r12,lr}
+Xoodyak_AbsorbHashFullBlocks_Loop:
+    b.w     Xoodoo_Permute_12roundsAsm
+Xoodyak_AbsorbHashFullBlocks_Ret:
+    ldr     r0, [sp, #offsetAbsorbX]
+    ldr     r1, [r0], #4
+    eors    r2, r2, r1
+    ldr     r1, [r0], #4
+    eors    r3, r3, r1
+    ldr     r1, [r0], #4
+    eors    r4, r4, r1
+    ldr     r1, [r0], #4
+    eors    r6, r6, #1                              @ SnP_AddByte(state, 0x01, 16): r6 holds state bytes 16..19
+    eors    r5, r5, r1
+    ldr     r1, [sp, #offsetAbsorbXLen]
+    str     r0, [sp, #offsetAbsorbX]
+    subs    r1, r1, #16
+    str     r1, [sp, #offsetAbsorbXLen]
+    bcs     Xoodyak_AbsorbHashFullBlocks_Loop
+    ldr     r0, [sp, #offsetInstance]
+    stmia   r0, {r2-r12,lr}
+    pop     {r0-r5}
+    adds    r2, r2, #16                             @ undo the bias: r2 = bytes left unprocessed
+    sub     r0, r4, r2                              @ return initialLength - remaining
+    pop     {r4-r12,pc}
+    .align 4
+
+
+@ ----------------------------------------------------------------------------
+@
+@ size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen) +@ { +@ size_t initialLength = YLen@ +@ +@ do { +@ SnP_AddByte(state, 0x01, 0)@ /* Xoodyak_Down(instance, NULL, 0, 0)@ */ +@ SnP_Permute(state )@ /* Xoodyak_Up(instance, Y, Xoodyak_Rkout, 0)@ */ +@ SnP_ExtractBytes(state, Y, 0, Xoodyak_Rkout)@ +@ Y += Xoodyak_Rkout@ +@ YLen -= Xoodyak_Rkout@ +@ } while (YLen >= Xoodyak_Rkout)@ +@ +@ return initialLength - YLen@ +@ } +@ +.equ offsetSqueezeY , 4 +.equ offsetSqueezeYLen , 8 + +.global Xoodyak_SqueezeKeyedFullBlocks +.type Xoodyak_SqueezeKeyedFullBlocks, %function; +Xoodyak_SqueezeKeyedFullBlocks: + push {r4-r12,lr} + mov r4, r2 @ r4 initialLength + subs r2, r2, #24 + ldr r5, =Xoodyak_SqueezeKeyedFullBlocks_Ret+1 + push {r0-r5} + ldmia r0, {r2-r12,lr} +Xoodyak_SqueezeKeyedFullBlocks_Loop: + eors r2, r2, #1 + b.w Xoodoo_Permute_12roundsAsm +Xoodyak_SqueezeKeyedFullBlocks_Ret: + ldr r0, [sp, #offsetSqueezeY] + str r2, [r0], #4 + str r3, [r0], #4 + str r4, [r0], #4 + str r5, [r0], #4 + str r6, [r0], #4 + str r7, [r0], #4 + ldr r1, [sp, #offsetSqueezeYLen] + str r0, [sp, #offsetSqueezeY] + subs r1, r1, #24 + str r1, [sp, #offsetSqueezeYLen] + bcs Xoodyak_SqueezeKeyedFullBlocks_Loop + ldr r0, [sp, #offsetInstance] + stmia r0, {r2-r12,lr} + pop {r0-r5} + adds r2, r2, #24 + sub r0, r4, r2 + pop {r4-r12,pc} + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen) +@ { +@ size_t initialLength = YLen@ +@ +@ do { +@ SnP_AddByte(state, 0x01, 0)@ /* Xoodyak_Down(instance, NULL, 0, 0)@ */ +@ SnP_Permute(state)@ /* Xoodyak_Up(instance, Y, Xoodyak_Rhash, 0)@ */ +@ SnP_ExtractBytes(state, Y, 0, Xoodyak_Rhash)@ +@ Y += Xoodyak_Rhash@ +@ YLen -= Xoodyak_Rhash@ +@ } while (YLen >= Xoodyak_Rhash)@ +@ +@ return initialLength - YLen@ +@ } +@ +.global Xoodyak_SqueezeHashFullBlocks +.type Xoodyak_SqueezeHashFullBlocks, %function; 
+Xoodyak_SqueezeHashFullBlocks: + push {r4-r12,lr} + mov r4, r2 @ r4 initialLength + subs r2, r2, #16 + ldr r5, =Xoodyak_SqueezeHashFullBlocks_Ret+1 + push {r0-r5} + ldmia r0, {r2-r12,lr} +Xoodyak_SqueezeHashFullBlocks_Loop: + eors r2, r2, #1 + b.w Xoodoo_Permute_12roundsAsm +Xoodyak_SqueezeHashFullBlocks_Ret: + ldr r0, [sp, #offsetSqueezeY] + str r2, [r0], #4 + str r3, [r0], #4 + str r4, [r0], #4 + str r5, [r0], #4 + ldr r1, [sp, #offsetSqueezeYLen] + str r0, [sp, #offsetSqueezeY] + subs r1, r1, #16 + str r1, [sp, #offsetSqueezeYLen] + bcs Xoodyak_SqueezeHashFullBlocks_Loop + ldr r0, [sp, #offsetInstance] + stmia r0, {r2-r12,lr} + pop {r0-r5} + adds r2, r2, #16 + sub r0, r4, r2 + pop {r4-r12,pc} + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen) +@ { +@ size_t initialLength = IOLen@ +@ +@ do { +@ SnP_Permute(state)@ +@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@ +@ SnP_OverwriteBytes(state, O, 0, Xoodyak_Rkout)@ +@ SnP_AddByte(state, 0x01, Xoodyak_Rkout)@ +@ I += Xoodyak_Rkout@ +@ O += Xoodyak_Rkout@ +@ IOLen -= Xoodyak_Rkout@ +@ } while (IOLen >= Xoodyak_Rkout)@ +@ +@ return initialLength - IOLen@ +@ } +@ +.equ offsetCryptI , 4+8 +.equ offsetCryptO , 8+8 +.equ offsetCryptIOLen , 12 + +.global Xoodyak_EncryptFullBlocks +.type Xoodyak_EncryptFullBlocks, %function; +Xoodyak_EncryptFullBlocks: + push {r4-r12,lr} + mov r4, r3 @ r4 initialLength + subs r3, r3, #24 + ldr r5, =Xoodyak_EncryptFullBlocks_Ret+1 + push {r0-r5} + ldmia r0, {r2-r12,lr} +Xoodyak_EncryptFullBlocks_Loop: + b.w Xoodoo_Permute_12roundsAsm +Xoodyak_EncryptFullBlocks_Ret: + push {r10, r11} + ldr r11, [sp, #offsetCryptI] + ldr r10, [sp, #offsetCryptO] + ldr r0, [r11], #4 + ldr r1, [r11], #4 + eors r2, r2, r0 + str r2, [r10], #4 + eors r3, r3, r1 + ldr r0, [r11], #4 + str r3, [r10], #4 + eors r4, r4, r0 + ldr r1, [r11], #4 + str r4, [r10], #4 + eors r5, 
r5, r1 + ldr r0, [r11], #4 + str r5, [r10], #4 + eors r6, r6, r0 + ldr r1, [r11], #4 + str r6, [r10], #4 + eors r7, r7, r1 + str r7, [r10], #4 + str r10, [sp, #offsetCryptO] + str r11, [sp, #offsetCryptI] + pop {r10, r11} + ldr r0, [sp, #offsetCryptIOLen] + eors r8, r8, #1 + subs r0, r0, #24 + str r0, [sp, #offsetCryptIOLen] + bcs Xoodyak_EncryptFullBlocks_Loop + ldr r0, [sp, #offsetInstance] + stmia r0, {r2-r12,lr} + pop {r0-r5} + adds r3, r3, #24 + sub r0, r4, r3 + pop {r4-r12,pc} + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen) +@ { +@ size_t initialLength = IOLen@ +@ +@ do { +@ SnP_Permute(state)@ +@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@ +@ SnP_AddBytes(state, O, 0, Xoodyak_Rkout)@ +@ SnP_AddByte(state, 0x01, Xoodyak_Rkout)@ +@ I += Xoodyak_Rkout@ +@ O += Xoodyak_Rkout@ +@ IOLen -= Xoodyak_Rkout@ +@ } while (IOLen >= Xoodyak_Rkout)@ +@ +@ return initialLength - IOLen@ +@ } +@ +.global Xoodyak_DecryptFullBlocks +.type Xoodyak_DecryptFullBlocks, %function; +Xoodyak_DecryptFullBlocks: + push {r4-r12,lr} + mov r4, r3 @ r4 initialLength + subs r3, r3, #24 + ldr r5, =Xoodyak_DecryptFullBlocks_Ret+1 + push {r0-r5} + ldmia r0, {r2-r12,lr} +Xoodyak_DecryptFullBlocks_Loop: + b.w Xoodoo_Permute_12roundsAsm +Xoodyak_DecryptFullBlocks_Ret: + push {r10, r11} + ldr r11, [sp, #offsetCryptI] + ldr r10, [sp, #offsetCryptO] + ldr r0, [r11], #4 + ldr r1, [r11], #4 + eors r2, r2, r0 + str r2, [r10], #4 + mov r2, r0 + eors r3, r3, r1 + ldr r0, [r11], #4 + str r3, [r10], #4 + mov r3, r1 + eors r4, r4, r0 + ldr r1, [r11], #4 + str r4, [r10], #4 + mov r4, r0 + eors r5, r5, r1 + ldr r0, [r11], #4 + str r5, [r10], #4 + mov r5, r1 + eors r6, r6, r0 + ldr r1, [r11], #4 + str r6, [r10], #4 + mov r6, r0 + eors r7, r7, r1 + str r7, [r10], #4 + mov r7, r1 + str r10, [sp, #offsetCryptO] + str r11, [sp, #offsetCryptI] + pop {r10, r11} + 
ldr r0, [sp, #offsetCryptIOLen] + eors r8, r8, #1 + subs r0, r0, #24 + str r0, [sp, #offsetCryptIOLen] + bcs Xoodyak_DecryptFullBlocks_Loop + ldr r0, [sp, #offsetInstance] + stmia r0, {r2-r12,lr} + pop {r0-r5} + adds r3, r3, #24 + sub r0, r4, r3 + pop {r4-r12,pc} + .align 4 + + diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Xoodyak.c b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Xoodyak.c new file mode 100644 index 0000000..e0b67b5 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/Xoodyak.c @@ -0,0 +1,53 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifdef XoodooReference + #include "displayIntermediateValues.h" +#endif + +#include +#include +#include "Xoodyak.h" + +#ifdef OUTPUT +#include +#include + +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length); +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length) +{ + unsigned int i; + + fprintf(f, "%s:", synopsis); + for(i=0; i +#include "Cyclist.h" +#include "Xoodoo-SnP.h" +#include "Xoodyak-parameters.h" + +KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment) +KCP_DeclareCyclistFunctions(Xoodyak) + +#else +#error This requires an implementation of Xoodoo +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/align.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/align.h new file mode 100644 index 0000000..82ad2f9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/align.h @@ -0,0 +1,33 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _align_h_ +#define _align_h_ + +/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. 
*/ +#ifdef ALIGN +#undef ALIGN +#endif + +#if defined(__GNUC__) +#define ALIGN(x) __attribute__ ((aligned(x))) +#elif defined(_MSC_VER) +#define ALIGN(x) __declspec(align(x)) +#elif defined(__ARMCC_VERSION) +#define ALIGN(x) __align(x) +#else +#define ALIGN(x) +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/api.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/api.h new file mode 100644 index 0000000..4ceda96 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/brg_endian.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/brg_endian.h new file mode 100644 index 0000000..7c640b9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/brg_endian.h @@ -0,0 +1,143 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The redistribution and use of this software (with or without changes) + is allowed without the payment of fees or royalties provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. 
+ --------------------------------------------------------------------------- + Issue Date: 20/12/2007 + Changes for ARM 9/9/2010 +*/ + +#ifndef _BRG_ENDIAN_H +#define _BRG_ENDIAN_H + +#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ +#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ + +#if 0 +/* Include files where endian defines and byteswap functions may reside */ +#if defined( __sun ) +# include +#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ ) +# include +#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \ + defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ ) +# include +#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ ) +# if !defined( __MINGW32__ ) && !defined( _AIX ) +# include +# if !defined( __BEOS__ ) +# include +# endif +# endif +#endif +#endif + +/* Now attempt to set the define for platform byte order using any */ +/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */ +/* seem to encompass most endian symbol definitions */ + +#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN ) +# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN ) +# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( _BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( _LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( 
__BIG_ENDIAN ) && defined( __LITTLE_ENDIAN ) +# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ ) +# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +/* if the platform byte order could not be determined, then try to */ +/* set this define using common machine defines */ +#if !defined(PLATFORM_BYTE_ORDER) + +#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ + defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ + defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ + defined( vax ) || defined( vms ) || defined( VMS ) || \ + defined( __VMS ) || defined( _M_X64 ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN + +#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ + defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ + defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ + defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ + defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ + defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ + defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \ + defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ ) 
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+
+#elif defined(__arm__)
+# ifdef __BIG_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# else
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+#elif 1 /* **** EDIT HERE IF NECESSARY **** */
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#elif 0 /* **** EDIT HERE IF NECESSARY **** */
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#else
+# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order
+#endif
+
+#endif
+
+#endif
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/config.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/config.h
new file mode 100644
index 0000000..7dfc043
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/config.h
@@ -0,0 +1,4 @@
+/* File generated by ToTargetConfigFile.xsl */
+
+#define XKCP_has_Xoodyak
+#define XKCP_has_Xoodoo
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/encrypt.c b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/encrypt.c
new file mode 100644
index 0000000..3090334
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-ARMv7M/encrypt.c
@@ -0,0 +1,122 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#include "crypto_aead.h"
+#include "api.h"
+#include "Xoodyak.h"
+#include <string.h>
+
+#if !defined(CRYPTO_KEYBYTES)
+    #define CRYPTO_KEYBYTES 16
+#endif
+#if !defined(CRYPTO_NPUBBYTES)
+    #define CRYPTO_NPUBBYTES 16
+#endif
+
+#define TAGLEN 16
+
+/*
+ * Report whether two TAGLEN-byte tags differ. Every byte is always examined,
+ * so the running time does not reveal the position of the first mismatch
+ * (an early-exit memcmp() would).
+ * Returns 0 when the tags are identical, non-zero otherwise.
+ */
+static int tagsDiffer(const unsigned char *a, const unsigned char *b)
+{
+    unsigned char acc = 0;
+    unsigned int i;
+
+    for (i = 0; i < TAGLEN; ++i) {
+        acc |= (unsigned char)(a[i] ^ b[i]);
+    }
+    return acc;
+}
+
+/*
+ * Encrypt mlen bytes of m into c and append a TAGLEN-byte tag; *clen is set
+ * to mlen + TAGLEN. The key k is CRYPTO_KEYBYTES long, the nonce npub
+ * CRYPTO_NPUBBYTES long; ad/adlen is the associated data. nsec is unused.
+ * Always returns 0.
+ */
+int crypto_aead_encrypt(
+    unsigned char *c, unsigned long long *clen,
+    const unsigned char *m, unsigned long long mlen,
+    const unsigned char *ad, unsigned long long adlen,
+    const unsigned char *nsec,
+    const unsigned char *npub,
+    const unsigned char *k)
+{
+    Xoodyak_Instance instance;
+
+    (void)nsec;
+
+    Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, NULL, 0, NULL, 0);
+    Xoodyak_Absorb(&instance, npub, CRYPTO_NPUBBYTES);
+    Xoodyak_Absorb(&instance, ad, (size_t)adlen);
+    Xoodyak_Encrypt(&instance, m, c, (size_t)mlen);
+    Xoodyak_Squeeze(&instance, c + mlen, TAGLEN);
+    *clen = mlen + TAGLEN;
+    #if 0
+    {
+        unsigned int i;
+        for (i = 0; i < *clen; ++i )
+        {
+            printf("\\x%02x", c[i] );
+        }
+        printf("\n");
+    }
+    #endif
+    return 0;
+}
+
+/*
+ * Verify and decrypt clen bytes of c (ciphertext followed by a TAGLEN-byte
+ * tag) into m. On success *mlen is set to clen - TAGLEN and 0 is returned.
+ * Returns -1 with *mlen == 0 when c is shorter than a tag or the tag does
+ * not match; in the latter case the decrypted bytes in m are zeroed.
+ * nsec is unused.
+ */
+int crypto_aead_decrypt(
+    unsigned char *m, unsigned long long *mlen,
+    unsigned char *nsec,
+    const unsigned char *c, unsigned long long clen,
+    const unsigned char *ad, unsigned long long adlen,
+    const unsigned char *npub,
+    const unsigned char *k)
+{
+    Xoodyak_Instance instance;
+    unsigned char tag[TAGLEN];
+    unsigned long long mlen_;
+
+    (void)nsec;
+
+    *mlen = 0;
+    if (clen < TAGLEN) {
+        return -1;
+    }
+    mlen_ = clen - TAGLEN;
+    Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, NULL, 0, NULL, 0);
+    Xoodyak_Absorb(&instance, npub, CRYPTO_NPUBBYTES);
+    Xoodyak_Absorb(&instance, ad, (size_t)adlen);
+    Xoodyak_Decrypt(&instance, c, m, (size_t)mlen_);
+    Xoodyak_Squeeze(&instance, tag, TAGLEN);
+    if (tagsDiffer(tag, c + mlen_) != 0) {
+        memset(m, 0, (size_t)mlen_);
+        return -1;
+    }
+    *mlen = mlen_;
+    return 0;
+}
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/Cyclist.h
b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/Cyclist.h new file mode 100644 index 0000000..54522bb --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/Cyclist.h @@ -0,0 +1,66 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Cyclist_h_ +#define _Cyclist_h_ + +#include +#include "align.h" + +#define Cyclist_ModeHash 1 +#define Cyclist_ModeKeyed 2 + +#define Cyclist_PhaseDown 1 +#define Cyclist_PhaseUp 2 + +#ifdef OUTPUT + +#include + +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + uint8_t stateShadow[size]; \ + FILE *file; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#else + +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#endif + +#define KCP_DeclareCyclistFunctions(prefix) \ + void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \ + void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \ + void prefix##_Encrypt(prefix##_Instance *instance, const 
uint8_t *P, uint8_t *C, size_t PLen); \
+    void prefix##_Decrypt(prefix##_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen); \
+    void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \
+    void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \
+    void prefix##_Ratchet(prefix##_Instance *instance);
+
+#endif
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/Cyclist.inc b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/Cyclist.inc
new file mode 100644
index 0000000..ba7a156
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/Cyclist.inc
@@ -0,0 +1,369 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#define JOIN0(a, b) a ## b
+#define JOIN(a, b) JOIN0(a, b)
+
+#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize)
+#define SnP_Initialize JOIN(SnP, _Initialize)
+#define SnP_AddBytes JOIN(SnP, _AddBytes)
+#define SnP_AddByte JOIN(SnP, _AddByte)
+#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes)
+#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes)
+#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes)
+
+#define Cyclist_Instance JOIN(prefix, _Instance)
+#define Cyclist_Initialize JOIN(prefix, _Initialize)
+#define Cyclist_Absorb JOIN(prefix, _Absorb)
+#define Cyclist_Encrypt JOIN(prefix, _Encrypt)
+#define Cyclist_Decrypt JOIN(prefix, _Decrypt)
+#define Cyclist_Squeeze JOIN(prefix, _Squeeze)
+#define Cyclist_SqueezeKey JOIN(prefix, _SqueezeKey)
+#define Cyclist_Ratchet JOIN(prefix, _Ratchet)
+
+#define Cyclist_AbsorbAny JOIN(prefix, _AbsorbAny)
+#define Cyclist_AbsorbKey JOIN(prefix, _AbsorbKey)
+#define Cyclist_SqueezeAny JOIN(prefix, _SqueezeAny)
+#define Cyclist_Down JOIN(prefix, _Down)
+#define Cyclist_Up JOIN(prefix, _Up)
+#define Cyclist_Crypt JOIN(prefix, _Crypt)
+
+#define Cyclist_f_bPrime JOIN(prefix, _f_bPrime)
+#define Cyclist_Rhash JOIN(prefix, _Rhash)
+#define Cyclist_Rkin JOIN(prefix, _Rkin)
+#define Cyclist_Rkout JOIN(prefix, _Rkout)
+#define Cyclist_lRatchet JOIN(prefix, _lRatchet)
+
+#if defined(CyclistFullBlocks_supported)
+#define Cyclist_AbsorbKeyedFullBlocks JOIN(prefix, _AbsorbKeyedFullBlocks)
+#define Cyclist_AbsorbHashFullBlocks JOIN(prefix, _AbsorbHashFullBlocks)
+#define Cyclist_SqueezeKeyedFullBlocks JOIN(prefix, _SqueezeKeyedFullBlocks)
+#define Cyclist_SqueezeHashFullBlocks JOIN(prefix, _SqueezeHashFullBlocks)
+#define Cyclist_EncryptFullBlocks JOIN(prefix, _EncryptFullBlocks)
+#define Cyclist_DecryptFullBlocks JOIN(prefix, _DecryptFullBlocks)
+#endif
+
+/* ------- Cyclist internal interfaces ------- */
+
+/*
+ * Down: add XiLen bytes of Xi to the state, the padding byte 0x01 at offset
+ * XiLen, and Cd at offset Cyclist_f_bPrime - 1 (only bit 0 of Cd in hash
+ * mode). Leaves the instance in the Down phase.
+ */
+static void Cyclist_Down(Cyclist_Instance *instance, const uint8_t *Xi, unsigned int XiLen, uint8_t Cd)
+{
+    SnP_AddBytes(instance->state, Xi, 0, XiLen);
+    SnP_AddByte(instance->state, 0x01, XiLen);
+    SnP_AddByte(instance->state, (instance->mode == Cyclist_ModeHash) ? (Cd & 0x01) : Cd, Cyclist_f_bPrime - 1);
+    instance->phase = Cyclist_PhaseDown;
+
+}
+
+/*
+ * Up: outside hash mode add Cu at offset Cyclist_f_bPrime - 1, permute the
+ * state and copy its first YiLen bytes to Yi. Leaves the instance in the
+ * Up phase. With OUTPUT defined and a trace file set, intermediate values
+ * are also written to that file.
+ */
+static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu)
+{
+    #if defined(OUTPUT)
+    uint8_t s[Cyclist_f_bPrime];
+    #endif
+
+    if (instance->mode != Cyclist_ModeHash) {
+        SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1);
+    }
+    #if defined(OUTPUT)
+    if (instance->file != NULL) {
+        SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
+        SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime );
+    }
+    #endif
+    SnP_Permute( instance->state );
+    #if defined(OUTPUT)
+    if (instance->file != NULL) {
+        memcpy( instance->stateShadow, instance->state, sizeof(instance->state) );
+        fprintf( instance->file, "Data XORed" );
+        displayByteString( instance->file, "", s, Cyclist_f_bPrime );
+        SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
+        fprintf( instance->file, "After f() ");
+        displayByteString( instance->file, "", s, Cyclist_f_bPrime );
+    }
+    #endif
+    instance->phase = Cyclist_PhaseUp;
+    SnP_ExtractBytes( instance->state, Yi, 0, YiLen );
+}
+
+/*
+ * Absorb X in pieces of at most r bytes, calling Up first whenever the
+ * instance is not in the Up phase. Cd is applied to the first piece only.
+ * An empty X still performs one Down.
+ */
+static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd)
+{
+    unsigned int splitLen;
+
+    do {
+        if (instance->phase != Cyclist_PhaseUp) {
+            Cyclist_Up(instance, NULL, 0, 0);
+        }
+        splitLen = MyMin(XLen, r);
+        Cyclist_Down(instance, X, splitLen, Cd);
+        Cd = 0;
+        X += splitLen;
+        XLen -= splitLen;
+        #if defined(CyclistFullBlocks_supported)
+        if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) {
+            size_t lenProcessed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen);
+            X += lenProcessed;
+            XLen -= lenProcessed;
+        }
+        else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) {
+            size_t lenProcessed = Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen);
+            X += lenProcessed;
+            XLen -= lenProcessed;
+        }
+        #endif
+    } while ( XLen != 0 );
+}
+
+/*
+ * Switch the instance to keyed mode. When KLen != 0, absorb
+ * K || ID || (uint8_t)IDLen, then the optional counter one byte per Down.
+ * KLen + IDLen must not exceed Cyclist_Rkin - 1 (checked by assert only).
+ */
+static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
+{
+    uint8_t KID[Cyclist_Rkin];
+
+    assert(instance->mode == Cyclist_ModeHash);
+    assert((KLen + IDLen) <= (Cyclist_Rkin - 1));
+
+    instance->mode = Cyclist_ModeKeyed;
+    instance->Rabsorb = Cyclist_Rkin;
+    instance->Rsqueeze = Cyclist_Rkout;
+    if (KLen != 0) {
+        memcpy(KID, K, KLen);
+        if (IDLen != 0) { /* ID may be NULL when empty; memcpy must not see it */
+            memcpy(KID + KLen, ID, IDLen);
+        }
+        KID[KLen + IDLen] = (uint8_t)IDLen;
+        Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02);
+        if (counterLen != 0) {
+            Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00);
+        }
+    }
+}
+
+/*
+ * Squeeze YLen bytes into Y: one Up with Cu for the first block, then a
+ * Down/Up pair (or the full-block routines, when available) per later block.
+ */
+static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu)
+{
+    unsigned int len;
+
+    len = MyMin(YLen, instance->Rsqueeze );
+    Cyclist_Up(instance, Y, len, Cu);
+    Y += len;
+    YLen -= len;
+    while (YLen != 0) {
+        #if defined(CyclistFullBlocks_supported)
+        /* the keyed routine emits Cyclist_Rkout bytes per block, so that is its threshold */
+        if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkout)) {
+            size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen);
+            Y += lenProcessed;
+            YLen -= lenProcessed;
+        }
+        else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) {
+            size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen);
+            Y += lenProcessed;
+            YLen -= lenProcessed;
+        }
+        else
+        #endif
+        {
+            Cyclist_Down(instance, NULL, 0, 0);
+            len = MyMin(YLen, instance->Rsqueeze );
+            Cyclist_Up(instance, Y, len, 0);
+            Y += len;
+            YLen -= len;
+        }
+    }
+}
+
+/*
+ * Encrypt (decrypt == 0) or decrypt IOLen bytes from I to O. The first block
+ * uses Cu = 0x80, later blocks 0x00. In both directions the plaintext is
+ * what gets absorbed by Down. Only called in keyed mode.
+ */
+static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt)
+{
+    unsigned int splitLen;
+    uint8_t P[Cyclist_Rkout];
+    uint8_t Cu = 0x80;
+
+    do {
+        if (decrypt != 0) {
+            #if defined(CyclistFullBlocks_supported)
+            if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
+                size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen);
+                I += lenProcessed;
+                O += lenProcessed;
+                IOLen -= lenProcessed;
+            }
+            else
+            #endif
+            {
+                splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
+                Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
+                SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
+                Cyclist_Down(instance, O, splitLen, 0x00);
+                I += splitLen;
+                O += splitLen;
+                IOLen -= splitLen;
+            }
+        }
+        else {
+            #if defined(CyclistFullBlocks_supported)
+            if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
+                size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen);
+                I += lenProcessed;
+                O += lenProcessed;
+                IOLen -= lenProcessed;
+            }
+            else
+            #endif
+            {
+                splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
+                if (splitLen != 0) { /* I may be NULL for an empty message */
+                    memcpy(P, I, splitLen);
+                }
+                Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
+                SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
+                Cyclist_Down(instance, P, splitLen, 0x00);
+                I += splitLen;
+                O += splitLen;
+                IOLen -= splitLen;
+            }
+        }
+        Cu = 0x00;
+    } while ( IOLen != 0 );
+}
+
+/* ------- Cyclist interfaces ------- */
+
+/* Reset the instance to hash mode; when KLen != 0, absorb the key to enter keyed mode. */
+void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
+{
+    SnP_StaticInitialize();
+    SnP_Initialize(instance->state);
+    instance->phase = Cyclist_PhaseUp;
+    instance->mode = Cyclist_ModeHash;
+    instance->Rabsorb = Cyclist_Rhash;
+    instance->Rsqueeze = Cyclist_Rhash;
+    #ifdef OUTPUT
+    instance->file = 0;
+    SnP_Initialize( instance->stateShadow );
+    #endif
+    if (KLen != 0) {
+        Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, counter, counterLen);
+    }
+}
+
+/* Absorb X at the current absorb rate (Cd = 0x03). */
+void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, size_t XLen)
+{
+    Cyclist_AbsorbAny(instance, X, XLen, instance->Rabsorb, 0x03);
+}
+
+/* Encrypt PLen bytes of P into C; keyed mode only. */
+void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen)
+{
+    assert(instance->mode == Cyclist_ModeKeyed);
+    Cyclist_Crypt(instance, P, C, PLen, 0);
+}
+
+/* Decrypt CLen bytes of C into P; keyed mode only. */
+void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen)
+{
+    assert(instance->mode == Cyclist_ModeKeyed);
+    Cyclist_Crypt(instance, C, P, CLen, 1);
+}
+
+/* Squeeze YLen output bytes into Y (Cu = 0x40). */
+void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen)
+{
+    Cyclist_SqueezeAny(instance, Y, YLen, 0x40);
+}
+
+/* Squeeze a KLen-byte derived key into K (Cu = 0x20); keyed mode only. */
+void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen)
+{
+    assert(instance->mode == Cyclist_ModeKeyed);
+    Cyclist_SqueezeAny(instance, K, KLen, 0x20);
+}
+
+/* Squeeze Cyclist_lRatchet bytes and absorb them back; keyed mode only. */
+void Cyclist_Ratchet(Cyclist_Instance *instance)
+{
+    uint8_t buffer[Cyclist_lRatchet];
+
+    assert(instance->mode == Cyclist_ModeKeyed);
+    /* Squeeze then absorb is the same as overwriting with zeros */
+    Cyclist_SqueezeAny(instance, buffer, sizeof(buffer), 0x10);
+    Cyclist_AbsorbAny(instance, buffer, sizeof(buffer), instance->Rabsorb, 0x00);
+}
+
+#undef SnP_StaticInitialize
+#undef SnP_Initialize
+#undef SnP_AddBytes
+#undef SnP_AddByte
+#undef SnP_OverwriteBytes
+#undef SnP_ExtractBytes
+#undef SnP_ExtractAndAddBytes
+
+#undef Cyclist_Instance
+#undef Cyclist_Initialize
+#undef Cyclist_Absorb
+#undef Cyclist_Encrypt
+#undef Cyclist_Decrypt
+#undef Cyclist_Squeeze
+#undef Cyclist_SqueezeKey
+#undef Cyclist_Ratchet
+
+#undef Cyclist_AbsorbAny
+#undef Cyclist_AbsorbKey
+#undef Cyclist_SqueezeAny
+#undef Cyclist_Down
+#undef Cyclist_Up
+#undef Cyclist_Crypt
+
+#undef Cyclist_f_bPrime
+#undef Cyclist_Rhash
+#undef Cyclist_Rkin
+#undef Cyclist_Rkout
+#undef Cyclist_lRatchet
+
+#if defined(CyclistFullBlocks_supported)
+#undef Cyclist_AbsorbKeyedFullBlocks
+#undef Cyclist_AbsorbHashFullBlocks
+#undef Cyclist_SqueezeKeyedFullBlocks
+#undef Cyclist_SqueezeHashFullBlocks
+#undef Cyclist_EncryptFullBlocks
+#undef Cyclist_DecryptFullBlocks +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/Xoodoo-SnP.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/Xoodoo-SnP.h new file mode 100644 index 0000000..cf1b74a --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/Xoodoo-SnP.h @@ -0,0 +1,43 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodoo_SnP_h_ +#define _Xoodoo_SnP_h_ + +#include +#include + +/** For the documentation, see SnP-documentation.h. 
+ */ + +#define Xoodoo_implementation "AVR8 optimized implementation" +#define Xoodoo_stateSizeInBytes (3*4*4) +#define Xoodoo_stateAlignment 1 +#define Xoodoo_HasNround + +#define Xoodoo_StaticInitialize() +void Xoodoo_Initialize(void *state); +#define Xoodoo_AddByte(argS, argData, argOffset) ((uint8_t*)argS)[argOffset] ^= (argData) +void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount); +void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds); +void Xoodoo_Permute_6rounds(void *state); +void Xoodoo_Permute_12rounds(void *state); +void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length); + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/Xoodoo-avr8-u1.s b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/Xoodoo-avr8-u1.s new file mode 100644 index 0000000..7f83e3d --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/Xoodoo-avr8-u1.s @@ -0,0 +1,1341 @@ +; +; The eXtended Keccak Code Package (XKCP) +; https://github.com/XKCP/XKCP +; +; The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. +; +; Implementation by Ronny Van Keer, hereby denoted as "the implementer". +; +; For more information, feedback or questions, please refer to the Keccak Team website: +; https://keccak.team/ +; +; To the extent possible under law, the implementer has waived all copyright +; and related or neighboring rights to the source code in this file. +; http://creativecommons.org/publicdomain/zero/1.0/ +; +; --- +; +; This file implements Xoodoo in a SnP-compatible way. 
+; Please refer to SnP-documentation.h for more details. +; +; This implementation comes with Xoodoo-SnP.h in the same folder. +; Please refer to LowLevel.build for the exact list of other files it must be combined with. +; + +; INFO: Tested on ATmega1280 simulator + +; Registers used in all routines +#define zero 1 +#define rpState 24 +#define rX 26 +#define rY 28 +#define rZ 30 +#define sp 0x3D + +;---------------------------------------------------------------------------- +; +; void Xoodoo_StaticInitialize( void ) +; +.global Xoodoo_StaticInitialize + +;---------------------------------------------------------------------------- +; +; void Xoodoo_Initialize(void *state) +; +; argument state is passed in r24:r25 +; +.global Xoodoo_Initialize +Xoodoo_Initialize: + movw rZ, r24 + ldi r23, 3*4/2 ; clear state (8 bytes / 2 lanes) per iteration +Xoodoo_Initialize_Loop: + st z+, zero + st z+, zero + st z+, zero + st z+, zero + st z+, zero + st z+, zero + st z+, zero + st z+, zero + dec r23 + brne Xoodoo_Initialize_Loop +Xoodoo_StaticInitialize: + ret + +;---------------------------------------------------------------------------- +; +; void Xoodoo_AddByte(void *state, unsigned char data, unsigned int offset) +; +; argument state is passed in r24:r25 +; argument data is passed in r22:r23, only LSB (r22) is used +; argument offset is passed in r20:r21, only LSB (r20) is used +; +.global Xoodoo_AddByte +Xoodoo_AddByte: + movw rZ, r24 + add rZ, r20 + adc rZ+1, zero + ld r0, Z + eor r0, r22 + st Z, r0 + ret + +;---------------------------------------------------------------------------- +; +; void Xoodoo_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +; +; argument state is passed in r24:r25 +; argument data is passed in r22:r23 +; argument offset is passed in r20:r21, only LSB (r20) is used +; argument length is passed in r18:r19, only LSB (r18) is used +; +.global Xoodoo_AddBytes +Xoodoo_AddBytes: + movw rZ, r24 + add rZ, r20 + 
adc rZ+1, zero + movw rX, r22 + subi r18, 8 + brcs Xoodoo_AddBytes_Byte + ;do 8 bytes per iteration +Xoodoo_AddBytes_Loop8: + ld r21, X+ + ld r0, Z + eor r0, r21 + st Z+, r0 + ld r21, X+ + ld r0, Z + eor r0, r21 + st Z+, r0 + ld r21, X+ + ld r0, Z + eor r0, r21 + st Z+, r0 + ld r21, X+ + ld r0, Z + eor r0, r21 + st Z+, r0 + ld r21, X+ + ld r0, Z + eor r0, r21 + st Z+, r0 + ld r21, X+ + ld r0, Z + eor r0, r21 + st Z+, r0 + ld r21, X+ + ld r0, Z + eor r0, r21 + st Z+, r0 + ld r21, X+ + ld r0, Z + eor r0, r21 + st Z+, r0 + subi r18, 8 + brcc Xoodoo_AddBytes_Loop8 +Xoodoo_AddBytes_Byte: + ldi r19, 8 + add r18, r19 + breq Xoodoo_AddBytes_End +Xoodoo_AddBytes_Loop1: + ld r21, X+ + ld r0, Z + eor r0, r21 + st Z+, r0 + dec r18 + brne Xoodoo_AddBytes_Loop1 +Xoodoo_AddBytes_End: + ret + + +;---------------------------------------------------------------------------- +; +; void Xoodoo_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +; +; argument state is passed in r24:r25 +; argument data is passed in r22:r23 +; argument offset is passed in r20:r21, only LSB (r20) is used +; argument length is passed in r18:r19, only LSB (r18) is used +; +.global Xoodoo_OverwriteBytes +Xoodoo_OverwriteBytes: + movw rZ, r24 + add rZ, r20 + adc rZ+1, zero + movw rX, r22 + subi r18, 8 + brcs Xoodoo_OverwriteBytes_Byte + ;do 8 bytes per iteration +Xoodoo_OverwriteBytes_Loop8: + ld r0, X+ + st Z+, r0 + ld r0, X+ + st Z+, r0 + ld r0, X+ + st Z+, r0 + ld r0, X+ + st Z+, r0 + ld r0, X+ + st Z+, r0 + ld r0, X+ + st Z+, r0 + ld r0, X+ + st Z+, r0 + ld r0, X+ + st Z+, r0 + subi r18, 8 + brcc Xoodoo_OverwriteBytes_Loop8 +Xoodoo_OverwriteBytes_Byte: + ldi r19, 8 + add r18, r19 + breq Xoodoo_OverwriteBytes_End +Xoodoo_OverwriteBytes_Loop1: + ld r0, X+ + st Z+, r0 + dec r18 + brne Xoodoo_OverwriteBytes_Loop1 +Xoodoo_OverwriteBytes_End: + ret + +;---------------------------------------------------------------------------- +; +; void 
Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount) +; +; argument state is passed in r24:r25 +; argument byteCount is passed in r22:r23, only LSB (r22) is used +; +.global Xoodoo_OverwriteWithZeroes +Xoodoo_OverwriteWithZeroes: + movw rZ, r24 ; rZ = state + mov r23, r22 + lsr r23 + lsr r23 + lsr r23 + breq Xoodoo_OverwriteWithZeroes_Bytes +Xoodoo_OverwriteWithZeroes_LoopLanes: + st Z+, r1 + st Z+, r1 + st Z+, r1 + st Z+, r1 + st Z+, r1 + st Z+, r1 + st Z+, r1 + st Z+, r1 + dec r23 + brne Xoodoo_OverwriteWithZeroes_LoopLanes +Xoodoo_OverwriteWithZeroes_Bytes: + andi r22, 7 + breq Xoodoo_OverwriteWithZeroes_End +Xoodoo_OverwriteWithZeroes_LoopBytes: + st Z+, r1 + dec r22 + brne Xoodoo_OverwriteWithZeroes_LoopBytes +Xoodoo_OverwriteWithZeroes_End: + ret + +;---------------------------------------------------------------------------- +; +; void Xoodoo_ExtractBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +; +; argument state is passed in r24:r25 +; argument data is passed in r22:r23 +; argument offset is passed in r20:r21, only LSB (r20) is used +; argument length is passed in r18:r19, only LSB (r18) is used +; +.global Xoodoo_ExtractBytes +Xoodoo_ExtractBytes: + movw rZ, r24 + add rZ, r20 + adc rZ+1, zero + movw rX, r22 + subi r18, 8 + brcs Xoodoo_ExtractBytes_Byte + ;do 8 bytes per iteration +Xoodoo_ExtractBytes_Loop8: + ld r0, Z+ + st X+, r0 + ld r0, Z+ + st X+, r0 + ld r0, Z+ + st X+, r0 + ld r0, Z+ + st X+, r0 + ld r0, Z+ + st X+, r0 + ld r0, Z+ + st X+, r0 + ld r0, Z+ + st X+, r0 + ld r0, Z+ + st X+, r0 + subi r18, 8 + brcc Xoodoo_ExtractBytes_Loop8 +Xoodoo_ExtractBytes_Byte: + ldi r19, 8 + add r18, r19 + breq Xoodoo_ExtractBytes_End +Xoodoo_ExtractBytes_Loop1: + ld r0, Z+ + st X+, r0 + dec r18 + brne Xoodoo_ExtractBytes_Loop1 +Xoodoo_ExtractBytes_End: + ret + +;---------------------------------------------------------------------------- +; +; void Xoodoo_ExtractAndAddBytes(void *state, const unsigned char 
*input, unsigned char *output, unsigned int offset, unsigned int length) +; +; argument state is passed in r24:r25 +; argument input is passed in r22:r23 +; argument output is passed in r20:r21 +; argument offset is passed in r18:r19, only LSB (r18) is used +; argument length is passed in r16:r17, only LSB (r16) is used +; +.global Xoodoo_ExtractAndAddBytes +Xoodoo_ExtractAndAddBytes: + tst r16 + breq Xoodoo_ExtractAndAddBytes_End + push r16 + push r28 + push r29 + movw rZ, r24 + add rZ, r18 + adc rZ+1, zero + movw rX, r22 + movw rY, r20 + subi r16, 8 + brcs Xoodoo_ExtractAndAddBytes_Byte +Xoodoo_ExtractAndAddBytes_LoopLane: + ld r21, Z+ + ld r0, X+ + eor r0, r21 + st Y+, r0 + ld r21, Z+ + ld r0, X+ + eor r0, r21 + st Y+, r0 + ld r21, Z+ + ld r0, X+ + eor r0, r21 + st Y+, r0 + ld r21, Z+ + ld r0, X+ + eor r0, r21 + st Y+, r0 + ld r21, Z+ + ld r0, X+ + eor r0, r21 + st Y+, r0 + ld r21, Z+ + ld r0, X+ + eor r0, r21 + st Y+, r0 + ld r21, Z+ + ld r0, X+ + eor r0, r21 + st Y+, r0 + ld r21, Z+ + ld r0, X+ + eor r0, r21 + st Y+, r0 + subi r16, 8 + brcc Xoodoo_ExtractAndAddBytes_LoopLane +Xoodoo_ExtractAndAddBytes_Byte: + ldi r19, 8 + add r16, r19 + breq Xoodoo_ExtractAndAddBytes_Done +Xoodoo_ExtractAndAddBytes_Loop1: + ld r21, Z+ + ld r0, X+ + eor r0, r21 + st Y+, r0 + dec r16 + brne Xoodoo_ExtractAndAddBytes_Loop1 +Xoodoo_ExtractAndAddBytes_Done: + pop r29 + pop r28 + pop r16 +Xoodoo_ExtractAndAddBytes_End: + ret + +Xoodoo_RoundConstants_12: + .BYTE 0x58, 0x00 + .BYTE 0x38, 0x00 + .BYTE 0xC0, 0x03 + .BYTE 0xD0, 0x00 + .BYTE 0x20, 0x01 + .BYTE 0x14, 0x00 +Xoodoo_RoundConstants_6: + .BYTE 0x60, 0x00 + .BYTE 0x2C, 0x00 + .BYTE 0x80, 0x03 + .BYTE 0xF0, 0x00 + .BYTE 0xA0, 0x01 + .BYTE 0x12, 0x00 +Xoodoo_RoundConstants_0: + .BYTE 0xFF, 0 ; terminator + + .text + +; Register variables used in permutation +#define rC0 2 // 4 regs (2-5) +#define rC1 6 // 4 regs (6-9) +#define rC2 10 // 4 regs (10-13) +#define rC3 14 // 4 regs (14-17) +#define rVv 18 // 4 regs (18-21) +#define rTt 
22 // 4 regs (22-25) +// r26-27 free +#define a00 0 +#define a01 4 +#define a02 8 +#define a03 12 +#define a10 16 +#define a11 20 +#define a12 24 +#define a13 28 +#define a20 32 +#define a21 36 +#define a22 40 +#define a23 44 + +;---------------------------------------------------------------------------- +; +; void Xoodoo_Permute_Nrounds( void *state, unsigned int nrounds ) +; +; argument state is passed in r24:r25 +; argument nrounds is passed in r22:r23 (only LSB (r22) is used) +; +.global Xoodoo_Permute_Nrounds +Xoodoo_Permute_Nrounds: + mov r26, r22 + ldi rZ+0, lo8(Xoodoo_RoundConstants_0) + ldi rZ+1, hi8(Xoodoo_RoundConstants_0) + lsl r26 + sub rZ, r26 + sbc rZ+1, zero + rjmp Xoodoo_Permute + +;---------------------------------------------------------------------------- +; +; void Xoodoo_Permute_6rounds( void *state ) +; +; argument state is passed in r24:r25 +; +.global Xoodoo_Permute_6rounds +Xoodoo_Permute_6rounds: + ldi rZ+0, lo8(Xoodoo_RoundConstants_6) + ldi rZ+1, hi8(Xoodoo_RoundConstants_6) + rjmp Xoodoo_Permute + +;---------------------------------------------------------------------------- +; +; void Xoodoo_Permute_12rounds( void *state ) +; +; argument state is passed in r24:r25 +; +.global Xoodoo_Permute_12rounds +Xoodoo_Permute_12rounds: + ldi rZ+0, lo8(Xoodoo_RoundConstants_12) + ldi rZ+1, hi8(Xoodoo_RoundConstants_12) +Xoodoo_Permute: + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 + + ; Initial Prepare Theta + movw rY, rpState + ld rC0+0, Y+ ; a00 + ld rC0+1, Y+ + ld rC0+2, Y+ + ld rC0+3, Y+ + ld rC1+0, Y+ ; a01 + ld rC1+1, Y+ + ld rC1+2, Y+ + ld rC1+3, Y+ + ld rC2+0, Y+ ; a02 + ld rC2+1, Y+ + ld rC2+2, Y+ + ld rC2+3, Y+ + ld rC3+0, Y+ ; a03 + ld rC3+1, Y+ + ld rC3+2, Y+ + ld rC3+3, Y+ + + ld r0, Y+ ; a10 + eor rC0+0, r0 + ld r0, Y+ + eor rC0+1, r0 + ld r0, Y+ + eor rC0+2, r0 + ld r0, Y+ + eor rC0+3, r0 
+ ld r0, Y+ ; a11 + eor rC1+0, r0 + ld r0, Y+ + eor rC1+1, r0 + ld r0, Y+ + eor rC1+2, r0 + ld r0, Y+ + eor rC1+3, r0 + ld r0, Y+ ; a12 + eor rC2+0, r0 + ld r0, Y+ + eor rC2+1, r0 + ld r0, Y+ + eor rC2+2, r0 + ld r0, Y+ + eor rC2+3, r0 + ld r0, Y+ ; a13 + eor rC3+0, r0 + ld r0, Y+ + eor rC3+1, r0 + ld r0, Y+ + eor rC3+2, r0 + ld r0, Y+ + eor rC3+3, r0 + + ld r0, Y+ ; a20 + eor rC0+0, r0 + ld r0, Y+ + eor rC0+1, r0 + ld r0, Y+ + eor rC0+2, r0 + ld r0, Y+ + eor rC0+3, r0 + ld r0, Y+ ; a21 + eor rC1+0, r0 + ld r0, Y+ + eor rC1+1, r0 + ld r0, Y+ + eor rC1+2, r0 + ld r0, Y+ + eor rC1+3, r0 + ld r0, Y+ ; a22 + eor rC2+0, r0 + ld r0, Y+ + eor rC2+1, r0 + ld r0, Y+ + eor rC2+2, r0 + ld r0, Y+ + eor rC2+3, r0 + ld r0, Y+ ; a23 + eor rC3+0, r0 + ld r0, Y+ + eor rC3+1, r0 + ld r0, Y+ + eor rC3+2, r0 + ld r0, Y+ + eor rC3+3, r0 + sbiw rY, 48 + +Xoodoo_RoundLoop: + ; Theta + Rho west + ; c0 = ROTL32(c0 ^ ROTL32(c0, 9), 5); + mov rVv+1, rC0+0 ; rol 9 + mov rVv+2, rC0+1 + mov rVv+3, rC0+2 + mov rVv+0, rC0+3 + lsl rVv+0 + rol rVv+1 + rol rVv+2 + rol rVv+3 + adc rVv+0, zero + eor rVv+0, rC0+0 + eor rVv+1, rC0+1 + eor rVv+2, rC0+2 + eor rVv+3, rC0+3 + bst rVv, 0 ; rol 5 (= ror 3 + rol 8) + ror rVv+3 + ror rVv+2 + ror rVv+1 + ror rVv + bld rVv+3, 7 + bst rVv, 0 + ror rVv+3 + ror rVv+2 + ror rVv+1 + ror rVv + bld rVv+3, 7 + bst rVv, 0 + ror rVv+3 + ror rVv+2 + ror rVv+1 + ror rVv + bld rVv+3, 7 + mov rC0+0, rVv+3 + mov rC0+1, rVv+0 + mov rC0+2, rVv+1 + mov rC0+3, rVv+2 + + ; c1 = ROTL32(c1 ^ ROTL32(c1, 9), 5); + mov rVv+1, rC1+0 ; rol 9 + mov rVv+2, rC1+1 + mov rVv+3, rC1+2 + mov rVv+0, rC1+3 + lsl rVv+0 + rol rVv+1 + rol rVv+2 + rol rVv+3 + adc rVv+0, zero + eor rVv+0, rC1+0 + eor rVv+1, rC1+1 + eor rVv+2, rC1+2 + eor rVv+3, rC1+3 + bst rVv, 0 ; rol 5 (= ror 3 + rol 8) + ror rVv+3 + ror rVv+2 + ror rVv+1 + ror rVv + bld rVv+3, 7 + bst rVv, 0 + ror rVv+3 + ror rVv+2 + ror rVv+1 + ror rVv + bld rVv+3, 7 + bst rVv, 0 + ror rVv+3 + ror rVv+2 + ror rVv+1 + ror rVv + bld rVv+3, 7 + mov 
rC1+0, rVv+3 + mov rC1+1, rVv+0 + mov rC1+2, rVv+1 + mov rC1+3, rVv+2 + + ; c2 = ROTL32(c2 ^ ROTL32(c2, 9), 5); + mov rVv+1, rC2+0 ; rol 9 + mov rVv+2, rC2+1 + mov rVv+3, rC2+2 + mov rVv+0, rC2+3 + lsl rVv+0 + rol rVv+1 + rol rVv+2 + rol rVv+3 + adc rVv+0, zero + eor rVv+0, rC2+0 + eor rVv+1, rC2+1 + eor rVv+2, rC2+2 + eor rVv+3, rC2+3 + bst rVv, 0 ; rol 5 (= ror 3 + rol 8) + ror rVv+3 + ror rVv+2 + ror rVv+1 + ror rVv + bld rVv+3, 7 + bst rVv, 0 + ror rVv+3 + ror rVv+2 + ror rVv+1 + ror rVv + bld rVv+3, 7 + bst rVv, 0 + ror rVv+3 + ror rVv+2 + ror rVv+1 + ror rVv + bld rVv+3, 7 + mov rC2+0, rVv+3 + mov rC2+1, rVv+0 + mov rC2+2, rVv+1 + mov rC2+3, rVv+2 + + ; c3 = ROTL32(c3 ^ ROTL32(c3, 9), 5); + mov rVv+1, rC3+0 ; rol 9 + mov rVv+2, rC3+1 + mov rVv+3, rC3+2 + mov rVv+0, rC3+3 + lsl rVv+0 + rol rVv+1 + rol rVv+2 + rol rVv+3 + adc rVv+0, zero + eor rVv+0, rC3+0 + eor rVv+1, rC3+1 + eor rVv+2, rC3+2 + eor rVv+3, rC3+3 + bst rVv, 0 ; rol 5 (= ror 3 + rol 8) + ror rVv+3 + ror rVv+2 + ror rVv+1 + ror rVv + bld rVv+3, 7 + bst rVv, 0 + ror rVv+3 + ror rVv+2 + ror rVv+1 + ror rVv + bld rVv+3, 7 + bst rVv, 0 + ror rVv+3 + ror rVv+2 + ror rVv+1 + ror rVv + bld rVv+3, 7 + mov rC3+0, rVv+3 + mov rC3+1, rVv+0 + mov rC3+2, rVv+1 + mov rC3+3, rVv+2 + + ; v1 = a13; + ldd rVv+0, Y+a13+0 + ldd rVv+1, Y+a13+1 + ldd rVv+2, Y+a13+2 + ldd rVv+3, Y+a13+3 + + ; a13 = a12 ^ c1; + ldd r0, Y+a12+0 + eor r0, rC1+0 + std Y+a13+0, r0 + ldd r0, Y+a12+1 + eor r0, rC1+1 + std Y+a13+1, r0 + ldd r0, Y+a12+2 + eor r0, rC1+2 + std Y+a13+2, r0 + ldd r0, Y+a12+3 + eor r0, rC1+3 + std Y+a13+3, r0 + + ; a12 = a11 ^ c0; + ldd r0, Y+a11+0 + eor r0, rC0+0 + std Y+a12+0, r0 + ldd r0, Y+a11+1 + eor r0, rC0+1 + std Y+a12+1, r0 + ldd r0, Y+a11+2 + eor r0, rC0+2 + std Y+a12+2, r0 + ldd r0, Y+a11+3 + eor r0, rC0+3 + std Y+a12+3, r0 + + ; a11 = a10 ^ c3; + ldd r0, Y+a10+0 + eor r0, rC3+0 + std Y+a11+0, r0 + ldd r0, Y+a10+1 + eor r0, rC3+1 + std Y+a11+1, r0 + ldd r0, Y+a10+2 + eor r0, rC3+2 + std Y+a11+2, r0 + ldd 
r0, Y+a10+3 + eor r0, rC3+3 + std Y+a11+3, r0 + + ; a10 = v1 ^ c2; + eor rVv+0, rC2+0 + std Y+a10+0, rVv+0 + eor rVv+1, rC2+1 + std Y+a10+1, rVv+1 + eor rVv+2, rC2+2 + std Y+a10+2, rVv+2 + eor rVv+3, rC2+3 + std Y+a10+3, rVv+3 + + ; a20 = ROTL32(a20 ^ c3, 11); + ldd rVv+0, Y+a20+3 + eor rVv+0, rC3+3 + ldd rVv+1, Y+a20+0 + eor rVv+1, rC3+0 + ldd rVv+2, Y+a20+1 + eor rVv+2, rC3+1 + ldd rVv+3, Y+a20+2 + eor rVv+3, rC3+2 + lsl rVv+0 + rol rVv+1 + rol rVv+2 + rol rVv+3 + adc rVv+0, zero + lsl rVv+0 + rol rVv+1 + rol rVv+2 + rol rVv+3 + adc rVv+0, zero + lsl rVv+0 + rol rVv+1 + rol rVv+2 + rol rVv+3 + adc rVv+0, zero + std Y+a20+0, rVv+0 + std Y+a20+1, rVv+1 + std Y+a20+2, rVv+2 + std Y+a20+3, rVv+3 + + ; a21 = ROTL32(a21 ^ c0, 11); + ldd rVv+0, Y+a21+3 + eor rVv+0, rC0+3 + ldd rVv+1, Y+a21+0 + eor rVv+1, rC0+0 + ldd rVv+2, Y+a21+1 + eor rVv+2, rC0+1 + ldd rVv+3, Y+a21+2 + eor rVv+3, rC0+2 + lsl rVv+0 + rol rVv+1 + rol rVv+2 + rol rVv+3 + adc rVv+0, zero + lsl rVv+0 + rol rVv+1 + rol rVv+2 + rol rVv+3 + adc rVv+0, zero + lsl rVv+0 + rol rVv+1 + rol rVv+2 + rol rVv+3 + adc rVv+0, zero + std Y+a21+0, rVv+0 + std Y+a21+1, rVv+1 + std Y+a21+2, rVv+2 + std Y+a21+3, rVv+3 + + ; a22 = ROTL32(a22 ^ c1, 11); + ldd rVv+0, Y+a22+3 + eor rVv+0, rC1+3 + ldd rVv+1, Y+a22+0 + eor rVv+1, rC1+0 + ldd rVv+2, Y+a22+1 + eor rVv+2, rC1+1 + ldd rVv+3, Y+a22+2 + eor rVv+3, rC1+2 + lsl rVv+0 + rol rVv+1 + rol rVv+2 + rol rVv+3 + adc rVv+0, zero + lsl rVv+0 + rol rVv+1 + rol rVv+2 + rol rVv+3 + adc rVv+0, zero + lsl rVv+0 + rol rVv+1 + rol rVv+2 + rol rVv+3 + adc rVv+0, zero + std Y+a22+0, rVv+0 + std Y+a22+1, rVv+1 + std Y+a22+2, rVv+2 + std Y+a22+3, rVv+3 + + ; a23 = ROTL32(a23 ^ c2, 11); + ldd rVv+0, Y+a23+3 + eor rVv+0, rC2+3 + ldd rVv+1, Y+a23+0 + eor rVv+1, rC2+0 + ldd rVv+2, Y+a23+1 + eor rVv+2, rC2+1 + ldd rVv+3, Y+a23+2 + eor rVv+3, rC2+2 + lsl rVv+0 + rol rVv+1 + rol rVv+2 + rol rVv+3 + adc rVv+0, zero + lsl rVv+0 + rol rVv+1 + rol rVv+2 + rol rVv+3 + adc rVv+0, zero + lsl rVv+0 + rol 
rVv+1 + rol rVv+2 + rol rVv+3 + adc rVv+0, zero + std Y+a23+0, rVv+0 + std Y+a23+1, rVv+1 + std Y+a23+2, rVv+2 + std Y+a23+3, rVv+3 + + ; v1 = c3; + movw rVv+0, rC3+0 + movw rVv+2, rC3+2 + + ; c3 = a03 ^ c2; /* a03 resides in c3 */ + ldd rC3+0, Y+a03+0 + eor rC3+0, rC2+0 + ldd rC3+1, Y+a03+1 + eor rC3+1, rC2+1 + ldd rC3+2, Y+a03+2 + eor rC3+2, rC2+2 + ldd rC3+3, Y+a03+3 + eor rC3+3, rC2+3 + + ; c2 = a02 ^ c1; /* a02 resides in c2 */ + ldd rC2+0, Y+a02+0 + eor rC2+0, rC1+0 + ldd rC2+1, Y+a02+1 + eor rC2+1, rC1+1 + ldd rC2+2, Y+a02+2 + eor rC2+2, rC1+2 + ldd rC2+3, Y+a02+3 + eor rC2+3, rC1+3 + + ; c1 = a01 ^ c0; /* a01 resides in c1 */ + ldd rC1+0, Y+a01+0 + eor rC1+0, rC0+0 + ldd rC1+1, Y+a01+1 + eor rC1+1, rC0+1 + ldd rC1+2, Y+a01+2 + eor rC1+2, rC0+2 + ldd rC1+3, Y+a01+3 + eor rC1+3, rC0+3 + + ; c0 = a00 ^ v1; /* a00 resides in c0 */ + ldd rC0+0, Y+a00+0 + eor rC0+0, rVv+0 + ldd rC0+1, Y+a00+1 + eor rC0+1, rVv+1 + ldd rC0+2, Y+a00+2 + eor rC0+2, rVv+2 + ldd rC0+3, Y+a00+3 + eor rC0+3, rVv+3 + + ; c0 ^= __rc; /* +Iota */ + lpm rVv+0, Z+ + lpm rVv+1, Z+ + eor rC0+0, rVv+0 + eor rC0+1, rVv+1 + + ; Chi + Rho east + Early Theta + ; a00 = c0 ^= ~a10 & a20; + ldd r0, Y+a10+0 + com r0 + ldd rTt+0, Y+a20+0 ; a20 in rTt + and r0, rTt+0 + eor rC0+0, r0 + std Y+a00+0, rC0+0 + ldd r0, Y+a10+1 + com r0 + ldd rTt+1, Y+a20+1 + and r0, rTt+1 + eor rC0+1, r0 + std Y+a00+1, rC0+1 + ldd r0, Y+a10+2 + com r0 + ldd rTt+2, Y+a20+2 + and r0, rTt+2 + eor rC0+2, r0 + std Y+a00+2, rC0+2 + ldd r0, Y+a10+3 + com r0 + ldd rTt+3, Y+a20+3 + and r0, rTt+3 + eor rC0+3, r0 + std Y+a00+3, rC0+3 + + ; a10 ^= ~a20 & c0; + com rTt+0 + and rTt+0, rC0+0 + ldd r0, Y+a10+0 + eor rTt+0, r0 ; new a10 in rTt + std Y+a10+0, rTt+0 + com rTt+1 + and rTt+1, rC0+1 + ldd r0, Y+a10+1 + eor rTt+1, r0 + std Y+a10+1, rTt+1 + com rTt+2 + and rTt+2, rC0+2 + ldd r0, Y+a10+2 + eor rTt+2, r0 + std Y+a10+2, rTt+2 + com rTt+3 + and rTt+3, rC0+3 + ldd r0, Y+a10+3 + eor rTt+3, r0 + std Y+a10+3, rTt+3 + + ; v1(a20) = ROTL32(a20 
^ ~c0 & a10, 8); + movw rVv+0, rTt+0 ; a10 in rVv + movw rVv+2, rTt+2 + mov r0, rC0+0 + com r0 + and rTt+0, r0 + ldd r0, Y+a20+0 + eor rTt+0, r0 + + mov r0, rC0+1 + com r0 + and rTt+1, r0 + ldd r0, Y+a20+1 + eor rTt+1, r0 + + mov r0, rC0+2 + com r0 + and rTt+2, r0 + ldd r0, Y+a20+2 + eor rTt+2, r0 + + mov r0, rC0+3 + com r0 + and rTt+3, r0 + ldd r0, Y+a20+3 + eor rTt+3, r0 + std Y+a20+0, rTt+3 + std Y+a20+1, rTt+0 + std Y+a20+2, rTt+1 + std Y+a20+3, rTt+2 + + ; c0 ^= a10 = ROTL32(a10, 1); + lsl rVv+0 + rol rVv+1 + std Y+a10+1, rVv+1 + eor rC0+1, rVv+1 + rol rVv+2 + std Y+a10+2, rVv+2 + eor rC0+2, rVv+2 + rol rVv+3 + std Y+a10+3, rVv+3 + eor rC0+3, rVv+3 + adc rVv+0, zero + std Y+a10+0, rVv+0 + eor rC0+0, rVv+0 + + ; a02 = c2 ^= ~a12 & a22; + ldd r0, Y+a12+0 + com r0 + ldd rVv+0, Y+a22+0 ; a22 in rVv + and r0, rVv+0 + eor rC2+0, r0 + std Y+a02+0, rC2+0 + ldd r0, Y+a12+1 + com r0 + ldd rVv+1, Y+a22+1 + and r0, rVv+1 + eor rC2+1, r0 + std Y+a02+1, rC2+1 + ldd r0, Y+a12+2 + com r0 + ldd rVv+2, Y+a22+2 + and r0, rVv+2 + eor rC2+2, r0 + std Y+a02+2, rC2+2 + ldd r0, Y+a12+3 + com r0 + ldd rVv+3, Y+a22+3 + and r0, rVv+3 + eor rC2+3, r0 + std Y+a02+3, rC2+3 + + ; a12 ^= ~a22 & c2; + mov r0, rVv+0 ; a12 in rTt + com r0 + and r0, rC2+0 + ldd rTt+0, Y+a12+0 + eor rTt+0, r0 + std Y+a12+0, rTt+0 + mov r0, rVv+1 + com r0 + and r0, rC2+1 + ldd rTt+1, Y+a12+1 + eor rTt+1, r0 + std Y+a12+1, rTt+1 + mov r0, rVv+2 + com r0 + and r0, rC2+2 + ldd rTt+2, Y+a12+2 + eor rTt+2, r0 + std Y+a12+2, rTt+2 + mov r0, rVv+3 + com r0 + and r0, rC2+3 + ldd rTt+3, Y+a12+3 + eor rTt+3, r0 + std Y+a12+3, rTt+3 + + ; c0 ^= a20 = ROTL32(a22 ^ ~c2 & a12, 8); + mov r0, rC2+0 + com r0 + and r0, rTt+0 + eor r0, rVv+0 + ldd rVv+0, Y+a20+1 ; rVv = a22 + std Y+a20+1, r0 + eor rC0+1, r0 + mov r0, rC2+1 + com r0 + and r0, rTt+1 + eor r0, rVv+1 + ldd rVv+1, Y+a20+2 + std Y+a20+2, r0 + eor rC0+2, r0 + mov r0, rC2+2 + com r0 + and r0, rTt+2 + eor r0, rVv+2 + ldd rVv+2, Y+a20+3 + std Y+a20+3, r0 + eor rC0+3, r0 + mov 
r0, rC2+3 + com r0 + and r0, rTt+3 + eor r0, rVv+3 + ldd rVv+3, Y+a20+0 + std Y+a20+0, r0 + eor rC0+0, r0 + + ; c2 ^= a12 = ROTL32(a12, 1); + lsl rTt+0 + rol rTt+1 + eor rC2+1, rTt+1 + std Y+a12+1, rTt+1 + rol rTt+2 + eor rC2+2, rTt+2 + std Y+a12+2, rTt+2 + rol rTt+3 + eor rC2+3, rTt+3 + std Y+a12+3, rTt+3 + adc rTt+0, zero + eor rC2+0, rTt+0 + std Y+a12+0, rTt+0 + + ; a22 = v1; + std Y+a22+0, rVv+3 + std Y+a22+1, rVv+0 + std Y+a22+2, rVv+1 + std Y+a22+3, rVv+2 + + ; c2 ^= v1; + eor rC2+0, rVv+3 + eor rC2+1, rVv+0 + eor rC2+2, rVv+1 + eor rC2+3, rVv+2 + + ; a01 = c1 ^= ~a11 & a21; + ldd rTt+0, Y+a11+0 ;rTt holds a11 + mov r0, rTt+0 + com r0 + ldd rVv+0, Y+a21+0 ;rVv holds a21 + and r0, rVv+0 + eor rC1+0, r0 + std Y+a01+0, rC1+0 + ldd rTt+1, Y+a11+1 + mov r0, rTt+1 + com r0 + ldd rVv+1, Y+a21+1 + and r0, rVv+1 + eor rC1+1, r0 + std Y+a01+1, rC1+1 + ldd rTt+2, Y+a11+2 + mov r0, rTt+2 + com r0 + ldd rVv+2, Y+a21+2 + and r0, rVv+2 + eor rC1+2, r0 + std Y+a01+2, rC1+2 + ldd rTt+3, Y+a11+3 + mov r0, rTt+3 + com r0 + ldd rVv+3, Y+a21+3 + and r0, rVv+3 + eor rC1+3, r0 + std Y+a01+3, rC1+3 + + ; a11 ^= ~a21 & c1; + mov r0, rVv+0 + com r0 + and r0, rC1+0 + eor rTt+0, r0 + std Y+a11+0, rTt+0 + mov r0, rVv+1 + com r0 + and r0, rC1+1 + eor rTt+1, r0 + std Y+a11+1, rTt+1 + mov r0, rVv+2 + com r0 + and r0, rC1+2 + eor rTt+2, r0 + std Y+a11+2, rTt+2 + mov r0, rVv+3 + com r0 + and r0, rC1+3 + eor rTt+3, r0 + std Y+a11+3, rTt+3 + + ; v1 = ROTL32(a21 ^ ~c1 & a11, 8); + mov r0, rC1+0 + com r0 + and r0, rTt+0 + eor rVv+0, r0 ; v1 not yet ROTL32'ed(8) + mov r0, rC1+1 + com r0 + and r0, rTt+1 + eor rVv+1, r0 + mov r0, rC1+2 + com r0 + and r0, rTt+2 + eor rVv+2, r0 + mov r0, rC1+3 + com r0 + and r0, rTt+3 + eor rVv+3, r0 + + ; c1 ^= a11 = ROTL32(a11, 1); + lsl rTt+0 + rol rTt+1 + eor rC1+1, rTt+1 + std Y+a11+1, rTt+1 + rol rTt+2 + eor rC1+2, rTt+2 + std Y+a11+2, rTt+2 + rol rTt+3 + eor rC1+3, rTt+3 + std Y+a11+3, rTt+3 + adc rTt+0, zero + eor rC1+0, rTt+0 + std Y+a11+0, rTt+0 + + ; a03 = 
c3 ^= ~a13 & a23; + ldd r0, Y+a13+0 + com r0 + ldd rTt+0, Y+a23+0 ; a23 in rTt + and r0, rTt+0 + eor rC3+0, r0 + std Y+a03+0, rC3+0 + ldd r0, Y+a13+1 + com r0 + ldd rTt+1, Y+a23+1 + and r0, rTt+1 + eor rC3+1, r0 + std Y+a03+1, rC3+1 + ldd r0, Y+a13+2 + com r0 + ldd rTt+2, Y+a23+2 + and r0, rTt+2 + eor rC3+2, r0 + std Y+a03+2, rC3+2 + ldd r0, Y+a13+3 + com r0 + ldd rTt+3, Y+a23+3 + and r0, rTt+3 + eor rC3+3, r0 + std Y+a03+3, rC3+3 + + ; a13 ^= ~a23 & c3; + mov r0, rTt+0 + com r0 + and r0, rC3+0 + ldd rTt+0, Y+a13+0 ; a13 in rTt + eor rTt+0, r0 + mov r0, rTt+1 + com r0 + and r0, rC3+1 + ldd rTt+1, Y+a13+1 + eor rTt+1, r0 + mov r0, rTt+2 + com r0 + and r0, rC3+2 + ldd rTt+2, Y+a13+2 + eor rTt+2, r0 + mov r0, rTt+3 + com r0 + and r0, rC3+3 + ldd rTt+3, Y+a13+3 + eor rTt+3, r0 + + ; c1 ^= a21 = ROTL32(a23 ^ ~c3 & a13, 8); + push rVv + mov r0, rC3+0 + com r0 + and r0, rTt+0 + ldd rVv, Y+a23+0 + eor r0, rVv + eor rC1+1, r0 + std Y+a21+1, r0 + mov r0, rC3+1 + com r0 + and r0, rTt+1 + ldd rVv, Y+a23+1 + eor r0, rVv + eor rC1+2, r0 + std Y+a21+2, r0 + mov r0, rC3+2 + com r0 + and r0, rTt+2 + ldd rVv, Y+a23+2 + eor r0, rVv + eor rC1+3, r0 + std Y+a21+3, r0 + mov r0, rC3+3 + com r0 + and r0, rTt+3 + ldd rVv, Y+a23+3 + eor r0, rVv + eor rC1+0, r0 + std Y+a21+0, r0 + pop rVv + + ; a23 = v1; + std Y+a23+0, rVv+3 ; rol8(rVv) + std Y+a23+1, rVv+0 + std Y+a23+2, rVv+1 + std Y+a23+3, rVv+2 + + ; c3 ^= v1; + eor rC3+0, rVv+3 + eor rC3+1, rVv+0 + eor rC3+2, rVv+1 + eor rC3+3, rVv+2 + + ; c3 ^= a13 = ROTL32(a13, 1); + lsl rTt+0 + rol rTt+1 + std Y+a13+1, rTt+1 + eor rC3+1, rTt+1 + rol rTt+2 + std Y+a13+2, rTt+2 + eor rC3+2, rTt+2 + rol rTt+3 + std Y+a13+3, rTt+3 + eor rC3+3, rTt+3 + adc rTt+0, zero + std Y+a13+0, rTt+0 + eor rC3+0, rTt+0 + + ; Check for terminator + lpm r0, Z + inc r0 + breq Xoodoo_Done + rjmp Xoodoo_RoundLoop +Xoodoo_Done: + pop r29 + pop r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + 
pop r4 + pop r3 + pop r2 + ret diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/Xoodoo.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/Xoodoo.h new file mode 100644 index 0000000..1b6f1a9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/Xoodoo.h @@ -0,0 +1,79 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodoo_h_ +#define _Xoodoo_h_ + +#include +#include + +#define MAXROUNDS 12 +#define NROWS 3 +#define NCOLUMS 4 +#define NLANES (NCOLUMS*NROWS) + +/* Round constants */ +#define _rc12 0x00000058 +#define _rc11 0x00000038 +#define _rc10 0x000003C0 +#define _rc9 0x000000D0 +#define _rc8 0x00000120 +#define _rc7 0x00000014 +#define _rc6 0x00000060 +#define _rc5 0x0000002C +#define _rc4 0x00000380 +#define _rc3 0x000000F0 +#define _rc2 0x000001A0 +#define _rc1 0x00000012 + + +#if !defined(ROTL32) + #if defined (__arm__) && !defined(__GNUC__) + #define ROTL32(a, offset) __ror(a, (32-(offset))%32) + #elif defined(_MSC_VER) + #define ROTL32(a, offset) _rotl(a, (offset)%32) + #else + #define ROTL32(a, offset) ((((uint32_t)a) << ((offset)%32)) ^ (((uint32_t)a) >> ((32-(offset))%32))) + #endif +#endif + +#if !defined(READ32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define READ32_UNALIGNED(argAddress) (*((const __packed uint32_t*)(argAddress))) + #elif defined(_MSC_VER) + #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress))) + #else + #define 
READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress))) + #endif +#endif + +#if !defined(WRITE32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define WRITE32_UNALIGNED(argAddress, argData) (*((__packed uint32_t*)(argAddress)) = (argData)) + #elif defined(_MSC_VER) + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #else + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #endif +#endif + +#if !defined(index) + #define index(__x,__y) ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS)) +#endif + +typedef uint32_t tXoodooLane; + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/Xoodyak-parameters.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/Xoodyak-parameters.h new file mode 100644 index 0000000..a8c34d8 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/Xoodyak-parameters.h @@ -0,0 +1,26 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _Xoodyak_parameters_h_
+#define _Xoodyak_parameters_h_
+
+/*
+ * Byte counts that parameterize the generic Cyclist code for Xoodyak.
+ * NOTE(review): the per-constant roles below are taken from how the Cyclist
+ * template elsewhere in this package consumes the matching prefix##_ names;
+ * confirm against the Cyclist.inc used by this AVR8 target.
+ */
+#define Xoodyak_f_bPrime    48  /* permutation state size in bytes */
+#define Xoodyak_Rhash       16  /* absorb and squeeze rate in hash mode */
+#define Xoodyak_Rkin        44  /* absorb rate in keyed mode */
+#define Xoodyak_Rkout       24  /* squeeze rate in keyed mode */
+#define Xoodyak_lRatchet    16  /* bytes squeezed and re-absorbed by Ratchet */
+
+#endif
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/Xoodyak.c b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/Xoodyak.c
new file mode 100644
index 0000000..e0b67b5
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/Xoodyak.c
@@ -0,0 +1,53 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifdef XoodooReference + #include "displayIntermediateValues.h" +#endif + +#include +#include +#include "Xoodyak.h" + +#ifdef OUTPUT +#include +#include + +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length); +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length) +{ + unsigned int i; + + fprintf(f, "%s:", synopsis); + for(i=0; i +#include "Cyclist.h" +#include "Xoodoo-SnP.h" +#include "Xoodyak-parameters.h" + +KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment) +KCP_DeclareCyclistFunctions(Xoodyak) + +#else +#error This requires an implementation of Xoodoo +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/align.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/align.h new file mode 100644 index 0000000..82ad2f9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/align.h @@ -0,0 +1,33 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _align_h_ +#define _align_h_ + +/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. 
*/
+#ifdef ALIGN
+#undef ALIGN
+#endif
+
+/*
+ * ALIGN(x): compiler-specific alignment specifier requesting x-byte alignment.
+ * Expands to nothing on compilers that are not recognized below, in which
+ * case no alignment is requested at all.
+ */
+#if defined(__GNUC__)
+#define ALIGN(x) __attribute__ ((aligned(x)))
+#elif defined(_MSC_VER)
+#define ALIGN(x) __declspec(align(x))
+#elif defined(__ARMCC_VERSION)
+#define ALIGN(x) __align(x)
+#else
+#define ALIGN(x)
+#endif
+
+#endif
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/api.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/api.h
new file mode 100644
index 0000000..4ceda96
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/api.h
@@ -0,0 +1,5 @@
+/* Sizes, in bytes, advertised to the crypto_aead caller. */
+#define CRYPTO_KEYBYTES 16
+#define CRYPTO_NSECBYTES 0
+#define CRYPTO_NPUBBYTES 16
+#define CRYPTO_ABYTES 16
+#define CRYPTO_NOOVERLAP 1
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/brg_endian.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/brg_endian.h
new file mode 100644
index 0000000..7c640b9
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/brg_endian.h
@@ -0,0 +1,143 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The redistribution and use of this software (with or without changes)
+ is allowed without the payment of fees or royalties provided that:
+
+  1. source code distributions include the above copyright notice, this
+     list of conditions and the following disclaimer;
+
+  2. binary distributions include the above copyright notice, this list
+     of conditions and the following disclaimer in their documentation;
+
+  3. the name of the copyright holder is not used to endorse products
+     built using this software without specific written permission.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue Date: 20/12/2007
+ Changes for ARM 9/9/2010
+*/
+
+#ifndef _BRG_ENDIAN_H
+#define _BRG_ENDIAN_H
+
+/* Values that PLATFORM_BYTE_ORDER is compared against by users of this header. */
+#define IS_BIG_ENDIAN      4321 /* byte 0 is most significant (mc68k) */
+#define IS_LITTLE_ENDIAN   1234 /* byte 0 is least significant (i386) */
+
+/*
+ * The system-header probing below is disabled with "#if 0", so only the
+ * compiler-predefined symbols tested further down take part in detection.
+ * NOTE(review): the "# include" lines carry no header name in this copy;
+ * they look truncated - restore them from upstream before enabling this.
+ */
+#if 0
+/* Include files where endian defines and byteswap functions may reside */
+#if defined( __sun )
+#  include
+#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
+#  include
+#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
+      defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
+#  include
+#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
+#  if !defined( __MINGW32__ ) && !defined( _AIX )
+#    include
+#    if !defined( __BEOS__ )
+#      include
+#    endif
+#  endif
+#endif
+#endif
+
+/* Now attempt to set the define for platform byte order using any  */
+/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which  */
+/* seem to encompass most endian symbol definitions                 */
+
+/* Form 1: BIG_ENDIAN / LITTLE_ENDIAN, disambiguated by BYTE_ORDER when both exist. */
+#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
+#  if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#  elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#  endif
+#elif defined( BIG_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( LITTLE_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+/* Form 2: the same test with a single leading underscore. */
+#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
+#  if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#  elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#  endif
+#elif defined( _BIG_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( _LITTLE_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+/* Form 3: the same test with two leading underscores. */
+#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
+#  if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#  elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#  endif
+#elif defined( __BIG_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( __LITTLE_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+/* Form 4: the same test with leading and trailing double underscores. */
+#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
+#  if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
+#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#  elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
+#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#  endif
+#elif defined( __BIG_ENDIAN__ )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( __LITTLE_ENDIAN__ )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+/*  if the platform byte order could not be determined, then try to */
+/*  set this define using common machine defines                    */
+#if !defined(PLATFORM_BYTE_ORDER)
+
+#if   defined( __alpha__ ) || defined( __alpha ) || defined( i386 )       || \
+      defined( __i386__ )  || defined( _M_I86 )  || defined( _M_IX86 )    || \
+      defined( __OS2__ )   || defined( sun386 )  || defined( __TURBOC__ ) || \
+      defined( vax )       || defined( vms )     || defined( VMS )        || \
+      defined( __VMS )     || defined( _M_X64 )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+
+#elif defined( AMIGA )   || defined( applec )    || defined( __AS400__ )  || \
+      defined( _CRAY )   || defined( __hppa )    || defined( __hp9000 )   || \
+      defined( ibm370 )  || defined( mc68000 )   || defined( m68k )       || \
+      defined( __MRC__ ) || defined( __MVS__ )   || defined( __MWERKS__ ) || \
+      defined( sparc )   || defined( __sparc)    || defined( SYMANTEC_C ) || \
+      defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM )   || \
+      defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX )       || \
+      defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+
+#elif defined(__arm__)
+# ifdef __BIG_ENDIAN
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# else
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+/* The "#elif 1" branch below is always taken when nothing above matched, so   */
+/* the default is little-endian and the branches after it are never reached.   */
+#elif 1     /* **** EDIT HERE IF NECESSARY **** */
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#elif 0     /* **** EDIT HERE IF NECESSARY **** */
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#else
+#  error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order
+#endif
+
+#endif
+
+#endif
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/config.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/config.h
new file mode 100644
index 0000000..7dfc043
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/config.h
@@ -0,0 +1,4 @@
+/* File generated by ToTargetConfigFile.xsl */
+
+#define XKCP_has_Xoodyak
+#define XKCP_has_Xoodoo
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/encrypt.c b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/encrypt.c
new file mode 100644
index 0000000..3090334
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-AVR8/encrypt.c
@@ -0,0 +1,92 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#include "crypto_aead.h"
+#include "api.h"
+#include "Xoodyak.h"
+#include <string.h>
+
+#if !defined(CRYPTO_KEYBYTES)
+    #define CRYPTO_KEYBYTES     16
+#endif
+#if !defined(CRYPTO_NPUBBYTES)
+    #define CRYPTO_NPUBBYTES    16
+#endif
+
+#define TAGLEN      16
+
+/*
+ * Compare two byte strings without a data-dependent early exit.
+ *
+ * Every byte pair is examined, so how long the comparison takes does not
+ * reveal the position of the first mismatch (which memcmp() may do).
+ * Returns 0 when the buffers are equal, non-zero otherwise.
+ */
+static int tags_differ(const unsigned char *a, const unsigned char *b, size_t len)
+{
+    unsigned char acc = 0;
+    size_t i;
+
+    for (i = 0; i < len; ++i) {
+        acc |= (unsigned char)(a[i] ^ b[i]);
+    }
+    return acc != 0;
+}
+
+/*
+ * Encrypt mlen bytes of m under key k and nonce npub, authenticating ad.
+ *
+ * c receives the mlen ciphertext bytes followed by a TAGLEN-byte tag, and
+ * *clen is set to mlen + TAGLEN. nsec is unused. Always returns 0.
+ *
+ * NOTE(review): adlen and mlen are narrowed to size_t without a range check;
+ * callers must keep them within size_t on targets where it is narrower than
+ * unsigned long long.
+ */
+int crypto_aead_encrypt(
+    unsigned char *c, unsigned long long *clen,
+    const unsigned char *m, unsigned long long mlen,
+    const unsigned char *ad, unsigned long long adlen,
+    const unsigned char *nsec,
+    const unsigned char *npub,
+    const unsigned char *k)
+{
+    Xoodyak_Instance    instance;
+
+    (void)nsec;
+
+    Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, NULL, 0, NULL, 0);
+    Xoodyak_Absorb(&instance, npub, CRYPTO_NPUBBYTES);
+    Xoodyak_Absorb(&instance, ad, (size_t)adlen);
+    Xoodyak_Encrypt(&instance, m, c, (size_t)mlen);
+    Xoodyak_Squeeze(&instance, c + mlen, TAGLEN);
+    *clen = mlen + TAGLEN;
+    return 0;
+}
+
+/*
+ * Verify and decrypt clen bytes of c (ciphertext followed by a TAGLEN-byte
+ * tag) under key k and nonce npub, authenticating ad.
+ *
+ * On success, m holds the plaintext, *mlen = clen - TAGLEN and 0 is returned.
+ * Returns -1 with *mlen = 0 when clen is shorter than a tag or when the tag
+ * does not match; in the latter case the plaintext already written to m is
+ * overwritten with zeroes. nsec is unused.
+ */
+int crypto_aead_decrypt(
+    unsigned char *m, unsigned long long *mlen,
+    unsigned char *nsec,
+    const unsigned char *c, unsigned long long clen,
+    const unsigned char *ad, unsigned long long adlen,
+    const unsigned char *npub,
+    const unsigned char *k)
+{
+    Xoodyak_Instance    instance;
+    unsigned char       tag[TAGLEN];
+    unsigned long long  mlen_;
+
+    (void)nsec;
+
+    *mlen = 0;
+    if (clen < TAGLEN) {
+        return -1;
+    }
+    mlen_ = clen - TAGLEN;
+    Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, NULL, 0, NULL, 0);
+    Xoodyak_Absorb(&instance, npub, CRYPTO_NPUBBYTES);
+    Xoodyak_Absorb(&instance, ad, (size_t)adlen);
+    Xoodyak_Decrypt(&instance, c, m, (size_t)mlen_);
+    Xoodyak_Squeeze(&instance, tag, TAGLEN);
+    if (tags_differ(tag, c + mlen_, TAGLEN)) {
+        /* Never release unauthenticated plaintext; m may be NULL when empty. */
+        if (mlen_ != 0) {
+            memset(m, 0, (size_t)mlen_);
+        }
+        return -1;
+    }
+    *mlen = mlen_;
+    return 0;
+}
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Cyclist.h
b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Cyclist.h
new file mode 100644
index 0000000..54522bb
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Cyclist.h
@@ -0,0 +1,66 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _Cyclist_h_
+#define _Cyclist_h_
+
+/* size_t and uint8_t appear in the declarations generated below. */
+#include <stddef.h>
+#include <stdint.h>
+#include "align.h"
+
+/* Values stored in the instance's mode field. */
+#define Cyclist_ModeHash    1
+#define Cyclist_ModeKeyed   2
+
+/* Values stored in the instance's phase field. */
+#define Cyclist_PhaseDown   1
+#define Cyclist_PhaseUp     2
+
+/*
+ * KCP_DeclareCyclistStructure(prefix, size, alignment)
+ * Declares the type prefix##_Instance: a state of size bytes plus the current
+ * phase, mode and the absorb/squeeze rates. When OUTPUT is defined the
+ * instance additionally carries a shadow copy of the state and a FILE* used
+ * for tracing.
+ */
+#ifdef OUTPUT
+
+#include <stdio.h>
+
+#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
+    ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
+        uint8_t         state[size]; \
+        uint8_t         stateShadow[size]; \
+        FILE            *file; \
+        unsigned int    phase; \
+        unsigned int    mode; \
+        unsigned int    Rabsorb; \
+        unsigned int    Rsqueeze; \
+    } prefix##_Instance;
+
+#else
+
+#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
+    ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
+        uint8_t         state[size]; \
+        unsigned int    phase; \
+        unsigned int    mode; \
+        unsigned int    Rabsorb; \
+        unsigned int    Rsqueeze; \
+    } prefix##_Instance;
+
+#endif
+
+/*
+ * KCP_DeclareCyclistFunctions(prefix)
+ * Declares the public functions operating on a prefix##_Instance.
+ */
+#define KCP_DeclareCyclistFunctions(prefix) \
+    void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \
+    void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \
+    void prefix##_Encrypt(prefix##_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen); \
+    void prefix##_Decrypt(prefix##_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen); \
+    void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \
+    void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \
+    void prefix##_Ratchet(prefix##_Instance *instance);
+
+#endif
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Cyclist.inc b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Cyclist.inc
new file mode 100644
index 0000000..ba7a156
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Cyclist.inc
@@ -0,0 +1,327 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+/*
+ * Generic Cyclist mode, instantiated by textual inclusion. The including file
+ * is expected to define SnP (name prefix of the permutation's state
+ * functions) and prefix (name prefix of the resulting Cyclist functions),
+ * and to provide SnP_Permute and MyMin, which are used but not defined here.
+ */
+
+#define JOIN0(a, b)                     a ## b
+#define JOIN(a, b)                      JOIN0(a, b)
+
+#define SnP_StaticInitialize            JOIN(SnP, _StaticInitialize)
+#define SnP_Initialize                  JOIN(SnP, _Initialize)
+#define SnP_AddBytes                    JOIN(SnP, _AddBytes)
+#define SnP_AddByte                     JOIN(SnP, _AddByte)
+#define SnP_OverwriteBytes              JOIN(SnP, _OverwriteBytes)
+#define SnP_ExtractBytes                JOIN(SnP, _ExtractBytes)
+#define SnP_ExtractAndAddBytes          JOIN(SnP, _ExtractAndAddBytes)
+
+#define Cyclist_Instance                JOIN(prefix, _Instance)
+#define Cyclist_Initialize              JOIN(prefix, _Initialize)
+#define Cyclist_Absorb                  JOIN(prefix, _Absorb)
+#define Cyclist_Encrypt                 JOIN(prefix, _Encrypt)
+#define Cyclist_Decrypt                 JOIN(prefix, _Decrypt)
+#define Cyclist_Squeeze                 JOIN(prefix, _Squeeze)
+#define Cyclist_SqueezeKey              JOIN(prefix, _SqueezeKey)
+#define Cyclist_Ratchet                 JOIN(prefix, _Ratchet)
+
+#define Cyclist_AbsorbAny               JOIN(prefix, _AbsorbAny)
+#define Cyclist_AbsorbKey               JOIN(prefix, _AbsorbKey)
+#define Cyclist_SqueezeAny              JOIN(prefix, _SqueezeAny)
+#define Cyclist_Down                    JOIN(prefix, _Down)
+#define Cyclist_Up                      JOIN(prefix, _Up)
+#define Cyclist_Crypt                   JOIN(prefix, _Crypt)
+
+#define Cyclist_f_bPrime                JOIN(prefix, _f_bPrime)
+#define Cyclist_Rhash                   JOIN(prefix, _Rhash)
+#define Cyclist_Rkin                    JOIN(prefix, _Rkin)
+#define Cyclist_Rkout                   JOIN(prefix, _Rkout)
+#define Cyclist_lRatchet                JOIN(prefix, _lRatchet)
+
+#if defined(CyclistFullBlocks_supported)
+#define Cyclist_AbsorbKeyedFullBlocks   JOIN(prefix, _AbsorbKeyedFullBlocks)
+#define Cyclist_AbsorbHashFullBlocks    JOIN(prefix, _AbsorbHashFullBlocks)
+#define Cyclist_SqueezeKeyedFullBlocks  JOIN(prefix, _SqueezeKeyedFullBlocks)
+#define Cyclist_SqueezeHashFullBlocks   JOIN(prefix, _SqueezeHashFullBlocks)
+#define Cyclist_EncryptFullBlocks       JOIN(prefix, _EncryptFullBlocks)
+#define Cyclist_DecryptFullBlocks       JOIN(prefix, _DecryptFullBlocks)
+#endif
+
+/* ------- Cyclist internal interfaces ------- */
+
+/*
+ * Down: XOR the XiLen bytes of Xi into the start of the state, add the padding
+ * byte 0x01 right after them and the domain byte Cd into the last state byte
+ * (only its lowest bit in hash mode), then mark the phase as "down".
+ * Xi may be NULL when XiLen is 0.
+ */
+static void Cyclist_Down(Cyclist_Instance *instance, const uint8_t *Xi, unsigned int XiLen, uint8_t Cd)
+{
+    /* Skip the call for empty input so a NULL Xi never reaches the SnP layer. */
+    if (XiLen != 0) {
+        SnP_AddBytes(instance->state, Xi, 0, XiLen);
+    }
+    SnP_AddByte(instance->state, 0x01, XiLen);
+    SnP_AddByte(instance->state, (instance->mode == Cyclist_ModeHash) ? (Cd & 0x01) : Cd, Cyclist_f_bPrime - 1);
+    instance->phase = Cyclist_PhaseDown;
+}
+
+/*
+ * Up: outside hash mode add the domain byte Cu into the last state byte, apply
+ * the permutation, mark the phase as "up" and copy the first YiLen state
+ * bytes to Yi. Yi may be NULL when YiLen is 0.
+ * With OUTPUT defined and a trace file set, the data XORed in since the
+ * previous permutation and the state after it are written to that file.
+ */
+static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu)
+{
+    #if defined(OUTPUT)
+    uint8_t   s[Cyclist_f_bPrime];
+    #endif
+
+    if (instance->mode != Cyclist_ModeHash) {
+        SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1);
+    }
+    #if defined(OUTPUT)
+    if (instance->file != NULL) {
+        SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
+        SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime );
+    }
+    #endif
+    SnP_Permute( instance->state );
+    #if defined(OUTPUT)
+    if (instance->file != NULL) {
+        memcpy( instance->stateShadow, instance->state, sizeof(instance->state) );
+        fprintf( instance->file, "Data XORed" );
+        displayByteString( instance->file, "", s, Cyclist_f_bPrime );
+        SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
+        fprintf( instance->file, "After f() ");
+        displayByteString( instance->file, "", s, Cyclist_f_bPrime );
+    }
+    #endif
+    instance->phase = Cyclist_PhaseUp;
+    /* Skip the call for empty output so a NULL Yi never reaches the SnP layer. */
+    if (YiLen != 0) {
+        SnP_ExtractBytes( instance->state, Yi, 0, YiLen );
+    }
+}
+
+/*
+ * Absorb XLen bytes of X in blocks of at most r bytes. Cd is applied to the
+ * first block only; later blocks use 0. At least one block is absorbed even
+ * when XLen is 0. Full-block fast paths are used when available.
+ */
+static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd)
+{
+    unsigned int  splitLen;
+
+    do {
+        if (instance->phase != Cyclist_PhaseUp) {
+            Cyclist_Up(instance, NULL, 0, 0);
+        }
+        splitLen = MyMin(XLen, r);
+        Cyclist_Down(instance, X, splitLen, Cd);
+        Cd = 0;
+        X       += splitLen;
+        XLen    -= splitLen;
+        #if defined(CyclistFullBlocks_supported)
+        if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) {
+            size_t lenProcessed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen);
+            X       += lenProcessed;
+            XLen    -= lenProcessed;
+        }
+        else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) {
+            size_t lenProcessed = Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen);
+            X       += lenProcessed;
+            XLen    -= lenProcessed;
+        }
+        #endif
+    } while ( XLen != 0 );
+}
+
+/*
+ * Switch the instance to keyed mode and absorb K || ID || (uint8_t)IDLen with
+ * domain byte 0x02, followed by the counter one byte at a time when
+ * counterLen is non-zero. Nothing is absorbed when KLen is 0.
+ * ID may be NULL when IDLen is 0.
+ *
+ * The length constraint KLen + IDLen <= Cyclist_Rkin - 1 is enforced only by
+ * assert(); callers must respect it in builds where assertions are disabled,
+ * otherwise the local KID buffer is overrun.
+ */
+static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
+{
+    uint8_t KID[Cyclist_Rkin];
+
+    assert(instance->mode == Cyclist_ModeHash);
+    assert((KLen + IDLen) <= (Cyclist_Rkin - 1));
+
+    instance->mode      = Cyclist_ModeKeyed;
+    instance->Rabsorb   = Cyclist_Rkin;
+    instance->Rsqueeze  = Cyclist_Rkout;
+    if (KLen != 0) {
+        memcpy(KID, K, KLen);
+        /* memcpy() must not be handed a NULL source, even for zero bytes. */
+        if (IDLen != 0) {
+            memcpy(KID + KLen, ID, IDLen);
+        }
+        KID[KLen + IDLen] = (uint8_t)IDLen;
+        Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02);
+        if (counterLen != 0) {
+            Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00);
+        }
+    }
+}
+
+/*
+ * Squeeze YLen bytes into Y in blocks of at most instance->Rsqueeze bytes.
+ * Cu is applied to the first Up only; each further block is preceded by an
+ * empty Down. Full-block fast paths are used when available.
+ */
+static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu)
+{
+    unsigned int  len;
+
+    len = MyMin(YLen, instance->Rsqueeze );
+    Cyclist_Up(instance, Y, len, Cu);
+    Y += len;
+    YLen -= len;
+    while (YLen != 0) {
+        #if defined(CyclistFullBlocks_supported)
+        if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkin)) {
+            size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen);
+            Y       += lenProcessed;
+            YLen    -= lenProcessed;
+        }
+        else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) {
+            size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen);
+            Y       += lenProcessed;
+            YLen    -= lenProcessed;
+        }
+        else
+        #endif
+        {
+            Cyclist_Down(instance, NULL, 0, 0);
+            len = MyMin(YLen, instance->Rsqueeze );
+            Cyclist_Up(instance, Y, len, 0);
+            Y += len;
+            YLen -= len;
+        }
+    }
+}
+
+/*
+ * Encrypt (decrypt == 0) or decrypt (decrypt != 0) IOLen bytes from I to O in
+ * blocks of at most Cyclist_Rkout bytes. The first Up uses domain byte 0x80,
+ * later ones 0x00. In both directions it is the plaintext that is absorbed
+ * back into the state. At least one block is processed even when IOLen is 0.
+ */
+static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt)
+{
+    unsigned int  splitLen;
+    uint8_t       P[Cyclist_Rkout];
+    uint8_t       Cu = 0x80;
+
+    do {
+        if (decrypt != 0) {
+            #if defined(CyclistFullBlocks_supported)
+            if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
+                size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen);
+                I       += lenProcessed;
+                O       += lenProcessed;
+                IOLen   -= lenProcessed;
+            }
+            else
+            #endif
+            {
+                splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
+                Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
+                SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
+                Cyclist_Down(instance, O, splitLen, 0x00);
+                I       += splitLen;
+                O       += splitLen;
+                IOLen   -= splitLen;
+            }
+        }
+        else {
+            #if defined(CyclistFullBlocks_supported)
+            if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
+                size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen);
+                I       += lenProcessed;
+                O       += lenProcessed;
+                IOLen   -= lenProcessed;
+            }
+            else
+            #endif
+            {
+                splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
+                /* Keep a copy of the plaintext: O may be written before the Down below reads it. */
+                memcpy(P, I, splitLen);
+                Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
+                SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
+                Cyclist_Down(instance, P, splitLen, 0x00);
+                I       += splitLen;
+                O       += splitLen;
+                IOLen   -= splitLen;
+            }
+        }
+        Cu = 0x00;
+    } while ( IOLen != 0 );
+}
+
+/* ------- Cyclist interfaces ------- */
+
+/*
+ * Reset the instance to hash mode with an all-initialized state and, when
+ * KLen is non-zero, switch to keyed mode by absorbing the key, ID and counter.
+ */
+void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
+{
+    SnP_StaticInitialize();
+    SnP_Initialize(instance->state);
+    instance->phase     = Cyclist_PhaseUp;
+    instance->mode      = Cyclist_ModeHash;
+    instance->Rabsorb   = Cyclist_Rhash;
+    instance->Rsqueeze  = Cyclist_Rhash;
+    #ifdef OUTPUT
+    instance->file = 0;
+    SnP_Initialize( instance->stateShadow );
+    #endif
+    if (KLen != 0) {
+        Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, counter, counterLen);
+    }
+}
+
+/* Absorb XLen bytes of X at the current absorb rate (domain byte 0x03). */
+void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, size_t XLen)
+{
+    Cyclist_AbsorbAny(instance, X, XLen, instance->Rabsorb, 0x03);
+}
+
+/* Encrypt PLen bytes of P into C; keyed mode only. */
+void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen)
+{
+    assert(instance->mode == Cyclist_ModeKeyed);
+    Cyclist_Crypt(instance, P, C, PLen, 0);
+}
+
+/* Decrypt CLen bytes of C into P; keyed mode only. */
+void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen)
+{
+    assert(instance->mode == Cyclist_ModeKeyed);
+    Cyclist_Crypt(instance, C, P, CLen, 1);
+}
+
+/* Squeeze YLen output bytes into Y (domain byte 0x40). */
+void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen)
+{
+    Cyclist_SqueezeAny(instance, Y, YLen, 0x40);
+}
+
+/* Squeeze KLen bytes of key material into K (domain byte 0x20); keyed mode only. */
+void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen)
+{
+    assert(instance->mode == Cyclist_ModeKeyed);
+    Cyclist_SqueezeAny(instance, K, KLen, 0x20);
+}
+
+/* Squeeze Cyclist_lRatchet bytes (domain byte 0x10) and absorb them back; keyed mode only. */
+void Cyclist_Ratchet(Cyclist_Instance *instance)
+{
+    uint8_t buffer[Cyclist_lRatchet];
+
+    assert(instance->mode == Cyclist_ModeKeyed);
+    /* Squeeze then absorb is the same as overwriting with zeros */
+    Cyclist_SqueezeAny(instance, buffer, sizeof(buffer), 0x10);
+    Cyclist_AbsorbAny(instance, buffer, sizeof(buffer), instance->Rabsorb, 0x00);
+}
+
+#undef SnP_StaticInitialize
+#undef SnP_Initialize
+#undef SnP_AddBytes
+#undef SnP_AddByte
+#undef SnP_OverwriteBytes
+#undef SnP_ExtractBytes
+#undef SnP_ExtractAndAddBytes
+
+#undef Cyclist_Instance
+#undef Cyclist_Initialize
+#undef Cyclist_Absorb
+#undef Cyclist_Encrypt
+#undef Cyclist_Decrypt
+#undef Cyclist_Squeeze
+#undef Cyclist_SqueezeKey
+#undef Cyclist_Ratchet
+
+#undef Cyclist_AbsorbAny
+#undef Cyclist_AbsorbKey
+#undef Cyclist_SqueezeAny
+#undef Cyclist_Down
+#undef Cyclist_Up
+#undef Cyclist_Crypt
+
+#undef Cyclist_f_bPrime
+#undef Cyclist_Rhash
+#undef Cyclist_Rkin
+#undef Cyclist_Rkout
+#undef Cyclist_lRatchet
+
+#if defined(CyclistFullBlocks_supported)
+#undef Cyclist_AbsorbKeyedFullBlocks
+#undef Cyclist_AbsorbHashFullBlocks
+#undef Cyclist_SqueezeKeyedFullBlocks
+#undef Cyclist_SqueezeHashFullBlocks
+#undef Cyclist_EncryptFullBlocks
+#undef Cyclist_DecryptFullBlocks
+#endif
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Xoodoo-SnP.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Xoodoo-SnP.h
new file mode 100644
index 0000000..2927919
--- /dev/null
+++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Xoodoo-SnP.h
@@ -0,0 +1,56 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _Xoodoo_SnP_h_
+#define _Xoodoo_SnP_h_
+
+#include <stddef.h>
+#include <stdint.h>
+
+/** For the documentation, see SnP-documentation.h.
+ */
+
+#define Xoodoo_implementation      "32-bit optimized implementation"
+#define Xoodoo_stateSizeInBytes    (3*4*4)
+#define Xoodoo_stateAlignment      4
+#define Xoodoo_HasNround
+
+#define Xoodoo_StaticInitialize()
+void Xoodoo_Initialize(void *state);
+/*
+ * XOR the byte argData into the state byte at offset argOffset.
+ * Every argument and the whole expansion are parenthesized, so expression
+ * arguments such as (base + 1) are converted and indexed as a whole.
+ */
+#define Xoodoo_AddByte(argS, argData, argOffset)    (((uint8_t*)(argS))[(argOffset)] ^= (argData))
+void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
+void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
+void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount);
+void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds);
+void Xoodoo_Permute_6rounds(void *state);
+void Xoodoo_Permute_12rounds(void *state);
+void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length);
+void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length);
+
+//#define Xoodoo_FastXoofff_supported
+//void Xoofff_AddIs( uint8_t *output, const uint8_t *input, size_t bitLen);
+//size_t Xoofff_CompressFastLoop(uint8_t *k, uint8_t *xAccu, const uint8_t *input, size_t length);
+//size_t Xoofff_ExpandFastLoop(uint8_t *yAccu, const uint8_t *kRoll, uint8_t *output, size_t length);
+
+/*
+ * Full-block fast paths for the Cyclist mode. Each returns a byte count that
+ * the caller uses to advance its data pointers and remaining length.
+ */
+#define CyclistFullBlocks_supported
+size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen);
+size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen);
+size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen);
+size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen);
+size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);
+size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);
+
+#endif
diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Xoodoo-optimized.c
b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Xoodoo-optimized.c new file mode 100644 index 0000000..ce86971 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Xoodoo-optimized.c @@ -0,0 +1,399 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include +#include +#include "Xoodoo.h" + +#define VERBOSE 0 + +#if (VERBOSE > 0) + #define Dump(__t) printf(__t "\n"); \ + printf("a00 %08x, a01 %08x, a02 %08x, a03 %08x\n", a00, a01, a02, a03 ); \ + printf("a10 %08x, a11 %08x, a12 %08x, a13 %08x\n", a10, a11, a12, a13 ); \ + printf("a20 %08x, a21 %08x, a22 %08x, a23 %08x\n\n", a20, a21, a22, a23 ); +#else + #define Dump(__t) +#endif + +#if (VERBOSE >= 1) + #define Dump1(__t) Dump(__t) +#else + #define Dump1(__t) +#endif + +#if (VERBOSE >= 2) + #define Dump2(__t) Dump(__t) +#else + #define Dump2(__t) +#endif + +#if (VERBOSE >= 3) + #define Dump3(__t) Dump(__t) +#else + #define Dump3(__t) +#endif + +/* ---------------------------------------------------------------- */ + +void Xoodoo_Initialize(void *state) +{ + memset(state, 0, NLANES*sizeof(tXoodooLane)); +} + +/* ---------------------------------------------------------------- */ + +void Xoodoo_AddBytes(void *argState, const unsigned char *argdata, unsigned int offset, unsigned int length) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + if (length == (3*4*4)) { + uint32_t *state = (uint32_t *)argState; + uint32_t *data = (uint32_t *)argdata; + state[0] ^= data[0]; 
+ state[1] ^= data[1]; + state[2] ^= data[2]; + state[3] ^= data[3]; + state[4] ^= data[4]; + state[5] ^= data[5]; + state[6] ^= data[6]; + state[7] ^= data[7]; + state[8] ^= data[8]; + state[9] ^= data[9]; + state[10] ^= data[10]; + state[11] ^= data[11]; + } + else { + unsigned int sizeLeft = length; + unsigned int lanePosition = offset/4; + unsigned int offsetInLane = offset%4; + const unsigned char *curData = argdata; + uint32_t *state = (uint32_t*)argState; + + state += lanePosition; + if ((sizeLeft > 0) && (offsetInLane != 0)) { + unsigned int bytesInLane = 4 - offsetInLane; + uint32_t lane = 0; + if (bytesInLane > sizeLeft) + bytesInLane = sizeLeft; + memcpy((unsigned char*)&lane + offsetInLane, curData, bytesInLane); + *state++ ^= lane; + sizeLeft -= bytesInLane; + curData += bytesInLane; + } + + while(sizeLeft >= 4) { + *state++ ^= READ32_UNALIGNED( curData ); + sizeLeft -= 4; + curData += 4; + } + + if (sizeLeft > 0) { + uint32_t lane = 0; + memcpy(&lane, curData, sizeLeft); + *state ^= lane; + } + } +#else + #error "Not yet implemented" +#endif +} + +/* ---------------------------------------------------------------- */ + +void Xoodoo_OverwriteBytes(void *argstate, const unsigned char *argdata, unsigned int offset, unsigned int length) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + if (length == (3*4*4)) { + uint32_t *state = (uint32_t *)argstate; + uint32_t *data = (uint32_t *)argdata; + state[0] = data[0]; + state[1] = data[1]; + state[2] = data[2]; + state[3] = data[3]; + state[4] = data[4]; + state[5] = data[5]; + state[6] = data[6]; + state[7] = data[7]; + state[8] = data[8]; + state[9] = data[9]; + state[10] = data[10]; + state[11] = data[11]; + } + else + memcpy((unsigned char*)argstate+offset, argdata, length); +#else + #error "Not yet implemented" +#endif +} + +/* ---------------------------------------------------------------- */ + +void Xoodoo_OverwriteWithZeroes(void *argstate, unsigned int byteCount) +{ +#if (PLATFORM_BYTE_ORDER == 
IS_LITTLE_ENDIAN) + memset(argstate, 0, byteCount); +#else + #error "Not yet implemented" +#endif +} + +/* ---------------------------------------------------------------- */ + +void Xoodoo_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + memcpy(data, (unsigned char*)state+offset, length); +#else + #error "Not yet implemented" +#endif +} + +/* ---------------------------------------------------------------- */ + +void Xoodoo_ExtractAndAddBytes(const void *argState, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + if (length == (3*4*4)) { + uint32_t *state = (uint32_t *)argState; + const uint32_t *ii = (const uint32_t *)input; + uint32_t *oo = (uint32_t *)output; + + oo[0] = state[0] ^ ii[0]; + oo[1] = state[1] ^ ii[1]; + oo[2] = state[2] ^ ii[2]; + oo[3] = state[3] ^ ii[3]; + oo[4] = state[4] ^ ii[4]; + oo[5] = state[5] ^ ii[5]; + oo[6] = state[6] ^ ii[6]; + oo[7] = state[7] ^ ii[7]; + oo[8] = state[8] ^ ii[8]; + oo[9] = state[9] ^ ii[9]; + oo[10] = state[10] ^ ii[10]; + oo[11] = state[11] ^ ii[11]; + } + else { + unsigned int sizeLeft = length; + unsigned int lanePosition = offset/4; + unsigned int offsetInLane = offset%4; + const unsigned char *curInput = input; + unsigned char *curOutput = output; + const uint32_t *state = (const uint32_t*)argState; + + state += lanePosition; + if ((sizeLeft > 0) && (offsetInLane != 0)) { + unsigned int bytesInLane = 4 - offsetInLane; + uint32_t lane = *state++ >> (offsetInLane * 8); + if (bytesInLane > sizeLeft) + bytesInLane = sizeLeft; + sizeLeft -= bytesInLane; + do { + *curOutput++ = (*curInput++) ^ (unsigned char)lane; + lane >>= 8; + } + while ( --bytesInLane != 0); + } + + while(sizeLeft >= 4) { + WRITE32_UNALIGNED( curOutput, READ32_UNALIGNED( curInput ) ^ *state++ ); + sizeLeft -= 4; + curInput += 4; + curOutput += 4; + } + + if 
(sizeLeft > 0) { + uint32_t lane = *state; + do { + *curOutput++ = (*curInput++) ^ (unsigned char)lane; + lane >>= 8; + } + while ( --sizeLeft != 0 ); + } + } +#else + #error "Not yet implemented" +#endif +} + +/* ---------------------------------------------------------------- */ + +#define DeclareVars uint32_t a00, a01, a02, a03; \ + uint32_t a10, a11, a12, a13; \ + uint32_t a20, a21, a22, a23; \ + uint32_t v1, v2 + +#define State2Vars a00 = state[0+0], a01 = state[0+1], a02 = state[0+2], a03 = state[0+3]; \ + a10 = state[4+0], a11 = state[4+1], a12 = state[4+2], a13 = state[4+3]; \ + a20 = state[8+0], a21 = state[8+1], a22 = state[8+2], a23 = state[8+3] + +#define Vars2State state[0+0] = a00, state[0+1] = a01, state[0+2] = a02, state[0+3] = a03; \ + state[4+0] = a10, state[4+1] = a11, state[4+2] = a12, state[4+3] = a13; \ + state[8+0] = a20, state[8+1] = a21, state[8+2] = a22, state[8+3] = a23 + +/* +** Theta: Column Parity Mixer +*/ +#define Theta() \ + v1 = a03 ^ a13 ^ a23; \ + v2 = a00 ^ a10 ^ a20; \ + v1 = ROTL32(v1, 5) ^ ROTL32(v1, 14); \ + a00 ^= v1; \ + a10 ^= v1; \ + a20 ^= v1; \ + v1 = a01 ^ a11 ^ a21; \ + v2 = ROTL32(v2, 5) ^ ROTL32(v2, 14); \ + a01 ^= v2; \ + a11 ^= v2; \ + a21 ^= v2; \ + v2 = a02 ^ a12 ^ a22; \ + v1 = ROTL32(v1, 5) ^ ROTL32(v1, 14); \ + a02 ^= v1; \ + a12 ^= v1; \ + a22 ^= v1; \ + v2 = ROTL32(v2, 5) ^ ROTL32(v2, 14); \ + a03 ^= v2; \ + a13 ^= v2; \ + a23 ^= v2 + +/* +** Rho-west: Plane shift +*/ +#define Rho_west() \ + a20 = ROTL32(a20, 11); \ + a21 = ROTL32(a21, 11); \ + a22 = ROTL32(a22, 11); \ + a23 = ROTL32(a23, 11); \ + v1 = a13; \ + a13 = a12; \ + a12 = a11; \ + a11 = a10; \ + a10 = v1 + +/* +** Iota: Round constants +*/ +#define Iota(__rc) a00 ^= __rc + +/* +** Chi: Non linear step, on colums +*/ +#define Chi() \ + a00 ^= ~a10 & a20; \ + a10 ^= ~a20 & a00; \ + a20 ^= ~a00 & a10; \ + \ + a01 ^= ~a11 & a21; \ + a11 ^= ~a21 & a01; \ + a21 ^= ~a01 & a11; \ + \ + a02 ^= ~a12 & a22; \ + a12 ^= ~a22 & a02; \ + a22 ^= ~a02 & a12; \ + 
\ + a03 ^= ~a13 & a23; \ + a13 ^= ~a23 & a03; \ + a23 ^= ~a03 & a13 + +/* +** Rho-east: Plane shift +*/ +#define Rho_east() \ + a10 = ROTL32(a10, 1); \ + a11 = ROTL32(a11, 1); \ + a12 = ROTL32(a12, 1); \ + a13 = ROTL32(a13, 1); \ + v1 = ROTL32(a23, 8); \ + a23 = ROTL32(a21, 8); \ + a21 = v1; \ + v1 = ROTL32(a22, 8); \ + a22 = ROTL32(a20, 8); \ + a20 = v1 + +#define Round(__rc) \ + Theta(); \ + Dump3("Theta"); \ + Rho_west(); \ + Dump3("Rho-west"); \ + Iota(__rc); \ + Dump3("Iota"); \ + Chi(); \ + Dump3("Chi"); \ + Rho_east(); \ + Dump3("Rho-east") + +static const uint32_t RC[MAXROUNDS] = { + _rc12, + _rc11, + _rc10, + _rc9, + _rc8, + _rc7, + _rc6, + _rc5, + _rc4, + _rc3, + _rc2, + _rc1 +}; + +void Xoodoo_Permute_Nrounds( uint32_t * state, uint32_t nr ) +{ + DeclareVars; + uint32_t i; + + State2Vars; + for (i = MAXROUNDS - nr; i < MAXROUNDS; ++i ) { + Round(RC[i]); + Dump2("Round"); + } + Dump1("Permutation"); + Vars2State; +} + +void Xoodoo_Permute_6rounds( uint32_t * state) +{ + DeclareVars; + + State2Vars; + Round(_rc6); + Round(_rc5); + Round(_rc4); + Round(_rc3); + Round(_rc2); + Round(_rc1); + Dump1("Permutation"); + Vars2State; +} + +void Xoodoo_Permute_12rounds( uint32_t * state) +{ + DeclareVars; + + State2Vars; + Round(_rc12); + Round(_rc11); + Round(_rc10); + Round(_rc9); + Round(_rc8); + Round(_rc7); + Round(_rc6); + Round(_rc5); + Round(_rc4); + Round(_rc3); + Round(_rc2); + Round(_rc1); + Dump1("Permutation"); + Vars2State; +} diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Xoodoo.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Xoodoo.h new file mode 100644 index 0000000..1b6f1a9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Xoodoo.h @@ -0,0 +1,79 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. 
+ +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodoo_h_ +#define _Xoodoo_h_ + +#include +#include + +#define MAXROUNDS 12 +#define NROWS 3 +#define NCOLUMS 4 +#define NLANES (NCOLUMS*NROWS) + +/* Round constants */ +#define _rc12 0x00000058 +#define _rc11 0x00000038 +#define _rc10 0x000003C0 +#define _rc9 0x000000D0 +#define _rc8 0x00000120 +#define _rc7 0x00000014 +#define _rc6 0x00000060 +#define _rc5 0x0000002C +#define _rc4 0x00000380 +#define _rc3 0x000000F0 +#define _rc2 0x000001A0 +#define _rc1 0x00000012 + + +#if !defined(ROTL32) + #if defined (__arm__) && !defined(__GNUC__) + #define ROTL32(a, offset) __ror(a, (32-(offset))%32) + #elif defined(_MSC_VER) + #define ROTL32(a, offset) _rotl(a, (offset)%32) + #else + #define ROTL32(a, offset) ((((uint32_t)a) << ((offset)%32)) ^ (((uint32_t)a) >> ((32-(offset))%32))) + #endif +#endif + +#if !defined(READ32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define READ32_UNALIGNED(argAddress) (*((const __packed uint32_t*)(argAddress))) + #elif defined(_MSC_VER) + #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress))) + #else + #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress))) + #endif +#endif + +#if !defined(WRITE32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define WRITE32_UNALIGNED(argAddress, argData) (*((__packed uint32_t*)(argAddress)) = (argData)) + #elif defined(_MSC_VER) + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #else + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #endif +#endif + +#if !defined(index) + 
#define index(__x,__y) ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS)) +#endif + +typedef uint32_t tXoodooLane; + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Xoodyak-full-blocks.c b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Xoodyak-full-blocks.c new file mode 100644 index 0000000..99a62ea --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Xoodyak-full-blocks.c @@ -0,0 +1,127 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include +#include +#include +#include "Xoodoo-SnP.h" +#include "Xoodyak-parameters.h" + +#ifdef OUTPUT +#include +#endif + +#define SnP_AddByte Xoodoo_AddByte +#define SnP_AddBytes Xoodoo_AddBytes +#define SnP_ExtractBytes Xoodoo_ExtractBytes +#define SnP_ExtractAndAddBytes Xoodoo_ExtractAndAddBytes +#define SnP_Permute Xoodoo_Permute_12rounds +#define SnP_OverwriteBytes Xoodoo_OverwriteBytes + +size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen) +{ + size_t initialLength = XLen; + + do { + SnP_Permute(state); /* Xoodyak_Up(instance, NULL, 0, 0); */ + SnP_AddBytes(state, X, 0, Xoodyak_Rkin); /* Xoodyak_Down(instance, X, Xoodyak_Rkin, 0); */ + SnP_AddByte(state, 0x01, Xoodyak_Rkin); + X += Xoodyak_Rkin; + XLen -= Xoodyak_Rkin; + } while (XLen >= Xoodyak_Rkin); + + return initialLength - XLen; +} + +size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen) +{ + size_t initialLength = XLen; + + do { + 
SnP_Permute(state); /* Xoodyak_Up(instance, NULL, 0, 0); */ + SnP_AddBytes(state, X, 0, Xoodyak_Rhash); /* Xoodyak_Down(instance, X, Xoodyak_Rhash, 0); */ + SnP_AddByte(state, 0x01, Xoodyak_Rhash); + X += Xoodyak_Rhash; + XLen -= Xoodyak_Rhash; + } while (XLen >= Xoodyak_Rhash); + + return initialLength - XLen; +} + + +size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen) +{ + size_t initialLength = YLen; + + do { + SnP_AddByte(state, 0x01, 0); /* Xoodyak_Down(instance, NULL, 0, 0); */ + SnP_Permute(state); /* Xoodyak_Up(instance, Y, Xoodyak_Rkout, 0); */ + SnP_ExtractBytes(state, Y, 0, Xoodyak_Rkout); + Y += Xoodyak_Rkout; + YLen -= Xoodyak_Rkout; + } while (YLen >= Xoodyak_Rkout); + + return initialLength - YLen; +} + +size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen) +{ + size_t initialLength = YLen; + + do { + SnP_AddByte(state, 0x01, 0); /* Xoodyak_Down(instance, NULL, 0, 0); */ + SnP_Permute(state); /* Xoodyak_Up(instance, Y, Xoodyak_Rhash, 0); */ + SnP_ExtractBytes(state, Y, 0, Xoodyak_Rhash); + Y += Xoodyak_Rhash; + YLen -= Xoodyak_Rhash; + } while (YLen >= Xoodyak_Rhash); + + return initialLength - YLen; +} + +size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen) +{ + size_t initialLength = IOLen; + + do { + SnP_Permute(state); + SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout); + SnP_OverwriteBytes(state, O, 0, Xoodyak_Rkout); + SnP_AddByte(state, 0x01, Xoodyak_Rkout); + I += Xoodyak_Rkout; + O += Xoodyak_Rkout; + IOLen -= Xoodyak_Rkout; + } while (IOLen >= Xoodyak_Rkout); + + return initialLength - IOLen; +} + +size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen) +{ + size_t initialLength = IOLen; + + do { + SnP_Permute(state); + SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout); + SnP_AddBytes(state, O, 0, Xoodyak_Rkout); + SnP_AddByte(state, 0x01, Xoodyak_Rkout); + I += Xoodyak_Rkout; + O += Xoodyak_Rkout; + IOLen -= 
Xoodyak_Rkout; + } while (IOLen >= Xoodyak_Rkout); + + return initialLength - IOLen; +} diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Xoodyak-parameters.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Xoodyak-parameters.h new file mode 100644 index 0000000..a8c34d8 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Xoodyak-parameters.h @@ -0,0 +1,26 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodyak_parameters_h_ +#define _Xoodyak_parameters_h_ + +#define Xoodyak_f_bPrime 48 +#define Xoodyak_Rhash 16 +#define Xoodyak_Rkin 44 +#define Xoodyak_Rkout 24 +#define Xoodyak_lRatchet 16 + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Xoodyak.c b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Xoodyak.c new file mode 100644 index 0000000..e0b67b5 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/Xoodyak.c @@ -0,0 +1,53 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". 
+ +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifdef XoodooReference + #include "displayIntermediateValues.h" +#endif + +#include +#include +#include "Xoodyak.h" + +#ifdef OUTPUT +#include +#include + +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length); +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length) +{ + unsigned int i; + + fprintf(f, "%s:", synopsis); + for(i=0; i +#include "Cyclist.h" +#include "Xoodoo-SnP.h" +#include "Xoodyak-parameters.h" + +KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment) +KCP_DeclareCyclistFunctions(Xoodyak) + +#else +#error This requires an implementation of Xoodoo +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/align.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/align.h new file mode 100644 index 0000000..82ad2f9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/align.h @@ -0,0 +1,33 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _align_h_ +#define _align_h_ + +/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. 
*/ +#ifdef ALIGN +#undef ALIGN +#endif + +#if defined(__GNUC__) +#define ALIGN(x) __attribute__ ((aligned(x))) +#elif defined(_MSC_VER) +#define ALIGN(x) __declspec(align(x)) +#elif defined(__ARMCC_VERSION) +#define ALIGN(x) __align(x) +#else +#define ALIGN(x) +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/api.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/api.h new file mode 100644 index 0000000..4ceda96 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/brg_endian.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/brg_endian.h new file mode 100644 index 0000000..7c640b9 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/brg_endian.h @@ -0,0 +1,143 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The redistribution and use of this software (with or without changes) + is allowed without the payment of fees or royalties provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. 
+ --------------------------------------------------------------------------- + Issue Date: 20/12/2007 + Changes for ARM 9/9/2010 +*/ + +#ifndef _BRG_ENDIAN_H +#define _BRG_ENDIAN_H + +#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ +#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ + +#if 0 +/* Include files where endian defines and byteswap functions may reside */ +#if defined( __sun ) +# include +#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ ) +# include +#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \ + defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ ) +# include +#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ ) +# if !defined( __MINGW32__ ) && !defined( _AIX ) +# include +# if !defined( __BEOS__ ) +# include +# endif +# endif +#endif +#endif + +/* Now attempt to set the define for platform byte order using any */ +/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */ +/* seem to encompass most endian symbol definitions */ + +#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN ) +# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN ) +# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( _BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( _LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( 
__BIG_ENDIAN ) && defined( __LITTLE_ENDIAN ) +# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ ) +# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +/* if the platform byte order could not be determined, then try to */ +/* set this define using common machine defines */ +#if !defined(PLATFORM_BYTE_ORDER) + +#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ + defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ + defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ + defined( vax ) || defined( vms ) || defined( VMS ) || \ + defined( __VMS ) || defined( _M_X64 ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN + +#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ + defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ + defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ + defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ + defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ + defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ + defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \ + defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ ) 
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN + +#elif defined(__arm__) +# ifdef __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# else +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif 1 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#elif 0 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#else +# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order +#endif + +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/config.h b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/config.h new file mode 100644 index 0000000..7dfc043 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/config.h @@ -0,0 +1,4 @@ +/* File generated by ToTargetConfigFile.xsl */ + +#define XKCP_has_Xoodyak +#define XKCP_has_Xoodoo diff --git a/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/encrypt.c b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/encrypt.c new file mode 100644 index 0000000..3090334 --- /dev/null +++ b/xoodyak/Implementations/crypto_aead/xoodyakv1/XKCP-plain-ua/encrypt.c @@ -0,0 +1,92 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include "crypto_aead.h" +#include "api.h" +#include "Xoodyak.h" +#include + +#if !defined(CRYPTO_KEYBYTES) + #define CRYPTO_KEYBYTES 16 +#endif +#if !defined(CRYPTO_NPUBBYTES) + #define CRYPTO_NPUBBYTES 16 +#endif + +#define TAGLEN 16 + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k) +{ + Xoodyak_Instance instance; + + (void)nsec; + + Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, NULL, 0, NULL, 0); + Xoodyak_Absorb(&instance, npub, CRYPTO_NPUBBYTES); + Xoodyak_Absorb(&instance, ad, (size_t)adlen); + Xoodyak_Encrypt(&instance, m, c, (size_t)mlen); + Xoodyak_Squeeze(&instance, c + mlen, TAGLEN); + *clen = mlen + TAGLEN; + #if 0 + { + unsigned int i; + for (i = 0; i < *clen; ++i ) + { + printf("\\x%02x", c[i] ); + } + printf("\n"); + } + #endif + return 0; +} + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + Xoodyak_Instance instance; + unsigned char tag[TAGLEN]; + unsigned long long mlen_; + + (void)nsec; + + *mlen = 0; + if (clen < TAGLEN) { + return -1; + } + mlen_ = clen - TAGLEN; + Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, NULL, 0, NULL, 0); + Xoodyak_Absorb(&instance, npub, CRYPTO_NPUBBYTES); + Xoodyak_Absorb(&instance, ad, (size_t)adlen); + Xoodyak_Decrypt(&instance, c, m, (size_t)mlen_); + Xoodyak_Squeeze(&instance, tag, TAGLEN); + if (memcmp(tag, c + mlen_, TAGLEN) != 0) { + memset(m, 0, (size_t)mlen_); + return -1; + } + *mlen = mlen_; + return 0; +} diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Cyclist.h 
b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Cyclist.h new file mode 100644 index 0000000..54522bb --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Cyclist.h @@ -0,0 +1,66 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Cyclist_h_ +#define _Cyclist_h_ + +#include +#include "align.h" + +#define Cyclist_ModeHash 1 +#define Cyclist_ModeKeyed 2 + +#define Cyclist_PhaseDown 1 +#define Cyclist_PhaseUp 2 + +#ifdef OUTPUT + +#include + +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + uint8_t stateShadow[size]; \ + FILE *file; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#else + +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#endif + +#define KCP_DeclareCyclistFunctions(prefix) \ + void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \ + void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \ + void prefix##_Encrypt(prefix##_Instance *instance, const 
uint8_t *P, uint8_t *C, size_t PLen); \ + void prefix##_Decrypt(prefix##_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen); \ + void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \ + void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \ + void prefix##_Ratchet(prefix##_Instance *instance); + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Cyclist.inc b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Cyclist.inc new file mode 100644 index 0000000..ba7a156 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Cyclist.inc @@ -0,0 +1,327 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define JOIN0(a, b) a ## b +#define JOIN(a, b) JOIN0(a, b) + +#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize) +#define SnP_Initialize JOIN(SnP, _Initialize) +#define SnP_AddBytes JOIN(SnP, _AddBytes) +#define SnP_AddByte JOIN(SnP, _AddByte) +#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes) +#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes) +#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes) + +#define Cyclist_Instance JOIN(prefix, _Instance) +#define Cyclist_Initialize JOIN(prefix, _Initialize) +#define Cyclist_Absorb JOIN(prefix, _Absorb) +#define Cyclist_Encrypt JOIN(prefix, _Encrypt) +#define Cyclist_Decrypt JOIN(prefix, _Decrypt) +#define Cyclist_Squeeze JOIN(prefix, _Squeeze) +#define Cyclist_SqueezeKey JOIN(prefix, _SqueezeKey) +#define Cyclist_Ratchet JOIN(prefix, _Ratchet) + +#define Cyclist_AbsorbAny JOIN(prefix, _AbsorbAny) +#define Cyclist_AbsorbKey JOIN(prefix, _AbsorbKey) +#define Cyclist_SqueezeAny JOIN(prefix, _SqueezeAny) +#define Cyclist_Down JOIN(prefix, _Down) +#define Cyclist_Up JOIN(prefix, _Up) +#define Cyclist_Crypt JOIN(prefix, _Crypt) + +#define Cyclist_f_bPrime JOIN(prefix, _f_bPrime) +#define Cyclist_Rhash JOIN(prefix, _Rhash) +#define Cyclist_Rkin JOIN(prefix, _Rkin) +#define Cyclist_Rkout JOIN(prefix, _Rkout) +#define Cyclist_lRatchet JOIN(prefix, _lRatchet) + +#if defined(CyclistFullBlocks_supported) +#define Cyclist_AbsorbKeyedFullBlocks JOIN(prefix, _AbsorbKeyedFullBlocks) +#define Cyclist_AbsorbHashFullBlocks JOIN(prefix, _AbsorbHashFullBlocks) +#define Cyclist_SqueezeKeyedFullBlocks JOIN(prefix, _SqueezeKeyedFullBlocks) +#define Cyclist_SqueezeHashFullBlocks JOIN(prefix, _SqueezeHashFullBlocks) +#define Cyclist_EncryptFullBlocks JOIN(prefix, _EncryptFullBlocks) +#define Cyclist_DecryptFullBlocks JOIN(prefix, _DecryptFullBlocks) +#endif + +/* ------- Cyclist internal interfaces ------- */ + +static void Cyclist_Down(Cyclist_Instance *instance, const 
uint8_t *Xi, unsigned int XiLen, uint8_t Cd) +{ + SnP_AddBytes(instance->state, Xi, 0, XiLen); + SnP_AddByte(instance->state, 0x01, XiLen); + SnP_AddByte(instance->state, (instance->mode == Cyclist_ModeHash) ? (Cd & 0x01) : Cd, Cyclist_f_bPrime - 1); + instance->phase = Cyclist_PhaseDown; + +} + +static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu) +{ + #if defined(OUTPUT) + uint8_t s[Cyclist_f_bPrime]; + #endif + + if (instance->mode != Cyclist_ModeHash) { + SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1); + } + #if defined(OUTPUT) + if (instance->file != NULL) { + SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime ); + SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime ); + } + #endif + SnP_Permute( instance->state ); + #if defined(OUTPUT) + if (instance->file != NULL) { + memcpy( instance->stateShadow, instance->state, sizeof(instance->state) ); + fprintf( instance->file, "Data XORed" ); + displayByteString( instance->file, "", s, Cyclist_f_bPrime ); + SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime ); + fprintf( instance->file, "After f() "); + displayByteString( instance->file, "", s, Cyclist_f_bPrime ); + } + #endif + instance->phase = Cyclist_PhaseUp; + SnP_ExtractBytes( instance->state, Yi, 0, YiLen ); +} + +static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd) +{ + unsigned int splitLen; + + do { + if (instance->phase != Cyclist_PhaseUp) { + Cyclist_Up(instance, NULL, 0, 0); + } + splitLen = MyMin(XLen, r); + Cyclist_Down(instance, X, splitLen, Cd); + Cd = 0; + X += splitLen; + XLen -= splitLen; + #if defined(CyclistFullBlocks_supported) + if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) { + size_t lenProcessed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen); + X += lenProcessed; + XLen -= lenProcessed; + } + else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) { + size_t lenProcessed 
= Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen); + X += lenProcessed; + XLen -= lenProcessed; + } + #endif + } while ( XLen != 0 ); +} + +static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen) +{ + uint8_t KID[Cyclist_Rkin]; + + assert(instance->mode == Cyclist_ModeHash); + assert((KLen + IDLen) <= (Cyclist_Rkin - 1)); + + instance->mode = Cyclist_ModeKeyed; + instance->Rabsorb = Cyclist_Rkin; + instance->Rsqueeze = Cyclist_Rkout; + if (KLen != 0) { + memcpy(KID, K, KLen); + memcpy(KID + KLen, ID, IDLen); + KID[KLen + IDLen] = (uint8_t)IDLen; + Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02); + if (counterLen != 0) { + Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00); + } + } +} + +static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu) +{ + unsigned int len; + + len = MyMin(YLen, instance->Rsqueeze ); + Cyclist_Up(instance, Y, len, Cu); + Y += len; + YLen -= len; + while (YLen != 0) { + #if defined(CyclistFullBlocks_supported) + if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkin)) { + size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen); + Y += lenProcessed; + YLen -= lenProcessed; + } + else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) { + size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen); + Y += lenProcessed; + YLen -= lenProcessed; + } + else + #endif + { + Cyclist_Down(instance, NULL, 0, 0); + len = MyMin(YLen, instance->Rsqueeze ); + Cyclist_Up(instance, Y, len, 0); + Y += len; + YLen -= len; + } + } +} + +static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt) +{ + unsigned int splitLen; + uint8_t P[Cyclist_Rkout]; + uint8_t Cu = 0x80; + + do { + if (decrypt != 0) { + #if defined(CyclistFullBlocks_supported) + if ((Cu 
== 0) && (IOLen >= Cyclist_Rkout)) { + size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen); + I += lenProcessed; + O += lenProcessed; + IOLen -= lenProcessed; + } + else + #endif + { + splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */ + Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */ + Xoodoo_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */ + Cyclist_Down(instance, O, splitLen, 0x00); + I += splitLen; + O += splitLen; + IOLen -= splitLen; + } + } + else { + #if defined(CyclistFullBlocks_supported) + if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) { + size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen); + I += lenProcessed; + O += lenProcessed; + IOLen -= lenProcessed; + } + else + #endif + { + splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */ + memcpy(P, I, splitLen); + Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */ + Xoodoo_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */ + Cyclist_Down(instance, P, splitLen, 0x00); + I += splitLen; + O += splitLen; + IOLen -= splitLen; + } + } + Cu = 0x00; + } while ( IOLen != 0 ); +} + +/* ------- Cyclist interfaces ------- */ + +void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen) +{ + SnP_StaticInitialize(); + SnP_Initialize(instance->state); + instance->phase = Cyclist_PhaseUp; + instance->mode = Cyclist_ModeHash; + instance->Rabsorb = Cyclist_Rhash; + instance->Rsqueeze = Cyclist_Rhash; + #ifdef OUTPUT + instance->file = 0; + SnP_Initialize( instance->stateShadow ); + #endif + if (KLen != 0) { + Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, counter, counterLen); + } +} + +void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, 
size_t XLen) +{ + Cyclist_AbsorbAny(instance, X, XLen, instance->Rabsorb, 0x03); +} + +void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen) +{ + assert(instance->mode == Cyclist_ModeKeyed); + Cyclist_Crypt(instance, P, C, PLen, 0); +} + +void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen) +{ + assert(instance->mode == Cyclist_ModeKeyed); + Cyclist_Crypt(instance, C, P, CLen, 1); +} + +void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen) +{ + Cyclist_SqueezeAny(instance, Y, YLen, 0x40); +} + +void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen) +{ + assert(instance->mode == Cyclist_ModeKeyed); + Cyclist_SqueezeAny(instance, K, KLen, 0x20); +} + +void Cyclist_Ratchet(Cyclist_Instance *instance) +{ + uint8_t buffer[Cyclist_lRatchet]; + + assert(instance->mode == Cyclist_ModeKeyed); + /* Squeeze then absorb is the same as overwriting with zeros */ + Cyclist_SqueezeAny(instance, buffer, sizeof(buffer), 0x10); + Cyclist_AbsorbAny(instance, buffer, sizeof(buffer), instance->Rabsorb, 0x00); +} + +#undef SnP_StaticInitialize +#undef SnP_Initialize +#undef SnP_AddBytes +#undef SnP_AddByte +#undef SnP_OverwriteBytes +#undef SnP_ExtractBytes +#undef SnP_ExtractAndAddBytes + +#undef Cyclist_Instance +#undef Cyclist_Initialize +#undef Cyclist_Absorb +#undef Cyclist_Encrypt +#undef Cyclist_Decrypt +#undef Cyclist_Squeeze +#undef Cyclist_SqueezeKey +#undef Cyclist_Ratchet + +#undef Cyclist_AbsorbAny +#undef Cyclist_AbsorbKey +#undef Cyclist_SqueezeAny +#undef Cyclist_Down +#undef Cyclist_Up +#undef Cyclist_Crypt + +#undef Cyclist_f_bPrime +#undef Cyclist_Rhash +#undef Cyclist_Rkin +#undef Cyclist_Rkout +#undef Cyclist_lRatchet + +#if defined(CyclistFullBlocks_supported) +#undef Cyclist_AbsorbKeyedFullBlocks +#undef Cyclist_AbsorbHashFullBlocks +#undef Cyclist_SqueezeKeyedFullBlocks +#undef Cyclist_SqueezeHashFullBlocks +#undef Cyclist_EncryptFullBlocks 
+#undef Cyclist_DecryptFullBlocks +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Xoodoo-SnP.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Xoodoo-SnP.h new file mode 100644 index 0000000..7d0c98b --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Xoodoo-SnP.h @@ -0,0 +1,55 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodoo_SnP_h_ +#define _Xoodoo_SnP_h_ + +#include <stddef.h> /* size_t */ +#include <stdint.h> /* uint8_t */ + +/** For the documentation, see SnP-documentation.h. 
+ */ + +#define Xoodoo_implementation "32-bit optimized ARM assembler implementation" +#define Xoodoo_stateSizeInBytes (3*4*4) +#define Xoodoo_stateAlignment 4 + +#define Xoodoo_StaticInitialize() +void Xoodoo_Initialize(void *state); +#define Xoodoo_AddByte(argS, argData, argOffset) ((uint8_t*)argS)[argOffset] ^= (argData) +void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount); +//void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds); +void Xoodoo_Permute_6rounds(void *state); +void Xoodoo_Permute_12rounds(void *state); +void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length); + +#define Xoodoo_FastXoofff_supported +void Xoofff_AddIs(uint8_t *output, const uint8_t *input, size_t bitLen); +size_t Xoofff_CompressFastLoop(uint8_t *kRoll, uint8_t *xAccu, const uint8_t *input, size_t length); +size_t Xoofff_ExpandFastLoop(uint8_t *yAccu, const uint8_t *kRoll, uint8_t *output, size_t length); + +#define CyclistFullBlocks_supported +size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen); +size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen); +size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen); +size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen); +size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen); +size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen); + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Xoodoo-uf-armv6-le-gcc.s 
b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Xoodoo-uf-armv6-le-gcc.s new file mode 100644 index 0000000..0baa5db --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Xoodoo-uf-armv6-le-gcc.s @@ -0,0 +1,726 @@ +@ +@ The eXtended Keccak Code Package (XKCP) +@ https://github.com/XKCP/XKCP +@ +@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. +@ +@ Implementation by Ronny Van Keer, hereby denoted as "the implementer". +@ +@ For more information, feedback or questions, please refer to the Keccak Team website: +@ https://keccak.team/ +@ +@ To the extent possible under law, the implementer has waived all copyright +@ and related or neighboring rights to the source code in this file. +@ http://creativecommons.org/publicdomain/zero/1.0/ +@ + +@ WARNING: These functions work only on little endian CPU with ARMv6 architecture (e.g., ARM11). + + +.text + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_Initialize(void *state) +@ + .align 4 +.global Xoodoo_Initialize +.type Xoodoo_Initialize, %function; +Xoodoo_Initialize: + movs r1, #0 + movs r2, #0 + movs r3, #0 + movs r12, #0 + stmia r0!, { r1 - r3, r12 } + stmia r0!, { r1 - r3, r12 } + stmia r0!, { r1 - r3, r12 } + bx lr + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +@ + .align 4 +.global Xoodoo_AddBytes +.type Xoodoo_AddBytes, %function; +Xoodoo_AddBytes: + push {r4,lr} + adds r0, r0, r2 @ state += offset + subs r3, r3, #4 @ .if length >= 4 + bcc Xoodoo_AddBytes_Bytes +Xoodoo_AddBytes_LanesLoop: @ then, perform on lanes + ldr r2, [r0] + ldr r4, [r1], #4 + eors r2, r2, r4 + str r2, [r0], #4 + subs r3, r3, #4 + bcs Xoodoo_AddBytes_LanesLoop +Xoodoo_AddBytes_Bytes: + adds r3, r3, #3 + bcc Xoodoo_AddBytes_Exit +Xoodoo_AddBytes_BytesLoop: + 
ldrb r2, [r0] + ldrb r4, [r1], #1 + eors r2, r2, r4 + strb r2, [r0], #1 + subs r3, r3, #1 + bcs Xoodoo_AddBytes_BytesLoop +Xoodoo_AddBytes_Exit: + pop {r4,pc} + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +@ + .align 4 +.global Xoodoo_OverwriteBytes +.type Xoodoo_OverwriteBytes, %function; +Xoodoo_OverwriteBytes: + adds r0, r0, r2 @ state += offset + subs r3, r3, #4 @ .if length >= 4 + bcc Xoodoo_OverwriteBytes_Bytes +Xoodoo_OverwriteBytes_LanesLoop: @ then, perform on words + ldr r2, [r1], #4 + str r2, [r0], #4 + subs r3, r3, #4 + bcs Xoodoo_OverwriteBytes_LanesLoop +Xoodoo_OverwriteBytes_Bytes: + adds r3, r3, #3 + bcc Xoodoo_OverwriteBytes_Exit +Xoodoo_OverwriteBytes_BytesLoop: + ldrb r2, [r1], #1 + strb r2, [r0], #1 + subs r3, r3, #1 + bcs Xoodoo_OverwriteBytes_BytesLoop +Xoodoo_OverwriteBytes_Exit: + bx lr + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount) +@ + .align 4 +.global Xoodoo_OverwriteWithZeroes +.type Xoodoo_OverwriteWithZeroes, %function; +Xoodoo_OverwriteWithZeroes: + movs r3, #0 + lsrs r2, r1, #2 + beq Xoodoo_OverwriteWithZeroes_Bytes +Xoodoo_OverwriteWithZeroes_LoopLanes: + str r3, [r0], #4 + subs r2, r2, #1 + bne Xoodoo_OverwriteWithZeroes_LoopLanes +Xoodoo_OverwriteWithZeroes_Bytes: + ands r1, #3 + beq Xoodoo_OverwriteWithZeroes_Exit +Xoodoo_OverwriteWithZeroes_LoopBytes: + strb r3, [r0], #1 + subs r1, r1, #1 + bne Xoodoo_OverwriteWithZeroes_LoopBytes +Xoodoo_OverwriteWithZeroes_Exit: + bx lr + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length) +@ + .align 4 +.global Xoodoo_ExtractBytes +.type Xoodoo_ExtractBytes, %function; +Xoodoo_ExtractBytes: 
+ adds r0, r0, r2 @ state += offset + subs r3, r3, #4 @ .if length >= 4 + bcc Xoodoo_ExtractBytes_Bytes +Xoodoo_ExtractBytes_LanesLoop: @ then, handle words + ldr r2, [r0], #4 + str r2, [r1], #4 + subs r3, r3, #4 + bcs Xoodoo_ExtractBytes_LanesLoop +Xoodoo_ExtractBytes_Bytes: + adds r3, r3, #3 + bcc Xoodoo_ExtractBytes_Exit +Xoodoo_ExtractBytes_BytesLoop: + ldrb r2, [r0], #1 + strb r2, [r1], #1 + subs r3, r3, #1 + bcs Xoodoo_ExtractBytes_BytesLoop +Xoodoo_ExtractBytes_Exit: + bx lr + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_ExtractAndAddBytes(void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +@ + .align 4 +.global Xoodoo_ExtractAndAddBytes +.type Xoodoo_ExtractAndAddBytes, %function; +Xoodoo_ExtractAndAddBytes: + push {r4,r5} + adds r0, r0, r3 @ state += offset (offset register no longer needed, reuse for length) + ldr r3, [sp, #8] @ get length argument from stack + subs r3, r3, #4 @ .if length >= 4 + bcc Xoodoo_ExtractAndAddBytes_Bytes +Xoodoo_ExtractAndAddBytes_LanesLoop: @ then, handle words + ldr r5, [r0], #4 + ldr r4, [r1], #4 + eors r5, r5, r4 + str r5, [r2], #4 + subs r3, r3, #4 + bcs Xoodoo_ExtractAndAddBytes_LanesLoop +Xoodoo_ExtractAndAddBytes_Bytes: + adds r3, r3, #3 + bcc Xoodoo_ExtractAndAddBytes_Exit +Xoodoo_ExtractAndAddBytes_BytesLoop: + ldrb r5, [r0], #1 + ldrb r4, [r1], #1 + eors r5, r5, r4 + strb r5, [r2], #1 + subs r3, r3, #1 + bcs Xoodoo_ExtractAndAddBytes_BytesLoop +Xoodoo_ExtractAndAddBytes_Exit: + pop {r4,r5} + bx lr + + +@ ---------------------------------------------------------------------------- + +.equ _r0 , 5 +.equ _r1 , 14 +.equ _t3 , 1 + +.equ _w1 , 11 + +.equ _e0 , 2 +.equ _e1 , 8 + +.equ _rc12 , 0x00000058 +.equ _rc11 , 0x00000038 +.equ _rc10 , 0x000003C0 +.equ _rc9 , 0x000000D0 +.equ _rc8 , 0x00000120 +.equ _rc7 , 0x00000014 +.equ _rc6 , 0x00000060 +.equ _rc5 , 0x0000002C +.equ _rc4 , 0x00000380 +.equ _rc3 , 
0x000000F0 +.equ _rc2 , 0x000001A0 +.equ _rc1 , 0x00000012 + +.equ _rc6x1, 0x00000003 +.equ _rc5x2, 0x0b000000 +.equ _rc4x3, 0x07000000 +.equ _rc3x4, 0x000f0000 +.equ _rc2x5, 0x0000d000 +.equ _rc1x6, 0x00000048 + +.equ _rc12x1, 0xc0000002 +.equ _rc11x2, 0x0e000000 +.equ _rc10x3, 0x07800000 +.equ _rc9x4 , 0x000d0000 +.equ _rc8x5 , 0x00009000 +.equ _rc7x6 , 0x00000050 +.equ _rc6x7 , 0x0000000c +.equ _rc5x8 , 0x2c000000 +.equ _rc4x9 , 0x1c000000 +.equ _rc3x10, 0x003c0000 +.equ _rc2x11, 0x00034000 +.equ _rc1x12, 0x00000120 + +@ ---------------------------------------------------------------------------- + +.macro mXor3 ro, a0, a1, a2, rho_e1, rho_e2 + .if ((\rho_e1)%32) == 0 + eors \ro, \a0, \a1 + .else + eor \ro, \a0, \a1, ROR #(32-(\rho_e1))%32 + .endif + .if ((\rho_e2)%32) == 0 + eors \ro, \ro, \a2 + .else + eor \ro, \ro, \a2, ROR #(32-(\rho_e2))%32 + .endif + .endm + +.macro mRliXor ro, ri, rot + .if ((\rot)%32) == 0 + eors \ro, \ro, \ri + .else + eor \ro, \ro, \ri, ROR #(32-(\rot))%32 + .endif + .endm + +.macro mRloXor ro, ri, rot + .if ((\rot)%32) == 0 + eors \ro, \ro, \ri + .else + eor \ro, \ri, \ro, ROR #(32-(\rot))%32 + .endif + .endm + +.macro mChi3 a0,a1,a2,r0,r1 + bic \r0, \a2, \a1, ROR #_w1 + eors \a0, \a0, \r0, ROR #32-_w1 + bic \r1, \a0, \a2, ROR #32-_w1 + eors \a1, \a1, \r1 + bic \r1, \a1, \a0 + eors \a2, \a2, \r1, ROR #_w1 + .endm + +.macro mRound r6i, r7i, r8i, r9i, r6w, r7w, r8w, r9w, r10i, r11i, r12i, lri, rho_e1, rho_we2, rc + + @ Theta: Column Parity Mixer (with late Rho-west, Rho-east bit rotations) + mXor3 r0, r5, \r9i, \lri, \rho_e1, \rho_we2 + mXor3 r1, r2, \r6i, \r10i, \rho_e1, \rho_we2 + mRliXor r0, r0, _r1-_r0 + mRloXor r2, r0, 32-_r0 + mRloXor \r6i, r0, \rho_e1-_r0 + mRloXor \r10i, r0, \rho_we2-_r0 + + mXor3 r0, r3, \r7i, \r11i, \rho_e1, \rho_we2 + mRliXor r1, r1, _r1-_r0 + mRloXor r3, r1, 32-_r0 + mRloXor \r7i, r1, \rho_e1-_r0 + mRloXor \r11i, r1, \rho_we2-_r0 + + mXor3 r1, r4, \r8i, \r12i, \rho_e1, \rho_we2 + mRliXor r0, r0, _r1-_r0 + 
mRloXor r4, r0, 32-_r0 + mRloXor \r8i, r0, \rho_e1-_r0 + mRloXor \r12i, r0, \rho_we2-_r0 + + mRliXor r1, r1, _r1-_r0 + mRloXor r5, r1, 32-_r0 + mRloXor \r9i, r1, \rho_e1-_r0 + mRloXor \lri, r1, \rho_we2-_r0 + @ After Theta the whole state is rotated -r0 + @ from here we must use a1.w instead of a1.i + + @ Iota: round constant + .if \rc == 0xc0000002 + eor r2, r2, #0x00000002 + eor r2, r2, #0xc0000000 + .else + eor r2, r2, #\rc + .endif + + @ Chi: non linear step, on columns + mChi3 r2, \r6w, \r10i, r0, r1 + mChi3 r3, \r7w, \r11i, r0, r1 + mChi3 r4, \r8w, \r12i, r0, r1 + mChi3 r5, \r9w, \lri, r0, r1 + .endm + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_Permute_6rounds( void *state ) +@ + .align 4 +.global Xoodoo_Permute_6rounds +.type Xoodoo_Permute_6rounds, %function; +Xoodoo_Permute_6rounds: + push {r0,r4-r11,lr} + ldmia r0!, {r2-r5} + ldmia r0!, {r8-r9} + ldmia r0!, {r6-r7} + ldmia r0, {r10-r12,lr} + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 32, 32, _rc6x1 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2 + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6 + pop {r0,r1} + ror r2, r2, #32-(6*_r0)%32 + ror r3, r3, #32-(6*_r0)%32 + ror r4, r4, #32-(6*_r0)%32 + ror r5, r5, #32-(6*_r0)%32 + ror r6, r6, #32-(6*_r0+1)%32 + ror r7, r7, #32-(6*_r0+1)%32 + ror r8, r8, #32-(6*_r0+1)%32 + ror r9, r9, #32-(6*_r0+1)%32 + ror r10, r10, #32-(6*_r0+_e1+_w1)%32 + ror r11, r11, #32-(6*_r0+_e1+_w1)%32 + ror r12, r12, #32-(6*_r0+_e1+_w1)%32 + ror lr, lr, #32-(6*_r0+_e1+_w1)%32 + stmia r0, {r2-r12,lr} + mov r4, r1 + pop {r5-r11,pc} + + +@ ---------------------------------------------------------------------------- +@ +@ void 
Xoodoo_Permute_12rounds( void *state ) +@ + .align 4 +.global Xoodoo_Permute_12rounds +.type Xoodoo_Permute_12rounds, %function; +Xoodoo_Permute_12rounds: + push {r0,r4-r11,lr} + ldmia r0, {r2-r12,lr} + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc12x1 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc11x2 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc10x3 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc9x4 + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc8x5 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc7x6 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc6x7 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x8 + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x9 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x10 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x11 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x12 + ror r2, r2, #32-(12*_r0)%32 + ror r3, r3, #32-(12*_r0)%32 + ror r4, r4, #32-(12*_r0)%32 + ror r5, r5, #32-(12*_r0)%32 + ror r6, r6, #32-(12*_r0+1)%32 + ror r7, r7, #32-(12*_r0+1)%32 + ror r8, r8, #32-(12*_r0+1)%32 + ror r9, r9, #32-(12*_r0+1)%32 + ror r10, r10, #32-(12*_r0+_e1+_w1)%32 + ror r11, r11, #32-(12*_r0+_e1+_w1)%32 + ror r12, r12, #32-(12*_r0+_e1+_w1)%32 + ror lr, lr, #32-(12*_r0+_e1+_w1)%32 + pop {r0,r1} + stmia r0, {r2-r12,lr} + mov r4, r1 + pop {r5-r11,pc} + + +.equ Xoofff_BlockSize , 3*4*4 + +@ ---------------------------------------------------------------------------- +@ +@ void Xoofff_AddIs(BitSequence *output, const BitSequence *input, BitLength bitLen) + .align 4 +.global Xoofff_AddIs +.type Xoofff_AddIs, %function; +Xoofff_AddIs: + push {r4-r10,lr} + + subs r2, r2, #Xoofff_BlockSize*8 + bcc 
Xoofff_AddIs_LessThanBlock +Xoofff_AddIs_BlockLoop: + ldr r3, [r0, #0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + ldr r7, [r1], #4 + ldr r8, [r1], #4 + ldr r9, [r1], #4 + ldr r10, [r1], #4 + eor r3, r3, r7 + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + str r3, [r0], #4 + str r4, [r0], #4 + str r5, [r0], #4 + str r6, [r0], #4 + + ldr r3, [r0, #0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + ldr r7, [r1], #4 + ldr r8, [r1], #4 + ldr r9, [r1], #4 + ldr r10, [r1], #4 + eor r3, r3, r7 + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + str r3, [r0], #4 + str r4, [r0], #4 + str r5, [r0], #4 + str r6, [r0], #4 + + ldr r3, [r0, #0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + ldr r7, [r1], #4 + ldr r8, [r1], #4 + ldr r9, [r1], #4 + ldr r10, [r1], #4 + eor r3, r3, r7 + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + str r3, [r0], #4 + str r4, [r0], #4 + str r5, [r0], #4 + str r6, [r0], #4 + + subs r2, r2, #Xoofff_BlockSize*8 + bcs Xoofff_AddIs_BlockLoop +Xoofff_AddIs_LessThanBlock: + adds r2, r2, #Xoofff_BlockSize*8 + beq Xoofff_AddIs_Return + subs r2, r2, #16*8 + bcc Xoofff_AddIs_LessThan16 +Xoofff_AddIs_16Loop: + ldr r3, [r0, #0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + ldr r7, [r1], #4 + ldr r8, [r1], #4 + ldr r9, [r1], #4 + ldr r10, [r1], #4 + eor r3, r3, r7 + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + str r3, [r0], #4 + str r4, [r0], #4 + str r5, [r0], #4 + str r6, [r0], #4 + subs r2, r2, #16*8 + bcs Xoofff_AddIs_16Loop +Xoofff_AddIs_LessThan16: + adds r2, r2, #16*8 + beq Xoofff_AddIs_Return + subs r2, r2, #4*8 + bcc Xoofff_AddIs_LessThan4 +Xoofff_AddIs_4Loop: + ldr r3, [r0] + ldr r7, [r1], #4 + eors r3, r3, r7 + str r3, [r0], #4 + subs r2, r2, #4*8 + bcs Xoofff_AddIs_4Loop +Xoofff_AddIs_LessThan4: + adds r2, r2, #4*8 + beq Xoofff_AddIs_Return + subs r2, r2, #8 + bcc Xoofff_AddIs_LessThan1 +Xoofff_AddIs_1Loop: + ldrb r3, [r0] + ldrb r7, [r1], #1 + eors r3, r3, r7 + strb r3, [r0], #1 
+ subs r2, r2, #8 + bcs Xoofff_AddIs_1Loop +Xoofff_AddIs_LessThan1: + adds r2, r2, #8 + beq Xoofff_AddIs_Return + ldrb r3, [r0] + ldrb r7, [r1] + movs r1, #1 + eors r3, r3, r7 + lsls r1, r1, r2 + subs r1, r1, #1 + ands r3, r3, r1 + strb r3, [r0] +Xoofff_AddIs_Return: + pop {r4-r10,pc} + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoofff_CompressFastLoop(unsigned char *kRoll, unsigned char *xAccu, const unsigned char *input, size_t length) +@ +.equ Xoofff_Compress_kRoll , 0 +.equ Xoofff_Compress_input , 4 +.equ Xoofff_Compress_xAccu , 8 +.equ Xoofff_Compress_iInput , 12 +.equ Xoofff_Compress_length , 16 + + .align 4 +.global Xoofff_CompressFastLoop +.type Xoofff_CompressFastLoop, %function; +Xoofff_CompressFastLoop: + subs r3, #Xoofff_BlockSize @ length must be greater than block size + push {r1-r12,lr} + push {r0,r2} + ldmia r0, {r2-r12,lr} @ get initial kRoll +Xoofff_CompressFastLoop_Loop: + ldr r0, [sp, #Xoofff_Compress_input] @ add input + ldr r1, [r0], #4 + eors r2, r2, r1 + ldr r1, [r0], #4 + eors r3, r3, r1 + ldr r1, [r0], #4 + eors r4, r4, r1 + ldr r1, [r0], #4 + eors r5, r5, r1 + + ldr r1, [r0], #4 + eors r6, r6, r1 + ldr r1, [r0], #4 + eors r7, r7, r1 + ldr r1, [r0], #4 + eors r8, r8, r1 + ldr r1, [r0], #4 + eors r9, r9, r1 + + ldr r1, [r0], #4 + eors r10, r10, r1 + ldr r1, [r0], #4 + eors r11, r11, r1 + ldr r1, [r0], #4 + eors r12, r12, r1 + ldr r1, [r0], #4 + eors lr, lr, r1 + str r0, [sp, #Xoofff_Compress_input] + + @ permutation + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc6x1 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4 + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6 + + @ 
Extract and add into xAccu + ldr r0, [sp, #Xoofff_Compress_xAccu] + ldr r1, [r0] + mRloXor r2, r1, (6*_r0)%32 + ldr r1, [r0, #4] + + str r2, [r0], #4 + mRloXor r3, r1, (6*_r0)%32 + ldr r1, [r0, #4] + + str r3, [r0], #4 + mRloXor r4, r1, (6*_r0)%32 + ldr r1, [r0, #4] + + str r4, [r0], #4 + mRloXor r5, r1, (6*_r0)%32 + str r5, [r0], #4 + + ldm r0, {r2-r5} @ note that r6-r8 and r7-r9 are swapped + mRliXor r2, r8, (6*_r0+1)%32 + mRliXor r3, r9, (6*_r0+1)%32 + mRliXor r4, r6, (6*_r0+1)%32 + mRliXor r5, r7, (6*_r0+1)%32 + stm r0!, {r2-r5} + + ldm r0, {r2-r5} + mRliXor r2, r10, (6*_r0+_e1+_w1)%32 + mRliXor r3, r11, (6*_r0+_e1+_w1)%32 + mRliXor r4, r12, (6*_r0+_e1+_w1)%32 + mRliXor r5, lr, (6*_r0+_e1+_w1)%32 + stm r0!, {r2-r5} + + @roll kRoll + ldr r0, [sp, #Xoofff_Compress_kRoll] + ldr lr, [r0], #4 + ldmia r0!, {r10-r12} + ldmia r0!, {r2-r9} + eors lr, lr, lr, LSL #13 + eors lr, lr, r2, ROR #32-3 + sub r0, #Xoofff_BlockSize + stmia r0, {r2-r12,lr} + @ loop management + ldr r0, [sp, #Xoofff_Compress_length] + subs r0, #Xoofff_BlockSize + str r0, [sp, #Xoofff_Compress_length] + bcs Xoofff_CompressFastLoop_Loop + @ return number of bytes processed + ldr r0, [sp, #Xoofff_Compress_input] + ldr r1, [sp, #Xoofff_Compress_iInput] + sub r0, r0, r1 + pop {r1,r2} + pop {r1-r12,pc} + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoofff_ExpandFastLoop(unsigned char *yAccu, const unsigned char *kRoll, unsigned char *output, size_t length) +@ +.equ Xoofff_Expand_yAccu , 0 +.equ Xoofff_Expand_output , 4 +.equ Xoofff_Expand_kRoll , 8 +.equ Xoofff_Expand_iOutput , 12 +.equ Xoofff_Expand_length , 16 + + .align 4 +.global Xoofff_ExpandFastLoop +.type Xoofff_ExpandFastLoop, %function; +Xoofff_ExpandFastLoop: + subs r3, #Xoofff_BlockSize @ length must be greater than block size + push {r1-r12,lr} + push {r0,r2} + ldmia r0, {r2-r12,lr} @ get initial yAccu +Xoofff_ExpandFastLoop_Loop: + @ permutation + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, 
r11, r12, lr, 32, 32, _rc6x1 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4 + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6 + + @ Add k and extract + ldr r0, [sp, #Xoofff_Expand_kRoll] + ldr r1, [r0], #4 + mRloXor r2, r1, (6*_r0)%32 + + ldr r1, [sp, #Xoofff_Expand_output] + str r2, [r1], #4 + + ldr r2, [r0], #4 + mRloXor r3, r2, (6*_r0)%32 + ldr r2, [r0], #4 + + str r3, [r1], #4 + mRloXor r4, r2, (6*_r0)%32 + ldr r2, [r0], #4 + + str r4, [r1], #4 + mRloXor r5, r2, (6*_r0)%32 + str r5, [r1], #4 + + ldm r0!, {r2-r5} @ Note that r6-r8 and r7-r9 are swapped + mRliXor r2, r8, (6*_r0+1)%32 + str r2, [r1], #4 + mRliXor r3, r9, (6*_r0+1)%32 + str r3, [r1], #4 + mRliXor r4, r6, (6*_r0+1)%32 + str r4, [r1], #4 + mRliXor r5, r7, (6*_r0+1)%32 + str r5, [r1], #4 + + ldm r0!, {r2-r5} + mRliXor r2, r10, (6*_r0+_e1+_w1)%32 + str r2, [r1], #4 + mRliXor r3, r11, (6*_r0+_e1+_w1)%32 + str r3, [r1], #4 + mRliXor r4, r12, (6*_r0+_e1+_w1)%32 + str r4, [r1], #4 + mRliXor r5, lr, (6*_r0+_e1+_w1)%32 + str r5, [r1], #4 + + @ roll-e yAccu + ldr r0, [sp, #Xoofff_Expand_yAccu] + str r1, [sp, #Xoofff_Expand_output] + ldr lr, [r0], #4 + ldmia r0!, {r10-r12} + ldmia r0!, {r2-r9} + and r1, r6, r2 + eor lr, r1, lr, ROR #32-5 + eor lr, lr, r2, ROR #32-13 + eor lr, lr, #7 + sub r0, #Xoofff_BlockSize + stmia r0, {r2-r12,lr} + @ loop management + ldr r0, [sp, #Xoofff_Expand_length] + subs r0, #Xoofff_BlockSize + str r0, [sp, #Xoofff_Expand_length] + bcs Xoofff_ExpandFastLoop_Loop + @ return number of bytes processed + ldr r0, [sp, #Xoofff_Expand_output] + ldr r1, [sp, #Xoofff_Expand_iOutput] + sub r0, r0, r1 + pop {r1,r2} + pop {r1-r12,pc} + + diff --git 
a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Xoodoo.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Xoodoo.h new file mode 100644 index 0000000..1b6f1a9 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Xoodoo.h @@ -0,0 +1,79 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodoo_h_ +#define _Xoodoo_h_ + +#include <stddef.h> +#include <stdint.h> /* uint32_t */ + +#define MAXROUNDS 12 +#define NROWS 3 +#define NCOLUMS 4 +#define NLANES (NCOLUMS*NROWS) + +/* Round constants */ +#define _rc12 0x00000058 +#define _rc11 0x00000038 +#define _rc10 0x000003C0 +#define _rc9 0x000000D0 +#define _rc8 0x00000120 +#define _rc7 0x00000014 +#define _rc6 0x00000060 +#define _rc5 0x0000002C +#define _rc4 0x00000380 +#define _rc3 0x000000F0 +#define _rc2 0x000001A0 +#define _rc1 0x00000012 + + +/* ROTL32(a, offset): rotate the 32-bit value a left by offset bits (offset is reduced modulo 32). */ +#if !defined(ROTL32) + #if defined (__arm__) && !defined(__GNUC__) + #define ROTL32(a, offset) __ror(a, (32-(offset))%32) + #elif defined(_MSC_VER) + #define ROTL32(a, offset) _rotl(a, (offset)%32) + #else + /* The two shifted halves are OR-ed (not XOR-ed) so that offset%32 == 0 returns a unchanged instead of a^a == 0; the argument is parenthesized before the cast so that expressions are cast as a whole. */ + #define ROTL32(a, offset) ((((uint32_t)(a)) << ((offset)%32)) | (((uint32_t)(a)) >> ((32-(offset))%32))) + #endif +#endif + +#if !defined(READ32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define READ32_UNALIGNED(argAddress) (*((const __packed uint32_t*)(argAddress))) + #elif defined(_MSC_VER) + #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress))) + #else + #define READ32_UNALIGNED(argAddress) (*((const 
uint32_t*)(argAddress))) + #endif +#endif + +#if !defined(WRITE32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define WRITE32_UNALIGNED(argAddress, argData) (*((__packed uint32_t*)(argAddress)) = (argData)) + #elif defined(_MSC_VER) + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #else + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #endif +#endif + +#if !defined(index) + #define index(__x,__y) ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS)) +#endif + +typedef uint32_t tXoodooLane; + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Xoodyak-parameters.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Xoodyak-parameters.h new file mode 100644 index 0000000..a8c34d8 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Xoodyak-parameters.h @@ -0,0 +1,26 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodyak_parameters_h_ +#define _Xoodyak_parameters_h_ + +#define Xoodyak_f_bPrime 48 +#define Xoodyak_Rhash 16 +#define Xoodyak_Rkin 44 +#define Xoodyak_Rkout 24 +#define Xoodyak_lRatchet 16 + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Xoodyak-uf-armv6-le-gcc.s b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Xoodyak-uf-armv6-le-gcc.s new file mode 100644 index 0000000..68fb7db --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Xoodyak-uf-armv6-le-gcc.s @@ -0,0 +1,563 @@ +@ +@ The eXtended Keccak Code Package (XKCP) +@ https://github.com/XKCP/XKCP +@ +@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. +@ +@ Implementation by Ronny Van Keer, hereby denoted as "the implementer". +@ +@ For more information, feedback or questions, please refer to the Keccak Team website: +@ https://keccak.team/ +@ +@ To the extent possible under law, the implementer has waived all copyright +@ and related or neighboring rights to the source code in this file. +@ http://creativecommons.org/publicdomain/zero/1.0/ +@ + +@ WARNING: These functions work only on little endian CPU with ARMv6 architecture (e.g., ARM11). 
+ + +.text + + +@ ---------------------------------------------------------------------------- + +.equ _r0 , 5 +.equ _r1 , 14 +.equ _t3 , 1 + +.equ _w1 , 11 + +.equ _e0 , 2 +.equ _e1 , 8 + +.equ _rc12 , 0x00000058 +.equ _rc11 , 0x00000038 +.equ _rc10 , 0x000003C0 +.equ _rc9 , 0x000000D0 +.equ _rc8 , 0x00000120 +.equ _rc7 , 0x00000014 +.equ _rc6 , 0x00000060 +.equ _rc5 , 0x0000002C +.equ _rc4 , 0x00000380 +.equ _rc3 , 0x000000F0 +.equ _rc2 , 0x000001A0 +.equ _rc1 , 0x00000012 + +.equ _rc6x1 , 0x00000003 +.equ _rc5x2 , 0x0b000000 +.equ _rc4x3 , 0x07000000 +.equ _rc3x4 , 0x000f0000 +.equ _rc2x5 , 0x0000d000 +.equ _rc1x6 , 0x00000048 + +.equ _rc12x1, 0xc0000002 +.equ _rc11x2, 0x0e000000 +.equ _rc10x3, 0x07800000 +.equ _rc9x4 , 0x000d0000 +.equ _rc8x5 , 0x00009000 +.equ _rc7x6 , 0x00000050 +.equ _rc6x7 , 0x0000000c +.equ _rc5x8 , 0x2c000000 +.equ _rc4x9 , 0x1c000000 +.equ _rc3x10, 0x003c0000 +.equ _rc2x11, 0x00034000 +.equ _rc1x12, 0x00000120 + +@ ---------------------------------------------------------------------------- + +.macro mXor3 ro, a0, a1, a2, rho_e1, rho_e2 + .if ((\rho_e1)%32) == 0 + eors \ro, \a0, \a1 + .else + eor \ro, \a0, \a1, ROR #(32-(\rho_e1))%32 + .endif + .if ((\rho_e2)%32) == 0 + eors \ro, \ro, \a2 + .else + eor \ro, \ro, \a2, ROR #(32-(\rho_e2))%32 + .endif + .endm + +.macro mRliXor ro, ri, rot + .if ((\rot)%32) == 0 + eors \ro, \ro, \ri + .else + eor \ro, \ro, \ri, ROR #(32-(\rot))%32 + .endif + .endm + +.macro mRloXor ro, ri, rot + .if ((\rot)%32) == 0 + eors \ro, \ro, \ri + .else + eor \ro, \ri, \ro, ROR #(32-(\rot))%32 + .endif + .endm + +.macro mChi3 a0,a1,a2,r0,r1 + bic \r0, \a2, \a1, ROR #_w1 + eors \a0, \a0, \r0, ROR #32-_w1 + bic \r1, \a0, \a2, ROR #32-_w1 + eors \a1, \a1, \r1 + bic \r1, \a1, \a0 + eors \a2, \a2, \r1, ROR #_w1 + .endm + +.macro mRound r6i, r7i, r8i, r9i, r6w, r7w, r8w, r9w, r10i, r11i, r12i, lri, rho_e1, rho_we2, rc + + @ Theta: Column Parity Mixer (with late Rho-west, Rho-east bit rotations) + mXor3 r0, r5, \r9i, \lri, 
\rho_e1, \rho_we2 + mXor3 r1, r2, \r6i, \r10i, \rho_e1, \rho_we2 + mRliXor r0, r0, _r1-_r0 + mRloXor r2, r0, 32-_r0 + mRloXor \r6i, r0, \rho_e1-_r0 + mRloXor \r10i, r0, \rho_we2-_r0 + + mXor3 r0, r3, \r7i, \r11i, \rho_e1, \rho_we2 + mRliXor r1, r1, _r1-_r0 + mRloXor r3, r1, 32-_r0 + mRloXor \r7i, r1, \rho_e1-_r0 + mRloXor \r11i, r1, \rho_we2-_r0 + + mXor3 r1, r4, \r8i, \r12i, \rho_e1, \rho_we2 + mRliXor r0, r0, _r1-_r0 + mRloXor r4, r0, 32-_r0 + mRloXor \r8i, r0, \rho_e1-_r0 + mRloXor \r12i, r0, \rho_we2-_r0 + + mRliXor r1, r1, _r1-_r0 + mRloXor r5, r1, 32-_r0 + mRloXor \r9i, r1, \rho_e1-_r0 + mRloXor \lri, r1, \rho_we2-_r0 + @ After Theta the whole state is rotated -r0 + @ from here we must use a1.w instead of a1.i + + @ Iota: round constant + .if \rc == 0xc0000002 + eor r2, r2, #0x00000002 + eor r2, r2, #0xc0000000 + .else + eor r2, r2, #\rc + .endif + + @ Chi: non linear step, on colums + mChi3 r2, \r6w, \r10i, r0, r1 + mChi3 r3, \r7w, \r11i, r0, r1 + mChi3 r4, \r8w, \r12i, r0, r1 + mChi3 r5, \r9w, \lri, r0, r1 + .endm + +.equ offsetInstance , 0 +.equ offsetInitialLen , 16 +.equ offsetReturn , 20 + +@ ---------------------------------------------------------------------------- +@ +@ Xoodoo_Permute_12roundsAsm: only callable from asm +@ + .align 4 +.type Xoodoo_Permute_12roundsAsm, %function; +Xoodoo_Permute_12roundsAsm: + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc12x1 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc11x2 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc10x3 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc9x4 + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc8x5 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc7x6 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc6x7 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x8 + mRound r6, r7, r8, 
r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x9 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x10 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x11 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x12 + ror r2, r2, #32-(12*_r0)%32 + ror r3, r3, #32-(12*_r0)%32 + ror r4, r4, #32-(12*_r0)%32 + ror r5, r5, #32-(12*_r0)%32 + ror r6, r6, #32-(12*_r0+1)%32 + ror r7, r7, #32-(12*_r0+1)%32 + ror r8, r8, #32-(12*_r0+1)%32 + ror r9, r9, #32-(12*_r0+1)%32 + ror r10, r10, #32-(12*_r0+_e1+_w1)%32 + ror r11, r11, #32-(12*_r0+_e1+_w1)%32 + ror r12, r12, #32-(12*_r0+_e1+_w1)%32 + ror lr, lr, #32-(12*_r0+_e1+_w1)%32 + ldr pc, [sp, #offsetReturn] + + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen) +@ { +@ size_t initialLength = XLen@ +@ +@ do { +@ SnP_Permute(state )@ /* Xoodyak_Up(instance, NULL, 0, 0)@ */ +@ SnP_AddBytes(state, X, 0, Xoodyak_Rkin)@ /* Xoodyak_Down(instance, X, Xoodyak_Rkin, 0)@ */ +@ SnP_AddByte(state, 0x01, Xoodyak_Rkin)@ +@ X += Xoodyak_Rkin@ +@ XLen -= Xoodyak_Rkin@ +@ } while (XLen >= Xoodyak_Rkin)@ +@ +@ return initialLength - XLen@ +@ } +@ +.equ offsetAbsorbX , 4 +.equ offsetAbsorbXLen , 8 + + .align 4 +.global Xoodyak_AbsorbKeyedFullBlocks +.type Xoodyak_AbsorbKeyedFullBlocks, %function; +Xoodyak_AbsorbKeyedFullBlocks: + push {r4-r12,lr} + mov r4, r2 @ r4 initialLength + subs r2, r2, #44 + ldr r5, =Xoodyak_AbsorbKeyedFullBlocks_Ret + push {r0-r5} + ldmia r0, {r2-r12,lr} +Xoodyak_AbsorbKeyedFullBlocks_Loop: + b Xoodoo_Permute_12roundsAsm +Xoodyak_AbsorbKeyedFullBlocks_Ret: + ldr r0, [sp, #offsetAbsorbX] + ldr r1, [r0], #4 + eors r2, r2, r1 + ldr r1, [r0], #4 + eors r3, r3, r1 + ldr r1, [r0], #4 + eors r4, r4, r1 + ldr r1, [r0], #4 + eors r5, r5, r1 + ldr r1, [r0], #4 + eors r6, r6, r1 + ldr r1, [r0], #4 + eors r7, r7, r1 + ldr r1, [r0], #4 + 
eors r8, r8, r1 + ldr r1, [r0], #4 + eors r9, r9, r1 + ldr r1, [r0], #4 + eors r10, r10, r1 + ldr r1, [r0], #4 + eors r11, r11, r1 + ldr r1, [r0], #4 + eors lr, lr, #1 + eors r12, r12, r1 + ldr r1, [sp, #offsetAbsorbXLen] + str r0, [sp, #offsetAbsorbX] + subs r1, r1, #44 + str r1, [sp, #offsetAbsorbXLen] + bcs Xoodyak_AbsorbKeyedFullBlocks_Loop + ldr r0, [sp, #offsetInstance] + stmia r0, {r2-r12,lr} + pop {r0-r5} + adds r2, r2, #44 + sub r0, r4, r2 + pop {r4-r12,pc} + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen) +@ { +@ size_t initialLength = XLen@ +@ +@ do { +@ SnP_Permute(state )@ /* Xoodyak_Up(instance, NULL, 0, 0)@ */ +@ SnP_AddBytes(state, X, 0, Xoodyak_Rhash)@ /* Xoodyak_Down(instance, X, Xoodyak_Rhash, 0)@ */ +@ SnP_AddByte(state, 0x01, Xoodyak_Rhash)@ +@ X += Xoodyak_Rhash@ +@ XLen -= Xoodyak_Rhash@ +@ } while (XLen >= Xoodyak_Rhash)@ +@ +@ return initialLength - XLen@ +@ } +@ + .align 4 +.global Xoodyak_AbsorbHashFullBlocks +.type Xoodyak_AbsorbHashFullBlocks, %function; +Xoodyak_AbsorbHashFullBlocks: + push {r4-r12,lr} + mov r4, r2 @ r4 initialLength + subs r2, r2, #16 + ldr r5, =Xoodyak_AbsorbHashFullBlocks_Ret + push {r0-r5} + ldmia r0, {r2-r12,lr} +Xoodyak_AbsorbHashFullBlocks_Loop: + b Xoodoo_Permute_12roundsAsm +Xoodyak_AbsorbHashFullBlocks_Ret: + ldr r0, [sp, #offsetAbsorbX] + ldr r1, [r0], #4 + eors r2, r2, r1 + ldr r1, [r0], #4 + eors r3, r3, r1 + ldr r1, [r0], #4 + eors r4, r4, r1 + ldr r1, [r0], #4 + eors r6, r6, #1 + eors r5, r5, r1 + ldr r1, [sp, #offsetAbsorbXLen] + str r0, [sp, #offsetAbsorbX] + subs r1, r1, #16 + str r1, [sp, #offsetAbsorbXLen] + bcs Xoodyak_AbsorbHashFullBlocks_Loop + ldr r0, [sp, #offsetInstance] + stmia r0, {r2-r12,lr} + pop {r0-r5} + adds r2, r2, #16 + sub r0, r4, r2 + pop {r4-r12,pc} + + +@ ---------------------------------------------------------------------------- +@ +@ size_t 
Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen) +@ { +@ size_t initialLength = YLen@ +@ +@ do { +@ SnP_AddByte(state, 0x01, 0)@ /* Xoodyak_Down(instance, NULL, 0, 0)@ */ +@ SnP_Permute(state )@ /* Xoodyak_Up(instance, Y, Xoodyak_Rkout, 0)@ */ +@ SnP_ExtractBytes(state, Y, 0, Xoodyak_Rkout)@ +@ Y += Xoodyak_Rkout@ +@ YLen -= Xoodyak_Rkout@ +@ } while (YLen >= Xoodyak_Rkout)@ +@ +@ return initialLength - YLen@ +@ } +@ +.equ offsetSqueezeY , 4 +.equ offsetSqueezeYLen , 8 + + .align 4 +.global Xoodyak_SqueezeKeyedFullBlocks +.type Xoodyak_SqueezeKeyedFullBlocks, %function; +Xoodyak_SqueezeKeyedFullBlocks: + push {r4-r12,lr} + mov r4, r2 @ r4 initialLength + subs r2, r2, #24 + ldr r5, =Xoodyak_SqueezeKeyedFullBlocks_Ret + push {r0-r5} + ldmia r0, {r2-r12,lr} +Xoodyak_SqueezeKeyedFullBlocks_Loop: + eors r2, r2, #1 + b Xoodoo_Permute_12roundsAsm +Xoodyak_SqueezeKeyedFullBlocks_Ret: + ldr r0, [sp, #offsetSqueezeY] + str r2, [r0], #4 + str r3, [r0], #4 + str r4, [r0], #4 + str r5, [r0], #4 + str r6, [r0], #4 + str r7, [r0], #4 + ldr r1, [sp, #offsetSqueezeYLen] + str r0, [sp, #offsetSqueezeY] + subs r1, r1, #24 + str r1, [sp, #offsetSqueezeYLen] + bcs Xoodyak_SqueezeKeyedFullBlocks_Loop + ldr r0, [sp, #offsetInstance] + stmia r0, {r2-r12,lr} + pop {r0-r5} + adds r2, r2, #24 + sub r0, r4, r2 + pop {r4-r12,pc} + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen) +@ { +@ size_t initialLength = YLen@ +@ +@ do { +@ SnP_AddByte(state, 0x01, 0)@ /* Xoodyak_Down(instance, NULL, 0, 0)@ */ +@ SnP_Permute(state)@ /* Xoodyak_Up(instance, Y, Xoodyak_Rhash, 0)@ */ +@ SnP_ExtractBytes(state, Y, 0, Xoodyak_Rhash)@ +@ Y += Xoodyak_Rhash@ +@ YLen -= Xoodyak_Rhash@ +@ } while (YLen >= Xoodyak_Rhash)@ +@ +@ return initialLength - YLen@ +@ } +@ + .align 4 +.global Xoodyak_SqueezeHashFullBlocks +.type Xoodyak_SqueezeHashFullBlocks, %function; 
+Xoodyak_SqueezeHashFullBlocks: + push {r4-r12,lr} + mov r4, r2 @ r4 initialLength + subs r2, r2, #16 + ldr r5, =Xoodyak_SqueezeHashFullBlocks_Ret + push {r0-r5} + ldmia r0, {r2-r12,lr} +Xoodyak_SqueezeHashFullBlocks_Loop: + eors r2, r2, #1 + b Xoodoo_Permute_12roundsAsm +Xoodyak_SqueezeHashFullBlocks_Ret: + ldr r0, [sp, #offsetSqueezeY] + str r2, [r0], #4 + str r3, [r0], #4 + str r4, [r0], #4 + str r5, [r0], #4 + ldr r1, [sp, #offsetSqueezeYLen] + str r0, [sp, #offsetSqueezeY] + subs r1, r1, #16 + str r1, [sp, #offsetSqueezeYLen] + bcs Xoodyak_SqueezeHashFullBlocks_Loop + ldr r0, [sp, #offsetInstance] + stmia r0, {r2-r12,lr} + pop {r0-r5} + adds r2, r2, #16 + sub r0, r4, r2 + pop {r4-r12,pc} + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen) +@ { +@ size_t initialLength = IOLen@ +@ +@ do { +@ SnP_Permute(state)@ +@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@ +@ SnP_OverwriteBytes(state, O, 0, Xoodyak_Rkout)@ +@ SnP_AddByte(state, 0x01, Xoodyak_Rkout)@ +@ I += Xoodyak_Rkout@ +@ O += Xoodyak_Rkout@ +@ IOLen -= Xoodyak_Rkout@ +@ } while (IOLen >= Xoodyak_Rkout)@ +@ +@ return initialLength - IOLen@ +@ } +@ +.equ offsetCryptI , 4+8 +.equ offsetCryptO , 8+8 +.equ offsetCryptIOLen , 12 + + .align 4 +.global Xoodyak_EncryptFullBlocks +.type Xoodyak_EncryptFullBlocks, %function; +Xoodyak_EncryptFullBlocks: + push {r4-r12,lr} + mov r4, r3 @ r4 initialLength + subs r3, r3, #24 + ldr r5, =Xoodyak_EncryptFullBlocks_Ret + push {r0-r5} + ldmia r0, {r2-r12,lr} +Xoodyak_EncryptFullBlocks_Loop: + b Xoodoo_Permute_12roundsAsm +Xoodyak_EncryptFullBlocks_Ret: + push {r10, r11} + ldr r11, [sp, #offsetCryptI] + ldr r10, [sp, #offsetCryptO] + ldr r0, [r11], #4 + ldr r1, [r11], #4 + eors r2, r2, r0 + str r2, [r10], #4 + eors r3, r3, r1 + ldr r0, [r11], #4 + str r3, [r10], #4 + eors r4, r4, r0 + ldr r1, [r11], #4 + str r4, [r10], #4 + eors r5, r5, r1 + 
ldr r0, [r11], #4 + str r5, [r10], #4 + eors r6, r6, r0 + ldr r1, [r11], #4 + str r6, [r10], #4 + eors r7, r7, r1 + str r7, [r10], #4 + str r10, [sp, #offsetCryptO] + str r11, [sp, #offsetCryptI] + pop {r10, r11} + ldr r0, [sp, #offsetCryptIOLen] + eors r8, r8, #1 + subs r0, r0, #24 + str r0, [sp, #offsetCryptIOLen] + bcs Xoodyak_EncryptFullBlocks_Loop + ldr r0, [sp, #offsetInstance] + stmia r0, {r2-r12,lr} + pop {r0-r5} + adds r3, r3, #24 + sub r0, r4, r3 + pop {r4-r12,pc} + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen) +@ { +@ size_t initialLength = IOLen@ +@ +@ do { +@ SnP_Permute(state)@ +@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@ +@ SnP_AddBytes(state, O, 0, Xoodyak_Rkout)@ +@ SnP_AddByte(state, 0x01, Xoodyak_Rkout)@ +@ I += Xoodyak_Rkout@ +@ O += Xoodyak_Rkout@ +@ IOLen -= Xoodyak_Rkout@ +@ } while (IOLen >= Xoodyak_Rkout)@ +@ +@ return initialLength - IOLen@ +@ } +@ + .align 4 +.global Xoodyak_DecryptFullBlocks +.type Xoodyak_DecryptFullBlocks, %function; +Xoodyak_DecryptFullBlocks: + push {r4-r12,lr} + mov r4, r3 @ r4 initialLength + subs r3, r3, #24 + ldr r5, =Xoodyak_DecryptFullBlocks_Ret + push {r0-r5} + ldmia r0, {r2-r12,lr} +Xoodyak_DecryptFullBlocks_Loop: + b Xoodoo_Permute_12roundsAsm +Xoodyak_DecryptFullBlocks_Ret: + push {r10, r11} + ldr r11, [sp, #offsetCryptI] + ldr r10, [sp, #offsetCryptO] + ldr r0, [r11], #4 + ldr r1, [r11], #4 + eors r2, r2, r0 + str r2, [r10], #4 + mov r2, r0 + eors r3, r3, r1 + ldr r0, [r11], #4 + str r3, [r10], #4 + mov r3, r1 + eors r4, r4, r0 + ldr r1, [r11], #4 + str r4, [r10], #4 + mov r4, r0 + eors r5, r5, r1 + ldr r0, [r11], #4 + str r5, [r10], #4 + mov r5, r1 + eors r6, r6, r0 + ldr r1, [r11], #4 + str r6, [r10], #4 + mov r6, r0 + eors r7, r7, r1 + str r7, [r10], #4 + mov r7, r1 + str r10, [sp, #offsetCryptO] + str r11, [sp, #offsetCryptI] + pop {r10, r11} + ldr r0, [sp, 
#offsetCryptIOLen] + eors r8, r8, #1 + subs r0, r0, #24 + str r0, [sp, #offsetCryptIOLen] + bcs Xoodyak_DecryptFullBlocks_Loop + ldr r0, [sp, #offsetInstance] + stmia r0, {r2-r12,lr} + pop {r0-r5} + adds r3, r3, #24 + sub r0, r4, r3 + pop {r4-r12,pc} + + diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Xoodyak.c b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Xoodyak.c new file mode 100644 index 0000000..e0b67b5 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/Xoodyak.c @@ -0,0 +1,53 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifdef XoodooReference + #include "displayIntermediateValues.h" +#endif + +#include +#include +#include "Xoodyak.h" + +#ifdef OUTPUT +#include +#include + +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length); +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length) +{ + unsigned int i; + + fprintf(f, "%s:", synopsis); + for(i=0; i +#include "Cyclist.h" +#include "Xoodoo-SnP.h" +#include "Xoodyak-parameters.h" + +KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment) +KCP_DeclareCyclistFunctions(Xoodyak) + +#else +#error This requires an implementation of Xoodoo +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/align.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/align.h new file mode 100644 index 0000000..82ad2f9 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/align.h @@ -0,0 +1,33 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _align_h_ +#define _align_h_ + +/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. 
*/ +#ifdef ALIGN +#undef ALIGN +#endif + +#if defined(__GNUC__) +#define ALIGN(x) __attribute__ ((aligned(x))) +#elif defined(_MSC_VER) +#define ALIGN(x) __declspec(align(x)) +#elif defined(__ARMCC_VERSION) +#define ALIGN(x) __align(x) +#else +#define ALIGN(x) +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/api.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/api.h new file mode 100644 index 0000000..8060d2b --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/api.h @@ -0,0 +1 @@ +#define CRYPTO_BYTES 32 diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/brg_endian.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/brg_endian.h new file mode 100644 index 0000000..7c640b9 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/brg_endian.h @@ -0,0 +1,143 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The redistribution and use of this software (with or without changes) + is allowed without the payment of fees or royalties provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. 
+ --------------------------------------------------------------------------- + Issue Date: 20/12/2007 + Changes for ARM 9/9/2010 +*/ + +#ifndef _BRG_ENDIAN_H +#define _BRG_ENDIAN_H + +#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ +#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ + +#if 0 +/* Include files where endian defines and byteswap functions may reside */ +#if defined( __sun ) +# include +#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ ) +# include +#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \ + defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ ) +# include +#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ ) +# if !defined( __MINGW32__ ) && !defined( _AIX ) +# include +# if !defined( __BEOS__ ) +# include +# endif +# endif +#endif +#endif + +/* Now attempt to set the define for platform byte order using any */ +/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */ +/* seem to encompass most endian symbol definitions */ + +#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN ) +# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN ) +# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( _BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( _LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( 
__BIG_ENDIAN ) && defined( __LITTLE_ENDIAN ) +# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ ) +# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +/* if the platform byte order could not be determined, then try to */ +/* set this define using common machine defines */ +#if !defined(PLATFORM_BYTE_ORDER) + +#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ + defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ + defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ + defined( vax ) || defined( vms ) || defined( VMS ) || \ + defined( __VMS ) || defined( _M_X64 ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN + +#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ + defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ + defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ + defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ + defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ + defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ + defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \ + defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ ) 
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN + +#elif defined(__arm__) +# ifdef __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# else +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif 1 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#elif 0 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#else +# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order +#endif + +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/config.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/config.h new file mode 100644 index 0000000..7dfc043 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/config.h @@ -0,0 +1,4 @@ +/* File generated by ToTargetConfigFile.xsl */ + +#define XKCP_has_Xoodyak +#define XKCP_has_Xoodoo diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/hash.c b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/hash.c new file mode 100644 index 0000000..5955de5 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6/hash.c @@ -0,0 +1,43 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include "crypto_hash.h" + +#ifndef crypto_hash_BYTES + #define crypto_hash_BYTES 32 +#endif + +#include "Xoodyak.h" + +int crypto_hash(unsigned char *out, const unsigned char *in, unsigned long long inlen) +{ + Xoodyak_Instance instance; + + Xoodyak_Initialize(&instance, NULL, 0, NULL, 0, NULL, 0); + Xoodyak_Absorb(&instance, in, (size_t)inlen); + Xoodyak_Squeeze(&instance, out, crypto_hash_BYTES); + #if 0 + { + unsigned int i; + for (i = 0; i < crypto_hash_BYTES; ++i ) + { + printf("\\x%02x", out[i] ); + } + printf("\n"); + } + #endif + return 0; +} diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Cyclist.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Cyclist.h new file mode 100644 index 0000000..54522bb --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Cyclist.h @@ -0,0 +1,66 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Cyclist_h_ +#define _Cyclist_h_ + +#include +#include "align.h" + +#define Cyclist_ModeHash 1 +#define Cyclist_ModeKeyed 2 + +#define Cyclist_PhaseDown 1 +#define Cyclist_PhaseUp 2 + +#ifdef OUTPUT + +#include + +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + uint8_t stateShadow[size]; \ + FILE *file; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#else + +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#endif + +#define KCP_DeclareCyclistFunctions(prefix) \ + void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \ + void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \ + void prefix##_Encrypt(prefix##_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen); \ + void prefix##_Decrypt(prefix##_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen); \ + void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \ + void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \ + void prefix##_Ratchet(prefix##_Instance *instance); + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Cyclist.inc b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Cyclist.inc new file mode 100644 index 0000000..ba7a156 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Cyclist.inc @@ -0,0 +1,327 @@ +/* +The eXtended Keccak Code Package (XKCP) 
+https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define JOIN0(a, b) a ## b +#define JOIN(a, b) JOIN0(a, b) + +#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize) +#define SnP_Initialize JOIN(SnP, _Initialize) +#define SnP_AddBytes JOIN(SnP, _AddBytes) +#define SnP_AddByte JOIN(SnP, _AddByte) +#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes) +#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes) +#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes) + +#define Cyclist_Instance JOIN(prefix, _Instance) +#define Cyclist_Initialize JOIN(prefix, _Initialize) +#define Cyclist_Absorb JOIN(prefix, _Absorb) +#define Cyclist_Encrypt JOIN(prefix, _Encrypt) +#define Cyclist_Decrypt JOIN(prefix, _Decrypt) +#define Cyclist_Squeeze JOIN(prefix, _Squeeze) +#define Cyclist_SqueezeKey JOIN(prefix, _SqueezeKey) +#define Cyclist_Ratchet JOIN(prefix, _Ratchet) + +#define Cyclist_AbsorbAny JOIN(prefix, _AbsorbAny) +#define Cyclist_AbsorbKey JOIN(prefix, _AbsorbKey) +#define Cyclist_SqueezeAny JOIN(prefix, _SqueezeAny) +#define Cyclist_Down JOIN(prefix, _Down) +#define Cyclist_Up JOIN(prefix, _Up) +#define Cyclist_Crypt JOIN(prefix, _Crypt) + +#define Cyclist_f_bPrime JOIN(prefix, _f_bPrime) +#define Cyclist_Rhash JOIN(prefix, _Rhash) +#define Cyclist_Rkin JOIN(prefix, _Rkin) +#define Cyclist_Rkout JOIN(prefix, _Rkout) +#define Cyclist_lRatchet JOIN(prefix, _lRatchet) + +#if defined(CyclistFullBlocks_supported) +#define Cyclist_AbsorbKeyedFullBlocks JOIN(prefix, _AbsorbKeyedFullBlocks) +#define 
Cyclist_AbsorbHashFullBlocks JOIN(prefix, _AbsorbHashFullBlocks) +#define Cyclist_SqueezeKeyedFullBlocks JOIN(prefix, _SqueezeKeyedFullBlocks) +#define Cyclist_SqueezeHashFullBlocks JOIN(prefix, _SqueezeHashFullBlocks) +#define Cyclist_EncryptFullBlocks JOIN(prefix, _EncryptFullBlocks) +#define Cyclist_DecryptFullBlocks JOIN(prefix, _DecryptFullBlocks) +#endif + +/* ------- Cyclist internal interfaces ------- */ + +static void Cyclist_Down(Cyclist_Instance *instance, const uint8_t *Xi, unsigned int XiLen, uint8_t Cd) +{ + SnP_AddBytes(instance->state, Xi, 0, XiLen); + SnP_AddByte(instance->state, 0x01, XiLen); + SnP_AddByte(instance->state, (instance->mode == Cyclist_ModeHash) ? (Cd & 0x01) : Cd, Cyclist_f_bPrime - 1); + instance->phase = Cyclist_PhaseDown; + +} + +static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu) +{ + #if defined(OUTPUT) + uint8_t s[Cyclist_f_bPrime]; + #endif + + if (instance->mode != Cyclist_ModeHash) { + SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1); + } + #if defined(OUTPUT) + if (instance->file != NULL) { + SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime ); + SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime ); + } + #endif + SnP_Permute( instance->state ); + #if defined(OUTPUT) + if (instance->file != NULL) { + memcpy( instance->stateShadow, instance->state, sizeof(instance->state) ); + fprintf( instance->file, "Data XORed" ); + displayByteString( instance->file, "", s, Cyclist_f_bPrime ); + SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime ); + fprintf( instance->file, "After f() "); + displayByteString( instance->file, "", s, Cyclist_f_bPrime ); + } + #endif + instance->phase = Cyclist_PhaseUp; + SnP_ExtractBytes( instance->state, Yi, 0, YiLen ); +} + +static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd) +{ + unsigned int splitLen; + + do { + if (instance->phase != 
Cyclist_PhaseUp) { + Cyclist_Up(instance, NULL, 0, 0); + } + splitLen = MyMin(XLen, r); + Cyclist_Down(instance, X, splitLen, Cd); + Cd = 0; + X += splitLen; + XLen -= splitLen; + #if defined(CyclistFullBlocks_supported) + if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) { + size_t lenProcessed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen); + X += lenProcessed; + XLen -= lenProcessed; + } + else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) { + size_t lenProcessed = Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen); + X += lenProcessed; + XLen -= lenProcessed; + } + #endif + } while ( XLen != 0 ); +} + +static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen) +{ + uint8_t KID[Cyclist_Rkin]; + + assert(instance->mode == Cyclist_ModeHash); + assert((KLen + IDLen) <= (Cyclist_Rkin - 1)); + + instance->mode = Cyclist_ModeKeyed; + instance->Rabsorb = Cyclist_Rkin; + instance->Rsqueeze = Cyclist_Rkout; + if (KLen != 0) { + memcpy(KID, K, KLen); + memcpy(KID + KLen, ID, IDLen); + KID[KLen + IDLen] = (uint8_t)IDLen; + Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02); + if (counterLen != 0) { + Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00); + } + } +} + +static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu) +{ + unsigned int len; + + len = MyMin(YLen, instance->Rsqueeze ); + Cyclist_Up(instance, Y, len, Cu); + Y += len; + YLen -= len; + while (YLen != 0) { + #if defined(CyclistFullBlocks_supported) + if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkin)) { + size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen); + Y += lenProcessed; + YLen -= lenProcessed; + } + else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) { + size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen); + Y += 
lenProcessed; + YLen -= lenProcessed; + } + else + #endif + { + Cyclist_Down(instance, NULL, 0, 0); + len = MyMin(YLen, instance->Rsqueeze ); + Cyclist_Up(instance, Y, len, 0); + Y += len; + YLen -= len; + } + } +} + +static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt) +{ + unsigned int splitLen; + uint8_t P[Cyclist_Rkout]; + uint8_t Cu = 0x80; + + do { + if (decrypt != 0) { + #if defined(CyclistFullBlocks_supported) + if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) { + size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen); + I += lenProcessed; + O += lenProcessed; + IOLen -= lenProcessed; + } + else + #endif + { + splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */ + Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */ + Xoodoo_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */ + Cyclist_Down(instance, O, splitLen, 0x00); + I += splitLen; + O += splitLen; + IOLen -= splitLen; + } + } + else { + #if defined(CyclistFullBlocks_supported) + if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) { + size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen); + I += lenProcessed; + O += lenProcessed; + IOLen -= lenProcessed; + } + else + #endif + { + splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */ + memcpy(P, I, splitLen); + Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */ + Xoodoo_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */ + Cyclist_Down(instance, P, splitLen, 0x00); + I += splitLen; + O += splitLen; + IOLen -= splitLen; + } + } + Cu = 0x00; + } while ( IOLen != 0 ); +} + +/* ------- Cyclist interfaces ------- */ + +void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, 
size_t counterLen) +{ + SnP_StaticInitialize(); + SnP_Initialize(instance->state); + instance->phase = Cyclist_PhaseUp; + instance->mode = Cyclist_ModeHash; + instance->Rabsorb = Cyclist_Rhash; + instance->Rsqueeze = Cyclist_Rhash; + #ifdef OUTPUT + instance->file = 0; + SnP_Initialize( instance->stateShadow ); + #endif + if (KLen != 0) { + Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, counter, counterLen); + } +} + +void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, size_t XLen) +{ + Cyclist_AbsorbAny(instance, X, XLen, instance->Rabsorb, 0x03); +} + +void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen) +{ + assert(instance->mode == Cyclist_ModeKeyed); + Cyclist_Crypt(instance, P, C, PLen, 0); +} + +void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen) +{ + assert(instance->mode == Cyclist_ModeKeyed); + Cyclist_Crypt(instance, C, P, CLen, 1); +} + +void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen) +{ + Cyclist_SqueezeAny(instance, Y, YLen, 0x40); +} + +void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen) +{ + assert(instance->mode == Cyclist_ModeKeyed); + Cyclist_SqueezeAny(instance, K, KLen, 0x20); +} + +void Cyclist_Ratchet(Cyclist_Instance *instance) +{ + uint8_t buffer[Cyclist_lRatchet]; + + assert(instance->mode == Cyclist_ModeKeyed); + /* Squeeze then absorb is the same as overwriting with zeros */ + Cyclist_SqueezeAny(instance, buffer, sizeof(buffer), 0x10); + Cyclist_AbsorbAny(instance, buffer, sizeof(buffer), instance->Rabsorb, 0x00); +} + +#undef SnP_StaticInitialize +#undef SnP_Initialize +#undef SnP_AddBytes +#undef SnP_AddByte +#undef SnP_OverwriteBytes +#undef SnP_ExtractBytes +#undef SnP_ExtractAndAddBytes + +#undef Cyclist_Instance +#undef Cyclist_Initialize +#undef Cyclist_Absorb +#undef Cyclist_Encrypt +#undef Cyclist_Decrypt +#undef Cyclist_Squeeze +#undef Cyclist_SqueezeKey +#undef Cyclist_Ratchet + 
+#undef Cyclist_AbsorbAny +#undef Cyclist_AbsorbKey +#undef Cyclist_SqueezeAny +#undef Cyclist_Down +#undef Cyclist_Up +#undef Cyclist_Crypt + +#undef Cyclist_f_bPrime +#undef Cyclist_Rhash +#undef Cyclist_Rkin +#undef Cyclist_Rkout +#undef Cyclist_lRatchet + +#if defined(CyclistFullBlocks_supported) +#undef Cyclist_AbsorbKeyedFullBlocks +#undef Cyclist_AbsorbHashFullBlocks +#undef Cyclist_SqueezeKeyedFullBlocks +#undef Cyclist_SqueezeHashFullBlocks +#undef Cyclist_EncryptFullBlocks +#undef Cyclist_DecryptFullBlocks +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Xoodoo-SnP.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Xoodoo-SnP.h new file mode 100644 index 0000000..7d0c98b --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Xoodoo-SnP.h @@ -0,0 +1,55 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodoo_SnP_h_ +#define _Xoodoo_SnP_h_ + +#include +#include + +/** For the documentation, see SnP-documentation.h. 
+ */ + +#define Xoodoo_implementation "32-bit optimized ARM assembler implementation" +#define Xoodoo_stateSizeInBytes (3*4*4) +#define Xoodoo_stateAlignment 4 + +#define Xoodoo_StaticInitialize() +void Xoodoo_Initialize(void *state); +#define Xoodoo_AddByte(argS, argData, argOffset) ((uint8_t*)argS)[argOffset] ^= (argData) +void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount); +//void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds); +void Xoodoo_Permute_6rounds(void *state); +void Xoodoo_Permute_12rounds(void *state); +void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length); + +#define Xoodoo_FastXoofff_supported +void Xoofff_AddIs(uint8_t *output, const uint8_t *input, size_t bitLen); +size_t Xoofff_CompressFastLoop(uint8_t *kRoll, uint8_t *xAccu, const uint8_t *input, size_t length); +size_t Xoofff_ExpandFastLoop(uint8_t *yAccu, const uint8_t *kRoll, uint8_t *output, size_t length); + +#define CyclistFullBlocks_supported +size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen); +size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen); +size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen); +size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen); +size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen); +size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen); + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Xoodoo-u1-armv6m-le-gcc.s 
b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Xoodoo-u1-armv6m-le-gcc.s new file mode 100644 index 0000000..91c20c6 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Xoodoo-u1-armv6m-le-gcc.s @@ -0,0 +1,1092 @@ +@ +@ The eXtended Keccak Code Package (XKCP) +@ https://github.com/XKCP/XKCP +@ +@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. +@ +@ Implementation by Ronny Van Keer, hereby denoted as "the implementer". +@ +@ For more information, feedback or questions, please refer to the Keccak Team website: +@ https://keccak.team/ +@ +@ To the extent possible under law, the implementer has waived all copyright +@ and related or neighboring rights to the source code in this file. +@ http://creativecommons.org/publicdomain/zero/1.0/ +@ + +@ WARNING: These functions work only on little endian CPU with@ ARMv6m architecture (Cortex-M0, ...). + + + .thumb + .syntax unified +.text + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_Initialize(void *state) +@ + .align 4 +.global Xoodoo_Initialize +.type Xoodoo_Initialize, %function; +Xoodoo_Initialize: + movs r1, #0 + movs r2, #0 + movs r3, #0 + stmia r0!, { r1 - r3 } + stmia r0!, { r1 - r3 } + stmia r0!, { r1 - r3 } + stmia r0!, { r1 - r3 } + bx lr + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +@ +.global Xoodoo_AddBytes +.type Xoodoo_AddBytes, %function; +Xoodoo_AddBytes: + push {r4,lr} + adds r0, r0, r2 @ state += offset + subs r3, r3, #4 @ .if length >= 4 + bcc Xoodoo_AddBytes_Bytes + movs r2, r0 @ and data pointer and offset both 32-bit .align 8ed + orrs r2, r2, r1 + lsls r2, #30 + bne Xoodoo_AddBytes_Bytes +Xoodoo_AddBytes_LanesLoop: @ then, perform on words + ldr r2, [r0] + ldmia r1!, {r4} + eors r2, r2, r4 + stmia r0!, 
{r2} + subs r3, r3, #4 + bcs Xoodoo_AddBytes_LanesLoop +Xoodoo_AddBytes_Bytes: + adds r3, r3, #4 + beq Xoodoo_AddBytes_Exit + subs r3, r3, #1 +Xoodoo_AddBytes_BytesLoop: + ldrb r2, [r0, r3] + ldrb r4, [r1, r3] + eors r2, r2, r4 + strb r2, [r0, r3] + subs r3, r3, #1 + bcs Xoodoo_AddBytes_BytesLoop +Xoodoo_AddBytes_Exit: + pop {r4,pc} + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +@ +.global Xoodoo_OverwriteBytes +.type Xoodoo_OverwriteBytes, %function; +Xoodoo_OverwriteBytes: + adds r0, r0, r2 @ state += offset + subs r3, r3, #4 @ .if length >= 4 + bcc Xoodoo_OverwriteBytes_Bytes + movs r2, r0 @ and data pointer and offset both 32-bit .align 8ed + orrs r2, r2, r1 + lsls r2, #30 + bne Xoodoo_OverwriteBytes_Bytes +Xoodoo_OverwriteBytes_LanesLoop: @ then, perform on words + ldmia r1!, {r2} + stmia r0!, {r2} + subs r3, r3, #4 + bcs Xoodoo_OverwriteBytes_LanesLoop +Xoodoo_OverwriteBytes_Bytes: + adds r3, r3, #4 + beq Xoodoo_OverwriteBytes_Exit + subs r3, r3, #1 +Xoodoo_OverwriteBytes_BytesLoop: + ldrb r2, [r1, r3] + strb r2, [r0, r3] + subs r3, r3, #1 + bcs Xoodoo_OverwriteBytes_BytesLoop +Xoodoo_OverwriteBytes_Exit: + bx lr + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount) +@ +.global Xoodoo_OverwriteWithZeroes +.type Xoodoo_OverwriteWithZeroes, %function; +Xoodoo_OverwriteWithZeroes: + movs r3, #0 + lsrs r2, r1, #2 + beq Xoodoo_OverwriteWithZeroes_Bytes +Xoodoo_OverwriteWithZeroes_LoopLanes: + stm r0!, { r3 } + subs r2, r2, #1 + bne Xoodoo_OverwriteWithZeroes_LoopLanes +Xoodoo_OverwriteWithZeroes_Bytes: + lsls r1, r1, #32-2 + beq Xoodoo_OverwriteWithZeroes_Exit + lsrs r1, r1, #32-2 +Xoodoo_OverwriteWithZeroes_LoopBytes: + subs r1, r1, #1 + strb r3, [r0, r1] + bne 
Xoodoo_OverwriteWithZeroes_LoopBytes +Xoodoo_OverwriteWithZeroes_Exit: + bx lr + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_ExtractBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +@ +.global Xoodoo_ExtractBytes +.type Xoodoo_ExtractBytes, %function; +Xoodoo_ExtractBytes: + adds r0, r0, r2 @ state += offset + subs r3, r3, #4 @ .if length >= 4 + bcc Xoodoo_ExtractBytes_Bytes + movs r2, r0 @ and data pointer and offset both 32-bit .align 8ed + orrs r2, r2, r1 + lsls r2, #30 + bne Xoodoo_ExtractBytes_Bytes +Xoodoo_ExtractBytes_LanesLoop: @ then, perform on words + ldmia r0!, {r2} + stmia r1!, {r2} + subs r3, r3, #4 + bcs Xoodoo_ExtractBytes_LanesLoop +Xoodoo_ExtractBytes_Bytes: + adds r3, r3, #4 + beq Xoodoo_ExtractBytes_Exit + subs r3, r3, #1 +Xoodoo_ExtractBytes_BytesLoop: + ldrb r2, [r0, r3] + strb r2, [r1, r3] + subs r3, r3, #1 + bcs Xoodoo_ExtractBytes_BytesLoop +Xoodoo_ExtractBytes_Exit: + bx lr + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_ExtractAndAddBytes(void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +@ +.global Xoodoo_ExtractAndAddBytes +.type Xoodoo_ExtractAndAddBytes, %function; +Xoodoo_ExtractAndAddBytes: + push {r4,r5} + adds r0, r0, r3 @ state += offset (offset register no longer needed, reuse for length) + ldr r3, [sp, #8] @ get length argument from stack + subs r3, r3, #4 @ .if length >= 4 + bcc Xoodoo_ExtractAndAddBytes_Bytes + movs r5, r0 @ and input/output/state pointer all 32-bit .align 8ed + orrs r5, r5, r1 + orrs r5, r5, r2 + lsls r5, #30 + bne Xoodoo_ExtractAndAddBytes_Bytes +Xoodoo_ExtractAndAddBytes_LanesLoop: @ then, perform on words + ldmia r0!, {r5} + ldmia r1!, {r4} + eors r5, r5, r4 + stmia r2!, {r5} + subs r3, r3, #4 + bcs Xoodoo_ExtractAndAddBytes_LanesLoop +Xoodoo_ExtractAndAddBytes_Bytes: + adds r3, r3, 
#4 + beq Xoodoo_ExtractAndAddBytes_Exit + subs r3, r3, #1 +Xoodoo_ExtractAndAddBytes_BytesLoop: + ldrb r5, [r0, r3] + ldrb r4, [r1, r3] + eors r5, r5, r4 + strb r5, [r2, r3] + subs r3, r3, #1 + bcs Xoodoo_ExtractAndAddBytes_BytesLoop +Xoodoo_ExtractAndAddBytes_Exit: + pop {r4,r5} + bx lr + .align 4 + + +@ ---------------------------------------------------------------------------- + +@ offsets in RAM state +.equ _oA00 , 0*4 +.equ _oA01 , 1*4 +.equ _oA02 , 2*4 +.equ _oA03 , 3*4 +.equ _oA10 , 4*4 +.equ _oA11 , 5*4 +.equ _oA12 , 6*4 +.equ _oA13 , 7*4 +.equ _oA20 , 8*4 +.equ _oA21 , 9*4 +.equ _oA22 , 10*4 +.equ _oA23 , 11*4 + +@ possible locations of state lanes +.equ locRegL , 1 +.equ locRegH , 2 +.equ locMem , 3 + +@ ---------------------------------------------------------------------------- + +.equ _r0 , 5 +.equ _r1 , 14 +.equ _r2 , 1 + +.equ _w1 , 11 + +.equ _e0 , 2 +.equ _e1 , 8 + +@ ---------------------------------------------------------------------------- + +.macro mXor3 ro, a0, a1, a2, loc, tt + mov \ro, \a1 + eors \ro, \ro, \a2 + .if \loc == locRegL + eors \ro, \ro, \a0 + .else + .if \loc == locRegH + mov \tt, \a0 + .else + ldr \tt, [sp, #\a0] + .endif + eors \ro, \ro, \tt + .endif + .endm + +.macro mXor ro, ri, tt, loc + .if \loc == locRegL + eors \ro, \ro, \ri + .else + .if \loc == locRegH + mov \tt, \ro + eors \tt, \tt, \ri + mov \ro, \tt + .else + ldr \tt, [sp, #\ro] + eors \tt, \tt, \ri + str \tt, [sp, #\ro] + .endif + .endif + .endm + +.macro mChi3 a0,a1,a2,r0,r1,a0s,loc + mov \r1, \a2 + mov \r0, \a1 + bics \r1, \r1, \r0 + eors \a0, \a0, \r1 + .if \loc != locRegL + .if \loc == locRegH + mov \a0s, \a0 + .else + str \a0, [sp, #\a0s] + .endif + .endif + + mov \r0, \a0 + bics \r0, \r0, \a2 + mov \r1, \a1 + eors \r1, \r1, \r0 + mov \a1, \r1 + + bics \r1, \r1, \a0 + eors \a2, \a2, \r1 + .endm + +.macro mRound offsetRC, offsetA03 + + @ Theta: Column Parity Mixer + mXor3 r0, \offsetA03, lr, r7, locMem, r2 + mov r1, r0 + movs r2, #32-(_r1-_r0) + rors r1, r1, 
r2 + eors r1, r1, r0 + movs r2, #32-_r0 + rors r1, r1, r2 + mXor3 r0, r3, r10, r4, locRegL, r2 + mXor r3, r1, r2, locRegL + mXor r10, r1, r2, locRegH + mXor r4, r1, r2, locRegL + + mov r1, r0 + movs r2, #32-(_r1-_r0) + rors r1, r1, r2 + eors r1, r1, r0 + movs r2, #32-_r0 + rors r1, r1, r2 + mXor3 r0, r8, r11, r5, locRegH, r2 + mXor r8, r1, r2, locRegH + mXor r11, r1, r2, locRegH + mXor r5, r1, r2, locRegL + + mov r1, r0 + movs r2, #32-(_r1-_r0) + rors r1, r1, r2 + eors r1, r1, r0 + movs r2, #32-_r0 + rors r1, r1, r2 + mXor3 r0, r9, r12, r6, locRegH, r2 + mXor r9, r1, r2, locRegH + mXor r12, r1, r2, locRegH + mXor r6, r1, r2, locRegL + + mov r1, r0 + movs r2, #32-(_r1-_r0) + rors r1, r1, r2 + eors r1, r1, r0 + movs r2, #32-_r0 + rors r1, r1, r2 + mXor \offsetA03, r1, r2, locMem + mXor lr, r1, r2, locRegH + mXor r7, r1, r2, locRegL + + @ Rho-west: Plane shift + movs r0, #32-_w1 + rors r4, r4, r0 + rors r5, r5, r0 + rors r6, r6, r0 + rors r7, r7, r0 + mov r0, lr + mov lr, r12 + mov r12, r11 + mov r11, r10 + mov r10, r0 + + @ Iota: round constant + ldr r0, [sp, #\offsetRC] + ldmia r0!, {r1} + str r0, [sp, #\offsetRC] + eors r3, r3, r1 + + @ Chi: non linear step, on colums + mChi3 r3, r10, r4, r0, r1, r3, locRegL + mov r2, r8 + mChi3 r2, r11, r5, r0, r1, r8, locRegH + mov r2, r9 + mChi3 r2, r12, r6, r0, r1, r9, locRegH + ldr r2, [sp, #\offsetA03] + mChi3 r2, lr, r7, r0, r1, \offsetA03, locMem + + @ Rho-east: Plane shift + movs r0, #32-1 + mov r1, r10 + rors r1, r1, r0 + mov r10, r1 + mov r1, r11 + rors r1, r1, r0 + mov r11, r1 + mov r1, r12 + rors r1, r1, r0 + mov r12, r1 + mov r1, lr + rors r1, r1, r0 + mov lr, r1 + + movs r0, #32-_e1 + rors r4, r4, r0 + rors r5, r5, r0 + rors r6, r6, r0 + rors r7, r7, r0 + + mov r0, r4 + mov r4, r6 + mov r6, r0 + mov r0, r5 + mov r5, r7 + mov r7, r0 + + .endm + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds) +@ + +@ offsets on stack 
+.equ Xoodoo_Permute_Nrounds_offsetA03 , 0 +.equ Xoodoo_Permute_Nrounds_offsetRC , 4 +.equ Xoodoo_Permute_Nrounds_SAS , 8 +.equ Xoodoo_Permute_Nrounds_offsetState , Xoodoo_Permute_Nrounds_SAS + +.global Xoodoo_Permute_Nrounds +.type Xoodoo_Permute_Nrounds, %function; +Xoodoo_Permute_Nrounds: + push {r4-r6,lr} + mov r2, r8 + mov r3, r9 + mov r4, r10 + mov r5, r11 + push {r0,r2-r5,r7} + + sub sp, #Xoodoo_Permute_Nrounds_SAS + adr r2, Xoodoo_Permute_RoundConstants12 + lsls r1, r1, #2 + subs r2, r2, r1 + str r2, [sp, #Xoodoo_Permute_Nrounds_offsetRC] + + ldm r0!, {r3,r5,r6,r7} + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoodoo_Permute_Nrounds_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoodoo_Permute_Nrouds_Loop: + mRound Xoodoo_Permute_Nrounds_offsetRC, Xoodoo_Permute_Nrounds_offsetA03 + ldr r0, [sp, #Xoodoo_Permute_Nrounds_offsetRC] + ldr r0, [r0] + cmp r0, #0 + beq Xoodoo_Permute_Nrouds_Done + b Xoodoo_Permute_Nrouds_Loop +Xoodoo_Permute_Nrouds_Done: + ldr r0, [sp, #Xoodoo_Permute_Nrounds_offsetState] + + stm r0!, {r3} + mov r1, r8 + mov r2, r9 + ldr r3, [sp, #Xoodoo_Permute_Nrounds_offsetA03] + stm r0!, {r1,r2,r3} + + mov r1, r10 + mov r2, r11 + mov r3, r12 + stm r0!, {r1,r2,r3} + + mov r1, lr + stm r0!, {r1,r4,r5,r6,r7} + + add sp, #Xoodoo_Permute_Nrounds_SAS + pop {r0-r4,r7} + mov r8, r1 + mov r9, r2 + mov r10, r3 + mov r11, r4 + pop {r4-r6,pc} + .align 4 + + +Xoodoo_Permute_RoundConstants: + .long 0x00000058 + .long 0x00000038 + .long 0x000003C0 + .long 0x000000D0 + .long 0x00000120 + .long 0x00000014 + .long 0x00000060 + .long 0x0000002C + .long 0x00000380 + .long 0x000000F0 + .long 0x000001A0 + .long 0x00000012 +Xoodoo_Permute_RoundConstants12: + .long 0 + .align 4 + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_Permute_6rounds( void *state ) +@ +.global Xoodoo_Permute_6rounds +.type Xoodoo_Permute_6rounds, %function; 
+Xoodoo_Permute_6rounds: + movs r1, #6 + b Xoodoo_Permute_Nrounds + .align 4 + + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_Permute_12rounds( void *state ) +@ +.global Xoodoo_Permute_12rounds +.type Xoodoo_Permute_12rounds, %function; +Xoodoo_Permute_12rounds: + movs r1, #12 + b Xoodoo_Permute_Nrounds + .align 4 + + + +.equ Xoofff_BlockSize , 3*4*4 + +@ ---------------------------------------------------------------------------- +@ +@ void Xoofff_AddIs(BitSequence *output, const BitSequence *input, BitLength bitLen) +.global Xoofff_AddIs +.type Xoofff_AddIs, %function; +Xoofff_AddIs: + push {r4-r6,lr} + movs r3, r0 @ check input and output pointer both 32-bit .align 8ed + orrs r3, r3, r1 + lsls r3, r3, #30 + bne Xoofff_AddIs_Bytes + subs r2, r2, #16*8 + bcc Xoofff_AddIs_LessThan16 +Xoofff_AddIs_16Loop: + ldr r3, [r0, #0] + ldr r4, [r0, #4] + ldmia r1!, {r5,r6} + eors r3, r3, r5 + eors r4, r4, r6 + stmia r0!, {r3,r4} + ldr r3, [r0, #0] + ldr r4, [r0, #4] + ldmia r1!, {r5,r6} + eors r3, r3, r5 + eors r4, r4, r6 + stmia r0!, {r3,r4} + subs r2, r2, #16*8 + bcs Xoofff_AddIs_16Loop +Xoofff_AddIs_LessThan16: + adds r2, r2, #16*8 + beq Xoofff_AddIs_Return + subs r2, r2, #4*8 + bcc Xoofff_AddIs_LessThan4 +Xoofff_AddIs_4Loop: + ldr r3, [r0] + ldmia r1!, {r4} + eors r3, r3, r4 + stmia r0!, {r3} + subs r2, r2, #4*8 + bcs Xoofff_AddIs_4Loop +Xoofff_AddIs_LessThan4: + adds r2, r2, #4*8 + beq Xoofff_AddIs_Return +Xoofff_AddIs_Bytes: + subs r2, r2, #8 + bcc Xoofff_AddIs_LessThan1 +Xoofff_AddIs_1Loop: + ldrb r3, [r0] + ldrb r4, [r1] + adds r1, r1, #1 + eors r3, r3, r4 + strb r3, [r0] + adds r0, r0, #1 + subs r2, r2, #8 + bcs Xoofff_AddIs_1Loop +Xoofff_AddIs_LessThan1: + adds r2, r2, #8 + beq Xoofff_AddIs_Return + ldrb r3, [r0] + ldrb r4, [r1] + movs r1, #1 + eors r3, r3, r4 + lsls r1, r1, r2 + subs r1, r1, #1 + ands r3, r3, r1 + strb r3, [r0] +Xoofff_AddIs_Return: + pop {r4-r6,pc} + .align 4 + + +.macro mLdu rv, ri, tt + ldrb 
\rv, [\ri, #3] + lsls \rv, \rv, #8 + ldrb \tt, [\ri, #2] + orrs \rv, \rv, \tt + lsls \rv, \rv, #8 + ldrb \tt, [\ri, #1] + orrs \rv, \rv, \tt + lsls \rv, \rv, #8 + ldrb \tt, [\ri, #0] + orrs \rv, \rv, \tt + adds \ri, \ri, #4 + .endm + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoofff_CompressFastLoop(unsigned char *kRoll, unsigned char *xAccu, const unsigned char *input, size_t length) +@ + +@ offsets on stack +.equ Xoofff_CompressFastLoop_offsetA03 , 0 +.equ Xoofff_CompressFastLoop_offsetRC , 4 +.equ Xoofff_CompressFastLoop_SAS , 8 +.equ Xoofff_CompressFastLoop_kRoll , Xoofff_CompressFastLoop_SAS+0 +.equ Xoofff_CompressFastLoop_input , Xoofff_CompressFastLoop_SAS+4 +.equ Xoofff_CompressFastLoop_xAccu , Xoofff_CompressFastLoop_SAS+8+16 +.equ Xoofff_CompressFastLoop_iInput , Xoofff_CompressFastLoop_SAS+12+16 +.equ Xoofff_CompressFastLoop_length , Xoofff_CompressFastLoop_SAS+16+16 + +.global Xoofff_CompressFastLoop +.type Xoofff_CompressFastLoop, %function; +Xoofff_CompressFastLoop: + subs r3, #Xoofff_BlockSize @ length must be greater than block size + push {r1-r7,lr} + mov r4, r8 + mov r5, r9 + mov r6, r10 + mov r7, r11 + push {r0,r2,r4-r7} + sub sp, #Xoofff_CompressFastLoop_SAS + ldm r0!, {r3,r5,r6,r7} @ get initial kRoll + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoofff_CompressFastLoop_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoofff_CompressFastLoop_Loop: + adr r1, Xoofff_CompressFastLoop_RoundConstants6 + str r1, [sp, #Xoofff_CompressFastLoop_offsetRC] + + ldr r0, [sp, #Xoofff_CompressFastLoop_input] @ add input + lsls r1, r0, #30 + bne Xoofff_CompressFastLoop_Unaligned + +Xoofff_CompressFastLoop_Aligned: + ldmia r0!, {r1} + eors r3, r3, r1 + ldmia r0!, {r1} + mov r2, r8 + eors r2, r2, r1 + mov r8, r2 + ldmia r0!, {r1} + mov r2, r9 + eors r2, r2, r1 + mov r9, r2 + ldmia r0!, {r1} + ldr r2, [sp, #Xoofff_CompressFastLoop_offsetA03] + eors 
r2, r2, r1 + str r2, [sp, #Xoofff_CompressFastLoop_offsetA03] + + ldmia r0!, {r1} + mov r2, r10 + eors r2, r2, r1 + mov r10, r2 + ldmia r0!, {r1} + mov r2, r11 + eors r2, r2, r1 + mov r11, r2 + ldmia r0!, {r1} + mov r2, r12 + eors r2, r2, r1 + mov r12, r2 + ldmia r0!, {r1} + mov r2, lr + eors r2, r2, r1 + mov lr, r2 + + ldmia r0!, {r1,r2} + eors r4, r4, r1 + eors r5, r5, r2 + ldmia r0!, {r1,r2} + eors r6, r6, r1 + eors r7, r7, r2 + + b Xoofff_CompressFastLoop_Permute + .align 4 +Xoofff_CompressFastLoop_RoundConstants6: + .long 0x00000060 + .long 0x0000002C + .long 0x00000380 + .long 0x000000F0 + .long 0x000001A0 + .long 0x00000012 + .long 0 + +Xoofff_CompressFastLoop_Unaligned: + mLdu r1, r0, r2 + eors r3, r3, r1 + mLdu r1, r0, r2 + mov r2, r8 + eors r2, r2, r1 + mov r8, r2 + mLdu r1, r0, r2 + mov r2, r9 + eors r2, r2, r1 + mov r9, r2 + mLdu r1, r0, r2 + ldr r2, [sp, #Xoofff_CompressFastLoop_offsetA03] + eors r2, r2, r1 + str r2, [sp, #Xoofff_CompressFastLoop_offsetA03] + + mLdu r1, r0, r2 + mov r2, r10 + eors r2, r2, r1 + mov r10, r2 + mLdu r1, r0, r2 + mov r2, r11 + eors r2, r2, r1 + mov r11, r2 + mLdu r1, r0, r2 + mov r2, r12 + eors r2, r2, r1 + mov r12, r2 + mLdu r1, r0, r2 + mov r2, lr + eors r2, r2, r1 + mov lr, r2 + + mLdu r1, r0, r2 + eors r4, r4, r1 + mLdu r1, r0, r2 + eors r5, r5, r1 + mLdu r1, r0, r2 + eors r6, r6, r1 + mLdu r1, r0, r2 + eors r7, r7, r1 + +Xoofff_CompressFastLoop_Permute: + str r0, [sp, #Xoofff_CompressFastLoop_input] +Xoofff_CompressFastLoop_PermuteLoop: + mRound Xoofff_CompressFastLoop_offsetRC, Xoofff_CompressFastLoop_offsetA03 + ldr r0, [sp, #Xoofff_CompressFastLoop_offsetRC] + ldr r0, [r0] + cmp r0, #0 + beq Xoofff_CompressFastLoop_PermuteDone + b Xoofff_CompressFastLoop_PermuteLoop +Xoofff_CompressFastLoop_PermuteDone: + + @ Extract and add into xAccu + ldr r0, [sp, #Xoofff_CompressFastLoop_xAccu] + + ldr r1, [r0] + eors r1, r1, r3 + stmia r0!, {r1} + + ldr r1, [r0] + mov r2, r8 + eors r1, r1, r2 + stmia r0!, {r1} + + ldr r1, [r0] 
+ mov r2, r9 + eors r1, r1, r2 + stmia r0!, {r1} + + ldr r1, [r0] + ldr r2, [sp, #Xoofff_CompressFastLoop_offsetA03] + eors r1, r1, r2 + stmia r0!, {r1} + + + ldr r1, [r0] + mov r2, r10 + eors r1, r1, r2 + stmia r0!, {r1} + ldr r1, [r0] + mov r2, r11 + eors r1, r1, r2 + stmia r0!, {r1} + ldr r1, [r0] + mov r2, r12 + eors r1, r1, r2 + stmia r0!, {r1} + ldr r1, [r0] + mov r2, lr + eors r1, r1, r2 + stmia r0!, {r1} + + ldr r1, [r0, #0] + ldr r2, [r0, #4] + ldr r3, [r0, #8] + eors r1, r1, r4 + ldr r4, [r0, #12] + eors r2, r2, r5 + eors r3, r3, r6 + eors r4, r4, r7 + stm r0!, {r1,r2,r3,r4} + + @roll kRoll-c + ldr r0, [sp, #Xoofff_CompressFastLoop_kRoll] + ldmia r0!, {r7} + ldmia r0!, {r4-r6} + ldmia r0!, {r3} + ldmia r0!, {r1,r2} + mov r8, r1 + mov r9, r2 + ldmia r0!, {r1,r2} + str r1, [sp, #Xoofff_CompressFastLoop_offsetA03] + mov r10, r2 + ldmia r0!, {r1,r2} + mov r11, r1 + mov r12, r2 + ldmia r0!, {r1} + mov lr, r1 + + lsls r1, r7, #13 + eors r7, r7, r1 + mov r1, r3 + movs r2, #32-3 + rors r1, r1, r2 + eors r7, r7, r1 + + subs r0, r0, #Xoofff_BlockSize + stmia r0!, {r3} + mov r1, r8 + mov r2, r9 + stmia r0!, {r1,r2} + ldr r1, [sp, #Xoofff_CompressFastLoop_offsetA03] + mov r2, r10 + stmia r0!, {r1,r2} + mov r1, r11 + mov r2, r12 + stmia r0!, {r1,r2} + mov r1, lr + stmia r0!, {r1,r4-r7} + + @ loop management + ldr r0, [sp, #Xoofff_CompressFastLoop_length] + subs r0, #Xoofff_BlockSize + str r0, [sp, #Xoofff_CompressFastLoop_length] + bcc Xoofff_CompressFastLoop_Done + b Xoofff_CompressFastLoop_Loop +Xoofff_CompressFastLoop_Done: + @ return number of bytes processed + ldr r0, [sp, #Xoofff_CompressFastLoop_input] + ldr r1, [sp, #Xoofff_CompressFastLoop_iInput] + subs r0, r0, r1 + add sp, #Xoofff_CompressFastLoop_SAS+8 + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r1-r7,pc} + .align 4 + + +.macro mStu rv, ro + strb \rv, [\ro, #0] + lsrs \rv, \rv, #8 + strb \rv, [\ro, #1] + lsrs \rv, \rv, #8 + strb \rv, [\ro, #2] + lsrs \rv, \rv, #8 + strb \rv, 
[\ro, #3] + adds \ro, \ro, #4 + .endm + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoofff_ExpandFastLoop(unsigned char *yAccu, const unsigned char *kRoll, unsigned char *output, size_t length) +@ + +@ offsets on stack +.equ Xoofff_ExpandFastLoop_offsetA03, 0 +.equ Xoofff_ExpandFastLoop_offsetRC , 4 +.equ Xoofff_ExpandFastLoop_SAS , 8 +.equ Xoofff_ExpandFastLoop_yAccu , Xoofff_ExpandFastLoop_SAS+0 +.equ Xoofff_ExpandFastLoop_output , Xoofff_ExpandFastLoop_SAS+4 +.equ Xoofff_ExpandFastLoop_kRoll , Xoofff_ExpandFastLoop_SAS+8+16 +.equ Xoofff_ExpandFastLoop_iOutput , Xoofff_ExpandFastLoop_SAS+12+16 +.equ Xoofff_ExpandFastLoop_length , Xoofff_ExpandFastLoop_SAS+16+16 + +.global Xoofff_ExpandFastLoop +.type Xoofff_ExpandFastLoop, %function; +Xoofff_ExpandFastLoop: + subs r3, #Xoofff_BlockSize @ length must be greater than block size + push {r1-r7,lr} + mov r4, r8 + mov r5, r9 + mov r6, r10 + mov r7, r11 + push {r0,r2,r4-r7} + sub sp, #Xoofff_ExpandFastLoop_SAS + + ldm r0!, {r3,r5,r6,r7} @ get initial yAccu + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoofff_ExpandFastLoop_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoofff_ExpandFastLoop_Loop: + adr r1, Xoofff_ExpandFastLoop_RoundConstants6 + str r1, [sp, #Xoofff_ExpandFastLoop_offsetRC] +Xoofff_ExpandFastLoop_PermuteLoop: + mRound Xoofff_ExpandFastLoop_offsetRC, Xoofff_ExpandFastLoop_offsetA03 + ldr r0, [sp, #Xoofff_ExpandFastLoop_offsetRC] + ldr r0, [r0] + cmp r0, #0 + beq Xoofff_ExpandFastLoop_PermuteDone + b Xoofff_ExpandFastLoop_PermuteLoop +Xoofff_ExpandFastLoop_RoundConstants6: + .long 0x00000060 + .long 0x0000002C + .long 0x00000380 + .long 0x000000F0 + .long 0x000001A0 + .long 0x00000012 + .long 0 +Xoofff_ExpandFastLoop_PermuteDone: + @ Add k and extract + ldr r0, [sp, #Xoofff_ExpandFastLoop_kRoll] + ldr r1, [sp, #Xoofff_ExpandFastLoop_output] @ add input + lsls r2, r1, #30 + bne 
Xoofff_ExpandFastLoop_Unaligned +Xoofff_ExpandFastLoop_Aligned: + ldmia r0!, {r2} + eors r2, r2, r3 + stmia r1!, {r2} + ldmia r0!, {r2} + mov r3, r8 + eors r2, r2, r3 + stmia r1!, {r2} + ldmia r0!, {r2} + mov r3, r9 + eors r2, r2, r3 + stmia r1!, {r2} + ldmia r0!, {r2} + ldr r3, [sp, #Xoofff_ExpandFastLoop_offsetA03] + eors r2, r2, r3 + stmia r1!, {r2} + + ldmia r0!, {r2} + mov r3, r10 + eors r2, r2, r3 + stmia r1!, {r2} + ldmia r0!, {r2} + mov r3, r11 + eors r2, r2, r3 + stmia r1!, {r2} + ldmia r0!, {r2} + mov r3, r12 + eors r2, r2, r3 + stmia r1!, {r2} + ldmia r0!, {r2} + mov r3, lr + eors r2, r2, r3 + stmia r1!, {r2} + + ldmia r0!, {r2,r3} + eors r2, r2, r4 + eors r3, r3, r5 + stmia r1!, {r2,r3} + ldmia r0!, {r2,r3} + eors r2, r2, r6 + eors r3, r3, r7 + stmia r1!, {r2,r3} + b Xoofff_ExpandFastLoop_ExtractDone + +Xoofff_ExpandFastLoop_Unaligned: + ldmia r0!, {r2} + eors r2, r2, r3 + mStu r2, r1 + ldmia r0!, {r2} + mov r3, r8 + eors r2, r2, r3 + mStu r2, r1 + ldmia r0!, {r2} + mov r3, r9 + eors r2, r2, r3 + mStu r2, r1 + ldmia r0!, {r2} + ldr r3, [sp, #Xoofff_ExpandFastLoop_offsetA03] + eors r2, r2, r3 + mStu r2, r1 + + ldmia r0!, {r2} + mov r3, r10 + eors r2, r2, r3 + mStu r2, r1 + ldmia r0!, {r2} + mov r3, r11 + eors r2, r2, r3 + mStu r2, r1 + ldmia r0!, {r2} + mov r3, r12 + eors r2, r2, r3 + mStu r2, r1 + ldmia r0!, {r2} + mov r3, lr + eors r2, r2, r3 + mStu r2, r1 + + ldmia r0!, {r2,r3} + eors r2, r2, r4 + mStu r2, r1 + eors r3, r3, r5 + mStu r3, r1 + ldmia r0!, {r2,r3} + eors r2, r2, r6 + mStu r2, r1 + eors r3, r3, r7 + mStu r3, r1 + +Xoofff_ExpandFastLoop_ExtractDone: + str r1, [sp, #Xoofff_ExpandFastLoop_output] + + @ roll-e yAccu + ldr r0, [sp, #Xoofff_ExpandFastLoop_yAccu] + ldmia r0!, {r7} + ldmia r0!, {r4-r6} + ldmia r0!, {r3} + ldmia r0!, {r1,r2} + mov r8, r1 + mov r9, r2 + ldmia r0!, {r1,r2} + str r1, [sp, #Xoofff_ExpandFastLoop_offsetA03] + mov r10, r2 + ldmia r0!, {r1,r2} + mov r11, r1 + mov r12, r2 + ldmia r0!, {r1} + mov lr, r1 + + mov r1, r10 + 
ands r1, r1, r3 + movs r2, #32-5 + rors r7, r7, r2 + eors r7, r7, r1 + movs r2, #32-13 + mov r1, r3 + rors r1, r1, r2 + eors r7, r7, r1 + movs r1, #7 + eors r7, r7, r1 + + subs r0, r0, #Xoofff_BlockSize + stmia r0!, {r3} + mov r1, r8 + mov r2, r9 + stmia r0!, {r1,r2} + ldr r1, [sp, #Xoofff_ExpandFastLoop_offsetA03] + mov r2, r10 + stmia r0!, {r1,r2} + mov r1, r11 + mov r2, r12 + stmia r0!, {r1,r2} + mov r1, lr + stmia r0!, {r1,r4-r7} + + @ loop management + ldr r0, [sp, #Xoofff_ExpandFastLoop_length] + subs r0, #Xoofff_BlockSize + str r0, [sp, #Xoofff_ExpandFastLoop_length] + bcc Xoofff_ExpandFastLoop_Done + b Xoofff_ExpandFastLoop_Loop +Xoofff_ExpandFastLoop_Done: + @ return number of bytes processed + ldr r0, [sp, #Xoofff_ExpandFastLoop_output] + ldr r1, [sp, #Xoofff_ExpandFastLoop_iOutput] + subs r0, r0, r1 + add sp, #Xoofff_ExpandFastLoop_SAS+8 + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r1-r7,pc} + .align 4 + + diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Xoodoo.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Xoodoo.h new file mode 100644 index 0000000..1b6f1a9 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Xoodoo.h @@ -0,0 +1,79 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodoo_h_ +#define _Xoodoo_h_ + +#include +#include + +#define MAXROUNDS 12 +#define NROWS 3 +#define NCOLUMS 4 +#define NLANES (NCOLUMS*NROWS) + +/* Round constants */ +#define _rc12 0x00000058 +#define _rc11 0x00000038 +#define _rc10 0x000003C0 +#define _rc9 0x000000D0 +#define _rc8 0x00000120 +#define _rc7 0x00000014 +#define _rc6 0x00000060 +#define _rc5 0x0000002C +#define _rc4 0x00000380 +#define _rc3 0x000000F0 +#define _rc2 0x000001A0 +#define _rc1 0x00000012 + + +#if !defined(ROTL32) + #if defined (__arm__) && !defined(__GNUC__) + #define ROTL32(a, offset) __ror(a, (32-(offset))%32) + #elif defined(_MSC_VER) + #define ROTL32(a, offset) _rotl(a, (offset)%32) + #else + #define ROTL32(a, offset) ((((uint32_t)a) << ((offset)%32)) ^ (((uint32_t)a) >> ((32-(offset))%32))) + #endif +#endif + +#if !defined(READ32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define READ32_UNALIGNED(argAddress) (*((const __packed uint32_t*)(argAddress))) + #elif defined(_MSC_VER) + #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress))) + #else + #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress))) + #endif +#endif + +#if !defined(WRITE32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define WRITE32_UNALIGNED(argAddress, argData) (*((__packed uint32_t*)(argAddress)) = (argData)) + #elif defined(_MSC_VER) + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #else + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #endif +#endif + +#if !defined(index) + #define index(__x,__y) ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS)) +#endif + +typedef uint32_t tXoodooLane; + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Xoodyak-parameters.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Xoodyak-parameters.h new file mode 100644 index 
0000000..a8c34d8 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Xoodyak-parameters.h @@ -0,0 +1,26 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodyak_parameters_h_ +#define _Xoodyak_parameters_h_ + +#define Xoodyak_f_bPrime 48 +#define Xoodyak_Rhash 16 +#define Xoodyak_Rkin 44 +#define Xoodyak_Rkout 24 +#define Xoodyak_lRatchet 16 + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Xoodyak-u1-armv6m-le-gcc.s b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Xoodyak-u1-armv6m-le-gcc.s new file mode 100644 index 0000000..91ab5a2 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Xoodyak-u1-armv6m-le-gcc.s @@ -0,0 +1,1165 @@ +@ +@ The eXtended Keccak Code Package (XKCP) +@ https://github.com/XKCP/XKCP +@ +@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. +@ +@ Implementation by Ronny Van Keer, hereby denoted as "the implementer". +@ +@ For more information, feedback or questions, please refer to the Keccak Team website: +@ https://keccak.team/ +@ +@ To the extent possible under law, the implementer has waived all copyright +@ and related or neighboring rights to the source code in this file. +@ http://creativecommons.org/publicdomain/zero/1.0/ +@ + +@ WARNING: These functions work only on little endian CPU with@ ARMv6m architecture (e.g., Cortex-M0). 
+ + + .thumb + .syntax unified +.text + +@ ---------------------------------------------------------------------------- + +@ offsets in RAM state +.equ _oA00 , 0*4 +.equ _oA01 , 1*4 +.equ _oA02 , 2*4 +.equ _oA03 , 3*4 +.equ _oA10 , 4*4 +.equ _oA11 , 5*4 +.equ _oA12 , 6*4 +.equ _oA13 , 7*4 +.equ _oA20 , 8*4 +.equ _oA21 , 9*4 +.equ _oA22 , 10*4 +.equ _oA23 , 11*4 + +@ possible locations of state lanes +.equ locRegL , 1 +.equ locRegH , 2 +.equ locMem , 3 + +@ ---------------------------------------------------------------------------- + +.equ _r0 , 5 +.equ _r1 , 14 +.equ _r2 , 1 + +.equ _w1 , 11 + +.equ _e0 , 2 +.equ _e1 , 8 + +@ ---------------------------------------------------------------------------- + +.macro mLoadU r, p, o, t + ldrb \r, [\p, #\o+0] + ldrb \t, [\p, #\o+1] + lsls \t, \t, #8 + orrs \r, \r, \t + ldrb \t, [\p, #\o+2] + lsls \t, \t, #16 + orrs \r, \r, \t + ldrb \t, [\p, #\o+3] + lsls \t, \t, #24 + orrs \r, \r, \t + .endm + +.macro mStoreU p, o, s, t, loc + .if \loc == locRegL + strb \s, [\p, #\o+0] + lsrs \t, \s, #8 + .else + mov \t, \s + strb \t, [\p, #\o+0] + lsrs \t, \t, #8 + .endif + strb \t, [\p, #\o+1] + lsrs \t, \t, #8 + strb \t, [\p, #\o+2] + lsrs \t, \t, #8 + strb \t, [\p, #\o+3] + .endm + +.macro mXor3 ro, a0, a1, a2, loc, tt + mov \ro, \a1 + eors \ro, \ro, \a2 + .if \loc == locRegL + eors \ro, \ro, \a0 + .else + .if \loc == locRegH + mov \tt, \a0 + .else + ldr \tt, [sp, #\a0] + .endif + eors \ro, \ro, \tt + .endif + .endm + +.macro mXor ro, ri, tt, loc + .if \loc == locRegL + eors \ro, \ro, \ri + .else + .if \loc == locRegH + mov \tt, \ro + eors \tt, \tt, \ri + mov \ro, \tt + .else + ldr \tt, [sp, #\ro] + eors \tt, \tt, \ri + str \tt, [sp, #\ro] + .endif + .endif + .endm + +.macro mChi3 a0,a1,a2,r0,r1,a0s,loc + mov \r1, \a2 + mov \r0, \a1 + bics \r1, \r1, \r0 + eors \a0, \a0, \r1 + .if \loc != locRegL + .if \loc == locRegH + mov \a0s, \a0 + .else + str \a0, [sp, #\a0s] + .endif + .endif + + mov \r0, \a0 + bics \r0, \r0, \a2 + mov \r1, \a1 
+ eors \r1, \r1, \r0 + mov \a1, \r1 + + bics \r1, \r1, \a0 + eors \a2, \a2, \r1 + .endm + +.macro mRound offsetRC, offsetA03 + + @ Theta: Column Parity Mixer + mXor3 r0, \offsetA03, lr, r7, locMem, r2 + mov r1, r0 + movs r2, #32-(_r1-_r0) + rors r1, r1, r2 + eors r1, r1, r0 + movs r2, #32-_r0 + rors r1, r1, r2 + mXor3 r0, r3, r10, r4, locRegL, r2 + mXor r3, r1, r2, locRegL + mXor r10, r1, r2, locRegH + mXor r4, r1, r2, locRegL + + mov r1, r0 + movs r2, #32-(_r1-_r0) + rors r1, r1, r2 + eors r1, r1, r0 + movs r2, #32-_r0 + rors r1, r1, r2 + mXor3 r0, r8, r11, r5, locRegH, r2 + mXor r8, r1, r2, locRegH + mXor r11, r1, r2, locRegH + mXor r5, r1, r2, locRegL + + mov r1, r0 + movs r2, #32-(_r1-_r0) + rors r1, r1, r2 + eors r1, r1, r0 + movs r2, #32-_r0 + rors r1, r1, r2 + mXor3 r0, r9, r12, r6, locRegH, r2 + mXor r9, r1, r2, locRegH + mXor r12, r1, r2, locRegH + mXor r6, r1, r2, locRegL + + mov r1, r0 + movs r2, #32-(_r1-_r0) + rors r1, r1, r2 + eors r1, r1, r0 + movs r2, #32-_r0 + rors r1, r1, r2 + mXor \offsetA03, r1, r2, locMem + mXor lr, r1, r2, locRegH + mXor r7, r1, r2, locRegL + + @ Rho-west: Plane shift + movs r0, #32-_w1 + rors r4, r4, r0 + rors r5, r5, r0 + rors r6, r6, r0 + rors r7, r7, r0 + mov r0, lr + mov lr, r12 + mov r12, r11 + mov r11, r10 + mov r10, r0 + + @ Iota: round constant + ldr r0, [sp, #\offsetRC] + ldmia r0!, {r1} + str r0, [sp, #\offsetRC] + eors r3, r3, r1 + + @ Chi: non linear step, on colums + mChi3 r3, r10, r4, r0, r1, r3, locRegL + mov r2, r8 + mChi3 r2, r11, r5, r0, r1, r8, locRegH + mov r2, r9 + mChi3 r2, r12, r6, r0, r1, r9, locRegH + ldr r2, [sp, #\offsetA03] + mChi3 r2, lr, r7, r0, r1, \offsetA03, locMem + + @ Rho-east: Plane shift + movs r0, #32-1 + mov r1, r10 + rors r1, r1, r0 + mov r10, r1 + mov r1, r11 + rors r1, r1, r0 + mov r11, r1 + mov r1, r12 + rors r1, r1, r0 + mov r12, r1 + mov r1, lr + rors r1, r1, r0 + mov lr, r1 + + movs r0, #32-_e1 + rors r4, r4, r0 + rors r5, r5, r0 + rors r6, r6, r0 + rors r7, r7, r0 + + mov r0, r4 
+ mov r4, r6 + mov r6, r0 + mov r0, r5 + mov r5, r7 + mov r7, r0 + + .endm + +@ ---------------------------------------------------------------------------- +@ +@ Xoodoo_Permute_12roundsAsm +@ + +@ offsets on stack +.equ Xoodoo_Permute_12rounds_offsetA03 , 0 +.equ Xoodoo_Permute_12rounds_offsetRC , 4 +.equ Xoodoo_Permute_12rounds_offsetReturn, 8 +.equ Xoodoo_Permute_12rounds_SAS , 12 + + .align 4 +.type Xoodoo_Permute_12roundsAsm, %function; +Xoodoo_Permute_12roundsAsm: + adr r2, Xoodoo_Permute_RoundConstants12 + str r2, [sp, #Xoodoo_Permute_12rounds_offsetRC] +Xoodoo_Permute_12rounds_Loop: + mRound Xoodoo_Permute_12rounds_offsetRC, Xoodoo_Permute_12rounds_offsetA03 + ldr r0, [sp, #Xoodoo_Permute_12rounds_offsetRC] + ldr r0, [r0] + cmp r0, #0 + beq Xoodoo_Permute_12rounds_Done + b Xoodoo_Permute_12rounds_Loop +Xoodoo_Permute_12rounds_Done: + ldr r0, [sp, #Xoodoo_Permute_12rounds_offsetReturn] + bx r0 + .align 4 +Xoodoo_Permute_RoundConstants12: + .long 0x00000058 + .long 0x00000038 + .long 0x000003C0 + .long 0x000000D0 + .long 0x00000120 + .long 0x00000014 + .long 0x00000060 + .long 0x0000002C + .long 0x00000380 + .long 0x000000F0 + .long 0x000001A0 + .long 0x00000012 + .long 0 + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen) +@ { +@ size_t initialLength = XLen@ +@ +@ do { +@ SnP_Permute(state )@ /* Xoodyak_Up(instance, NULL, 0, 0)@ */ +@ SnP_AddBytes(state, X, 0, Xoodyak_Rkin)@ /* Xoodyak_Down(instance, X, Xoodyak_Rkin, 0)@ */ +@ SnP_AddByte(state, 0x01, Xoodyak_Rkin)@ +@ X += Xoodyak_Rkin@ +@ XLen -= Xoodyak_Rkin@ +@ } while (XLen >= Xoodyak_Rkin)@ +@ return initialLength - XLen@ +@ } +@ +.equ XoodyakAbsorb_offsetState , (Xoodoo_Permute_12rounds_SAS+0) +.equ XoodyakAbsorb_offsetX , (Xoodoo_Permute_12rounds_SAS+4) +.equ XoodyakAbsorb_offsetXLen , (Xoodoo_Permute_12rounds_SAS+8) +.equ XoodyakAbsorb_offsetInitialLen , 
(Xoodoo_Permute_12rounds_SAS+12) + +.equ XoodyakAbsorb_SAS , (Xoodoo_Permute_12rounds_SAS+20) + + .align 4 +.global Xoodyak_AbsorbKeyedFullBlocks +.type Xoodyak_AbsorbKeyedFullBlocks, %function; +Xoodyak_AbsorbKeyedFullBlocks: + push {r3-r7,lr} + mov r4, r8 + mov r5, r9 + mov r6, r10 + mov r7, r11 + push {r4-r7} + + sub sp, #XoodyakAbsorb_SAS + str r0, [sp, #XoodyakAbsorb_offsetState] @ setup variables on stack + str r1, [sp, #XoodyakAbsorb_offsetX] + str r2, [sp, #XoodyakAbsorb_offsetInitialLen] + subs r2, r2, #44 + str r2, [sp, #XoodyakAbsorb_offsetXLen] + ldr r5, =Xoodyak_AbsorbKeyedFullBlocks_Ret+1 + str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn] + + ldm r0!, {r3,r5,r6,r7} @ state in registers + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoodyak_AbsorbKeyedFullBlocks_Loop: + ldr r0, =Xoodoo_Permute_12roundsAsm + bx r0 + .align 4 + .ltorg +Xoodyak_AbsorbKeyedFullBlocks_Ret: + ldr r0, [sp, #XoodyakAbsorb_offsetX] + lsls r1, r0, #30 + bne Xoodyak_AbsorbKeyedFullBlocks_Unaligned +Xoodyak_AbsorbKeyedFullBlocks_Aligned: + ldmia r0!, {r1} + eors r3, r3, r1 + ldmia r0!, {r1} + mov r2, r8 + eors r2, r2, r1 + mov r8, r2 + ldmia r0!, {r1} + mov r2, r9 + eors r2, r2, r1 + mov r9, r2 + ldmia r0!, {r1} + ldr r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] + eors r2, r2, r1 + str r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] + + ldmia r0!, {r1} + mov r2, r10 + eors r2, r2, r1 + mov r10, r2 + ldmia r0!, {r1} + mov r2, r11 + eors r2, r2, r1 + mov r11, r2 + ldmia r0!, {r1} + mov r2, r12 + eors r2, r2, r1 + mov r12, r2 + ldmia r0!, {r1} + mov r2, lr + eors r2, r2, r1 + mov lr, r2 + + ldmia r0!, {r1} + eors r4, r4, r1 + ldmia r0!, {r1} + eors r5, r5, r1 + ldmia r0!, {r1} + eors r6, r6, r1 +Xoodyak_AbsorbKeyedFullBlocks_EndLoop: + str r0, [sp, #XoodyakAbsorb_offsetX] + movs r2, #1 + eors r7, r7, r2 + ldr r1, [sp, #XoodyakAbsorb_offsetXLen] + 
subs r1, r1, #44 + str r1, [sp, #XoodyakAbsorb_offsetXLen] + bcs Xoodyak_AbsorbKeyedFullBlocks_Loop + ldr r0, [sp, #XoodyakAbsorb_offsetState] + stm r0!, {r3} + mov r1, r8 + mov r2, r9 + ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03] + stm r0!, {r1,r2,r3} + mov r1, r10 + mov r2, r11 + mov r3, r12 + stm r0!, {r1,r2,r3} + mov r1, lr + stm r0!, {r1,r4,r5,r6,r7} + + ldr r0, [sp, #XoodyakAbsorb_offsetInitialLen] + ldr r2, [sp, #XoodyakAbsorb_offsetXLen] + adds r2, r2, #44 + subs r0, r0, r2 + + add sp, #XoodyakAbsorb_SAS + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r3-r7,pc} +Xoodyak_AbsorbKeyedFullBlocks_Unaligned: + mLoadU r1, r0, 0, r2 + eors r3, r3, r1 + + mLoadU r1, r0, 4, r2 + mov r2, r8 + eors r2, r2, r1 + mov r8, r2 + + mLoadU r1, r0, 8, r2 + mov r2, r9 + eors r2, r2, r1 + mov r9, r2 + + mLoadU r1, r0, 12, r2 + ldr r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] + eors r2, r2, r1 + str r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] + + mLoadU r1, r0, 16, r2 + mov r2, r10 + eors r2, r2, r1 + mov r10, r2 + + mLoadU r1, r0, 20, r2 + mov r2, r11 + eors r2, r2, r1 + mov r11, r2 + + mLoadU r1, r0, 24, r2 + mov r2, r12 + eors r2, r2, r1 + mov r12, r2 + + mLoadU r1, r0, 28, r2 + mov r2, lr + eors r2, r2, r1 + mov lr, r2 + + adds r0, r0, #32 + mLoadU r1, r0, 0, r2 + eors r4, r4, r1 + mLoadU r1, r0, 4, r2 + eors r5, r5, r1 + mLoadU r1, r0, 8, r2 + eors r6, r6, r1 + adds r0, r0, #12 + b Xoodyak_AbsorbKeyedFullBlocks_EndLoop + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen) +@ { +@ size_t initialLength = XLen@ +@ +@ do { +@ SnP_Permute(state )@ /* Xoodyak_Up(instance, NULL, 0, 0)@ */ +@ SnP_AddBytes(state, X, 0, Xoodyak_Rhash)@ /* Xoodyak_Down(instance, X, Xoodyak_Rhash, 0)@ */ +@ SnP_AddByte(state, 0x01, Xoodyak_Rhash)@ +@ X += Xoodyak_Rhash@ +@ XLen -= Xoodyak_Rhash@ +@ } while (XLen >= Xoodyak_Rhash)@ +@ return initialLength - 
XLen@ +@ } +@ + .align 4 +.global Xoodyak_AbsorbHashFullBlocks +.type Xoodyak_AbsorbHashFullBlocks, %function; +Xoodyak_AbsorbHashFullBlocks: + push {r3-r7,lr} + mov r4, r8 + mov r5, r9 + mov r6, r10 + mov r7, r11 + push {r4-r7} + + sub sp, #XoodyakAbsorb_SAS + str r0, [sp, #XoodyakAbsorb_offsetState] @ setup variables on stack + str r1, [sp, #XoodyakAbsorb_offsetX] + str r2, [sp, #XoodyakAbsorb_offsetInitialLen] + subs r2, r2, #16 + str r2, [sp, #XoodyakAbsorb_offsetXLen] + ldr r5, =Xoodyak_AbsorbHashFullBlocks_Ret+1 + str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn] + + ldm r0!, {r3,r5,r6,r7} @ state in registers + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoodyak_AbsorbHashFullBlocks_Loop: + ldr r0, =Xoodoo_Permute_12roundsAsm + bx r0 + .align 4 + .ltorg +Xoodyak_AbsorbHashFullBlocks_Ret: + ldr r0, [sp, #XoodyakAbsorb_offsetX] + lsls r1, r0, #30 + bne Xoodyak_AbsorbHashFullBlocks_Unaligned +Xoodyak_AbsorbHashFullBlocks_Aligned: + ldmia r0!, {r1} + eors r3, r3, r1 + ldmia r0!, {r1} + mov r2, r8 + eors r2, r2, r1 + mov r8, r2 + ldmia r0!, {r1} + mov r2, r9 + eors r2, r2, r1 + mov r9, r2 + ldmia r0!, {r1} + ldr r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] + eors r2, r2, r1 + str r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] +Xoodyak_AbsorbHashFullBlocks_EndLoop: + str r0, [sp, #XoodyakAbsorb_offsetX] + movs r2, #1 + mov r1, r10 + eors r1, r1, r2 + mov r10, r1 + ldr r1, [sp, #XoodyakAbsorb_offsetXLen] + subs r1, r1, #16 + str r1, [sp, #XoodyakAbsorb_offsetXLen] + bcs Xoodyak_AbsorbHashFullBlocks_Loop + ldr r0, [sp, #XoodyakAbsorb_offsetState] + + stm r0!, {r3} + mov r1, r8 + mov r2, r9 + ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03] + stm r0!, {r1,r2,r3} + mov r1, r10 + mov r2, r11 + mov r3, r12 + stm r0!, {r1,r2,r3} + mov r1, lr + stm r0!, {r1,r4,r5,r6,r7} + + ldr r0, [sp, #XoodyakAbsorb_offsetInitialLen] + ldr r2, [sp, 
#XoodyakAbsorb_offsetXLen] + adds r2, r2, #16 + subs r0, r0, r2 + + add sp, #XoodyakAbsorb_SAS + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r3-r7,pc} +Xoodyak_AbsorbHashFullBlocks_Unaligned: + mLoadU r1, r0, 0, r2 + eors r3, r3, r1 + mLoadU r1, r0, 4, r2 + mov r2, r8 + eors r2, r2, r1 + mov r8, r2 + mLoadU r1, r0, 8, r2 + mov r2, r9 + eors r2, r2, r1 + mov r9, r2 + mLoadU r1, r0, 12, r2 + ldr r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] + eors r2, r2, r1 + str r2, [sp, #Xoodoo_Permute_12rounds_offsetA03] + adds r0, r0, #16 + b Xoodyak_AbsorbHashFullBlocks_EndLoop + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen) +@ { +@ size_t initialLength = YLen@ +@ +@ do { +@ SnP_AddByte(state, 0x01, 0)@ /* Xoodyak_Down(instance, NULL, 0, 0)@ */ +@ SnP_Permute(state )@ /* Xoodyak_Up(instance, Y, Xoodyak_Rkout, 0)@ */ +@ SnP_ExtractBytes(state, Y, 0, Xoodyak_Rkout)@ +@ Y += Xoodyak_Rkout@ +@ YLen -= Xoodyak_Rkout@ +@ } while (YLen >= Xoodyak_Rkout)@ +@ return initialLength - YLen@ +@ } +@ +.equ XoodyakSqueeze_offsetState , (Xoodoo_Permute_12rounds_SAS+0) +.equ XoodyakSqueeze_offsetY , (Xoodoo_Permute_12rounds_SAS+4) +.equ XoodyakSqueeze_offsetYLen , (Xoodoo_Permute_12rounds_SAS+8) +.equ XoodyakSqueeze_offsetInitialLen , (Xoodoo_Permute_12rounds_SAS+12) + +.equ XoodyakSqueeze_SAS , (Xoodoo_Permute_12rounds_SAS+20) + + .align 4 +.global Xoodyak_SqueezeKeyedFullBlocks +.type Xoodyak_SqueezeKeyedFullBlocks, %function; +Xoodyak_SqueezeKeyedFullBlocks: + push {r3-r7,lr} + mov r4, r8 + mov r5, r9 + mov r6, r10 + mov r7, r11 + push {r4-r7} + + sub sp, #XoodyakSqueeze_SAS + str r0, [sp, #XoodyakSqueeze_offsetState] @ setup variables on stack + str r1, [sp, #XoodyakSqueeze_offsetY] + str r2, [sp, #XoodyakSqueeze_offsetInitialLen] + subs r2, r2, #24 + str r2, [sp, #XoodyakSqueeze_offsetYLen] + ldr r5, =Xoodyak_SqueezeKeyedFullBlocks_Ret+1 + str 
r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn] + + ldm r0!, {r3,r5,r6,r7} @ state in registers + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoodyak_SqueezeKeyedFullBlocks_Loop: + movs r0, #1 + eors r3, r3, r0 + ldr r0, =Xoodoo_Permute_12roundsAsm + bx r0 + .align 4 + .ltorg +Xoodyak_SqueezeKeyedFullBlocks_Ret: + ldr r0, [sp, #XoodyakSqueeze_offsetY] + lsls r1, r0, #30 + bne Xoodyak_SqueezeKeyedFullBlocks_Unaligned +Xoodyak_SqueezeKeyedFullBlocks_Aligned: + stmia r0!, {r3} + mov r1, r8 + mov r2, r9 + stmia r0!, {r1, r2} + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03] + mov r2, r10 + stmia r0!, {r1, r2} + mov r1, r11 + stmia r0!, {r1} +Xoodyak_SqueezeKeyedFullBlocks_EndLoop: + str r0, [sp, #XoodyakSqueeze_offsetY] + ldr r1, [sp, #XoodyakSqueeze_offsetYLen] + subs r1, r1, #24 + str r1, [sp, #XoodyakSqueeze_offsetYLen] + bcs Xoodyak_SqueezeKeyedFullBlocks_Loop + ldr r0, [sp, #XoodyakSqueeze_offsetState] @ Save state + stm r0!, {r3} + mov r1, r8 + mov r2, r9 + ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03] + stm r0!, {r1,r2,r3} + mov r1, r10 + mov r2, r11 + mov r3, r12 + stm r0!, {r1,r2,r3} + mov r1, lr + stm r0!, {r1,r4,r5,r6,r7} + ldr r0, [sp, #XoodyakSqueeze_offsetInitialLen] @ Compute processed length + ldr r2, [sp, #XoodyakSqueeze_offsetYLen] + adds r2, r2, #24 + subs r0, r0, r2 + add sp, #XoodyakSqueeze_SAS @ Free stack and pop + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r3-r7,pc} +Xoodyak_SqueezeKeyedFullBlocks_Unaligned: + mStoreU r0, 0, r3, r2, locRegL + mStoreU r0, 4, r8, r2, locRegH + mStoreU r0, 8, r9, r2, locRegH + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03] + mStoreU r0, 12, r1, r2, locRegL + mStoreU r0, 16, r10, r2, locRegH + mStoreU r0, 20, r11, r2, locRegH + adds r0, r0, #24 + b Xoodyak_SqueezeKeyedFullBlocks_EndLoop + + +@ 
---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen) +@ { +@ size_t initialLength = YLen@ +@ +@ do { +@ SnP_AddByte(state, 0x01, 0)@ /* Xoodyak_Down(instance, NULL, 0, 0)@ */ +@ SnP_Permute(state)@ /* Xoodyak_Up(instance, Y, Xoodyak_Rhash, 0)@ */ +@ SnP_ExtractBytes(state, Y, 0, Xoodyak_Rhash)@ +@ Y += Xoodyak_Rhash@ +@ YLen -= Xoodyak_Rhash@ +@ } while (YLen >= Xoodyak_Rhash)@ +@ return initialLength - YLen@ +@ } +@ + .align 4 +.global Xoodyak_SqueezeHashFullBlocks +.type Xoodyak_SqueezeHashFullBlocks, %function; +Xoodyak_SqueezeHashFullBlocks: + push {r3-r7,lr} + mov r4, r8 + mov r5, r9 + mov r6, r10 + mov r7, r11 + push {r4-r7} + + sub sp, #XoodyakSqueeze_SAS + str r0, [sp, #XoodyakSqueeze_offsetState] @ setup variables on stack + str r1, [sp, #XoodyakSqueeze_offsetY] + str r2, [sp, #XoodyakSqueeze_offsetInitialLen] + subs r2, r2, #16 + str r2, [sp, #XoodyakSqueeze_offsetYLen] + ldr r5, =Xoodyak_SqueezeHashFullBlocks_Ret+1 + str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn] + + ldm r0!, {r3,r5,r6,r7} @ state in registers + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoodyak_SqueezeHashFullBlocks_Loop: + movs r0, #1 + eors r3, r3, r0 + ldr r0, =Xoodoo_Permute_12roundsAsm + bx r0 + .align 4 + .ltorg +Xoodyak_SqueezeHashFullBlocks_Ret: + ldr r0, [sp, #XoodyakSqueeze_offsetY] + lsls r1, r0, #30 + bne Xoodyak_SqueezeHashFullBlocks_Unaligned +Xoodyak_SqueezeHashFullBlocks_Aligned: + stmia r0!, {r3} + mov r1, r8 + mov r2, r9 + stmia r0!, {r1, r2} + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03] + stmia r0!, {r1} +Xoodyak_SqueezeHashFullBlocks_EndLoop: + str r0, [sp, #XoodyakSqueeze_offsetY] + ldr r1, [sp, #XoodyakSqueeze_offsetYLen] + subs r1, r1, #16 + str r1, [sp, #XoodyakSqueeze_offsetYLen] + bcs 
Xoodyak_SqueezeHashFullBlocks_Loop + ldr r0, [sp, #XoodyakSqueeze_offsetState] @ Save state + stm r0!, {r3} + mov r1, r8 + mov r2, r9 + ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03] + stm r0!, {r1,r2,r3} + mov r1, r10 + mov r2, r11 + mov r3, r12 + stm r0!, {r1,r2,r3} + mov r1, lr + stm r0!, {r1,r4,r5,r6,r7} + ldr r0, [sp, #XoodyakSqueeze_offsetInitialLen] @ Compute processed length + ldr r2, [sp, #XoodyakSqueeze_offsetYLen] + adds r2, r2, #16 + subs r0, r0, r2 + add sp, #XoodyakSqueeze_SAS @ Free stack and pop + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r3-r7,pc} +Xoodyak_SqueezeHashFullBlocks_Unaligned: + mStoreU r0, 0, r3, r2, locRegL + mStoreU r0, 4, r8, r2, locRegH + mStoreU r0, 8, r9, r2, locRegH + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03] + mStoreU r0, 12, r1, r2, locRegL + adds r0, r0, #16 + b Xoodyak_SqueezeHashFullBlocks_EndLoop + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen) +@ { +@ size_t initialLength = IOLen@ +@ +@ do { +@ SnP_Permute(state)@ +@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@ +@ SnP_OverwriteBytes(state, O, 0, Xoodyak_Rkout)@ +@ SnP_AddByte(state, 0x01, Xoodyak_Rkout)@ +@ I += Xoodyak_Rkout@ +@ O += Xoodyak_Rkout@ +@ IOLen -= Xoodyak_Rkout@ +@ } while (IOLen >= Xoodyak_Rkout)@ +@ return initialLength - IOLen@ +@ } +@ +.equ XoodyakCrypt_offsetState , (Xoodoo_Permute_12rounds_SAS+0) +.equ XoodyakCrypt_offsetI , (Xoodoo_Permute_12rounds_SAS+4) +.equ XoodyakCrypt_offsetO , (Xoodoo_Permute_12rounds_SAS+8) +.equ XoodyakCrypt_offsetIOLen , (Xoodoo_Permute_12rounds_SAS+12) +.equ XoodyakCrypt_offsetInitialLen , (Xoodoo_Permute_12rounds_SAS+16) +.equ XoodyakCrypt_SAS , (Xoodoo_Permute_12rounds_SAS+20) + + .align 4 +.global Xoodyak_EncryptFullBlocks +.type Xoodyak_EncryptFullBlocks, %function; +Xoodyak_EncryptFullBlocks: + push {r3-r7,lr} + mov r4, r8 + mov r5, r9 + mov 
r6, r10 + mov r7, r11 + push {r4-r7} + + sub sp, #XoodyakCrypt_SAS + str r0, [sp, #XoodyakCrypt_offsetState] @ setup variables on stack + str r1, [sp, #XoodyakCrypt_offsetI] + str r2, [sp, #XoodyakCrypt_offsetO] + str r3, [sp, #XoodyakCrypt_offsetInitialLen] + subs r3, r3, #24 @ IOLen is kept pre-decremented by one 24-byte block + str r3, [sp, #XoodyakCrypt_offsetIOLen] + ldr r5, =Xoodyak_EncryptFullBlocks_Ret+1 @ Thumb address (bit 0 set) stored as the permutation return target + str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn] + + ldm r0!, {r3,r5,r6,r7} @ state in registers + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoodyak_EncryptFullBlocks_Loop: + ldr r0, =Xoodoo_Permute_12roundsAsm + bx r0 + .align 4 + .ltorg +Xoodyak_EncryptFullBlocks_Ret: + push {r4, r5} @ r4/r5 become O/I cursors, hence the extra 8 on every sp offset below + ldr r5, [sp, #XoodyakCrypt_offsetI+8] + ldr r4, [sp, #XoodyakCrypt_offsetO+8] + mov r0, r4 + orrs r0, r0, r5 @ OR, not AND: a misaligned I or a misaligned O must both select the byte-wise path + lsls r0, r0, #30 @ keep only the two low address bits + bne Xoodyak_EncryptFullBlocks_Unaligned +Xoodyak_EncryptFullBlocks_Aligned: @ word-wise path, per lane: O = state ^ I and the state keeps that output word + ldmia r5!, {r0} + eors r3, r3, r0 + stmia r4!, {r3} + + ldmia r5!, {r0} + mov r1, r8 + eors r1, r1, r0 + stmia r4!, {r1} + mov r8, r1 + + ldmia r5!, {r0} + mov r1, r9 + eors r1, r1, r0 + stmia r4!, {r1} + mov r9, r1 + + ldmia r5!, {r0} + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + eors r1, r1, r0 + stmia r4!, {r1} + str r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + + ldmia r5!, {r0} + mov r1, r10 + eors r1, r1, r0 + stmia r4!, {r1} + mov r10, r1 + + ldmia r5!, {r0} + mov r1, r11 + eors r1, r1, r0 + stmia r4!, {r1} + mov r11, r1 +Xoodyak_EncryptFullBlocks_EndLoop: + movs r0, #1 @ xor 0x01 into the lane held in r12, i.e. state byte offset 24 + mov r1, r12 + eors r1, r1, r0 + mov r12, r1 + str r5, [sp, #XoodyakCrypt_offsetI+8] + str r4, [sp, #XoodyakCrypt_offsetO+8] + pop {r4, r5} + ldr r1, [sp, #XoodyakCrypt_offsetIOLen] + subs r1, r1, #24 + str r1, [sp, #XoodyakCrypt_offsetIOLen] + bcs Xoodyak_EncryptFullBlocks_Loop @ no borrow: at least one more full block remains + ldr r0, [sp, #XoodyakCrypt_offsetState] @ Save state + stm r0!, {r3} + mov r1, r8 + mov r2, r9 + ldr r3, [sp, 
#Xoodoo_Permute_12rounds_offsetA03] + stm r0!, {r1,r2,r3} + mov r1, r10 + mov r2, r11 + mov r3, r12 + stm r0!, {r1,r2,r3} + mov r1, lr + stm r0!, {r1,r4,r5,r6,r7} + ldr r0, [sp, #XoodyakCrypt_offsetInitialLen] @ Compute processed length + ldr r2, [sp, #XoodyakCrypt_offsetIOLen] + adds r2, r2, #24 + subs r0, r0, r2 + add sp, #XoodyakCrypt_SAS @ Free stack and pop + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r3-r7,pc} +Xoodyak_EncryptFullBlocks_Unaligned: + mLoadU r0, r5, 0, r2 + eors r3, r3, r0 + mStoreU r4, 0, r3, r2, locRegL + + mLoadU r0, r5, 4, r2 + mov r1, r8 + eors r1, r1, r0 + mStoreU r4, 4, r1, r2, locRegL + mov r8, r1 + + mLoadU r0, r5, 8, r2 + mov r1, r9 + eors r1, r1, r0 + mStoreU r4, 8, r1, r2, locRegL + mov r9, r1 + + mLoadU r0, r5, 12, r2 + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + eors r1, r1, r0 + mStoreU r4, 12, r1, r2, locRegL + str r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + + mLoadU r0, r5, 16, r2 + mov r1, r10 + eors r1, r1, r0 + mStoreU r4, 16, r1, r2, locRegL + mov r10, r1 + + mLoadU r0, r5, 20, r2 + mov r1, r11 + eors r1, r1, r0 + mStoreU r4, 20, r1, r2, locRegL + mov r11, r1 + + adds r4, r4, #24 + adds r5, r5, #24 + b Xoodyak_EncryptFullBlocks_EndLoop + + +@ ---------------------------------------------------------------------------- +@ +@ size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen) +@ { +@ size_t initialLength = IOLen@ +@ +@ do { +@ SnP_Permute(state)@ +@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@ +@ SnP_AddBytes(state, O, 0, Xoodyak_Rkout)@ +@ SnP_AddByte(state, 0x01, Xoodyak_Rkout)@ +@ I += Xoodyak_Rkout@ +@ O += Xoodyak_Rkout@ +@ IOLen -= Xoodyak_Rkout@ +@ } while (IOLen >= Xoodyak_Rkout)@ +@ return initialLength - IOLen@ +@ } +@ + .align 4 +.global Xoodyak_DecryptFullBlocks +.type Xoodyak_DecryptFullBlocks, %function; +Xoodyak_DecryptFullBlocks: + push {r3-r7,lr} + mov r4, r8 + mov r5, r9 + mov r6, r10 + mov r7, r11 + push {r4-r7} + + 
sub sp, #XoodyakCrypt_SAS + str r0, [sp, #XoodyakCrypt_offsetState] @ setup variables on stack + str r1, [sp, #XoodyakCrypt_offsetI] + str r2, [sp, #XoodyakCrypt_offsetO] + str r3, [sp, #XoodyakCrypt_offsetInitialLen] + subs r3, r3, #24 @ IOLen is kept pre-decremented by one 24-byte block + str r3, [sp, #XoodyakCrypt_offsetIOLen] + ldr r5, =Xoodyak_DecryptFullBlocks_Ret+1 @ Thumb address (bit 0 set) stored as the permutation return target + str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn] + + ldm r0!, {r3,r5,r6,r7} @ state in registers + mov r8, r5 + mov r9, r6 + str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03] + ldm r0!, {r4,r5,r6,r7} + mov r10, r4 + mov r11, r5 + mov r12, r6 + mov lr, r7 + ldm r0!, {r4,r5,r6,r7} +Xoodyak_DecryptFullBlocks_Loop: + ldr r0, =Xoodoo_Permute_12roundsAsm + bx r0 + .align 4 + .ltorg +Xoodyak_DecryptFullBlocks_Ret: + push {r4, r5} @ r4/r5 become O/I cursors, hence the extra 8 on every sp offset below + ldr r5, [sp, #XoodyakCrypt_offsetI+8] + ldr r4, [sp, #XoodyakCrypt_offsetO+8] + mov r0, r4 + orrs r0, r0, r5 @ OR, not AND: a misaligned I or a misaligned O must both select the byte-wise path + lsls r0, r0, #30 @ keep only the two low address bits + bne Xoodyak_DecryptFullBlocks_Unaligned +Xoodyak_DecryptFullBlocks_Aligned: @ word-wise path, per lane: O = state ^ I and the state is replaced by the input word I + ldmia r5!, {r0} + eors r3, r3, r0 + stmia r4!, {r3} + mov r3, r0 + + ldmia r5!, {r0} + mov r1, r8 + eors r1, r1, r0 + stmia r4!, {r1} + mov r8, r0 + + ldmia r5!, {r0} + mov r1, r9 + eors r1, r1, r0 + stmia r4!, {r1} + mov r9, r0 + + ldmia r5!, {r0} + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + eors r1, r1, r0 + stmia r4!, {r1} + str r0, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + + ldmia r5!, {r0} + mov r1, r10 + eors r1, r1, r0 + stmia r4!, {r1} + mov r10, r0 + + ldmia r5!, {r0} + mov r1, r11 + eors r1, r1, r0 + stmia r4!, {r1} + mov r11, r0 +Xoodyak_DecryptFullBlocks_EndLoop: + movs r0, #1 @ xor 0x01 into the lane held in r12, i.e. state byte offset 24 + mov r1, r12 + eors r1, r1, r0 + mov r12, r1 + str r5, [sp, #XoodyakCrypt_offsetI+8] + str r4, [sp, #XoodyakCrypt_offsetO+8] + pop {r4, r5} + ldr r1, [sp, #XoodyakCrypt_offsetIOLen] + subs r1, r1, #24 + str r1, [sp, #XoodyakCrypt_offsetIOLen] + bcs Xoodyak_DecryptFullBlocks_Loop @ no borrow: at least one more full block remains + ldr r0, [sp, #XoodyakCrypt_offsetState] @ Save state + stm r0!, {r3} + mov r1, r8 + mov r2, r9 + ldr r3, [sp, 
#Xoodoo_Permute_12rounds_offsetA03] + stm r0!, {r1,r2,r3} + mov r1, r10 + mov r2, r11 + mov r3, r12 + stm r0!, {r1,r2,r3} + mov r1, lr + stm r0!, {r1,r4,r5,r6,r7} + ldr r0, [sp, #XoodyakCrypt_offsetInitialLen] @ Compute processed length + ldr r2, [sp, #XoodyakCrypt_offsetIOLen] + adds r2, r2, #24 + subs r0, r0, r2 + add sp, #XoodyakCrypt_SAS @ Free stack and pop + pop {r4-r7} + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r3-r7,pc} +Xoodyak_DecryptFullBlocks_Unaligned: + mLoadU r0, r5, 0, r2 + eors r3, r3, r0 + mStoreU r4, 0, r3, r2, locRegL + mov r3, r0 + + mLoadU r0, r5, 4, r2 + mov r1, r8 + eors r1, r1, r0 + mStoreU r4, 4, r1, r2, locRegL + mov r8, r0 + + mLoadU r0, r5, 8, r2 + mov r1, r9 + eors r1, r1, r0 + mStoreU r4, 8, r1, r2, locRegL + mov r9, r0 + + mLoadU r0, r5, 12, r2 + ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + eors r1, r1, r0 + mStoreU r4, 12, r1, r2, locRegL + str r0, [sp, #Xoodoo_Permute_12rounds_offsetA03+8] + + mLoadU r0, r5, 16, r2 + mov r1, r10 + eors r1, r1, r0 + mStoreU r4, 16, r1, r2, locRegL + mov r10, r0 + + mLoadU r0, r5, 20, r2 + mov r1, r11 + eors r1, r1, r0 + mStoreU r4, 20, r1, r2, locRegL + mov r11, r0 + + adds r4, r4, #24 + adds r5, r5, #24 + b Xoodyak_DecryptFullBlocks_EndLoop + + diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Xoodyak.c b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Xoodyak.c new file mode 100644 index 0000000..e0b67b5 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/Xoodyak.c @@ -0,0 +1,53 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". 
+ +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifdef XoodooReference + #include "displayIntermediateValues.h" +#endif + +#include +#include +#include "Xoodyak.h" + +#ifdef OUTPUT +#include +#include + +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length); +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length) +{ + unsigned int i; + + fprintf(f, "%s:", synopsis); + for(i=0; i +#include "Cyclist.h" +#include "Xoodoo-SnP.h" +#include "Xoodyak-parameters.h" + +KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment) +KCP_DeclareCyclistFunctions(Xoodyak) + +#else +#error This requires an implementation of Xoodoo +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/align.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/align.h new file mode 100644 index 0000000..82ad2f9 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/align.h @@ -0,0 +1,33 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _align_h_ +#define _align_h_ + +/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. 
*/ +#ifdef ALIGN +#undef ALIGN +#endif + +#if defined(__GNUC__) +#define ALIGN(x) __attribute__ ((aligned(x))) +#elif defined(_MSC_VER) +#define ALIGN(x) __declspec(align(x)) +#elif defined(__ARMCC_VERSION) +#define ALIGN(x) __align(x) +#else +#define ALIGN(x) +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/api.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/api.h new file mode 100644 index 0000000..8060d2b --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/api.h @@ -0,0 +1 @@ +#define CRYPTO_BYTES 32 diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/brg_endian.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/brg_endian.h new file mode 100644 index 0000000..7c640b9 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/brg_endian.h @@ -0,0 +1,143 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The redistribution and use of this software (with or without changes) + is allowed without the payment of fees or royalties provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. 
+ --------------------------------------------------------------------------- + Issue Date: 20/12/2007 + Changes for ARM 9/9/2010 +*/ + +#ifndef _BRG_ENDIAN_H +#define _BRG_ENDIAN_H + +#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ +#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ + +#if 0 +/* Include files where endian defines and byteswap functions may reside */ +#if defined( __sun ) +# include +#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ ) +# include +#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \ + defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ ) +# include +#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ ) +# if !defined( __MINGW32__ ) && !defined( _AIX ) +# include +# if !defined( __BEOS__ ) +# include +# endif +# endif +#endif +#endif + +/* Now attempt to set the define for platform byte order using any */ +/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */ +/* seem to encompass most endian symbol definitions */ + +#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN ) +# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN ) +# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( _BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( _LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( 
__BIG_ENDIAN ) && defined( __LITTLE_ENDIAN ) +# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ ) +# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +/* if the platform byte order could not be determined, then try to */ +/* set this define using common machine defines */ +#if !defined(PLATFORM_BYTE_ORDER) + +#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ + defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ + defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ + defined( vax ) || defined( vms ) || defined( VMS ) || \ + defined( __VMS ) || defined( _M_X64 ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN + +#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ + defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ + defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ + defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ + defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ + defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ + defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \ + defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ ) 
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN + +#elif defined(__arm__) +# ifdef __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# else +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif 1 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#elif 0 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#else +# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order +#endif + +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/config.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/config.h new file mode 100644 index 0000000..7dfc043 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/config.h @@ -0,0 +1,4 @@ +/* File generated by ToTargetConfigFile.xsl */ + +#define XKCP_has_Xoodyak +#define XKCP_has_Xoodoo diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/hash.c b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/hash.c new file mode 100644 index 0000000..5955de5 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv6M/hash.c @@ -0,0 +1,43 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include "crypto_hash.h" + +#ifndef crypto_hash_BYTES + #define crypto_hash_BYTES 32 +#endif + +#include "Xoodyak.h" + +int crypto_hash(unsigned char *out, const unsigned char *in, unsigned long long inlen) /* Absorbs inlen bytes from in, squeezes crypto_hash_BYTES bytes into out; always returns 0 */ +{ + Xoodyak_Instance instance; + + Xoodyak_Initialize(&instance, NULL, 0, NULL, 0, NULL, 0); /* no key, ID or counter supplied */ + Xoodyak_Absorb(&instance, in, (size_t)inlen); /* NOTE(review): the cast narrows unsigned long long to size_t; where size_t is narrower, an inlen above SIZE_MAX would be silently truncated -- confirm callers never pass that */ + Xoodyak_Squeeze(&instance, out, crypto_hash_BYTES); + #if 0 /* disabled debug dump of the digest bytes */ + { + unsigned int i; + for (i = 0; i < crypto_hash_BYTES; ++i ) + { + printf("\\x%02x", out[i] ); + } + printf("\n"); + } + #endif + return 0; +} diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Cyclist.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Cyclist.h new file mode 100644 index 0000000..54522bb --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Cyclist.h @@ -0,0 +1,66 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Cyclist_h_ +#define _Cyclist_h_ + +#include +#include "align.h" + +#define Cyclist_ModeHash 1 +#define Cyclist_ModeKeyed 2 + +#define Cyclist_PhaseDown 1 +#define Cyclist_PhaseUp 2 + +#ifdef OUTPUT + +#include + +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + uint8_t stateShadow[size]; \ + FILE *file; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#else + +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#endif + +#define KCP_DeclareCyclistFunctions(prefix) \ + void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \ + void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \ + void prefix##_Encrypt(prefix##_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen); \ + void prefix##_Decrypt(prefix##_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen); \ + void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \ + void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \ + void prefix##_Ratchet(prefix##_Instance *instance); + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Cyclist.inc b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Cyclist.inc new file mode 100644 index 0000000..ba7a156 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Cyclist.inc @@ -0,0 +1,327 @@ +/* +The eXtended Keccak Code Package (XKCP) 
+https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define JOIN0(a, b) a ## b +#define JOIN(a, b) JOIN0(a, b) + +#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize) +#define SnP_Initialize JOIN(SnP, _Initialize) +#define SnP_AddBytes JOIN(SnP, _AddBytes) +#define SnP_AddByte JOIN(SnP, _AddByte) +#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes) +#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes) +#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes) + +#define Cyclist_Instance JOIN(prefix, _Instance) +#define Cyclist_Initialize JOIN(prefix, _Initialize) +#define Cyclist_Absorb JOIN(prefix, _Absorb) +#define Cyclist_Encrypt JOIN(prefix, _Encrypt) +#define Cyclist_Decrypt JOIN(prefix, _Decrypt) +#define Cyclist_Squeeze JOIN(prefix, _Squeeze) +#define Cyclist_SqueezeKey JOIN(prefix, _SqueezeKey) +#define Cyclist_Ratchet JOIN(prefix, _Ratchet) + +#define Cyclist_AbsorbAny JOIN(prefix, _AbsorbAny) +#define Cyclist_AbsorbKey JOIN(prefix, _AbsorbKey) +#define Cyclist_SqueezeAny JOIN(prefix, _SqueezeAny) +#define Cyclist_Down JOIN(prefix, _Down) +#define Cyclist_Up JOIN(prefix, _Up) +#define Cyclist_Crypt JOIN(prefix, _Crypt) + +#define Cyclist_f_bPrime JOIN(prefix, _f_bPrime) +#define Cyclist_Rhash JOIN(prefix, _Rhash) +#define Cyclist_Rkin JOIN(prefix, _Rkin) +#define Cyclist_Rkout JOIN(prefix, _Rkout) +#define Cyclist_lRatchet JOIN(prefix, _lRatchet) + +#if defined(CyclistFullBlocks_supported) +#define Cyclist_AbsorbKeyedFullBlocks JOIN(prefix, _AbsorbKeyedFullBlocks) +#define 
Cyclist_AbsorbHashFullBlocks JOIN(prefix, _AbsorbHashFullBlocks) +#define Cyclist_SqueezeKeyedFullBlocks JOIN(prefix, _SqueezeKeyedFullBlocks) +#define Cyclist_SqueezeHashFullBlocks JOIN(prefix, _SqueezeHashFullBlocks) +#define Cyclist_EncryptFullBlocks JOIN(prefix, _EncryptFullBlocks) +#define Cyclist_DecryptFullBlocks JOIN(prefix, _DecryptFullBlocks) +#endif + +/* ------- Cyclist internal interfaces ------- */ + +static void Cyclist_Down(Cyclist_Instance *instance, const uint8_t *Xi, unsigned int XiLen, uint8_t Cd) +{ + SnP_AddBytes(instance->state, Xi, 0, XiLen); + SnP_AddByte(instance->state, 0x01, XiLen); + SnP_AddByte(instance->state, (instance->mode == Cyclist_ModeHash) ? (Cd & 0x01) : Cd, Cyclist_f_bPrime - 1); + instance->phase = Cyclist_PhaseDown; + +} + +static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu) +{ + #if defined(OUTPUT) + uint8_t s[Cyclist_f_bPrime]; + #endif + + if (instance->mode != Cyclist_ModeHash) { + SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1); + } + #if defined(OUTPUT) + if (instance->file != NULL) { + SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime ); + SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime ); + } + #endif + SnP_Permute( instance->state ); + #if defined(OUTPUT) + if (instance->file != NULL) { + memcpy( instance->stateShadow, instance->state, sizeof(instance->state) ); + fprintf( instance->file, "Data XORed" ); + displayByteString( instance->file, "", s, Cyclist_f_bPrime ); + SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime ); + fprintf( instance->file, "After f() "); + displayByteString( instance->file, "", s, Cyclist_f_bPrime ); + } + #endif + instance->phase = Cyclist_PhaseUp; + SnP_ExtractBytes( instance->state, Yi, 0, YiLen ); +} + +static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd) +{ + unsigned int splitLen; + + do { + if (instance->phase != 
Cyclist_PhaseUp) { + Cyclist_Up(instance, NULL, 0, 0); + } + splitLen = MyMin(XLen, r); + Cyclist_Down(instance, X, splitLen, Cd); + Cd = 0; + X += splitLen; + XLen -= splitLen; + #if defined(CyclistFullBlocks_supported) + if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) { + size_t lenProcessed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen); + X += lenProcessed; + XLen -= lenProcessed; + } + else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) { + size_t lenProcessed = Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen); + X += lenProcessed; + XLen -= lenProcessed; + } + #endif + } while ( XLen != 0 ); +} + +static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen) +{ + uint8_t KID[Cyclist_Rkin]; + + assert(instance->mode == Cyclist_ModeHash); + assert((KLen + IDLen) <= (Cyclist_Rkin - 1)); + + instance->mode = Cyclist_ModeKeyed; + instance->Rabsorb = Cyclist_Rkin; + instance->Rsqueeze = Cyclist_Rkout; + if (KLen != 0) { + memcpy(KID, K, KLen); + memcpy(KID + KLen, ID, IDLen); + KID[KLen + IDLen] = (uint8_t)IDLen; + Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02); + if (counterLen != 0) { + Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00); + } + } +} + +static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu) +{ + unsigned int len; + + len = MyMin(YLen, instance->Rsqueeze ); + Cyclist_Up(instance, Y, len, Cu); + Y += len; + YLen -= len; + while (YLen != 0) { + #if defined(CyclistFullBlocks_supported) + if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkin)) { + size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen); + Y += lenProcessed; + YLen -= lenProcessed; + } + else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) { + size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen); + Y += 
lenProcessed; + YLen -= lenProcessed; + } + else + #endif + { + Cyclist_Down(instance, NULL, 0, 0); + len = MyMin(YLen, instance->Rsqueeze ); + Cyclist_Up(instance, Y, len, 0); + Y += len; + YLen -= len; + } + } +} + +static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt) /* Keyed-mode encrypt (decrypt == 0) or decrypt (decrypt != 0) of IOLen bytes from I to O, in chunks of at most Cyclist_Rkout bytes */ +{ + unsigned int splitLen; + uint8_t P[Cyclist_Rkout]; /* encrypt path only: copy of the input chunk taken before O is written, then absorbed by Down */ + uint8_t Cu = 0x80; /* passed to the first Up only; cleared to 0x00 at the end of every iteration */ + + do { + if (decrypt != 0) { + #if defined(CyclistFullBlocks_supported) + if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) { + size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen); + I += lenProcessed; + O += lenProcessed; + IOLen -= lenProcessed; + } + else + #endif + { + splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */ + Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */ + SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add; called through the SnP_ alias like the other state accessors in this file */ + Cyclist_Down(instance, O, splitLen, 0x00); + I += splitLen; + O += splitLen; + IOLen -= splitLen; + } + } + else { + #if defined(CyclistFullBlocks_supported) + if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) { + size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen); + I += lenProcessed; + O += lenProcessed; + IOLen -= lenProcessed; + } + else + #endif + { + splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */ + memcpy(P, I, splitLen); + Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */ + SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add; called through the SnP_ alias like the other state accessors in this file */ + Cyclist_Down(instance, P, splitLen, 0x00); + I += splitLen; + O += splitLen; + IOLen -= splitLen; + } + } + Cu = 0x00; + } while ( IOLen != 0 ); +} + +/* ------- Cyclist interfaces ------- */ + +void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, 
size_t counterLen) +{ + SnP_StaticInitialize(); + SnP_Initialize(instance->state); + instance->phase = Cyclist_PhaseUp; + instance->mode = Cyclist_ModeHash; + instance->Rabsorb = Cyclist_Rhash; + instance->Rsqueeze = Cyclist_Rhash; + #ifdef OUTPUT + instance->file = 0; + SnP_Initialize( instance->stateShadow ); + #endif + if (KLen != 0) { + Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, counter, counterLen); + } +} + +void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, size_t XLen) +{ + Cyclist_AbsorbAny(instance, X, XLen, instance->Rabsorb, 0x03); +} + +void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen) +{ + assert(instance->mode == Cyclist_ModeKeyed); + Cyclist_Crypt(instance, P, C, PLen, 0); +} + +void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen) +{ + assert(instance->mode == Cyclist_ModeKeyed); + Cyclist_Crypt(instance, C, P, CLen, 1); +} + +void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen) +{ + Cyclist_SqueezeAny(instance, Y, YLen, 0x40); +} + +void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen) +{ + assert(instance->mode == Cyclist_ModeKeyed); + Cyclist_SqueezeAny(instance, K, KLen, 0x20); +} + +void Cyclist_Ratchet(Cyclist_Instance *instance) +{ + uint8_t buffer[Cyclist_lRatchet]; + + assert(instance->mode == Cyclist_ModeKeyed); + /* Squeeze then absorb is the same as overwriting with zeros */ + Cyclist_SqueezeAny(instance, buffer, sizeof(buffer), 0x10); + Cyclist_AbsorbAny(instance, buffer, sizeof(buffer), instance->Rabsorb, 0x00); +} + +#undef SnP_StaticInitialize +#undef SnP_Initialize +#undef SnP_AddBytes +#undef SnP_AddByte +#undef SnP_OverwriteBytes +#undef SnP_ExtractBytes +#undef SnP_ExtractAndAddBytes + +#undef Cyclist_Instance +#undef Cyclist_Initialize +#undef Cyclist_Absorb +#undef Cyclist_Encrypt +#undef Cyclist_Decrypt +#undef Cyclist_Squeeze +#undef Cyclist_SqueezeKey +#undef Cyclist_Ratchet + 
+#undef Cyclist_AbsorbAny +#undef Cyclist_AbsorbKey +#undef Cyclist_SqueezeAny +#undef Cyclist_Down +#undef Cyclist_Up +#undef Cyclist_Crypt + +#undef Cyclist_f_bPrime +#undef Cyclist_Rhash +#undef Cyclist_Rkin +#undef Cyclist_Rkout +#undef Cyclist_lRatchet + +#if defined(CyclistFullBlocks_supported) +#undef Cyclist_AbsorbKeyedFullBlocks +#undef Cyclist_AbsorbHashFullBlocks +#undef Cyclist_SqueezeKeyedFullBlocks +#undef Cyclist_SqueezeHashFullBlocks +#undef Cyclist_EncryptFullBlocks +#undef Cyclist_DecryptFullBlocks +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Xoodoo-SnP.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Xoodoo-SnP.h new file mode 100644 index 0000000..7d0c98b --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Xoodoo-SnP.h @@ -0,0 +1,55 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodoo_SnP_h_ +#define _Xoodoo_SnP_h_ + +#include +#include + +/** For the documentation, see SnP-documentation.h. 
+ */ + +#define Xoodoo_implementation "32-bit optimized ARM assembler implementation" +#define Xoodoo_stateSizeInBytes (3*4*4) +#define Xoodoo_stateAlignment 4 + +#define Xoodoo_StaticInitialize() +void Xoodoo_Initialize(void *state); +#define Xoodoo_AddByte(argS, argData, argOffset) ((uint8_t*)argS)[argOffset] ^= (argData) +void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount); +//void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds); +void Xoodoo_Permute_6rounds(void *state); +void Xoodoo_Permute_12rounds(void *state); +void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length); + +#define Xoodoo_FastXoofff_supported +void Xoofff_AddIs(uint8_t *output, const uint8_t *input, size_t bitLen); +size_t Xoofff_CompressFastLoop(uint8_t *kRoll, uint8_t *xAccu, const uint8_t *input, size_t length); +size_t Xoofff_ExpandFastLoop(uint8_t *yAccu, const uint8_t *kRoll, uint8_t *output, size_t length); + +#define CyclistFullBlocks_supported +size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen); +size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen); +size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen); +size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen); +size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen); +size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen); + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Xoodoo-uf-armv7m-le-gcc.s 
b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Xoodoo-uf-armv7m-le-gcc.s new file mode 100644 index 0000000..0b72ec8 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Xoodoo-uf-armv7m-le-gcc.s @@ -0,0 +1,729 @@ +@ +@ The eXtended Keccak Code Package (XKCP) +@ https://github.com/XKCP/XKCP +@ +@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. +@ +@ Implementation by Ronny Van Keer, hereby denoted as "the implementer". +@ +@ For more information, feedback or questions, please refer to the Keccak Team website: +@ https://keccak.team/ +@ +@ To the extent possible under law, the implementer has waived all copyright +@ and related or neighboring rights to the source code in this file. +@ http://creativecommons.org/publicdomain/zero/1.0/ +@ + +@ WARNING: These functions work only on little endian CPU with@ ARMv7m architecture (Cortex-M3, ...). + + + .thumb + .syntax unified +.text + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_Initialize(void *state) +@ + .align 4 +.global Xoodoo_Initialize +.type Xoodoo_Initialize, %function; +Xoodoo_Initialize: + movs r1, #0 + movs r2, #0 + movs r3, #0 + movs r12, #0 + stmia r0!, { r1 - r3, r12 } + stmia r0!, { r1 - r3, r12 } + stmia r0!, { r1 - r3, r12 } + bx lr + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +@ +.global Xoodoo_AddBytes +.type Xoodoo_AddBytes, %function; +Xoodoo_AddBytes: + push {r4,lr} + adds r0, r0, r2 @ state += offset + subs r3, r3, #4 @ .if length >= 4 + bcc Xoodoo_AddBytes_Bytes +Xoodoo_AddBytes_LanesLoop: @ then, perform on lanes + ldr r2, [r0] + ldr r4, [r1], #4 + eors r2, r2, r4 + str r2, [r0], #4 + subs r3, r3, #4 + bcs Xoodoo_AddBytes_LanesLoop +Xoodoo_AddBytes_Bytes: + adds r3, r3, #3 + bcc Xoodoo_AddBytes_Exit 
+Xoodoo_AddBytes_BytesLoop: + ldrb r2, [r0] + ldrb r4, [r1], #1 + eors r2, r2, r4 + strb r2, [r0], #1 + subs r3, r3, #1 + bcs Xoodoo_AddBytes_BytesLoop +Xoodoo_AddBytes_Exit: + pop {r4,pc} + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +@ Copies length bytes from data into the state, starting at byte offset. +.global Xoodoo_OverwriteBytes +.type Xoodoo_OverwriteBytes, %function; +Xoodoo_OverwriteBytes: + adds r0, r0, r2 @ state += offset + subs r3, r3, #4 @ .if length >= 4 + bcc Xoodoo_OverwriteBytes_Bytes +Xoodoo_OverwriteBytes_LanesLoop: @ then, perform on words + ldr r2, [r1], #4 + str r2, [r0], #4 + subs r3, r3, #4 + bcs Xoodoo_OverwriteBytes_LanesLoop +Xoodoo_OverwriteBytes_Bytes: + adds r3, r3, #3 @ r3 = leftover bytes minus 1; carry clear means nothing is left + bcc Xoodoo_OverwriteBytes_Exit +Xoodoo_OverwriteBytes_BytesLoop: + ldrb r2, [r1], #1 + strb r2, [r0], #1 + subs r3, r3, #1 + bcs Xoodoo_OverwriteBytes_BytesLoop +Xoodoo_OverwriteBytes_Exit: + bx lr + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount) +@ Stores byteCount zero bytes at the start of the state. +.global Xoodoo_OverwriteWithZeroes +.type Xoodoo_OverwriteWithZeroes, %function; +Xoodoo_OverwriteWithZeroes: + movs r3, #0 + lsrs r2, r1, #2 @ r2 = number of whole 32-bit words (byteCount / 4) + beq Xoodoo_OverwriteWithZeroes_Bytes +Xoodoo_OverwriteWithZeroes_LoopLanes: + str r3, [r0], #4 + subs r2, r2, #1 + bne Xoodoo_OverwriteWithZeroes_LoopLanes +Xoodoo_OverwriteWithZeroes_Bytes: + ands r1, #3 @ r1 = trailing bytes (byteCount % 4) + beq Xoodoo_OverwriteWithZeroes_Exit +Xoodoo_OverwriteWithZeroes_LoopBytes: + strb r3, [r0], #1 + subs r1, r1, #1 + bne Xoodoo_OverwriteWithZeroes_LoopBytes +Xoodoo_OverwriteWithZeroes_Exit: + bx lr + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length) +@ Copies length bytes of the state, starting at byte offset, out to data. +.global Xoodoo_ExtractBytes +.type Xoodoo_ExtractBytes, 
%function; +Xoodoo_ExtractBytes: + adds r0, r0, r2 @ state += offset + subs r3, r3, #4 @ .if length >= 4 + bcc Xoodoo_ExtractBytes_Bytes +Xoodoo_ExtractBytes_LanesLoop: @ then, handle words + ldr r2, [r0], #4 + str r2, [r1], #4 + subs r3, r3, #4 + bcs Xoodoo_ExtractBytes_LanesLoop +Xoodoo_ExtractBytes_Bytes: + adds r3, r3, #3 + bcc Xoodoo_ExtractBytes_Exit +Xoodoo_ExtractBytes_BytesLoop: + ldrb r2, [r0], #1 + strb r2, [r1], #1 + subs r3, r3, #1 + bcs Xoodoo_ExtractBytes_BytesLoop +Xoodoo_ExtractBytes_Exit: + bx lr + .align 4 + + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_ExtractAndAddBytes(void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +@ +.global Xoodoo_ExtractAndAddBytes +.type Xoodoo_ExtractAndAddBytes, %function; +Xoodoo_ExtractAndAddBytes: + push {r4,r5} + adds r0, r0, r3 @ state += offset (offset register no longer needed, reuse for length) + ldr r3, [sp, #8] @ get length argument from stack + subs r3, r3, #4 @ .if length >= 4 + bcc Xoodoo_ExtractAndAddBytes_Bytes +Xoodoo_ExtractAndAddBytes_LanesLoop: @ then, handle words + ldr r5, [r0], #4 + ldr r4, [r1], #4 + eors r5, r5, r4 + str r5, [r2], #4 + subs r3, r3, #4 + bcs Xoodoo_ExtractAndAddBytes_LanesLoop +Xoodoo_ExtractAndAddBytes_Bytes: + adds r3, r3, #3 + bcc Xoodoo_ExtractAndAddBytes_Exit +Xoodoo_ExtractAndAddBytes_BytesLoop: + ldrb r5, [r0], #1 + ldrb r4, [r1], #1 + eors r5, r5, r4 + strb r5, [r2], #1 + subs r3, r3, #1 + bcs Xoodoo_ExtractAndAddBytes_BytesLoop +Xoodoo_ExtractAndAddBytes_Exit: + pop {r4,r5} + bx lr + .align 4 + + +@ ---------------------------------------------------------------------------- + +.equ _r0 , 5 +.equ _r1 , 14 +.equ _t3 , 1 + +.equ _w1 , 11 + +.equ _e0 , 2 +.equ _e1 , 8 + +.equ _rc12 , 0x00000058 +.equ _rc11 , 0x00000038 +.equ _rc10 , 0x000003C0 +.equ _rc9 , 0x000000D0 +.equ _rc8 , 0x00000120 +.equ _rc7 , 0x00000014 +.equ _rc6 , 0x00000060 +.equ _rc5 , 0x0000002C 
+.equ _rc4 , 0x00000380 +.equ _rc3 , 0x000000F0 +.equ _rc2 , 0x000001A0 +.equ _rc1 , 0x00000012 + +.equ _rc6x1, 0x00000003 +.equ _rc5x2, 0x0b000000 +.equ _rc4x3, 0x07000000 +.equ _rc3x4, 0x000f0000 +.equ _rc2x5, 0x0000d000 +.equ _rc1x6, 0x00000048 + +.equ _rc12x1, 0xc0000002 +.equ _rc11x2, 0x0e000000 +.equ _rc10x3, 0x07800000 +.equ _rc9x4 , 0x000d0000 +.equ _rc8x5 , 0x00009000 +.equ _rc7x6 , 0x00000050 +.equ _rc6x7 , 0x0000000c +.equ _rc5x8 , 0x2c000000 +.equ _rc4x9 , 0x1c000000 +.equ _rc3x10, 0x003c0000 +.equ _rc2x11, 0x00034000 +.equ _rc1x12, 0x00000120 + +@ ---------------------------------------------------------------------------- + +.macro mXor3 ro, a0, a1, a2, rho_e1, rho_e2 + .if ((\rho_e1)%32) == 0 + eors \ro, \a0, \a1 + .else + eor \ro, \a0, \a1, ROR #(32-(\rho_e1))%32 + .endif + .if ((\rho_e2)%32) == 0 + eors \ro, \ro, \a2 + .else + eor \ro, \ro, \a2, ROR #(32-(\rho_e2))%32 + .endif + .endm + +.macro mRliXor ro, ri, rot + .if ((\rot)%32) == 0 + eors \ro, \ro, \ri + .else + eor \ro, \ro, \ri, ROR #(32-(\rot))%32 + .endif + .endm + +.macro mRloXor ro, ri, rot + .if ((\rot)%32) == 0 + eors \ro, \ro, \ri + .else + eor \ro, \ri, \ro, ROR #(32-(\rot))%32 + .endif + .endm + +.macro mChi3 a0,a1,a2,r0,r1 + bic \r0, \a2, \a1, ROR #_w1 + eors \a0, \a0, \r0, ROR #32-_w1 + bic \r1, \a0, \a2, ROR #32-_w1 + eors \a1, \a1, \r1 + bic \r1, \a1, \a0 + eors \a2, \a2, \r1, ROR #_w1 + .endm + +.macro mRound r6i, r7i, r8i, r9i, r6w, r7w, r8w, r9w, r10i, r11i, r12i, lri, rho_e1, rho_we2, rc + + @ Theta: Column Parity Mixer (with late Rho-west, Rho-east bit rotations) + mXor3 r0, r5, \r9i, \lri, \rho_e1, \rho_we2 + mXor3 r1, r2, \r6i, \r10i, \rho_e1, \rho_we2 + mRliXor r0, r0, _r1-_r0 + mRloXor r2, r0, 32-_r0 + mRloXor \r6i, r0, \rho_e1-_r0 + mRloXor \r10i, r0, \rho_we2-_r0 + + mXor3 r0, r3, \r7i, \r11i, \rho_e1, \rho_we2 + mRliXor r1, r1, _r1-_r0 + mRloXor r3, r1, 32-_r0 + mRloXor \r7i, r1, \rho_e1-_r0 + mRloXor \r11i, r1, \rho_we2-_r0 + + mXor3 r1, r4, \r8i, \r12i, \rho_e1, 
\rho_we2 + mRliXor r0, r0, _r1-_r0 + mRloXor r4, r0, 32-_r0 + mRloXor \r8i, r0, \rho_e1-_r0 + mRloXor \r12i, r0, \rho_we2-_r0 + + mRliXor r1, r1, _r1-_r0 + mRloXor r5, r1, 32-_r0 + mRloXor \r9i, r1, \rho_e1-_r0 + mRloXor \lri, r1, \rho_we2-_r0 + @ After Theta the whole state is rotated -r0 + @ from here we must use a1.w instead of a1.i + + @ Iota: round constant + .if \rc == 0xc0000002 + eor r2, r2, #0x00000002 + eor r2, r2, #0xc0000000 + .else + eor r2, r2, #\rc + .endif + + @ Chi: non linear step, on colums + mChi3 r2, \r6w, \r10i, r0, r1 + mChi3 r3, \r7w, \r11i, r0, r1 + mChi3 r4, \r8w, \r12i, r0, r1 + mChi3 r5, \r9w, \lri, r0, r1 + .endm + +@ ---------------------------------------------------------------------------- +@ +@ void Xoodoo_Permute_6rounds( void *state ) +@ +.global Xoodoo_Permute_6rounds +.type Xoodoo_Permute_6rounds, %function; +Xoodoo_Permute_6rounds: + push {r0,r4-r11,lr} + ldmia r0!, {r2-r5} + ldmia r0!, {r8-r9} + ldmia r0!, {r6-r7} + ldmia r0, {r10-r12,lr} + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 32, 32, _rc6x1 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2 + mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3 + mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4 + mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5 + mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6 + pop {r0,r1} + ror r2, r2, #32-(6*_r0)%32 + ror r3, r3, #32-(6*_r0)%32 + ror r4, r4, #32-(6*_r0)%32 + ror r5, r5, #32-(6*_r0)%32 + ror r6, r6, #32-(6*_r0+1)%32 + ror r7, r7, #32-(6*_r0+1)%32 + ror r8, r8, #32-(6*_r0+1)%32 + ror r9, r9, #32-(6*_r0+1)%32 + ror r10, r10, #32-(6*_r0+_e1+_w1)%32 + ror r11, r11, #32-(6*_r0+_e1+_w1)%32 + ror r12, r12, #32-(6*_r0+_e1+_w1)%32 + ror lr, lr, #32-(6*_r0+_e1+_w1)%32 + stmia r0, {r2-r12,lr} + mov r4, r1 + pop {r5-r11,pc} + .align 4 + + +@ 
----------------------------------------------------------------------------
+@
+@ void Xoodoo_Permute_12rounds( void *state )
+@ Applies twelve mRound iterations (_rc12x1 .. _rc1x12) to the 12-word state at r0, in place.
+.global Xoodoo_Permute_12rounds
+.type Xoodoo_Permute_12rounds, %function;
+Xoodoo_Permute_12rounds:
+    push {r0,r4-r11,lr}
+    ldmia r0, {r2-r12,lr}
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc12x1
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc11x2
+    mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc10x3
+    mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc9x4
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc8x5
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc7x6
+    mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc6x7
+    mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x8
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x9
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x10
+    mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x11
+    mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x12
+    ror r2, r2, #32-(12*_r0)%32 @ the ror block applies the rotations the rounds left pending
+    ror r3, r3, #32-(12*_r0)%32
+    ror r4, r4, #32-(12*_r0)%32
+    ror r5, r5, #32-(12*_r0)%32
+    ror r6, r6, #32-(12*_r0+1)%32
+    ror r7, r7, #32-(12*_r0+1)%32
+    ror r8, r8, #32-(12*_r0+1)%32
+    ror r9, r9, #32-(12*_r0+1)%32
+    ror r10, r10, #32-(12*_r0+_e1+_w1)%32
+    ror r11, r11, #32-(12*_r0+_e1+_w1)%32
+    ror r12, r12, #32-(12*_r0+_e1+_w1)%32
+    ror lr, lr, #32-(12*_r0+_e1+_w1)%32
+    pop {r0,r1} @ r0 = saved state pointer, r1 = caller's r4
+    stmia r0, {r2-r12,lr}
+    mov r4, r1 @ restore caller's r4
+    pop {r5-r11,pc}
+    .align 4
+
+
+.equ Xoofff_BlockSize , 3*4*4
+
+@ ----------------------------------------------------------------------------
+@ XORs the first bitLen bits of input into output; a trailing partial byte is XORed and then masked to its low (bitLen % 8) bits.
+@ void Xoofff_AddIs(BitSequence *output, const BitSequence *input, BitLength bitLen)
+.global Xoofff_AddIs
+.type Xoofff_AddIs, %function;
+Xoofff_AddIs:
+    push {r4-r10,lr}
+
+    subs r2, r2, #Xoofff_BlockSize*8 @ r2 counts bits, so sizes are scaled by 8
+    bcc Xoofff_AddIs_LessThanBlock
+Xoofff_AddIs_BlockLoop:
+    ldr r3, [r0, #0]
+    ldr r4, [r0, #4]
+    ldr r5, [r0, #8]
+    ldr r6, [r0, #12]
+    ldr r7, [r1], #4
+    ldr r8, [r1], #4
+    ldr r9, [r1], #4
+    ldr r10, [r1], #4
+    eor r3, r3, r7
+    eor r4, r4, r8
+    eor r5, r5, r9
+    eor r6, r6, r10
+    str r3, [r0], #4
+    str r4, [r0], #4
+    str r5, [r0], #4
+    str r6, [r0], #4
+
+    ldr r3, [r0, #0]
+    ldr r4, [r0, #4]
+    ldr r5, [r0, #8]
+    ldr r6, [r0, #12]
+    ldr r7, [r1], #4
+    ldr r8, [r1], #4
+    ldr r9, [r1], #4
+    ldr r10, [r1], #4
+    eor r3, r3, r7
+    eor r4, r4, r8
+    eor r5, r5, r9
+    eor r6, r6, r10
+    str r3, [r0], #4
+    str r4, [r0], #4
+    str r5, [r0], #4
+    str r6, [r0], #4
+
+    ldr r3, [r0, #0]
+    ldr r4, [r0, #4]
+    ldr r5, [r0, #8]
+    ldr r6, [r0, #12]
+    ldr r7, [r1], #4
+    ldr r8, [r1], #4
+    ldr r9, [r1], #4
+    ldr r10, [r1], #4
+    eor r3, r3, r7
+    eor r4, r4, r8
+    eor r5, r5, r9
+    eor r6, r6, r10
+    str r3, [r0], #4
+    str r4, [r0], #4
+    str r5, [r0], #4
+    str r6, [r0], #4
+
+    subs r2, r2, #Xoofff_BlockSize*8
+    bcs Xoofff_AddIs_BlockLoop
+Xoofff_AddIs_LessThanBlock:
+    adds r2, r2, #Xoofff_BlockSize*8 @ undo the last subtraction: r2 = bits still to process
+    beq Xoofff_AddIs_Return
+    subs r2, r2, #16*8
+    bcc Xoofff_AddIs_LessThan16
+Xoofff_AddIs_16Loop:
+    ldr r3, [r0, #0]
+    ldr r4, [r0, #4]
+    ldr r5, [r0, #8]
+    ldr r6, [r0, #12]
+    ldr r7, [r1], #4
+    ldr r8, [r1], #4
+    ldr r9, [r1], #4
+    ldr r10, [r1], #4
+    eor r3, r3, r7
+    eor r4, r4, r8
+    eor r5, r5, r9
+    eor r6, r6, r10
+    str r3, [r0], #4
+    str r4, [r0], #4
+    str r5, [r0], #4
+    str r6, [r0], #4
+    subs r2, r2, #16*8
+    bcs Xoofff_AddIs_16Loop
+Xoofff_AddIs_LessThan16:
+    adds r2, r2, #16*8
+    beq Xoofff_AddIs_Return
+    subs r2, r2, #4*8
+    bcc Xoofff_AddIs_LessThan4
+Xoofff_AddIs_4Loop:
+    ldr r3, [r0]
+    ldr r7, [r1], #4
+    eors r3, r3, r7
+    str r3, [r0], #4
+    subs r2, r2, #4*8
+    bcs Xoofff_AddIs_4Loop
+Xoofff_AddIs_LessThan4:
+    adds r2, r2, #4*8
+    beq Xoofff_AddIs_Return
+    subs r2, r2, #8
+    bcc Xoofff_AddIs_LessThan1
+Xoofff_AddIs_1Loop:
+    ldrb r3, [r0]
+    ldrb r7, [r1], #1
+    eors r3, r3, r7
+    strb r3, [r0], #1
+    subs r2, r2, #8
+    bcs Xoofff_AddIs_1Loop
+Xoofff_AddIs_LessThan1:
+    adds r2, r2, #8 @ r2 = number of leftover bits (0..7)
+    beq Xoofff_AddIs_Return
+    ldrb r3, [r0]
+    ldrb r7, [r1]
+    movs r1, #1
+    eors r3, r3, r7
+    lsls r1, r1, r2
+    subs r1, r1, #1 @ r1 = (1 << leftover bits) - 1
+    ands r3, r3, r1 @ keep only the low leftover bits of the XORed byte
+    strb r3, [r0]
+Xoofff_AddIs_Return:
+    pop {r4-r10,pc}
+    .align 4
+
+
+@ ----------------------------------------------------------------------------
+@
+@ size_t Xoofff_CompressFastLoop(unsigned char *kRoll, unsigned char *xAccu, const unsigned char *input, size_t length)
+@ Per Xoofff_BlockSize block: XOR input into the kRoll words held in registers, run 6 rounds, XOR the result into xAccu, then roll kRoll in memory. Returns the number of input bytes consumed.
+.equ Xoofff_Compress_kRoll , 0
+.equ Xoofff_Compress_input , 4
+.equ Xoofff_Compress_xAccu , 8
+.equ Xoofff_Compress_iInput , 12
+.equ Xoofff_Compress_length , 16
+
+.global Xoofff_CompressFastLoop
+.type Xoofff_CompressFastLoop, %function;
+Xoofff_CompressFastLoop:
+    subs r3, #Xoofff_BlockSize @ length must be greater than block size
+    push {r1-r12,lr}
+    push {r0,r2} @ frame: [sp,#0]=kRoll, #4=running input pointer, #8=xAccu, #12=initial input, #16=length - Xoofff_BlockSize
+    ldmia r0, {r2-r12,lr} @ get initial kRoll
+Xoofff_CompressFastLoop_Loop:
+    ldr r0, [sp, #Xoofff_Compress_input] @ add input
+    ldr r1, [r0], #4
+    eors r2, r2, r1
+    ldr r1, [r0], #4
+    eors r3, r3, r1
+    ldr r1, [r0], #4
+    eors r4, r4, r1
+    ldr r1, [r0], #4
+    eors r5, r5, r1
+
+    ldr r1, [r0], #4
+    eors r6, r6, r1
+    ldr r1, [r0], #4
+    eors r7, r7, r1
+    ldr r1, [r0], #4
+    eors r8, r8, r1
+    ldr r1, [r0], #4
+    eors r9, r9, r1
+
+    ldr r1, [r0], #4
+    eors r10, r10, r1
+    ldr r1, [r0], #4
+    eors r11, r11, r1
+    ldr r1, [r0], #4
+    eors r12, r12, r1
+    ldr r1, [r0], #4
+    eors lr, lr, r1
+    str r0, [sp, #Xoofff_Compress_input]
+
+    @ permutation
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc6x1
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2
+    mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3
+    mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6
+
+    @ Extract and add into xAccu
+    ldr r0, [sp, #Xoofff_Compress_xAccu]
+    ldr r1, [r0]
+    mRloXor r2, r1, (6*_r0)%32
+    ldr r1, [r0, #4]
+
+    str r2, [r0], #4
+    mRloXor r3, r1, (6*_r0)%32
+    ldr r1, [r0, #4]
+
+    str r3, [r0], #4
+    mRloXor r4, r1, (6*_r0)%32
+    ldr r1, [r0, #4]
+
+    str r4, [r0], #4
+    mRloXor r5, r1, (6*_r0)%32
+    str r5, [r0], #4
+
+    ldm r0, {r2-r5} @ note that r6-r8 and r7-r9 are swapped
+    mRliXor r2, r8, (6*_r0+1)%32
+    mRliXor r3, r9, (6*_r0+1)%32
+    mRliXor r4, r6, (6*_r0+1)%32
+    mRliXor r5, r7, (6*_r0+1)%32
+    stm r0!, {r2-r5}
+
+    ldm r0, {r2-r5}
+    mRliXor r2, r10, (6*_r0+_e1+_w1)%32
+    mRliXor r3, r11, (6*_r0+_e1+_w1)%32
+    mRliXor r4, r12, (6*_r0+_e1+_w1)%32
+    mRliXor r5, lr, (6*_r0+_e1+_w1)%32
+    stm r0!, {r2-r5}
+
+    @roll kRoll
+    ldr r0, [sp, #Xoofff_Compress_kRoll]
+    ldr lr, [r0], #4 @ lr = word 0, r10-r12 = words 1-3, r2-r9 = words 4-11
+    ldmia r0!, {r10-r12}
+    ldmia r0!, {r2-r9}
+    eors lr, lr, lr, LSL #13
+    eors lr, lr, r2, ROR #32-3
+    sub r0, #Xoofff_BlockSize
+    stmia r0, {r2-r12,lr} @ store the rolled kRoll back; the registers also hold it for the next block
+    @ loop management
+    ldr r0, [sp, #Xoofff_Compress_length]
+    subs r0, #Xoofff_BlockSize
+    str r0, [sp, #Xoofff_Compress_length]
+    bcs Xoofff_CompressFastLoop_Loop
+    @ return number of bytes processed
+    ldr r0, [sp, #Xoofff_Compress_input]
+    ldr r1, [sp, #Xoofff_Compress_iInput]
+    sub r0, r0, r1
+    pop {r1,r2} @ drop the two-word scratch frame; r1 and r2 are reloaded by the next pop
+    pop {r1-r12,pc}
+    .align 4
+
+
+@ ----------------------------------------------------------------------------
+@
+@ size_t Xoofff_ExpandFastLoop(unsigned char *yAccu, const unsigned char *kRoll, unsigned char *output, size_t length)
+@ Per Xoofff_BlockSize block: run 6 rounds on the yAccu words held in registers, XOR with kRoll and store to output, then roll yAccu in memory. Returns the number of output bytes written.
+.equ Xoofff_Expand_yAccu , 0
+.equ Xoofff_Expand_output , 4
+.equ Xoofff_Expand_kRoll , 8
+.equ Xoofff_Expand_iOutput , 12
+.equ Xoofff_Expand_length , 16
+
+.global Xoofff_ExpandFastLoop
+.type Xoofff_ExpandFastLoop, %function;
+Xoofff_ExpandFastLoop:
+    subs r3, #Xoofff_BlockSize @ length must be greater than block size
+    push {r1-r12,lr}
+    push {r0,r2} @ frame: [sp,#0]=yAccu, #4=running output pointer, #8=kRoll, #12=initial output, #16=length - Xoofff_BlockSize
+    ldmia r0, {r2-r12,lr} @ get initial yAccu
+Xoofff_ExpandFastLoop_Loop:
+    @ permutation
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc6x1
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2
+    mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3
+    mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6
+
+    @ Add k and extract
+    ldr r0, [sp, #Xoofff_Expand_kRoll]
+    ldr r1, [r0], #4
+    mRloXor r2, r1, (6*_r0)%32
+
+    ldr r1, [sp, #Xoofff_Expand_output]
+    str r2, [r1], #4
+
+    ldr r2, [r0], #4
+    mRloXor r3, r2, (6*_r0)%32
+    ldr r2, [r0], #4
+
+    str r3, [r1], #4
+    mRloXor r4, r2, (6*_r0)%32
+    ldr r2, [r0], #4
+
+    str r4, [r1], #4
+    mRloXor r5, r2, (6*_r0)%32
+    str r5, [r1], #4
+
+    ldm r0!, {r2-r5} @ Note that r6-r8 and r7-r9 are swapped
+    mRliXor r2, r8, (6*_r0+1)%32
+    str r2, [r1], #4
+    mRliXor r3, r9, (6*_r0+1)%32
+    str r3, [r1], #4
+    mRliXor r4, r6, (6*_r0+1)%32
+    str r4, [r1], #4
+    mRliXor r5, r7, (6*_r0+1)%32
+    str r5, [r1], #4
+
+    ldm r0!, {r2-r5}
+    mRliXor r2, r10, (6*_r0+_e1+_w1)%32
+    str r2, [r1], #4
+    mRliXor r3, r11, (6*_r0+_e1+_w1)%32
+    str r3, [r1], #4
+    mRliXor r4, r12, (6*_r0+_e1+_w1)%32
+    str r4, [r1], #4
+    mRliXor r5, lr, (6*_r0+_e1+_w1)%32
+    str r5, [r1], #4
+
+    @ roll-e yAccu
+    ldr r0, [sp, #Xoofff_Expand_yAccu]
+    str r1, [sp, #Xoofff_Expand_output]
+    ldr lr, [r0], #4 @ lr = word 0, r10-r12 = words 1-3, r2-r9 = words 4-11
+    ldmia r0!, {r10-r12}
+    ldmia r0!, {r2-r9}
+    and r1, r6, r2
+    eor lr, r1, lr, ROR #32-5
+    eor lr, lr, r2, ROR #32-13
+    eor lr, lr, #7
+    sub r0, #Xoofff_BlockSize
+    stmia r0, {r2-r12,lr} @ store the rolled yAccu back; the registers also hold it for the next block
+    @ loop management
+    ldr r0, [sp, #Xoofff_Expand_length]
+    subs r0, #Xoofff_BlockSize
+    str r0, [sp, #Xoofff_Expand_length]
+    bcs Xoofff_ExpandFastLoop_Loop
+    @ return number of bytes processed
+    ldr r0, [sp, #Xoofff_Expand_output]
+    ldr r1, [sp, #Xoofff_Expand_iOutput]
+    sub r0, r0, r1
+    pop {r1,r2} @ drop the two-word scratch frame; r1 and r2 are reloaded by the next pop
+    pop {r1-r12,pc}
+    .align 4
+
+
+
diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Xoodoo.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Xoodoo.h
new file mode 100644
index 0000000..1b6f1a9
--- /dev/null
+++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Xoodoo.h
@@ -0,0 +1,79 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _Xoodoo_h_
+#define _Xoodoo_h_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define MAXROUNDS 12
+#define NROWS 3
+#define NCOLUMS 4
+#define NLANES (NCOLUMS*NROWS)
+
+/* Round constants */
+#define _rc12 0x00000058
+#define _rc11 0x00000038
+#define _rc10 0x000003C0
+#define _rc9 0x000000D0
+#define _rc8 0x00000120
+#define _rc7 0x00000014
+#define _rc6 0x00000060
+#define _rc5 0x0000002C
+#define _rc4 0x00000380
+#define _rc3 0x000000F0
+#define _rc2 0x000001A0
+#define _rc1 0x00000012
+
+/* ROTL32(a, offset): rotate the 32-bit value a left by offset bits (offset is reduced modulo 32). */
+#if !defined(ROTL32)
+    #if defined (__arm__) && !defined(__GNUC__)
+        #define ROTL32(a, offset) __ror(a, (32-(offset))%32)
+    #elif defined(_MSC_VER)
+        #define ROTL32(a, offset) _rotl(a, (offset)%32)
+    #else /* portable fallback: the halves are OR-ed so that offset % 32 == 0 returns a (XOR-ing two unshifted copies gave 0) */
+        #define ROTL32(a, offset) ((((uint32_t)(a)) << ((offset)%32)) | (((uint32_t)(a)) >> ((32-(offset))%32)))
+    #endif
+#endif
+/* READ32_UNALIGNED(p): read a uint32_t through p. NOTE(review): only the non-GCC ARM branch uses __packed; the others are plain casts and presumably rely on the target tolerating unaligned word access - confirm. */
+#if !defined(READ32_UNALIGNED)
+    #if defined (__arm__) && !defined(__GNUC__)
+        #define READ32_UNALIGNED(argAddress) (*((const __packed uint32_t*)(argAddress)))
+    #elif defined(_MSC_VER)
+        #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress)))
+    #else
+        #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress)))
+    #endif
+#endif
+/* WRITE32_UNALIGNED(p, v): store the uint32_t v through p; same alignment caveat as READ32_UNALIGNED. */
+#if !defined(WRITE32_UNALIGNED)
+    #if defined (__arm__) && !defined(__GNUC__)
+        #define WRITE32_UNALIGNED(argAddress, argData) (*((__packed uint32_t*)(argAddress)) = (argData))
+    #elif defined(_MSC_VER)
+        #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData))
+    #else
+        #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData))
+    #endif
+#endif
+/* index(x, y): lane number of column x, row y; both coordinates are reduced modulo NCOLUMS / NROWS. */
+#if !defined(index)
+    #define index(__x,__y) ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS))
+#endif
+
+typedef uint32_t tXoodooLane;
+
+#endif
diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Xoodyak-parameters.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Xoodyak-parameters.h
new file mode 100644
index 0000000..a8c34d8
--- /dev/null
+++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Xoodyak-parameters.h
@@ -0,0 +1,26 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _Xoodyak_parameters_h_
+#define _Xoodyak_parameters_h_
+
+#define Xoodyak_f_bPrime 48
+#define Xoodyak_Rhash 16 /* block size in bytes of the hash-mode full-block absorb/squeeze loops */
+#define Xoodyak_Rkin 44 /* block size in bytes of the keyed full-block absorb loop */
+#define Xoodyak_Rkout 24 /* block size in bytes of the keyed squeeze and encrypt/decrypt full-block loops */
+#define Xoodyak_lRatchet 16
+
+#endif
diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Xoodyak-uf-armv7m-le-gcc.s b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Xoodyak-uf-armv7m-le-gcc.s
new file mode 100644
index 0000000..1249039
--- /dev/null
+++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Xoodyak-uf-armv7m-le-gcc.s
@@ -0,0 +1,565 @@
+@
+@ The eXtended Keccak Code Package (XKCP)
+@ https://github.com/XKCP/XKCP
+@
+@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
+@
+@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+@
+@ For more information, feedback or questions, please refer to the Keccak Team website:
+@ https://keccak.team/
+@
+@ To the extent possible under law, the implementer has waived all copyright
+@ and related or neighboring rights to the source code in this file.
+@ http://creativecommons.org/publicdomain/zero/1.0/
+@
+
+@ WARNING: These functions work only on little endian CPU with ARMv7m architecture (Cortex-M3, ...).
+
+
+    .thumb
+    .syntax unified
+.text
+
+
+@ ----------------------------------------------------------------------------
+@ Rotation amounts used by the round macros below (bit counts).
+.equ _r0 , 5
+.equ _r1 , 14
+.equ _t3 , 1
+
+.equ _w1 , 11
+
+.equ _e0 , 2
+.equ _e1 , 8
+@ Round constants; the _rcNxM forms are the values passed to mRound as the M-th round of a sequence.
+.equ _rc12 , 0x00000058
+.equ _rc11 , 0x00000038
+.equ _rc10 , 0x000003C0
+.equ _rc9 , 0x000000D0
+.equ _rc8 , 0x00000120
+.equ _rc7 , 0x00000014
+.equ _rc6 , 0x00000060
+.equ _rc5 , 0x0000002C
+.equ _rc4 , 0x00000380
+.equ _rc3 , 0x000000F0
+.equ _rc2 , 0x000001A0
+.equ _rc1 , 0x00000012
+
+.equ _rc6x1 , 0x00000003
+.equ _rc5x2 , 0x0b000000
+.equ _rc4x3 , 0x07000000
+.equ _rc3x4 , 0x000f0000
+.equ _rc2x5 , 0x0000d000
+.equ _rc1x6 , 0x00000048
+
+.equ _rc12x1, 0xc0000002
+.equ _rc11x2, 0x0e000000
+.equ _rc10x3, 0x07800000
+.equ _rc9x4 , 0x000d0000
+.equ _rc8x5 , 0x00009000
+.equ _rc7x6 , 0x00000050
+.equ _rc6x7 , 0x0000000c
+.equ _rc5x8 , 0x2c000000
+.equ _rc4x9 , 0x1c000000
+.equ _rc3x10, 0x003c0000
+.equ _rc2x11, 0x00034000
+.equ _rc1x12, 0x00000120
+
+@ ----------------------------------------------------------------------------
+@ mXor3: ro = a0 ^ ROL(a1, rho_e1) ^ ROL(a2, rho_e2); a rotation that is 0 mod 32 uses a plain eors.
+.macro mXor3 ro, a0, a1, a2, rho_e1, rho_e2
+    .if ((\rho_e1)%32) == 0
+    eors \ro, \a0, \a1
+    .else
+    eor \ro, \a0, \a1, ROR #(32-(\rho_e1))%32
+    .endif
+    .if ((\rho_e2)%32) == 0
+    eors \ro, \ro, \a2
+    .else
+    eor \ro, \ro, \a2, ROR #(32-(\rho_e2))%32
+    .endif
+    .endm
+@ mRliXor: ro = ro ^ ROL(ri, rot) (the rotation is applied to the input operand).
+.macro mRliXor ro, ri, rot
+    .if ((\rot)%32) == 0
+    eors \ro, \ro, \ri
+    .else
+    eor \ro, \ro, \ri, ROR #(32-(\rot))%32
+    .endif
+    .endm
+@ mRloXor: ro = ri ^ ROL(ro, rot) (the rotation is applied to the previous value of the output operand).
+.macro mRloXor ro, ri, rot
+    .if ((\rot)%32) == 0
+    eors \ro, \ro, \ri
+    .else
+    eor \ro, \ri, \ro, ROR #(32-(\rot))%32
+    .endif
+    .endm
+@ mChi3: and-not/xor update of the three lanes a0, a1, a2 with scratch r0, r1; rotations by _w1 are folded into the operands.
+.macro mChi3 a0,a1,a2,r0,r1
+    bic \r0, \a2, \a1, ROR #_w1
+    eors \a0, \a0, \r0, ROR #32-_w1
+    bic \r1, \a0, \a2, ROR #32-_w1
+    eors \a1, \a1, \r1
+    bic \r1, \a1, \a0
+    eors \a2, \a2, \r1, ROR #_w1
+    .endm
+@ mRound: one round (Theta, Iota with constant rc, Chi) on the state in r2-r12,lr; r0 and r1 are scratch. The register-name arguments select which physical registers are used in each role this round.
+.macro mRound r6i, r7i, r8i, r9i, r6w, r7w, r8w, r9w, r10i, r11i, r12i, lri, rho_e1, rho_we2, rc
+
+    @ Theta: Column Parity Mixer (with late Rho-west, Rho-east bit rotations)
+    mXor3 r0, r5, \r9i, \lri, \rho_e1, \rho_we2
+    mXor3 r1, r2, \r6i, \r10i, \rho_e1, \rho_we2
+    mRliXor r0, r0, _r1-_r0
+    mRloXor r2, r0, 32-_r0
+    mRloXor \r6i, r0, \rho_e1-_r0
+    mRloXor \r10i, r0, \rho_we2-_r0
+
+    mXor3 r0, r3, \r7i, \r11i, \rho_e1, \rho_we2
+    mRliXor r1, r1, _r1-_r0
+    mRloXor r3, r1, 32-_r0
+    mRloXor \r7i, r1, \rho_e1-_r0
+    mRloXor \r11i, r1, \rho_we2-_r0
+
+    mXor3 r1, r4, \r8i, \r12i, \rho_e1, \rho_we2
+    mRliXor r0, r0, _r1-_r0
+    mRloXor r4, r0, 32-_r0
+    mRloXor \r8i, r0, \rho_e1-_r0
+    mRloXor \r12i, r0, \rho_we2-_r0
+
+    mRliXor r1, r1, _r1-_r0
+    mRloXor r5, r1, 32-_r0
+    mRloXor \r9i, r1, \rho_e1-_r0
+    mRloXor \lri, r1, \rho_we2-_r0
+    @ After Theta the whole state is rotated -r0
+    @ from here we must use a1.w instead of a1.i
+
+    @ Iota: round constant
+    .if \rc == 0xc0000002
+    eor r2, r2, #0x00000002
+    eor r2, r2, #0xc0000000
+    .else
+    eor r2, r2, #\rc
+    .endif
+
+    @ Chi: non linear step, on columns
+    mChi3 r2, \r6w, \r10i, r0, r1
+    mChi3 r3, \r7w, \r11i, r0, r1
+    mChi3 r4, \r8w, \r12i, r0, r1
+    mChi3 r5, \r9w, \lri, r0, r1
+    .endm
+@ Byte offsets into the six-word frame that the public functions below build with "push {r0-r5}".
+.equ offsetInstance , 0
+.equ offsetInitialLen , 16
+.equ offsetReturn , 20
+
+@ ----------------------------------------------------------------------------
+@
+@ Xoodoo_Permute_12roundsAsm: only callable from asm
+@ Runs 12 rounds on the state held in r2-r12,lr (r0, r1 are scratch) and returns by loading pc from [sp, #offsetReturn].
+.type Xoodoo_Permute_12roundsAsm, %function;
+Xoodoo_Permute_12roundsAsm:
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc12x1
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc11x2
+    mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc10x3
+    mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc9x4
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc8x5
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc7x6
+    mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc6x7
+    mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x8
+    mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x9
+    mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x10
+    mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x11
+    mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x12
+    ror r2, r2, #32-(12*_r0)%32 @ the ror block applies the rotations the rounds left pending
+    ror r3, r3, #32-(12*_r0)%32
+    ror r4, r4, #32-(12*_r0)%32
+    ror r5, r5, #32-(12*_r0)%32
+    ror r6, r6, #32-(12*_r0+1)%32
+    ror r7, r7, #32-(12*_r0+1)%32
+    ror r8, r8, #32-(12*_r0+1)%32
+    ror r9, r9, #32-(12*_r0+1)%32
+    ror r10, r10, #32-(12*_r0+_e1+_w1)%32
+    ror r11, r11, #32-(12*_r0+_e1+_w1)%32
+    ror r12, r12, #32-(12*_r0+_e1+_w1)%32
+    ror lr, lr, #32-(12*_r0+_e1+_w1)%32
+    ldr pc, [sp, #offsetReturn] @ jump to the return address the caller stored in its frame
+    .align 4
+
+
+
+@ ----------------------------------------------------------------------------
+@
+@ size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen)
+@ {
+@     size_t initialLength = XLen;
+@
+@     do {
+@         SnP_Permute(state );                        /* Xoodyak_Up(instance, NULL, 0, 0); */
+@         SnP_AddBytes(state, X, 0, Xoodyak_Rkin);    /* Xoodyak_Down(instance, X, Xoodyak_Rkin, 0); */
+@         SnP_AddByte(state, 0x01, Xoodyak_Rkin);
+@         X += Xoodyak_Rkin;
+@         XLen -= Xoodyak_Rkin;
+@     } while (XLen >= Xoodyak_Rkin);
+@
+@     return initialLength - XLen;
+@ }
+@
+.equ offsetAbsorbX , 4
+.equ offsetAbsorbXLen , 8
+
+.global Xoodyak_AbsorbKeyedFullBlocks
+.type Xoodyak_AbsorbKeyedFullBlocks, %function;
+Xoodyak_AbsorbKeyedFullBlocks:
+    push {r4-r12,lr}
+    mov r4, r2 @ r4 initialLength
+    subs r2, r2, #44 @ pre-subtract one block so the loop can test the borrow
+    ldr r5, =Xoodyak_AbsorbKeyedFullBlocks_Ret+1 @ return address for Xoodoo_Permute_12roundsAsm, bit 0 set
+    push {r0-r5} @ frame: [sp,#0]=state, #4=X, #8=XLen-44, #12=r3, #16=initialLength, #20=return address
+    ldmia r0, {r2-r12,lr}
+Xoodyak_AbsorbKeyedFullBlocks_Loop:
+    b.w Xoodoo_Permute_12roundsAsm
+Xoodyak_AbsorbKeyedFullBlocks_Ret:
+    ldr r0, [sp, #offsetAbsorbX]
+    ldr r1, [r0], #4
+    eors r2, r2, r1
+    ldr r1, [r0], #4
+    eors r3, r3, r1
+    ldr r1, [r0], #4
+    eors r4, r4, r1
+    ldr r1, [r0], #4
+    eors r5, r5, r1
+    ldr r1, [r0], #4
+    eors r6, r6, r1
+    ldr r1, [r0], #4
+    eors r7, r7, r1
+    ldr r1, [r0], #4
+    eors r8, r8, r1
+    ldr r1, [r0], #4
+    eors r9, r9, r1
+    ldr r1, [r0], #4
+    eors r10, r10, r1
+    ldr r1, [r0], #4
+    eors r11, r11, r1
+    ldr r1, [r0], #4
+    eors lr, lr, #1 @ SnP_AddByte(state, 0x01, 44): byte 44 is the low byte of state word 11 (lr)
+    eors r12, r12, r1
+    ldr r1, [sp, #offsetAbsorbXLen]
+    str r0, [sp, #offsetAbsorbX]
+    subs r1, r1, #44
+    str r1, [sp, #offsetAbsorbXLen]
+    bcs Xoodyak_AbsorbKeyedFullBlocks_Loop
+    ldr r0, [sp, #offsetInstance]
+    stmia r0, {r2-r12,lr}
+    pop {r0-r5} @ r2 = remaining length - 44, r4 = initialLength
+    adds r2, r2, #44
+    sub r0, r4, r2 @ return initialLength - remaining length
+    pop {r4-r12,pc}
+    .align 4
+
+
+@ ----------------------------------------------------------------------------
+@
+@ size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen)
+@ {
+@     size_t initialLength = XLen;
+@
+@     do {
+@         SnP_Permute(state );                        /* Xoodyak_Up(instance, NULL, 0, 0); */
+@         SnP_AddBytes(state, X, 0, Xoodyak_Rhash);   /* Xoodyak_Down(instance, X, Xoodyak_Rhash, 0); */
+@         SnP_AddByte(state, 0x01, Xoodyak_Rhash);
+@         X += Xoodyak_Rhash;
+@         XLen -= Xoodyak_Rhash;
+@     } while (XLen >= Xoodyak_Rhash);
+@
+@     return initialLength - XLen;
+@ }
+@
+.global Xoodyak_AbsorbHashFullBlocks
+.type Xoodyak_AbsorbHashFullBlocks, %function;
+Xoodyak_AbsorbHashFullBlocks:
+    push {r4-r12,lr}
+    mov r4, r2 @ r4 initialLength
+    subs r2, r2, #16 @ pre-subtract one block so the loop can test the borrow
+    ldr r5, =Xoodyak_AbsorbHashFullBlocks_Ret+1 @ return address for Xoodoo_Permute_12roundsAsm, bit 0 set
+    push {r0-r5} @ frame: [sp,#0]=state, #4=X, #8=XLen-16, #12=r3, #16=initialLength, #20=return address
+    ldmia r0, {r2-r12,lr}
+Xoodyak_AbsorbHashFullBlocks_Loop:
+    b.w Xoodoo_Permute_12roundsAsm
+Xoodyak_AbsorbHashFullBlocks_Ret:
+    ldr r0, [sp, #offsetAbsorbX]
+    ldr r1, [r0], #4
+    eors r2, r2, r1
+    ldr r1, [r0], #4
+    eors r3, r3, r1
+    ldr r1, [r0], #4
+    eors r4, r4, r1
+    ldr r1, [r0], #4
+    eors r6, r6, #1 @ SnP_AddByte(state, 0x01, 16): byte 16 is the low byte of state word 4 (r6)
+    eors r5, r5, r1
+    ldr r1, [sp, #offsetAbsorbXLen]
+    str r0, [sp, #offsetAbsorbX]
+    subs r1, r1, #16
+    str r1, [sp, #offsetAbsorbXLen]
+    bcs Xoodyak_AbsorbHashFullBlocks_Loop
+    ldr r0, [sp, #offsetInstance]
+    stmia r0, {r2-r12,lr}
+    pop {r0-r5} @ r2 = remaining length - 16, r4 = initialLength
+    adds r2, r2, #16
+    sub r0, r4, r2 @ return initialLength - remaining length
+    pop {r4-r12,pc}
+    .align 4
+
+
+@ ----------------------------------------------------------------------------
+@
+@ size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen)
+@ {
+@     size_t initialLength = YLen;
+@
+@     do {
+@         SnP_AddByte(state, 0x01, 0);                    /* Xoodyak_Down(instance, NULL, 0, 0); */
+@         SnP_Permute(state );                            /* Xoodyak_Up(instance, Y, Xoodyak_Rkout, 0); */
+@         SnP_ExtractBytes(state, Y, 0, Xoodyak_Rkout);
+@         Y += Xoodyak_Rkout;
+@         YLen -= Xoodyak_Rkout;
+@     } while (YLen >= Xoodyak_Rkout);
+@
+@     return initialLength - YLen;
+@ }
+@
+.equ offsetSqueezeY , 4
+.equ offsetSqueezeYLen , 8
+
+.global Xoodyak_SqueezeKeyedFullBlocks
+.type Xoodyak_SqueezeKeyedFullBlocks, %function;
+Xoodyak_SqueezeKeyedFullBlocks:
+    push {r4-r12,lr}
+    mov r4, r2 @ r4 initialLength
+    subs r2, r2, #24 @ pre-subtract one block so the loop can test the borrow
+    ldr r5, =Xoodyak_SqueezeKeyedFullBlocks_Ret+1 @ return address for Xoodoo_Permute_12roundsAsm, bit 0 set
+    push {r0-r5} @ frame: [sp,#0]=state, #4=Y, #8=YLen-24, #12=r3, #16=initialLength, #20=return address
+    ldmia r0, {r2-r12,lr}
+Xoodyak_SqueezeKeyedFullBlocks_Loop:
+    eors r2, r2, #1 @ SnP_AddByte(state, 0x01, 0): byte 0 is the low byte of state word 0 (r2)
+    b.w Xoodoo_Permute_12roundsAsm
+Xoodyak_SqueezeKeyedFullBlocks_Ret:
+    ldr r0, [sp, #offsetSqueezeY]
+    str r2, [r0], #4
+    str r3, [r0], #4
+    str r4, [r0], #4
+    str r5, [r0], #4
+    str r6, [r0], #4
+    str r7, [r0], #4
+    ldr r1, [sp, #offsetSqueezeYLen]
+    str r0, [sp, #offsetSqueezeY]
+    subs r1, r1, #24
+    str r1, [sp, #offsetSqueezeYLen]
+    bcs Xoodyak_SqueezeKeyedFullBlocks_Loop
+    ldr r0, [sp, #offsetInstance]
+    stmia r0, {r2-r12,lr}
+    pop {r0-r5} @ r2 = remaining length - 24, r4 = initialLength
+    adds r2, r2, #24
+    sub r0, r4, r2 @ return initialLength - remaining length
+    pop {r4-r12,pc}
+    .align 4
+
+
+@ ----------------------------------------------------------------------------
+@
+@ size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen)
+@ {
+@     size_t initialLength = YLen;
+@
+@     do {
+@         SnP_AddByte(state, 0x01, 0);                    /* Xoodyak_Down(instance, NULL, 0, 0); */
+@         SnP_Permute(state);                             /* Xoodyak_Up(instance, Y, Xoodyak_Rhash, 0); */
+@         SnP_ExtractBytes(state, Y, 0, Xoodyak_Rhash);
+@         Y += Xoodyak_Rhash;
+@         YLen -= Xoodyak_Rhash;
+@     } while (YLen >= Xoodyak_Rhash);
+@
+@     return initialLength - YLen;
+@ }
+@
+.global Xoodyak_SqueezeHashFullBlocks
+.type Xoodyak_SqueezeHashFullBlocks, %function;
+Xoodyak_SqueezeHashFullBlocks:
+    push {r4-r12,lr}
+    mov r4, r2 @ r4 initialLength
+    subs r2, r2, #16 @ pre-subtract one block so the loop can test the borrow
+    ldr r5, =Xoodyak_SqueezeHashFullBlocks_Ret+1 @ return address for Xoodoo_Permute_12roundsAsm, bit 0 set
+    push {r0-r5} @ frame: [sp,#0]=state, #4=Y, #8=YLen-16, #12=r3, #16=initialLength, #20=return address
+    ldmia r0, {r2-r12,lr}
+Xoodyak_SqueezeHashFullBlocks_Loop:
+    eors r2, r2, #1 @ SnP_AddByte(state, 0x01, 0): byte 0 is the low byte of state word 0 (r2)
+    b.w Xoodoo_Permute_12roundsAsm
+Xoodyak_SqueezeHashFullBlocks_Ret:
+    ldr r0, [sp, #offsetSqueezeY]
+    str r2, [r0], #4
+    str r3, [r0], #4
+    str r4, [r0], #4
+    str r5, [r0], #4
+    ldr r1, [sp, #offsetSqueezeYLen]
+    str r0, [sp, #offsetSqueezeY]
+    subs r1, r1, #16
+    str r1, [sp, #offsetSqueezeYLen]
+    bcs Xoodyak_SqueezeHashFullBlocks_Loop
+    ldr r0, [sp, #offsetInstance]
+    stmia r0, {r2-r12,lr}
+    pop {r0-r5} @ r2 = remaining length - 16, r4 = initialLength
+    adds r2, r2, #16
+    sub r0, r4, r2 @ return initialLength - remaining length
+    pop {r4-r12,pc}
+    .align 4
+
+
+@ ----------------------------------------------------------------------------
+@
+@ size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen)
+@ {
+@     size_t initialLength = IOLen;
+@
+@     do {
+@         SnP_Permute(state);
+@         SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout);
+@         SnP_OverwriteBytes(state, O, 0, Xoodyak_Rkout);
+@         SnP_AddByte(state, 0x01, Xoodyak_Rkout);
+@         I += Xoodyak_Rkout;
+@         O += Xoodyak_Rkout;
+@         IOLen -= Xoodyak_Rkout;
+@     } while (IOLen >= Xoodyak_Rkout);
+@
+@     return initialLength - IOLen;
+@ }
+@
+.equ offsetCryptI , 4+8
+.equ offsetCryptO , 8+8
+.equ offsetCryptIOLen , 12
+
+.global Xoodyak_EncryptFullBlocks
+.type Xoodyak_EncryptFullBlocks, %function;
+Xoodyak_EncryptFullBlocks:
+    push {r4-r12,lr}
+    mov r4, r3 @ r4 initialLength
+    subs r3, r3, #24 @ pre-subtract one block so the loop can test the borrow
+    ldr r5, =Xoodyak_EncryptFullBlocks_Ret+1 @ return address for Xoodoo_Permute_12roundsAsm, bit 0 set
+    push {r0-r5} @ frame: [sp,#0]=state, #4=I, #8=O, #12=IOLen-24, #16=initialLength, #20=return address
+    ldmia r0, {r2-r12,lr}
+Xoodyak_EncryptFullBlocks_Loop:
+    b.w Xoodoo_Permute_12roundsAsm
+Xoodyak_EncryptFullBlocks_Ret:
+    push {r10, r11} @ free two registers for the I/O pointers; offsetCryptI/O include these 8 extra bytes
+    ldr r11, [sp, #offsetCryptI]
+    ldr r10, [sp, #offsetCryptO]
+    ldr r0, [r11], #4
+    ldr r1, [r11], #4
+    eors r2, r2, r0 @ state word ^= input word; the result is both the output word and the new state word
+    str r2, [r10], #4
+    eors r3, r3, r1
+    ldr r0, [r11], #4
+    str r3, [r10], #4
+    eors r4, r4, r0
+    ldr r1, [r11], #4
+    str r4, [r10], #4
+    eors r5, r5, r1
+    ldr r0, [r11], #4
+    str r5, [r10], #4
+    eors r6, r6, r0
+    ldr r1, [r11], #4
+    str r6, [r10], #4
+    eors r7, r7, r1
+    str r7, [r10], #4
+    str r10, [sp, #offsetCryptO]
+    str r11, [sp, #offsetCryptI]
+    pop {r10, r11}
+    ldr r0, [sp, #offsetCryptIOLen]
+    eors r8, r8, #1 @ SnP_AddByte(state, 0x01, 24): byte 24 is the low byte of state word 6 (r8)
+    subs r0, r0, #24
+    str r0, [sp, #offsetCryptIOLen]
+    bcs Xoodyak_EncryptFullBlocks_Loop
+    ldr r0, [sp, #offsetInstance]
+    stmia r0, {r2-r12,lr}
+    pop {r0-r5} @ r3 = remaining length - 24, r4 = initialLength
+    adds r3, r3, #24
+    sub r0, r4, r3 @ return initialLength - remaining length
+    pop {r4-r12,pc}
+    .align 4
+
+
+@ ----------------------------------------------------------------------------
+@
+@ size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen)
+@ {
+@     size_t initialLength = IOLen;
+@
+@     do {
+@         SnP_Permute(state);
+@         SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout);
+@         SnP_AddBytes(state, O, 0, Xoodyak_Rkout);
+@         SnP_AddByte(state, 0x01, Xoodyak_Rkout);
+@         I += Xoodyak_Rkout;
+@         O += Xoodyak_Rkout;
+@         IOLen -= Xoodyak_Rkout;
+@     } while (IOLen >= Xoodyak_Rkout);
+@
+@     return initialLength - IOLen;
+@ }
+@
+.global Xoodyak_DecryptFullBlocks
+.type Xoodyak_DecryptFullBlocks, %function;
+Xoodyak_DecryptFullBlocks:
+    push {r4-r12,lr}
+    mov r4, r3 @ r4 initialLength
+    subs r3, r3, #24 @ pre-subtract one block so the loop can test the borrow
+    ldr r5, =Xoodyak_DecryptFullBlocks_Ret+1 @ return address for Xoodoo_Permute_12roundsAsm, bit 0 set
+    push {r0-r5} @ frame: [sp,#0]=state, #4=I, #8=O, #12=IOLen-24, #16=initialLength, #20=return address
+    ldmia r0, {r2-r12,lr}
+Xoodyak_DecryptFullBlocks_Loop:
+    b.w Xoodoo_Permute_12roundsAsm
+Xoodyak_DecryptFullBlocks_Ret:
+    push {r10, r11} @ free two registers for the I/O pointers; offsetCryptI/O include these 8 extra bytes
+    ldr r11, [sp, #offsetCryptI]
+    ldr r10, [sp, #offsetCryptO]
+    ldr r0, [r11], #4
+    ldr r1, [r11], #4
+    eors r2, r2, r0 @ output word = state word ^ input word
+    str r2, [r10], #4
+    mov r2, r0 @ the new state word is the input word itself (state ^ output == input)
+    eors r3, r3, r1
+    ldr r0, [r11], #4
+    str r3, [r10], #4
+    mov r3, r1
+    eors r4, r4, r0
+    ldr r1, [r11], #4
+    str r4, [r10], #4
+    mov r4, r0
+    eors r5, r5, r1
+    ldr r0, [r11], #4
+    str r5, [r10], #4
+    mov r5, r1
+    eors r6, r6, r0
+    ldr r1, [r11], #4
+    str r6, [r10], #4
+    mov r6, r0
+    eors r7, r7, r1
+    str r7, [r10], #4
+    mov r7, r1
+    str r10, [sp, #offsetCryptO]
+    str r11, [sp, #offsetCryptI]
+    pop {r10, r11}
+    ldr r0, [sp, #offsetCryptIOLen]
+    eors r8, r8, #1 @ SnP_AddByte(state, 0x01, 24): byte 24 is the low byte of state word 6 (r8)
+    subs r0, r0, #24
+    str r0, [sp, #offsetCryptIOLen]
+    bcs Xoodyak_DecryptFullBlocks_Loop
+    ldr r0, [sp, #offsetInstance]
+    stmia r0, {r2-r12,lr}
+    pop {r0-r5} @ r3 = remaining length - 24, r4 = initialLength
+    adds r3, r3, #24
+    sub r0, r4, r3 @ return initialLength - remaining length
+    pop {r4-r12,pc}
+    .align 4
+
+
diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Xoodyak.c b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Xoodyak.c
new file mode 100644
index 0000000..e0b67b5
--- /dev/null
+++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/Xoodyak.c
@@ -0,0 +1,53 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifdef XoodooReference + #include "displayIntermediateValues.h" +#endif + +#include +#include +#include "Xoodyak.h" + +#ifdef OUTPUT +#include +#include + +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length); +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length) +{ + unsigned int i; + + fprintf(f, "%s:", synopsis); + for(i=0; i +#include "Cyclist.h" +#include "Xoodoo-SnP.h" +#include "Xoodyak-parameters.h" + +KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment) +KCP_DeclareCyclistFunctions(Xoodyak) + +#else +#error This requires an implementation of Xoodoo +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/align.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/align.h new file mode 100644 index 0000000..82ad2f9 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/align.h @@ -0,0 +1,33 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _align_h_ +#define _align_h_ + +/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. 
*/ +#ifdef ALIGN +#undef ALIGN +#endif + +#if defined(__GNUC__) +#define ALIGN(x) __attribute__ ((aligned(x))) +#elif defined(_MSC_VER) +#define ALIGN(x) __declspec(align(x)) +#elif defined(__ARMCC_VERSION) +#define ALIGN(x) __align(x) +#else +#define ALIGN(x) +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/api.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/api.h new file mode 100644 index 0000000..8060d2b --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/api.h @@ -0,0 +1 @@ +#define CRYPTO_BYTES 32 diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/brg_endian.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/brg_endian.h new file mode 100644 index 0000000..7c640b9 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/brg_endian.h @@ -0,0 +1,143 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The redistribution and use of this software (with or without changes) + is allowed without the payment of fees or royalties provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. 
+ --------------------------------------------------------------------------- + Issue Date: 20/12/2007 + Changes for ARM 9/9/2010 +*/ + +#ifndef _BRG_ENDIAN_H +#define _BRG_ENDIAN_H + +#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ +#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ + +#if 0 +/* Include files where endian defines and byteswap functions may reside */ +#if defined( __sun ) +# include +#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ ) +# include +#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \ + defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ ) +# include +#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ ) +# if !defined( __MINGW32__ ) && !defined( _AIX ) +# include +# if !defined( __BEOS__ ) +# include +# endif +# endif +#endif +#endif + +/* Now attempt to set the define for platform byte order using any */ +/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */ +/* seem to encompass most endian symbol definitions */ + +#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN ) +# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN ) +# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( _BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( _LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( 
__BIG_ENDIAN ) && defined( __LITTLE_ENDIAN ) +# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ ) +# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +/* if the platform byte order could not be determined, then try to */ +/* set this define using common machine defines */ +#if !defined(PLATFORM_BYTE_ORDER) + +#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ + defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ + defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ + defined( vax ) || defined( vms ) || defined( VMS ) || \ + defined( __VMS ) || defined( _M_X64 ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN + +#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ + defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ + defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ + defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ + defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ + defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ + defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \ + defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ ) 
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN + +#elif defined(__arm__) +# ifdef __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# else +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif 1 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#elif 0 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#else +# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order +#endif + +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/config.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/config.h new file mode 100644 index 0000000..7dfc043 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/config.h @@ -0,0 +1,4 @@ +/* File generated by ToTargetConfigFile.xsl */ + +#define XKCP_has_Xoodyak +#define XKCP_has_Xoodoo diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/hash.c b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/hash.c new file mode 100644 index 0000000..5955de5 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-ARMv7M/hash.c @@ -0,0 +1,43 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include "crypto_hash.h" + +#ifndef crypto_hash_BYTES + #define crypto_hash_BYTES 32 +#endif + +#include "Xoodyak.h" + +int crypto_hash(unsigned char *out, const unsigned char *in, unsigned long long inlen) +{ + Xoodyak_Instance instance; + + Xoodyak_Initialize(&instance, NULL, 0, NULL, 0, NULL, 0); + Xoodyak_Absorb(&instance, in, (size_t)inlen); + Xoodyak_Squeeze(&instance, out, crypto_hash_BYTES); + #if 0 + { + unsigned int i; + for (i = 0; i < crypto_hash_BYTES; ++i ) + { + printf("\\x%02x", out[i] ); + } + printf("\n"); + } + #endif + return 0; +} diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/Cyclist.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/Cyclist.h new file mode 100644 index 0000000..54522bb --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/Cyclist.h @@ -0,0 +1,66 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Cyclist_h_ +#define _Cyclist_h_ + +#include +#include "align.h" + +#define Cyclist_ModeHash 1 +#define Cyclist_ModeKeyed 2 + +#define Cyclist_PhaseDown 1 +#define Cyclist_PhaseUp 2 + +#ifdef OUTPUT + +#include + +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + uint8_t stateShadow[size]; \ + FILE *file; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#else + +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#endif + +#define KCP_DeclareCyclistFunctions(prefix) \ + void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \ + void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \ + void prefix##_Encrypt(prefix##_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen); \ + void prefix##_Decrypt(prefix##_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen); \ + void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \ + void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \ + void prefix##_Ratchet(prefix##_Instance *instance); + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/Cyclist.inc b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/Cyclist.inc new file mode 100644 index 0000000..ba7a156 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/Cyclist.inc @@ -0,0 +1,327 @@ +/* +The eXtended Keccak Code Package (XKCP) 
+https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define JOIN0(a, b) a ## b +#define JOIN(a, b) JOIN0(a, b) + +#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize) +#define SnP_Initialize JOIN(SnP, _Initialize) +#define SnP_AddBytes JOIN(SnP, _AddBytes) +#define SnP_AddByte JOIN(SnP, _AddByte) +#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes) +#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes) +#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes) + +#define Cyclist_Instance JOIN(prefix, _Instance) +#define Cyclist_Initialize JOIN(prefix, _Initialize) +#define Cyclist_Absorb JOIN(prefix, _Absorb) +#define Cyclist_Encrypt JOIN(prefix, _Encrypt) +#define Cyclist_Decrypt JOIN(prefix, _Decrypt) +#define Cyclist_Squeeze JOIN(prefix, _Squeeze) +#define Cyclist_SqueezeKey JOIN(prefix, _SqueezeKey) +#define Cyclist_Ratchet JOIN(prefix, _Ratchet) + +#define Cyclist_AbsorbAny JOIN(prefix, _AbsorbAny) +#define Cyclist_AbsorbKey JOIN(prefix, _AbsorbKey) +#define Cyclist_SqueezeAny JOIN(prefix, _SqueezeAny) +#define Cyclist_Down JOIN(prefix, _Down) +#define Cyclist_Up JOIN(prefix, _Up) +#define Cyclist_Crypt JOIN(prefix, _Crypt) + +#define Cyclist_f_bPrime JOIN(prefix, _f_bPrime) +#define Cyclist_Rhash JOIN(prefix, _Rhash) +#define Cyclist_Rkin JOIN(prefix, _Rkin) +#define Cyclist_Rkout JOIN(prefix, _Rkout) +#define Cyclist_lRatchet JOIN(prefix, _lRatchet) + +#if defined(CyclistFullBlocks_supported) +#define Cyclist_AbsorbKeyedFullBlocks JOIN(prefix, _AbsorbKeyedFullBlocks) +#define 
Cyclist_AbsorbHashFullBlocks JOIN(prefix, _AbsorbHashFullBlocks) +#define Cyclist_SqueezeKeyedFullBlocks JOIN(prefix, _SqueezeKeyedFullBlocks) +#define Cyclist_SqueezeHashFullBlocks JOIN(prefix, _SqueezeHashFullBlocks) +#define Cyclist_EncryptFullBlocks JOIN(prefix, _EncryptFullBlocks) +#define Cyclist_DecryptFullBlocks JOIN(prefix, _DecryptFullBlocks) +#endif + +/* ------- Cyclist internal interfaces ------- */ + +static void Cyclist_Down(Cyclist_Instance *instance, const uint8_t *Xi, unsigned int XiLen, uint8_t Cd) +{ + SnP_AddBytes(instance->state, Xi, 0, XiLen); + SnP_AddByte(instance->state, 0x01, XiLen); + SnP_AddByte(instance->state, (instance->mode == Cyclist_ModeHash) ? (Cd & 0x01) : Cd, Cyclist_f_bPrime - 1); + instance->phase = Cyclist_PhaseDown; + +} + +static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu) +{ + #if defined(OUTPUT) + uint8_t s[Cyclist_f_bPrime]; + #endif + + if (instance->mode != Cyclist_ModeHash) { + SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1); + } + #if defined(OUTPUT) + if (instance->file != NULL) { + SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime ); + SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime ); + } + #endif + SnP_Permute( instance->state ); + #if defined(OUTPUT) + if (instance->file != NULL) { + memcpy( instance->stateShadow, instance->state, sizeof(instance->state) ); + fprintf( instance->file, "Data XORed" ); + displayByteString( instance->file, "", s, Cyclist_f_bPrime ); + SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime ); + fprintf( instance->file, "After f() "); + displayByteString( instance->file, "", s, Cyclist_f_bPrime ); + } + #endif + instance->phase = Cyclist_PhaseUp; + SnP_ExtractBytes( instance->state, Yi, 0, YiLen ); +} + +static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd) +{ + unsigned int splitLen; + + do { + if (instance->phase != 
Cyclist_PhaseUp) { + Cyclist_Up(instance, NULL, 0, 0); + } + splitLen = MyMin(XLen, r); + Cyclist_Down(instance, X, splitLen, Cd); + Cd = 0; + X += splitLen; + XLen -= splitLen; + #if defined(CyclistFullBlocks_supported) + if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) { + size_t lenProcessed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen); + X += lenProcessed; + XLen -= lenProcessed; + } + else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) { + size_t lenProcessed = Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen); + X += lenProcessed; + XLen -= lenProcessed; + } + #endif + } while ( XLen != 0 ); +} + +static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen) +{ + uint8_t KID[Cyclist_Rkin]; + + assert(instance->mode == Cyclist_ModeHash); + assert((KLen + IDLen) <= (Cyclist_Rkin - 1)); + + instance->mode = Cyclist_ModeKeyed; + instance->Rabsorb = Cyclist_Rkin; + instance->Rsqueeze = Cyclist_Rkout; + if (KLen != 0) { + memcpy(KID, K, KLen); + memcpy(KID + KLen, ID, IDLen); + KID[KLen + IDLen] = (uint8_t)IDLen; + Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02); + if (counterLen != 0) { + Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00); + } + } +} + +static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu) +{ + unsigned int len; + + len = MyMin(YLen, instance->Rsqueeze ); + Cyclist_Up(instance, Y, len, Cu); + Y += len; + YLen -= len; + while (YLen != 0) { + #if defined(CyclistFullBlocks_supported) + if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkin)) { + size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen); + Y += lenProcessed; + YLen -= lenProcessed; + } + else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) { + size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen); + Y += 
lenProcessed; + YLen -= lenProcessed; + } + else + #endif + { + Cyclist_Down(instance, NULL, 0, 0); + len = MyMin(YLen, instance->Rsqueeze ); + Cyclist_Up(instance, Y, len, 0); + Y += len; + YLen -= len; + } + } +} + +static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt) +{ + unsigned int splitLen; + uint8_t P[Cyclist_Rkout]; + uint8_t Cu = 0x80; + + do { + if (decrypt != 0) { + #if defined(CyclistFullBlocks_supported) + if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) { + size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen); + I += lenProcessed; + O += lenProcessed; + IOLen -= lenProcessed; + } + else + #endif + { + splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */ + Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */ + Xoodoo_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */ + Cyclist_Down(instance, O, splitLen, 0x00); + I += splitLen; + O += splitLen; + IOLen -= splitLen; + } + } + else { + #if defined(CyclistFullBlocks_supported) + if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) { + size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen); + I += lenProcessed; + O += lenProcessed; + IOLen -= lenProcessed; + } + else + #endif + { + splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */ + memcpy(P, I, splitLen); + Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */ + Xoodoo_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */ + Cyclist_Down(instance, P, splitLen, 0x00); + I += splitLen; + O += splitLen; + IOLen -= splitLen; + } + } + Cu = 0x00; + } while ( IOLen != 0 ); +} + +/* ------- Cyclist interfaces ------- */ + +void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, 
size_t counterLen) +{ + SnP_StaticInitialize(); + SnP_Initialize(instance->state); + instance->phase = Cyclist_PhaseUp; + instance->mode = Cyclist_ModeHash; + instance->Rabsorb = Cyclist_Rhash; + instance->Rsqueeze = Cyclist_Rhash; + #ifdef OUTPUT + instance->file = 0; + SnP_Initialize( instance->stateShadow ); + #endif + if (KLen != 0) { + Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, counter, counterLen); + } +} + +void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, size_t XLen) +{ + Cyclist_AbsorbAny(instance, X, XLen, instance->Rabsorb, 0x03); +} + +void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen) +{ + assert(instance->mode == Cyclist_ModeKeyed); + Cyclist_Crypt(instance, P, C, PLen, 0); +} + +void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen) +{ + assert(instance->mode == Cyclist_ModeKeyed); + Cyclist_Crypt(instance, C, P, CLen, 1); +} + +void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen) +{ + Cyclist_SqueezeAny(instance, Y, YLen, 0x40); +} + +void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen) +{ + assert(instance->mode == Cyclist_ModeKeyed); + Cyclist_SqueezeAny(instance, K, KLen, 0x20); +} + +void Cyclist_Ratchet(Cyclist_Instance *instance) +{ + uint8_t buffer[Cyclist_lRatchet]; + + assert(instance->mode == Cyclist_ModeKeyed); + /* Squeeze then absorb is the same as overwriting with zeros */ + Cyclist_SqueezeAny(instance, buffer, sizeof(buffer), 0x10); + Cyclist_AbsorbAny(instance, buffer, sizeof(buffer), instance->Rabsorb, 0x00); +} + +#undef SnP_StaticInitialize +#undef SnP_Initialize +#undef SnP_AddBytes +#undef SnP_AddByte +#undef SnP_OverwriteBytes +#undef SnP_ExtractBytes +#undef SnP_ExtractAndAddBytes + +#undef Cyclist_Instance +#undef Cyclist_Initialize +#undef Cyclist_Absorb +#undef Cyclist_Encrypt +#undef Cyclist_Decrypt +#undef Cyclist_Squeeze +#undef Cyclist_SqueezeKey +#undef Cyclist_Ratchet + 
+#undef Cyclist_AbsorbAny +#undef Cyclist_AbsorbKey +#undef Cyclist_SqueezeAny +#undef Cyclist_Down +#undef Cyclist_Up +#undef Cyclist_Crypt + +#undef Cyclist_f_bPrime +#undef Cyclist_Rhash +#undef Cyclist_Rkin +#undef Cyclist_Rkout +#undef Cyclist_lRatchet + +#if defined(CyclistFullBlocks_supported) +#undef Cyclist_AbsorbKeyedFullBlocks +#undef Cyclist_AbsorbHashFullBlocks +#undef Cyclist_SqueezeKeyedFullBlocks +#undef Cyclist_SqueezeHashFullBlocks +#undef Cyclist_EncryptFullBlocks +#undef Cyclist_DecryptFullBlocks +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/Xoodoo-SnP.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/Xoodoo-SnP.h new file mode 100644 index 0000000..cf1b74a --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/Xoodoo-SnP.h @@ -0,0 +1,43 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodoo_SnP_h_ +#define _Xoodoo_SnP_h_ + +#include +#include + +/** For the documentation, see SnP-documentation.h. 
+ */ + +#define Xoodoo_implementation "AVR8 optimized implementation" +#define Xoodoo_stateSizeInBytes (3*4*4) +#define Xoodoo_stateAlignment 1 +#define Xoodoo_HasNround + +#define Xoodoo_StaticInitialize() +void Xoodoo_Initialize(void *state); +#define Xoodoo_AddByte(argS, argData, argOffset) ((uint8_t*)argS)[argOffset] ^= (argData) +void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount); +void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds); +void Xoodoo_Permute_6rounds(void *state); +void Xoodoo_Permute_12rounds(void *state); +void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length); + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/Xoodoo-avr8-u1.s b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/Xoodoo-avr8-u1.s new file mode 100644 index 0000000..7f83e3d --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/Xoodoo-avr8-u1.s @@ -0,0 +1,1341 @@ +; +; The eXtended Keccak Code Package (XKCP) +; https://github.com/XKCP/XKCP +; +; The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. +; +; Implementation by Ronny Van Keer, hereby denoted as "the implementer". +; +; For more information, feedback or questions, please refer to the Keccak Team website: +; https://keccak.team/ +; +; To the extent possible under law, the implementer has waived all copyright +; and related or neighboring rights to the source code in this file. +; http://creativecommons.org/publicdomain/zero/1.0/ +; +; --- +; +; This file implements Xoodoo in a SnP-compatible way. 
+; Please refer to SnP-documentation.h for more details. +; +; This implementation comes with Xoodoo-SnP.h in the same folder. +; Please refer to LowLevel.build for the exact list of other files it must be combined with. +; + +; INFO: Tested on ATmega1280 simulator + +; Registers used in all routines +#define zero 1 +#define rpState 24 +#define rX 26 +#define rY 28 +#define rZ 30 +#define sp 0x3D + +;---------------------------------------------------------------------------- +; +; void Xoodoo_StaticInitialize( void ) +; +.global Xoodoo_StaticInitialize + +;---------------------------------------------------------------------------- +; +; void Xoodoo_Initialize(void *state) +; +; argument state is passed in r24:r25 +; +.global Xoodoo_Initialize +Xoodoo_Initialize: + movw rZ, r24 + ldi r23, 3*4/2 ; clear state (8 bytes / 2 lanes) per iteration +Xoodoo_Initialize_Loop: + st z+, zero + st z+, zero + st z+, zero + st z+, zero + st z+, zero + st z+, zero + st z+, zero + st z+, zero + dec r23 + brne Xoodoo_Initialize_Loop +Xoodoo_StaticInitialize: + ret + +;---------------------------------------------------------------------------- +; +; void Xoodoo_AddByte(void *state, unsigned char data, unsigned int offset) +; +; argument state is passed in r24:r25 +; argument data is passed in r22:r23, only LSB (r22) is used +; argument offset is passed in r20:r21, only LSB (r20) is used +; +.global Xoodoo_AddByte +Xoodoo_AddByte: + movw rZ, r24 + add rZ, r20 + adc rZ+1, zero + ld r0, Z + eor r0, r22 + st Z, r0 + ret + +;---------------------------------------------------------------------------- +; +; void Xoodoo_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +; +; argument state is passed in r24:r25 +; argument data is passed in r22:r23 +; argument offset is passed in r20:r21, only LSB (r20) is used +; argument length is passed in r18:r19, only LSB (r18) is used +; +.global Xoodoo_AddBytes +Xoodoo_AddBytes: + movw rZ, r24 + add rZ, r20 + 
adc rZ+1, zero + movw rX, r22 + subi r18, 8 + brcs Xoodoo_AddBytes_Byte + ;do 8 bytes per iteration +Xoodoo_AddBytes_Loop8: + ld r21, X+ + ld r0, Z + eor r0, r21 + st Z+, r0 + ld r21, X+ + ld r0, Z + eor r0, r21 + st Z+, r0 + ld r21, X+ + ld r0, Z + eor r0, r21 + st Z+, r0 + ld r21, X+ + ld r0, Z + eor r0, r21 + st Z+, r0 + ld r21, X+ + ld r0, Z + eor r0, r21 + st Z+, r0 + ld r21, X+ + ld r0, Z + eor r0, r21 + st Z+, r0 + ld r21, X+ + ld r0, Z + eor r0, r21 + st Z+, r0 + ld r21, X+ + ld r0, Z + eor r0, r21 + st Z+, r0 + subi r18, 8 + brcc Xoodoo_AddBytes_Loop8 +Xoodoo_AddBytes_Byte: + ldi r19, 8 + add r18, r19 + breq Xoodoo_AddBytes_End +Xoodoo_AddBytes_Loop1: + ld r21, X+ + ld r0, Z + eor r0, r21 + st Z+, r0 + dec r18 + brne Xoodoo_AddBytes_Loop1 +Xoodoo_AddBytes_End: + ret + + +;---------------------------------------------------------------------------- +; +; void Xoodoo_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +; +; argument state is passed in r24:r25 +; argument data is passed in r22:r23 +; argument offset is passed in r20:r21, only LSB (r20) is used +; argument length is passed in r18:r19, only LSB (r18) is used +; +.global Xoodoo_OverwriteBytes +Xoodoo_OverwriteBytes: + movw rZ, r24 + add rZ, r20 + adc rZ+1, zero + movw rX, r22 + subi r18, 8 + brcs Xoodoo_OverwriteBytes_Byte + ;do 8 bytes per iteration +Xoodoo_OverwriteBytes_Loop8: + ld r0, X+ + st Z+, r0 + ld r0, X+ + st Z+, r0 + ld r0, X+ + st Z+, r0 + ld r0, X+ + st Z+, r0 + ld r0, X+ + st Z+, r0 + ld r0, X+ + st Z+, r0 + ld r0, X+ + st Z+, r0 + ld r0, X+ + st Z+, r0 + subi r18, 8 + brcc Xoodoo_OverwriteBytes_Loop8 +Xoodoo_OverwriteBytes_Byte: + ldi r19, 8 + add r18, r19 + breq Xoodoo_OverwriteBytes_End +Xoodoo_OverwriteBytes_Loop1: + ld r0, X+ + st Z+, r0 + dec r18 + brne Xoodoo_OverwriteBytes_Loop1 +Xoodoo_OverwriteBytes_End: + ret + +;---------------------------------------------------------------------------- +; +; void 
Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount)
+;
+; argument state is passed in r24:r25
+; argument byteCount is passed in r22:r23, only LSB (r22) is used
+;
+; Writes zero to the first byteCount bytes of the state: byteCount / 8
+; unrolled groups of 8 stores, then byteCount % 8 single stores.
+;
+.global Xoodoo_OverwriteWithZeroes
+Xoodoo_OverwriteWithZeroes:
+    movw rZ, r24 ; rZ = state
+    mov r23, r22
+    lsr r23
+    lsr r23
+    lsr r23 ; r23 = byteCount / 8
+    breq Xoodoo_OverwriteWithZeroes_Bytes
+Xoodoo_OverwriteWithZeroes_LoopLanes:
+    st Z+, r1 ; r1 is the register that the other routines name zero
+    st Z+, r1
+    st Z+, r1
+    st Z+, r1
+    st Z+, r1
+    st Z+, r1
+    st Z+, r1
+    st Z+, r1
+    dec r23
+    brne Xoodoo_OverwriteWithZeroes_LoopLanes
+Xoodoo_OverwriteWithZeroes_Bytes:
+    andi r22, 7 ; r22 = byteCount % 8
+    breq Xoodoo_OverwriteWithZeroes_End
+Xoodoo_OverwriteWithZeroes_LoopBytes:
+    st Z+, r1
+    dec r22
+    brne Xoodoo_OverwriteWithZeroes_LoopBytes
+Xoodoo_OverwriteWithZeroes_End:
+    ret
+
+;----------------------------------------------------------------------------
+;
+; void Xoodoo_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
+;
+; argument state is passed in r24:r25
+; argument data is passed in r22:r23
+; argument offset is passed in r20:r21, only LSB (r20) is used
+; argument length is passed in r18:r19, only LSB (r18) is used
+;
+; Copies length bytes of the state, starting at offset, into data.
+; The state is only read here; data is the buffer that gets written.
+;
+.global Xoodoo_ExtractBytes
+Xoodoo_ExtractBytes:
+    movw rZ, r24
+    add rZ, r20 ; Z = state + offset
+    adc rZ+1, zero
+    movw rX, r22 ; X = data (destination)
+    subi r18, 8 ; borrow means fewer than 8 bytes are left
+    brcs Xoodoo_ExtractBytes_Byte
+    ;do 8 bytes per iteration
+Xoodoo_ExtractBytes_Loop8:
+    ld r0, Z+
+    st X+, r0
+    ld r0, Z+
+    st X+, r0
+    ld r0, Z+
+    st X+, r0
+    ld r0, Z+
+    st X+, r0
+    ld r0, Z+
+    st X+, r0
+    ld r0, Z+
+    st X+, r0
+    ld r0, Z+
+    st X+, r0
+    ld r0, Z+
+    st X+, r0
+    subi r18, 8
+    brcc Xoodoo_ExtractBytes_Loop8
+Xoodoo_ExtractBytes_Byte:
+    ldi r19, 8
+    add r18, r19 ; r18 = bytes still to do (0..7)
+    breq Xoodoo_ExtractBytes_End
+Xoodoo_ExtractBytes_Loop1:
+    ld r0, Z+
+    st X+, r0
+    dec r18
+    brne Xoodoo_ExtractBytes_Loop1
+Xoodoo_ExtractBytes_End:
+    ret
+
+;----------------------------------------------------------------------------
+;
+; void Xoodoo_ExtractAndAddBytes(void *state, const unsigned char
*input, unsigned char *output, unsigned int offset, unsigned int length)
+;
+; argument state is passed in r24:r25
+; argument input is passed in r22:r23
+; argument output is passed in r20:r21
+; argument offset is passed in r18:r19, only LSB (r18) is used
+; argument length is passed in r16:r17, only LSB (r16) is used
+;
+; Writes output[i] = input[i] ^ state[offset + i] for i < length.
+; Returns at once, without touching the stack, when length is 0.
+;
+.global Xoodoo_ExtractAndAddBytes
+Xoodoo_ExtractAndAddBytes:
+    tst r16
+    breq Xoodoo_ExtractAndAddBytes_End
+    push r16 ; r16 and Y (r28:r29) are modified below and restored on exit
+    push r28
+    push r29
+    movw rZ, r24
+    add rZ, r18 ; Z = state + offset
+    adc rZ+1, zero
+    movw rX, r22 ; X = input
+    movw rY, r20 ; Y = output
+    subi r16, 8 ; borrow means fewer than 8 bytes are left
+    brcs Xoodoo_ExtractAndAddBytes_Byte
+Xoodoo_ExtractAndAddBytes_LoopLane:
+    ld r21, Z+ ; r21 is free as scratch: output was already copied to Y
+    ld r0, X+
+    eor r0, r21
+    st Y+, r0
+    ld r21, Z+
+    ld r0, X+
+    eor r0, r21
+    st Y+, r0
+    ld r21, Z+
+    ld r0, X+
+    eor r0, r21
+    st Y+, r0
+    ld r21, Z+
+    ld r0, X+
+    eor r0, r21
+    st Y+, r0
+    ld r21, Z+
+    ld r0, X+
+    eor r0, r21
+    st Y+, r0
+    ld r21, Z+
+    ld r0, X+
+    eor r0, r21
+    st Y+, r0
+    ld r21, Z+
+    ld r0, X+
+    eor r0, r21
+    st Y+, r0
+    ld r21, Z+
+    ld r0, X+
+    eor r0, r21
+    st Y+, r0
+    subi r16, 8
+    brcc Xoodoo_ExtractAndAddBytes_LoopLane
+Xoodoo_ExtractAndAddBytes_Byte:
+    ldi r19, 8
+    add r16, r19 ; r16 = bytes still to do (0..7)
+    breq Xoodoo_ExtractAndAddBytes_Done
+Xoodoo_ExtractAndAddBytes_Loop1:
+    ld r21, Z+
+    ld r0, X+
+    eor r0, r21
+    st Y+, r0
+    dec r16
+    brne Xoodoo_ExtractAndAddBytes_Loop1
+Xoodoo_ExtractAndAddBytes_Done:
+    pop r29
+    pop r28
+    pop r16
+Xoodoo_ExtractAndAddBytes_End:
+    ret
+
+; Round constants, two bytes each with the low byte first, read with lpm by
+; the round loop. The _12 and _6 labels mark where a 12-round or 6-round
+; permutation starts; the loop walks forward until it meets the 0xFF byte
+; stored at the _0 label.
+Xoodoo_RoundConstants_12:
+    .BYTE 0x58, 0x00
+    .BYTE 0x38, 0x00
+    .BYTE 0xC0, 0x03
+    .BYTE 0xD0, 0x00
+    .BYTE 0x20, 0x01
+    .BYTE 0x14, 0x00
+Xoodoo_RoundConstants_6:
+    .BYTE 0x60, 0x00
+    .BYTE 0x2C, 0x00
+    .BYTE 0x80, 0x03
+    .BYTE 0xF0, 0x00
+    .BYTE 0xA0, 0x01
+    .BYTE 0x12, 0x00
+Xoodoo_RoundConstants_0:
+    .BYTE 0xFF, 0 ; terminator
+
+    .text
+
+; Register variables used in permutation
+#define rC0 2 // 4 regs (2-5)
+#define rC1 6 // 4 regs (6-9)
+#define rC2 10 // 4 regs (10-13)
+#define rC3 14 // 4 regs (14-17)
+#define rVv 18 // 4 regs (18-21)
+#define rTt
22 // 4 regs (22-25)
+// r26-27 free
+// Byte offsets of the twelve 32-bit lanes within the state (used as Y+offset)
+#define a00 0
+#define a01 4
+#define a02 8
+#define a03 12
+#define a10 16
+#define a11 20
+#define a12 24
+#define a13 28
+#define a20 32
+#define a21 36
+#define a22 40
+#define a23 44
+
+;----------------------------------------------------------------------------
+;
+; void Xoodoo_Permute_Nrounds( void *state, unsigned int nrounds )
+;
+; argument state is passed in r24:r25
+; argument nrounds is passed in r22:r23 (only LSB (r22) is used)
+;
+; Points Z at the constant that lies nrounds entries (2 bytes each) before
+; the terminator, then joins the common permutation code.
+;
+.global Xoodoo_Permute_Nrounds
+Xoodoo_Permute_Nrounds:
+    mov r26, r22
+    ldi rZ+0, lo8(Xoodoo_RoundConstants_0)
+    ldi rZ+1, hi8(Xoodoo_RoundConstants_0)
+    lsl r26 ; r26 = 2 * nrounds
+    sub rZ, r26
+    sbc rZ+1, zero
+    rjmp Xoodoo_Permute
+
+;----------------------------------------------------------------------------
+;
+; void Xoodoo_Permute_6rounds( void *state )
+;
+; argument state is passed in r24:r25
+;
+.global Xoodoo_Permute_6rounds
+Xoodoo_Permute_6rounds:
+    ldi rZ+0, lo8(Xoodoo_RoundConstants_6)
+    ldi rZ+1, hi8(Xoodoo_RoundConstants_6)
+    rjmp Xoodoo_Permute
+
+;----------------------------------------------------------------------------
+;
+; void Xoodoo_Permute_12rounds( void *state )
+;
+; argument state is passed in r24:r25
+;
+; Falls through into the common permutation code below.
+;
+.global Xoodoo_Permute_12rounds
+Xoodoo_Permute_12rounds:
+    ldi rZ+0, lo8(Xoodoo_RoundConstants_12)
+    ldi rZ+1, hi8(Xoodoo_RoundConstants_12)
+; Common body: on entry Z points at the first round constant to use and
+; r24:r25 holds state. r2-r17 and r28:r29 are saved here and restored at
+; Xoodoo_Done (presumably the avr-gcc call-saved set - TODO confirm).
+Xoodoo_Permute:
+    push r2
+    push r3
+    push r4
+    push r5
+    push r6
+    push r7
+    push r8
+    push r9
+    push r10
+    push r11
+    push r12
+    push r13
+    push r14
+    push r15
+    push r16
+    push r17
+    push r28
+    push r29
+
+    ; Initial Prepare Theta
+    ; rC0..rC3 = a0x ^ a1x ^ a2x for each of the four columns x
+    movw rY, rpState
+    ld rC0+0, Y+ ; a00
+    ld rC0+1, Y+
+    ld rC0+2, Y+
+    ld rC0+3, Y+
+    ld rC1+0, Y+ ; a01
+    ld rC1+1, Y+
+    ld rC1+2, Y+
+    ld rC1+3, Y+
+    ld rC2+0, Y+ ; a02
+    ld rC2+1, Y+
+    ld rC2+2, Y+
+    ld rC2+3, Y+
+    ld rC3+0, Y+ ; a03
+    ld rC3+1, Y+
+    ld rC3+2, Y+
+    ld rC3+3, Y+
+
+    ld r0, Y+ ; a10
+    eor rC0+0, r0
+    ld r0, Y+
+    eor rC0+1, r0
+    ld r0, Y+
+    eor rC0+2, r0
+    ld r0, Y+
+    eor rC0+3, r0
+    ld r0, Y+ ; a11
+    eor rC1+0, r0
+    ld r0, Y+
+    eor rC1+1, r0
+    ld r0, Y+
+    eor rC1+2, r0
+    ld r0, Y+
+    eor rC1+3, r0
+    ld r0, Y+ ; a12
+    eor rC2+0, r0
+    ld r0, Y+
+    eor rC2+1, r0
+    ld r0, Y+
+    eor rC2+2, r0
+    ld r0, Y+
+    eor rC2+3, r0
+    ld r0, Y+ ; a13
+    eor rC3+0, r0
+    ld r0, Y+
+    eor rC3+1, r0
+    ld r0, Y+
+    eor rC3+2, r0
+    ld r0, Y+
+    eor rC3+3, r0
+
+    ld r0, Y+ ; a20
+    eor rC0+0, r0
+    ld r0, Y+
+    eor rC0+1, r0
+    ld r0, Y+
+    eor rC0+2, r0
+    ld r0, Y+
+    eor rC0+3, r0
+    ld r0, Y+ ; a21
+    eor rC1+0, r0
+    ld r0, Y+
+    eor rC1+1, r0
+    ld r0, Y+
+    eor rC1+2, r0
+    ld r0, Y+
+    eor rC1+3, r0
+    ld r0, Y+ ; a22
+    eor rC2+0, r0
+    ld r0, Y+
+    eor rC2+1, r0
+    ld r0, Y+
+    eor rC2+2, r0
+    ld r0, Y+
+    eor rC2+3, r0
+    ld r0, Y+ ; a23
+    eor rC3+0, r0
+    ld r0, Y+
+    eor rC3+1, r0
+    ld r0, Y+
+    eor rC3+2, r0
+    ld r0, Y+
+    eor rC3+3, r0
+    sbiw rY, 48 ; Y back to the start of the state
+
+Xoodoo_RoundLoop:
+    ; Theta + Rho west
+    ; c0 = ROTL32(c0 ^ ROTL32(c0, 9), 5);
+    mov rVv+1, rC0+0 ; rol 9
+    mov rVv+2, rC0+1
+    mov rVv+3, rC0+2
+    mov rVv+0, rC0+3
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    eor rVv+0, rC0+0
+    eor rVv+1, rC0+1
+    eor rVv+2, rC0+2
+    eor rVv+3, rC0+3
+    bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    bst rVv, 0
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    bst rVv, 0
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    mov rC0+0, rVv+3
+    mov rC0+1, rVv+0
+    mov rC0+2, rVv+1
+    mov rC0+3, rVv+2
+
+    ; c1 = ROTL32(c1 ^ ROTL32(c1, 9), 5);
+    mov rVv+1, rC1+0 ; rol 9
+    mov rVv+2, rC1+1
+    mov rVv+3, rC1+2
+    mov rVv+0, rC1+3
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    eor rVv+0, rC1+0
+    eor rVv+1, rC1+1
+    eor rVv+2, rC1+2
+    eor rVv+3, rC1+3
+    bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    bst rVv, 0
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    bst rVv, 0
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    mov rC1+0, rVv+3
+    mov rC1+1, rVv+0
+    mov rC1+2, rVv+1
+    mov rC1+3, rVv+2
+
+    ; c2 = ROTL32(c2 ^ ROTL32(c2, 9), 5);
+    mov rVv+1, rC2+0 ; rol 9
+    mov rVv+2, rC2+1
+    mov rVv+3, rC2+2
+    mov rVv+0, rC2+3
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    eor rVv+0, rC2+0
+    eor rVv+1, rC2+1
+    eor rVv+2, rC2+2
+    eor rVv+3, rC2+3
+    bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    bst rVv, 0
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    bst rVv, 0
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    mov rC2+0, rVv+3
+    mov rC2+1, rVv+0
+    mov rC2+2, rVv+1
+    mov rC2+3, rVv+2
+
+    ; c3 = ROTL32(c3 ^ ROTL32(c3, 9), 5);
+    mov rVv+1, rC3+0 ; rol 9
+    mov rVv+2, rC3+1
+    mov rVv+3, rC3+2
+    mov rVv+0, rC3+3
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    eor rVv+0, rC3+0
+    eor rVv+1, rC3+1
+    eor rVv+2, rC3+2
+    eor rVv+3, rC3+3
+    bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    bst rVv, 0
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    bst rVv, 0
+    ror rVv+3
+    ror rVv+2
+    ror rVv+1
+    ror rVv
+    bld rVv+3, 7
+    mov rC3+0, rVv+3
+    mov rC3+1, rVv+0
+    mov rC3+2, rVv+1
+    mov rC3+3, rVv+2
+
+    ; v1 = a13;
+    ldd rVv+0, Y+a13+0
+    ldd rVv+1, Y+a13+1
+    ldd rVv+2, Y+a13+2
+    ldd rVv+3, Y+a13+3
+
+    ; a13 = a12 ^ c1;
+    ldd r0, Y+a12+0
+    eor r0, rC1+0
+    std Y+a13+0, r0
+    ldd r0, Y+a12+1
+    eor r0, rC1+1
+    std Y+a13+1, r0
+    ldd r0, Y+a12+2
+    eor r0, rC1+2
+    std Y+a13+2, r0
+    ldd r0, Y+a12+3
+    eor r0, rC1+3
+    std Y+a13+3, r0
+
+    ; a12 = a11 ^ c0;
+    ldd r0, Y+a11+0
+    eor r0, rC0+0
+    std Y+a12+0, r0
+    ldd r0, Y+a11+1
+    eor r0, rC0+1
+    std Y+a12+1, r0
+    ldd r0, Y+a11+2
+    eor r0, rC0+2
+    std Y+a12+2, r0
+    ldd r0, Y+a11+3
+    eor r0, rC0+3
+    std Y+a12+3, r0
+
+    ; a11 = a10 ^ c3;
+    ldd r0, Y+a10+0
+    eor r0, rC3+0
+    std Y+a11+0, r0
+    ldd r0, Y+a10+1
+    eor r0, rC3+1
+    std Y+a11+1, r0
+    ldd r0, Y+a10+2
+    eor r0, rC3+2
+    std Y+a11+2, r0
+    ldd r0, Y+a10+3
+    eor r0, rC3+3
+    std Y+a11+3, r0
+
+    ; a10 = v1 ^ c2;
+    eor rVv+0, rC2+0
+    std Y+a10+0, rVv+0
+    eor rVv+1, rC2+1
+    std Y+a10+1, rVv+1
+    eor rVv+2, rC2+2
+    std Y+a10+2, rVv+2
+    eor rVv+3, rC2+3
+    std Y+a10+3, rVv+3
+
+    ; a20 = ROTL32(a20 ^ c3, 11);
+    ; bytes are loaded one position up (rol 8), then rotated left 3 more bits
+    ldd rVv+0, Y+a20+3
+    eor rVv+0, rC3+3
+    ldd rVv+1, Y+a20+0
+    eor rVv+1, rC3+0
+    ldd rVv+2, Y+a20+1
+    eor rVv+2, rC3+1
+    ldd rVv+3, Y+a20+2
+    eor rVv+3, rC3+2
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    std Y+a20+0, rVv+0
+    std Y+a20+1, rVv+1
+    std Y+a20+2, rVv+2
+    std Y+a20+3, rVv+3
+
+    ; a21 = ROTL32(a21 ^ c0, 11);
+    ldd rVv+0, Y+a21+3
+    eor rVv+0, rC0+3
+    ldd rVv+1, Y+a21+0
+    eor rVv+1, rC0+0
+    ldd rVv+2, Y+a21+1
+    eor rVv+2, rC0+1
+    ldd rVv+3, Y+a21+2
+    eor rVv+3, rC0+2
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    std Y+a21+0, rVv+0
+    std Y+a21+1, rVv+1
+    std Y+a21+2, rVv+2
+    std Y+a21+3, rVv+3
+
+    ; a22 = ROTL32(a22 ^ c1, 11);
+    ldd rVv+0, Y+a22+3
+    eor rVv+0, rC1+3
+    ldd rVv+1, Y+a22+0
+    eor rVv+1, rC1+0
+    ldd rVv+2, Y+a22+1
+    eor rVv+2, rC1+1
+    ldd rVv+3, Y+a22+2
+    eor rVv+3, rC1+2
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    std Y+a22+0, rVv+0
+    std Y+a22+1, rVv+1
+    std Y+a22+2, rVv+2
+    std Y+a22+3, rVv+3
+
+    ; a23 = ROTL32(a23 ^ c2, 11);
+    ldd rVv+0, Y+a23+3
+    eor rVv+0, rC2+3
+    ldd rVv+1, Y+a23+0
+    eor rVv+1, rC2+0
+    ldd rVv+2, Y+a23+1
+    eor rVv+2, rC2+1
+    ldd rVv+3, Y+a23+2
+    eor rVv+3, rC2+2
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    lsl rVv+0
+    rol rVv+1
+    rol rVv+2
+    rol rVv+3
+    adc rVv+0, zero
+    std Y+a23+0, rVv+0
+    std Y+a23+1, rVv+1
+    std Y+a23+2, rVv+2
+    std Y+a23+3, rVv+3
+
+    ; v1 = c3;
+    movw rVv+0, rC3+0
+    movw rVv+2, rC3+2
+
+    ; c3 = a03 ^ c2; /* a03 resides in c3 */
+    ldd rC3+0, Y+a03+0
+    eor rC3+0, rC2+0
+    ldd rC3+1, Y+a03+1
+    eor rC3+1, rC2+1
+    ldd rC3+2, Y+a03+2
+    eor rC3+2, rC2+2
+    ldd rC3+3, Y+a03+3
+    eor rC3+3, rC2+3
+
+    ; c2 = a02 ^ c1; /* a02 resides in c2 */
+    ldd rC2+0, Y+a02+0
+    eor rC2+0, rC1+0
+    ldd rC2+1, Y+a02+1
+    eor rC2+1, rC1+1
+    ldd rC2+2, Y+a02+2
+    eor rC2+2, rC1+2
+    ldd rC2+3, Y+a02+3
+    eor rC2+3, rC1+3
+
+    ; c1 = a01 ^ c0; /* a01 resides in c1 */
+    ldd rC1+0, Y+a01+0
+    eor rC1+0, rC0+0
+    ldd rC1+1, Y+a01+1
+    eor rC1+1, rC0+1
+    ldd rC1+2, Y+a01+2
+    eor rC1+2, rC0+2
+    ldd rC1+3, Y+a01+3
+    eor rC1+3, rC0+3
+
+    ; c0 = a00 ^ v1; /* a00 resides in c0 */
+    ldd rC0+0, Y+a00+0
+    eor rC0+0, rVv+0
+    ldd rC0+1, Y+a00+1
+    eor rC0+1, rVv+1
+    ldd rC0+2, Y+a00+2
+    eor rC0+2, rVv+2
+    ldd rC0+3, Y+a00+3
+    eor rC0+3, rVv+3
+
+    ; c0 ^= __rc; /* +Iota */
+    lpm rVv+0, Z+ ; two-byte round constant; Z moves on to the next entry
+    lpm rVv+1, Z+
+    eor rC0+0, rVv+0
+    eor rC0+1, rVv+1
+
+    ; Chi + Rho east + Early Theta
+    ; a00 = c0 ^= ~a10 & a20;
+    ldd r0, Y+a10+0
+    com r0
+    ldd rTt+0, Y+a20+0 ; a20 in rTt
+    and r0, rTt+0
+    eor rC0+0, r0
+    std Y+a00+0, rC0+0
+    ldd r0, Y+a10+1
+    com r0
+    ldd rTt+1, Y+a20+1
+    and r0, rTt+1
+    eor rC0+1, r0
+    std Y+a00+1, rC0+1
+    ldd r0, Y+a10+2
+    com r0
+    ldd rTt+2, Y+a20+2
+    and r0, rTt+2
+    eor rC0+2, r0
+    std Y+a00+2, rC0+2
+    ldd r0, Y+a10+3
+    com r0
+    ldd rTt+3, Y+a20+3
+    and r0, rTt+3
+    eor rC0+3, r0
+    std Y+a00+3, rC0+3
+
+    ; a10 ^= ~a20 & c0;
+    com rTt+0
+    and rTt+0, rC0+0
+    ldd r0, Y+a10+0
+    eor rTt+0, r0 ; new a10 in rTt
+    std Y+a10+0, rTt+0
+    com rTt+1
+    and rTt+1, rC0+1
+    ldd r0, Y+a10+1
+    eor rTt+1, r0
+    std Y+a10+1, rTt+1
+    com rTt+2
+    and rTt+2, rC0+2
+    ldd r0, Y+a10+2
+    eor rTt+2, r0
+    std Y+a10+2, rTt+2
+    com rTt+3
+    and rTt+3, rC0+3
+    ldd r0, Y+a10+3
+    eor rTt+3, r0
+    std Y+a10+3, rTt+3
+
+    ; v1(a20) = ROTL32(a20 ^ ~c0 & a10, 8);
+    movw rVv+0, rTt+0 ; a10 in rVv
+    movw rVv+2, rTt+2
+    mov r0, rC0+0
+    com r0
+    and rTt+0, r0
+    ldd r0, Y+a20+0
+    eor rTt+0, r0
+
+    mov r0, rC0+1
+    com r0
+    and rTt+1, r0
+    ldd r0, Y+a20+1
+    eor rTt+1, r0
+
+    mov r0, rC0+2
+    com r0
+    and rTt+2, r0
+    ldd r0, Y+a20+2
+    eor rTt+2, r0
+
+    mov r0, rC0+3
+    com r0
+    and rTt+3, r0
+    ldd r0, Y+a20+3
+    eor rTt+3, r0
+    std Y+a20+0, rTt+3 ; storing one byte position up performs the rol 8
+    std Y+a20+1, rTt+0
+    std Y+a20+2, rTt+1
+    std Y+a20+3, rTt+2
+
+    ; c0 ^= a10 = ROTL32(a10, 1);
+    lsl rVv+0
+    rol rVv+1
+    std Y+a10+1, rVv+1
+    eor rC0+1, rVv+1
+    rol rVv+2
+    std Y+a10+2, rVv+2
+    eor rC0+2, rVv+2
+    rol rVv+3
+    std Y+a10+3, rVv+3
+    eor rC0+3, rVv+3
+    adc rVv+0, zero
+    std Y+a10+0, rVv+0
+    eor rC0+0, rVv+0
+
+    ; a02 = c2 ^= ~a12 & a22;
+    ldd r0, Y+a12+0
+    com r0
+    ldd rVv+0, Y+a22+0 ; a22 in rVv
+    and r0, rVv+0
+    eor rC2+0, r0
+    std Y+a02+0, rC2+0
+    ldd r0, Y+a12+1
+    com r0
+    ldd rVv+1, Y+a22+1
+    and r0, rVv+1
+    eor rC2+1, r0
+    std Y+a02+1, rC2+1
+    ldd r0, Y+a12+2
+    com r0
+    ldd rVv+2, Y+a22+2
+    and r0, rVv+2
+    eor rC2+2, r0
+    std Y+a02+2, rC2+2
+    ldd r0, Y+a12+3
+    com r0
+    ldd rVv+3, Y+a22+3
+    and r0, rVv+3
+    eor rC2+3, r0
+    std Y+a02+3, rC2+3
+
+    ; a12 ^= ~a22 & c2;
+    mov r0, rVv+0 ; a12 in rTt
+    com r0
+    and r0, rC2+0
+    ldd rTt+0, Y+a12+0
+    eor rTt+0, r0
+    std Y+a12+0, rTt+0
+    mov r0, rVv+1
+    com r0
+    and r0, rC2+1
+    ldd rTt+1, Y+a12+1
+    eor rTt+1, r0
+    std Y+a12+1, rTt+1
+    mov r0, rVv+2
+    com r0
+    and r0, rC2+2
+    ldd rTt+2, Y+a12+2
+    eor rTt+2, r0
+    std Y+a12+2, rTt+2
+    mov r0, rVv+3
+    com r0
+    and r0, rC2+3
+    ldd rTt+3, Y+a12+3
+    eor rTt+3, r0
+    std Y+a12+3, rTt+3
+
+    ; c0 ^= a20 = ROTL32(a22 ^ ~c2 & a12, 8);
+    mov r0, rC2+0
+    com r0
+    and r0, rTt+0
+    eor r0, rVv+0
+    ldd rVv+0, Y+a20+1 ; rVv = a22
+    std Y+a20+1, r0
+    eor rC0+1, r0
+    mov r0, rC2+1
+    com r0
+    and r0, rTt+1
+    eor r0, rVv+1
+    ldd rVv+1, Y+a20+2
+    std Y+a20+2, r0
+    eor rC0+2, r0
+    mov r0, rC2+2
+    com r0
+    and r0, rTt+2
+    eor r0, rVv+2
+    ldd rVv+2, Y+a20+3
+    std Y+a20+3, r0
+    eor rC0+3, r0
+    mov r0, rC2+3
+    com r0
+    and r0, rTt+3
+    eor r0, rVv+3
+    ldd rVv+3, Y+a20+0
+    std Y+a20+0, r0
+    eor rC0+0, r0
+
+    ; c2 ^= a12 = ROTL32(a12, 1);
+    lsl rTt+0
+    rol rTt+1
+    eor rC2+1, rTt+1
+    std Y+a12+1, rTt+1
+    rol rTt+2
+    eor rC2+2, rTt+2
+    std Y+a12+2, rTt+2
+    rol rTt+3
+    eor rC2+3, rTt+3
+    std Y+a12+3, rTt+3
+    adc rTt+0, zero
+    eor rC2+0, rTt+0
+    std Y+a12+0, rTt+0
+
+    ; a22 = v1;
+    std Y+a22+0, rVv+3
+    std Y+a22+1, rVv+0
+    std Y+a22+2, rVv+1
+    std Y+a22+3, rVv+2
+
+    ; c2 ^= v1;
+    eor rC2+0, rVv+3
+    eor rC2+1, rVv+0
+    eor rC2+2, rVv+1
+    eor rC2+3, rVv+2
+
+    ; a01 = c1 ^= ~a11 & a21;
+    ldd rTt+0, Y+a11+0 ;rTt holds a11
+    mov r0, rTt+0
+    com r0
+    ldd rVv+0, Y+a21+0 ;rVv holds a21
+    and r0, rVv+0
+    eor rC1+0, r0
+    std Y+a01+0, rC1+0
+    ldd rTt+1, Y+a11+1
+    mov r0, rTt+1
+    com r0
+    ldd rVv+1, Y+a21+1
+    and r0, rVv+1
+    eor rC1+1, r0
+    std Y+a01+1, rC1+1
+    ldd rTt+2, Y+a11+2
+    mov r0, rTt+2
+    com r0
+    ldd rVv+2, Y+a21+2
+    and r0, rVv+2
+    eor rC1+2, r0
+    std Y+a01+2, rC1+2
+    ldd rTt+3, Y+a11+3
+    mov r0, rTt+3
+    com r0
+    ldd rVv+3, Y+a21+3
+    and r0, rVv+3
+    eor rC1+3, r0
+    std Y+a01+3, rC1+3
+
+    ; a11 ^= ~a21 & c1;
+    mov r0, rVv+0
+    com r0
+    and r0, rC1+0
+    eor rTt+0, r0
+    std Y+a11+0, rTt+0
+    mov r0, rVv+1
+    com r0
+    and r0, rC1+1
+    eor rTt+1, r0
+    std Y+a11+1, rTt+1
+    mov r0, rVv+2
+    com r0
+    and r0, rC1+2
+    eor rTt+2, r0
+    std Y+a11+2, rTt+2
+    mov r0, rVv+3
+    com r0
+    and r0, rC1+3
+    eor rTt+3, r0
+    std Y+a11+3, rTt+3
+
+    ; v1 = ROTL32(a21 ^ ~c1 & a11, 8);
+    mov r0, rC1+0
+    com r0
+    and r0, rTt+0
+    eor rVv+0, r0 ; v1 not yet ROTL32'ed(8)
+    mov r0, rC1+1
+    com r0
+    and r0, rTt+1
+    eor rVv+1, r0
+    mov r0, rC1+2
+    com r0
+    and r0, rTt+2
+    eor rVv+2, r0
+    mov r0, rC1+3
+    com r0
+    and r0, rTt+3
+    eor rVv+3, r0
+
+    ; c1 ^= a11 = ROTL32(a11, 1);
+    lsl rTt+0
+    rol rTt+1
+    eor rC1+1, rTt+1
+    std Y+a11+1, rTt+1
+    rol rTt+2
+    eor rC1+2, rTt+2
+    std Y+a11+2, rTt+2
+    rol rTt+3
+    eor rC1+3, rTt+3
+    std Y+a11+3, rTt+3
+    adc rTt+0, zero
+    eor rC1+0, rTt+0
+    std Y+a11+0, rTt+0
+
+    ; a03 = c3 ^= ~a13 & a23;
+    ldd r0, Y+a13+0
+    com r0
+    ldd rTt+0, Y+a23+0 ; a23 in rTt
+    and r0, rTt+0
+    eor rC3+0, r0
+    std Y+a03+0, rC3+0
+    ldd r0, Y+a13+1
+    com r0
+    ldd rTt+1, Y+a23+1
+    and r0, rTt+1
+    eor rC3+1, r0
+    std Y+a03+1, rC3+1
+    ldd r0, Y+a13+2
+    com r0
+    ldd rTt+2, Y+a23+2
+    and r0, rTt+2
+    eor rC3+2, r0
+    std Y+a03+2, rC3+2
+    ldd r0, Y+a13+3
+    com r0
+    ldd rTt+3, Y+a23+3
+    and r0, rTt+3
+    eor rC3+3, r0
+    std Y+a03+3, rC3+3
+
+    ; a13 ^= ~a23 & c3;
+    ; (the new a13 stays in rTt; it is stored after the rotation further down)
+    mov r0, rTt+0
+    com r0
+    and r0, rC3+0
+    ldd rTt+0, Y+a13+0 ; a13 in rTt
+    eor rTt+0, r0
+    mov r0, rTt+1
+    com r0
+    and r0, rC3+1
+    ldd rTt+1, Y+a13+1
+    eor rTt+1, r0
+    mov r0, rTt+2
+    com r0
+    and r0, rC3+2
+    ldd rTt+2, Y+a13+2
+    eor rTt+2, r0
+    mov r0, rTt+3
+    com r0
+    and r0, rC3+3
+    ldd rTt+3, Y+a13+3
+    eor rTt+3, r0
+
+    ; c1 ^= a21 = ROTL32(a23 ^ ~c3 & a13, 8);
+    push rVv ; first byte of rVv is borrowed as scratch and restored by the pop
+    mov r0, rC3+0
+    com r0
+    and r0, rTt+0
+    ldd rVv, Y+a23+0
+    eor r0, rVv
+    eor rC1+1, r0
+    std Y+a21+1, r0
+    mov r0, rC3+1
+    com r0
+    and r0, rTt+1
+    ldd rVv, Y+a23+1
+    eor r0, rVv
+    eor rC1+2, r0
+    std Y+a21+2, r0
+    mov r0, rC3+2
+    com r0
+    and r0, rTt+2
+    ldd rVv, Y+a23+2
+    eor r0, rVv
+    eor rC1+3, r0
+    std Y+a21+3, r0
+    mov r0, rC3+3
+    com r0
+    and r0, rTt+3
+    ldd rVv, Y+a23+3
+    eor r0, rVv
+    eor rC1+0, r0
+    std Y+a21+0, r0
+    pop rVv
+
+    ; a23 = v1;
+    std Y+a23+0, rVv+3 ; rol8(rVv)
+    std Y+a23+1, rVv+0
+    std Y+a23+2, rVv+1
+    std Y+a23+3, rVv+2
+
+    ; c3 ^= v1;
+    eor rC3+0, rVv+3
+    eor rC3+1, rVv+0
+    eor rC3+2, rVv+1
+    eor rC3+3, rVv+2
+
+    ; c3 ^= a13 = ROTL32(a13, 1);
+    lsl rTt+0
+    rol rTt+1
+    std Y+a13+1, rTt+1
+    eor rC3+1, rTt+1
+    rol rTt+2
+    std Y+a13+2, rTt+2
+    eor rC3+2, rTt+2
+    rol rTt+3
+    std Y+a13+3, rTt+3
+    eor rC3+3, rTt+3
+    adc rTt+0, zero
+    std Y+a13+0, rTt+0
+    eor rC3+0, rTt+0
+
+    ; Check for terminator
+    lpm r0, Z ; peek at the next table byte without advancing Z
+    inc r0 ; 0xFF + 1 wraps to 0 and sets the zero flag
+    breq Xoodoo_Done
+    rjmp Xoodoo_RoundLoop
+Xoodoo_Done:
+    pop r29
+    pop r28
+    pop r17
+    pop r16
+    pop r15
+    pop r14
+    pop r13
+    pop r12
+    pop r11
+    pop r10
+    pop r9
+    pop r8
+    pop r7
+    pop r6
+    pop r5 +
pop r4 + pop r3 + pop r2 + ret diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/Xoodoo.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/Xoodoo.h new file mode 100644 index 0000000..1b6f1a9 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/Xoodoo.h @@ -0,0 +1,79 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodoo_h_ +#define _Xoodoo_h_ + +#include +#include + +#define MAXROUNDS 12 +#define NROWS 3 +#define NCOLUMS 4 +#define NLANES (NCOLUMS*NROWS) + +/* Round constants */ +#define _rc12 0x00000058 +#define _rc11 0x00000038 +#define _rc10 0x000003C0 +#define _rc9 0x000000D0 +#define _rc8 0x00000120 +#define _rc7 0x00000014 +#define _rc6 0x00000060 +#define _rc5 0x0000002C +#define _rc4 0x00000380 +#define _rc3 0x000000F0 +#define _rc2 0x000001A0 +#define _rc1 0x00000012 + + +#if !defined(ROTL32) + #if defined (__arm__) && !defined(__GNUC__) + #define ROTL32(a, offset) __ror(a, (32-(offset))%32) + #elif defined(_MSC_VER) + #define ROTL32(a, offset) _rotl(a, (offset)%32) + #else + #define ROTL32(a, offset) ((((uint32_t)a) << ((offset)%32)) ^ (((uint32_t)a) >> ((32-(offset))%32))) + #endif +#endif + +#if !defined(READ32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define READ32_UNALIGNED(argAddress) (*((const __packed uint32_t*)(argAddress))) + #elif defined(_MSC_VER) + #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress))) + #else + #define 
READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress))) + #endif +#endif + +#if !defined(WRITE32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define WRITE32_UNALIGNED(argAddress, argData) (*((__packed uint32_t*)(argAddress)) = (argData)) + #elif defined(_MSC_VER) + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #else + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #endif +#endif + +#if !defined(index) + #define index(__x,__y) ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS)) +#endif + +typedef uint32_t tXoodooLane; + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/Xoodyak-parameters.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/Xoodyak-parameters.h new file mode 100644 index 0000000..a8c34d8 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/Xoodyak-parameters.h @@ -0,0 +1,26 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _Xoodyak_parameters_h_
+#define _Xoodyak_parameters_h_
+
+/*
+ * Size parameters of the Xoodyak instance.
+ * NOTE(review): the meanings below are inferred from the names and should be
+ * confirmed against the Xoodyak specification; all values look like byte counts.
+ */
+#define Xoodyak_f_bPrime 48 /* presumably the permutation width (a Xoodoo state is 48 bytes) */
+#define Xoodyak_Rhash 16 /* presumably the rate in hash mode */
+#define Xoodyak_Rkin 44 /* presumably the input (absorb) rate in keyed mode */
+#define Xoodyak_Rkout 24 /* presumably the output (squeeze) rate in keyed mode */
+#define Xoodyak_lRatchet 16 /* presumably the ratchet length */
+
+#endif
diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/Xoodyak.c b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/Xoodyak.c
new file mode 100644
index 0000000..e0b67b5
--- /dev/null
+++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/Xoodyak.c
@@ -0,0 +1,53 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifdef XoodooReference + #include "displayIntermediateValues.h" +#endif + +#include +#include +#include "Xoodyak.h" + +#ifdef OUTPUT +#include +#include + +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length); +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length) +{ + unsigned int i; + + fprintf(f, "%s:", synopsis); + for(i=0; i +#include "Cyclist.h" +#include "Xoodoo-SnP.h" +#include "Xoodyak-parameters.h" + +KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment) +KCP_DeclareCyclistFunctions(Xoodyak) + +#else +#error This requires an implementation of Xoodoo +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/align.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/align.h new file mode 100644 index 0000000..82ad2f9 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/align.h @@ -0,0 +1,33 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _align_h_ +#define _align_h_ + +/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. 
*/
+#ifdef ALIGN
+#undef ALIGN
+#endif
+
+/*
+ * ALIGN(x) expands to the compiler-specific spelling of "align to x bytes"
+ * for GCC-compatible compilers, MSVC and ARM's armcc. On any other compiler
+ * it expands to nothing, so no alignment is requested there.
+ */
+#if defined(__GNUC__)
+#define ALIGN(x) __attribute__ ((aligned(x)))
+#elif defined(_MSC_VER)
+#define ALIGN(x) __declspec(align(x))
+#elif defined(__ARMCC_VERSION)
+#define ALIGN(x) __align(x)
+#else
+#define ALIGN(x)
+#endif
+
+#endif
diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/api.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/api.h
new file mode 100644
index 0000000..8060d2b
--- /dev/null
+++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/api.h
@@ -0,0 +1 @@
+#define CRYPTO_BYTES 32 /* presumably the hash output length in bytes - TODO confirm against the build harness */
diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/brg_endian.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/brg_endian.h
new file mode 100644
index 0000000..7c640b9
--- /dev/null
+++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/brg_endian.h
@@ -0,0 +1,143 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The redistribution and use of this software (with or without changes)
+ is allowed without the payment of fees or royalties provided that:
+
+ 1. source code distributions include the above copyright notice, this
+ list of conditions and the following disclaimer;
+
+ 2. binary distributions include the above copyright notice, this list
+ of conditions and the following disclaimer in their documentation;
+
+ 3. the name of the copyright holder is not used to endorse products
+ built using this software without specific written permission.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue Date: 20/12/2007
+ Changes for ARM 9/9/2010
+*/
+
+#ifndef _BRG_ENDIAN_H
+#define _BRG_ENDIAN_H
+
+#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
+#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
+
+/* The system-header probing below is compiled out by this #if 0, so the */
+/* detection that follows relies only on macros that are already defined. */
+#if 0
+/* Include files where endian defines and byteswap functions may reside */
+#if defined( __sun )
+# include
+#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
+# include
+#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
+ defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
+# include
+#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
+# if !defined( __MINGW32__ ) && !defined( _AIX )
+# include
+# if !defined( __BEOS__ )
+# include
+# endif
+# endif
+#endif
+#endif
+
+/* Now attempt to set the define for platform byte order using any */
+/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
+/* seem to encompass most endian symbol definitions */
+
+#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
+# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+#elif defined( BIG_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( LITTLE_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
+# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+#elif defined( _BIG_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( _LITTLE_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
+# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+#elif defined( __BIG_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( __LITTLE_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
+# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+#elif defined( __BIG_ENDIAN__ )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( __LITTLE_ENDIAN__ )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+/* if the platform byte order could not be determined, then try to */
+/* set this define using common machine defines */
+#if !defined(PLATFORM_BYTE_ORDER)
+
+#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
+ defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
+ defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
+ defined( vax ) || defined( vms ) || defined( VMS ) || \
+ defined( __VMS ) || defined( _M_X64 )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+
+#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
+ defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
+ defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
+ defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
+ defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
+ defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
+ defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \
+ defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+
+#elif defined(__arm__)
+# ifdef __BIG_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# else
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+/* Unconditional fallback: a platform matched by nothing above is treated as */
+/* little-endian, so the big-endian and #error branches below are unreachable */
+/* unless the constants in the two #elif lines are edited. */
+#elif 1 /* **** EDIT HERE IF NECESSARY **** */
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#elif 0 /* **** EDIT HERE IF NECESSARY **** */
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#else
+# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order
+#endif
+
+#endif
+
+#endif
diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/config.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/config.h
new file mode 100644
index 0000000..7dfc043
--- /dev/null
+++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/config.h
@@ -0,0 +1,4 @@
+/* File generated by ToTargetConfigFile.xsl */
+
+#define XKCP_has_Xoodyak
+#define XKCP_has_Xoodoo
diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/hash.c b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/hash.c
new file mode 100644
index 0000000..5955de5
--- /dev/null
+++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-AVR8/hash.c
@@ -0,0 +1,43 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
+
+Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include "crypto_hash.h" + +#ifndef crypto_hash_BYTES + #define crypto_hash_BYTES 32 +#endif + +#include "Xoodyak.h" + +int crypto_hash(unsigned char *out, const unsigned char *in, unsigned long long inlen) +{ + Xoodyak_Instance instance; + + Xoodyak_Initialize(&instance, NULL, 0, NULL, 0, NULL, 0); + Xoodyak_Absorb(&instance, in, (size_t)inlen); + Xoodyak_Squeeze(&instance, out, crypto_hash_BYTES); + #if 0 + { + unsigned int i; + for (i = 0; i < crypto_hash_BYTES; ++i ) + { + printf("\\x%02x", out[i] ); + } + printf("\n"); + } + #endif + return 0; +} diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Cyclist.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Cyclist.h new file mode 100644 index 0000000..54522bb --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Cyclist.h @@ -0,0 +1,66 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Cyclist_h_ +#define _Cyclist_h_ + +#include +#include "align.h" + +#define Cyclist_ModeHash 1 +#define Cyclist_ModeKeyed 2 + +#define Cyclist_PhaseDown 1 +#define Cyclist_PhaseUp 2 + +#ifdef OUTPUT + +#include + +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + uint8_t stateShadow[size]; \ + FILE *file; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#else + +#define KCP_DeclareCyclistStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \ + uint8_t state[size]; \ + unsigned int phase; \ + unsigned int mode; \ + unsigned int Rabsorb; \ + unsigned int Rsqueeze; \ + } prefix##_Instance; + +#endif + +#define KCP_DeclareCyclistFunctions(prefix) \ + void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \ + void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \ + void prefix##_Encrypt(prefix##_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen); \ + void prefix##_Decrypt(prefix##_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen); \ + void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \ + void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \ + void prefix##_Ratchet(prefix##_Instance *instance); + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Cyclist.inc b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Cyclist.inc new file mode 100644 index 0000000..ba7a156 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Cyclist.inc @@ -0,0 +1,327 @@ +/* +The eXtended Keccak Code Package (XKCP) 
+https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define JOIN0(a, b) a ## b +#define JOIN(a, b) JOIN0(a, b) + +#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize) +#define SnP_Initialize JOIN(SnP, _Initialize) +#define SnP_AddBytes JOIN(SnP, _AddBytes) +#define SnP_AddByte JOIN(SnP, _AddByte) +#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes) +#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes) +#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes) + +#define Cyclist_Instance JOIN(prefix, _Instance) +#define Cyclist_Initialize JOIN(prefix, _Initialize) +#define Cyclist_Absorb JOIN(prefix, _Absorb) +#define Cyclist_Encrypt JOIN(prefix, _Encrypt) +#define Cyclist_Decrypt JOIN(prefix, _Decrypt) +#define Cyclist_Squeeze JOIN(prefix, _Squeeze) +#define Cyclist_SqueezeKey JOIN(prefix, _SqueezeKey) +#define Cyclist_Ratchet JOIN(prefix, _Ratchet) + +#define Cyclist_AbsorbAny JOIN(prefix, _AbsorbAny) +#define Cyclist_AbsorbKey JOIN(prefix, _AbsorbKey) +#define Cyclist_SqueezeAny JOIN(prefix, _SqueezeAny) +#define Cyclist_Down JOIN(prefix, _Down) +#define Cyclist_Up JOIN(prefix, _Up) +#define Cyclist_Crypt JOIN(prefix, _Crypt) + +#define Cyclist_f_bPrime JOIN(prefix, _f_bPrime) +#define Cyclist_Rhash JOIN(prefix, _Rhash) +#define Cyclist_Rkin JOIN(prefix, _Rkin) +#define Cyclist_Rkout JOIN(prefix, _Rkout) +#define Cyclist_lRatchet JOIN(prefix, _lRatchet) + +#if defined(CyclistFullBlocks_supported) +#define Cyclist_AbsorbKeyedFullBlocks JOIN(prefix, _AbsorbKeyedFullBlocks) +#define 
Cyclist_AbsorbHashFullBlocks JOIN(prefix, _AbsorbHashFullBlocks) +#define Cyclist_SqueezeKeyedFullBlocks JOIN(prefix, _SqueezeKeyedFullBlocks) +#define Cyclist_SqueezeHashFullBlocks JOIN(prefix, _SqueezeHashFullBlocks) +#define Cyclist_EncryptFullBlocks JOIN(prefix, _EncryptFullBlocks) +#define Cyclist_DecryptFullBlocks JOIN(prefix, _DecryptFullBlocks) +#endif + +/* ------- Cyclist internal interfaces ------- */ + +static void Cyclist_Down(Cyclist_Instance *instance, const uint8_t *Xi, unsigned int XiLen, uint8_t Cd) +{ + SnP_AddBytes(instance->state, Xi, 0, XiLen); + SnP_AddByte(instance->state, 0x01, XiLen); + SnP_AddByte(instance->state, (instance->mode == Cyclist_ModeHash) ? (Cd & 0x01) : Cd, Cyclist_f_bPrime - 1); + instance->phase = Cyclist_PhaseDown; + +} + +static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu) +{ + #if defined(OUTPUT) + uint8_t s[Cyclist_f_bPrime]; + #endif + + if (instance->mode != Cyclist_ModeHash) { + SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1); + } + #if defined(OUTPUT) + if (instance->file != NULL) { + SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime ); + SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime ); + } + #endif + SnP_Permute( instance->state ); + #if defined(OUTPUT) + if (instance->file != NULL) { + memcpy( instance->stateShadow, instance->state, sizeof(instance->state) ); + fprintf( instance->file, "Data XORed" ); + displayByteString( instance->file, "", s, Cyclist_f_bPrime ); + SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime ); + fprintf( instance->file, "After f() "); + displayByteString( instance->file, "", s, Cyclist_f_bPrime ); + } + #endif + instance->phase = Cyclist_PhaseUp; + SnP_ExtractBytes( instance->state, Yi, 0, YiLen ); +} + +static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd) +{ + unsigned int splitLen; + + do { + if (instance->phase != 
Cyclist_PhaseUp) { + Cyclist_Up(instance, NULL, 0, 0); + } + splitLen = MyMin(XLen, r); + Cyclist_Down(instance, X, splitLen, Cd); + Cd = 0; + X += splitLen; + XLen -= splitLen; + #if defined(CyclistFullBlocks_supported) + if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) { + size_t lenProcessed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen); + X += lenProcessed; + XLen -= lenProcessed; + } + else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) { + size_t lenProcessed = Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen); + X += lenProcessed; + XLen -= lenProcessed; + } + #endif + } while ( XLen != 0 ); +} + +static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen) +{ + uint8_t KID[Cyclist_Rkin]; + + assert(instance->mode == Cyclist_ModeHash); + assert((KLen + IDLen) <= (Cyclist_Rkin - 1)); + + instance->mode = Cyclist_ModeKeyed; + instance->Rabsorb = Cyclist_Rkin; + instance->Rsqueeze = Cyclist_Rkout; + if (KLen != 0) { + memcpy(KID, K, KLen); + memcpy(KID + KLen, ID, IDLen); + KID[KLen + IDLen] = (uint8_t)IDLen; + Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02); + if (counterLen != 0) { + Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00); + } + } +} + +static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu) +{ + unsigned int len; + + len = MyMin(YLen, instance->Rsqueeze ); + Cyclist_Up(instance, Y, len, Cu); + Y += len; + YLen -= len; + while (YLen != 0) { + #if defined(CyclistFullBlocks_supported) + if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkin)) { + size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen); + Y += lenProcessed; + YLen -= lenProcessed; + } + else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) { + size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen); + Y += 
lenProcessed; + YLen -= lenProcessed; + } + else + #endif + { + Cyclist_Down(instance, NULL, 0, 0); + len = MyMin(YLen, instance->Rsqueeze ); + Cyclist_Up(instance, Y, len, 0); + Y += len; + YLen -= len; + } + } +} + +static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt) +{ + unsigned int splitLen; + uint8_t P[Cyclist_Rkout]; + uint8_t Cu = 0x80; + + do { + if (decrypt != 0) { + #if defined(CyclistFullBlocks_supported) + if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) { + size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen); + I += lenProcessed; + O += lenProcessed; + IOLen -= lenProcessed; + } + else + #endif + { + splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */ + Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */ + Xoodoo_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */ + Cyclist_Down(instance, O, splitLen, 0x00); + I += splitLen; + O += splitLen; + IOLen -= splitLen; + } + } + else { + #if defined(CyclistFullBlocks_supported) + if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) { + size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen); + I += lenProcessed; + O += lenProcessed; + IOLen -= lenProcessed; + } + else + #endif + { + splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */ + memcpy(P, I, splitLen); + Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */ + Xoodoo_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */ + Cyclist_Down(instance, P, splitLen, 0x00); + I += splitLen; + O += splitLen; + IOLen -= splitLen; + } + } + Cu = 0x00; + } while ( IOLen != 0 ); +} + +/* ------- Cyclist interfaces ------- */ + +void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, 
size_t counterLen) +{ + SnP_StaticInitialize(); + SnP_Initialize(instance->state); + instance->phase = Cyclist_PhaseUp; + instance->mode = Cyclist_ModeHash; + instance->Rabsorb = Cyclist_Rhash; + instance->Rsqueeze = Cyclist_Rhash; + #ifdef OUTPUT + instance->file = 0; + SnP_Initialize( instance->stateShadow ); + #endif + if (KLen != 0) { + Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, counter, counterLen); + } +} + +void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, size_t XLen) +{ + Cyclist_AbsorbAny(instance, X, XLen, instance->Rabsorb, 0x03); +} + +void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen) +{ + assert(instance->mode == Cyclist_ModeKeyed); + Cyclist_Crypt(instance, P, C, PLen, 0); +} + +void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen) +{ + assert(instance->mode == Cyclist_ModeKeyed); + Cyclist_Crypt(instance, C, P, CLen, 1); +} + +void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen) +{ + Cyclist_SqueezeAny(instance, Y, YLen, 0x40); +} + +void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen) +{ + assert(instance->mode == Cyclist_ModeKeyed); + Cyclist_SqueezeAny(instance, K, KLen, 0x20); +} + +void Cyclist_Ratchet(Cyclist_Instance *instance) +{ + uint8_t buffer[Cyclist_lRatchet]; + + assert(instance->mode == Cyclist_ModeKeyed); + /* Squeeze then absorb is the same as overwriting with zeros */ + Cyclist_SqueezeAny(instance, buffer, sizeof(buffer), 0x10); + Cyclist_AbsorbAny(instance, buffer, sizeof(buffer), instance->Rabsorb, 0x00); +} + +#undef SnP_StaticInitialize +#undef SnP_Initialize +#undef SnP_AddBytes +#undef SnP_AddByte +#undef SnP_OverwriteBytes +#undef SnP_ExtractBytes +#undef SnP_ExtractAndAddBytes + +#undef Cyclist_Instance +#undef Cyclist_Initialize +#undef Cyclist_Absorb +#undef Cyclist_Encrypt +#undef Cyclist_Decrypt +#undef Cyclist_Squeeze +#undef Cyclist_SqueezeKey +#undef Cyclist_Ratchet + 
+#undef Cyclist_AbsorbAny +#undef Cyclist_AbsorbKey +#undef Cyclist_SqueezeAny +#undef Cyclist_Down +#undef Cyclist_Up +#undef Cyclist_Crypt + +#undef Cyclist_f_bPrime +#undef Cyclist_Rhash +#undef Cyclist_Rkin +#undef Cyclist_Rkout +#undef Cyclist_lRatchet + +#if defined(CyclistFullBlocks_supported) +#undef Cyclist_AbsorbKeyedFullBlocks +#undef Cyclist_AbsorbHashFullBlocks +#undef Cyclist_SqueezeKeyedFullBlocks +#undef Cyclist_SqueezeHashFullBlocks +#undef Cyclist_EncryptFullBlocks +#undef Cyclist_DecryptFullBlocks +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Xoodoo-SnP.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Xoodoo-SnP.h new file mode 100644 index 0000000..2927919 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Xoodoo-SnP.h @@ -0,0 +1,56 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodoo_SnP_h_ +#define _Xoodoo_SnP_h_ + +#include +#include + +/** For the documentation, see SnP-documentation.h. 
+ */ + +#define Xoodoo_implementation "32-bit optimized implementation" +#define Xoodoo_stateSizeInBytes (3*4*4) +#define Xoodoo_stateAlignment 4 +#define Xoodoo_HasNround + +#define Xoodoo_StaticInitialize() +void Xoodoo_Initialize(void *state); +#define Xoodoo_AddByte(argS, argData, argOffset) ((uint8_t*)argS)[argOffset] ^= (argData) +void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount); +void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds); +void Xoodoo_Permute_6rounds(void *state); +void Xoodoo_Permute_12rounds(void *state); +void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length); +void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length); + +//#define Xoodoo_FastXoofff_supported +//void Xoofff_AddIs( uint8_t *output, const uint8_t *input, size_t bitLen); +//size_t Xoofff_CompressFastLoop(uint8_t *k, uint8_t *xAccu, const uint8_t *input, size_t length); +//size_t Xoofff_ExpandFastLoop(uint8_t *yAccu, const uint8_t *kRoll, uint8_t *output, size_t length); + +#define CyclistFullBlocks_supported +size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen); +size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen); +size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen); +size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen); +size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen); +size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen); + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Xoodoo-optimized.c 
b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Xoodoo-optimized.c new file mode 100644 index 0000000..ce86971 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Xoodoo-optimized.c @@ -0,0 +1,399 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include +#include +#include "Xoodoo.h" + +#define VERBOSE 0 + +#if (VERBOSE > 0) + #define Dump(__t) printf(__t "\n"); \ + printf("a00 %08x, a01 %08x, a02 %08x, a03 %08x\n", a00, a01, a02, a03 ); \ + printf("a10 %08x, a11 %08x, a12 %08x, a13 %08x\n", a10, a11, a12, a13 ); \ + printf("a20 %08x, a21 %08x, a22 %08x, a23 %08x\n\n", a20, a21, a22, a23 ); +#else + #define Dump(__t) +#endif + +#if (VERBOSE >= 1) + #define Dump1(__t) Dump(__t) +#else + #define Dump1(__t) +#endif + +#if (VERBOSE >= 2) + #define Dump2(__t) Dump(__t) +#else + #define Dump2(__t) +#endif + +#if (VERBOSE >= 3) + #define Dump3(__t) Dump(__t) +#else + #define Dump3(__t) +#endif + +/* ---------------------------------------------------------------- */ + +void Xoodoo_Initialize(void *state) +{ + memset(state, 0, NLANES*sizeof(tXoodooLane)); +} + +/* ---------------------------------------------------------------- */ + +void Xoodoo_AddBytes(void *argState, const unsigned char *argdata, unsigned int offset, unsigned int length) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + if (length == (3*4*4)) { + uint32_t *state = (uint32_t *)argState; + uint32_t *data = (uint32_t *)argdata; + state[0] ^= data[0]; 
+ state[1] ^= data[1]; + state[2] ^= data[2]; + state[3] ^= data[3]; + state[4] ^= data[4]; + state[5] ^= data[5]; + state[6] ^= data[6]; + state[7] ^= data[7]; + state[8] ^= data[8]; + state[9] ^= data[9]; + state[10] ^= data[10]; + state[11] ^= data[11]; + } + else { + unsigned int sizeLeft = length; + unsigned int lanePosition = offset/4; + unsigned int offsetInLane = offset%4; + const unsigned char *curData = argdata; + uint32_t *state = (uint32_t*)argState; + + state += lanePosition; + if ((sizeLeft > 0) && (offsetInLane != 0)) { + unsigned int bytesInLane = 4 - offsetInLane; + uint32_t lane = 0; + if (bytesInLane > sizeLeft) + bytesInLane = sizeLeft; + memcpy((unsigned char*)&lane + offsetInLane, curData, bytesInLane); + *state++ ^= lane; + sizeLeft -= bytesInLane; + curData += bytesInLane; + } + + while(sizeLeft >= 4) { + *state++ ^= READ32_UNALIGNED( curData ); + sizeLeft -= 4; + curData += 4; + } + + if (sizeLeft > 0) { + uint32_t lane = 0; + memcpy(&lane, curData, sizeLeft); + *state ^= lane; + } + } +#else + #error "Not yet implemented" +#endif +} + +/* ---------------------------------------------------------------- */ + +void Xoodoo_OverwriteBytes(void *argstate, const unsigned char *argdata, unsigned int offset, unsigned int length) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + if (length == (3*4*4)) { + uint32_t *state = (uint32_t *)argstate; + uint32_t *data = (uint32_t *)argdata; + state[0] = data[0]; + state[1] = data[1]; + state[2] = data[2]; + state[3] = data[3]; + state[4] = data[4]; + state[5] = data[5]; + state[6] = data[6]; + state[7] = data[7]; + state[8] = data[8]; + state[9] = data[9]; + state[10] = data[10]; + state[11] = data[11]; + } + else + memcpy((unsigned char*)argstate+offset, argdata, length); +#else + #error "Not yet implemented" +#endif +} + +/* ---------------------------------------------------------------- */ + +void Xoodoo_OverwriteWithZeroes(void *argstate, unsigned int byteCount) +{ +#if (PLATFORM_BYTE_ORDER == 
IS_LITTLE_ENDIAN) + memset(argstate, 0, byteCount); +#else + #error "Not yet implemented" +#endif +} + +/* ---------------------------------------------------------------- */ + +void Xoodoo_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + memcpy(data, (unsigned char*)state+offset, length); +#else + #error "Not yet implemented" +#endif +} + +/* ---------------------------------------------------------------- */ + +void Xoodoo_ExtractAndAddBytes(const void *argState, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + if (length == (3*4*4)) { + uint32_t *state = (uint32_t *)argState; + const uint32_t *ii = (const uint32_t *)input; + uint32_t *oo = (uint32_t *)output; + + oo[0] = state[0] ^ ii[0]; + oo[1] = state[1] ^ ii[1]; + oo[2] = state[2] ^ ii[2]; + oo[3] = state[3] ^ ii[3]; + oo[4] = state[4] ^ ii[4]; + oo[5] = state[5] ^ ii[5]; + oo[6] = state[6] ^ ii[6]; + oo[7] = state[7] ^ ii[7]; + oo[8] = state[8] ^ ii[8]; + oo[9] = state[9] ^ ii[9]; + oo[10] = state[10] ^ ii[10]; + oo[11] = state[11] ^ ii[11]; + } + else { + unsigned int sizeLeft = length; + unsigned int lanePosition = offset/4; + unsigned int offsetInLane = offset%4; + const unsigned char *curInput = input; + unsigned char *curOutput = output; + const uint32_t *state = (const uint32_t*)argState; + + state += lanePosition; + if ((sizeLeft > 0) && (offsetInLane != 0)) { + unsigned int bytesInLane = 4 - offsetInLane; + uint32_t lane = *state++ >> (offsetInLane * 8); + if (bytesInLane > sizeLeft) + bytesInLane = sizeLeft; + sizeLeft -= bytesInLane; + do { + *curOutput++ = (*curInput++) ^ (unsigned char)lane; + lane >>= 8; + } + while ( --bytesInLane != 0); + } + + while(sizeLeft >= 4) { + WRITE32_UNALIGNED( curOutput, READ32_UNALIGNED( curInput ) ^ *state++ ); + sizeLeft -= 4; + curInput += 4; + curOutput += 4; + } + + if 
(sizeLeft > 0) { + uint32_t lane = *state; + do { + *curOutput++ = (*curInput++) ^ (unsigned char)lane; + lane >>= 8; + } + while ( --sizeLeft != 0 ); + } + } +#else + #error "Not yet implemented" +#endif +} + +/* ---------------------------------------------------------------- */ + +#define DeclareVars uint32_t a00, a01, a02, a03; \ + uint32_t a10, a11, a12, a13; \ + uint32_t a20, a21, a22, a23; \ + uint32_t v1, v2 + +#define State2Vars a00 = state[0+0], a01 = state[0+1], a02 = state[0+2], a03 = state[0+3]; \ + a10 = state[4+0], a11 = state[4+1], a12 = state[4+2], a13 = state[4+3]; \ + a20 = state[8+0], a21 = state[8+1], a22 = state[8+2], a23 = state[8+3] + +#define Vars2State state[0+0] = a00, state[0+1] = a01, state[0+2] = a02, state[0+3] = a03; \ + state[4+0] = a10, state[4+1] = a11, state[4+2] = a12, state[4+3] = a13; \ + state[8+0] = a20, state[8+1] = a21, state[8+2] = a22, state[8+3] = a23 + +/* +** Theta: Column Parity Mixer +*/ +#define Theta() \ + v1 = a03 ^ a13 ^ a23; \ + v2 = a00 ^ a10 ^ a20; \ + v1 = ROTL32(v1, 5) ^ ROTL32(v1, 14); \ + a00 ^= v1; \ + a10 ^= v1; \ + a20 ^= v1; \ + v1 = a01 ^ a11 ^ a21; \ + v2 = ROTL32(v2, 5) ^ ROTL32(v2, 14); \ + a01 ^= v2; \ + a11 ^= v2; \ + a21 ^= v2; \ + v2 = a02 ^ a12 ^ a22; \ + v1 = ROTL32(v1, 5) ^ ROTL32(v1, 14); \ + a02 ^= v1; \ + a12 ^= v1; \ + a22 ^= v1; \ + v2 = ROTL32(v2, 5) ^ ROTL32(v2, 14); \ + a03 ^= v2; \ + a13 ^= v2; \ + a23 ^= v2 + +/* +** Rho-west: Plane shift +*/ +#define Rho_west() \ + a20 = ROTL32(a20, 11); \ + a21 = ROTL32(a21, 11); \ + a22 = ROTL32(a22, 11); \ + a23 = ROTL32(a23, 11); \ + v1 = a13; \ + a13 = a12; \ + a12 = a11; \ + a11 = a10; \ + a10 = v1 + +/* +** Iota: Round constants +*/ +#define Iota(__rc) a00 ^= __rc + +/* +** Chi: Non linear step, on colums +*/ +#define Chi() \ + a00 ^= ~a10 & a20; \ + a10 ^= ~a20 & a00; \ + a20 ^= ~a00 & a10; \ + \ + a01 ^= ~a11 & a21; \ + a11 ^= ~a21 & a01; \ + a21 ^= ~a01 & a11; \ + \ + a02 ^= ~a12 & a22; \ + a12 ^= ~a22 & a02; \ + a22 ^= ~a02 & a12; \ + 
\ + a03 ^= ~a13 & a23; \ + a13 ^= ~a23 & a03; \ + a23 ^= ~a03 & a13 + +/* +** Rho-east: Plane shift +*/ +#define Rho_east() \ + a10 = ROTL32(a10, 1); \ + a11 = ROTL32(a11, 1); \ + a12 = ROTL32(a12, 1); \ + a13 = ROTL32(a13, 1); \ + v1 = ROTL32(a23, 8); \ + a23 = ROTL32(a21, 8); \ + a21 = v1; \ + v1 = ROTL32(a22, 8); \ + a22 = ROTL32(a20, 8); \ + a20 = v1 + +#define Round(__rc) \ + Theta(); \ + Dump3("Theta"); \ + Rho_west(); \ + Dump3("Rho-west"); \ + Iota(__rc); \ + Dump3("Iota"); \ + Chi(); \ + Dump3("Chi"); \ + Rho_east(); \ + Dump3("Rho-east") + +static const uint32_t RC[MAXROUNDS] = { + _rc12, + _rc11, + _rc10, + _rc9, + _rc8, + _rc7, + _rc6, + _rc5, + _rc4, + _rc3, + _rc2, + _rc1 +}; + +void Xoodoo_Permute_Nrounds( uint32_t * state, uint32_t nr ) +{ + DeclareVars; + uint32_t i; + + State2Vars; + for (i = MAXROUNDS - nr; i < MAXROUNDS; ++i ) { + Round(RC[i]); + Dump2("Round"); + } + Dump1("Permutation"); + Vars2State; +} + +void Xoodoo_Permute_6rounds( uint32_t * state) +{ + DeclareVars; + + State2Vars; + Round(_rc6); + Round(_rc5); + Round(_rc4); + Round(_rc3); + Round(_rc2); + Round(_rc1); + Dump1("Permutation"); + Vars2State; +} + +void Xoodoo_Permute_12rounds( uint32_t * state) +{ + DeclareVars; + + State2Vars; + Round(_rc12); + Round(_rc11); + Round(_rc10); + Round(_rc9); + Round(_rc8); + Round(_rc7); + Round(_rc6); + Round(_rc5); + Round(_rc4); + Round(_rc3); + Round(_rc2); + Round(_rc1); + Dump1("Permutation"); + Vars2State; +} diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Xoodoo.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Xoodoo.h new file mode 100644 index 0000000..1b6f1a9 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Xoodoo.h @@ -0,0 +1,79 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. 
+ +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodoo_h_ +#define _Xoodoo_h_ + +#include +#include + +#define MAXROUNDS 12 +#define NROWS 3 +#define NCOLUMS 4 +#define NLANES (NCOLUMS*NROWS) + +/* Round constants */ +#define _rc12 0x00000058 +#define _rc11 0x00000038 +#define _rc10 0x000003C0 +#define _rc9 0x000000D0 +#define _rc8 0x00000120 +#define _rc7 0x00000014 +#define _rc6 0x00000060 +#define _rc5 0x0000002C +#define _rc4 0x00000380 +#define _rc3 0x000000F0 +#define _rc2 0x000001A0 +#define _rc1 0x00000012 + + +#if !defined(ROTL32) + #if defined (__arm__) && !defined(__GNUC__) + #define ROTL32(a, offset) __ror(a, (32-(offset))%32) + #elif defined(_MSC_VER) + #define ROTL32(a, offset) _rotl(a, (offset)%32) + #else + #define ROTL32(a, offset) ((((uint32_t)a) << ((offset)%32)) ^ (((uint32_t)a) >> ((32-(offset))%32))) + #endif +#endif + +#if !defined(READ32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define READ32_UNALIGNED(argAddress) (*((const __packed uint32_t*)(argAddress))) + #elif defined(_MSC_VER) + #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress))) + #else + #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress))) + #endif +#endif + +#if !defined(WRITE32_UNALIGNED) + #if defined (__arm__) && !defined(__GNUC__) + #define WRITE32_UNALIGNED(argAddress, argData) (*((__packed uint32_t*)(argAddress)) = (argData)) + #elif defined(_MSC_VER) + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #else + #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData)) + #endif +#endif + +#if !defined(index) + 
#define index(__x,__y) ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS)) +#endif + +typedef uint32_t tXoodooLane; + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Xoodyak-full-blocks.c b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Xoodyak-full-blocks.c new file mode 100644 index 0000000..99a62ea --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Xoodyak-full-blocks.c @@ -0,0 +1,127 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include +#include +#include +#include "Xoodoo-SnP.h" +#include "Xoodyak-parameters.h" + +#ifdef OUTPUT +#include +#endif + +#define SnP_AddByte Xoodoo_AddByte +#define SnP_AddBytes Xoodoo_AddBytes +#define SnP_ExtractBytes Xoodoo_ExtractBytes +#define SnP_ExtractAndAddBytes Xoodoo_ExtractAndAddBytes +#define SnP_Permute Xoodoo_Permute_12rounds +#define SnP_OverwriteBytes Xoodoo_OverwriteBytes + +size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen) +{ + size_t initialLength = XLen; + + do { + SnP_Permute(state); /* Xoodyak_Up(instance, NULL, 0, 0); */ + SnP_AddBytes(state, X, 0, Xoodyak_Rkin); /* Xoodyak_Down(instance, X, Xoodyak_Rkin, 0); */ + SnP_AddByte(state, 0x01, Xoodyak_Rkin); + X += Xoodyak_Rkin; + XLen -= Xoodyak_Rkin; + } while (XLen >= Xoodyak_Rkin); + + return initialLength - XLen; +} + +size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen) +{ + size_t initialLength = XLen; + + do { + 
SnP_Permute(state); /* Xoodyak_Up(instance, NULL, 0, 0); */ + SnP_AddBytes(state, X, 0, Xoodyak_Rhash); /* Xoodyak_Down(instance, X, Xoodyak_Rhash, 0); */ + SnP_AddByte(state, 0x01, Xoodyak_Rhash); + X += Xoodyak_Rhash; + XLen -= Xoodyak_Rhash; + } while (XLen >= Xoodyak_Rhash); + + return initialLength - XLen; +} + + +size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen) +{ + size_t initialLength = YLen; + + do { + SnP_AddByte(state, 0x01, 0); /* Xoodyak_Down(instance, NULL, 0, 0); */ + SnP_Permute(state); /* Xoodyak_Up(instance, Y, Xoodyak_Rkout, 0); */ + SnP_ExtractBytes(state, Y, 0, Xoodyak_Rkout); + Y += Xoodyak_Rkout; + YLen -= Xoodyak_Rkout; + } while (YLen >= Xoodyak_Rkout); + + return initialLength - YLen; +} + +size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen) +{ + size_t initialLength = YLen; + + do { + SnP_AddByte(state, 0x01, 0); /* Xoodyak_Down(instance, NULL, 0, 0); */ + SnP_Permute(state); /* Xoodyak_Up(instance, Y, Xoodyak_Rhash, 0); */ + SnP_ExtractBytes(state, Y, 0, Xoodyak_Rhash); + Y += Xoodyak_Rhash; + YLen -= Xoodyak_Rhash; + } while (YLen >= Xoodyak_Rhash); + + return initialLength - YLen; +} + +size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen) +{ + size_t initialLength = IOLen; + + do { + SnP_Permute(state); + SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout); + SnP_OverwriteBytes(state, O, 0, Xoodyak_Rkout); + SnP_AddByte(state, 0x01, Xoodyak_Rkout); + I += Xoodyak_Rkout; + O += Xoodyak_Rkout; + IOLen -= Xoodyak_Rkout; + } while (IOLen >= Xoodyak_Rkout); + + return initialLength - IOLen; +} + +size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen) +{ + size_t initialLength = IOLen; + + do { + SnP_Permute(state); + SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout); + SnP_AddBytes(state, O, 0, Xoodyak_Rkout); + SnP_AddByte(state, 0x01, Xoodyak_Rkout); + I += Xoodyak_Rkout; + O += Xoodyak_Rkout; + IOLen -= 
Xoodyak_Rkout; + } while (IOLen >= Xoodyak_Rkout); + + return initialLength - IOLen; +} diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Xoodyak-parameters.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Xoodyak-parameters.h new file mode 100644 index 0000000..a8c34d8 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Xoodyak-parameters.h @@ -0,0 +1,26 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _Xoodyak_parameters_h_ +#define _Xoodyak_parameters_h_ + +#define Xoodyak_f_bPrime 48 +#define Xoodyak_Rhash 16 +#define Xoodyak_Rkin 44 +#define Xoodyak_Rkout 24 +#define Xoodyak_lRatchet 16 + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Xoodyak.c b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Xoodyak.c new file mode 100644 index 0000000..e0b67b5 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/Xoodyak.c @@ -0,0 +1,53 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". 
+ +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifdef XoodooReference + #include "displayIntermediateValues.h" +#endif + +#include +#include +#include "Xoodyak.h" + +#ifdef OUTPUT +#include +#include + +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length); +static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length) +{ + unsigned int i; + + fprintf(f, "%s:", synopsis); + for(i=0; i +#include "Cyclist.h" +#include "Xoodoo-SnP.h" +#include "Xoodyak-parameters.h" + +KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment) +KCP_DeclareCyclistFunctions(Xoodyak) + +#else +#error This requires an implementation of Xoodoo +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/align.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/align.h new file mode 100644 index 0000000..82ad2f9 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/align.h @@ -0,0 +1,33 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _align_h_ +#define _align_h_ + +/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. 
*/ +#ifdef ALIGN +#undef ALIGN +#endif + +#if defined(__GNUC__) +#define ALIGN(x) __attribute__ ((aligned(x))) +#elif defined(_MSC_VER) +#define ALIGN(x) __declspec(align(x)) +#elif defined(__ARMCC_VERSION) +#define ALIGN(x) __align(x) +#else +#define ALIGN(x) +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/api.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/api.h new file mode 100644 index 0000000..8060d2b --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/api.h @@ -0,0 +1 @@ +#define CRYPTO_BYTES 32 diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/brg_endian.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/brg_endian.h new file mode 100644 index 0000000..7c640b9 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/brg_endian.h @@ -0,0 +1,143 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The redistribution and use of this software (with or without changes) + is allowed without the payment of fees or royalties provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. 
+ ---------------------------------------------------------------------------
+ Issue Date: 20/12/2007
+ Changes for ARM 9/9/2010
+*/
+
+#ifndef _BRG_ENDIAN_H
+#define _BRG_ENDIAN_H
+
+#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
+#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
+/* NOTE(review): the "#if 0" below compiles out the whole system-header probe, so the tests further down only see macros that are already defined at this point. */
+#if 0
+/* Include files where endian defines and byteswap functions may reside */
+#if defined( __sun )
+# include
+#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
+# include
+#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
+ defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
+# include
+#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
+# if !defined( __MINGW32__ ) && !defined( _AIX )
+# include
+# if !defined( __BEOS__ )
+# include
+# endif
+# endif
+#endif
+#endif
+
+/* Now attempt to set the define for platform byte order using any */
+/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
+/* seem to encompass most endian symbol definitions */
+/* Unprefixed form: when both names exist, BYTE_ORDER is compared against them; when only one exists, that one decides. */
+#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
+# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+#elif defined( BIG_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( LITTLE_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+/* Same decision for the single-underscore spellings. */
+#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
+# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+#elif defined( _BIG_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( _LITTLE_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+/* Same decision for the double-underscore-prefixed spellings. */
+#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
+# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+#elif defined( __BIG_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( __LITTLE_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+/* Same decision for the spellings with double underscores on both sides. */
+#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
+# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+#elif defined( __BIG_ENDIAN__ )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( __LITTLE_ENDIAN__ )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+/* if the platform byte order could not be determined, then try to */
+/* set this define using common machine defines */
+#if !defined(PLATFORM_BYTE_ORDER)
+/* Machine and compiler macros that select little-endian. */
+#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
+ defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
+ defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
+ defined( vax ) || defined( vms ) || defined( VMS ) || \
+ defined( __VMS ) || defined( _M_X64 )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+/* Machine and compiler macros that select big-endian. */
+#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
+ defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
+ defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
+ defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
+ defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
+ defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
+ defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \
+ defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN + +#elif defined(__arm__) +# ifdef __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# else +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif 1 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#elif 0 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#else +# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order +#endif + +#endif + +#endif diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/config.h b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/config.h new file mode 100644 index 0000000..7dfc043 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/config.h @@ -0,0 +1,4 @@ +/* File generated by ToTargetConfigFile.xsl */ + +#define XKCP_has_Xoodyak +#define XKCP_has_Xoodoo diff --git a/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/hash.c b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/hash.c new file mode 100644 index 0000000..5955de5 --- /dev/null +++ b/xoodyak/Implementations/crypto_hash/xoodyakv1/XKCP-plain-ua/hash.c @@ -0,0 +1,43 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include "crypto_hash.h" + +#ifndef crypto_hash_BYTES + #define crypto_hash_BYTES 32 +#endif + +#include "Xoodyak.h" + +int crypto_hash(unsigned char *out, const unsigned char *in, unsigned long long inlen) +{ + Xoodyak_Instance instance; + + Xoodyak_Initialize(&instance, NULL, 0, NULL, 0, NULL, 0); + Xoodyak_Absorb(&instance, in, (size_t)inlen); + Xoodyak_Squeeze(&instance, out, crypto_hash_BYTES); + #if 0 + { + unsigned int i; + for (i = 0; i < crypto_hash_BYTES; ++i ) + { + printf("\\x%02x", out[i] ); + } + printf("\n"); + } + #endif + return 0; +}