Commit b3fdce9a by Enrico Pozzobon

Merge branch 'email-submissions'

parents 3acb0fce 9fb00266
/*
The eXtended Keccak Code Package (XKCP)

Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.

Implementation by Ronny Van Keer, hereby denoted as "the implementer".

For more information, feedback or questions, please refer to the Keccak Team website:
https://keccak.team/

To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
*/
#ifndef _Cyclist_h_
#define _Cyclist_h_

#include <stddef.h>   /* size_t, used by the prototypes declared below */
#include <stdint.h>
#include "align.h"

/* Values stored in the `mode` field of a Cyclist instance. */
#define Cyclist_ModeHash 1
#define Cyclist_ModeKeyed 2

/* Values stored in the `phase` field of a Cyclist instance. */
#define Cyclist_PhaseDown 1
#define Cyclist_PhaseUp 2

/*
 * KCP_DeclareCyclistStructure(prefix, size, alignment)
 *
 * Declares `struct prefix_CyclistInstanceStruct` and its typedef
 * `prefix_Instance`, holding a `size`-byte permutation state plus the
 * phase/mode/rate bookkeeping fields.  When OUTPUT is defined the structure
 * additionally carries a shadow copy of the state and a FILE pointer that the
 * implementation uses to trace intermediate values.
 *
 * Exactly one of the two definitions below is active.
 */
#ifdef OUTPUT

#include <stdio.h>

#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
    ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
        uint8_t state[size]; \
        uint8_t stateShadow[size]; \
        FILE *file; \
        unsigned int phase; \
        unsigned int mode; \
        unsigned int Rabsorb; \
        unsigned int Rsqueeze; \
    } prefix##_Instance;

#else

#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
    ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
        uint8_t state[size]; \
        unsigned int phase; \
        unsigned int mode; \
        unsigned int Rabsorb; \
        unsigned int Rsqueeze; \
    } prefix##_Instance;

#endif

/*
 * KCP_DeclareCyclistFunctions(prefix)
 *
 * Declares the public Cyclist entry points for the instance type
 * `prefix_Instance`: Initialize, Absorb, Encrypt, Decrypt, Squeeze,
 * SqueezeKey and Ratchet.
 */
#define KCP_DeclareCyclistFunctions(prefix) \
    void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \
    void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \
    void prefix##_Encrypt(prefix##_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen); \
    void prefix##_Decrypt(prefix##_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen); \
    void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \
    void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \
    void prefix##_Ratchet(prefix##_Instance *instance);

#endif /* _Cyclist_h_ */
/*
The eXtended Keccak Code Package (XKCP)

Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.

Implementation by Ronny Van Keer, hereby denoted as "the implementer".

For more information, feedback or questions, please refer to the Keccak Team website:
https://keccak.team/

To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
*/
/*
 * Name-mapping macros for this template.
 *
 * The including file is expected to define `prefix` (the Cyclist instance
 * prefix) and `SnP` (the permutation back-end prefix) before this point.
 * JOIN goes through JOIN0 so that `prefix` / `SnP` are macro-expanded before
 * being pasted to the suffix.
 */
#define JOIN0(a, b)                     a ## b
#define JOIN(a, b)                      JOIN0(a, b)

/* Permutation back-end primitives. */
#define SnP_StaticInitialize            JOIN(SnP, _StaticInitialize)
#define SnP_Initialize                  JOIN(SnP, _Initialize)
#define SnP_AddBytes                    JOIN(SnP, _AddBytes)
#define SnP_AddByte                     JOIN(SnP, _AddByte)
#define SnP_OverwriteBytes              JOIN(SnP, _OverwriteBytes)
#define SnP_ExtractBytes                JOIN(SnP, _ExtractBytes)
#define SnP_ExtractAndAddBytes          JOIN(SnP, _ExtractAndAddBytes)

/* Public Cyclist interface. */
#define Cyclist_Instance                JOIN(prefix, _Instance)
#define Cyclist_Initialize              JOIN(prefix, _Initialize)
#define Cyclist_Absorb                  JOIN(prefix, _Absorb)
#define Cyclist_Encrypt                 JOIN(prefix, _Encrypt)
#define Cyclist_Decrypt                 JOIN(prefix, _Decrypt)
#define Cyclist_Squeeze                 JOIN(prefix, _Squeeze)
#define Cyclist_SqueezeKey              JOIN(prefix, _SqueezeKey)
#define Cyclist_Ratchet                 JOIN(prefix, _Ratchet)

/* Internal helpers. */
#define Cyclist_AbsorbAny               JOIN(prefix, _AbsorbAny)
#define Cyclist_AbsorbKey               JOIN(prefix, _AbsorbKey)
#define Cyclist_SqueezeAny              JOIN(prefix, _SqueezeAny)
#define Cyclist_Down                    JOIN(prefix, _Down)
#define Cyclist_Up                      JOIN(prefix, _Up)
#define Cyclist_Crypt                   JOIN(prefix, _Crypt)

/* Per-instantiation parameters. */
#define Cyclist_f_bPrime                JOIN(prefix, _f_bPrime)
#define Cyclist_Rhash                   JOIN(prefix, _Rhash)
#define Cyclist_Rkin                    JOIN(prefix, _Rkin)
#define Cyclist_Rkout                   JOIN(prefix, _Rkout)
#define Cyclist_lRatchet                JOIN(prefix, _lRatchet)

/* Optional multi-block fast paths, only named when the back-end provides them. */
#if defined(CyclistFullBlocks_supported)
#define Cyclist_AbsorbKeyedFullBlocks   JOIN(prefix, _AbsorbKeyedFullBlocks)
#define Cyclist_AbsorbHashFullBlocks    JOIN(prefix, _AbsorbHashFullBlocks)
#define Cyclist_SqueezeKeyedFullBlocks  JOIN(prefix, _SqueezeKeyedFullBlocks)
#define Cyclist_SqueezeHashFullBlocks   JOIN(prefix, _SqueezeHashFullBlocks)
#define Cyclist_EncryptFullBlocks       JOIN(prefix, _EncryptFullBlocks)
#define Cyclist_DecryptFullBlocks       JOIN(prefix, _DecryptFullBlocks)
#endif
/* ------- Cyclist internal interfaces ------- */
/*
 * Cyclist Down step.
 *
 * Adds the XiLen bytes at Xi into the start of the state, adds the padding
 * byte 0x01 at offset XiLen, adds the domain byte at the last state byte
 * (offset Cyclist_f_bPrime - 1) and records that the instance is now in the
 * Down phase.  In hash mode only bit 0 of Cd is used.
 *
 * instance  Cyclist instance to update.
 * Xi        block to add; only read when XiLen is non-zero by the back-end
 *           shown in this file, and callers pass NULL with XiLen == 0.
 * XiLen     number of bytes in Xi.
 * Cd        domain-separation byte.
 */
static void Cyclist_Down(Cyclist_Instance *instance, const uint8_t *Xi, unsigned int XiLen, uint8_t Cd)
{
    SnP_AddBytes(instance->state, Xi, 0, XiLen);
    SnP_AddByte(instance->state, 0x01, XiLen);
    SnP_AddByte(instance->state, (instance->mode == Cyclist_ModeHash) ? (Cd & 0x01) : Cd, Cyclist_f_bPrime - 1);
    instance->phase = Cyclist_PhaseDown;
}
/*
 * Cyclist Up step.
 *
 * Outside hash mode the domain byte Cu is first added at the last state byte.
 * The permutation is then applied, the instance is marked as being in the Up
 * phase, and the first YiLen bytes of the state are copied to Yi.
 *
 * When OUTPUT is defined and instance->file is non-NULL, the data added since
 * the previous permutation (current state XOR shadow state) and the state
 * after the permutation are written to that file.
 *
 * instance  Cyclist instance to update.
 * Yi        destination for the extracted bytes; callers pass NULL together
 *           with YiLen == 0 when no output is wanted.
 * YiLen     number of bytes to extract.
 * Cu        domain-separation byte (ignored in hash mode).
 */
static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu)
{
#if defined(OUTPUT)
    uint8_t s[Cyclist_f_bPrime];
#endif

    if (instance->mode != Cyclist_ModeHash) {
        SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1);
    }
#if defined(OUTPUT)
    if (instance->file != NULL) {
        /* s = shadow state XOR current state, i.e. what was added since the last permutation */
        SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
        SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime );
    }
#endif
    SnP_Permute( instance->state );
#if defined(OUTPUT)
    if (instance->file != NULL) {
        memcpy( instance->stateShadow, instance->state, sizeof(instance->state) );
        fprintf( instance->file, "Data XORed" );
        displayByteString( instance->file, "", s, Cyclist_f_bPrime );
        SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
        fprintf( instance->file, "After f() ");
        displayByteString( instance->file, "", s, Cyclist_f_bPrime );
    }
#endif
    instance->phase = Cyclist_PhaseUp;
    SnP_ExtractBytes( instance->state, Yi, 0, YiLen );
}
/*
 * Absorb XLen bytes from X in chunks of at most r bytes.
 *
 * Each chunk is added with Cyclist_Down; an empty Cyclist_Up is issued first
 * whenever the instance is not already in the Up phase.  The domain byte Cd is
 * applied to the first chunk only.  Because this is a do/while loop, one Down
 * is performed even when XLen is 0.
 *
 * NOTE(review): the closing braces and the #endif belonging to the #if below
 * are not visible in this listing - confirm the exact block structure.
 */
static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd)
unsigned int splitLen;
do {
/* A Down is only issued from the Up phase. */
if (instance->phase != Cyclist_PhaseUp) {
Cyclist_Up(instance, NULL, 0, 0);
splitLen = MyMin(XLen, r);
Cyclist_Down(instance, X, splitLen, Cd);
/* Chunks after the first one use domain byte 0. */
Cd = 0;
X += splitLen;
XLen -= splitLen;
#if defined(CyclistFullBlocks_supported)
/* Optional fast path: hand the remaining whole blocks to the back-end,
   which returns the number of bytes it consumed. */
if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) {
size_t lenProcessed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen);
X += lenProcessed;
XLen -= lenProcessed;
else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) {
size_t lenProcessed = Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen);
X += lenProcessed;
XLen -= lenProcessed;
} while ( XLen != 0 );
/*
 * Switch the instance from hash mode to keyed mode and absorb the key.
 *
 * Sets mode to keyed and the absorb/squeeze rates to Cyclist_Rkin /
 * Cyclist_Rkout.  When KLen is non-zero, the string K || ID || (byte)IDLen is
 * absorbed with domain byte 0x02, followed - if counterLen is non-zero - by
 * the counter, absorbed one byte per block with domain byte 0x00.
 *
 * Preconditions (checked with assert only): the instance is in hash mode and
 * KLen + IDLen <= Cyclist_Rkin - 1, so that the local buffer cannot overflow.
 */
static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
{
    uint8_t KID[Cyclist_Rkin];

    assert(instance->mode == Cyclist_ModeHash);
    assert((KLen + IDLen) <= (Cyclist_Rkin - 1));

    instance->mode = Cyclist_ModeKeyed;
    instance->Rabsorb = Cyclist_Rkin;
    instance->Rsqueeze = Cyclist_Rkout;
    if (KLen != 0) {
        memcpy(KID, K, KLen);
        memcpy(KID + KLen, ID, IDLen);
        KID[KLen + IDLen] = (uint8_t)IDLen;
        Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02);
        if (counterLen != 0) {
            /* rate 1: the counter is absorbed one byte per Down */
            Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00);
        }
    }
}
/*
 * Produce YLen output bytes into Y.
 *
 * The first block (at most instance->Rsqueeze bytes) is extracted by an Up
 * with domain byte Cu; while output remains, further blocks are produced by an
 * empty Down followed by an Up with domain byte 0.
 *
 * NOTE(review): the closing braces and the #endif are not visible in this
 * listing, so it cannot be told from here whether the generic Down/Up path
 * inside the loop is an alternative to the full-block fast paths or always
 * runs after them - confirm.
 * NOTE(review): the keyed fast path tests YLen against Cyclist_Rkin, whereas
 * Cyclist_AbsorbKey sets the keyed squeeze rate to Cyclist_Rkout - confirm
 * this is intended.
 */
static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu)
unsigned int len;
len = MyMin(YLen, instance->Rsqueeze );
Cyclist_Up(instance, Y, len, Cu);
Y += len;
YLen -= len;
while (YLen != 0) {
#if defined(CyclistFullBlocks_supported)
/* Optional fast paths; each returns the number of bytes it produced. */
if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkin)) {
size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen);
Y += lenProcessed;
YLen -= lenProcessed;
else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) {
size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen);
Y += lenProcessed;
YLen -= lenProcessed;
/* Generic path: empty Down, then extract the next block with an Up. */
Cyclist_Down(instance, NULL, 0, 0);
len = MyMin(YLen, instance->Rsqueeze );
Cyclist_Up(instance, Y, len, 0);
Y += len;
YLen -= len;
/*
 * Encrypt (decrypt == 0) or decrypt (decrypt != 0) IOLen bytes from I into O.
 *
 * Data is processed in blocks of at most Cyclist_Rkout bytes.  For each block
 * an Up is performed without extraction, the output is computed as state XOR
 * input, and the plaintext block is then absorbed with a Down: when decrypting
 * that is the freshly written output O, when encrypting it is the copy P of
 * the input taken before O is written.  The first Up uses domain byte 0x80,
 * later ones 0x00.  Because this is a do/while loop, one Up/Down pair is
 * performed even when IOLen is 0.
 *
 * NOTE(review): the closing braces and #endif lines are not visible in this
 * listing, so whether the per-block path is an alternative to the full-block
 * fast path or always follows it cannot be told from here - confirm.
 * NOTE(review): this function calls Xoodoo_ExtractAndAddBytes directly while
 * the rest of the template goes through the SnP_ExtractAndAddBytes mapping -
 * consider using the mapped name for consistency.
 */
static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt)
unsigned int splitLen;
uint8_t P[Cyclist_Rkout];
uint8_t Cu = 0x80;
do {
if (decrypt != 0) {
#if defined(CyclistFullBlocks_supported)
/* Fast path is only considered once the first block (Cu == 0x80) is done. */
if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen);
I += lenProcessed;
O += lenProcessed;
IOLen -= lenProcessed;
splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
Xoodoo_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
/* Absorb the recovered plaintext. */
Cyclist_Down(instance, O, splitLen, 0x00);
I += splitLen;
O += splitLen;
IOLen -= splitLen;
else {
#if defined(CyclistFullBlocks_supported)
if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen);
I += lenProcessed;
O += lenProcessed;
IOLen -= lenProcessed;
splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
/* Keep a copy of the plaintext block before O is written. */
memcpy(P, I, splitLen);
Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
Xoodoo_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
Cyclist_Down(instance, P, splitLen, 0x00);
I += splitLen;
O += splitLen;
IOLen -= splitLen;
Cu = 0x00;
} while ( IOLen != 0 );
/* ------- Cyclist interfaces ------- */
/*
 * Initialise a Cyclist instance.
 *
 * The permutation state is reset, and the instance starts in hash mode, in the
 * Up phase, with both rates set to Cyclist_Rhash.  When KLen is non-zero the
 * instance is then switched to keyed mode by absorbing K, ID and counter
 * through Cyclist_AbsorbKey.
 *
 * The state must be reset here: every later operation adds into
 * instance->state, and nothing else in this template initialises it.
 *
 * instance    instance to initialise.
 * K, KLen     key bytes and length; KLen == 0 selects hash mode.
 * ID, IDLen   key identifier, used only when KLen != 0.
 * counter, counterLen  optional counter, used only when KLen != 0.
 */
void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
{
    SnP_StaticInitialize();
    SnP_Initialize(instance->state);
    instance->phase = Cyclist_PhaseUp;
    instance->mode = Cyclist_ModeHash;
    instance->Rabsorb = Cyclist_Rhash;
    instance->Rsqueeze = Cyclist_Rhash;
#ifdef OUTPUT
    instance->file = 0;
    SnP_Initialize( instance->stateShadow );
#endif
    if (KLen != 0) {
        Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, counter, counterLen);
    }
}
/*
 * Absorb XLen bytes from X at the instance's current absorb rate,
 * using domain byte 0x03 for the first block.
 */
void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, size_t XLen)
{
    Cyclist_AbsorbAny(instance, X, XLen, instance->Rabsorb, 0x03);
}
/*
 * Encrypt PLen bytes of plaintext P into ciphertext C.
 * The instance must be in keyed mode (checked with assert).
 */
void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen)
{
    assert(instance->mode == Cyclist_ModeKeyed);
    Cyclist_Crypt(instance, P, C, PLen, 0);
}
/*
 * Decrypt CLen bytes of ciphertext C into plaintext P.
 * The instance must be in keyed mode (checked with assert).
 */
void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen)
{
    assert(instance->mode == Cyclist_ModeKeyed);
    Cyclist_Crypt(instance, C, P, CLen, 1);
}
/*
 * Squeeze YLen output bytes into Y, using domain byte 0x40 for the first Up.
 */
void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen)
{
    Cyclist_SqueezeAny(instance, Y, YLen, 0x40);
}
/*
 * Squeeze KLen bytes of derived key material into K, using domain byte 0x20
 * for the first Up.  The instance must be in keyed mode (checked with assert).
 */
void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen)
{
    assert(instance->mode == Cyclist_ModeKeyed);
    Cyclist_SqueezeAny(instance, K, KLen, 0x20);
}
/*
 * Ratchet the keyed state.
 *
 * Squeezes Cyclist_lRatchet bytes with domain byte 0x10 and absorbs those same
 * bytes back with domain byte 0x00.  The instance must be in keyed mode
 * (checked with assert).
 */
void Cyclist_Ratchet(Cyclist_Instance *instance)
{
    uint8_t buffer[Cyclist_lRatchet];

    assert(instance->mode == Cyclist_ModeKeyed);
    /* Squeeze then absorb is the same as overwriting with zeros */
    Cyclist_SqueezeAny(instance, buffer, sizeof(buffer), 0x10);
    Cyclist_AbsorbAny(instance, buffer, sizeof(buffer), instance->Rabsorb, 0x00);
}
/*
 * Remove the name-mapping macros again so that this template can be included
 * another time with a different `prefix` / `SnP`.
 */
#undef SnP_StaticInitialize
#undef SnP_Initialize
#undef SnP_AddBytes
#undef SnP_AddByte
#undef SnP_OverwriteBytes
#undef SnP_ExtractBytes
#undef SnP_ExtractAndAddBytes

#undef Cyclist_Instance
#undef Cyclist_Initialize
#undef Cyclist_Absorb
#undef Cyclist_Encrypt
#undef Cyclist_Decrypt
#undef Cyclist_Squeeze
#undef Cyclist_SqueezeKey
#undef Cyclist_Ratchet

#undef Cyclist_AbsorbAny
#undef Cyclist_AbsorbKey
#undef Cyclist_SqueezeAny
#undef Cyclist_Down
#undef Cyclist_Up
#undef Cyclist_Crypt

#undef Cyclist_f_bPrime
#undef Cyclist_Rhash
#undef Cyclist_Rkin
#undef Cyclist_Rkout
#undef Cyclist_lRatchet

#if defined(CyclistFullBlocks_supported)
#undef Cyclist_AbsorbKeyedFullBlocks
#undef Cyclist_AbsorbHashFullBlocks
#undef Cyclist_SqueezeKeyedFullBlocks
#undef Cyclist_SqueezeHashFullBlocks
#undef Cyclist_EncryptFullBlocks
#undef Cyclist_DecryptFullBlocks
#endif
/*
The eXtended Keccak Code Package (XKCP)

The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.

Implementation by Ronny Van Keer, hereby denoted as "the implementer".

For more information, feedback or questions, please refer to the Keccak Team website:
https://keccak.team/

To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
*/
#ifndef _Xoodoo_SnP_h_
#define _Xoodoo_SnP_h_

#include <stddef.h>
#include <stdint.h>

/** For the documentation, see SnP-documentation.h.
 */

#define Xoodoo_implementation "32-bit optimized ARM assembler implementation"
#define Xoodoo_stateSizeInBytes (3*4*4)
#define Xoodoo_stateAlignment 4

/* No static set-up is needed by this implementation: expands to nothing. */
#define Xoodoo_StaticInitialize()
void Xoodoo_Initialize(void *state);
/* XOR one byte into the state at byte offset argOffset.  Arguments are
   parenthesised so that expressions such as `p + 1` are cast as a whole. */
#define Xoodoo_AddByte(argS, argData, argOffset) (((uint8_t*)(argS))[(argOffset)] ^= (argData))
void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount);
//void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds);
void Xoodoo_Permute_6rounds(void *state);
void Xoodoo_Permute_12rounds(void *state);
void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length);
void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length);

/* Xoofff fast loops provided by the assembler implementation. */
#define Xoodoo_FastXoofff_supported
void Xoofff_AddIs(uint8_t *output, const uint8_t *input, size_t bitLen);
size_t Xoofff_CompressFastLoop(uint8_t *kRoll, uint8_t *xAccu, const uint8_t *input, size_t length);
size_t Xoofff_ExpandFastLoop(uint8_t *yAccu, const uint8_t *kRoll, uint8_t *output, size_t length);

/* Xoodyak/Cyclist multi-block fast paths; each returns the number of bytes processed. */
#define CyclistFullBlocks_supported
size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen);
size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen);
size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen);
size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen);
size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);
size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);

#endif /* _Xoodoo_SnP_h_ */
@ The eXtended Keccak Code Package (XKCP)
@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
@ For more information, feedback or questions, please refer to the Keccak Team website:
@ To the extent possible under law, the implementer has waived all copyright
@ and related or neighboring rights to the source code in this file.
@ WARNING: These functions work only on little-endian CPUs with the ARMv6 architecture (e.g., ARM11).
@ ----------------------------------------------------------------------------
@ void Xoodoo_Initialize(void *state)
@
@ Zeroes the 48-byte state pointed to by r0 with three 16-byte stores.
@ Uses r1-r3 and r12 as zero sources; r0 is advanced past the state.
.align 4
.global Xoodoo_Initialize
.type Xoodoo_Initialize, %function;
Xoodoo_Initialize:
    movs    r1, #0
    movs    r2, #0
    movs    r3, #0
    movs    r12, #0
    stmia   r0!, { r1 - r3, r12 }
    stmia   r0!, { r1 - r3, r12 }
    stmia   r0!, { r1 - r3, r12 }
    bx      lr
@ ----------------------------------------------------------------------------
@ void Xoodoo_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
@ XORs `length` bytes from data (r1) into the state (r0) starting at byte
@ offset r2.  Whole 32-bit words are handled first, then the remaining bytes.
@ r3 is kept biased (length-4, then length-1) so that the carry flag of the
@ subtraction tells whether another word/byte remains.
.align 4
.global Xoodoo_AddBytes
.type Xoodoo_AddBytes, %function;
Xoodoo_AddBytes:
    push    {r4,lr}
    adds    r0, r0, r2                              @ state += offset
    subs    r3, r3, #4                              @ .if length >= 4
    bcc     Xoodoo_AddBytes_Bytes
Xoodoo_AddBytes_LanesLoop:                          @ then, perform on lanes
    ldr     r2, [r0]
    ldr     r4, [r1], #4
    eors    r2, r2, r4
    str     r2, [r0], #4
    subs    r3, r3, #4
    bcs     Xoodoo_AddBytes_LanesLoop
Xoodoo_AddBytes_Bytes:
    adds    r3, r3, #3                              @ r3 = remaining bytes - 1; carry clear when none remain
    bcc     Xoodoo_AddBytes_Exit
Xoodoo_AddBytes_BytesLoop:
    ldrb    r2, [r0]
    ldrb    r4, [r1], #1
    eors    r2, r2, r4
    strb    r2, [r0], #1
    subs    r3, r3, #1
    bcs     Xoodoo_AddBytes_BytesLoop
Xoodoo_AddBytes_Exit:
    pop     {r4,pc}
@ ----------------------------------------------------------------------------
@ void Xoodoo_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
@ Copies `length` bytes from data (r1) over the state (r0) starting at byte
@ offset r2: whole 32-bit words first, then the remaining bytes.
.align 4
.global Xoodoo_OverwriteBytes
.type Xoodoo_OverwriteBytes, %function;
Xoodoo_OverwriteBytes:
    adds    r0, r0, r2                              @ state += offset
    subs    r3, r3, #4                              @ .if length >= 4
    bcc     Xoodoo_OverwriteBytes_Bytes
Xoodoo_OverwriteBytes_LanesLoop:                    @ then, perform on words
    ldr     r2, [r1], #4
    str     r2, [r0], #4
    subs    r3, r3, #4
    bcs     Xoodoo_OverwriteBytes_LanesLoop
Xoodoo_OverwriteBytes_Bytes:
    adds    r3, r3, #3                              @ r3 = remaining bytes - 1; carry clear when none remain
    bcc     Xoodoo_OverwriteBytes_Exit
Xoodoo_OverwriteBytes_BytesLoop:
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    subs    r3, r3, #1
    bcs     Xoodoo_OverwriteBytes_BytesLoop
Xoodoo_OverwriteBytes_Exit:
    bx      lr
@ ----------------------------------------------------------------------------
@ void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount)
@
@ Clears the first byteCount (r1) bytes of the state (r0): byteCount/4 word
@ stores followed by byteCount%4 byte stores.
.align 4
.global Xoodoo_OverwriteWithZeroes
.type Xoodoo_OverwriteWithZeroes, %function;
Xoodoo_OverwriteWithZeroes:
    movs    r3, #0
    lsrs    r2, r1, #2                              @ r2 = number of whole words
    beq     Xoodoo_OverwriteWithZeroes_Bytes
Xoodoo_OverwriteWithZeroes_LoopLanes:
    str     r3, [r0], #4
    subs    r2, r2, #1
    bne     Xoodoo_OverwriteWithZeroes_LoopLanes
Xoodoo_OverwriteWithZeroes_Bytes:
    ands    r1, #3                                  @ r1 = number of trailing bytes
    beq     Xoodoo_OverwriteWithZeroes_Exit
Xoodoo_OverwriteWithZeroes_LoopBytes:
    strb    r3, [r0], #1
    subs    r1, r1, #1
    bne     Xoodoo_OverwriteWithZeroes_LoopBytes
Xoodoo_OverwriteWithZeroes_Exit:
    bx      lr
@ ----------------------------------------------------------------------------
@ void Xoodoo_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
@
@ Copies `length` bytes of the state (r0), starting at byte offset r2, to data
@ (r1): whole 32-bit words first, then the remaining bytes.  The state is only
@ read.
.align 4
.global Xoodoo_ExtractBytes
.type Xoodoo_ExtractBytes, %function;
Xoodoo_ExtractBytes:
    adds    r0, r0, r2                              @ state += offset
    subs    r3, r3, #4                              @ .if length >= 4
    bcc     Xoodoo_ExtractBytes_Bytes
Xoodoo_ExtractBytes_LanesLoop:                      @ then, handle words
    ldr     r2, [r0], #4
    str     r2, [r1], #4
    subs    r3, r3, #4
    bcs     Xoodoo_ExtractBytes_LanesLoop
Xoodoo_ExtractBytes_Bytes:
    adds    r3, r3, #3                              @ r3 = remaining bytes - 1; carry clear when none remain
    bcc     Xoodoo_ExtractBytes_Exit
Xoodoo_ExtractBytes_BytesLoop:
    ldrb    r2, [r0], #1
    strb    r2, [r1], #1
    subs    r3, r3, #1
    bcs     Xoodoo_ExtractBytes_BytesLoop
Xoodoo_ExtractBytes_Exit:
    bx      lr
@ ----------------------------------------------------------------------------
@ void Xoodoo_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
@
@ Writes output[i] = state[offset + i] XOR input[i] for i in [0, length).
@ The fifth argument (length) is passed on the stack; after pushing r4 and r5
@ it is found at [sp, #8].  The state is only read.
.align 4
.global Xoodoo_ExtractAndAddBytes
.type Xoodoo_ExtractAndAddBytes, %function;
Xoodoo_ExtractAndAddBytes:
    push    {r4,r5}
    adds    r0, r0, r3                              @ state += offset (offset register no longer needed, reuse for length)
    ldr     r3, [sp, #8]                            @ get length argument from stack
    subs    r3, r3, #4                              @ .if length >= 4
    bcc     Xoodoo_ExtractAndAddBytes_Bytes
Xoodoo_ExtractAndAddBytes_LanesLoop:                @ then, handle words
    ldr     r5, [r0], #4
    ldr     r4, [r1], #4
    eors    r5, r5, r4
    str     r5, [r2], #4
    subs    r3, r3, #4
    bcs     Xoodoo_ExtractAndAddBytes_LanesLoop
Xoodoo_ExtractAndAddBytes_Bytes:
    adds    r3, r3, #3                              @ r3 = remaining bytes - 1; carry clear when none remain
    bcc     Xoodoo_ExtractAndAddBytes_Exit
Xoodoo_ExtractAndAddBytes_BytesLoop:
    ldrb    r5, [r0], #1
    ldrb    r4, [r1], #1
    eors    r5, r5, r4
    strb    r5, [r2], #1
    subs    r3, r3, #1
    bcs     Xoodoo_ExtractAndAddBytes_BytesLoop
Xoodoo_ExtractAndAddBytes_Exit:
    pop     {r4,r5}
    bx      lr
@ ----------------------------------------------------------------------------
@ Rotation amounts used by the round macros below.
@ _r0 and _r1 appear in the theta part of mRound; _w1 is the rotation used by
@ mChi3 and, added to _e1, forms the rho rotation argument passed to mRound.
@ NOTE(review): _t3 and _e0 are not referenced by the code visible here.
.equ _r0 , 5
.equ _r1 , 14
.equ _t3 , 1
.equ _w1 , 11
.equ _e0 , 2
.equ _e1 , 8
@ Round constants, _rc12 (first of 12 rounds) down to _rc1 (last round).
.equ _rc12 , 0x00000058
.equ _rc11 , 0x00000038
.equ _rc10 , 0x000003C0
.equ _rc9 , 0x000000D0
.equ _rc8 , 0x00000120
.equ _rc7 , 0x00000014
.equ _rc6 , 0x00000060
.equ _rc5 , 0x0000002C
.equ _rc4 , 0x00000380
.equ _rc3 , 0x000000F0
.equ _rc2 , 0x000001A0
.equ _rc1 , 0x00000012
@ Round constants for the 6-round permutation, pre-rotated for the lazily
@ rotated state: _rcNxM is _rcN rotated right by M*_r0 bits
@ (e.g. 0x60 ror 5 = 0x03, 0x2C ror 10 = 0x0b000000).
.equ _rc6x1, 0x00000003
.equ _rc5x2, 0x0b000000
.equ _rc4x3, 0x07000000
.equ _rc3x4, 0x000f0000
.equ _rc2x5, 0x0000d000
.equ _rc1x6, 0x00000048
@ Same pre-rotation for the 12-round permutation
@ (e.g. 0x58 ror 5 = 0xc0000002, 0x12 ror 60 = 0x120).
.equ _rc12x1, 0xc0000002
.equ _rc11x2, 0x0e000000
.equ _rc10x3, 0x07800000
.equ _rc9x4 , 0x000d0000
.equ _rc8x5 , 0x00009000
.equ _rc7x6 , 0x00000050
.equ _rc6x7 , 0x0000000c
.equ _rc5x8 , 0x2c000000
.equ _rc4x9 , 0x1c000000
.equ _rc3x10, 0x003c0000
.equ _rc2x11, 0x00034000
.equ _rc1x12, 0x00000120
@ ----------------------------------------------------------------------------
@ mXor3 ro, a0, a1, a2, rho_e1, rho_e2
@ ro = a0 ^ rotl(a1, rho_e1) ^ rotl(a2, rho_e2), with the rotations expressed
@ as ROR by (32 - amount) % 32.  A rotation that is a multiple of 32 is
@ emitted as a plain eors, since a shifted operand cannot encode "ROR #0".
.macro mXor3 ro, a0, a1, a2, rho_e1, rho_e2
    .if ((\rho_e1)%32) == 0
    eors    \ro, \a0, \a1
    .else
    eor     \ro, \a0, \a1, ROR #(32-(\rho_e1))%32
    .endif
    .if ((\rho_e2)%32) == 0
    eors    \ro, \ro, \a2
    .else
    eor     \ro, \ro, \a2, ROR #(32-(\rho_e2))%32
    .endif
    .endm
@ mRliXor ro, ri, rot
@ ro = ro ^ rotl(ri, rot): the rotation is applied to the input operand ri.
@ A rotation that is a multiple of 32 is emitted as a plain eors.
.macro mRliXor ro, ri, rot
    .if ((\rot)%32) == 0
    eors    \ro, \ro, \ri
    .else
    eor     \ro, \ro, \ri, ROR #(32-(\rot))%32
    .endif
    .endm
@ mRloXor ro, ri, rot
@ ro = ri ^ rotl(ro, rot): the rotation is applied to the previous value of
@ the output register ro.  A rotation that is a multiple of 32 is emitted as
@ a plain eors.
.macro mRloXor ro, ri, rot
    .if ((\rot)%32) == 0
    eors    \ro, \ro, \ri
    .else
    eor     \ro, \ri, \ro, ROR #(32-(\rot))%32
    .endif
    .endm
@ mChi3 a0, a1, a2, r0, r1
@ Non-linear step on one column (a0, a1, a2), using r0 and r1 as scratch.
@ Each lane is XORed with (next-next AND NOT next) of the other two lanes; the
@ ROR #_w1 / ROR #32-_w1 operands account for the third lane being held
@ rotated by _w1 relative to the other two at this point.
.macro mChi3 a0,a1,a2,r0,r1
    bic     \r0, \a2, \a1, ROR #_w1
    eors    \a0, \a0, \r0, ROR #32-_w1
    bic     \r1, \a0, \a2, ROR #32-_w1
    eors    \a1, \a1, \r1
    bic     \r1, \a1, \a0
    eors    \a2, \a2, \r1, ROR #_w1
    .endm
@ mRound r6i, r7i, r8i, r9i, r6w, r7w, r8w, r9w, r10i, r11i, r12i, lri, rho_e1, rho_we2, rc
@
@ One Xoodoo round on the state held in r2-r5 (first plane) and in the twelve
@ registers named by the arguments; r0 and r1 are scratch.
@   r6i..r9i    registers holding the second plane on entry
@   r6w..r9w    the same registers in the order used after the west shift
@   r10i..lri   registers holding the third plane
@   rho_e1, rho_we2  rotations still pending on the second and third plane
@   rc          round constant, already rotated to match the lazy rotation
@ Rotations are not applied to the lanes themselves but folded into later
@ operands, so callers must undo the accumulated rotation after the last round.
.macro mRound r6i, r7i, r8i, r9i, r6w, r7w, r8w, r9w, r10i, r11i, r12i, lri, rho_e1, rho_we2, rc
    @ Theta: Column Parity Mixer (with late Rho-west, Rho-east bit rotations)
    mXor3   r0, r5, \r9i, \lri, \rho_e1, \rho_we2
    mXor3   r1, r2, \r6i, \r10i, \rho_e1, \rho_we2
    mRliXor r0, r0, _r1-_r0
    mRloXor r2, r0, 32-_r0
    mRloXor \r6i, r0, \rho_e1-_r0
    mRloXor \r10i, r0, \rho_we2-_r0
    mXor3   r0, r3, \r7i, \r11i, \rho_e1, \rho_we2
    mRliXor r1, r1, _r1-_r0
    mRloXor r3, r1, 32-_r0
    mRloXor \r7i, r1, \rho_e1-_r0
    mRloXor \r11i, r1, \rho_we2-_r0
    mXor3   r1, r4, \r8i, \r12i, \rho_e1, \rho_we2
    mRliXor r0, r0, _r1-_r0
    mRloXor r4, r0, 32-_r0
    mRloXor \r8i, r0, \rho_e1-_r0
    mRloXor \r12i, r0, \rho_we2-_r0
    mRliXor r1, r1, _r1-_r0
    mRloXor r5, r1, 32-_r0
    mRloXor \r9i, r1, \rho_e1-_r0
    mRloXor \lri, r1, \rho_we2-_r0
    @ After Theta the whole state is rotated -r0
    @ from here we must use a1.w instead of a1.i
    @ Iota: round constant
    @ 0xc0000002 cannot be encoded as a single immediate, so it is applied in two steps.
    .if \rc == 0xc0000002
    eor     r2, r2, #0x00000002
    eor     r2, r2, #0xc0000000
    .else
    eor     r2, r2, #\rc
    .endif
    @ Chi: non linear step, on colums
    mChi3   r2, \r6w, \r10i, r0, r1
    mChi3   r3, \r7w, \r11i, r0, r1
    mChi3   r4, \r8w, \r12i, r0, r1
    mChi3   r5, \r9w, \lri, r0, r1
    .endm
@ ----------------------------------------------------------------------------
@ void Xoodoo_Permute_6rounds( void *state )
@
@ Applies the last six Xoodoo rounds (_rc6 .. _rc1) to the state at r0.
@ The second plane is loaded with its two register pairs exchanged, so that
@ after six rounds - each of which shifts the register roles by one - it ends
@ up in r6-r9 in memory order and the state can be stored with one stmia.
@ The final ror instructions undo the rotation accumulated lazily by mRound.
@ Callee-saved registers r4-r11 are preserved.
.align 4
.global Xoodoo_Permute_6rounds
.type Xoodoo_Permute_6rounds, %function;
Xoodoo_Permute_6rounds:
    push    {r0,r4-r11,lr}
    ldmia   r0!, {r2-r5}
    ldmia   r0!, {r8-r9}
    ldmia   r0!, {r6-r7}
    ldmia   r0, {r10-r12,lr}
    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 32, 32, _rc6x1
    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2
    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3
    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4
    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5
    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6
    pop     {r0,r1}                                 @ r0 = state pointer, r1 = caller's r4
    ror     r2, r2, #32-(6*_r0)%32
    ror     r3, r3, #32-(6*_r0)%32
    ror     r4, r4, #32-(6*_r0)%32
    ror     r5, r5, #32-(6*_r0)%32
    ror     r6, r6, #32-(6*_r0+1)%32
    ror     r7, r7, #32-(6*_r0+1)%32
    ror     r8, r8, #32-(6*_r0+1)%32
    ror     r9, r9, #32-(6*_r0+1)%32
    ror     r10, r10, #32-(6*_r0+_e1+_w1)%32
    ror     r11, r11, #32-(6*_r0+_e1+_w1)%32
    ror     r12, r12, #32-(6*_r0+_e1+_w1)%32
    ror     lr, lr, #32-(6*_r0+_e1+_w1)%32
    stmia   r0, {r2-r12,lr}
    mov     r4, r1                                  @ restore caller's r4
    pop     {r5-r11,pc}
@ ----------------------------------------------------------------------------
@ void Xoodoo_Permute_12rounds( void *state )
@
@ Applies the twelve Xoodoo rounds (_rc12 .. _rc1) to the state at r0.
@ The state is loaded into r2-r12,lr in memory order; after twelve rounds the
@ register roles are back in that order.  The final ror instructions undo the
@ rotation accumulated lazily by mRound.  Callee-saved registers r4-r11 are
@ preserved.
.align 4
.global Xoodoo_Permute_12rounds
.type Xoodoo_Permute_12rounds, %function;
Xoodoo_Permute_12rounds:
    push    {r0,r4-r11,lr}
    ldmia   r0, {r2-r12,lr}
    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc12x1
    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc11x2
    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc10x3
    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc9x4
    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc8x5
    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc7x6
    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc6x7
    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x8
    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x9
    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x10
    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x11
    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x12
    ror     r2, r2, #32-(12*_r0)%32
    ror     r3, r3, #32-(12*_r0)%32
    ror     r4, r4, #32-(12*_r0)%32
    ror     r5, r5, #32-(12*_r0)%32
    ror     r6, r6, #32-(12*_r0+1)%32
    ror     r7, r7, #32-(12*_r0+1)%32
    ror     r8, r8, #32-(12*_r0+1)%32
    ror     r9, r9, #32-(12*_r0+1)%32
    ror     r10, r10, #32-(12*_r0+_e1+_w1)%32
    ror     r11, r11, #32-(12*_r0+_e1+_w1)%32
    ror     r12, r12, #32-(12*_r0+_e1+_w1)%32
    ror     lr, lr, #32-(12*_r0+_e1+_w1)%32
    pop     {r0,r1}                                 @ r0 = state pointer, r1 = caller's r4
    stmia   r0, {r2-r12,lr}
    mov     r4, r1                                  @ restore caller's r4
    pop     {r5-r11,pc}
.equ Xoofff_BlockSize , 3*4*4
@ ----------------------------------------------------------------------------
@ void Xoofff_AddIs(BitSequence *output, const BitSequence *input, BitLength bitLen)
@
@ XORs bitLen (r2) bits of input (r1) into output (r0).  The length is consumed
@ in decreasing granularity: 48-byte blocks, 16-byte groups, 32-bit words,
@ bytes, and finally a partial byte.  For the partial byte the XOR result is
@ masked with (1 << remainingBits) - 1 before being stored, so the upper bits
@ of that output byte are cleared.
.align 4
.global Xoofff_AddIs
.type Xoofff_AddIs, %function;
Xoofff_AddIs:
    push    {r4-r10,lr}
    subs    r2, r2, #Xoofff_BlockSize*8
    bcc     Xoofff_AddIs_LessThanBlock
Xoofff_AddIs_BlockLoop:
    ldr     r3, [r0, #0]
    ldr     r4, [r0, #4]
    ldr     r5, [r0, #8]
    ldr     r6, [r0, #12]
    ldr     r7, [r1], #4
    ldr     r8, [r1], #4
    ldr     r9, [r1], #4
    ldr     r10, [r1], #4
    eor     r3, r3, r7
    eor     r4, r4, r8
    eor     r5, r5, r9
    eor     r6, r6, r10
    str     r3, [r0], #4
    str     r4, [r0], #4
    str     r5, [r0], #4
    str     r6, [r0], #4
    ldr     r3, [r0, #0]
    ldr     r4, [r0, #4]
    ldr     r5, [r0, #8]
    ldr     r6, [r0, #12]
    ldr     r7, [r1], #4
    ldr     r8, [r1], #4
    ldr     r9, [r1], #4
    ldr     r10, [r1], #4
    eor     r3, r3, r7
    eor     r4, r4, r8
    eor     r5, r5, r9
    eor     r6, r6, r10
    str     r3, [r0], #4
    str     r4, [r0], #4
    str     r5, [r0], #4
    str     r6, [r0], #4
    ldr     r3, [r0, #0]
    ldr     r4, [r0, #4]
    ldr     r5, [r0, #8]
    ldr     r6, [r0, #12]
    ldr     r7, [r1], #4
    ldr     r8, [r1], #4
    ldr     r9, [r1], #4
    ldr     r10, [r1], #4
    eor     r3, r3, r7
    eor     r4, r4, r8
    eor     r5, r5, r9
    eor     r6, r6, r10
    str     r3, [r0], #4
    str     r4, [r0], #4
    str     r5, [r0], #4
    str     r6, [r0], #4
    subs    r2, r2, #Xoofff_BlockSize*8
    bcs     Xoofff_AddIs_BlockLoop
Xoofff_AddIs_LessThanBlock:
    adds    r2, r2, #Xoofff_BlockSize*8             @ r2 = bits left (< one block)
    beq     Xoofff_AddIs_Return
    subs    r2, r2, #16*8
    bcc     Xoofff_AddIs_LessThan16
Xoofff_AddIs_16Loop:
    ldr     r3, [r0, #0]
    ldr     r4, [r0, #4]
    ldr     r5, [r0, #8]
    ldr     r6, [r0, #12]
    ldr     r7, [r1], #4
    ldr     r8, [r1], #4
    ldr     r9, [r1], #4
    ldr     r10, [r1], #4
    eor     r3, r3, r7
    eor     r4, r4, r8
    eor     r5, r5, r9
    eor     r6, r6, r10
    str     r3, [r0], #4
    str     r4, [r0], #4
    str     r5, [r0], #4
    str     r6, [r0], #4
    subs    r2, r2, #16*8
    bcs     Xoofff_AddIs_16Loop
Xoofff_AddIs_LessThan16:
    adds    r2, r2, #16*8                           @ r2 = bits left (< 16 bytes)
    beq     Xoofff_AddIs_Return
    subs    r2, r2, #4*8
    bcc     Xoofff_AddIs_LessThan4
Xoofff_AddIs_4Loop:
    ldr     r3, [r0]
    ldr     r7, [r1], #4
    eors    r3, r3, r7
    str     r3, [r0], #4
    subs    r2, r2, #4*8
    bcs     Xoofff_AddIs_4Loop
Xoofff_AddIs_LessThan4:
    adds    r2, r2, #4*8                            @ r2 = bits left (< 4 bytes)
    beq     Xoofff_AddIs_Return
    subs    r2, r2, #8
    bcc     Xoofff_AddIs_LessThan1
Xoofff_AddIs_1Loop:
    ldrb    r3, [r0]
    ldrb    r7, [r1], #1
    eors    r3, r3, r7
    strb    r3, [r0], #1
    subs    r2, r2, #8
    bcs     Xoofff_AddIs_1Loop
Xoofff_AddIs_LessThan1:
    adds    r2, r2, #8                              @ r2 = bits left (< 8)
    beq     Xoofff_AddIs_Return
    ldrb    r3, [r0]
    ldrb    r7, [r1]
    movs    r1, #1
    eors    r3, r3, r7
    lsls    r1, r1, r2
    subs    r1, r1, #1                              @ r1 = mask of the r2 low bits
    ands    r3, r3, r1
    strb    r3, [r0]
Xoofff_AddIs_Return:
    pop     {r4-r10,pc}
@ ----------------------------------------------------------------------------
@ size_t Xoofff_CompressFastLoop(unsigned char *kRoll, unsigned char *xAccu, const unsigned char *input, size_t length)
@
@ For each whole 48-byte block of input: XORs the block into the current kRoll
@ value, applies six Xoodoo rounds, XORs the result into xAccu, and rolls
@ kRoll in memory.  Returns the number of input bytes consumed.
@
@ Stack frame after the two pushes (offsets from sp):
.equ Xoofff_Compress_kRoll , 0                      @ kRoll pointer (r0)
.equ Xoofff_Compress_input , 4                      @ running input pointer (r2)
.equ Xoofff_Compress_xAccu , 8                      @ xAccu pointer (r1)
.equ Xoofff_Compress_iInput , 12                    @ initial input pointer (r2)
.equ Xoofff_Compress_length , 16                    @ length - block size (r3)
.align 4
.global Xoofff_CompressFastLoop
.type Xoofff_CompressFastLoop, %function;
Xoofff_CompressFastLoop:
    subs    r3, #Xoofff_BlockSize                   @ length must be greater than block size
    push    {r1-r12,lr}
    push    {r0,r2}
    ldmia   r0, {r2-r12,lr}                         @ get initial kRoll
Xoofff_CompressFastLoop_Loop:
    ldr     r0, [sp, #Xoofff_Compress_input]        @ add input
    ldr     r1, [r0], #4
    eors    r2, r2, r1
    ldr     r1, [r0], #4
    eors    r3, r3, r1
    ldr     r1, [r0], #4
    eors    r4, r4, r1
    ldr     r1, [r0], #4
    eors    r5, r5, r1
    ldr     r1, [r0], #4
    eors    r6, r6, r1
    ldr     r1, [r0], #4
    eors    r7, r7, r1
    ldr     r1, [r0], #4
    eors    r8, r8, r1
    ldr     r1, [r0], #4
    eors    r9, r9, r1
    ldr     r1, [r0], #4
    eors    r10, r10, r1
    ldr     r1, [r0], #4
    eors    r11, r11, r1
    ldr     r1, [r0], #4
    eors    r12, r12, r1
    ldr     r1, [r0], #4
    eors    lr, lr, r1
    str     r0, [sp, #Xoofff_Compress_input]
    @ permutation
    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc6x1
    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2
    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3
    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4
    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5
    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6
    @ Extract and add into xAccu
    @ (the pending lazy rotation is applied as part of each XOR)
    ldr     r0, [sp, #Xoofff_Compress_xAccu]
    ldr     r1, [r0]
    mRloXor r2, r1, (6*_r0)%32
    ldr     r1, [r0, #4]
    str     r2, [r0], #4
    mRloXor r3, r1, (6*_r0)%32
    ldr     r1, [r0, #4]
    str     r3, [r0], #4
    mRloXor r4, r1, (6*_r0)%32
    ldr     r1, [r0, #4]
    str     r4, [r0], #4
    mRloXor r5, r1, (6*_r0)%32
    str     r5, [r0], #4
    ldm     r0, {r2-r5}                             @ note that r6-r8 and r7-r9 are swapped
    mRliXor r2, r8, (6*_r0+1)%32
    mRliXor r3, r9, (6*_r0+1)%32
    mRliXor r4, r6, (6*_r0+1)%32
    mRliXor r5, r7, (6*_r0+1)%32
    stm     r0!, {r2-r5}
    ldm     r0, {r2-r5}
    mRliXor r2, r10, (6*_r0+_e1+_w1)%32
    mRliXor r3, r11, (6*_r0+_e1+_w1)%32
    mRliXor r4, r12, (6*_r0+_e1+_w1)%32
    mRliXor r5, lr, (6*_r0+_e1+_w1)%32
    stm     r0!, {r2-r5}
    @roll kRoll
    @ Words are reloaded so that r2-r12,lr hold the rolled value in memory
    @ order, ready both for the store and for the next loop iteration.
    ldr     r0, [sp, #Xoofff_Compress_kRoll]
    ldr     lr, [r0], #4
    ldmia   r0!, {r10-r12}
    ldmia   r0!, {r2-r9}
    eors    lr, lr, lr, LSL #13
    eors    lr, lr, r2, ROR #32-3
    sub     r0, #Xoofff_BlockSize
    stmia   r0, {r2-r12,lr}
    @ loop management
    ldr     r0, [sp, #Xoofff_Compress_length]
    subs    r0, #Xoofff_BlockSize
    str     r0, [sp, #Xoofff_Compress_length]
    bcs     Xoofff_CompressFastLoop_Loop
    @ return number of bytes processed
    ldr     r0, [sp, #Xoofff_Compress_input]
    ldr     r1, [sp, #Xoofff_Compress_iInput]
    sub     r0, r0, r1
    pop     {r1,r2}
    pop     {r1-r12,pc}
@ ----------------------------------------------------------------------------
@ size_t Xoofff_ExpandFastLoop(unsigned char *yAccu, const unsigned char *kRoll, unsigned char *output, size_t length)
@
@ For each whole 48-byte block of output: applies six Xoodoo rounds to the
@ current yAccu value, XORs kRoll into the result, writes it to output, and
@ rolls yAccu in memory.  Returns the number of output bytes produced.
@
@ Stack frame after the two pushes (offsets from sp):
.equ Xoofff_Expand_yAccu , 0                        @ yAccu pointer (r0)
.equ Xoofff_Expand_output , 4                       @ running output pointer (r2)
.equ Xoofff_Expand_kRoll , 8                        @ kRoll pointer (r1)
.equ Xoofff_Expand_iOutput , 12                     @ initial output pointer (r2)
.equ Xoofff_Expand_length , 16                      @ length - block size (r3)
.align 4
.global Xoofff_ExpandFastLoop
.type Xoofff_ExpandFastLoop, %function;
Xoofff_ExpandFastLoop:
    subs    r3, #Xoofff_BlockSize                   @ length must be greater than block size
    push    {r1-r12,lr}
    push    {r0,r2}
    ldmia   r0, {r2-r12,lr}                         @ get initial yAccu
Xoofff_ExpandFastLoop_Loop:
    @ permutation
    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc6x1
    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2
    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3
    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4
    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5
    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6
    @ Add k and extract
    @ (the pending lazy rotation is applied as part of each XOR)
    ldr     r0, [sp, #Xoofff_Expand_kRoll]
    ldr     r1, [r0], #4
    mRloXor r2, r1, (6*_r0)%32
    ldr     r1, [sp, #Xoofff_Expand_output]
    str     r2, [r1], #4
    ldr     r2, [r0], #4
    mRloXor r3, r2, (6*_r0)%32
    ldr     r2, [r0], #4
    str     r3, [r1], #4
    mRloXor r4, r2, (6*_r0)%32
    ldr     r2, [r0], #4
    str     r4, [r1], #4
    mRloXor r5, r2, (6*_r0)%32
    str     r5, [r1], #4
    ldm     r0!, {r2-r5}                            @ Note that r6-r8 and r7-r9 are swapped
    mRliXor r2, r8, (6*_r0+1)%32
    str     r2, [r1], #4
    mRliXor r3, r9, (6*_r0+1)%32
    str     r3, [r1], #4
    mRliXor r4, r6, (6*_r0+1)%32
    str     r4, [r1], #4
    mRliXor r5, r7, (6*_r0+1)%32
    str     r5, [r1], #4
    ldm     r0!, {r2-r5}
    mRliXor r2, r10, (6*_r0+_e1+_w1)%32
    str     r2, [r1], #4
    mRliXor r3, r11, (6*_r0+_e1+_w1)%32
    str     r3, [r1], #4
    mRliXor r4, r12, (6*_r0+_e1+_w1)%32
    str     r4, [r1], #4
    mRliXor r5, lr, (6*_r0+_e1+_w1)%32
    str     r5, [r1], #4
    @ roll-e yAccu
    @ Words are reloaded so that r2-r12,lr hold the rolled value in memory
    @ order, ready both for the store and for the next loop iteration.
    ldr     r0, [sp, #Xoofff_Expand_yAccu]
    str     r1, [sp, #Xoofff_Expand_output]
    ldr     lr, [r0], #4
    ldmia   r0!, {r10-r12}
    ldmia   r0!, {r2-r9}
    and     r1, r6, r2
    eor     lr, r1, lr, ROR #32-5
    eor     lr, lr, r2, ROR #32-13
    eor     lr, lr, #7
    sub     r0, #Xoofff_BlockSize
    stmia   r0, {r2-r12,lr}
    @ loop management
    ldr     r0, [sp, #Xoofff_Expand_length]
    subs    r0, #Xoofff_BlockSize
    str     r0, [sp, #Xoofff_Expand_length]
    bcs     Xoofff_ExpandFastLoop_Loop
    @ return number of bytes processed
    ldr     r0, [sp, #Xoofff_Expand_output]
    ldr     r1, [sp, #Xoofff_Expand_iOutput]
    sub     r0, r0, r1
    pop     {r1,r2}
    pop     {r1-r12,pc}
/*
The eXtended Keccak Code Package (XKCP)

The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.

Implementation by Ronny Van Keer, hereby denoted as "the implementer".

For more information, feedback or questions, please refer to the Keccak Team website:
https://keccak.team/

To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
*/
#ifndef _Xoodoo_h_
#define _Xoodoo_h_
#include <stdint.h>
#include <stdlib.h>
/* presumably the maximum number of rounds; twelve round constants are listed below */
#define MAXROUNDS 12
/* Lane-grid dimensions used by index(): NROWS rows of NCOLUMS lanes each. */
#define NROWS 3
#define NCOLUMS 4
/* Round constants */
/* Numbered from _rc12 down to _rc1; one 32-bit value per round. */
#define _rc12 0x00000058
#define _rc11 0x00000038
#define _rc10 0x000003C0
#define _rc9 0x000000D0
#define _rc8 0x00000120
#define _rc7 0x00000014
#define _rc6 0x00000060
#define _rc5 0x0000002C
#define _rc4 0x00000380
#define _rc3 0x000000F0
#define _rc2 0x000001A0
#define _rc1 0x00000012
/*
 * ROTL32(a, offset): rotate the 32-bit value a left by offset bits (offset is
 * reduced modulo 32).
 *
 * ARM (non-GNU) and MSVC builds map to the compiler intrinsic. The generic
 * fallback combines the two shifted halves with OR rather than XOR so that a
 * rotation by a multiple of 32 returns a unchanged (with XOR both halves are
 * equal to a and cancel to 0). For every other offset the halves do not
 * overlap, so the result is the same as before. The argument is parenthesised
 * before the cast so that compound expressions are truncated as a whole.
 */
#if !defined(ROTL32)
    #if defined (__arm__) && !defined(__GNUC__)
        #define ROTL32(a, offset) __ror(a, (32-(offset))%32)
    #elif defined(_MSC_VER)
        #define ROTL32(a, offset) _rotl(a, (offset)%32)
    #else
        #define ROTL32(a, offset) ((((uint32_t)(a)) << ((offset)%32)) | (((uint32_t)(a)) >> ((32-(offset))%32)))
    #endif
#endif
/*
 * READ32_UNALIGNED(argAddress): read a 32-bit word from argAddress.
 * The ARM (non-GNU) definition goes through a __packed pointer; the other
 * definitions dereference a plain const uint32_t pointer cast.
 * NOTE(review): the plain-cast forms presumably rely on the target tolerating
 * unaligned word loads - confirm per platform.
 */
#if !defined(READ32_UNALIGNED)
#if defined (__arm__) && !defined(__GNUC__)
#define READ32_UNALIGNED(argAddress) (*((const __packed uint32_t*)(argAddress)))
#elif defined(_MSC_VER)
#define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress)))
#define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress)))
/*
 * WRITE32_UNALIGNED(argAddress, argData): store the 32-bit value argData at
 * argAddress. The ARM (non-GNU) definition goes through a __packed pointer;
 * the other definitions assign through a plain uint32_t pointer cast.
 * NOTE(review): the plain-cast forms presumably rely on the target tolerating
 * unaligned word stores - confirm per platform.
 */
#if !defined(WRITE32_UNALIGNED)
#if defined (__arm__) && !defined(__GNUC__)
#define WRITE32_UNALIGNED(argAddress, argData) (*((__packed uint32_t*)(argAddress)) = (argData))
#elif defined(_MSC_VER)
#define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData))
#define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData))
/*
 * index(x, y): position of lane (x, y) in a flat array of NROWS*NCOLUMS lanes,
 * row-major. x is reduced modulo NCOLUMS and y modulo NROWS.
 */
#if !defined(index)
#define index(__x,__y) ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS))
/* A lane is one 32-bit word. */
typedef uint32_t tXoodooLane;
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Xoodyak_parameters_h_
#define _Xoodyak_parameters_h_
/* Byte counts consumed by the Cyclist template when it is instantiated with prefix Xoodyak. */
/* Number of state bytes; the domain-separation byte is added at index f_bPrime-1. */
#define Xoodyak_f_bPrime 48
/* Absorb and squeeze rate in hash mode. */
#define Xoodyak_Rhash 16
/* Absorb rate in keyed mode. */
#define Xoodyak_Rkin 44
/* Squeeze rate in keyed mode; also the block size used for encryption and decryption. */
#define Xoodyak_Rkout 24
/* Size in bytes of the buffer used by the ratchet operation. */
#define Xoodyak_lRatchet 16
@ The eXtended Keccak Code Package (XKCP)
@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
@ For more information, feedback or questions, please refer to the Keccak Team website:
@ To the extent possible under law, the implementer has waived all copyright
@ and related or neighboring rights to the source code in this file.
@ WARNING: These functions work only on little endian CPU with ARMv6 architecture (e.g., ARM11).
@ ----------------------------------------------------------------------------
@ Rotation amounts (in bits) referenced by the round macros and by the final
@ re-alignment in Xoodoo_Permute_12roundsAsm.
.equ _r0 , 5
.equ _r1 , 14
.equ _t3 , 1
.equ _w1 , 11
.equ _e0 , 2
.equ _e1 , 8
@ Round constants, numbered from 12 down to 1.
.equ _rc12 , 0x00000058
.equ _rc11 , 0x00000038
.equ _rc10 , 0x000003C0
.equ _rc9 , 0x000000D0
.equ _rc8 , 0x00000120
.equ _rc7 , 0x00000014
.equ _rc6 , 0x00000060
.equ _rc5 , 0x0000002C
.equ _rc4 , 0x00000380
.equ _rc3 , 0x000000F0
.equ _rc2 , 0x000001A0
.equ _rc1 , 0x00000012
@ Pre-rotated round constants: _rcNxK equals _rcN rotated right by (K*_r0)%32 bits.
@ Sequence starting at _rc6 (K = 1..6).
.equ _rc6x1 , 0x00000003
.equ _rc5x2 , 0x0b000000
.equ _rc4x3 , 0x07000000
.equ _rc3x4 , 0x000f0000
.equ _rc2x5 , 0x0000d000
.equ _rc1x6 , 0x00000048
@ Sequence starting at _rc12 (K = 1..12), used by Xoodoo_Permute_12roundsAsm.
.equ _rc12x1, 0xc0000002
.equ _rc11x2, 0x0e000000
.equ _rc10x3, 0x07800000
.equ _rc9x4 , 0x000d0000
.equ _rc8x5 , 0x00009000
.equ _rc7x6 , 0x00000050
.equ _rc6x7 , 0x0000000c
.equ _rc5x8 , 0x2c000000
.equ _rc4x9 , 0x1c000000
.equ _rc3x10, 0x003c0000
.equ _rc2x11, 0x00034000
.equ _rc1x12, 0x00000120
@ ----------------------------------------------------------------------------
@ mXor3: ro = a0 ^ rot(a1) ^ rot(a2).
@ a1 is taken rotated right by (32-rho_e1)%32 bits and a2 by (32-rho_e2)%32 bits.
@ When a rotation amount is a multiple of 32 the plain eors form is selected
@ instead of the ROR-operand form.
.macro mXor3 ro, a0, a1, a2, rho_e1, rho_e2
.if ((\rho_e1)%32) == 0
eors \ro, \a0, \a1
eor \ro, \a0, \a1, ROR #(32-(\rho_e1))%32
.if ((\rho_e2)%32) == 0
eors \ro, \ro, \a2
eor \ro, \ro, \a2, ROR #(32-(\rho_e2))%32
@ mRliXor: ro = ro ^ rot(ri), i.e. the *input* ri is the rotated operand.
@ ri is taken rotated right by (32-rot)%32 bits; a rotation that is a multiple
@ of 32 selects the plain eors form.
.macro mRliXor ro, ri, rot
.if ((\rot)%32) == 0
eors \ro, \ro, \ri
eor \ro, \ro, \ri, ROR #(32-(\rot))%32
@ mRloXor: ro = ri ^ rot(ro), i.e. the previous value of the *output* ro is the
@ rotated operand. ro is taken rotated right by (32-rot)%32 bits; a rotation
@ that is a multiple of 32 selects the plain eors form.
.macro mRloXor ro, ri, rot
.if ((\rot)%32) == 0
eors \ro, \ro, \ri
eor \ro, \ri, \ro, ROR #(32-(\rot))%32
@ mChi3: non-linear step on the three lanes a0, a1, a2 of one column.
@ r0 and r1 are scratch registers and are overwritten.
@ NOTE(review): the ROR #_w1 / ROR #32-_w1 operands suggest a2 is kept in a
@ representation rotated by _w1 relative to a0 and a1 - confirm.
.macro mChi3 a0,a1,a2,r0,r1
@ r0 = a2 & ~ROR(a1, _w1)
bic \r0, \a2, \a1, ROR #_w1
@ a0 ^= ROR(r0, 32-_w1)
eors \a0, \a0, \r0, ROR #32-_w1
@ r1 = a0 & ~ROR(a2, 32-_w1)
bic \r1, \a0, \a2, ROR #32-_w1
@ a1 ^= r1
eors \a1, \a1, \r1
@ r1 = a1 & ~a0
bic \r1, \a1, \a0
@ a2 ^= ROR(r1, _w1)
eors \a2, \a2, \r1, ROR #_w1
@ mRound: one round on the state held in registers. r2-r5 are used directly;
@ the remaining eight lanes are named by the macro arguments so that successive
@ rounds can permute register roles instead of moving data.
@   r6i..r9i   registers read and updated by Theta for the second group of four lanes
@   r6w..r9w   registers handed to Chi for that same group
@   r10i..lri  registers for the third group of four lanes
@   rho_e1, rho_we2  rotation amounts forwarded to mXor3 / mRloXor
@   rc         immediate XORed into r2
@ r0 and r1 are scratch and are overwritten.
.macro mRound r6i, r7i, r8i, r9i, r6w, r7w, r8w, r9w, r10i, r11i, r12i, lri, rho_e1, rho_we2, rc
@ Theta: Column Parity Mixer (with late Rho-west, Rho-east bit rotations)
mXor3 r0, r5, \r9i, \lri, \rho_e1, \rho_we2
mXor3 r1, r2, \r6i, \r10i, \rho_e1, \rho_we2
mRliXor r0, r0, _r1-_r0
mRloXor r2, r0, 32-_r0
mRloXor \r6i, r0, \rho_e1-_r0
mRloXor \r10i, r0, \rho_we2-_r0
mXor3 r0, r3, \r7i, \r11i, \rho_e1, \rho_we2
mRliXor r1, r1, _r1-_r0
mRloXor r3, r1, 32-_r0
mRloXor \r7i, r1, \rho_e1-_r0
mRloXor \r11i, r1, \rho_we2-_r0
mXor3 r1, r4, \r8i, \r12i, \rho_e1, \rho_we2
mRliXor r0, r0, _r1-_r0
mRloXor r4, r0, 32-_r0
mRloXor \r8i, r0, \rho_e1-_r0
mRloXor \r12i, r0, \rho_we2-_r0
mRliXor r1, r1, _r1-_r0
mRloXor r5, r1, 32-_r0
mRloXor \r9i, r1, \rho_e1-_r0
mRloXor \lri, r1, \rho_we2-_r0
@ After Theta the whole state is rotated -r0
@ from here we must use a1.w instead of a1.i
@ Iota: round constant
@ The value 0xc0000002 is XORed in as two separate immediates; every other
@ constant is XORed in with a single instruction.
@ NOTE(review): presumably because 0xc0000002 cannot be encoded as one
@ immediate on the intended target - confirm.
.if \rc == 0xc0000002
eor r2, r2, #0x00000002
eor r2, r2, #0xc0000000
eor r2, r2, #\rc
@ Chi: non linear step, on colums
mChi3 r2, \r6w, \r10i, r0, r1
mChi3 r3, \r7w, \r11i, r0, r1
mChi3 r4, \r8w, \r12i, r0, r1
mChi3 r5, \r9w, \lri, r0, r1
@ Offsets into the frame that every caller builds with "push {r0-r5}" before
@ branching to Xoodoo_Permute_12roundsAsm:
@   [sp+0]  r0 = state pointer
@   [sp+16] r4 = initial length
@   [sp+20] r5 = address to continue at after the permutation
.equ offsetInstance , 0
.equ offsetInitialLen , 16
.equ offsetReturn , 20
@ ----------------------------------------------------------------------------
@ Xoodoo_Permute_12roundsAsm: only callable from asm
@ In/out: the twelve state lanes in r2-r12 and lr (same order as ldmia/stmia
@ {r2-r12,lr} on the state buffer). r0 and r1 are overwritten by mRound.
@ It is entered with a plain branch and leaves by loading pc from
@ [sp, #offsetReturn], so the caller's frame must already be on the stack.
.align 4
.type Xoodoo_Permute_12roundsAsm, %function;
@ Twelve rounds. The register-name arguments rotate from round to round, and
@ the rotated round constants _rc12x1 .. _rc1x12 are used in that order.
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc12x1
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc11x2
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc10x3
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc9x4
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc8x5
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc7x6
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc6x7
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x8
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x9
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x10
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x11
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x12
@ Final per-lane rotations. Each group of four lanes gets its own amount,
@ derived from 12*_r0 plus a group-specific term.
@ NOTE(review): presumably this removes the rotation the rounds leave pending
@ (see the "rotated -r0" note in mRound) - confirm.
ror r2, r2, #32-(12*_r0)%32
ror r3, r3, #32-(12*_r0)%32
ror r4, r4, #32-(12*_r0)%32
ror r5, r5, #32-(12*_r0)%32
ror r6, r6, #32-(12*_r0+1)%32
ror r7, r7, #32-(12*_r0+1)%32
ror r8, r8, #32-(12*_r0+1)%32
ror r9, r9, #32-(12*_r0+1)%32
ror r10, r10, #32-(12*_r0+_e1+_w1)%32
ror r11, r11, #32-(12*_r0+_e1+_w1)%32
ror r12, r12, #32-(12*_r0+_e1+_w1)%32
ror lr, lr, #32-(12*_r0+_e1+_w1)%32
@ Continue at the address the caller stored in its frame.
ldr pc, [sp, #offsetReturn]
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen)
@ {
@ size_t initialLength = XLen@
@ do {
@ SnP_Permute(state )@ /* Xoodyak_Up(instance, NULL, 0, 0)@ */
@ SnP_AddBytes(state, X, 0, Xoodyak_Rkin)@ /* Xoodyak_Down(instance, X, Xoodyak_Rkin, 0)@ */
@ SnP_AddByte(state, 0x01, Xoodyak_Rkin)@
@ X += Xoodyak_Rkin@
@ XLen -= Xoodyak_Rkin@
@ } while (XLen >= Xoodyak_Rkin)@
@ return initialLength - XLen@
@ }
@
@ Arguments arrive in r0 (state), r1 (X), r2 (XLen); the byte count processed is
@ returned in r0. One 44-byte block is always processed before the length is
@ tested, so callers are expected to pass XLen >= 44.
@ Frame slots (after "push {r0-r5}") holding the running X pointer and the
@ running length, which is kept biased by minus one block.
.equ offsetAbsorbX , 4
.equ offsetAbsorbXLen , 8
.align 4
.global Xoodyak_AbsorbKeyedFullBlocks
.type Xoodyak_AbsorbKeyedFullBlocks, %function;
push {r4-r12,lr}
mov r4, r2 @ r4 initialLength
subs r2, r2, #44
ldr r5, =Xoodyak_AbsorbKeyedFullBlocks_Ret
@ Frame: [sp+0]=state, [sp+4]=X, [sp+8]=XLen-44, [sp+16]=initialLength, [sp+20]=return label
push {r0-r5}
@ Load the twelve state lanes into r2-r12, lr
ldmia r0, {r2-r12,lr}
b Xoodoo_Permute_12roundsAsm
@ The permutation continues here through [sp, #offsetReturn].
@ XOR eleven input words (44 bytes) into lanes r2-r12, advancing r0 through X.
ldr r0, [sp, #offsetAbsorbX]
ldr r1, [r0], #4
eors r2, r2, r1
ldr r1, [r0], #4
eors r3, r3, r1
ldr r1, [r0], #4
eors r4, r4, r1
ldr r1, [r0], #4
eors r5, r5, r1
ldr r1, [r0], #4
eors r6, r6, r1
ldr r1, [r0], #4
eors r7, r7, r1
ldr r1, [r0], #4
eors r8, r8, r1
ldr r1, [r0], #4
eors r9, r9, r1
ldr r1, [r0], #4
eors r10, r10, r1
ldr r1, [r0], #4
eors r11, r11, r1
ldr r1, [r0], #4
@ XOR 0x01 into the twelfth lane (lr), i.e. state byte 44
eors lr, lr, #1
eors r12, r12, r1
@ Save the advanced pointer and subtract one block from the biased length;
@ bcs loops while the subtraction did not borrow.
ldr r1, [sp, #offsetAbsorbXLen]
str r0, [sp, #offsetAbsorbX]
subs r1, r1, #44
str r1, [sp, #offsetAbsorbXLen]
bcs Xoodyak_AbsorbKeyedFullBlocks_Loop
@ Write the lanes back to the state buffer
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
@ r2 = biased remaining length, r4 = initialLength
pop {r0-r5}
@ Undo the bias and return initialLength - remaining
adds r2, r2, #44
sub r0, r4, r2
pop {r4-r12,pc}
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen)
@ {
@ size_t initialLength = XLen@
@ do {
@ SnP_Permute(state )@ /* Xoodyak_Up(instance, NULL, 0, 0)@ */
@ SnP_AddBytes(state, X, 0, Xoodyak_Rhash)@ /* Xoodyak_Down(instance, X, Xoodyak_Rhash, 0)@ */
@ SnP_AddByte(state, 0x01, Xoodyak_Rhash)@
@ X += Xoodyak_Rhash@
@ XLen -= Xoodyak_Rhash@
@ } while (XLen >= Xoodyak_Rhash)@
@ return initialLength - XLen@
@ }
@
@ Arguments arrive in r0 (state), r1 (X), r2 (XLen); the byte count processed is
@ returned in r0. One 16-byte block is always processed before the length is
@ tested, so callers are expected to pass XLen >= 16.
.align 4
.global Xoodyak_AbsorbHashFullBlocks
.type Xoodyak_AbsorbHashFullBlocks, %function;
push {r4-r12,lr}
mov r4, r2 @ r4 initialLength
subs r2, r2, #16
ldr r5, =Xoodyak_AbsorbHashFullBlocks_Ret
@ Frame: [sp+0]=state, [sp+4]=X, [sp+8]=XLen-16, [sp+16]=initialLength, [sp+20]=return label
push {r0-r5}
@ Load the twelve state lanes into r2-r12, lr
ldmia r0, {r2-r12,lr}
b Xoodoo_Permute_12roundsAsm
@ The permutation continues here through [sp, #offsetReturn].
@ XOR four input words (16 bytes) into lanes r2-r5, advancing r0 through X.
ldr r0, [sp, #offsetAbsorbX]
ldr r1, [r0], #4
eors r2, r2, r1
ldr r1, [r0], #4
eors r3, r3, r1
ldr r1, [r0], #4
eors r4, r4, r1
ldr r1, [r0], #4
@ XOR 0x01 into the fifth lane (r6), i.e. state byte 16
eors r6, r6, #1
eors r5, r5, r1
@ Save the advanced pointer and subtract one block from the biased length;
@ bcs loops while the subtraction did not borrow.
ldr r1, [sp, #offsetAbsorbXLen]
str r0, [sp, #offsetAbsorbX]
subs r1, r1, #16
str r1, [sp, #offsetAbsorbXLen]
bcs Xoodyak_AbsorbHashFullBlocks_Loop
@ Write the lanes back to the state buffer
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
@ r2 = biased remaining length, r4 = initialLength
pop {r0-r5}
@ Undo the bias and return initialLength - remaining
adds r2, r2, #16
sub r0, r4, r2
pop {r4-r12,pc}
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen)
@ {
@ size_t initialLength = YLen@
@ do {
@ SnP_AddByte(state, 0x01, 0)@ /* Xoodyak_Down(instance, NULL, 0, 0)@ */
@ SnP_Permute(state )@ /* Xoodyak_Up(instance, Y, Xoodyak_Rkout, 0)@ */
@ SnP_ExtractBytes(state, Y, 0, Xoodyak_Rkout)@
@ Y += Xoodyak_Rkout@
@ YLen -= Xoodyak_Rkout@
@ } while (YLen >= Xoodyak_Rkout)@
@ return initialLength - YLen@
@ }
@
@ Arguments arrive in r0 (state), r1 (Y), r2 (YLen); the byte count produced is
@ returned in r0. One 24-byte block is always produced before the length is
@ tested, so callers are expected to pass YLen >= 24.
@ Frame slots (after "push {r0-r5}") holding the running Y pointer and the
@ running length, which is kept biased by minus one block.
.equ offsetSqueezeY , 4
.equ offsetSqueezeYLen , 8
.align 4
.global Xoodyak_SqueezeKeyedFullBlocks
.type Xoodyak_SqueezeKeyedFullBlocks, %function;
push {r4-r12,lr}
mov r4, r2 @ r4 initialLength
subs r2, r2, #24
ldr r5, =Xoodyak_SqueezeKeyedFullBlocks_Ret
@ Frame: [sp+0]=state, [sp+4]=Y, [sp+8]=YLen-24, [sp+16]=initialLength, [sp+20]=return label
push {r0-r5}
@ Load the twelve state lanes into r2-r12, lr
ldmia r0, {r2-r12,lr}
@ XOR 0x01 into state byte 0 (first lane, r2) before permuting
eors r2, r2, #1
b Xoodoo_Permute_12roundsAsm
@ The permutation continues here through [sp, #offsetReturn].
@ Write lanes r2-r7 (24 bytes) to Y, advancing r0.
ldr r0, [sp, #offsetSqueezeY]
str r2, [r0], #4
str r3, [r0], #4
str r4, [r0], #4
str r5, [r0], #4
str r6, [r0], #4
str r7, [r0], #4
@ Save the advanced pointer and subtract one block from the biased length;
@ bcs loops while the subtraction did not borrow.
ldr r1, [sp, #offsetSqueezeYLen]
str r0, [sp, #offsetSqueezeY]
subs r1, r1, #24
str r1, [sp, #offsetSqueezeYLen]
bcs Xoodyak_SqueezeKeyedFullBlocks_Loop
@ Write the lanes back to the state buffer
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
@ r2 = biased remaining length, r4 = initialLength
pop {r0-r5}
@ Undo the bias and return initialLength - remaining
adds r2, r2, #24
sub r0, r4, r2
pop {r4-r12,pc}
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen)
@ {
@ size_t initialLength = YLen@
@ do {
@ SnP_AddByte(state, 0x01, 0)@ /* Xoodyak_Down(instance, NULL, 0, 0)@ */
@ SnP_Permute(state)@ /* Xoodyak_Up(instance, Y, Xoodyak_Rhash, 0)@ */
@ SnP_ExtractBytes(state, Y, 0, Xoodyak_Rhash)@
@ Y += Xoodyak_Rhash@
@ YLen -= Xoodyak_Rhash@
@ } while (YLen >= Xoodyak_Rhash)@
@ return initialLength - YLen@
@ }
@
@ Arguments arrive in r0 (state), r1 (Y), r2 (YLen); the byte count produced is
@ returned in r0. One 16-byte block is always produced before the length is
@ tested, so callers are expected to pass YLen >= 16.
.align 4
.global Xoodyak_SqueezeHashFullBlocks
.type Xoodyak_SqueezeHashFullBlocks, %function;
push {r4-r12,lr}
mov r4, r2 @ r4 initialLength
subs r2, r2, #16
ldr r5, =Xoodyak_SqueezeHashFullBlocks_Ret
@ Frame: [sp+0]=state, [sp+4]=Y, [sp+8]=YLen-16, [sp+16]=initialLength, [sp+20]=return label
push {r0-r5}
@ Load the twelve state lanes into r2-r12, lr
ldmia r0, {r2-r12,lr}
@ XOR 0x01 into state byte 0 (first lane, r2) before permuting
eors r2, r2, #1
b Xoodoo_Permute_12roundsAsm
@ The permutation continues here through [sp, #offsetReturn].
@ Write lanes r2-r5 (16 bytes) to Y, advancing r0.
ldr r0, [sp, #offsetSqueezeY]
str r2, [r0], #4
str r3, [r0], #4
str r4, [r0], #4
str r5, [r0], #4
@ Save the advanced pointer and subtract one block from the biased length;
@ bcs loops while the subtraction did not borrow.
ldr r1, [sp, #offsetSqueezeYLen]
str r0, [sp, #offsetSqueezeY]
subs r1, r1, #16
str r1, [sp, #offsetSqueezeYLen]
bcs Xoodyak_SqueezeHashFullBlocks_Loop
@ Write the lanes back to the state buffer
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
@ r2 = biased remaining length, r4 = initialLength
pop {r0-r5}
@ Undo the bias and return initialLength - remaining
adds r2, r2, #16
sub r0, r4, r2
pop {r4-r12,pc}
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen)
@ {
@ size_t initialLength = IOLen@
@ do {
@ SnP_Permute(state)@
@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@
@ SnP_OverwriteBytes(state, O, 0, Xoodyak_Rkout)@
@ SnP_AddByte(state, 0x01, Xoodyak_Rkout)@
@ I += Xoodyak_Rkout@
@ O += Xoodyak_Rkout@
@ IOLen -= Xoodyak_Rkout@
@ } while (IOLen >= Xoodyak_Rkout)@
@ return initialLength - IOLen@
@ }
@
@ Arguments arrive in r0 (state), r1 (I), r2 (O), r3 (IOLen); the byte count
@ processed is returned in r0. One 24-byte block is always processed before the
@ length is tested, so callers are expected to pass IOLen >= 24.
@ Frame slots after "push {r0-r5}". The I and O offsets carry an extra +8
@ because they are used while r10 and r11 are additionally pushed; the length
@ offset is used after those two have been popped again.
.equ offsetCryptI , 4+8
.equ offsetCryptO , 8+8
.equ offsetCryptIOLen , 12
.align 4
.global Xoodyak_EncryptFullBlocks
.type Xoodyak_EncryptFullBlocks, %function;
push {r4-r12,lr}
mov r4, r3 @ r4 initialLength
subs r3, r3, #24
ldr r5, =Xoodyak_EncryptFullBlocks_Ret
@ Frame: [sp+0]=state, [sp+4]=I, [sp+8]=O, [sp+12]=IOLen-24, [sp+16]=initialLength, [sp+20]=return label
push {r0-r5}
@ Load the twelve state lanes into r2-r12, lr
ldmia r0, {r2-r12,lr}
b Xoodoo_Permute_12roundsAsm
@ The permutation continues here through [sp, #offsetReturn].
@ Park two state lanes so r10/r11 can serve as the O and I pointers.
push {r10, r11}
ldr r11, [sp, #offsetCryptI]
ldr r10, [sp, #offsetCryptO]
@ For each of six words: lane ^= input word; the result is both the output
@ word and the new lane value. Loads alternate between r0 and r1.
ldr r0, [r11], #4
ldr r1, [r11], #4
eors r2, r2, r0
str r2, [r10], #4
eors r3, r3, r1
ldr r0, [r11], #4
str r3, [r10], #4
eors r4, r4, r0
ldr r1, [r11], #4
str r4, [r10], #4
eors r5, r5, r1
ldr r0, [r11], #4
str r5, [r10], #4
eors r6, r6, r0
ldr r1, [r11], #4
str r6, [r10], #4
eors r7, r7, r1
str r7, [r10], #4
@ Save the advanced pointers, then restore the two parked lanes
str r10, [sp, #offsetCryptO]
str r11, [sp, #offsetCryptI]
pop {r10, r11}
ldr r0, [sp, #offsetCryptIOLen]
@ XOR 0x01 into the seventh lane (r8), i.e. state byte 24
eors r8, r8, #1
@ Subtract one block from the biased length; bcs loops while no borrow occurred
subs r0, r0, #24
str r0, [sp, #offsetCryptIOLen]
bcs Xoodyak_EncryptFullBlocks_Loop
@ Write the lanes back to the state buffer
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
@ r3 = biased remaining length, r4 = initialLength
pop {r0-r5}
@ Undo the bias and return initialLength - remaining
adds r3, r3, #24
sub r0, r4, r3
pop {r4-r12,pc}
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen)
@ {
@ size_t initialLength = IOLen@
@ do {
@ SnP_Permute(state)@
@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@
@ SnP_AddBytes(state, O, 0, Xoodyak_Rkout)@
@ SnP_AddByte(state, 0x01, Xoodyak_Rkout)@
@ I += Xoodyak_Rkout@
@ O += Xoodyak_Rkout@
@ IOLen -= Xoodyak_Rkout@
@ } while (IOLen >= Xoodyak_Rkout)@
@ return initialLength - IOLen@
@ }
@
@ Arguments arrive in r0 (state), r1 (I), r2 (O), r3 (IOLen); the byte count
@ processed is returned in r0. One 24-byte block is always processed before the
@ length is tested, so callers are expected to pass IOLen >= 24.
@ Uses the offsetCrypt* frame offsets defined for Xoodyak_EncryptFullBlocks.
.align 4
.global Xoodyak_DecryptFullBlocks
.type Xoodyak_DecryptFullBlocks, %function;
push {r4-r12,lr}
mov r4, r3 @ r4 initialLength
subs r3, r3, #24
ldr r5, =Xoodyak_DecryptFullBlocks_Ret
@ Frame: [sp+0]=state, [sp+4]=I, [sp+8]=O, [sp+12]=IOLen-24, [sp+16]=initialLength, [sp+20]=return label
push {r0-r5}
@ Load the twelve state lanes into r2-r12, lr
ldmia r0, {r2-r12,lr}
b Xoodoo_Permute_12roundsAsm
@ The permutation continues here through [sp, #offsetReturn].
@ Park two state lanes so r10/r11 can serve as the O and I pointers.
push {r10, r11}
ldr r11, [sp, #offsetCryptI]
ldr r10, [sp, #offsetCryptO]
@ For each of six words: output word = lane ^ input word, then the lane is
@ replaced by the input word itself (the mov). That equals XORing the output
@ back into the lane, as the pseudo-code above does with SnP_AddBytes.
ldr r0, [r11], #4
ldr r1, [r11], #4
eors r2, r2, r0
str r2, [r10], #4
mov r2, r0
eors r3, r3, r1
ldr r0, [r11], #4
str r3, [r10], #4
mov r3, r1
eors r4, r4, r0
ldr r1, [r11], #4
str r4, [r10], #4
mov r4, r0
eors r5, r5, r1
ldr r0, [r11], #4
str r5, [r10], #4
mov r5, r1
eors r6, r6, r0
ldr r1, [r11], #4
str r6, [r10], #4
mov r6, r0
eors r7, r7, r1
str r7, [r10], #4
mov r7, r1
@ Save the advanced pointers, then restore the two parked lanes
str r10, [sp, #offsetCryptO]
str r11, [sp, #offsetCryptI]
pop {r10, r11}
ldr r0, [sp, #offsetCryptIOLen]
@ XOR 0x01 into the seventh lane (r8), i.e. state byte 24
eors r8, r8, #1
@ Subtract one block from the biased length; bcs loops while no borrow occurred
subs r0, r0, #24
str r0, [sp, #offsetCryptIOLen]
bcs Xoodyak_DecryptFullBlocks_Loop
@ Write the lanes back to the state buffer
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
@ r3 = biased remaining length, r4 = initialLength
pop {r0-r5}
@ Undo the bias and return initialLength - remaining
adds r3, r3, #24
sub r0, r4, r3
pop {r4-r12,pc}
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifdef XoodooReference
#include "displayIntermediateValues.h"
#include <assert.h>
#include <string.h>
#include "Xoodyak.h"
#ifdef OUTPUT
#include <stdlib.h>
#include <string.h>
/*
 * Debug helper: writes "<synopsis>:" followed by each of the length bytes of
 * data as a space-separated two-digit lowercase hex value, then a newline,
 * to stream f.
 */
static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length);
static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length)
unsigned int i;
fprintf(f, "%s:", synopsis);
for(i=0; i<length; i++)
fprintf(f, " %02x", (unsigned int)data[i]);
fprintf(f, "\n");
/* MyMin(a,b): smaller of a and b. Each argument may be evaluated twice, so pass side-effect-free expressions. */
#define MyMin(a,b) (((a) < (b)) ? (a) : (b))
#ifdef XKCP_has_Xoodoo
#include "Xoodoo-SnP.h"
/*
 * Instantiate the template: SnP selects the permutation's function-name
 * prefix, SnP_Permute the permutation itself, and prefix the name prefix of
 * the generated functions. The three macros are undefined again afterwards.
 */
#define SnP Xoodoo
#define SnP_Permute Xoodoo_Permute_12rounds
#define prefix Xoodyak
/* NOTE(review): the include target below is an empty string; presumably it should name the template file that consumes SnP/prefix - confirm. */
#include ""
#undef prefix
#undef SnP
#undef SnP_Permute
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Xoodyak_h_
#define _Xoodyak_h_
#include "config.h"
/* The declarations below are only made when config.h enables Xoodoo; the #error line reports the missing implementation. */
#ifdef XKCP_has_Xoodoo
#include <stddef.h>
#include "Cyclist.h"
#include "Xoodoo-SnP.h"
#include "Xoodyak-parameters.h"
/* Declares struct Xoodyak_CyclistInstanceStruct and its typedef Xoodyak_Instance through the Cyclist structure macro. */
KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment)
#error This requires an implementation of Xoodoo
The eXtended Keccak Code Package (XKCP)
Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _align_h_
#define _align_h_
/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. */
/* Any pre-existing ALIGN is therefore removed before it is defined here. */
#ifdef ALIGN
#undef ALIGN
/* ALIGN(x): compiler-specific spelling of "align to x bytes". */
#if defined(__GNUC__)
#define ALIGN(x) __attribute__ ((aligned(x)))
#elif defined(_MSC_VER)
#define ALIGN(x) __declspec(align(x))
#elif defined(__ARMCC_VERSION)
#define ALIGN(x) __align(x)
/* Empty definition: ALIGN(x) expands to nothing. */
#define ALIGN(x)
/* NOTE(review): presumably the number of bytes the ciphertext is longer than the plaintext; it equals the TAGLEN of 16 used by the AEAD wrapper - confirm. */
#define CRYPTO_ABYTES 16
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
1. source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
2. binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation;
3. the name of the copyright holder is not used to endorse products
built using this software without specific written permission.
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
Issue Date: 20/12/2007
Changes for ARM 9/9/2010
#ifndef _BRG_ENDIAN_H
#define _BRG_ENDIAN_H
/* Symbolic constants naming the two byte orders. */
#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
#if 0
/* Include files where endian defines and byteswap functions may reside */
#if defined( __sun )
# include <sys/isa_defs.h>
#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
# include <sys/endian.h>
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
# include <machine/endian.h>
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
# if !defined( __MINGW32__ ) && !defined( _AIX )
# include <endian.h>
# if !defined( __BEOS__ )
# include <byteswap.h>
# endif
# endif
/* Now attempt to set the define for platform byte order using any */
/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
/* seem to encompass most endian symbol definitions */
#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
# endif
#elif defined( BIG_ENDIAN )
#elif defined( LITTLE_ENDIAN )
#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
# endif
#elif defined( _BIG_ENDIAN )
#elif defined( _LITTLE_ENDIAN )
#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN )
#elif defined( __LITTLE_ENDIAN )
#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
# endif
#elif defined( __BIG_ENDIAN__ )
#elif defined( __LITTLE_ENDIAN__ )
/* if the platform byte order could not be determined, then try to */
/* set this define using common machine defines */
#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
defined( vax ) || defined( vms ) || defined( VMS ) || \
defined( __VMS ) || defined( _M_X64 )
#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \
defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ )
#elif defined(__arm__)
# ifdef __BIG_ENDIAN
# else
# endif
#elif 1 /* **** EDIT HERE IF NECESSARY **** */
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order
/* File generated by ToTargetConfigFile.xsl */
#define XKCP_has_Xoodyak
#define XKCP_has_Xoodoo
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#include "crypto_aead.h"
#include "api.h"
#include "Xoodyak.h"
#include <string.h>
#if !defined(CRYPTO_KEYBYTES)
#if !defined(CRYPTO_NPUBBYTES)
#define TAGLEN 16
/*
 * AEAD encryption with Xoodyak.
 *
 * c, clen   output buffer and length; receives mlen ciphertext bytes followed
 *           by a TAGLEN-byte tag, so c must have room for mlen + TAGLEN bytes
 * m, mlen   plaintext
 * ad, adlen associated data (absorbed, not encrypted)
 * nsec      not referenced in the statements below
 * npub      nonce of CRYPTO_NPUBBYTES bytes
 * k         key of CRYPTO_KEYBYTES bytes
 *
 * Returns 0.
 */
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k)
Xoodyak_Instance instance;
/* Key the instance (no key ID, no counter), then absorb nonce and associated data */
Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, NULL, 0, NULL, 0);
Xoodyak_Absorb(&instance, npub, CRYPTO_NPUBBYTES);
Xoodyak_Absorb(&instance, ad, (size_t)adlen);
Xoodyak_Encrypt(&instance, m, c, (size_t)mlen);
/* The tag is squeezed directly behind the ciphertext */
Xoodyak_Squeeze(&instance, c + mlen, TAGLEN);
*clen = mlen + TAGLEN;
/* Disabled debug dump of the output as \x-escaped hex bytes */
#if 0
unsigned int i;
for (i = 0; i < *clen; ++i )
printf("\\x%02x", c[i] );
return 0;
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k)
Xoodyak_Instance instance;
unsigned char tag[TAGLEN];
unsigned long long mlen_;
*mlen = 0;
if (clen < TAGLEN) {
return -1;
mlen_ = clen - TAGLEN;
Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, NULL, 0, NULL, 0);
Xoodyak_Absorb(&instance, npub, CRYPTO_NPUBBYTES);
Xoodyak_Absorb(&instance, ad, (size_t)adlen);
Xoodyak_Decrypt(&instance, c, m, (size_t)mlen_);
Xoodyak_Squeeze(&instance, tag, TAGLEN);
if (memcmp(tag, c + mlen_, TAGLEN) != 0) {
memset(m, 0, (size_t)mlen_);
return -1;
*mlen = mlen_;
return 0;
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Cyclist_h_
#define _Cyclist_h_
#include <stdint.h>
#include "align.h"
/* Values stored in the instance's mode field. */
#define Cyclist_ModeHash 1
#define Cyclist_ModeKeyed 2
/* Values stored in the instance's phase field; they record whether the last operation was a Down or an Up. */
#define Cyclist_PhaseDown 1
#define Cyclist_PhaseUp 2
/*
 * KCP_DeclareCyclistStructure(prefix, size, alignment)
 * Declares struct prefix_CyclistInstanceStruct and its typedef prefix_Instance,
 * marked with ALIGN(alignment):
 *   state     size bytes of permutation state
 *   phase     Cyclist_PhaseDown or Cyclist_PhaseUp
 *   mode      Cyclist_ModeHash or Cyclist_ModeKeyed
 *   Rabsorb   current absorb rate in bytes
 *   Rsqueeze  current squeeze rate in bytes
 * The variant used when OUTPUT is defined adds stateShadow (a second
 * size-byte buffer) and file (a stream for intermediate-value dumps).
 */
#ifdef OUTPUT
#include <stdio.h>
#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
uint8_t state[size]; \
uint8_t stateShadow[size]; \
FILE *file; \
unsigned int phase; \
unsigned int mode; \
unsigned int Rabsorb; \
unsigned int Rsqueeze; \
} prefix##_Instance;
#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
uint8_t state[size]; \
unsigned int phase; \
unsigned int mode; \
unsigned int Rabsorb; \
unsigned int Rsqueeze; \
} prefix##_Instance;
/*
 * KCP_DeclareCyclistFunctions(prefix)
 * Declares the seven public functions of a Cyclist instantiation:
 * prefix_Initialize, _Absorb, _Encrypt, _Decrypt, _Squeeze, _SqueezeKey and
 * _Ratchet. The prototypes use size_t, so a header defining it (for example
 * <stddef.h>) has to be included before this macro is expanded.
 */
#define KCP_DeclareCyclistFunctions(prefix) \
void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \
void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \
void prefix##_Encrypt(prefix##_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen); \
void prefix##_Decrypt(prefix##_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen); \
void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \
void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \
void prefix##_Ratchet(prefix##_Instance *instance);
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/* Token pasting with one level of indirection so that macro arguments (SnP, prefix) are expanded before being joined. */
#define JOIN0(a, b) a ## b
#define JOIN(a, b) JOIN0(a, b)
/* Permutation-side names: SnP_xxx resolves to <SnP>_xxx for the SnP chosen by the including file. */
#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize)
#define SnP_Initialize JOIN(SnP, _Initialize)
#define SnP_AddBytes JOIN(SnP, _AddBytes)
#define SnP_AddByte JOIN(SnP, _AddByte)
#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes)
#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes)
#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes)
/* Instance type and public functions: Cyclist_xxx resolves to <prefix>_xxx. */
#define Cyclist_Instance JOIN(prefix, _Instance)
#define Cyclist_Initialize JOIN(prefix, _Initialize)
#define Cyclist_Absorb JOIN(prefix, _Absorb)
#define Cyclist_Encrypt JOIN(prefix, _Encrypt)
#define Cyclist_Decrypt JOIN(prefix, _Decrypt)
#define Cyclist_Squeeze JOIN(prefix, _Squeeze)
#define Cyclist_SqueezeKey JOIN(prefix, _SqueezeKey)
#define Cyclist_Ratchet JOIN(prefix, _Ratchet)
/* File-local helpers. */
#define Cyclist_AbsorbAny JOIN(prefix, _AbsorbAny)
#define Cyclist_AbsorbKey JOIN(prefix, _AbsorbKey)
#define Cyclist_SqueezeAny JOIN(prefix, _SqueezeAny)
#define Cyclist_Down JOIN(prefix, _Down)
#define Cyclist_Up JOIN(prefix, _Up)
#define Cyclist_Crypt JOIN(prefix, _Crypt)
/* Size and rate parameters supplied by the instantiation's parameter header. */
#define Cyclist_f_bPrime JOIN(prefix, _f_bPrime)
#define Cyclist_Rhash JOIN(prefix, _Rhash)
#define Cyclist_Rkin JOIN(prefix, _Rkin)
#define Cyclist_Rkout JOIN(prefix, _Rkout)
#define Cyclist_lRatchet JOIN(prefix, _lRatchet)
/* Optional full-block fast paths, only named when the platform announces them. */
#if defined(CyclistFullBlocks_supported)
#define Cyclist_AbsorbKeyedFullBlocks JOIN(prefix, _AbsorbKeyedFullBlocks)
#define Cyclist_AbsorbHashFullBlocks JOIN(prefix, _AbsorbHashFullBlocks)
#define Cyclist_SqueezeKeyedFullBlocks JOIN(prefix, _SqueezeKeyedFullBlocks)
#define Cyclist_SqueezeHashFullBlocks JOIN(prefix, _SqueezeHashFullBlocks)
#define Cyclist_EncryptFullBlocks JOIN(prefix, _EncryptFullBlocks)
#define Cyclist_DecryptFullBlocks JOIN(prefix, _DecryptFullBlocks)
/* ------- Cyclist internal interfaces ------- */
/*
 * Down: add the XiLen bytes of Xi into the start of the state, add the padding
 * byte 0x01 right after them (offset XiLen), and add the domain byte Cd into
 * the last state byte. In hash mode only bit 0 of Cd is used.
 * Leaves the instance in the Down phase.
 */
static void Cyclist_Down(Cyclist_Instance *instance, const uint8_t *Xi, unsigned int XiLen, uint8_t Cd)
SnP_AddBytes(instance->state, Xi, 0, XiLen);
SnP_AddByte(instance->state, 0x01, XiLen);
SnP_AddByte(instance->state, (instance->mode == Cyclist_ModeHash) ? (Cd & 0x01) : Cd, Cyclist_f_bPrime - 1);
instance->phase = Cyclist_PhaseDown;
/*
 * Up: outside hash mode, add the domain byte Cu into the last state byte;
 * then apply the permutation and copy the first YiLen state bytes to Yi.
 * Leaves the instance in the Up phase.
 * When OUTPUT is defined and instance->file is set, the difference against
 * the shadow state and the state after the permutation are also dumped to
 * that file.
 */
static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu)
#if defined(OUTPUT)
uint8_t s[Cyclist_f_bPrime];
if (instance->mode != Cyclist_ModeHash) {
SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1);
#if defined(OUTPUT)
if (instance->file != NULL) {
/* s = shadow state XOR current state, i.e. the data added since the shadow copy was taken */
SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime );
SnP_Permute( instance->state );
#if defined(OUTPUT)
if (instance->file != NULL) {
/* Refresh the shadow copy with the permuted state, then print both dumps */
memcpy( instance->stateShadow, instance->state, sizeof(instance->state) );
fprintf( instance->file, "Data XORed" );
displayByteString( instance->file, "", s, Cyclist_f_bPrime );
SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
fprintf( instance->file, "After f() ");
displayByteString( instance->file, "", s, Cyclist_f_bPrime );
instance->phase = Cyclist_PhaseUp;
SnP_ExtractBytes( instance->state, Yi, 0, YiLen );
/*
 * Absorb XLen bytes of X in blocks of at most r bytes.
 * Each block is preceded by an Up (with no output) unless the instance is
 * already in the Up phase. The domain byte Cd goes with the first block only;
 * later blocks use 0. The loop body runs at least once, so an empty input
 * still absorbs one empty block.
 * When CyclistFullBlocks_supported is defined, complete blocks remaining
 * after the first one are handed to the keyed or hash full-block routine,
 * chosen by comparing r with Cyclist_Rkin and Cyclist_Rhash.
 */
static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd)
unsigned int splitLen;
do {
if (instance->phase != Cyclist_PhaseUp) {
Cyclist_Up(instance, NULL, 0, 0);
splitLen = MyMin(XLen, r);
Cyclist_Down(instance, X, splitLen, Cd);
Cd = 0;
X += splitLen;
XLen -= splitLen;
#if defined(CyclistFullBlocks_supported)
if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) {
size_t lenProcessed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen);
X += lenProcessed;
XLen -= lenProcessed;
else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) {
size_t lenProcessed = Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen);
X += lenProcessed;
XLen -= lenProcessed;
} while ( XLen != 0 );
/*
 * Switch the instance from hash mode to keyed mode and absorb the key material.
 *
 * K, KLen             key bytes
 * ID, IDLen           optional key identifier placed after the key; ID may be
 *                     NULL when IDLen is 0
 * counter, counterLen optional counter, absorbed after the key block one byte
 *                     per block
 *
 * The key block is K || ID || (uint8_t)IDLen and has to fit into one
 * Cyclist_Rkin-byte block, i.e. KLen + IDLen <= Cyclist_Rkin - 1 (asserted).
 * The instance must still be in hash mode (asserted).
 */
static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
{
    uint8_t KID[Cyclist_Rkin];

    assert(instance->mode == Cyclist_ModeHash);
    assert((KLen + IDLen) <= (Cyclist_Rkin - 1));

    instance->mode = Cyclist_ModeKeyed;
    instance->Rabsorb = Cyclist_Rkin;
    instance->Rsqueeze = Cyclist_Rkout;
    if (KLen != 0) {
        memcpy(KID, K, KLen);
        /*
         * Passing a null pointer to memcpy is undefined even for a zero
         * length, and callers without a key ID pass ID == NULL, IDLen == 0.
         */
        if (IDLen != 0) {
            memcpy(KID + KLen, ID, IDLen);
        }
        KID[KLen + IDLen] = (uint8_t)IDLen;
        Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02);
        if (counterLen != 0) {
            /* Rate 1: the counter is absorbed one byte per block */
            Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00);
        }
    }
}
/*
 * Squeeze YLen bytes into Y.
 * The first block (at most Rsqueeze bytes) comes from an Up with domain byte
 * Cu. Every further block is produced by an empty Down followed by an Up with
 * domain byte 0. When CyclistFullBlocks_supported is defined, the keyed or
 * hash full-block routine is used for the remaining output where the length
 * allows.
 * NOTE(review): the keyed fast path is gated on YLen >= Cyclist_Rkin, while
 * the keyed squeeze rate set by Cyclist_AbsorbKey is Cyclist_Rkout - confirm
 * whether Cyclist_Rkout was intended as the threshold.
 */
static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu)
unsigned int len;
len = MyMin(YLen, instance->Rsqueeze );
Cyclist_Up(instance, Y, len, Cu);
Y += len;
YLen -= len;
while (YLen != 0) {
#if defined(CyclistFullBlocks_supported)
if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkin)) {
size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen);
Y += lenProcessed;
YLen -= lenProcessed;
else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) {
size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen);
Y += lenProcessed;
YLen -= lenProcessed;
Cyclist_Down(instance, NULL, 0, 0);
len = MyMin(YLen, instance->Rsqueeze );
Cyclist_Up(instance, Y, len, 0);
Y += len;
YLen -= len;
/*
 * Encrypt (decrypt == 0) or decrypt (decrypt != 0) IOLen bytes from I to O in
 * blocks of at most Cyclist_Rkout bytes.
 *
 * For every block: Up (domain byte 0x80 for the first block, 0x00 afterwards),
 * O = I XOR keystream taken from the start of the state, then Down with the
 * plaintext block. On encryption the plaintext is copied to a local buffer
 * first, so I and O may point to the same memory. The loop body runs at least
 * once, so an empty input still performs one Up/Down pair.
 *
 * When CyclistFullBlocks_supported is defined, complete blocks after the
 * first one are handed to the full-block routines.
 *
 * The keystream is extracted through the SnP_ExtractAndAddBytes alias, like
 * every other state access in this template, instead of naming one
 * permutation's function directly.
 */
static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt)
{
    unsigned int splitLen;
    uint8_t P[Cyclist_Rkout];
    uint8_t Cu = 0x80;

    do {
        if (decrypt != 0) {
            #if defined(CyclistFullBlocks_supported)
            if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
                size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen);
                I += lenProcessed;
                O += lenProcessed;
                IOLen -= lenProcessed;
            }
            else
            #endif
            {
                splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
                Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
                SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
                Cyclist_Down(instance, O, splitLen, 0x00);
                I += splitLen;
                O += splitLen;
                IOLen -= splitLen;
            }
        }
        else {
            #if defined(CyclistFullBlocks_supported)
            if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
                size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen);
                I += lenProcessed;
                O += lenProcessed;
                IOLen -= lenProcessed;
            }
            else
            #endif
            {
                splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
                memcpy(P, I, splitLen); /* keep the plaintext: O may alias I */
                Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
                SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
                Cyclist_Down(instance, P, splitLen, 0x00);
                I += splitLen;
                O += splitLen;
                IOLen -= splitLen;
            }
        }
        Cu = 0x00;
    } while ( IOLen != 0 );
}
/* ------- Cyclist interfaces ------- */
/*
 * Prepare a Cyclist instance.
 *
 * instance            object to initialize
 * K, KLen             key; KLen == 0 leaves the instance in hash mode
 * ID, IDLen           key identifier, forwarded to Cyclist_AbsorbKey
 * counter, counterLen counter, forwarded to Cyclist_AbsorbKey
 *
 * The instance starts in hash mode with both rates set to Cyclist_Rhash and
 * the phase set to Up. When a key is given, Cyclist_AbsorbKey is called to
 * switch to keyed mode.
 */
void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
{
    /* The permutation state must be in its initial condition before anything
       is absorbed; otherwise the result depends on stale memory. */
    SnP_StaticInitialize();
    SnP_Initialize(instance->state);
    instance->phase = Cyclist_PhaseUp;
    instance->mode = Cyclist_ModeHash;
    instance->Rabsorb = Cyclist_Rhash;
    instance->Rsqueeze = Cyclist_Rhash;
#ifdef OUTPUT
    instance->file = NULL;
    SnP_Initialize( instance->stateShadow );
#endif
    if (KLen != 0) {
        Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, counter, counterLen);
    }
}
/*
 * Absorb XLen bytes from X.
 * Thin wrapper: forwards to Cyclist_AbsorbAny with the instance's current
 * absorb rate and 0x03 as the last argument.
 */
void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, size_t XLen)
{
    const unsigned int rate = instance->Rabsorb;

    Cyclist_AbsorbAny(instance, X, XLen, rate, 0x03);
}
/*
 * Encrypt PLen bytes of plaintext P into C.
 * Only valid in keyed mode (checked with assert).
 */
void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen)
{
    assert(instance->mode == Cyclist_ModeKeyed);

    /* Last argument 0: run the shared worker in the encrypt direction. */
    Cyclist_Crypt(instance, P, C, PLen, 0);
}
/*
 * Decrypt CLen bytes of ciphertext C into P.
 * Only valid in keyed mode (checked with assert).
 */
void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen)
{
    assert(instance->mode == Cyclist_ModeKeyed);

    /* Last argument 1: run the shared worker in the decrypt direction. */
    Cyclist_Crypt(instance, C, P, CLen, 1);
}
/*
 * Produce YLen bytes of output in Y.
 * Forwards to Cyclist_SqueezeAny with a Cu value of 0x40.
 */
void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen)
{
    const uint8_t colorUp = 0x40;

    Cyclist_SqueezeAny(instance, Y, YLen, colorUp);
}
/*
 * Produce KLen bytes of derived key material in K.
 * Only valid in keyed mode (checked with assert). Forwards to
 * Cyclist_SqueezeAny with a Cu value of 0x20.
 */
void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen)
{
    const uint8_t colorUp = 0x20;

    assert(instance->mode == Cyclist_ModeKeyed);
    Cyclist_SqueezeAny(instance, K, KLen, colorUp);
}
/*
 * Ratchet the keyed state: squeeze Cyclist_lRatchet bytes (Cu = 0x10) and
 * absorb them again, which overwrites that part of the state.
 * Only valid in keyed mode (checked with assert).
 *
 * The squeezed bytes are exactly the information the ratchet is meant to
 * destroy, so the temporary copy is wiped before returning instead of being
 * left behind on the stack.
 */
void Cyclist_Ratchet(Cyclist_Instance *instance)
{
    uint8_t buffer[Cyclist_lRatchet];
    volatile uint8_t *wipe = buffer;
    size_t i;

    assert(instance->mode == Cyclist_ModeKeyed);
    /* Squeeze then absorb is the same as overwriting with zeros */
    Cyclist_SqueezeAny(instance, buffer, sizeof(buffer), 0x10);
    Cyclist_AbsorbAny(instance, buffer, sizeof(buffer), instance->Rabsorb, 0x00);

    /* Volatile stores so the compiler cannot drop the wipe as a dead write. */
    for (i = 0; i < sizeof(buffer); ++i) {
        wipe[i] = 0;
    }
}
/*
 * Drop every macro alias used by the template above: the SnP_* permutation
 * names, the Cyclist_* function/type names and the Cyclist_* parameter names.
 * Presumably this allows the template to be included again for another
 * instantiation - confirm against the including file.
 * NOTE(review): no #endif matching the #if below is visible in this listing.
 */
#undef SnP_StaticInitialize
#undef SnP_Initialize
#undef SnP_AddBytes
#undef SnP_AddByte
#undef SnP_OverwriteBytes
#undef SnP_ExtractBytes
#undef SnP_ExtractAndAddBytes
#undef Cyclist_Instance
#undef Cyclist_Initialize
#undef Cyclist_Absorb
#undef Cyclist_Encrypt
#undef Cyclist_Decrypt
#undef Cyclist_Squeeze
#undef Cyclist_SqueezeKey
#undef Cyclist_Ratchet
#undef Cyclist_AbsorbAny
#undef Cyclist_AbsorbKey
#undef Cyclist_SqueezeAny
#undef Cyclist_Down
#undef Cyclist_Up
#undef Cyclist_Crypt
#undef Cyclist_f_bPrime
#undef Cyclist_Rhash
#undef Cyclist_Rkin
#undef Cyclist_Rkout
#undef Cyclist_lRatchet
/* The full-block helper aliases only exist when the SnP header advertises them. */
#if defined(CyclistFullBlocks_supported)
#undef Cyclist_AbsorbKeyedFullBlocks
#undef Cyclist_AbsorbHashFullBlocks
#undef Cyclist_SqueezeKeyedFullBlocks
#undef Cyclist_SqueezeHashFullBlocks
#undef Cyclist_EncryptFullBlocks
#undef Cyclist_DecryptFullBlocks
The eXtended Keccak Code Package (XKCP)
The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/*
 * State-and-permutation (SnP) interface of the ARM assembler Xoodoo
 * implementation.
 *
 * - Xoodoo_stateSizeInBytes is 3*4*4 = 48; Xoodoo_stateAlignment is 4.
 * - Xoodoo_StaticInitialize() expands to nothing.
 * - Xoodoo_AddByte XORs one byte into the state at a byte offset.
 * - Xoodoo_AddBytes / Xoodoo_OverwriteBytes / Xoodoo_ExtractBytes /
 *   Xoodoo_ExtractAndAddBytes operate on `length` bytes starting at byte
 *   `offset` of the state.
 * - Xoodoo_Permute_6rounds / Xoodoo_Permute_12rounds apply the permutation;
 *   the generic Xoodoo_Permute_Nrounds prototype is commented out here.
 * - Xoodoo_FastXoofff_supported and CyclistFullBlocks_supported advertise the
 *   optional fast-loop helpers declared after them.
 *
 * NOTE(review): the "For the documentation" doc comment below has no closing
 * delimiter in this listing.
 */
#ifndef _Xoodoo_SnP_h_
#define _Xoodoo_SnP_h_
#include <stddef.h>
#include <stdint.h>
/** For the documentation, see SnP-documentation.h.
#define Xoodoo_implementation "32-bit optimized ARM assembler implementation"
#define Xoodoo_stateSizeInBytes (3*4*4)
#define Xoodoo_stateAlignment 4
#define Xoodoo_StaticInitialize()
void Xoodoo_Initialize(void *state);
#define Xoodoo_AddByte(argS, argData, argOffset) ((uint8_t*)argS)[argOffset] ^= (argData)
void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount);
//void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds);
void Xoodoo_Permute_6rounds(void *state);
void Xoodoo_Permute_12rounds(void *state);
void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length);
void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length);
#define Xoodoo_FastXoofff_supported
void Xoofff_AddIs(uint8_t *output, const uint8_t *input, size_t bitLen);
size_t Xoofff_CompressFastLoop(uint8_t *kRoll, uint8_t *xAccu, const uint8_t *input, size_t length);
size_t Xoofff_ExpandFastLoop(uint8_t *yAccu, const uint8_t *kRoll, uint8_t *output, size_t length);
#define CyclistFullBlocks_supported
size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen);
size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen);
size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen);
size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen);
size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);
size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);
@ The eXtended Keccak Code Package (XKCP)
@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
@ For more information, feedback or questions, please refer to the Keccak Team website:
@ To the extent possible under law, the implementer has waived all copyright
@ and related or neighboring rights to the source code in this file.
@ WARNING: These functions work only on little endian CPUs with the ARMv6m architecture (Cortex-M0, ...).
.syntax unified
@ ----------------------------------------------------------------------------
@ void Xoodoo_Initialize(void *state)
@
@ Sets the whole state to zero: four post-incrementing stores of three zero
@ words each write 12 words (48 bytes) starting at r0 (state).
@ NOTE(review): the function entry label does not appear in this listing.
.align 4
.global Xoodoo_Initialize
.type Xoodoo_Initialize, %function;
@ r1-r3 are the three zero words written by each store below
movs r1, #0
movs r2, #0
movs r3, #0
stmia r0!, { r1 - r3 }
stmia r0!, { r1 - r3 }
stmia r0!, { r1 - r3 }
stmia r0!, { r1 - r3 }
bx lr
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
@ XORs length bytes of data into the state, starting at byte offset.
@ On entry: r0 = state, r1 = data, r2 = offset, r3 = length.
@ When length >= 4 and both (state + offset) and data are 4-byte aligned, whole
@ words are processed first. Whatever remains (or everything, when that test
@ fails) is processed one byte at a time, from the highest index down to 0.
@ NOTE(review): the function entry label and the _Bytes, _BytesLoop and _Exit
@ labels targeted by the branches do not appear in this listing.
.global Xoodoo_AddBytes
.type Xoodoo_AddBytes, %function;
push {r4,lr}
adds r0, r0, r2 @ state += offset
subs r3, r3, #4 @ if length >= 4
bcc Xoodoo_AddBytes_Bytes
movs r2, r0 @ and data pointer and offset both 32-bit aligned
orrs r2, r2, r1
@ non-zero result when either pointer has one of its two low bits set
lsls r2, #30
bne Xoodoo_AddBytes_Bytes
Xoodoo_AddBytes_LanesLoop: @ then, perform on words
ldr r2, [r0]
ldmia r1!, {r4}
eors r2, r2, r4
stmia r0!, {r2}
subs r3, r3, #4
bcs Xoodoo_AddBytes_LanesLoop
@ undo the last subtraction: r3 = number of bytes still to process
adds r3, r3, #4
beq Xoodoo_AddBytes_Exit
@ byte loop, r3 is the index of the byte being processed
subs r3, r3, #1
ldrb r2, [r0, r3]
ldrb r4, [r1, r3]
eors r2, r2, r4
strb r2, [r0, r3]
subs r3, r3, #1
bcs Xoodoo_AddBytes_BytesLoop
pop {r4,pc}
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
@ Copies length bytes of data over the state, starting at byte offset.
@ On entry: r0 = state, r1 = data, r2 = offset, r3 = length.
@ Whole words are copied when length >= 4 and both (state + offset) and data
@ are 4-byte aligned; the rest is copied byte by byte, highest index first.
@ NOTE(review): the function entry label and the _Bytes, _BytesLoop and _Exit
@ labels targeted by the branches do not appear in this listing.
.global Xoodoo_OverwriteBytes
.type Xoodoo_OverwriteBytes, %function;
adds r0, r0, r2 @ state += offset
subs r3, r3, #4 @ if length >= 4
bcc Xoodoo_OverwriteBytes_Bytes
movs r2, r0 @ and data pointer and offset both 32-bit aligned
orrs r2, r2, r1
lsls r2, #30
bne Xoodoo_OverwriteBytes_Bytes
Xoodoo_OverwriteBytes_LanesLoop: @ then, perform on words
ldmia r1!, {r2}
stmia r0!, {r2}
subs r3, r3, #4
bcs Xoodoo_OverwriteBytes_LanesLoop
@ undo the last subtraction: r3 = number of bytes still to copy
adds r3, r3, #4
beq Xoodoo_OverwriteBytes_Exit
@ byte loop, r3 is the index of the byte being copied
subs r3, r3, #1
ldrb r2, [r1, r3]
strb r2, [r0, r3]
subs r3, r3, #1
bcs Xoodoo_OverwriteBytes_BytesLoop
bx lr
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount)
@
@ Zeroes the first byteCount bytes of the state.
@ On entry: r0 = state, r1 = byteCount.
@ byteCount / 4 whole words are cleared first, then the byteCount % 4
@ remaining bytes are cleared individually.
@ NOTE(review): the function entry label and the _LoopLanes, _Bytes,
@ _LoopBytes and _Exit labels do not appear in this listing.
.global Xoodoo_OverwriteWithZeroes
.type Xoodoo_OverwriteWithZeroes, %function;
movs r3, #0
@ r2 = number of whole words
lsrs r2, r1, #2
beq Xoodoo_OverwriteWithZeroes_Bytes
stm r0!, { r3 }
subs r2, r2, #1
bne Xoodoo_OverwriteWithZeroes_LoopLanes
@ keep only the two low bits of byteCount (shift up, test, shift back down)
lsls r1, r1, #32-2
beq Xoodoo_OverwriteWithZeroes_Exit
lsrs r1, r1, #32-2
@ byte loop, r1 counts down and is the index of the byte being cleared
subs r1, r1, #1
strb r3, [r0, r1]
bne Xoodoo_OverwriteWithZeroes_LoopBytes
bx lr
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
@
@ Copies length bytes of the state, starting at byte offset, into data.
@ On entry: r0 = state, r1 = data, r2 = offset, r3 = length.
@ Whole words are copied when length >= 4 and both (state + offset) and data
@ are 4-byte aligned; the rest is copied byte by byte, highest index first.
@ NOTE(review): the function entry label and the _Bytes, _BytesLoop and _Exit
@ labels targeted by the branches do not appear in this listing.
.global Xoodoo_ExtractBytes
.type Xoodoo_ExtractBytes, %function;
adds r0, r0, r2 @ state += offset
subs r3, r3, #4 @ if length >= 4
bcc Xoodoo_ExtractBytes_Bytes
movs r2, r0 @ and data pointer and offset both 32-bit aligned
orrs r2, r2, r1
lsls r2, #30
bne Xoodoo_ExtractBytes_Bytes
Xoodoo_ExtractBytes_LanesLoop: @ then, perform on words
ldmia r0!, {r2}
stmia r1!, {r2}
subs r3, r3, #4
bcs Xoodoo_ExtractBytes_LanesLoop
@ undo the last subtraction: r3 = number of bytes still to copy
adds r3, r3, #4
beq Xoodoo_ExtractBytes_Exit
@ byte loop, r3 is the index of the byte being copied
subs r3, r3, #1
ldrb r2, [r0, r3]
strb r2, [r1, r3]
subs r3, r3, #1
bcs Xoodoo_ExtractBytes_BytesLoop
bx lr
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_ExtractAndAddBytes(void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
@
@ Writes output[i] = state[offset + i] ^ input[i] for length bytes; the state
@ itself is only read.
@ On entry: r0 = state, r1 = input, r2 = output, r3 = offset; length is the
@ fifth argument and is read from the stack.
@ Whole words are used when length >= 4 and all three pointers are 4-byte
@ aligned; the rest is processed byte by byte, highest index first.
@ NOTE(review): the function entry label and the _Bytes, _BytesLoop and _Exit
@ labels targeted by the branches do not appear in this listing.
.global Xoodoo_ExtractAndAddBytes
.type Xoodoo_ExtractAndAddBytes, %function;
push {r4,r5}
adds r0, r0, r3 @ state += offset (offset register no longer needed, reuse for length)
@ offset 8 skips the two registers pushed above
ldr r3, [sp, #8] @ get length argument from stack
subs r3, r3, #4 @ if length >= 4
bcc Xoodoo_ExtractAndAddBytes_Bytes
movs r5, r0 @ and input/output/state pointer all 32-bit aligned
orrs r5, r5, r1
orrs r5, r5, r2
lsls r5, #30
bne Xoodoo_ExtractAndAddBytes_Bytes
Xoodoo_ExtractAndAddBytes_LanesLoop: @ then, perform on words
ldmia r0!, {r5}
ldmia r1!, {r4}
eors r5, r5, r4
stmia r2!, {r5}
subs r3, r3, #4
bcs Xoodoo_ExtractAndAddBytes_LanesLoop
@ undo the last subtraction: r3 = number of bytes still to process
adds r3, r3, #4
beq Xoodoo_ExtractAndAddBytes_Exit
@ byte loop, r3 is the index of the byte being processed
subs r3, r3, #1
ldrb r5, [r0, r3]
ldrb r4, [r1, r3]
eors r5, r5, r4
strb r5, [r2, r3]
subs r3, r3, #1
bcs Xoodoo_ExtractAndAddBytes_BytesLoop
pop {r4,r5}
bx lr
.align 4
@ ----------------------------------------------------------------------------
@ offsets in RAM state
@ _oAyx is the byte offset of 32-bit lane number 4*y + x (12 lanes in total)
.equ _oA00 , 0*4
.equ _oA01 , 1*4
.equ _oA02 , 2*4
.equ _oA03 , 3*4
.equ _oA10 , 4*4
.equ _oA11 , 5*4
.equ _oA12 , 6*4
.equ _oA13 , 7*4
.equ _oA20 , 8*4
.equ _oA21 , 9*4
.equ _oA22 , 10*4
.equ _oA23 , 11*4
@ possible locations of state lanes
@ (selector values tested by the loc argument of the mXor3, mXor and mChi3 macros)
.equ locRegL , 1
.equ locRegH , 2
.equ locMem , 3
@ ----------------------------------------------------------------------------
@ rotation amounts; mRound uses _r0, _r1, _w1 and _e1 as (32 - amount)
@ operands of rors, i.e. as left-rotation counts
.equ _r0 , 5
.equ _r1 , 14
.equ _r2 , 1
.equ _w1 , 11
.equ _e0 , 2
.equ _e1 , 8
@ ----------------------------------------------------------------------------
@ mXor3 ro, a0, a1, a2, loc, tt
@ Computes ro = a0 ^ a1 ^ a2. a1 and a2 are registers; loc tells where a0 is:
@ locRegL: a0 is XORed directly; locRegH: a0 is first copied into the scratch
@ register tt; the remaining case (used with locMem) treats a0 as an offset
@ from sp and loads it into tt.
@ NOTE(review): the .else/.endif/.endm directives delimiting these cases do
@ not appear in this listing.
.macro mXor3 ro, a0, a1, a2, loc, tt
mov \ro, \a1
eors \ro, \ro, \a2
.if \loc == locRegL
eors \ro, \ro, \a0
.if \loc == locRegH
mov \tt, \a0
ldr \tt, [sp, #\a0]
eors \ro, \ro, \tt
@ mXor ro, ri, tt, loc
@ XORs register ri into ro, where loc tells what ro is:
@ locRegL: ro is updated in place; locRegH: the value goes through the
@ scratch register tt and is moved back; the remaining case (used with
@ locMem) treats ro as an offset from sp and does load / XOR / store via tt.
@ NOTE(review): the .else/.endif/.endm directives delimiting these cases do
@ not appear in this listing.
.macro mXor ro, ri, tt, loc
.if \loc == locRegL
eors \ro, \ro, \ri
.if \loc == locRegH
mov \tt, \ro
eors \tt, \tt, \ri
mov \ro, \tt
ldr \tt, [sp, #\ro]
eors \tt, \tt, \ri
str \tt, [sp, #\ro]
@ mChi3 a0, a1, a2, r0, r1, a0s, loc
@ Non-linear step on one column (a0, a1, a2), using r0 and r1 as scratch:
@   a0 ^= a2 & ~a1, then a1 ^= a0 & ~a2, then a2 ^= a1 & ~a0
@ where each step uses the values already updated by the previous steps.
@ a0 must be a register holding the lane; when loc is not locRegL the new a0
@ is also written back to a0s (a register for locRegH, otherwise an offset
@ from sp). a1 is accessed with mov only; a2 is used directly in bics/eors.
@ NOTE(review): the .else/.endif/.endm directives delimiting the cases do
@ not appear in this listing.
.macro mChi3 a0,a1,a2,r0,r1,a0s,loc
mov \r1, \a2
mov \r0, \a1
bics \r1, \r1, \r0
eors \a0, \a0, \r1
.if \loc != locRegL
.if \loc == locRegH
mov \a0s, \a0
str \a0, [sp, #\a0s]
mov \r0, \a0
bics \r0, \r0, \a2
mov \r1, \a1
eors \r1, \r1, \r0
mov \a1, \r1
bics \r1, \r1, \a0
eors \a2, \a2, \r1
@ mRound offsetRC, offsetA03
@ One Xoodoo round on a state held partly in registers and partly on the stack.
@ Lane placement expected on entry, as set up by the users of this macro in
@ this file:
@   r3, r8, r9, [sp + offsetA03]  = first four lanes
@   r10, r11, r12, lr             = next four lanes
@   r4, r5, r6, r7                = last four lanes
@ offsetRC is the stack offset of a pointer to the next round constant; the
@ pointer is advanced by one word per round. r0-r2 are scratch.
@ NOTE(review): the closing .endm does not appear in this listing.
.macro mRound offsetRC, offsetA03
@ Theta: Column Parity Mixer
@ For each column: r0 = parity of the column, and the value
@ r1 = ROTL(P, 5) ^ ROTL(P, 14) derived from the previously computed parity P
@ is XORed into the three lanes of the current column.
mXor3 r0, \offsetA03, lr, r7, locMem, r2
mov r1, r0
movs r2, #32-(_r1-_r0)
rors r1, r1, r2
eors r1, r1, r0
movs r2, #32-_r0
rors r1, r1, r2
mXor3 r0, r3, r10, r4, locRegL, r2
mXor r3, r1, r2, locRegL
mXor r10, r1, r2, locRegH
mXor r4, r1, r2, locRegL
mov r1, r0
movs r2, #32-(_r1-_r0)
rors r1, r1, r2
eors r1, r1, r0
movs r2, #32-_r0
rors r1, r1, r2
mXor3 r0, r8, r11, r5, locRegH, r2
mXor r8, r1, r2, locRegH
mXor r11, r1, r2, locRegH
mXor r5, r1, r2, locRegL
mov r1, r0
movs r2, #32-(_r1-_r0)
rors r1, r1, r2
eors r1, r1, r0
movs r2, #32-_r0
rors r1, r1, r2
mXor3 r0, r9, r12, r6, locRegH, r2
mXor r9, r1, r2, locRegH
mXor r12, r1, r2, locRegH
mXor r6, r1, r2, locRegL
mov r1, r0
movs r2, #32-(_r1-_r0)
rors r1, r1, r2
eors r1, r1, r0
movs r2, #32-_r0
rors r1, r1, r2
mXor \offsetA03, r1, r2, locMem
mXor lr, r1, r2, locRegH
mXor r7, r1, r2, locRegL
@ Rho-west: Plane shift
@ r4-r7 are rotated left by _w1 bits; the lanes in r10, r11, r12, lr move
@ one register along (r10 receives the old lr)
movs r0, #32-_w1
rors r4, r4, r0
rors r5, r5, r0
rors r6, r6, r0
rors r7, r7, r0
mov r0, lr
mov lr, r12
mov r12, r11
mov r11, r10
mov r10, r0
@ Iota: round constant
@ load the constant, advance the pointer, XOR the constant into r3
ldr r0, [sp, #\offsetRC]
ldmia r0!, {r1}
str r0, [sp, #\offsetRC]
eors r3, r3, r1
@ Chi: non linear step, on columns
@ lanes living in r8, r9 or on the stack are first copied into r2
mChi3 r3, r10, r4, r0, r1, r3, locRegL
mov r2, r8
mChi3 r2, r11, r5, r0, r1, r8, locRegH
mov r2, r9
mChi3 r2, r12, r6, r0, r1, r9, locRegH
ldr r2, [sp, #\offsetA03]
mChi3 r2, lr, r7, r0, r1, \offsetA03, locMem
@ Rho-east: Plane shift
@ r10, r11, r12, lr are rotated left by 1 bit; r4-r7 are rotated left by
@ _e1 bits and then swapped pairwise (r4 with r6, r5 with r7)
movs r0, #32-1
mov r1, r10
rors r1, r1, r0
mov r10, r1
mov r1, r11
rors r1, r1, r0
mov r11, r1
mov r1, r12
rors r1, r1, r0
mov r12, r1
mov r1, lr
rors r1, r1, r0
mov lr, r1
movs r0, #32-_e1
rors r4, r4, r0
rors r5, r5, r0
rors r6, r6, r0
rors r7, r7, r0
mov r0, r4
mov r4, r6
mov r6, r0
mov r0, r5
mov r5, r7
mov r7, r0
@ ----------------------------------------------------------------------------
@ void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds)
@
@ Applies nrounds rounds of the permutation to the 12-word state at r0.
@ On entry: r0 = state, r1 = nrounds.
@ The round-constant pointer starts at (address of
@ Xoodoo_Permute_RoundConstants12) - 4 * nrounds, and the round loop stops
@ when the next constant word is 0, so the table below must end with 0.
@ Stack frame: 8 bytes of locals (lane A03 and the round-constant pointer),
@ then the saved r0 (state), the saved copies of r8-r11, and r7.
@ NOTE(review): the function entry label, the loop/done labels and the
@ round-constant table labels do not appear in this listing.
@ offsets on stack
.equ Xoodoo_Permute_Nrounds_offsetA03 , 0
.equ Xoodoo_Permute_Nrounds_offsetRC , 4
.equ Xoodoo_Permute_Nrounds_SAS , 8
.equ Xoodoo_Permute_Nrounds_offsetState , Xoodoo_Permute_Nrounds_SAS
.global Xoodoo_Permute_Nrounds
.type Xoodoo_Permute_Nrounds, %function;
push {r4-r6,lr}
@ r8-r11 are copied to low registers so they can be pushed
mov r2, r8
mov r3, r9
mov r4, r10
mov r5, r11
push {r0,r2-r5,r7}
sub sp, #Xoodoo_Permute_Nrounds_SAS
@ r2 = pointer to the first round constant to use
adr r2, Xoodoo_Permute_RoundConstants12
lsls r1, r1, #2
subs r2, r2, r1
str r2, [sp, #Xoodoo_Permute_Nrounds_offsetRC]
@ load the 12 lanes into the placement expected by mRound
ldm r0!, {r3,r5,r6,r7}
mov r8, r5
mov r9, r6
str r7, [sp, #Xoodoo_Permute_Nrounds_offsetA03]
ldm r0!, {r4,r5,r6,r7}
mov r10, r4
mov r11, r5
mov r12, r6
mov lr, r7
ldm r0!, {r4,r5,r6,r7}
mRound Xoodoo_Permute_Nrounds_offsetRC, Xoodoo_Permute_Nrounds_offsetA03
@ a zero word at the round-constant pointer ends the loop
ldr r0, [sp, #Xoodoo_Permute_Nrounds_offsetRC]
ldr r0, [r0]
cmp r0, #0
beq Xoodoo_Permute_Nrouds_Done
b Xoodoo_Permute_Nrouds_Loop
@ write the 12 lanes back to the state
ldr r0, [sp, #Xoodoo_Permute_Nrounds_offsetState]
stm r0!, {r3}
mov r1, r8
mov r2, r9
ldr r3, [sp, #Xoodoo_Permute_Nrounds_offsetA03]
stm r0!, {r1,r2,r3}
mov r1, r10
mov r2, r11
mov r3, r12
stm r0!, {r1,r2,r3}
mov r1, lr
stm r0!, {r1,r4,r5,r6,r7}
@ release the locals, then restore r8-r11 through r1-r4
add sp, #Xoodoo_Permute_Nrounds_SAS
pop {r0-r4,r7}
mov r8, r1
mov r9, r2
mov r10, r3
mov r11, r4
pop {r4-r6,pc}
.align 4
@ round constants: 12 words followed by the terminating 0
.long 0x00000058
.long 0x00000038
.long 0x000003C0
.long 0x000000D0
.long 0x00000120
.long 0x00000014
.long 0x00000060
.long 0x0000002C
.long 0x00000380
.long 0x000000F0
.long 0x000001A0
.long 0x00000012
.long 0
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_Permute_6rounds( void *state )
@
@ Sets the round count argument (r1) to 6 and branches to
@ Xoodoo_Permute_Nrounds; r0 (state) is passed through unchanged and the
@ return to the caller happens from there.
@ NOTE(review): the function entry label does not appear in this listing.
.global Xoodoo_Permute_6rounds
.type Xoodoo_Permute_6rounds, %function;
movs r1, #6
b Xoodoo_Permute_Nrounds
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_Permute_12rounds( void *state )
@
@ Sets the round count argument (r1) to 12 and branches to
@ Xoodoo_Permute_Nrounds; r0 (state) is passed through unchanged and the
@ return to the caller happens from there.
@ NOTE(review): the function entry label does not appear in this listing.
.global Xoodoo_Permute_12rounds
.type Xoodoo_Permute_12rounds, %function;
movs r1, #12
b Xoodoo_Permute_Nrounds
.align 4
@ Block size used by the Xoofff fast loops: 3*4*4 = 48 bytes
.equ Xoofff_BlockSize , 3*4*4
@ ----------------------------------------------------------------------------
@ void Xoofff_AddIs(BitSequence *output, const BitSequence *input, BitLength bitLen)
@
@ XORs the first bitLen bits of input into output.
@ On entry: r0 = output, r1 = input, r2 = bitLen.
@ When both pointers are 4-byte aligned, data is processed 16 bytes at a time,
@ then 4 bytes at a time; remaining whole bytes are processed one at a time.
@ A final partial byte is XORed and then masked with (1 << remaining bits) - 1
@ before being stored.
@ NOTE(review): the function entry label and the loop/exit labels targeted by
@ the branches do not appear in this listing.
.global Xoofff_AddIs
.type Xoofff_AddIs, %function;
push {r4-r6,lr}
movs r3, r0 @ check input and output pointer both 32-bit aligned
orrs r3, r3, r1
lsls r3, r3, #30
bne Xoofff_AddIs_Bytes
@ 16 bytes (two 8-byte halves) per iteration
subs r2, r2, #16*8
bcc Xoofff_AddIs_LessThan16
ldr r3, [r0, #0]
ldr r4, [r0, #4]
ldmia r1!, {r5,r6}
eors r3, r3, r5
eors r4, r4, r6
stmia r0!, {r3,r4}
ldr r3, [r0, #0]
ldr r4, [r0, #4]
ldmia r1!, {r5,r6}
eors r3, r3, r5
eors r4, r4, r6
stmia r0!, {r3,r4}
subs r2, r2, #16*8
bcs Xoofff_AddIs_16Loop
adds r2, r2, #16*8
beq Xoofff_AddIs_Return
@ 4 bytes per iteration
subs r2, r2, #4*8
bcc Xoofff_AddIs_LessThan4
ldr r3, [r0]
ldmia r1!, {r4}
eors r3, r3, r4
stmia r0!, {r3}
subs r2, r2, #4*8
bcs Xoofff_AddIs_4Loop
adds r2, r2, #4*8
beq Xoofff_AddIs_Return
@ 1 byte per iteration
subs r2, r2, #8
bcc Xoofff_AddIs_LessThan1
ldrb r3, [r0]
ldrb r4, [r1]
adds r1, r1, #1
eors r3, r3, r4
strb r3, [r0]
adds r0, r0, #1
subs r2, r2, #8
bcs Xoofff_AddIs_1Loop
adds r2, r2, #8
beq Xoofff_AddIs_Return
@ last partial byte: r2 = number of bits left, r1 becomes the bit mask
ldrb r3, [r0]
ldrb r4, [r1]
movs r1, #1
eors r3, r3, r4
lsls r1, r1, r2
subs r1, r1, #1
ands r3, r3, r1
strb r3, [r0]
pop {r4-r6,pc}
.align 4
@ mLdu rv, ri, tt
@ Loads a 32-bit little-endian word into rv, one byte at a time, from the
@ address in ri (no alignment needed), then advances ri by 4. tt is scratch.
@ NOTE(review): the closing .endm does not appear in this listing.
.macro mLdu rv, ri, tt
ldrb \rv, [\ri, #3]
lsls \rv, \rv, #8
ldrb \tt, [\ri, #2]
orrs \rv, \rv, \tt
lsls \rv, \rv, #8
ldrb \tt, [\ri, #1]
orrs \rv, \rv, \tt
lsls \rv, \rv, #8
ldrb \tt, [\ri, #0]
orrs \rv, \rv, \tt
adds \ri, \ri, #4
@ ----------------------------------------------------------------------------
@ size_t Xoofff_CompressFastLoop(unsigned char *kRoll, unsigned char *xAccu, const unsigned char *input, size_t length)
@
@ On entry: r0 = kRoll, r1 = xAccu, r2 = input, r3 = length.
@ Per 48-byte block: load kRoll, XOR the input block into it, run the round
@ loop over the 6-constant table below, XOR the result into xAccu, then roll
@ kRoll in memory. Returns (in r0) the number of input bytes consumed.
@ The length kept on the stack is the caller's length minus one block; the
@ loop repeats while subtracting another block does not borrow.
@ Stack frame (lowest address first): 8 bytes of locals (lane A03 and the
@ round-constant pointer), kRoll, running input pointer, saved r8-r11, then
@ the pushed r1-r7 and lr, which provide xAccu, the initial input pointer and
@ the length slot.
@ NOTE(review): the function entry label, the loop labels and the table
@ label do not appear in this listing.
@ offsets on stack
.equ Xoofff_CompressFastLoop_offsetA03 , 0
.equ Xoofff_CompressFastLoop_offsetRC , 4
.equ Xoofff_CompressFastLoop_SAS , 8
.equ Xoofff_CompressFastLoop_kRoll , Xoofff_CompressFastLoop_SAS+0
.equ Xoofff_CompressFastLoop_input , Xoofff_CompressFastLoop_SAS+4
.equ Xoofff_CompressFastLoop_xAccu , Xoofff_CompressFastLoop_SAS+8+16
.equ Xoofff_CompressFastLoop_iInput , Xoofff_CompressFastLoop_SAS+12+16
.equ Xoofff_CompressFastLoop_length , Xoofff_CompressFastLoop_SAS+16+16
.global Xoofff_CompressFastLoop
.type Xoofff_CompressFastLoop, %function;
subs r3, #Xoofff_BlockSize @ length must be greater than block size
push {r1-r7,lr}
mov r4, r8
mov r5, r9
mov r6, r10
mov r7, r11
push {r0,r2,r4-r7}
sub sp, #Xoofff_CompressFastLoop_SAS
ldm r0!, {r3,r5,r6,r7} @ get initial kRoll
mov r8, r5
mov r9, r6
str r7, [sp, #Xoofff_CompressFastLoop_offsetA03]
ldm r0!, {r4,r5,r6,r7}
mov r10, r4
mov r11, r5
mov r12, r6
mov lr, r7
ldm r0!, {r4,r5,r6,r7}
@ reset the round-constant pointer to the start of the 6-round table
adr r1, Xoofff_CompressFastLoop_RoundConstants6
str r1, [sp, #Xoofff_CompressFastLoop_offsetRC]
ldr r0, [sp, #Xoofff_CompressFastLoop_input] @ add input
@ word loads are only used when the input pointer is 4-byte aligned
lsls r1, r0, #30
bne Xoofff_CompressFastLoop_Unaligned
ldmia r0!, {r1}
eors r3, r3, r1
ldmia r0!, {r1}
mov r2, r8
eors r2, r2, r1
mov r8, r2
ldmia r0!, {r1}
mov r2, r9
eors r2, r2, r1
mov r9, r2
ldmia r0!, {r1}
ldr r2, [sp, #Xoofff_CompressFastLoop_offsetA03]
eors r2, r2, r1
str r2, [sp, #Xoofff_CompressFastLoop_offsetA03]
ldmia r0!, {r1}
mov r2, r10
eors r2, r2, r1
mov r10, r2
ldmia r0!, {r1}
mov r2, r11
eors r2, r2, r1
mov r11, r2
ldmia r0!, {r1}
mov r2, r12
eors r2, r2, r1
mov r12, r2
ldmia r0!, {r1}
mov r2, lr
eors r2, r2, r1
mov lr, r2
ldmia r0!, {r1,r2}
eors r4, r4, r1
eors r5, r5, r2
ldmia r0!, {r1,r2}
eors r6, r6, r1
eors r7, r7, r2
b Xoofff_CompressFastLoop_Permute
.align 4
@ round constants: 6 words followed by the terminating 0
.long 0x00000060
.long 0x0000002C
.long 0x00000380
.long 0x000000F0
.long 0x000001A0
.long 0x00000012
.long 0
@ unaligned input: same additions, with each word assembled from bytes by mLdu
mLdu r1, r0, r2
eors r3, r3, r1
mLdu r1, r0, r2
mov r2, r8
eors r2, r2, r1
mov r8, r2
mLdu r1, r0, r2
mov r2, r9
eors r2, r2, r1
mov r9, r2
mLdu r1, r0, r2
ldr r2, [sp, #Xoofff_CompressFastLoop_offsetA03]
eors r2, r2, r1
str r2, [sp, #Xoofff_CompressFastLoop_offsetA03]
mLdu r1, r0, r2
mov r2, r10
eors r2, r2, r1
mov r10, r2
mLdu r1, r0, r2
mov r2, r11
eors r2, r2, r1
mov r11, r2
mLdu r1, r0, r2
mov r2, r12
eors r2, r2, r1
mov r12, r2
mLdu r1, r0, r2
mov r2, lr
eors r2, r2, r1
mov lr, r2
mLdu r1, r0, r2
eors r4, r4, r1
mLdu r1, r0, r2
eors r5, r5, r1
mLdu r1, r0, r2
eors r6, r6, r1
mLdu r1, r0, r2
eors r7, r7, r1
@ remember how far the input has been consumed
str r0, [sp, #Xoofff_CompressFastLoop_input]
mRound Xoofff_CompressFastLoop_offsetRC, Xoofff_CompressFastLoop_offsetA03
@ a zero word at the round-constant pointer ends the round loop
ldr r0, [sp, #Xoofff_CompressFastLoop_offsetRC]
ldr r0, [r0]
cmp r0, #0
beq Xoofff_CompressFastLoop_PermuteDone
b Xoofff_CompressFastLoop_PermuteLoop
@ Extract and add into xAccu
ldr r0, [sp, #Xoofff_CompressFastLoop_xAccu]
ldr r1, [r0]
eors r1, r1, r3
stmia r0!, {r1}
ldr r1, [r0]
mov r2, r8
eors r1, r1, r2
stmia r0!, {r1}
ldr r1, [r0]
mov r2, r9
eors r1, r1, r2
stmia r0!, {r1}
ldr r1, [r0]
ldr r2, [sp, #Xoofff_CompressFastLoop_offsetA03]
eors r1, r1, r2
stmia r0!, {r1}
ldr r1, [r0]
mov r2, r10
eors r1, r1, r2
stmia r0!, {r1}
ldr r1, [r0]
mov r2, r11
eors r1, r1, r2
stmia r0!, {r1}
ldr r1, [r0]
mov r2, r12
eors r1, r1, r2
stmia r0!, {r1}
ldr r1, [r0]
mov r2, lr
eors r1, r1, r2
stmia r0!, {r1}
ldr r1, [r0, #0]
ldr r2, [r0, #4]
ldr r3, [r0, #8]
eors r1, r1, r4
ldr r4, [r0, #12]
eors r2, r2, r5
eors r3, r3, r6
eors r4, r4, r7
stm r0!, {r1,r2,r3,r4}
@roll kRoll-c
@ The first word of kRoll is loaded into r7 and updated to
@ w0 ^ (w0 << 13) ^ ROTL(w4, 3), where w4 is the fifth word; the words are
@ then written back shifted by four positions (old words 4..11 first, then
@ old words 1..3, then the updated word). The registers are left holding the
@ new kRoll in the lane placement expected by mRound.
ldr r0, [sp, #Xoofff_CompressFastLoop_kRoll]
ldmia r0!, {r7}
ldmia r0!, {r4-r6}
ldmia r0!, {r3}
ldmia r0!, {r1,r2}
mov r8, r1
mov r9, r2
ldmia r0!, {r1,r2}
str r1, [sp, #Xoofff_CompressFastLoop_offsetA03]
mov r10, r2
ldmia r0!, {r1,r2}
mov r11, r1
mov r12, r2
ldmia r0!, {r1}
mov lr, r1
lsls r1, r7, #13
eors r7, r7, r1
mov r1, r3
movs r2, #32-3
rors r1, r1, r2
eors r7, r7, r1
subs r0, r0, #Xoofff_BlockSize
stmia r0!, {r3}
mov r1, r8
mov r2, r9
stmia r0!, {r1,r2}
ldr r1, [sp, #Xoofff_CompressFastLoop_offsetA03]
mov r2, r10
stmia r0!, {r1,r2}
mov r1, r11
mov r2, r12
stmia r0!, {r1,r2}
mov r1, lr
stmia r0!, {r1,r4-r7}
@ loop management
ldr r0, [sp, #Xoofff_CompressFastLoop_length]
subs r0, #Xoofff_BlockSize
str r0, [sp, #Xoofff_CompressFastLoop_length]
bcc Xoofff_CompressFastLoop_Done
b Xoofff_CompressFastLoop_Loop
@ return number of bytes processed
ldr r0, [sp, #Xoofff_CompressFastLoop_input]
ldr r1, [sp, #Xoofff_CompressFastLoop_iInput]
subs r0, r0, r1
@ drop the locals plus the kRoll and input slots, then restore r8-r11
add sp, #Xoofff_CompressFastLoop_SAS+8
pop {r4-r7}
mov r8, r4
mov r9, r5
mov r10, r6
mov r11, r7
pop {r1-r7,pc}
.align 4
@ mStu rv, ro
@ Stores the 32-bit value in rv as four bytes, least significant byte first,
@ at the address in ro (no alignment needed), then advances ro by 4.
@ rv is shifted right in the process and does not keep its value.
@ NOTE(review): the closing .endm does not appear in this listing.
.macro mStu rv, ro
strb \rv, [\ro, #0]
lsrs \rv, \rv, #8
strb \rv, [\ro, #1]
lsrs \rv, \rv, #8
strb \rv, [\ro, #2]
lsrs \rv, \rv, #8
strb \rv, [\ro, #3]
adds \ro, \ro, #4
@ ----------------------------------------------------------------------------
@ size_t Xoofff_ExpandFastLoop(unsigned char *yAccu, const unsigned char *kRoll, unsigned char *output, size_t length)
@
@ On entry: r0 = yAccu, r1 = kRoll, r2 = output, r3 = length.
@ Per 48-byte block: run the round loop (6-constant table below) on a copy of
@ yAccu held in registers, XOR kRoll into the result and write it to output,
@ then roll yAccu in memory. Returns (in r0) the number of output bytes
@ written.
@ The length kept on the stack is the caller's length minus one block; the
@ loop repeats while subtracting another block does not borrow.
@ Stack frame (lowest address first): 8 bytes of locals (lane A03 and the
@ round-constant pointer), yAccu, running output pointer, saved r8-r11, then
@ the pushed r1-r7 and lr, which provide kRoll, the initial output pointer
@ and the length slot.
@ NOTE(review): the function entry label, the loop labels and the table
@ label do not appear in this listing.
@ offsets on stack
.equ Xoofff_ExpandFastLoop_offsetA03, 0
.equ Xoofff_ExpandFastLoop_offsetRC , 4
.equ Xoofff_ExpandFastLoop_SAS , 8
.equ Xoofff_ExpandFastLoop_yAccu , Xoofff_ExpandFastLoop_SAS+0
.equ Xoofff_ExpandFastLoop_output , Xoofff_ExpandFastLoop_SAS+4
.equ Xoofff_ExpandFastLoop_kRoll , Xoofff_ExpandFastLoop_SAS+8+16
.equ Xoofff_ExpandFastLoop_iOutput , Xoofff_ExpandFastLoop_SAS+12+16
.equ Xoofff_ExpandFastLoop_length , Xoofff_ExpandFastLoop_SAS+16+16
.global Xoofff_ExpandFastLoop
.type Xoofff_ExpandFastLoop, %function;
subs r3, #Xoofff_BlockSize @ length must be greater than block size
push {r1-r7,lr}
mov r4, r8
mov r5, r9
mov r6, r10
mov r7, r11
push {r0,r2,r4-r7}
sub sp, #Xoofff_ExpandFastLoop_SAS
ldm r0!, {r3,r5,r6,r7} @ get initial yAccu
mov r8, r5
mov r9, r6
str r7, [sp, #Xoofff_ExpandFastLoop_offsetA03]
ldm r0!, {r4,r5,r6,r7}
mov r10, r4
mov r11, r5
mov r12, r6
mov lr, r7
ldm r0!, {r4,r5,r6,r7}
@ reset the round-constant pointer to the start of the 6-round table
adr r1, Xoofff_ExpandFastLoop_RoundConstants6
str r1, [sp, #Xoofff_ExpandFastLoop_offsetRC]
mRound Xoofff_ExpandFastLoop_offsetRC, Xoofff_ExpandFastLoop_offsetA03
@ a zero word at the round-constant pointer ends the round loop
ldr r0, [sp, #Xoofff_ExpandFastLoop_offsetRC]
ldr r0, [r0]
cmp r0, #0
beq Xoofff_ExpandFastLoop_PermuteDone
b Xoofff_ExpandFastLoop_PermuteLoop
@ round constants: 6 words followed by the terminating 0
.long 0x00000060
.long 0x0000002C
.long 0x00000380
.long 0x000000F0
.long 0x000001A0
.long 0x00000012
.long 0
@ Add k and extract
ldr r0, [sp, #Xoofff_ExpandFastLoop_kRoll]
ldr r1, [sp, #Xoofff_ExpandFastLoop_output] @ current output pointer
@ word stores are only used when the output pointer is 4-byte aligned
lsls r2, r1, #30
bne Xoofff_ExpandFastLoop_Unaligned
ldmia r0!, {r2}
eors r2, r2, r3
stmia r1!, {r2}
ldmia r0!, {r2}
mov r3, r8
eors r2, r2, r3
stmia r1!, {r2}
ldmia r0!, {r2}
mov r3, r9
eors r2, r2, r3
stmia r1!, {r2}
ldmia r0!, {r2}
ldr r3, [sp, #Xoofff_ExpandFastLoop_offsetA03]
eors r2, r2, r3
stmia r1!, {r2}
ldmia r0!, {r2}
mov r3, r10
eors r2, r2, r3
stmia r1!, {r2}
ldmia r0!, {r2}
mov r3, r11
eors r2, r2, r3
stmia r1!, {r2}
ldmia r0!, {r2}
mov r3, r12
eors r2, r2, r3
stmia r1!, {r2}
ldmia r0!, {r2}
mov r3, lr
eors r2, r2, r3
stmia r1!, {r2}
ldmia r0!, {r2,r3}
eors r2, r2, r4
eors r3, r3, r5
stmia r1!, {r2,r3}
ldmia r0!, {r2,r3}
eors r2, r2, r6
eors r3, r3, r7
stmia r1!, {r2,r3}
b Xoofff_ExpandFastLoop_ExtractDone
@ unaligned output: same computation, with each word stored byte-wise by mStu
ldmia r0!, {r2}
eors r2, r2, r3
mStu r2, r1
ldmia r0!, {r2}
mov r3, r8
eors r2, r2, r3
mStu r2, r1
ldmia r0!, {r2}
mov r3, r9
eors r2, r2, r3
mStu r2, r1
ldmia r0!, {r2}
ldr r3, [sp, #Xoofff_ExpandFastLoop_offsetA03]
eors r2, r2, r3
mStu r2, r1
ldmia r0!, {r2}
mov r3, r10
eors r2, r2, r3
mStu r2, r1
ldmia r0!, {r2}
mov r3, r11
eors r2, r2, r3
mStu r2, r1
ldmia r0!, {r2}
mov r3, r12
eors r2, r2, r3
mStu r2, r1
ldmia r0!, {r2}
mov r3, lr
eors r2, r2, r3
mStu r2, r1
ldmia r0!, {r2,r3}
eors r2, r2, r4
mStu r2, r1
eors r3, r3, r5
mStu r3, r1
ldmia r0!, {r2,r3}
eors r2, r2, r6
mStu r2, r1
eors r3, r3, r7
mStu r3, r1
@ remember how far the output has been written
str r1, [sp, #Xoofff_ExpandFastLoop_output]
@ roll-e yAccu
@ The first word of yAccu is loaded into r7 and updated to
@ ROTL(w0, 5) ^ (w4 & w8) ^ ROTL(w4, 13) ^ 7, where w4 and w8 are the fifth
@ and ninth words; the words are then written back shifted by four positions
@ (old words 4..11 first, then old words 1..3, then the updated word). The
@ registers are left holding the new yAccu in the lane placement expected
@ by mRound.
ldr r0, [sp, #Xoofff_ExpandFastLoop_yAccu]
ldmia r0!, {r7}
ldmia r0!, {r4-r6}
ldmia r0!, {r3}
ldmia r0!, {r1,r2}
mov r8, r1
mov r9, r2
ldmia r0!, {r1,r2}
str r1, [sp, #Xoofff_ExpandFastLoop_offsetA03]
mov r10, r2
ldmia r0!, {r1,r2}
mov r11, r1
mov r12, r2
ldmia r0!, {r1}
mov lr, r1
mov r1, r10
ands r1, r1, r3
movs r2, #32-5
rors r7, r7, r2
eors r7, r7, r1
movs r2, #32-13
mov r1, r3
rors r1, r1, r2
eors r7, r7, r1
movs r1, #7
eors r7, r7, r1
subs r0, r0, #Xoofff_BlockSize
stmia r0!, {r3}
mov r1, r8
mov r2, r9
stmia r0!, {r1,r2}
ldr r1, [sp, #Xoofff_ExpandFastLoop_offsetA03]
mov r2, r10
stmia r0!, {r1,r2}
mov r1, r11
mov r2, r12
stmia r0!, {r1,r2}
mov r1, lr
stmia r0!, {r1,r4-r7}
@ loop management
ldr r0, [sp, #Xoofff_ExpandFastLoop_length]
subs r0, #Xoofff_BlockSize
str r0, [sp, #Xoofff_ExpandFastLoop_length]
bcc Xoofff_ExpandFastLoop_Done
b Xoofff_ExpandFastLoop_Loop
@ return number of bytes processed
ldr r0, [sp, #Xoofff_ExpandFastLoop_output]
ldr r1, [sp, #Xoofff_ExpandFastLoop_iOutput]
subs r0, r0, r1
@ drop the locals plus the yAccu and output slots, then restore r8-r11
add sp, #Xoofff_ExpandFastLoop_SAS+8
pop {r4-r7}
mov r8, r4
mov r9, r5
mov r10, r6
mov r11, r7
pop {r1-r7,pc}
.align 4
The eXtended Keccak Code Package (XKCP)
The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/*
 * Common Xoodoo definitions: state dimensions, round constants, and
 * compiler-dependent helper macros for rotation and unaligned 32-bit access.
 * NOTE(review): the #else / #endif lines that close the conditional groups
 * below do not appear in this listing, so each helper macro shows up once
 * per alternative.
 */
#ifndef _Xoodoo_h_
#define _Xoodoo_h_
#include <stdint.h>
#include <stdlib.h>
/* Maximum number of rounds, and state dimensions: 3 rows of 4 columns. */
#define MAXROUNDS 12
#define NROWS 3
#define NCOLUMS 4
/* Round constants */
#define _rc12 0x00000058
#define _rc11 0x00000038
#define _rc10 0x000003C0
#define _rc9 0x000000D0
#define _rc8 0x00000120
#define _rc7 0x00000014
#define _rc6 0x00000060
#define _rc5 0x0000002C
#define _rc4 0x00000380
#define _rc3 0x000000F0
#define _rc2 0x000001A0
#define _rc1 0x00000012
/* ROTL32(a, offset): rotate the 32-bit value a left by offset bits (mod 32). */
#if !defined(ROTL32)
#if defined (__arm__) && !defined(__GNUC__)
#define ROTL32(a, offset) __ror(a, (32-(offset))%32)
#elif defined(_MSC_VER)
#define ROTL32(a, offset) _rotl(a, (offset)%32)
#define ROTL32(a, offset) ((((uint32_t)a) << ((offset)%32)) ^ (((uint32_t)a) >> ((32-(offset))%32)))
/* READ32_UNALIGNED(argAddress): read a uint32_t through argAddress. Only the
   first variant uses a __packed pointer; the others dereference a plain
   uint32_t pointer. */
#if !defined(READ32_UNALIGNED)
#if defined (__arm__) && !defined(__GNUC__)
#define READ32_UNALIGNED(argAddress) (*((const __packed uint32_t*)(argAddress)))
#elif defined(_MSC_VER)
#define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress)))
#define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress)))
/* WRITE32_UNALIGNED(argAddress, argData): store a uint32_t through argAddress,
   with the same split between a __packed and a plain pointer as above. */
#if !defined(WRITE32_UNALIGNED)
#if defined (__arm__) && !defined(__GNUC__)
#define WRITE32_UNALIGNED(argAddress, argData) (*((__packed uint32_t*)(argAddress)) = (argData))
#elif defined(_MSC_VER)
#define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData))
#define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData))
/* index(x, y): position of lane (x, y) in a row-major array of NROWS rows of
   NCOLUMS lanes; both coordinates are reduced modulo the dimensions. */
#if !defined(index)
#define index(__x,__y) ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS))
/* One 32-bit lane of the state. */
typedef uint32_t tXoodooLane;
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/*
 * Xoodyak parameter set. Rkout and lRatchet are used as byte-array sizes by
 * the Cyclist code, so the values are byte counts.
 * NOTE(review): the closing #endif of the include guard does not appear in
 * this listing.
 */
#ifndef _Xoodyak_parameters_h_
#define _Xoodyak_parameters_h_
/* 48 equals the Xoodoo state size (3*4*4 bytes); presumably the permutation width - confirm. */
#define Xoodyak_f_bPrime 48
/* Absorb and squeeze rate installed by Cyclist_Initialize (hash mode). */
#define Xoodyak_Rhash 16
/* Absorb rate installed when a key is absorbed (keyed mode). */
#define Xoodyak_Rkin 44
/* Squeeze rate in keyed mode; also the chunk size used for encryption/decryption. */
#define Xoodyak_Rkout 24
/* Number of bytes squeezed and re-absorbed by Cyclist_Ratchet. */
#define Xoodyak_lRatchet 16
@ The eXtended Keccak Code Package (XKCP)
@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
@ For more information, feedback or questions, please refer to the Keccak Team website:
@ To the extent possible under law, the implementer has waived all copyright
@ and related or neighboring rights to the source code in this file.
@ WARNING: These functions work only on little endian CPUs with the ARMv6m architecture (e.g., Cortex-M0).
.syntax unified
@ ----------------------------------------------------------------------------
@ offsets in RAM state
@ _oAyx is the byte offset of 32-bit lane number 4*y + x (12 lanes in total)
.equ _oA00 , 0*4
.equ _oA01 , 1*4
.equ _oA02 , 2*4
.equ _oA03 , 3*4
.equ _oA10 , 4*4
.equ _oA11 , 5*4
.equ _oA12 , 6*4
.equ _oA13 , 7*4
.equ _oA20 , 8*4
.equ _oA21 , 9*4
.equ _oA22 , 10*4
.equ _oA23 , 11*4
@ possible locations of state lanes
@ (selector values tested by the loc argument of the macros below)
.equ locRegL , 1
.equ locRegH , 2
.equ locMem , 3
@ ----------------------------------------------------------------------------
@ rotation amounts; mRound uses _r0, _r1, _w1 and _e1 as (32 - amount)
@ operands of rors, i.e. as left-rotation counts
.equ _r0 , 5
.equ _r1 , 14
.equ _r2 , 1
.equ _w1 , 11
.equ _e0 , 2
.equ _e1 , 8
@ ----------------------------------------------------------------------------
@ mLoadU r, p, o, t
@ Loads a 32-bit little-endian word into r, one byte at a time, from address
@ p + o (no alignment needed). p is not modified; t is scratch.
@ NOTE(review): the closing .endm does not appear in this listing.
.macro mLoadU r, p, o, t
ldrb \r, [\p, #\o+0]
ldrb \t, [\p, #\o+1]
lsls \t, \t, #8
orrs \r, \r, \t
ldrb \t, [\p, #\o+2]
lsls \t, \t, #16
orrs \r, \r, \t
ldrb \t, [\p, #\o+3]
lsls \t, \t, #24
orrs \r, \r, \t
@ mStoreU p, o, s, t, loc
@ Stores the 32-bit value s as four bytes, least significant byte first, at
@ address p + o (no alignment needed). t is scratch.
@ For locRegL the first byte is stored straight from s and the shifted value
@ goes to t; in the other case s is first copied into t and all bytes are
@ stored from t.
@ NOTE(review): the .else/.endif/.endm directives delimiting these cases do
@ not appear in this listing.
.macro mStoreU p, o, s, t, loc
.if \loc == locRegL
strb \s, [\p, #\o+0]
lsrs \t, \s, #8
mov \t, \s
strb \t, [\p, #\o+0]
lsrs \t, \t, #8
strb \t, [\p, #\o+1]
lsrs \t, \t, #8
strb \t, [\p, #\o+2]
lsrs \t, \t, #8
strb \t, [\p, #\o+3]
@ mXor3 ro, a0, a1, a2, loc, tt
@ Computes ro = a0 ^ a1 ^ a2. a1 and a2 are registers; loc tells where a0 is:
@ locRegL: a0 is XORed directly; locRegH: a0 is first copied into the scratch
@ register tt; the remaining case (used with locMem) treats a0 as an offset
@ from sp and loads it into tt.
@ NOTE(review): the .else/.endif/.endm directives delimiting these cases do
@ not appear in this listing.
.macro mXor3 ro, a0, a1, a2, loc, tt
mov \ro, \a1
eors \ro, \ro, \a2
.if \loc == locRegL
eors \ro, \ro, \a0
.if \loc == locRegH
mov \tt, \a0
ldr \tt, [sp, #\a0]
eors \ro, \ro, \tt
@ mXor ro, ri, tt, loc
@ XORs register ri into ro, where loc tells what ro is:
@ locRegL: ro is updated in place; locRegH: the value goes through the
@ scratch register tt and is moved back; the remaining case (used with
@ locMem) treats ro as an offset from sp and does load / XOR / store via tt.
@ NOTE(review): the .else/.endif/.endm directives delimiting these cases do
@ not appear in this listing.
.macro mXor ro, ri, tt, loc
.if \loc == locRegL
eors \ro, \ro, \ri
.if \loc == locRegH
mov \tt, \ro
eors \tt, \tt, \ri
mov \ro, \tt
ldr \tt, [sp, #\ro]
eors \tt, \tt, \ri
str \tt, [sp, #\ro]
@ mChi3 a0, a1, a2, r0, r1, a0s, loc
@ Non-linear step on one column (a0, a1, a2), using r0 and r1 as scratch:
@   a0 ^= a2 & ~a1, then a1 ^= a0 & ~a2, then a2 ^= a1 & ~a0
@ where each step uses the values already updated by the previous steps.
@ a0 must be a register holding the lane; when loc is not locRegL the new a0
@ is also written back to a0s (a register for locRegH, otherwise an offset
@ from sp). a1 is accessed with mov only; a2 is used directly in bics/eors.
@ NOTE(review): the .else/.endif/.endm directives delimiting the cases do
@ not appear in this listing.
.macro mChi3 a0,a1,a2,r0,r1,a0s,loc
mov \r1, \a2
mov \r0, \a1
bics \r1, \r1, \r0
eors \a0, \a0, \r1
.if \loc != locRegL
.if \loc == locRegH
mov \a0s, \a0
str \a0, [sp, #\a0s]
mov \r0, \a0
bics \r0, \r0, \a2
mov \r1, \a1
eors \r1, \r1, \r0
mov \a1, \r1
bics \r1, \r1, \a0
eors \a2, \a2, \r1
@ mRound offsetRC, offsetA03
@ One Xoodoo round on a state held partly in registers and partly on the stack.
@ Lanes as used by the round body:
@   r3, r8, r9, [sp + offsetA03]  = four lanes of one plane
@   r10, r11, r12, lr             = four lanes of the next plane
@   r4, r5, r6, r7                = four lanes of the last plane
@ offsetRC is the stack offset of a pointer to the next round constant; the
@ pointer is advanced by one word per round. r0-r2 are scratch.
@ NOTE(review): the closing .endm does not appear in this listing.
.macro mRound offsetRC, offsetA03
@ Theta: Column Parity Mixer
@ For each column: r0 = parity of the column, and the value
@ r1 = ROTL(P, 5) ^ ROTL(P, 14) derived from the previously computed parity P
@ is XORed into the three lanes of the current column.
mXor3 r0, \offsetA03, lr, r7, locMem, r2
mov r1, r0
movs r2, #32-(_r1-_r0)
rors r1, r1, r2
eors r1, r1, r0
movs r2, #32-_r0
rors r1, r1, r2
mXor3 r0, r3, r10, r4, locRegL, r2
mXor r3, r1, r2, locRegL
mXor r10, r1, r2, locRegH
mXor r4, r1, r2, locRegL
mov r1, r0
movs r2, #32-(_r1-_r0)
rors r1, r1, r2
eors r1, r1, r0
movs r2, #32-_r0
rors r1, r1, r2
mXor3 r0, r8, r11, r5, locRegH, r2
mXor r8, r1, r2, locRegH
mXor r11, r1, r2, locRegH
mXor r5, r1, r2, locRegL
mov r1, r0
movs r2, #32-(_r1-_r0)
rors r1, r1, r2
eors r1, r1, r0
movs r2, #32-_r0
rors r1, r1, r2
mXor3 r0, r9, r12, r6, locRegH, r2
mXor r9, r1, r2, locRegH
mXor r12, r1, r2, locRegH
mXor r6, r1, r2, locRegL
mov r1, r0
movs r2, #32-(_r1-_r0)
rors r1, r1, r2
eors r1, r1, r0
movs r2, #32-_r0
rors r1, r1, r2
mXor \offsetA03, r1, r2, locMem
mXor lr, r1, r2, locRegH
mXor r7, r1, r2, locRegL
@ Rho-west: Plane shift
@ r4-r7 are rotated left by _w1 bits; the lanes in r10, r11, r12, lr move
@ one register along (r10 receives the old lr)
movs r0, #32-_w1
rors r4, r4, r0
rors r5, r5, r0
rors r6, r6, r0
rors r7, r7, r0
mov r0, lr
mov lr, r12
mov r12, r11
mov r11, r10
mov r10, r0
@ Iota: round constant
@ load the constant, advance the pointer, XOR the constant into r3
ldr r0, [sp, #\offsetRC]
ldmia r0!, {r1}
str r0, [sp, #\offsetRC]
eors r3, r3, r1
@ Chi: non linear step, on columns
@ lanes living in r8, r9 or on the stack are first copied into r2
mChi3 r3, r10, r4, r0, r1, r3, locRegL
mov r2, r8
mChi3 r2, r11, r5, r0, r1, r8, locRegH
mov r2, r9
mChi3 r2, r12, r6, r0, r1, r9, locRegH
ldr r2, [sp, #\offsetA03]
mChi3 r2, lr, r7, r0, r1, \offsetA03, locMem
@ Rho-east: Plane shift
@ r10, r11, r12, lr are rotated left by 1 bit; r4-r7 are rotated left by
@ _e1 bits and then swapped pairwise (r4 with r6, r5 with r7)
movs r0, #32-1
mov r1, r10
rors r1, r1, r0
mov r10, r1
mov r1, r11
rors r1, r1, r0
mov r11, r1
mov r1, r12
rors r1, r1, r0
mov r12, r1
mov r1, lr
rors r1, r1, r0
mov lr, r1
movs r0, #32-_e1
rors r4, r4, r0
rors r5, r5, r0
rors r6, r6, r0
rors r7, r7, r0
mov r0, r4
mov r4, r6
mov r6, r0
mov r0, r5
mov r5, r7
mov r7, r0
@ ----------------------------------------------------------------------------
@ Xoodoo_Permute_12roundsAsm
@
@ Internal routine, reached with bx rather than bl because lr holds a state
@ word. It applies mRound once per non-zero entry of the constant table below
@ to the state in r3-r12, lr and [sp, #Xoodoo_Permute_12rounds_offsetA03],
@ then jumps to the address the caller stored at
@ [sp, #Xoodoo_Permute_12rounds_offsetReturn]. r0-r2 are clobbered.
@ offsets on stack
.equ Xoodoo_Permute_12rounds_offsetA03 , 0
.equ Xoodoo_Permute_12rounds_offsetRC , 4
.equ Xoodoo_Permute_12rounds_offsetReturn, 8
@ Size in bytes of the three slots above; callers place their own variables after it.
.equ Xoodoo_Permute_12rounds_SAS , 12
.align 4
.type Xoodoo_Permute_12roundsAsm, %function;
@ Point the round-constant slot at the start of the table.
adr r2, Xoodoo_Permute_RoundConstants12
str r2, [sp, #Xoodoo_Permute_12rounds_offsetRC]
@ One round; mRound advances the pointer in the round-constant slot.
mRound Xoodoo_Permute_12rounds_offsetRC, Xoodoo_Permute_12rounds_offsetA03
@ Peek at the next table entry: a zero word marks the end of the table.
ldr r0, [sp, #Xoodoo_Permute_12rounds_offsetRC]
ldr r0, [r0]
cmp r0, #0
beq Xoodoo_Permute_12rounds_Done
b Xoodoo_Permute_12rounds_Loop
@ Return through the address saved by the caller.
ldr r0, [sp, #Xoodoo_Permute_12rounds_offsetReturn]
bx r0
.align 4
@ Twelve round constants, consumed in this order, followed by the zero terminator.
.long 0x00000058
.long 0x00000038
.long 0x000003C0
.long 0x000000D0
.long 0x00000120
.long 0x00000014
.long 0x00000060
.long 0x0000002C
.long 0x00000380
.long 0x000000F0
.long 0x000001A0
.long 0x00000012
.long 0
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen)
@ {
@ size_t initialLength = XLen@
@ do {
@ SnP_Permute(state )@ /* Xoodyak_Up(instance, NULL, 0, 0)@ */
@ SnP_AddBytes(state, X, 0, Xoodyak_Rkin)@ /* Xoodyak_Down(instance, X, Xoodyak_Rkin, 0)@ */
@ SnP_AddByte(state, 0x01, Xoodyak_Rkin)@
@ X += Xoodyak_Rkin@
@ XLen -= Xoodyak_Rkin@
@ } while (XLen >= Xoodyak_Rkin)@
@ return initialLength - XLen@
@ }
@
@ In:  r0 = state, r1 = X, r2 = XLen. The first 44-byte block is absorbed
@      unconditionally (do/while), so XLen is expected to be at least 44.
@ Out: r0 = number of bytes absorbed.
@ Local variables live on the stack, above the three slots used by the permutation.
.equ XoodyakAbsorb_offsetState , (Xoodoo_Permute_12rounds_SAS+0)
.equ XoodyakAbsorb_offsetX , (Xoodoo_Permute_12rounds_SAS+4)
.equ XoodyakAbsorb_offsetXLen , (Xoodoo_Permute_12rounds_SAS+8)
.equ XoodyakAbsorb_offsetInitialLen , (Xoodoo_Permute_12rounds_SAS+12)
.equ XoodyakAbsorb_SAS , (Xoodoo_Permute_12rounds_SAS+20)
.align 4
.global Xoodyak_AbsorbKeyedFullBlocks
.type Xoodyak_AbsorbKeyedFullBlocks, %function;
@ Prologue: save r3-r7, lr, then r8-r11 (copied through r4-r7 before pushing).
push {r3-r7,lr}
mov r4, r8
mov r5, r9
mov r6, r10
mov r7, r11
push {r4-r7}
sub sp, #XoodyakAbsorb_SAS
str r0, [sp, #XoodyakAbsorb_offsetState] @ setup variables on stack
str r1, [sp, #XoodyakAbsorb_offsetX]
str r2, [sp, #XoodyakAbsorb_offsetInitialLen]
@ The stored length is kept 44 below the real remaining length, so the loop
@ test further down is a single subs/bcs.
subs r2, r2, #44
str r2, [sp, #XoodyakAbsorb_offsetXLen]
@ Address the permutation jumps back to; +1 sets bit 0 for bx.
ldr r5, =Xoodyak_AbsorbKeyedFullBlocks_Ret+1
str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn]
@ State words 0-2 -> r3, r8, r9; word 3 -> stack; words 4-7 -> r10, r11, r12, lr;
@ words 8-11 -> r4-r7.
ldm r0!, {r3,r5,r6,r7} @ state in registers
mov r8, r5
mov r9, r6
str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03]
ldm r0!, {r4,r5,r6,r7}
mov r10, r4
mov r11, r5
mov r12, r6
mov lr, r7
ldm r0!, {r4,r5,r6,r7}
@ Loop top: run the permutation on the state.
ldr r0, =Xoodoo_Permute_12roundsAsm
bx r0
.align 4
@ The permutation returns here. Pick the word-wise path only when X is 4-byte
@ aligned: lsls #30 leaves just the two low address bits.
ldr r0, [sp, #XoodyakAbsorb_offsetX]
lsls r1, r0, #30
bne Xoodyak_AbsorbKeyedFullBlocks_Unaligned
@ Aligned path: XOR eleven input words into state words 0-10.
ldmia r0!, {r1}
eors r3, r3, r1
ldmia r0!, {r1}
mov r2, r8
eors r2, r2, r1
mov r8, r2
ldmia r0!, {r1}
mov r2, r9
eors r2, r2, r1
mov r9, r2
ldmia r0!, {r1}
ldr r2, [sp, #Xoodoo_Permute_12rounds_offsetA03]
eors r2, r2, r1
str r2, [sp, #Xoodoo_Permute_12rounds_offsetA03]
ldmia r0!, {r1}
mov r2, r10
eors r2, r2, r1
mov r10, r2
ldmia r0!, {r1}
mov r2, r11
eors r2, r2, r1
mov r11, r2
ldmia r0!, {r1}
mov r2, r12
eors r2, r2, r1
mov r12, r2
ldmia r0!, {r1}
mov r2, lr
eors r2, r2, r1
mov lr, r2
ldmia r0!, {r1}
eors r4, r4, r1
ldmia r0!, {r1}
eors r5, r5, r1
ldmia r0!, {r1}
eors r6, r6, r1
@ Common loop tail (the unaligned path branches back to this point): save the
@ advanced X, XOR 0x01 into state byte 44 (low byte of r7), update the length.
str r0, [sp, #XoodyakAbsorb_offsetX]
movs r2, #1
eors r7, r7, r2
ldr r1, [sp, #XoodyakAbsorb_offsetXLen]
subs r1, r1, #44
str r1, [sp, #XoodyakAbsorb_offsetXLen]
@ Carry set (no borrow) means at least one more full block remains.
bcs Xoodyak_AbsorbKeyedFullBlocks_Loop
@ Write the twelve state words back to memory.
ldr r0, [sp, #XoodyakAbsorb_offsetState]
stm r0!, {r3}
mov r1, r8
mov r2, r9
ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03]
stm r0!, {r1,r2,r3}
mov r1, r10
mov r2, r11
mov r3, r12
stm r0!, {r1,r2,r3}
mov r1, lr
stm r0!, {r1,r4,r5,r6,r7}
@ Return value: initial length minus the remaining length (undo the -44 bias).
ldr r0, [sp, #XoodyakAbsorb_offsetInitialLen]
ldr r2, [sp, #XoodyakAbsorb_offsetXLen]
adds r2, r2, #44
subs r0, r0, r2
@ Epilogue: release the frame, restore r8-r11 and r3-r7, return.
add sp, #XoodyakAbsorb_SAS
pop {r4-r7}
mov r8, r4
mov r9, r5
mov r10, r6
mov r11, r7
pop {r3-r7,pc}
@ Unaligned path: same XORs as above, with each input word assembled from
@ bytes by mLoadU (r2 is its scratch register). r0 is advanced by hand.
mLoadU r1, r0, 0, r2
eors r3, r3, r1
mLoadU r1, r0, 4, r2
mov r2, r8
eors r2, r2, r1
mov r8, r2
mLoadU r1, r0, 8, r2
mov r2, r9
eors r2, r2, r1
mov r9, r2
mLoadU r1, r0, 12, r2
ldr r2, [sp, #Xoodoo_Permute_12rounds_offsetA03]
eors r2, r2, r1
str r2, [sp, #Xoodoo_Permute_12rounds_offsetA03]
mLoadU r1, r0, 16, r2
mov r2, r10
eors r2, r2, r1
mov r10, r2
mLoadU r1, r0, 20, r2
mov r2, r11
eors r2, r2, r1
mov r11, r2
mLoadU r1, r0, 24, r2
mov r2, r12
eors r2, r2, r1
mov r12, r2
mLoadU r1, r0, 28, r2
mov r2, lr
eors r2, r2, r1
mov lr, r2
adds r0, r0, #32
mLoadU r1, r0, 0, r2
eors r4, r4, r1
mLoadU r1, r0, 4, r2
eors r5, r5, r1
mLoadU r1, r0, 8, r2
eors r6, r6, r1
adds r0, r0, #12
b Xoodyak_AbsorbKeyedFullBlocks_EndLoop
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen)
@ {
@ size_t initialLength = XLen@
@ do {
@ SnP_Permute(state )@ /* Xoodyak_Up(instance, NULL, 0, 0)@ */
@ SnP_AddBytes(state, X, 0, Xoodyak_Rhash)@ /* Xoodyak_Down(instance, X, Xoodyak_Rhash, 0)@ */
@ SnP_AddByte(state, 0x01, Xoodyak_Rhash)@
@ X += Xoodyak_Rhash@
@ XLen -= Xoodyak_Rhash@
@ } while (XLen >= Xoodyak_Rhash)@
@ return initialLength - XLen@
@ }
@
@ In:  r0 = state, r1 = X, r2 = XLen. The first 16-byte block is absorbed
@      unconditionally (do/while), so XLen is expected to be at least 16.
@ Out: r0 = number of bytes absorbed.
@ Uses the XoodyakAbsorb_* stack layout.
.align 4
.global Xoodyak_AbsorbHashFullBlocks
.type Xoodyak_AbsorbHashFullBlocks, %function;
@ Prologue: save r3-r7, lr, then r8-r11 (copied through r4-r7 before pushing).
push {r3-r7,lr}
mov r4, r8
mov r5, r9
mov r6, r10
mov r7, r11
push {r4-r7}
sub sp, #XoodyakAbsorb_SAS
str r0, [sp, #XoodyakAbsorb_offsetState] @ setup variables on stack
str r1, [sp, #XoodyakAbsorb_offsetX]
str r2, [sp, #XoodyakAbsorb_offsetInitialLen]
@ The stored length is kept 16 below the real remaining length.
subs r2, r2, #16
str r2, [sp, #XoodyakAbsorb_offsetXLen]
@ Address the permutation jumps back to; +1 sets bit 0 for bx.
ldr r5, =Xoodyak_AbsorbHashFullBlocks_Ret+1
str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn]
@ State words 0-2 -> r3, r8, r9; word 3 -> stack; words 4-7 -> r10, r11, r12, lr;
@ words 8-11 -> r4-r7.
ldm r0!, {r3,r5,r6,r7} @ state in registers
mov r8, r5
mov r9, r6
str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03]
ldm r0!, {r4,r5,r6,r7}
mov r10, r4
mov r11, r5
mov r12, r6
mov lr, r7
ldm r0!, {r4,r5,r6,r7}
@ Loop top: run the permutation on the state.
ldr r0, =Xoodoo_Permute_12roundsAsm
bx r0
.align 4
@ The permutation returns here. Use word loads only when X is 4-byte aligned.
ldr r0, [sp, #XoodyakAbsorb_offsetX]
lsls r1, r0, #30
bne Xoodyak_AbsorbHashFullBlocks_Unaligned
@ Aligned path: XOR four input words into state words 0-3.
ldmia r0!, {r1}
eors r3, r3, r1
ldmia r0!, {r1}
mov r2, r8
eors r2, r2, r1
mov r8, r2
ldmia r0!, {r1}
mov r2, r9
eors r2, r2, r1
mov r9, r2
ldmia r0!, {r1}
ldr r2, [sp, #Xoodoo_Permute_12rounds_offsetA03]
eors r2, r2, r1
str r2, [sp, #Xoodoo_Permute_12rounds_offsetA03]
@ Common loop tail (the unaligned path branches back to this point): save the
@ advanced X, XOR 0x01 into state byte 16 (low byte of r10), update the length.
str r0, [sp, #XoodyakAbsorb_offsetX]
movs r2, #1
mov r1, r10
eors r1, r1, r2
mov r10, r1
ldr r1, [sp, #XoodyakAbsorb_offsetXLen]
subs r1, r1, #16
str r1, [sp, #XoodyakAbsorb_offsetXLen]
@ Carry set (no borrow) means at least one more full block remains.
bcs Xoodyak_AbsorbHashFullBlocks_Loop
@ Write the twelve state words back to memory.
ldr r0, [sp, #XoodyakAbsorb_offsetState]
stm r0!, {r3}
mov r1, r8
mov r2, r9
ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03]
stm r0!, {r1,r2,r3}
mov r1, r10
mov r2, r11
mov r3, r12
stm r0!, {r1,r2,r3}
mov r1, lr
stm r0!, {r1,r4,r5,r6,r7}
@ Return value: initial length minus the remaining length (undo the -16 bias).
ldr r0, [sp, #XoodyakAbsorb_offsetInitialLen]
ldr r2, [sp, #XoodyakAbsorb_offsetXLen]
adds r2, r2, #16
subs r0, r0, r2
@ Epilogue: release the frame, restore r8-r11 and r3-r7, return.
add sp, #XoodyakAbsorb_SAS
pop {r4-r7}
mov r8, r4
mov r9, r5
mov r10, r6
mov r11, r7
pop {r3-r7,pc}
@ Unaligned path: same XORs, with each input word assembled from bytes by
@ mLoadU (r2 is its scratch register). r0 is advanced by hand.
mLoadU r1, r0, 0, r2
eors r3, r3, r1
mLoadU r1, r0, 4, r2
mov r2, r8
eors r2, r2, r1
mov r8, r2
mLoadU r1, r0, 8, r2
mov r2, r9
eors r2, r2, r1
mov r9, r2
mLoadU r1, r0, 12, r2
ldr r2, [sp, #Xoodoo_Permute_12rounds_offsetA03]
eors r2, r2, r1
str r2, [sp, #Xoodoo_Permute_12rounds_offsetA03]
adds r0, r0, #16
b Xoodyak_AbsorbHashFullBlocks_EndLoop
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen)
@ {
@ size_t initialLength = YLen@
@ do {
@ SnP_AddByte(state, 0x01, 0)@ /* Xoodyak_Down(instance, NULL, 0, 0)@ */
@ SnP_Permute(state )@ /* Xoodyak_Up(instance, Y, Xoodyak_Rkout, 0)@ */
@ SnP_ExtractBytes(state, Y, 0, Xoodyak_Rkout)@
@ Y += Xoodyak_Rkout@
@ YLen -= Xoodyak_Rkout@
@ } while (YLen >= Xoodyak_Rkout)@
@ return initialLength - YLen@
@ }
@
@ In:  r0 = state, r1 = Y, r2 = YLen. The first 24-byte block is produced
@      unconditionally (do/while), so YLen is expected to be at least 24.
@ Out: r0 = number of bytes written to Y.
@ Local variables live on the stack, above the three slots used by the permutation.
.equ XoodyakSqueeze_offsetState , (Xoodoo_Permute_12rounds_SAS+0)
.equ XoodyakSqueeze_offsetY , (Xoodoo_Permute_12rounds_SAS+4)
.equ XoodyakSqueeze_offsetYLen , (Xoodoo_Permute_12rounds_SAS+8)
.equ XoodyakSqueeze_offsetInitialLen , (Xoodoo_Permute_12rounds_SAS+12)
.equ XoodyakSqueeze_SAS , (Xoodoo_Permute_12rounds_SAS+20)
.align 4
.global Xoodyak_SqueezeKeyedFullBlocks
.type Xoodyak_SqueezeKeyedFullBlocks, %function;
@ Prologue: save r3-r7, lr, then r8-r11 (copied through r4-r7 before pushing).
push {r3-r7,lr}
mov r4, r8
mov r5, r9
mov r6, r10
mov r7, r11
push {r4-r7}
sub sp, #XoodyakSqueeze_SAS
str r0, [sp, #XoodyakSqueeze_offsetState] @ setup variables on stack
str r1, [sp, #XoodyakSqueeze_offsetY]
str r2, [sp, #XoodyakSqueeze_offsetInitialLen]
@ The stored length is kept 24 below the real remaining length.
subs r2, r2, #24
str r2, [sp, #XoodyakSqueeze_offsetYLen]
@ Address the permutation jumps back to; +1 sets bit 0 for bx.
ldr r5, =Xoodyak_SqueezeKeyedFullBlocks_Ret+1
str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn]
@ State words 0-2 -> r3, r8, r9; word 3 -> stack; words 4-7 -> r10, r11, r12, lr;
@ words 8-11 -> r4-r7.
ldm r0!, {r3,r5,r6,r7} @ state in registers
mov r8, r5
mov r9, r6
str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03]
ldm r0!, {r4,r5,r6,r7}
mov r10, r4
mov r11, r5
mov r12, r6
mov lr, r7
ldm r0!, {r4,r5,r6,r7}
@ Loop top: XOR 0x01 into state byte 0 (low byte of r3), then permute.
movs r0, #1
eors r3, r3, r0
ldr r0, =Xoodoo_Permute_12roundsAsm
bx r0
.align 4
@ The permutation returns here. Use word stores only when Y is 4-byte aligned.
ldr r0, [sp, #XoodyakSqueeze_offsetY]
lsls r1, r0, #30
bne Xoodyak_SqueezeKeyedFullBlocks_Unaligned
@ Aligned path: write state words 0-5 (r3, r8, r9, stack word, r10, r11) to Y.
stmia r0!, {r3}
mov r1, r8
mov r2, r9
stmia r0!, {r1, r2}
ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03]
mov r2, r10
stmia r0!, {r1, r2}
mov r1, r11
stmia r0!, {r1}
@ Common loop tail (the unaligned path branches back to this point).
str r0, [sp, #XoodyakSqueeze_offsetY]
ldr r1, [sp, #XoodyakSqueeze_offsetYLen]
subs r1, r1, #24
str r1, [sp, #XoodyakSqueeze_offsetYLen]
@ Carry set (no borrow) means at least one more full block is wanted.
bcs Xoodyak_SqueezeKeyedFullBlocks_Loop
ldr r0, [sp, #XoodyakSqueeze_offsetState] @ Save state
stm r0!, {r3}
mov r1, r8
mov r2, r9
ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03]
stm r0!, {r1,r2,r3}
mov r1, r10
mov r2, r11
mov r3, r12
stm r0!, {r1,r2,r3}
mov r1, lr
stm r0!, {r1,r4,r5,r6,r7}
ldr r0, [sp, #XoodyakSqueeze_offsetInitialLen] @ Compute processed length
ldr r2, [sp, #XoodyakSqueeze_offsetYLen]
adds r2, r2, #24
subs r0, r0, r2
add sp, #XoodyakSqueeze_SAS @ Free stack and pop
pop {r4-r7}
mov r8, r4
mov r9, r5
mov r10, r6
mov r11, r7
pop {r3-r7,pc}
@ Unaligned path: the same six words written byte by byte with mStoreU
@ (r2 is its scratch register). r0 is advanced by hand.
mStoreU r0, 0, r3, r2, locRegL
mStoreU r0, 4, r8, r2, locRegH
mStoreU r0, 8, r9, r2, locRegH
ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03]
mStoreU r0, 12, r1, r2, locRegL
mStoreU r0, 16, r10, r2, locRegH
mStoreU r0, 20, r11, r2, locRegH
adds r0, r0, #24
b Xoodyak_SqueezeKeyedFullBlocks_EndLoop
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen)
@ {
@ size_t initialLength = YLen@
@ do {
@ SnP_AddByte(state, 0x01, 0)@ /* Xoodyak_Down(instance, NULL, 0, 0)@ */
@ SnP_Permute(state)@ /* Xoodyak_Up(instance, Y, Xoodyak_Rhash, 0)@ */
@ SnP_ExtractBytes(state, Y, 0, Xoodyak_Rhash)@
@ Y += Xoodyak_Rhash@
@ YLen -= Xoodyak_Rhash@
@ } while (YLen >= Xoodyak_Rhash)@
@ return initialLength - YLen@
@ }
@
@ In:  r0 = state, r1 = Y, r2 = YLen. The first 16-byte block is produced
@      unconditionally (do/while), so YLen is expected to be at least 16.
@ Out: r0 = number of bytes written to Y.
@ Uses the XoodyakSqueeze_* stack layout.
.align 4
.global Xoodyak_SqueezeHashFullBlocks
.type Xoodyak_SqueezeHashFullBlocks, %function;
@ Prologue: save r3-r7, lr, then r8-r11 (copied through r4-r7 before pushing).
push {r3-r7,lr}
mov r4, r8
mov r5, r9
mov r6, r10
mov r7, r11
push {r4-r7}
sub sp, #XoodyakSqueeze_SAS
str r0, [sp, #XoodyakSqueeze_offsetState] @ setup variables on stack
str r1, [sp, #XoodyakSqueeze_offsetY]
str r2, [sp, #XoodyakSqueeze_offsetInitialLen]
@ The stored length is kept 16 below the real remaining length.
subs r2, r2, #16
str r2, [sp, #XoodyakSqueeze_offsetYLen]
@ Address the permutation jumps back to; +1 sets bit 0 for bx.
ldr r5, =Xoodyak_SqueezeHashFullBlocks_Ret+1
str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn]
@ State words 0-2 -> r3, r8, r9; word 3 -> stack; words 4-7 -> r10, r11, r12, lr;
@ words 8-11 -> r4-r7.
ldm r0!, {r3,r5,r6,r7} @ state in registers
mov r8, r5
mov r9, r6
str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03]
ldm r0!, {r4,r5,r6,r7}
mov r10, r4
mov r11, r5
mov r12, r6
mov lr, r7
ldm r0!, {r4,r5,r6,r7}
@ Loop top: XOR 0x01 into state byte 0 (low byte of r3), then permute.
movs r0, #1
eors r3, r3, r0
ldr r0, =Xoodoo_Permute_12roundsAsm
bx r0
.align 4
@ The permutation returns here. Use word stores only when Y is 4-byte aligned.
ldr r0, [sp, #XoodyakSqueeze_offsetY]
lsls r1, r0, #30
bne Xoodyak_SqueezeHashFullBlocks_Unaligned
@ Aligned path: write state words 0-3 (r3, r8, r9, stack word) to Y.
stmia r0!, {r3}
mov r1, r8
mov r2, r9
stmia r0!, {r1, r2}
ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03]
stmia r0!, {r1}
@ Common loop tail (the unaligned path branches back to this point).
str r0, [sp, #XoodyakSqueeze_offsetY]
ldr r1, [sp, #XoodyakSqueeze_offsetYLen]
subs r1, r1, #16
str r1, [sp, #XoodyakSqueeze_offsetYLen]
@ Carry set (no borrow) means at least one more full block is wanted.
bcs Xoodyak_SqueezeHashFullBlocks_Loop
ldr r0, [sp, #XoodyakSqueeze_offsetState] @ Save state
stm r0!, {r3}
mov r1, r8
mov r2, r9
ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03]
stm r0!, {r1,r2,r3}
mov r1, r10
mov r2, r11
mov r3, r12
stm r0!, {r1,r2,r3}
mov r1, lr
stm r0!, {r1,r4,r5,r6,r7}
ldr r0, [sp, #XoodyakSqueeze_offsetInitialLen] @ Compute processed length
ldr r2, [sp, #XoodyakSqueeze_offsetYLen]
adds r2, r2, #16
subs r0, r0, r2
add sp, #XoodyakSqueeze_SAS @ Free stack and pop
pop {r4-r7}
mov r8, r4
mov r9, r5
mov r10, r6
mov r11, r7
pop {r3-r7,pc}
@ Unaligned path: the same four words written byte by byte with mStoreU
@ (r2 is its scratch register). r0 is advanced by hand.
mStoreU r0, 0, r3, r2, locRegL
mStoreU r0, 4, r8, r2, locRegH
mStoreU r0, 8, r9, r2, locRegH
ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03]
mStoreU r0, 12, r1, r2, locRegL
adds r0, r0, #16
b Xoodyak_SqueezeHashFullBlocks_EndLoop
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen)
@ {
@ size_t initialLength = IOLen@
@ do {
@ SnP_Permute(state)@
@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@
@ SnP_OverwriteBytes(state, O, 0, Xoodyak_Rkout)@
@ SnP_AddByte(state, 0x01, Xoodyak_Rkout)@
@ I += Xoodyak_Rkout@
@ O += Xoodyak_Rkout@
@ IOLen -= Xoodyak_Rkout@
@ } while (IOLen >= Xoodyak_Rkout)@
@ return initialLength - IOLen@
@ }
@
@ In:  r0 = state, r1 = I, r2 = O, r3 = IOLen. The first 24-byte block is
@      processed unconditionally (do/while), so IOLen is expected to be >= 24.
@ Out: r0 = number of bytes processed.
@ Local variables live on the stack, above the three slots used by the permutation.
    .equ XoodyakCrypt_offsetState , (Xoodoo_Permute_12rounds_SAS+0)
    .equ XoodyakCrypt_offsetI , (Xoodoo_Permute_12rounds_SAS+4)
    .equ XoodyakCrypt_offsetO , (Xoodoo_Permute_12rounds_SAS+8)
    .equ XoodyakCrypt_offsetIOLen , (Xoodoo_Permute_12rounds_SAS+12)
    .equ XoodyakCrypt_offsetInitialLen , (Xoodoo_Permute_12rounds_SAS+16)
    .equ XoodyakCrypt_SAS , (Xoodoo_Permute_12rounds_SAS+20)
    .align 4
    .global Xoodyak_EncryptFullBlocks
    .type Xoodyak_EncryptFullBlocks, %function;
Xoodyak_EncryptFullBlocks:
    @ Prologue: save r3-r7, lr, then r8-r11 (copied through r4-r7 before pushing).
    push {r3-r7,lr}
    mov r4, r8
    mov r5, r9
    mov r6, r10
    mov r7, r11
    push {r4-r7}
    sub sp, #XoodyakCrypt_SAS
    str r0, [sp, #XoodyakCrypt_offsetState] @ setup variables on stack
    str r1, [sp, #XoodyakCrypt_offsetI]
    str r2, [sp, #XoodyakCrypt_offsetO]
    str r3, [sp, #XoodyakCrypt_offsetInitialLen]
    @ The stored length is kept 24 below the real remaining length, so the loop
    @ test is a single subs/bcs.
    subs r3, r3, #24
    str r3, [sp, #XoodyakCrypt_offsetIOLen]
    @ Address the permutation jumps back to; +1 sets bit 0 for bx.
    ldr r5, =Xoodyak_EncryptFullBlocks_Ret+1
    str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn]
    @ State words 0-2 -> r3, r8, r9; word 3 -> stack; words 4-7 -> r10, r11, r12, lr;
    @ words 8-11 -> r4-r7.
    ldm r0!, {r3,r5,r6,r7} @ state in registers
    mov r8, r5
    mov r9, r6
    str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03]
    ldm r0!, {r4,r5,r6,r7}
    mov r10, r4
    mov r11, r5
    mov r12, r6
    mov lr, r7
    ldm r0!, {r4,r5,r6,r7}
Xoodyak_EncryptFullBlocks_Loop:
    ldr r0, =Xoodoo_Permute_12roundsAsm
    bx r0
    .align 4
Xoodyak_EncryptFullBlocks_Ret:
    @ r4/r5 hold state words but are needed as pointer registers for the
    @ ldmia/stmia below, so park them on the stack. Every sp-relative offset
    @ up to the matching pop is therefore biased by +8.
    push {r4, r5}
    ldr r5, [sp, #XoodyakCrypt_offsetI+8]
    ldr r4, [sp, #XoodyakCrypt_offsetO+8]
    @ The word-wise path needs BOTH pointers 4-byte aligned, so test the low
    @ two bits of (I | O). An AND here would miss, for example, I % 4 == 1 with
    @ O % 4 == 2 and run ldmia/stmia on unaligned addresses.
    mov r0, r4
    orrs r0, r0, r5
    lsls r0, r0, #30
    bne Xoodyak_EncryptFullBlocks_Unaligned
    @ Aligned path: for state words 0-5, output = state ^ input, and the
    @ state word is replaced by that output word.
    ldmia r5!, {r0}
    eors r3, r3, r0
    stmia r4!, {r3}
    ldmia r5!, {r0}
    mov r1, r8
    eors r1, r1, r0
    stmia r4!, {r1}
    mov r8, r1
    ldmia r5!, {r0}
    mov r1, r9
    eors r1, r1, r0
    stmia r4!, {r1}
    mov r9, r1
    ldmia r5!, {r0}
    ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8]
    eors r1, r1, r0
    stmia r4!, {r1}
    str r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8]
    ldmia r5!, {r0}
    mov r1, r10
    eors r1, r1, r0
    stmia r4!, {r1}
    mov r10, r1
    ldmia r5!, {r0}
    mov r1, r11
    eors r1, r1, r0
    stmia r4!, {r1}
    mov r11, r1
Xoodyak_EncryptFullBlocks_EndLoop:
    @ XOR 0x01 into state byte 24 (low byte of r12).
    movs r0, #1
    mov r1, r12
    eors r1, r1, r0
    mov r12, r1
    @ Save the advanced pointers and get state words 8-9 back into r4/r5.
    str r5, [sp, #XoodyakCrypt_offsetI+8]
    str r4, [sp, #XoodyakCrypt_offsetO+8]
    pop {r4, r5}
    ldr r1, [sp, #XoodyakCrypt_offsetIOLen]
    subs r1, r1, #24
    str r1, [sp, #XoodyakCrypt_offsetIOLen]
    @ Carry set (no borrow) means at least one more full block remains.
    bcs Xoodyak_EncryptFullBlocks_Loop
    ldr r0, [sp, #XoodyakCrypt_offsetState] @ Save state
    stm r0!, {r3}
    mov r1, r8
    mov r2, r9
    ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03]
    stm r0!, {r1,r2,r3}
    mov r1, r10
    mov r2, r11
    mov r3, r12
    stm r0!, {r1,r2,r3}
    mov r1, lr
    stm r0!, {r1,r4,r5,r6,r7}
    ldr r0, [sp, #XoodyakCrypt_offsetInitialLen] @ Compute processed length
    ldr r2, [sp, #XoodyakCrypt_offsetIOLen]
    adds r2, r2, #24
    subs r0, r0, r2
    add sp, #XoodyakCrypt_SAS @ Free stack and pop
    pop {r4-r7}
    mov r8, r4
    mov r9, r5
    mov r10, r6
    mov r11, r7
    pop {r3-r7,pc}
Xoodyak_EncryptFullBlocks_Unaligned:
    @ Same processing as the aligned path, with byte-wise loads and stores
    @ (r2 is the scratch register of mLoadU/mStoreU). r4/r5 are advanced by hand.
    mLoadU r0, r5, 0, r2
    eors r3, r3, r0
    mStoreU r4, 0, r3, r2, locRegL
    mLoadU r0, r5, 4, r2
    mov r1, r8
    eors r1, r1, r0
    mStoreU r4, 4, r1, r2, locRegL
    mov r8, r1
    mLoadU r0, r5, 8, r2
    mov r1, r9
    eors r1, r1, r0
    mStoreU r4, 8, r1, r2, locRegL
    mov r9, r1
    mLoadU r0, r5, 12, r2
    ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8]
    eors r1, r1, r0
    mStoreU r4, 12, r1, r2, locRegL
    str r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8]
    mLoadU r0, r5, 16, r2
    mov r1, r10
    eors r1, r1, r0
    mStoreU r4, 16, r1, r2, locRegL
    mov r10, r1
    mLoadU r0, r5, 20, r2
    mov r1, r11
    eors r1, r1, r0
    mStoreU r4, 20, r1, r2, locRegL
    mov r11, r1
    adds r4, r4, #24
    adds r5, r5, #24
    b Xoodyak_EncryptFullBlocks_EndLoop
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen)
@ {
@ size_t initialLength = IOLen@
@ do {
@ SnP_Permute(state)@
@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@
@ SnP_AddBytes(state, O, 0, Xoodyak_Rkout)@
@ SnP_AddByte(state, 0x01, Xoodyak_Rkout)@
@ I += Xoodyak_Rkout@
@ O += Xoodyak_Rkout@
@ IOLen -= Xoodyak_Rkout@
@ } while (IOLen >= Xoodyak_Rkout)@
@ return initialLength - IOLen@
@ }
@
@ In:  r0 = state, r1 = I, r2 = O, r3 = IOLen. The first 24-byte block is
@      processed unconditionally (do/while), so IOLen is expected to be >= 24.
@ Out: r0 = number of bytes processed.
@ Uses the XoodyakCrypt_* stack layout.
    .align 4
    .global Xoodyak_DecryptFullBlocks
    .type Xoodyak_DecryptFullBlocks, %function;
Xoodyak_DecryptFullBlocks:
    @ Prologue: save r3-r7, lr, then r8-r11 (copied through r4-r7 before pushing).
    push {r3-r7,lr}
    mov r4, r8
    mov r5, r9
    mov r6, r10
    mov r7, r11
    push {r4-r7}
    sub sp, #XoodyakCrypt_SAS
    str r0, [sp, #XoodyakCrypt_offsetState] @ setup variables on stack
    str r1, [sp, #XoodyakCrypt_offsetI]
    str r2, [sp, #XoodyakCrypt_offsetO]
    str r3, [sp, #XoodyakCrypt_offsetInitialLen]
    @ The stored length is kept 24 below the real remaining length, so the loop
    @ test is a single subs/bcs.
    subs r3, r3, #24
    str r3, [sp, #XoodyakCrypt_offsetIOLen]
    @ Address the permutation jumps back to; +1 sets bit 0 for bx.
    ldr r5, =Xoodyak_DecryptFullBlocks_Ret+1
    str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn]
    @ State words 0-2 -> r3, r8, r9; word 3 -> stack; words 4-7 -> r10, r11, r12, lr;
    @ words 8-11 -> r4-r7.
    ldm r0!, {r3,r5,r6,r7} @ state in registers
    mov r8, r5
    mov r9, r6
    str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03]
    ldm r0!, {r4,r5,r6,r7}
    mov r10, r4
    mov r11, r5
    mov r12, r6
    mov lr, r7
    ldm r0!, {r4,r5,r6,r7}
Xoodyak_DecryptFullBlocks_Loop:
    ldr r0, =Xoodoo_Permute_12roundsAsm
    bx r0
    .align 4
Xoodyak_DecryptFullBlocks_Ret:
    @ r4/r5 hold state words but are needed as pointer registers for the
    @ ldmia/stmia below, so park them on the stack. Every sp-relative offset
    @ up to the matching pop is therefore biased by +8.
    push {r4, r5}
    ldr r5, [sp, #XoodyakCrypt_offsetI+8]
    ldr r4, [sp, #XoodyakCrypt_offsetO+8]
    @ The word-wise path needs BOTH pointers 4-byte aligned, so test the low
    @ two bits of (I | O). An AND here would miss, for example, I % 4 == 1 with
    @ O % 4 == 2 and run ldmia/stmia on unaligned addresses.
    mov r0, r4
    orrs r0, r0, r5
    lsls r0, r0, #30
    bne Xoodyak_DecryptFullBlocks_Unaligned
    @ Aligned path: for state words 0-5, output = state ^ input, and the
    @ state word is replaced by the input word.
    ldmia r5!, {r0}
    eors r3, r3, r0
    stmia r4!, {r3}
    mov r3, r0
    ldmia r5!, {r0}
    mov r1, r8
    eors r1, r1, r0
    stmia r4!, {r1}
    mov r8, r0
    ldmia r5!, {r0}
    mov r1, r9
    eors r1, r1, r0
    stmia r4!, {r1}
    mov r9, r0
    ldmia r5!, {r0}
    ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8]
    eors r1, r1, r0
    stmia r4!, {r1}
    str r0, [sp, #Xoodoo_Permute_12rounds_offsetA03+8]
    ldmia r5!, {r0}
    mov r1, r10
    eors r1, r1, r0
    stmia r4!, {r1}
    mov r10, r0
    ldmia r5!, {r0}
    mov r1, r11
    eors r1, r1, r0
    stmia r4!, {r1}
    mov r11, r0
Xoodyak_DecryptFullBlocks_EndLoop:
    @ XOR 0x01 into state byte 24 (low byte of r12).
    movs r0, #1
    mov r1, r12
    eors r1, r1, r0
    mov r12, r1
    @ Save the advanced pointers and get state words 8-9 back into r4/r5.
    str r5, [sp, #XoodyakCrypt_offsetI+8]
    str r4, [sp, #XoodyakCrypt_offsetO+8]
    pop {r4, r5}
    ldr r1, [sp, #XoodyakCrypt_offsetIOLen]
    subs r1, r1, #24
    str r1, [sp, #XoodyakCrypt_offsetIOLen]
    @ Carry set (no borrow) means at least one more full block remains.
    bcs Xoodyak_DecryptFullBlocks_Loop
    ldr r0, [sp, #XoodyakCrypt_offsetState] @ Save state
    stm r0!, {r3}
    mov r1, r8
    mov r2, r9
    ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03]
    stm r0!, {r1,r2,r3}
    mov r1, r10
    mov r2, r11
    mov r3, r12
    stm r0!, {r1,r2,r3}
    mov r1, lr
    stm r0!, {r1,r4,r5,r6,r7}
    ldr r0, [sp, #XoodyakCrypt_offsetInitialLen] @ Compute processed length
    ldr r2, [sp, #XoodyakCrypt_offsetIOLen]
    adds r2, r2, #24
    subs r0, r0, r2
    add sp, #XoodyakCrypt_SAS @ Free stack and pop
    pop {r4-r7}
    mov r8, r4
    mov r9, r5
    mov r10, r6
    mov r11, r7
    pop {r3-r7,pc}
Xoodyak_DecryptFullBlocks_Unaligned:
    @ Same processing as the aligned path, with byte-wise loads and stores
    @ (r2 is the scratch register of mLoadU/mStoreU). r4/r5 are advanced by hand.
    mLoadU r0, r5, 0, r2
    eors r3, r3, r0
    mStoreU r4, 0, r3, r2, locRegL
    mov r3, r0
    mLoadU r0, r5, 4, r2
    mov r1, r8
    eors r1, r1, r0
    mStoreU r4, 4, r1, r2, locRegL
    mov r8, r0
    mLoadU r0, r5, 8, r2
    mov r1, r9
    eors r1, r1, r0
    mStoreU r4, 8, r1, r2, locRegL
    mov r9, r0
    mLoadU r0, r5, 12, r2
    ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8]
    eors r1, r1, r0
    mStoreU r4, 12, r1, r2, locRegL
    str r0, [sp, #Xoodoo_Permute_12rounds_offsetA03+8]
    mLoadU r0, r5, 16, r2
    mov r1, r10
    eors r1, r1, r0
    mStoreU r4, 16, r1, r2, locRegL
    mov r10, r0
    mLoadU r0, r5, 20, r2
    mov r1, r11
    eors r1, r1, r0
    mStoreU r4, 20, r1, r2, locRegL
    mov r11, r0
    adds r4, r4, #24
    adds r5, r5, #24
    b Xoodyak_DecryptFullBlocks_EndLoop
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifdef XoodooReference
#include "displayIntermediateValues.h"
#include <assert.h>
#include <string.h>
#include "Xoodyak.h"
#ifdef OUTPUT
#include <stdlib.h>
#include <string.h>
static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length);

/*
 * Write one line to stream f: the synopsis text, a colon, then each of the
 * `length` bytes of `data` as a space followed by two lowercase hex digits,
 * terminated by a newline.
 */
static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length)
{
    unsigned int i;

    fprintf(f, "%s:", synopsis);
    for(i=0; i<length; i++)
        fprintf(f, " %02x", (unsigned int)data[i]);
    fprintf(f, "\n");
}
#define MyMin(a,b) (((a) < (b)) ? (a) : (b))
#ifdef XKCP_has_Xoodoo
#include "Xoodoo-SnP.h"
#define SnP Xoodoo
#define SnP_Permute Xoodoo_Permute_12rounds
#define prefix Xoodyak
#include ""
#undef prefix
#undef SnP
#undef SnP_Permute
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Xoodyak_h_
#define _Xoodyak_h_
#include "config.h"
#ifdef XKCP_has_Xoodoo
#include <stddef.h>
#include "Cyclist.h"
#include "Xoodoo-SnP.h"
#include "Xoodyak-parameters.h"
KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment)
#error This requires an implementation of Xoodoo
The eXtended Keccak Code Package (XKCP)
Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _align_h_
#define _align_h_
/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. */
#ifdef ALIGN
#undef ALIGN
#if defined(__GNUC__)
#define ALIGN(x) __attribute__ ((aligned(x)))
#elif defined(_MSC_VER)
#define ALIGN(x) __declspec(align(x))
#elif defined(__ARMCC_VERSION)
#define ALIGN(x) __align(x)
#define ALIGN(x)
#define CRYPTO_ABYTES 16
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
1. source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
2. binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation;
3. the name of the copyright holder is not used to endorse products
built using this software without specific written permission.
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
Issue Date: 20/12/2007
Changes for ARM 9/9/2010
#ifndef _BRG_ENDIAN_H
#define _BRG_ENDIAN_H
#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
#if 0
/* Include files where endian defines and byteswap functions may reside */
#if defined( __sun )
# include <sys/isa_defs.h>
#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
# include <sys/endian.h>
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
# include <machine/endian.h>
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
# if !defined( __MINGW32__ ) && !defined( _AIX )
# include <endian.h>
# if !defined( __BEOS__ )
# include <byteswap.h>
# endif
# endif
/* Now attempt to set the define for platform byte order using any */
/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
/* seem to encompass most endian symbol definitions */
#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
# endif
#elif defined( BIG_ENDIAN )
#elif defined( LITTLE_ENDIAN )
#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
# endif
#elif defined( _BIG_ENDIAN )
#elif defined( _LITTLE_ENDIAN )
#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN )
#elif defined( __LITTLE_ENDIAN )
#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
# endif
#elif defined( __BIG_ENDIAN__ )
#elif defined( __LITTLE_ENDIAN__ )
/* if the platform byte order could not be determined, then try to */
/* set this define using common machine defines */
#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
defined( vax ) || defined( vms ) || defined( VMS ) || \
defined( __VMS ) || defined( _M_X64 )
#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \
defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ )
#elif defined(__arm__)
# ifdef __BIG_ENDIAN
# else
# endif
#elif 1 /* **** EDIT HERE IF NECESSARY **** */
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order
/* File generated by ToTargetConfigFile.xsl */
#define XKCP_has_Xoodyak
#define XKCP_has_Xoodoo
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#include "crypto_aead.h"
#include "api.h"
#include "Xoodyak.h"
#include <string.h>
#if !defined(CRYPTO_KEYBYTES)
#if !defined(CRYPTO_NPUBBYTES)
#define TAGLEN 16
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k)
Xoodyak_Instance instance;
Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, NULL, 0, NULL, 0);
Xoodyak_Absorb(&instance, npub, CRYPTO_NPUBBYTES);
Xoodyak_Absorb(&instance, ad, (size_t)adlen);
Xoodyak_Encrypt(&instance, m, c, (size_t)mlen);
Xoodyak_Squeeze(&instance, c + mlen, TAGLEN);
*clen = mlen + TAGLEN;
#if 0
unsigned int i;
for (i = 0; i < *clen; ++i )
printf("\\x%02x", c[i] );
return 0;
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k)
Xoodyak_Instance instance;
unsigned char tag[TAGLEN];
unsigned long long mlen_;
*mlen = 0;
if (clen < TAGLEN) {
return -1;
mlen_ = clen - TAGLEN;
Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, NULL, 0, NULL, 0);
Xoodyak_Absorb(&instance, npub, CRYPTO_NPUBBYTES);
Xoodyak_Absorb(&instance, ad, (size_t)adlen);
Xoodyak_Decrypt(&instance, c, m, (size_t)mlen_);
Xoodyak_Squeeze(&instance, tag, TAGLEN);
if (memcmp(tag, c + mlen_, TAGLEN) != 0) {
memset(m, 0, (size_t)mlen_);
return -1;
*mlen = mlen_;
return 0;
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Cyclist_h_
#define _Cyclist_h_
#include <stdint.h>
#include "align.h"
#define Cyclist_ModeHash 1
#define Cyclist_ModeKeyed 2
#define Cyclist_PhaseDown 1
#define Cyclist_PhaseUp 2
#ifdef OUTPUT
#include <stdio.h>
#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
uint8_t state[size]; \
uint8_t stateShadow[size]; \
FILE *file; \
unsigned int phase; \
unsigned int mode; \
unsigned int Rabsorb; \
unsigned int Rsqueeze; \
} prefix##_Instance;
#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
uint8_t state[size]; \
unsigned int phase; \
unsigned int mode; \
unsigned int Rabsorb; \
unsigned int Rsqueeze; \
} prefix##_Instance;
#define KCP_DeclareCyclistFunctions(prefix) \
void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \
void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \
void prefix##_Encrypt(prefix##_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen); \
void prefix##_Decrypt(prefix##_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen); \
void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \
void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \
void prefix##_Ratchet(prefix##_Instance *instance);
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/*
 * Name mapping for the Cyclist template.
 *
 * JOIN() pastes its two arguments after macro expansion, so the generic
 * SnP_xxx / Cyclist_xxx names used in this file resolve to names built from
 * the `SnP` and `prefix` macros, which the including file is expected to
 * define before inclusion.
 */
#define JOIN0(a, b) a ## b
#define JOIN(a, b) JOIN0(a, b)

/* Permutation (state and permutation) primitives. */
#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize)
#define SnP_Initialize JOIN(SnP, _Initialize)
#define SnP_AddBytes JOIN(SnP, _AddBytes)
#define SnP_AddByte JOIN(SnP, _AddByte)
#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes)
#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes)
#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes)

/* Public interface. */
#define Cyclist_Instance JOIN(prefix, _Instance)
#define Cyclist_Initialize JOIN(prefix, _Initialize)
#define Cyclist_Absorb JOIN(prefix, _Absorb)
#define Cyclist_Encrypt JOIN(prefix, _Encrypt)
#define Cyclist_Decrypt JOIN(prefix, _Decrypt)
#define Cyclist_Squeeze JOIN(prefix, _Squeeze)
#define Cyclist_SqueezeKey JOIN(prefix, _SqueezeKey)
#define Cyclist_Ratchet JOIN(prefix, _Ratchet)

/* Internal helpers. */
#define Cyclist_AbsorbAny JOIN(prefix, _AbsorbAny)
#define Cyclist_AbsorbKey JOIN(prefix, _AbsorbKey)
#define Cyclist_SqueezeAny JOIN(prefix, _SqueezeAny)
#define Cyclist_Down JOIN(prefix, _Down)
#define Cyclist_Up JOIN(prefix, _Up)
#define Cyclist_Crypt JOIN(prefix, _Crypt)

/* Parameters. */
#define Cyclist_f_bPrime JOIN(prefix, _f_bPrime)
#define Cyclist_Rhash JOIN(prefix, _Rhash)
#define Cyclist_Rkin JOIN(prefix, _Rkin)
#define Cyclist_Rkout JOIN(prefix, _Rkout)
#define Cyclist_lRatchet JOIN(prefix, _lRatchet)

/* Optional full-block fast paths; the conditional is closed here so it does
 * not swallow the code that follows. */
#if defined(CyclistFullBlocks_supported)
#define Cyclist_AbsorbKeyedFullBlocks JOIN(prefix, _AbsorbKeyedFullBlocks)
#define Cyclist_AbsorbHashFullBlocks JOIN(prefix, _AbsorbHashFullBlocks)
#define Cyclist_SqueezeKeyedFullBlocks JOIN(prefix, _SqueezeKeyedFullBlocks)
#define Cyclist_SqueezeHashFullBlocks JOIN(prefix, _SqueezeHashFullBlocks)
#define Cyclist_EncryptFullBlocks JOIN(prefix, _EncryptFullBlocks)
#define Cyclist_DecryptFullBlocks JOIN(prefix, _DecryptFullBlocks)
#endif
/* ------- Cyclist internal interfaces ------- */
/*
 * Down step: XOR the block Xi (XiLen bytes) into the start of the state,
 * XOR the padding byte 0x01 right after it and XOR the domain byte into the
 * last state byte.  In hash mode only bit 0 of Cd is used.
 * Leaves the instance in the Down phase.
 */
static void Cyclist_Down(Cyclist_Instance *instance, const uint8_t *Xi, unsigned int XiLen, uint8_t Cd)
{
    uint8_t domainByte = Cd;

    if (instance->mode == Cyclist_ModeHash) {
        domainByte = (uint8_t)(Cd & 0x01);
    }

    SnP_AddBytes(instance->state, Xi, 0, XiLen);
    SnP_AddByte(instance->state, 0x01, XiLen);
    SnP_AddByte(instance->state, domainByte, Cyclist_f_bPrime - 1);
    instance->phase = Cyclist_PhaseDown;
}
/*
 * Up step: outside hash mode XOR the domain byte Cu into the last state
 * byte, apply the permutation, then copy the first YiLen state bytes to Yi.
 * Leaves the instance in the Up phase.
 *
 * With OUTPUT defined and a trace file set, the difference between the
 * state and its shadow copy before the permutation and the state after the
 * permutation are written to the trace file.
 */
static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu)
{
#if defined(OUTPUT)
    uint8_t traceBytes[Cyclist_f_bPrime];
#endif

    if (instance->mode != Cyclist_ModeHash) {
        SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1);
    }

#if defined(OUTPUT)
    if (instance->file != NULL) {
        /* traceBytes = state XOR shadow, i.e. what was added since the last permutation */
        SnP_ExtractBytes(instance->stateShadow, traceBytes, 0, Cyclist_f_bPrime);
        SnP_ExtractAndAddBytes(instance->state, traceBytes, traceBytes, 0, Cyclist_f_bPrime);
    }
#endif

    SnP_Permute(instance->state);

#if defined(OUTPUT)
    if (instance->file != NULL) {
        memcpy(instance->stateShadow, instance->state, sizeof(instance->state));
        fprintf(instance->file, "Data XORed");
        displayByteString(instance->file, "", traceBytes, Cyclist_f_bPrime);
        SnP_ExtractBytes(instance->stateShadow, traceBytes, 0, Cyclist_f_bPrime);
        fprintf(instance->file, "After f() ");
        displayByteString(instance->file, "", traceBytes, Cyclist_f_bPrime);
    }
#endif

    instance->phase = Cyclist_PhaseUp;
    SnP_ExtractBytes(instance->state, Yi, 0, YiLen);
}
/*
 * Absorb XLen bytes of X in blocks of at most r bytes.
 *
 * Every block is preceded by an Up step when the instance is not already in
 * the Up phase.  The domain byte Cd is used for the first block only; later
 * blocks use 0.  At least one block is always processed, even for XLen == 0.
 * When CyclistFullBlocks_supported is defined, remaining full blocks are
 * handed to the keyed or hash full-block helper after the first block.
 */
static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd)
{
    unsigned int blockLen;

    for (;;) {
        if (instance->phase != Cyclist_PhaseUp) {
            Cyclist_Up(instance, NULL, 0, 0);
        }

        blockLen = MyMin(XLen, r);
        Cyclist_Down(instance, X, blockLen, Cd);
        Cd = 0;
        X += blockLen;
        XLen -= blockLen;

#if defined(CyclistFullBlocks_supported)
        if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) {
            size_t consumed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen);
            X += consumed;
            XLen -= consumed;
        }
        else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) {
            size_t consumed = Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen);
            X += consumed;
            XLen -= consumed;
        }
#endif

        if (XLen == 0) {
            break;
        }
    }
}
/*
 * Switch the instance from hash mode to keyed mode and absorb the key.
 *
 * The string K || ID || (uint8_t)IDLen is absorbed with domain byte 0x02 at
 * the keyed absorb rate; a non-empty counter is then absorbed one byte per
 * block with domain byte 0x00.
 *
 * Preconditions (checked with assert only, so not enforced when NDEBUG is
 * defined): the instance is in hash mode and KLen + IDLen <= Cyclist_Rkin - 1,
 * which is what keeps the copies below inside KID.
 *
 * ID may be NULL when IDLen is 0: the copy from ID is skipped in that case,
 * because calling memcpy with a null source is undefined even for length 0.
 */
static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
{
    uint8_t KID[Cyclist_Rkin];

    assert(instance->mode == Cyclist_ModeHash);
    assert((KLen + IDLen) <= (Cyclist_Rkin - 1));

    instance->mode = Cyclist_ModeKeyed;
    instance->Rabsorb = Cyclist_Rkin;
    instance->Rsqueeze = Cyclist_Rkout;
    if (KLen != 0) {
        memcpy(KID, K, KLen);
        if (IDLen != 0) {
            memcpy(KID + KLen, ID, IDLen);
        }
        KID[KLen + IDLen] = (uint8_t)IDLen;
        Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02);
        if (counterLen != 0) {
            Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00);
        }
    }
}
/*
 * Cyclist_SqueezeAny: write YLen output bytes to Y.
 *
 * The first block (at most instance->Rsqueeze bytes) comes from an Up step
 * carrying the domain byte Cu.  Every further block comes from a Down step
 * with an empty block followed by an Up step with domain byte 0.
 * When CyclistFullBlocks_supported is defined, runs of full blocks are
 * handed to the keyed or hash full-block helper.
 *
 * NOTE(review): in the text visible here no `else` separates the full-block
 * fast path from the Down/Up step that follows it, and the block braces are
 * not visible either; confirm the intended nesting before editing this loop.
 * NOTE(review): the keyed fast path compares YLen with Cyclist_Rkin, while
 * the keyed squeeze rate set in Cyclist_AbsorbKey is Cyclist_Rkout; confirm
 * this is intended.
 */
static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu)
unsigned int len;
/* First block: Up with the caller's domain byte. */
len = MyMin(YLen, instance->Rsqueeze );
Cyclist_Up(instance, Y, len, Cu);
Y += len;
YLen -= len;
while (YLen != 0) {
#if defined(CyclistFullBlocks_supported)
if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkin)) {
size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen);
Y += lenProcessed;
YLen -= lenProcessed;
else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) {
size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen);
Y += lenProcessed;
YLen -= lenProcessed;
/* Generic path: empty Down, then Up to obtain the next block. */
Cyclist_Down(instance, NULL, 0, 0);
len = MyMin(YLen, instance->Rsqueeze );
Cyclist_Up(instance, Y, len, 0);
Y += len;
YLen -= len;
/*
 * Cyclist_Crypt: encrypt (decrypt == 0) or decrypt (decrypt != 0) IOLen
 * bytes from I to O, in blocks of at most Cyclist_Rkout bytes.
 *
 * Per block: an Up step without extraction (domain byte 0x80 for the first
 * block, 0x00 afterwards), then O = I XOR keystream taken from the start of
 * the state, then a Down step that absorbs the plaintext block: O when
 * decrypting, a saved copy P of I when encrypting.
 * When CyclistFullBlocks_supported is defined, full blocks after the first
 * one are handed to the Encrypt/Decrypt full-block helper.
 *
 * NOTE(review): the keystream XOR calls Xoodoo_ExtractAndAddBytes directly
 * while the rest of this file goes through the SnP_ExtractAndAddBytes
 * mapping; the two agree only when SnP is Xoodoo.
 * NOTE(review): in the text visible here no `else` separates the full-block
 * fast path from the per-block code, and the closing braces are not visible;
 * confirm the intended nesting before editing this loop.
 */
static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt)
unsigned int splitLen;
uint8_t P[Cyclist_Rkout];
/* Domain byte of the first Up; reset to 0x00 after the first block. */
uint8_t Cu = 0x80;
do {
if (decrypt != 0) {
#if defined(CyclistFullBlocks_supported)
if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen);
I += lenProcessed;
O += lenProcessed;
IOLen -= lenProcessed;
splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
Xoodoo_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
/* O now holds the plaintext block, which is what gets absorbed. */
Cyclist_Down(instance, O, splitLen, 0x00);
I += splitLen;
O += splitLen;
IOLen -= splitLen;
else {
#if defined(CyclistFullBlocks_supported)
if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen);
I += lenProcessed;
O += lenProcessed;
IOLen -= lenProcessed;
splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
/* Keep a copy of the plaintext block before O is written. */
memcpy(P, I, splitLen);
Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
Xoodoo_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
Cyclist_Down(instance, P, splitLen, 0x00);
I += splitLen;
O += splitLen;
IOLen -= splitLen;
Cu = 0x00;
} while ( IOLen != 0 );
/* ------- Cyclist interfaces ------- */
/*
 * Initialize a Cyclist instance.
 *
 * The permutation state is reset through the SnP layer, the instance starts
 * in hash mode and Up phase with both rates set to Cyclist_Rhash.  When
 * KLen is non-zero the instance is switched to keyed mode by absorbing
 * K, ID and counter (see Cyclist_AbsorbKey); ID and counter are ignored
 * when KLen is zero.
 *
 * The explicit reset of instance->state matters: every later operation XORs
 * into the state, so it must not start from whatever the caller's memory
 * happened to contain.
 */
void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
{
    SnP_StaticInitialize();
    SnP_Initialize(instance->state);
    instance->phase = Cyclist_PhaseUp;
    instance->mode = Cyclist_ModeHash;
    instance->Rabsorb = Cyclist_Rhash;
    instance->Rsqueeze = Cyclist_Rhash;
#ifdef OUTPUT
    instance->file = 0;
    SnP_Initialize( instance->stateShadow );
#endif
    if (KLen != 0) {
        Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, counter, counterLen);
    }
}
/* Absorb XLen bytes of X at the instance's absorb rate, domain byte 0x03. */
void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, size_t XLen)
{
    const uint8_t absorbDomain = 0x03;

    Cyclist_AbsorbAny(instance, X, XLen, instance->Rabsorb, absorbDomain);
}
/* Encrypt PLen bytes from P into C.  The instance must be in keyed mode. */
void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen)
{
    const int decryptFlag = 0;

    assert(instance->mode == Cyclist_ModeKeyed);
    Cyclist_Crypt(instance, P, C, PLen, decryptFlag);
}
/* Decrypt CLen bytes from C into P.  The instance must be in keyed mode. */
void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen)
{
    const int decryptFlag = 1;

    assert(instance->mode == Cyclist_ModeKeyed);
    Cyclist_Crypt(instance, C, P, CLen, decryptFlag);
}
/* Squeeze YLen output bytes into Y, domain byte 0x40. */
void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen)
{
    const uint8_t squeezeDomain = 0x40;

    Cyclist_SqueezeAny(instance, Y, YLen, squeezeDomain);
}
/* Squeeze KLen bytes into K, domain byte 0x20.  Keyed mode only. */
void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen)
{
    const uint8_t squeezeKeyDomain = 0x20;

    assert(instance->mode == Cyclist_ModeKeyed);
    Cyclist_SqueezeAny(instance, K, KLen, squeezeKeyDomain);
}
/*
 * Ratchet the state: squeeze Cyclist_lRatchet bytes with domain byte 0x10
 * and absorb those same bytes again with domain byte 0x00.  Keyed mode only.
 */
void Cyclist_Ratchet(Cyclist_Instance *instance)
{
    uint8_t squeezed[Cyclist_lRatchet];

    assert(instance->mode == Cyclist_ModeKeyed);

    /* Squeeze then absorb is the same as overwriting with zeros */
    Cyclist_SqueezeAny(instance, squeezed, sizeof(squeezed), 0x10);
    Cyclist_AbsorbAny(instance, squeezed, sizeof(squeezed), instance->Rabsorb, 0x00);
}
/*
 * Remove the name mapping set up at the top of the template so it can be
 * included again with a different `prefix` / `SnP`.
 */
#undef SnP_StaticInitialize
#undef SnP_Initialize
#undef SnP_AddBytes
#undef SnP_AddByte
#undef SnP_OverwriteBytes
#undef SnP_ExtractBytes
#undef SnP_ExtractAndAddBytes

#undef Cyclist_Instance
#undef Cyclist_Initialize
#undef Cyclist_Absorb
#undef Cyclist_Encrypt
#undef Cyclist_Decrypt
#undef Cyclist_Squeeze
#undef Cyclist_SqueezeKey
#undef Cyclist_Ratchet

#undef Cyclist_AbsorbAny
#undef Cyclist_AbsorbKey
#undef Cyclist_SqueezeAny
#undef Cyclist_Down
#undef Cyclist_Up
#undef Cyclist_Crypt

#undef Cyclist_f_bPrime
#undef Cyclist_Rhash
#undef Cyclist_Rkin
#undef Cyclist_Rkout
#undef Cyclist_lRatchet

/* Only defined when the fast paths are enabled; the conditional is closed
 * here so it does not swallow what follows. */
#if defined(CyclistFullBlocks_supported)
#undef Cyclist_AbsorbKeyedFullBlocks
#undef Cyclist_AbsorbHashFullBlocks
#undef Cyclist_SqueezeKeyedFullBlocks
#undef Cyclist_SqueezeHashFullBlocks
#undef Cyclist_EncryptFullBlocks
#undef Cyclist_DecryptFullBlocks
#endif
/*
The eXtended Keccak Code Package (XKCP)

The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.

Implementation by Ronny Van Keer, hereby denoted as "the implementer".

For more information, feedback or questions, please refer to the Keccak Team website:
https://keccak.team/

To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
*/
#ifndef _Xoodoo_SnP_h_
#define _Xoodoo_SnP_h_
#include <stddef.h>
#include <stdint.h>
/** For the documentation, see SnP-documentation.h. */
#define Xoodoo_implementation "32-bit optimized ARM assembler implementation"
/* State size: 3*4*4 = 48 bytes. */
#define Xoodoo_stateSizeInBytes (3*4*4)
#define Xoodoo_stateAlignment 4
/* Expands to nothing: no static initialization is needed. */
#define Xoodoo_StaticInitialize()
void Xoodoo_Initialize(void *state);
/* XOR one byte into the state at byte position argOffset.
 * NOTE(review): argS and argOffset are not parenthesized in the expansion. */
#define Xoodoo_AddByte(argS, argData, argOffset) ((uint8_t*)argS)[argOffset] ^= (argData)
/* state[offset .. offset+length) ^= data[0 .. length) */
void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
/* state[offset .. offset+length) = data[0 .. length) */
void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
/* Set the first byteCount bytes of the state to zero. */
void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount);
//void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds);
void Xoodoo_Permute_6rounds(void *state);
void Xoodoo_Permute_12rounds(void *state);
/* data[0 .. length) = state[offset .. offset+length) */
void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length);
/* output[i] = input[i] ^ state[offset + i] for i in [0, length) */
void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length);
/* Xoofff fast loops; note that Xoofff_AddIs takes a length in bits. */
#define Xoodoo_FastXoofff_supported
void Xoofff_AddIs(uint8_t *output, const uint8_t *input, size_t bitLen);
size_t Xoofff_CompressFastLoop(uint8_t *kRoll, uint8_t *xAccu, const uint8_t *input, size_t length);
size_t Xoofff_ExpandFastLoop(uint8_t *yAccu, const uint8_t *kRoll, uint8_t *output, size_t length);
/* Cyclist full-block fast paths; each returns a byte count that the Cyclist
 * code uses to advance its pointers and decrease the remaining length. */
#define CyclistFullBlocks_supported
size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen);
size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen);
size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen);
size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen);
size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);
size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);
@ The eXtended Keccak Code Package (XKCP)
@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
@ For more information, feedback or questions, please refer to the Keccak Team website:
@ To the extent possible under law, the implementer has waived all copyright
@ and related or neighboring rights to the source code in this file.
@ WARNING: These functions work only on little endian CPU with
@ ARMv7m architecture (Cortex-M3, ...).
.syntax unified
@ ----------------------------------------------------------------------------
@ void Xoodoo_Initialize(void *state)
@
@ Sets the 48-byte state to zero with three 16-byte stores.
@ In:       r0 = state
@ Clobbers: r0 (advanced past the state), r1-r3, r12
.align 4
.global Xoodoo_Initialize
.type Xoodoo_Initialize, %function;
Xoodoo_Initialize:
    movs    r1, #0
    movs    r2, #0
    movs    r3, #0
    movs    r12, #0
    stmia   r0!, { r1 - r3, r12 }
    stmia   r0!, { r1 - r3, r12 }
    stmia   r0!, { r1 - r3, r12 }
    bx      lr
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
@ XORs length bytes of data into the state starting at byte offset:
@ word by word while at least 4 bytes remain, then byte by byte.
@ In: r0 = state, r1 = data, r2 = offset, r3 = length
.global Xoodoo_AddBytes
.type Xoodoo_AddBytes, %function;
Xoodoo_AddBytes:
    push    {r4,lr}
    adds    r0, r0, r2                  @ state += offset
    subs    r3, r3, #4                  @ .if length >= 4
    bcc     Xoodoo_AddBytes_Bytes
Xoodoo_AddBytes_LanesLoop:              @ then, perform on lanes
    ldr     r2, [r0]
    ldr     r4, [r1], #4
    eors    r2, r2, r4
    str     r2, [r0], #4
    subs    r3, r3, #4
    bcs     Xoodoo_AddBytes_LanesLoop
Xoodoo_AddBytes_Bytes:
    adds    r3, r3, #3                  @ r3 = remaining bytes - 1; carry clear when none remain
    bcc     Xoodoo_AddBytes_Exit
Xoodoo_AddBytes_BytesLoop:
    ldrb    r2, [r0]
    ldrb    r4, [r1], #1
    eors    r2, r2, r4
    strb    r2, [r0], #1
    subs    r3, r3, #1
    bcs     Xoodoo_AddBytes_BytesLoop
Xoodoo_AddBytes_Exit:
    pop     {r4,pc}
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
@ Copies length bytes of data into the state starting at byte offset:
@ word by word while at least 4 bytes remain, then byte by byte.
@ In: r0 = state, r1 = data, r2 = offset, r3 = length
.global Xoodoo_OverwriteBytes
.type Xoodoo_OverwriteBytes, %function;
Xoodoo_OverwriteBytes:
    adds    r0, r0, r2                  @ state += offset
    subs    r3, r3, #4                  @ .if length >= 4
    bcc     Xoodoo_OverwriteBytes_Bytes
Xoodoo_OverwriteBytes_LanesLoop:        @ then, perform on words
    ldr     r2, [r1], #4
    str     r2, [r0], #4
    subs    r3, r3, #4
    bcs     Xoodoo_OverwriteBytes_LanesLoop
Xoodoo_OverwriteBytes_Bytes:
    adds    r3, r3, #3                  @ r3 = remaining bytes - 1; carry clear when none remain
    bcc     Xoodoo_OverwriteBytes_Exit
Xoodoo_OverwriteBytes_BytesLoop:
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    subs    r3, r3, #1
    bcs     Xoodoo_OverwriteBytes_BytesLoop
Xoodoo_OverwriteBytes_Exit:
    bx      lr
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount)
@
@ Writes byteCount zero bytes at the start of the state:
@ byteCount / 4 word stores followed by byteCount % 4 byte stores.
@ In: r0 = state, r1 = byteCount
.global Xoodoo_OverwriteWithZeroes
.type Xoodoo_OverwriteWithZeroes, %function;
Xoodoo_OverwriteWithZeroes:
    movs    r3, #0
    lsrs    r2, r1, #2                  @ r2 = number of whole words
    beq     Xoodoo_OverwriteWithZeroes_Bytes
Xoodoo_OverwriteWithZeroes_LoopLanes:
    str     r3, [r0], #4
    subs    r2, r2, #1
    bne     Xoodoo_OverwriteWithZeroes_LoopLanes
Xoodoo_OverwriteWithZeroes_Bytes:
    ands    r1, #3                      @ r1 = number of trailing bytes
    beq     Xoodoo_OverwriteWithZeroes_Exit
Xoodoo_OverwriteWithZeroes_LoopBytes:
    strb    r3, [r0], #1
    subs    r1, r1, #1
    bne     Xoodoo_OverwriteWithZeroes_LoopBytes
Xoodoo_OverwriteWithZeroes_Exit:
    bx      lr
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
@
@ Copies length bytes of the state, starting at byte offset, to data:
@ word by word while at least 4 bytes remain, then byte by byte.
@ The state is only read; data is written.
@ In: r0 = state, r1 = data, r2 = offset, r3 = length
.global Xoodoo_ExtractBytes
.type Xoodoo_ExtractBytes, %function;
Xoodoo_ExtractBytes:
    adds    r0, r0, r2                  @ state += offset
    subs    r3, r3, #4                  @ .if length >= 4
    bcc     Xoodoo_ExtractBytes_Bytes
Xoodoo_ExtractBytes_LanesLoop:          @ then, handle words
    ldr     r2, [r0], #4
    str     r2, [r1], #4
    subs    r3, r3, #4
    bcs     Xoodoo_ExtractBytes_LanesLoop
Xoodoo_ExtractBytes_Bytes:
    adds    r3, r3, #3                  @ r3 = remaining bytes - 1; carry clear when none remain
    bcc     Xoodoo_ExtractBytes_Exit
Xoodoo_ExtractBytes_BytesLoop:
    ldrb    r2, [r0], #1
    strb    r2, [r1], #1
    subs    r3, r3, #1
    bcs     Xoodoo_ExtractBytes_BytesLoop
Xoodoo_ExtractBytes_Exit:
    bx      lr
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_ExtractAndAddBytes(void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
@
@ output[i] = input[i] XOR state[offset + i] for i in [0, length):
@ word by word while at least 4 bytes remain, then byte by byte.
@ In: r0 = state, r1 = input, r2 = output, r3 = offset, [sp] = length
.global Xoodoo_ExtractAndAddBytes
.type Xoodoo_ExtractAndAddBytes, %function;
Xoodoo_ExtractAndAddBytes:
    push    {r4,r5}
    adds    r0, r0, r3                  @ state += offset (offset register no longer needed, reuse for length)
    ldr     r3, [sp, #8]                @ get length argument from stack
    subs    r3, r3, #4                  @ .if length >= 4
    bcc     Xoodoo_ExtractAndAddBytes_Bytes
Xoodoo_ExtractAndAddBytes_LanesLoop:    @ then, handle words
    ldr     r5, [r0], #4
    ldr     r4, [r1], #4
    eors    r5, r5, r4
    str     r5, [r2], #4
    subs    r3, r3, #4
    bcs     Xoodoo_ExtractAndAddBytes_LanesLoop
Xoodoo_ExtractAndAddBytes_Bytes:
    adds    r3, r3, #3                  @ r3 = remaining bytes - 1; carry clear when none remain
    bcc     Xoodoo_ExtractAndAddBytes_Exit
Xoodoo_ExtractAndAddBytes_BytesLoop:
    ldrb    r5, [r0], #1
    ldrb    r4, [r1], #1
    eors    r5, r5, r4
    strb    r5, [r2], #1
    subs    r3, r3, #1
    bcs     Xoodoo_ExtractAndAddBytes_BytesLoop
Xoodoo_ExtractAndAddBytes_Exit:
    pop     {r4,r5}
    bx      lr
.align 4
@ ----------------------------------------------------------------------------
@ Rotation offsets used by the round macros below (_r0, _r1, _w1, _e1).
@ _t3 and _e0 are not referenced by the code visible in this part of the file.
.equ _r0 , 5
.equ _r1 , 14
.equ _t3 , 1
.equ _w1 , 11
.equ _e0 , 2
.equ _e1 , 8
@ Round constants, unrotated.
.equ _rc12 , 0x00000058
.equ _rc11 , 0x00000038
.equ _rc10 , 0x000003C0
.equ _rc9 , 0x000000D0
.equ _rc8 , 0x00000120
.equ _rc7 , 0x00000014
.equ _rc6 , 0x00000060
.equ _rc5 , 0x0000002C
.equ _rc4 , 0x00000380
.equ _rc3 , 0x000000F0
.equ _rc2 , 0x000001A0
.equ _rc1 , 0x00000012
@ _rcNxM = _rcN rotated right by (M * _r0) mod 32 bits, for the M-th round
@ of the 6-round sequence (the state stays rotated by -_r0 after each round).
.equ _rc6x1, 0x00000003
.equ _rc5x2, 0x0b000000
.equ _rc4x3, 0x07000000
.equ _rc3x4, 0x000f0000
.equ _rc2x5, 0x0000d000
.equ _rc1x6, 0x00000048
@ Same pre-rotation for the 12-round sequence.
.equ _rc12x1, 0xc0000002
.equ _rc11x2, 0x0e000000
.equ _rc10x3, 0x07800000
.equ _rc9x4 , 0x000d0000
.equ _rc8x5 , 0x00009000
.equ _rc7x6 , 0x00000050
.equ _rc6x7 , 0x0000000c
.equ _rc5x8 , 0x2c000000
.equ _rc4x9 , 0x1c000000
.equ _rc3x10, 0x003c0000
.equ _rc2x11, 0x00034000
.equ _rc1x12, 0x00000120
@ ----------------------------------------------------------------------------
@ mXor3 ro, a0, a1, a2, rho_e1, rho_e2
@ ro = a0 ^ ROL(a1, rho_e1) ^ ROL(a2, rho_e2)
@ A rotation that is 0 mod 32 is emitted as a plain eors.
.macro mXor3 ro, a0, a1, a2, rho_e1, rho_e2
    .if ((\rho_e1)%32) == 0
    eors    \ro, \a0, \a1
    .else
    eor     \ro, \a0, \a1, ROR #(32-(\rho_e1))%32
    .endif
    .if ((\rho_e2)%32) == 0
    eors    \ro, \ro, \a2
    .else
    eor     \ro, \ro, \a2, ROR #(32-(\rho_e2))%32
    .endif
.endm
@ mRliXor ro, ri, rot
@ ro = ro ^ ROL(ri, rot)   (the input register is the one rotated)
.macro mRliXor ro, ri, rot
    .if ((\rot)%32) == 0
    eors    \ro, \ro, \ri
    .else
    eor     \ro, \ro, \ri, ROR #(32-(\rot))%32
    .endif
.endm
@ mRloXor ro, ri, rot
@ ro = ri ^ ROL(ro, rot)   (the output register is the one rotated)
.macro mRloXor ro, ri, rot
    .if ((\rot)%32) == 0
    eors    \ro, \ro, \ri
    .else
    eor     \ro, \ri, \ro, ROR #(32-(\rot))%32
    .endif
.endm
@ mChi3 a0, a1, a2, r0, r1
@ Non-linear step on one column (a0, a1, a2), using r0 and r1 as scratch.
@ The ROR #_w1 / ROR #32-_w1 operands fold the pending rotation of the
@ operands into the bic/eors instructions.
.macro mChi3 a0,a1,a2,r0,r1
    bic     \r0, \a2, \a1, ROR #_w1
    eors    \a0, \a0, \r0, ROR #32-_w1
    bic     \r1, \a0, \a2, ROR #32-_w1
    eors    \a1, \a1, \r1
    bic     \r1, \a1, \a0
    eors    \a2, \a2, \r1, ROR #_w1
.endm
@ mRound: one Xoodoo round on the state held in r2-r12 and lr.
@ r2-r5 always hold the first plane; the other register arguments name which
@ registers currently hold the remaining words (the callers permute them
@ from round to round).  rho_e1 / rho_we2 are the rotations still pending on
@ those words and rc is the pre-rotated round constant.
@ Uses r0 and r1 as scratch.
.macro mRound r6i, r7i, r8i, r9i, r6w, r7w, r8w, r9w, r10i, r11i, r12i, lri, rho_e1, rho_we2, rc
    @ Theta: Column Parity Mixer (with late Rho-west, Rho-east bit rotations)
    mXor3   r0, r5, \r9i, \lri, \rho_e1, \rho_we2
    mXor3   r1, r2, \r6i, \r10i, \rho_e1, \rho_we2
    mRliXor r0, r0, _r1-_r0
    mRloXor r2, r0, 32-_r0
    mRloXor \r6i, r0, \rho_e1-_r0
    mRloXor \r10i, r0, \rho_we2-_r0
    mXor3   r0, r3, \r7i, \r11i, \rho_e1, \rho_we2
    mRliXor r1, r1, _r1-_r0
    mRloXor r3, r1, 32-_r0
    mRloXor \r7i, r1, \rho_e1-_r0
    mRloXor \r11i, r1, \rho_we2-_r0
    mXor3   r1, r4, \r8i, \r12i, \rho_e1, \rho_we2
    mRliXor r0, r0, _r1-_r0
    mRloXor r4, r0, 32-_r0
    mRloXor \r8i, r0, \rho_e1-_r0
    mRloXor \r12i, r0, \rho_we2-_r0
    mRliXor r1, r1, _r1-_r0
    mRloXor r5, r1, 32-_r0
    mRloXor \r9i, r1, \rho_e1-_r0
    mRloXor \lri, r1, \rho_we2-_r0
    @ After Theta the whole state is rotated -r0
    @ from here we must use a1.w instead of a1.i
    @ Iota: round constant
    @ 0xc0000002 is applied as two XORs, presumably because it cannot be
    @ encoded as a single immediate operand.
    .if \rc == 0xc0000002
    eor     r2, r2, #0x00000002
    eor     r2, r2, #0xc0000000
    .else
    eor     r2, r2, #\rc
    .endif
    @ Chi: non linear step, on columns
    mChi3   r2, \r6w, \r10i, r0, r1
    mChi3   r3, \r7w, \r11i, r0, r1
    mChi3   r4, \r8w, \r12i, r0, r1
    mChi3   r5, \r9w, \lri, r0, r1
.endm
@ ----------------------------------------------------------------------------
@ void Xoodoo_Permute_6rounds( void *state )
@
@ Applies six rounds in place.  The state is loaded into r2-r12 and lr, the
@ rounds run with lazy rotations, and the pending rotations are undone just
@ before the state is stored back.
@ In: r0 = state.  r4-r11 are preserved.
.global Xoodoo_Permute_6rounds
.type Xoodoo_Permute_6rounds, %function;
Xoodoo_Permute_6rounds:
    push    {r0,r4-r11,lr}
    ldmia   r0!, {r2-r5}
    ldmia   r0!, {r8-r9}                @ state words 4,5 go to r8,r9
    ldmia   r0!, {r6-r7}                @ state words 6,7 go to r6,r7
    ldmia   r0, {r10-r12,lr}
    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 32, 32, _rc6x1
    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2
    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3
    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4
    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5
    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6
    pop     {r0,r1}                     @ r0 = state, r1 = caller's r4
    ror     r2, r2, #32-(6*_r0)%32
    ror     r3, r3, #32-(6*_r0)%32
    ror     r4, r4, #32-(6*_r0)%32
    ror     r5, r5, #32-(6*_r0)%32
    ror     r6, r6, #32-(6*_r0+1)%32
    ror     r7, r7, #32-(6*_r0+1)%32
    ror     r8, r8, #32-(6*_r0+1)%32
    ror     r9, r9, #32-(6*_r0+1)%32
    ror     r10, r10, #32-(6*_r0+_e1+_w1)%32
    ror     r11, r11, #32-(6*_r0+_e1+_w1)%32
    ror     r12, r12, #32-(6*_r0+_e1+_w1)%32
    ror     lr, lr, #32-(6*_r0+_e1+_w1)%32
    stmia   r0, {r2-r12,lr}
    mov     r4, r1                      @ restore caller's r4
    pop     {r5-r11,pc}
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_Permute_12rounds( void *state )
@
@ Applies twelve rounds in place.  The state is loaded into r2-r12 and lr,
@ the rounds run with lazy rotations, and the pending rotations are undone
@ just before the state is stored back.
@ In: r0 = state.  r4-r11 are preserved.
.global Xoodoo_Permute_12rounds
.type Xoodoo_Permute_12rounds, %function;
Xoodoo_Permute_12rounds:
    push    {r0,r4-r11,lr}
    ldmia   r0, {r2-r12,lr}
    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc12x1
    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc11x2
    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc10x3
    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc9x4
    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc8x5
    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc7x6
    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc6x7
    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x8
    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x9
    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x10
    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x11
    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x12
    ror     r2, r2, #32-(12*_r0)%32
    ror     r3, r3, #32-(12*_r0)%32
    ror     r4, r4, #32-(12*_r0)%32
    ror     r5, r5, #32-(12*_r0)%32
    ror     r6, r6, #32-(12*_r0+1)%32
    ror     r7, r7, #32-(12*_r0+1)%32
    ror     r8, r8, #32-(12*_r0+1)%32
    ror     r9, r9, #32-(12*_r0+1)%32
    ror     r10, r10, #32-(12*_r0+_e1+_w1)%32
    ror     r11, r11, #32-(12*_r0+_e1+_w1)%32
    ror     r12, r12, #32-(12*_r0+_e1+_w1)%32
    ror     lr, lr, #32-(12*_r0+_e1+_w1)%32
    pop     {r0,r1}                     @ r0 = state, r1 = caller's r4
    stmia   r0, {r2-r12,lr}
    mov     r4, r1                      @ restore caller's r4
    pop     {r5-r11,pc}
.align 4
@ Block size in bytes (3*4*4 = 48).
.equ Xoofff_BlockSize , 3*4*4
@ ----------------------------------------------------------------------------
@ void Xoofff_AddIs(BitSequence *output, const BitSequence *input, BitLength bitLen)
@
@ output ^= input over bitLen bits.  Works down through 48-byte blocks,
@ 16-byte groups, words and bytes; a final partial byte is XORed and then
@ masked to its low (bitLen mod 8) bits before being stored.
@ In: r0 = output, r1 = input, r2 = bitLen
.global Xoofff_AddIs
.type Xoofff_AddIs, %function;
Xoofff_AddIs:
    push    {r4-r10,lr}
    subs    r2, r2, #Xoofff_BlockSize*8
    bcc     Xoofff_AddIs_LessThanBlock
Xoofff_AddIs_BlockLoop:
    ldr     r3, [r0, #0]
    ldr     r4, [r0, #4]
    ldr     r5, [r0, #8]
    ldr     r6, [r0, #12]
    ldr     r7, [r1], #4
    ldr     r8, [r1], #4
    ldr     r9, [r1], #4
    ldr     r10, [r1], #4
    eor     r3, r3, r7
    eor     r4, r4, r8
    eor     r5, r5, r9
    eor     r6, r6, r10
    str     r3, [r0], #4
    str     r4, [r0], #4
    str     r5, [r0], #4
    str     r6, [r0], #4
    ldr     r3, [r0, #0]
    ldr     r4, [r0, #4]
    ldr     r5, [r0, #8]
    ldr     r6, [r0, #12]
    ldr     r7, [r1], #4
    ldr     r8, [r1], #4
    ldr     r9, [r1], #4
    ldr     r10, [r1], #4
    eor     r3, r3, r7
    eor     r4, r4, r8
    eor     r5, r5, r9
    eor     r6, r6, r10
    str     r3, [r0], #4
    str     r4, [r0], #4
    str     r5, [r0], #4
    str     r6, [r0], #4
    ldr     r3, [r0, #0]
    ldr     r4, [r0, #4]
    ldr     r5, [r0, #8]
    ldr     r6, [r0, #12]
    ldr     r7, [r1], #4
    ldr     r8, [r1], #4
    ldr     r9, [r1], #4
    ldr     r10, [r1], #4
    eor     r3, r3, r7
    eor     r4, r4, r8
    eor     r5, r5, r9
    eor     r6, r6, r10
    str     r3, [r0], #4
    str     r4, [r0], #4
    str     r5, [r0], #4
    str     r6, [r0], #4
    subs    r2, r2, #Xoofff_BlockSize*8
    bcs     Xoofff_AddIs_BlockLoop
Xoofff_AddIs_LessThanBlock:
    adds    r2, r2, #Xoofff_BlockSize*8 @ r2 = bits still to process
    beq     Xoofff_AddIs_Return
    subs    r2, r2, #16*8
    bcc     Xoofff_AddIs_LessThan16
Xoofff_AddIs_16Loop:
    ldr     r3, [r0, #0]
    ldr     r4, [r0, #4]
    ldr     r5, [r0, #8]
    ldr     r6, [r0, #12]
    ldr     r7, [r1], #4
    ldr     r8, [r1], #4
    ldr     r9, [r1], #4
    ldr     r10, [r1], #4
    eor     r3, r3, r7
    eor     r4, r4, r8
    eor     r5, r5, r9
    eor     r6, r6, r10
    str     r3, [r0], #4
    str     r4, [r0], #4
    str     r5, [r0], #4
    str     r6, [r0], #4
    subs    r2, r2, #16*8
    bcs     Xoofff_AddIs_16Loop
Xoofff_AddIs_LessThan16:
    adds    r2, r2, #16*8
    beq     Xoofff_AddIs_Return
    subs    r2, r2, #4*8
    bcc     Xoofff_AddIs_LessThan4
Xoofff_AddIs_4Loop:
    ldr     r3, [r0]
    ldr     r7, [r1], #4
    eors    r3, r3, r7
    str     r3, [r0], #4
    subs    r2, r2, #4*8
    bcs     Xoofff_AddIs_4Loop
Xoofff_AddIs_LessThan4:
    adds    r2, r2, #4*8
    beq     Xoofff_AddIs_Return
    subs    r2, r2, #8
    bcc     Xoofff_AddIs_LessThan1
Xoofff_AddIs_1Loop:
    ldrb    r3, [r0]
    ldrb    r7, [r1], #1
    eors    r3, r3, r7
    strb    r3, [r0], #1
    subs    r2, r2, #8
    bcs     Xoofff_AddIs_1Loop
Xoofff_AddIs_LessThan1:
    adds    r2, r2, #8                  @ r2 = number of bits in the last partial byte
    beq     Xoofff_AddIs_Return
    ldrb    r3, [r0]
    ldrb    r7, [r1]
    movs    r1, #1
    eors    r3, r3, r7
    lsls    r1, r1, r2
    subs    r1, r1, #1                  @ r1 = (1 << r2) - 1
    ands    r3, r3, r1
    strb    r3, [r0]
Xoofff_AddIs_Return:
    pop     {r4-r10,pc}
.align 4
@ ----------------------------------------------------------------------------
@ size_t Xoofff_CompressFastLoop(unsigned char *kRoll, unsigned char *xAccu, const unsigned char *input, size_t length)
@
@ Per 48-byte input block: XOR the block into the current kRoll value, apply
@ the 6-round permutation, XOR the result into xAccu, then roll kRoll.
@ Returns the number of input bytes consumed (final input pointer minus the
@ initial one).
@ In: r0 = kRoll, r1 = xAccu, r2 = input, r3 = length
@
@ Stack frame offsets after the two pushes below:
.equ Xoofff_Compress_kRoll , 0
.equ Xoofff_Compress_input , 4
.equ Xoofff_Compress_xAccu , 8
.equ Xoofff_Compress_iInput , 12
.equ Xoofff_Compress_length , 16
.global Xoofff_CompressFastLoop
.type Xoofff_CompressFastLoop, %function;
Xoofff_CompressFastLoop:
    subs    r3, #Xoofff_BlockSize       @ length must be greater than block size
    push    {r1-r12,lr}
    push    {r0,r2}
    ldmia   r0, {r2-r12,lr}             @ get initial kRoll
Xoofff_CompressFastLoop_Loop:
    ldr     r0, [sp, #Xoofff_Compress_input] @ add input
    ldr     r1, [r0], #4
    eors    r2, r2, r1
    ldr     r1, [r0], #4
    eors    r3, r3, r1
    ldr     r1, [r0], #4
    eors    r4, r4, r1
    ldr     r1, [r0], #4
    eors    r5, r5, r1
    ldr     r1, [r0], #4
    eors    r6, r6, r1
    ldr     r1, [r0], #4
    eors    r7, r7, r1
    ldr     r1, [r0], #4
    eors    r8, r8, r1
    ldr     r1, [r0], #4
    eors    r9, r9, r1
    ldr     r1, [r0], #4
    eors    r10, r10, r1
    ldr     r1, [r0], #4
    eors    r11, r11, r1
    ldr     r1, [r0], #4
    eors    r12, r12, r1
    ldr     r1, [r0], #4
    eors    lr, lr, r1
    str     r0, [sp, #Xoofff_Compress_input]
    @ permutation
    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc6x1
    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2
    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3
    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4
    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5
    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6
    @ Extract and add into xAccu
    ldr     r0, [sp, #Xoofff_Compress_xAccu]
    ldr     r1, [r0]
    mRloXor r2, r1, (6*_r0)%32
    ldr     r1, [r0, #4]
    str     r2, [r0], #4
    mRloXor r3, r1, (6*_r0)%32
    ldr     r1, [r0, #4]
    str     r3, [r0], #4
    mRloXor r4, r1, (6*_r0)%32
    ldr     r1, [r0, #4]
    str     r4, [r0], #4
    mRloXor r5, r1, (6*_r0)%32
    str     r5, [r0], #4
    ldm     r0, {r2-r5}                 @ note that r6-r8 and r7-r9 are swapped
    mRliXor r2, r8, (6*_r0+1)%32
    mRliXor r3, r9, (6*_r0+1)%32
    mRliXor r4, r6, (6*_r0+1)%32
    mRliXor r5, r7, (6*_r0+1)%32
    stm     r0!, {r2-r5}
    ldm     r0, {r2-r5}
    mRliXor r2, r10, (6*_r0+_e1+_w1)%32
    mRliXor r3, r11, (6*_r0+_e1+_w1)%32
    mRliXor r4, r12, (6*_r0+_e1+_w1)%32
    mRliXor r5, lr, (6*_r0+_e1+_w1)%32
    stm     r0!, {r2-r5}
    @roll kRoll
    @ The words are reloaded in a shifted register order, so the store below
    @ rewrites the buffer as old words 4..11, old words 1..3, new word.
    ldr     r0, [sp, #Xoofff_Compress_kRoll]
    ldr     lr, [r0], #4
    ldmia   r0!, {r10-r12}
    ldmia   r0!, {r2-r9}
    eors    lr, lr, lr, LSL #13
    eors    lr, lr, r2, ROR #32-3
    sub     r0, #Xoofff_BlockSize
    stmia   r0, {r2-r12,lr}
    @ loop management
    ldr     r0, [sp, #Xoofff_Compress_length]
    subs    r0, #Xoofff_BlockSize
    str     r0, [sp, #Xoofff_Compress_length]
    bcs     Xoofff_CompressFastLoop_Loop
    @ return number of bytes processed
    ldr     r0, [sp, #Xoofff_Compress_input]
    ldr     r1, [sp, #Xoofff_Compress_iInput]
    sub     r0, r0, r1
    pop     {r1,r2}
    pop     {r1-r12,pc}
.align 4
@ ----------------------------------------------------------------------------
@ size_t Xoofff_ExpandFastLoop(unsigned char *yAccu, const unsigned char *kRoll, unsigned char *output, size_t length)
@
@ Per 48-byte output block: apply the 6-round permutation to the current
@ yAccu value, XOR kRoll into the result and store it to output, then roll
@ yAccu.  Returns the number of output bytes produced (final output pointer
@ minus the initial one).
@ In: r0 = yAccu, r1 = kRoll, r2 = output, r3 = length
@
@ Stack frame offsets after the two pushes below:
.equ Xoofff_Expand_yAccu , 0
.equ Xoofff_Expand_output , 4
.equ Xoofff_Expand_kRoll , 8
.equ Xoofff_Expand_iOutput , 12
.equ Xoofff_Expand_length , 16
.global Xoofff_ExpandFastLoop
.type Xoofff_ExpandFastLoop, %function;
Xoofff_ExpandFastLoop:
    subs    r3, #Xoofff_BlockSize       @ length must be greater than block size
    push    {r1-r12,lr}
    push    {r0,r2}
    ldmia   r0, {r2-r12,lr}             @ get initial yAccu
Xoofff_ExpandFastLoop_Loop:
    @ permutation
    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc6x1
    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2
    mRound  r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3
    mRound  r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4
    mRound  r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5
    mRound  r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6
    @ Add k and extract
    ldr     r0, [sp, #Xoofff_Expand_kRoll]
    ldr     r1, [r0], #4
    mRloXor r2, r1, (6*_r0)%32
    ldr     r1, [sp, #Xoofff_Expand_output]
    str     r2, [r1], #4
    ldr     r2, [r0], #4
    mRloXor r3, r2, (6*_r0)%32
    ldr     r2, [r0], #4
    str     r3, [r1], #4
    mRloXor r4, r2, (6*_r0)%32
    ldr     r2, [r0], #4
    str     r4, [r1], #4
    mRloXor r5, r2, (6*_r0)%32
    str     r5, [r1], #4
    ldm     r0!, {r2-r5}                @ Note that r6-r8 and r7-r9 are swapped
    mRliXor r2, r8, (6*_r0+1)%32
    str     r2, [r1], #4
    mRliXor r3, r9, (6*_r0+1)%32
    str     r3, [r1], #4
    mRliXor r4, r6, (6*_r0+1)%32
    str     r4, [r1], #4
    mRliXor r5, r7, (6*_r0+1)%32
    str     r5, [r1], #4
    ldm     r0!, {r2-r5}
    mRliXor r2, r10, (6*_r0+_e1+_w1)%32
    str     r2, [r1], #4
    mRliXor r3, r11, (6*_r0+_e1+_w1)%32
    str     r3, [r1], #4
    mRliXor r4, r12, (6*_r0+_e1+_w1)%32
    str     r4, [r1], #4
    mRliXor r5, lr, (6*_r0+_e1+_w1)%32
    str     r5, [r1], #4
    @ roll-e yAccu
    @ The words are reloaded in a shifted register order, so the store below
    @ rewrites the buffer as old words 4..11, old words 1..3, new word.
    ldr     r0, [sp, #Xoofff_Expand_yAccu]
    str     r1, [sp, #Xoofff_Expand_output]
    ldr     lr, [r0], #4
    ldmia   r0!, {r10-r12}
    ldmia   r0!, {r2-r9}
    and     r1, r6, r2
    eor     lr, r1, lr, ROR #32-5
    eor     lr, lr, r2, ROR #32-13
    eor     lr, lr, #7
    sub     r0, #Xoofff_BlockSize
    stmia   r0, {r2-r12,lr}
    @ loop management
    ldr     r0, [sp, #Xoofff_Expand_length]
    subs    r0, #Xoofff_BlockSize
    str     r0, [sp, #Xoofff_Expand_length]
    bcs     Xoofff_ExpandFastLoop_Loop
    @ return number of bytes processed
    ldr     r0, [sp, #Xoofff_Expand_output]
    ldr     r1, [sp, #Xoofff_Expand_iOutput]
    sub     r0, r0, r1
    pop     {r1,r2}
    pop     {r1-r12,pc}
.align 4
/*
The eXtended Keccak Code Package (XKCP)

The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.

Implementation by Ronny Van Keer, hereby denoted as "the implementer".

For more information, feedback or questions, please refer to the Keccak Team website:
https://keccak.team/

To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
*/
#ifndef _Xoodoo_h_
#define _Xoodoo_h_
#include <stdint.h>
#include <stdlib.h>
/* Maximum number of rounds; there is one round constant per round below. */
#define MAXROUNDS 12
/* State geometry: NROWS x NCOLUMS lanes (see the index() macro). */
#define NROWS 3
#define NCOLUMS 4
/* Round constants */
#define _rc12 0x00000058
#define _rc11 0x00000038
#define _rc10 0x000003C0
#define _rc9 0x000000D0
#define _rc8 0x00000120
#define _rc7 0x00000014
#define _rc6 0x00000060
#define _rc5 0x0000002C
#define _rc4 0x00000380
#define _rc3 0x000000F0
#define _rc2 0x000001A0
#define _rc1 0x00000012
/*
 * ROTL32(a, offset): rotate the 32-bit value a left by offset bits.
 *
 * A definition supplied by the includer takes precedence.  Otherwise the
 * compiler intrinsic is used where available, with a portable fallback.
 * The fallback reduces offset modulo 32 before both shifts and combines the
 * halves with OR, so a rotation by 0 (mod 32) returns a unchanged instead
 * of a ^ a, and no shift count can reach 32.
 */
#if !defined(ROTL32)
    #if defined (__arm__) && !defined(__GNUC__)
        #define ROTL32(a, offset) __ror(a, (32-(offset))%32)
    #elif defined(_MSC_VER)
        #define ROTL32(a, offset) _rotl(a, (offset)%32)
    #else
        #define ROTL32(a, offset) ((((uint32_t)(a)) << ((offset)%32)) | (((uint32_t)(a)) >> ((32-((offset)%32))%32)))
    #endif
#endif
/*
 * READ32_UNALIGNED(argAddress): read a uint32_t from argAddress.
 *
 * A definition supplied by the includer takes precedence.  Only the first
 * variant uses a __packed pointer; the others dereference a plain uint32_t
 * pointer.
 * NOTE(review): the plain dereference relies on the target tolerating
 * unaligned word accesses; confirm for each platform this is built on.
 */
#if !defined(READ32_UNALIGNED)
    #if defined (__arm__) && !defined(__GNUC__)
        #define READ32_UNALIGNED(argAddress) (*((const __packed uint32_t*)(argAddress)))
    #elif defined(_MSC_VER)
        #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress)))
    #else
        #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress)))
    #endif
#endif
/*
 * WRITE32_UNALIGNED(argAddress, argData): store the uint32_t argData at
 * argAddress.
 *
 * A definition supplied by the includer takes precedence.  Only the first
 * variant uses a __packed pointer; the others store through a plain
 * uint32_t pointer.
 * NOTE(review): the plain store relies on the target tolerating unaligned
 * word accesses; confirm for each platform this is built on.
 */
#if !defined(WRITE32_UNALIGNED)
    #if defined (__arm__) && !defined(__GNUC__)
        #define WRITE32_UNALIGNED(argAddress, argData) (*((__packed uint32_t*)(argAddress)) = (argData))
    #elif defined(_MSC_VER)
        #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData))
    #else
        #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData))
    #endif
#endif
/*
 * index(x, y): position of lane (x, y) in the state array, stored row by
 * row.  Both coordinates are reduced modulo the state dimensions.
 * Skipped when the includer already defines index.
 */
#if !defined(index)
    #define index(__x,__y) ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS))
#endif
/* One lane of the state: a 32-bit word. */
typedef uint32_t tXoodooLane;
/*
The eXtended Keccak Code Package (XKCP)

Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.

Implementation by Ronny Van Keer, hereby denoted as "the implementer".

For more information, feedback or questions, please refer to the Keccak Team website:
https://keccak.team/

To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
*/
#ifndef _Xoodyak_parameters_h_
#define _Xoodyak_parameters_h_
/* All values are byte counts. */
/* Width used by Cyclist; the domain byte goes at index f_bPrime - 1. */
#define Xoodyak_f_bPrime 48
/* Absorb and squeeze rate in hash mode. */
#define Xoodyak_Rhash 16
/* Absorb rate in keyed mode. */
#define Xoodyak_Rkin 44
/* Squeeze / encryption block rate in keyed mode. */
#define Xoodyak_Rkout 24
/* Number of bytes squeezed and re-absorbed by Ratchet. */
#define Xoodyak_lRatchet 16
@ The eXtended Keccak Code Package (XKCP)
@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
@ For more information, feedback or questions, please refer to the Keccak Team website:
@ To the extent possible under law, the implementer has waived all copyright
@ and related or neighboring rights to the source code in this file.
@ WARNING: These functions work only on little endian CPU with
@ ARMv7m architecture (Cortex-M3, ...).
.syntax unified
@ ----------------------------------------------------------------------------
@ Bit-rotation amounts. _r0 and _r1 are used by the Theta step of mRound,
@ _w1 by mChi3, and _e1+_w1 is passed to mRound as the pending rotation of
@ the third group of state registers. _t3 and _e0 are not referenced in the
@ visible code.
.equ _r0 , 5
.equ _r1 , 14
.equ _t3 , 1
.equ _w1 , 11
.equ _e0 , 2
.equ _e1 , 8
@ Round constants in their unrotated form. The number counts down: _rc12 is
@ for the first of twelve rounds, _rc1 for the last (see the mRound calls).
.equ _rc12 , 0x00000058
.equ _rc11 , 0x00000038
.equ _rc10 , 0x000003C0
.equ _rc9 , 0x000000D0
.equ _rc8 , 0x00000120
.equ _rc7 , 0x00000014
.equ _rc6 , 0x00000060
.equ _rc5 , 0x0000002C
.equ _rc4 , 0x00000380
.equ _rc3 , 0x000000F0
.equ _rc2 , 0x000001A0
.equ _rc1 , 0x00000012
@ _rcNxM is _rcN rotated right by (M * _r0) mod 32 bits, i.e. the constant as
@ it must be applied once the state carries M deferred Theta rotations.
@ The six values below are numbered for a 6-round sequence; they are not
@ referenced in the visible code.
.equ _rc6x1 , 0x00000003
.equ _rc5x2 , 0x0b000000
.equ _rc4x3 , 0x07000000
.equ _rc3x4 , 0x000f0000
.equ _rc2x5 , 0x0000d000
.equ _rc1x6 , 0x00000048
@ Pre-rotated constants for the 12-round sequence used by
@ Xoodoo_Permute_12roundsAsm.
.equ _rc12x1, 0xc0000002
.equ _rc11x2, 0x0e000000
.equ _rc10x3, 0x07800000
.equ _rc9x4 , 0x000d0000
.equ _rc8x5 , 0x00009000
.equ _rc7x6 , 0x00000050
.equ _rc6x7 , 0x0000000c
.equ _rc5x8 , 0x2c000000
.equ _rc4x9 , 0x1c000000
.equ _rc3x10, 0x003c0000
.equ _rc2x11, 0x00034000
.equ _rc1x12, 0x00000120
@ ----------------------------------------------------------------------------
@ mXor3 ro, a0, a1, a2, rho_e1, rho_e2
@ Three-way XOR with per-operand rotation:
@   ro = a0 ^ ROR(a1, (32-rho_e1)%32) ^ ROR(a2, (32-rho_e2)%32)
@ For each operand the plain "eors" form handles a rotation of 0 (mod 32)
@ and the "eor ..., ROR" form handles every other rotation.
@ NOTE(review): no .else/.endif/.endm lines are visible in this macro; confirm
@ against the assembled source that only one form per operand is emitted.
.macro mXor3 ro, a0, a1, a2, rho_e1, rho_e2
.if ((\rho_e1)%32) == 0
eors \ro, \a0, \a1
eor \ro, \a0, \a1, ROR #(32-(\rho_e1))%32
.if ((\rho_e2)%32) == 0
eors \ro, \ro, \a2
eor \ro, \ro, \a2, ROR #(32-(\rho_e2))%32
@ mRliXor ro, ri, rot
@ XOR with the INPUT rotated: ro = ro ^ ROR(ri, (32-rot)%32).
@ The plain "eors" form handles rot == 0 (mod 32).
@ NOTE(review): no .else/.endif/.endm lines are visible in this macro; confirm
@ against the assembled source.
.macro mRliXor ro, ri, rot
.if ((\rot)%32) == 0
eors \ro, \ro, \ri
eor \ro, \ro, \ri, ROR #(32-(\rot))%32
@ mRloXor ro, ri, rot
@ XOR with the OUTPUT register rotated: ro = ri ^ ROR(ro, (32-rot)%32).
@ The plain "eors" form handles rot == 0 (mod 32), where this reduces to
@ ro = ro ^ ri.
@ NOTE(review): no .else/.endif/.endm lines are visible in this macro; confirm
@ against the assembled source.
.macro mRloXor ro, ri, rot
.if ((\rot)%32) == 0
eors \ro, \ro, \ri
eor \ro, \ri, \ro, ROR #(32-(\rot))%32
@ mChi3 a0, a1, a2, r0, r1
@ Non-linear step on one column (a0, a1, a2), updated in place; r0 and r1 are
@ scratch registers. Each lane is XORed with (other AND NOT other) of the two
@ remaining lanes, with ROR #_w1 / ROR #32-_w1 applied inside the operands so
@ that a1 and a2 are combined at matching bit positions.
@ The updates are sequential: the second bic already sees the new a0 and the
@ third sees the new a0 and a1.
@ NOTE(review): no .endm line is visible for this macro.
.macro mChi3 a0,a1,a2,r0,r1
bic \r0, \a2, \a1, ROR #_w1
eors \a0, \a0, \r0, ROR #32-_w1
bic \r1, \a0, \a2, ROR #32-_w1
eors \a1, \a1, \r1
bic \r1, \a1, \a0
eors \a2, \a2, \r1, ROR #_w1
@ mRound: one Xoodoo round on the state held in r2-r5, four "r6..r9"
@ registers and four "r10..r12, lr" registers. r0 and r1 are scratch.
@   r6i..r9i          second register group, in the order used by Theta
@   r6w..r9w          the same four registers, in the order used by Chi
@   r10i, r11i, r12i, lri   third register group
@   rho_e1            rotation still pending on the second group
@   rho_we2           rotation still pending on the third group
@   rc                round constant (one of the _rcNxM values)
@ Rotations are not applied to the registers here; they are folded into the
@ shifted operands of the XORs and carried forward to the next round.
.macro mRound r6i, r7i, r8i, r9i, r6w, r7w, r8w, r9w, r10i, r11i, r12i, lri, rho_e1, rho_we2, rc
@ Theta: Column Parity Mixer (with late Rho-west, Rho-east bit rotations)
@ r0/r1 alternately hold the parity of one column and are then folded into
@ the three lanes of the neighbouring column.
mXor3 r0, r5, \r9i, \lri, \rho_e1, \rho_we2
mXor3 r1, r2, \r6i, \r10i, \rho_e1, \rho_we2
mRliXor r0, r0, _r1-_r0
mRloXor r2, r0, 32-_r0
mRloXor \r6i, r0, \rho_e1-_r0
mRloXor \r10i, r0, \rho_we2-_r0
mXor3 r0, r3, \r7i, \r11i, \rho_e1, \rho_we2
mRliXor r1, r1, _r1-_r0
mRloXor r3, r1, 32-_r0
mRloXor \r7i, r1, \rho_e1-_r0
mRloXor \r11i, r1, \rho_we2-_r0
mXor3 r1, r4, \r8i, \r12i, \rho_e1, \rho_we2
mRliXor r0, r0, _r1-_r0
mRloXor r4, r0, 32-_r0
mRloXor \r8i, r0, \rho_e1-_r0
mRloXor \r12i, r0, \rho_we2-_r0
mRliXor r1, r1, _r1-_r0
mRloXor r5, r1, 32-_r0
mRloXor \r9i, r1, \rho_e1-_r0
mRloXor \lri, r1, \rho_we2-_r0
@ After Theta the whole state is rotated -r0
@ from here we must use a1.w instead of a1.i
@ Iota: round constant
@ 0xc0000002 is applied as two separate immediates (0x00000002 and
@ 0xc0000000); every other constant is applied with a single eor.
@ NOTE(review): no .else/.endif line is visible around the eor instructions
@ below; confirm that only one of the two variants is assembled.
.if \rc == 0xc0000002
eor r2, r2, #0x00000002
eor r2, r2, #0xc0000000
eor r2, r2, #\rc
@ Chi: non linear step, on colums
mChi3 r2, \r6w, \r10i, r0, r1
mChi3 r3, \r7w, \r11i, r0, r1
mChi3 r4, \r8w, \r12i, r0, r1
mChi3 r5, \r9w, \lri, r0, r1
@ Stack-frame offsets shared by all *FullBlocks routines. They are relative
@ to sp right after the routine has executed "push {r0-r5}":
@   [sp+0]  r0 = state pointer
@   [sp+16] r4 = initial length
@   [sp+20] r5 = address at which the permutation resumes the caller
.equ offsetInstance , 0
.equ offsetInitialLen , 16
.equ offsetReturn , 20
@ ----------------------------------------------------------------------------
@ Xoodoo_Permute_12roundsAsm: only callable from asm
@ In/out: the 12 state words in r2-r12 and lr. r0 and r1 are clobbered.
@ It is entered with a branch, not a call: it "returns" by loading pc from
@ [sp, #offsetReturn], so the caller must have built the frame above.
@ NOTE(review): the "Xoodoo_Permute_12roundsAsm:" label itself is not visible
@ in this text; confirm it is present in the assembled file.
.type Xoodoo_Permute_12roundsAsm, %function;
@ Twelve rounds. The register arguments rotate from round to round so that
@ lane shifts cost no instructions; the first round has no pending rotations
@ (32, 32), later rounds carry 1 and _e1+_w1.
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc12x1
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc11x2
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc10x3
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc9x4
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc8x5
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc7x6
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc6x7
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x8
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x9
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x10
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x11
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x12
@ Apply the rotations that were deferred during the rounds, so every register
@ holds its lane in normal bit order again.
ror r2, r2, #32-(12*_r0)%32
ror r3, r3, #32-(12*_r0)%32
ror r4, r4, #32-(12*_r0)%32
ror r5, r5, #32-(12*_r0)%32
ror r6, r6, #32-(12*_r0+1)%32
ror r7, r7, #32-(12*_r0+1)%32
ror r8, r8, #32-(12*_r0+1)%32
ror r9, r9, #32-(12*_r0+1)%32
ror r10, r10, #32-(12*_r0+_e1+_w1)%32
ror r11, r11, #32-(12*_r0+_e1+_w1)%32
ror r12, r12, #32-(12*_r0+_e1+_w1)%32
ror lr, lr, #32-(12*_r0+_e1+_w1)%32
@ Resume the caller at the address it stored in its stack frame.
ldr pc, [sp, #offsetReturn]
.align 4
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen)
@ {
@ size_t initialLength = XLen@
@ do {
@ SnP_Permute(state )@ /* Xoodyak_Up(instance, NULL, 0, 0)@ */
@ SnP_AddBytes(state, X, 0, Xoodyak_Rkin)@ /* Xoodyak_Down(instance, X, Xoodyak_Rkin, 0)@ */
@ SnP_AddByte(state, 0x01, Xoodyak_Rkin)@
@ X += Xoodyak_Rkin@
@ XLen -= Xoodyak_Rkin@
@ } while (XLen >= Xoodyak_Rkin)@
@ return initialLength - XLen@
@ }
@ Frame offsets (after "push {r0-r5}") of the saved X pointer (r1) and of the
@ running length (r2).
.equ offsetAbsorbX , 4
.equ offsetAbsorbXLen , 8
.global Xoodyak_AbsorbKeyedFullBlocks
.type Xoodyak_AbsorbKeyedFullBlocks, %function;
@ In:  r0 = state, r1 = X, r2 = XLen. Out: r0 = number of bytes absorbed.
@ NOTE(review): the entry label and the _Loop / _Ret labels referenced below
@ are not visible in this text; confirm they exist in the assembled file.
@ Prologue: save callee-saved registers and the return address.
push {r4-r12,lr}
mov r4, r2 @ r4 initialLength
@ The stored length is kept one block (44 bytes) ahead, so the carry flag of
@ the later "subs" tells whether another full block remains.
subs r2, r2, #44
@ +1 sets the Thumb bit of the address the permutation will load into pc.
ldr r5, =Xoodyak_AbsorbKeyedFullBlocks_Ret+1
@ Build the frame described by the offset* constants, then load the state.
push {r0-r5}
ldmia r0, {r2-r12,lr}
b.w Xoodoo_Permute_12roundsAsm
@ Back from the permutation: XOR 11 input words (44 bytes) into r2-r12.
ldr r0, [sp, #offsetAbsorbX]
ldr r1, [r0], #4
eors r2, r2, r1
ldr r1, [r0], #4
eors r3, r3, r1
ldr r1, [r0], #4
eors r4, r4, r1
ldr r1, [r0], #4
eors r5, r5, r1
ldr r1, [r0], #4
eors r6, r6, r1
ldr r1, [r0], #4
eors r7, r7, r1
ldr r1, [r0], #4
eors r8, r8, r1
ldr r1, [r0], #4
eors r9, r9, r1
ldr r1, [r0], #4
eors r10, r10, r1
ldr r1, [r0], #4
eors r11, r11, r1
ldr r1, [r0], #4
@ Padding byte 0x01 at byte offset 44, i.e. the low byte of word 11 (lr).
eors lr, lr, #1
eors r12, r12, r1
@ Save the advanced pointer, consume one block, loop while no borrow.
ldr r1, [sp, #offsetAbsorbXLen]
str r0, [sp, #offsetAbsorbX]
subs r1, r1, #44
str r1, [sp, #offsetAbsorbXLen]
bcs Xoodyak_AbsorbKeyedFullBlocks_Loop
@ Write the state back and drop the frame (r2 = stored length, r4 = initial).
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
pop {r0-r5}
@ Undo the one-block bias, then return initialLength - remaining length.
adds r2, r2, #44
sub r0, r4, r2
pop {r4-r12,pc}
.align 4
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen)
@ {
@ size_t initialLength = XLen@
@ do {
@ SnP_Permute(state )@ /* Xoodyak_Up(instance, NULL, 0, 0)@ */
@ SnP_AddBytes(state, X, 0, Xoodyak_Rhash)@ /* Xoodyak_Down(instance, X, Xoodyak_Rhash, 0)@ */
@ SnP_AddByte(state, 0x01, Xoodyak_Rhash)@
@ X += Xoodyak_Rhash@
@ XLen -= Xoodyak_Rhash@
@ } while (XLen >= Xoodyak_Rhash)@
@ return initialLength - XLen@
@ }
.global Xoodyak_AbsorbHashFullBlocks
.type Xoodyak_AbsorbHashFullBlocks, %function;
@ In:  r0 = state, r1 = X, r2 = XLen. Out: r0 = number of bytes absorbed.
@ Same structure as the keyed variant, with a 16-byte block.
@ NOTE(review): the entry label and the _Loop / _Ret labels referenced below
@ are not visible in this text; confirm they exist in the assembled file.
@ Prologue: save callee-saved registers and the return address.
push {r4-r12,lr}
mov r4, r2 @ r4 initialLength
@ Keep the stored length one block (16 bytes) ahead for the carry test below.
subs r2, r2, #16
@ +1 sets the Thumb bit of the address the permutation will load into pc.
ldr r5, =Xoodyak_AbsorbHashFullBlocks_Ret+1
@ Build the frame (state, X, length, -, initial length, resume address).
push {r0-r5}
ldmia r0, {r2-r12,lr}
b.w Xoodoo_Permute_12roundsAsm
@ Back from the permutation: XOR 4 input words (16 bytes) into r2-r5.
ldr r0, [sp, #offsetAbsorbX]
ldr r1, [r0], #4
eors r2, r2, r1
ldr r1, [r0], #4
eors r3, r3, r1
ldr r1, [r0], #4
eors r4, r4, r1
ldr r1, [r0], #4
@ Padding byte 0x01 at byte offset 16, i.e. the low byte of word 4 (r6).
eors r6, r6, #1
eors r5, r5, r1
@ Save the advanced pointer, consume one block, loop while no borrow.
ldr r1, [sp, #offsetAbsorbXLen]
str r0, [sp, #offsetAbsorbX]
subs r1, r1, #16
str r1, [sp, #offsetAbsorbXLen]
bcs Xoodyak_AbsorbHashFullBlocks_Loop
@ Write the state back and drop the frame (r2 = stored length, r4 = initial).
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
pop {r0-r5}
@ Undo the one-block bias, then return initialLength - remaining length.
adds r2, r2, #16
sub r0, r4, r2
pop {r4-r12,pc}
.align 4
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen)
@ {
@ size_t initialLength = YLen@
@ do {
@ SnP_AddByte(state, 0x01, 0)@ /* Xoodyak_Down(instance, NULL, 0, 0)@ */
@ SnP_Permute(state )@ /* Xoodyak_Up(instance, Y, Xoodyak_Rkout, 0)@ */
@ SnP_ExtractBytes(state, Y, 0, Xoodyak_Rkout)@
@ Y += Xoodyak_Rkout@
@ YLen -= Xoodyak_Rkout@
@ } while (YLen >= Xoodyak_Rkout)@
@ return initialLength - YLen@
@ }
@ Frame offsets (after "push {r0-r5}") of the saved Y pointer (r1) and of the
@ running length (r2).
.equ offsetSqueezeY , 4
.equ offsetSqueezeYLen , 8
.global Xoodyak_SqueezeKeyedFullBlocks
.type Xoodyak_SqueezeKeyedFullBlocks, %function;
@ In:  r0 = state, r1 = Y, r2 = YLen. Out: r0 = number of bytes produced.
@ NOTE(review): the entry label and the _Loop / _Ret labels referenced below
@ are not visible in this text, so it cannot be seen here whether the
@ "eors r2, r2, #1" is inside the loop; confirm in the assembled file.
@ Prologue: save callee-saved registers and the return address.
push {r4-r12,lr}
mov r4, r2 @ r4 initialLength
@ Keep the stored length one block (24 bytes) ahead for the carry test below.
subs r2, r2, #24
@ +1 sets the Thumb bit of the address the permutation will load into pc.
ldr r5, =Xoodyak_SqueezeKeyedFullBlocks_Ret+1
@ Build the frame (state, Y, length, -, initial length, resume address).
push {r0-r5}
ldmia r0, {r2-r12,lr}
@ Padding byte 0x01 at byte offset 0 (low byte of word 0), then permute.
eors r2, r2, #1
b.w Xoodoo_Permute_12roundsAsm
@ Back from the permutation: copy 6 state words (24 bytes) to the output.
ldr r0, [sp, #offsetSqueezeY]
str r2, [r0], #4
str r3, [r0], #4
str r4, [r0], #4
str r5, [r0], #4
str r6, [r0], #4
str r7, [r0], #4
@ Save the advanced pointer, consume one block, loop while no borrow.
ldr r1, [sp, #offsetSqueezeYLen]
str r0, [sp, #offsetSqueezeY]
subs r1, r1, #24
str r1, [sp, #offsetSqueezeYLen]
bcs Xoodyak_SqueezeKeyedFullBlocks_Loop
@ Write the state back and drop the frame (r2 = stored length, r4 = initial).
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
pop {r0-r5}
@ Undo the one-block bias, then return initialLength - remaining length.
adds r2, r2, #24
sub r0, r4, r2
pop {r4-r12,pc}
.align 4
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen)
@ {
@ size_t initialLength = YLen@
@ do {
@ SnP_AddByte(state, 0x01, 0)@ /* Xoodyak_Down(instance, NULL, 0, 0)@ */
@ SnP_Permute(state)@ /* Xoodyak_Up(instance, Y, Xoodyak_Rhash, 0)@ */
@ SnP_ExtractBytes(state, Y, 0, Xoodyak_Rhash)@
@ Y += Xoodyak_Rhash@
@ YLen -= Xoodyak_Rhash@
@ } while (YLen >= Xoodyak_Rhash)@
@ return initialLength - YLen@
@ }
.global Xoodyak_SqueezeHashFullBlocks
.type Xoodyak_SqueezeHashFullBlocks, %function;
@ In:  r0 = state, r1 = Y, r2 = YLen. Out: r0 = number of bytes produced.
@ Same structure as the keyed variant, with a 16-byte block.
@ NOTE(review): the entry label and the _Loop / _Ret labels referenced below
@ are not visible in this text, so it cannot be seen here whether the
@ "eors r2, r2, #1" is inside the loop; confirm in the assembled file.
@ Prologue: save callee-saved registers and the return address.
push {r4-r12,lr}
mov r4, r2 @ r4 initialLength
@ Keep the stored length one block (16 bytes) ahead for the carry test below.
subs r2, r2, #16
@ +1 sets the Thumb bit of the address the permutation will load into pc.
ldr r5, =Xoodyak_SqueezeHashFullBlocks_Ret+1
@ Build the frame (state, Y, length, -, initial length, resume address).
push {r0-r5}
ldmia r0, {r2-r12,lr}
@ Padding byte 0x01 at byte offset 0 (low byte of word 0), then permute.
eors r2, r2, #1
b.w Xoodoo_Permute_12roundsAsm
@ Back from the permutation: copy 4 state words (16 bytes) to the output.
ldr r0, [sp, #offsetSqueezeY]
str r2, [r0], #4
str r3, [r0], #4
str r4, [r0], #4
str r5, [r0], #4
@ Save the advanced pointer, consume one block, loop while no borrow.
ldr r1, [sp, #offsetSqueezeYLen]
str r0, [sp, #offsetSqueezeY]
subs r1, r1, #16
str r1, [sp, #offsetSqueezeYLen]
bcs Xoodyak_SqueezeHashFullBlocks_Loop
@ Write the state back and drop the frame (r2 = stored length, r4 = initial).
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
pop {r0-r5}
@ Undo the one-block bias, then return initialLength - remaining length.
adds r2, r2, #16
sub r0, r4, r2
pop {r4-r12,pc}
.align 4
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen)
@ {
@ size_t initialLength = IOLen@
@ do {
@ SnP_Permute(state)@
@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@
@ SnP_OverwriteBytes(state, O, 0, Xoodyak_Rkout)@
@ SnP_AddByte(state, 0x01, Xoodyak_Rkout)@
@ I += Xoodyak_Rkout@
@ O += Xoodyak_Rkout@
@ IOLen -= Xoodyak_Rkout@
@ } while (IOLen >= Xoodyak_Rkout)@
@ return initialLength - IOLen@
@ }
@ Frame offsets of the saved I pointer (r1), O pointer (r2) and running
@ length (r3). The "+8" accounts for the extra "push {r10, r11}" that is
@ active while offsetCryptI / offsetCryptO are used; offsetCryptIOLen is used
@ after the matching pop and therefore has no bias.
.equ offsetCryptI , 4+8
.equ offsetCryptO , 8+8
.equ offsetCryptIOLen , 12
.global Xoodyak_EncryptFullBlocks
.type Xoodyak_EncryptFullBlocks, %function;
@ In:  r0 = state, r1 = I, r2 = O, r3 = IOLen. Out: r0 = bytes processed.
@ NOTE(review): the entry label and the _Loop / _Ret labels referenced below
@ are not visible in this text; confirm they exist in the assembled file.
@ Prologue: save callee-saved registers and the return address.
push {r4-r12,lr}
mov r4, r3 @ r4 initialLength
@ Keep the stored length one block (24 bytes) ahead for the carry test below.
subs r3, r3, #24
@ +1 sets the Thumb bit of the address the permutation will load into pc.
ldr r5, =Xoodyak_EncryptFullBlocks_Ret+1
@ Build the frame (state, I, O, length, initial length, resume address).
push {r0-r5}
ldmia r0, {r2-r12,lr}
b.w Xoodoo_Permute_12roundsAsm
@ Back from the permutation. r10/r11 hold state words, so park them on the
@ stack while they serve as output / input pointers.
push {r10, r11}
ldr r11, [sp, #offsetCryptI]
ldr r10, [sp, #offsetCryptO]
@ For 6 words: state ^= input, and the result is both the output word and
@ the new state word. Loads are interleaved with the stores.
ldr r0, [r11], #4
ldr r1, [r11], #4
eors r2, r2, r0
str r2, [r10], #4
eors r3, r3, r1
ldr r0, [r11], #4
str r3, [r10], #4
eors r4, r4, r0
ldr r1, [r11], #4
str r4, [r10], #4
eors r5, r5, r1
ldr r0, [r11], #4
str r5, [r10], #4
eors r6, r6, r0
ldr r1, [r11], #4
str r6, [r10], #4
eors r7, r7, r1
str r7, [r10], #4
@ Save the advanced pointers and restore the two state words.
str r10, [sp, #offsetCryptO]
str r11, [sp, #offsetCryptI]
pop {r10, r11}
ldr r0, [sp, #offsetCryptIOLen]
@ Padding byte 0x01 at byte offset 24, i.e. the low byte of word 6 (r8).
eors r8, r8, #1
@ Consume one block, loop while no borrow.
subs r0, r0, #24
str r0, [sp, #offsetCryptIOLen]
bcs Xoodyak_EncryptFullBlocks_Loop
@ Write the state back and drop the frame (r3 = stored length, r4 = initial).
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
pop {r0-r5}
@ Undo the one-block bias, then return initialLength - remaining length.
adds r3, r3, #24
sub r0, r4, r3
pop {r4-r12,pc}
.align 4
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen)
@ {
@ size_t initialLength = IOLen@
@ do {
@ SnP_Permute(state)@
@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@
@ SnP_AddBytes(state, O, 0, Xoodyak_Rkout)@
@ SnP_AddByte(state, 0x01, Xoodyak_Rkout)@
@ I += Xoodyak_Rkout@
@ O += Xoodyak_Rkout@
@ IOLen -= Xoodyak_Rkout@
@ } while (IOLen >= Xoodyak_Rkout)@
@ return initialLength - IOLen@
@ }
.global Xoodyak_DecryptFullBlocks
.type Xoodyak_DecryptFullBlocks, %function;
@ In:  r0 = state, r1 = I, r2 = O, r3 = IOLen. Out: r0 = bytes processed.
@ Differs from the encrypt routine only in what is kept in the state: after
@ writing state ^ input to the output, the state word is replaced by the
@ INPUT word (the "mov" instructions).
@ NOTE(review): the entry label and the _Loop / _Ret labels referenced below
@ are not visible in this text; confirm they exist in the assembled file.
@ Prologue: save callee-saved registers and the return address.
push {r4-r12,lr}
mov r4, r3 @ r4 initialLength
@ Keep the stored length one block (24 bytes) ahead for the carry test below.
subs r3, r3, #24
@ +1 sets the Thumb bit of the address the permutation will load into pc.
ldr r5, =Xoodyak_DecryptFullBlocks_Ret+1
@ Build the frame (state, I, O, length, initial length, resume address).
push {r0-r5}
ldmia r0, {r2-r12,lr}
b.w Xoodoo_Permute_12roundsAsm
@ Back from the permutation. r10/r11 hold state words, so park them on the
@ stack while they serve as output / input pointers.
push {r10, r11}
ldr r11, [sp, #offsetCryptI]
ldr r10, [sp, #offsetCryptO]
@ For 6 words: output = state ^ input, then state = input.
ldr r0, [r11], #4
ldr r1, [r11], #4
eors r2, r2, r0
str r2, [r10], #4
mov r2, r0
eors r3, r3, r1
ldr r0, [r11], #4
str r3, [r10], #4
mov r3, r1
eors r4, r4, r0
ldr r1, [r11], #4
str r4, [r10], #4
mov r4, r0
eors r5, r5, r1
ldr r0, [r11], #4
str r5, [r10], #4
mov r5, r1
eors r6, r6, r0
ldr r1, [r11], #4
str r6, [r10], #4
mov r6, r0
eors r7, r7, r1
str r7, [r10], #4
mov r7, r1
@ Save the advanced pointers and restore the two state words.
str r10, [sp, #offsetCryptO]
str r11, [sp, #offsetCryptI]
pop {r10, r11}
ldr r0, [sp, #offsetCryptIOLen]
@ Padding byte 0x01 at byte offset 24, i.e. the low byte of word 6 (r8).
eors r8, r8, #1
@ Consume one block, loop while no borrow.
subs r0, r0, #24
str r0, [sp, #offsetCryptIOLen]
bcs Xoodyak_DecryptFullBlocks_Loop
@ Write the state back and drop the frame (r3 = stored length, r4 = initial).
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
pop {r0-r5}
@ Undo the one-block bias, then return initialLength - remaining length.
adds r3, r3, #24
sub r0, r4, r3
pop {r4-r12,pc}
.align 4
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifdef XoodooReference
#include "displayIntermediateValues.h"
#include <assert.h>
#include <string.h>
#include "Xoodyak.h"
#ifdef OUTPUT
#include <stdlib.h>
#include <string.h>
static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length);

/**
 * Write one line of the form "<synopsis>: xx xx xx\n" to a stream.
 *
 * @param f         Destination stream.
 * @param synopsis  Label printed before the colon.
 * @param data      Bytes to print; each is shown as two lowercase hex digits
 *                  preceded by a space.
 * @param length    Number of bytes in data. With 0, only "<synopsis>:\n" is
 *                  written.
 */
static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length)
{
    unsigned int i;

    fprintf(f, "%s:", synopsis);
    for (i = 0; i < length; i++) {
        fprintf(f, " %02x", (unsigned int)data[i]);
    }
    fprintf(f, "\n");
}
/* Smaller of a and b. Both arguments are evaluated twice, so they must not
 * have side effects. */
#define MyMin(a,b) (((a) < (b)) ? (a) : (b))
/*
 * Instantiate the generic Cyclist code for Xoodyak: SnP selects the Xoodoo
 * state functions, SnP_Permute the 12-round permutation, and prefix the
 * "Xoodyak_" name prefix of the generated functions. The three macros are
 * removed again once the generic code has been included.
 */
#ifdef XKCP_has_Xoodoo
#include "Xoodoo-SnP.h"
#define SnP Xoodoo
#define SnP_Permute Xoodoo_Permute_12rounds
#define prefix Xoodyak
/* NOTE(review): the include below names no file; presumably it should pull in
 * the generic Cyclist implementation - confirm the intended file name. */
#include ""
#undef prefix
#undef SnP
#undef SnP_Permute
/* NOTE(review): no #endif matching "#ifdef XKCP_has_Xoodoo" is visible here. */
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Xoodyak_h_
#define _Xoodyak_h_

#include "config.h"

/*
 * Public Xoodyak interface. It is only available when the build configuration
 * provides a Xoodoo implementation; otherwise compilation stops with an
 * explicit error.
 */
#ifdef XKCP_has_Xoodoo

#include <stddef.h>
#include "Cyclist.h"
#include "Xoodoo-SnP.h"
#include "Xoodyak-parameters.h"

/* Defines the Xoodyak_Instance type. */
KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment)
/* Declares Xoodyak_Initialize, Xoodyak_Absorb, Xoodyak_Encrypt,
 * Xoodyak_Decrypt, Xoodyak_Squeeze, Xoodyak_SqueezeKey and Xoodyak_Ratchet. */
KCP_DeclareCyclistFunctions(Xoodyak)

#else
#error This requires an implementation of Xoodoo
#endif

#endif
The eXtended Keccak Code Package (XKCP)
Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _align_h_
#define _align_h_

/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. */
#ifdef ALIGN
#undef ALIGN
#endif

/*
 * ALIGN(x): request x-byte alignment for the declaration it prefixes, using
 * the syntax of the compiler in use. On compilers that are not recognised
 * it expands to nothing, i.e. no alignment is requested.
 */
#if defined(__GNUC__)
#define ALIGN(x) __attribute__ ((aligned(x)))
#elif defined(_MSC_VER)
#define ALIGN(x) __declspec(align(x))
#elif defined(__ARMCC_VERSION)
#define ALIGN(x) __align(x)
#else
#define ALIGN(x)
#endif

#endif
#define CRYPTO_ABYTES 16
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
1. source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
2. binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation;
3. the name of the copyright holder is not used to endorse products
built using this software without specific written permission.
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
Issue Date: 20/12/2007
Changes for ARM 9/9/2010
/*
 * Byte-order detection header.
 * NOTE(review): in the text below no branch assigns a byte-order value and
 * most conditionals have no visible #endif; lines appear to be missing.
 * Compare against a pristine copy of brg_endian.h before relying on it.
 */
#ifndef _BRG_ENDIAN_H
#define _BRG_ENDIAN_H
#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
/* The system-header probing that follows is switched off by "#if 0". */
#if 0
/* Include files where endian defines and byteswap functions may reside */
#if defined( __sun )
# include <sys/isa_defs.h>
#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
# include <sys/endian.h>
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
# include <machine/endian.h>
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
# if !defined( __MINGW32__ ) && !defined( _AIX )
# include <endian.h>
# if !defined( __BEOS__ )
# include <byteswap.h>
# endif
# endif
/* Now attempt to set the define for platform byte order using any */
/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
/* seem to encompass most endian symbol definitions */
#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
# endif
#elif defined( BIG_ENDIAN )
#elif defined( LITTLE_ENDIAN )
#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
# endif
#elif defined( _BIG_ENDIAN )
#elif defined( _LITTLE_ENDIAN )
#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN )
#elif defined( __LITTLE_ENDIAN )
#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
# endif
#elif defined( __BIG_ENDIAN__ )
#elif defined( __LITTLE_ENDIAN__ )
/* if the platform byte order could not be determined, then try to */
/* set this define using common machine defines */
#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
defined( vax ) || defined( vms ) || defined( VMS ) || \
defined( __VMS ) || defined( _M_X64 )
#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \
defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ )
#elif defined(__arm__)
# ifdef __BIG_ENDIAN
# else
# endif
#elif 1 /* **** EDIT HERE IF NECESSARY **** */
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order
/* File generated by ToTargetConfigFile.xsl */
#define XKCP_has_Xoodyak
#define XKCP_has_Xoodoo
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#include "crypto_aead.h"
#include "api.h"
#include "Xoodyak.h"
#include <string.h>
/* Key and nonce sizes in bytes. api.h normally provides them; the values
 * below are only used when it does not. */
#if !defined(CRYPTO_KEYBYTES)
#define CRYPTO_KEYBYTES 16
#endif
#if !defined(CRYPTO_NPUBBYTES)
#define CRYPTO_NPUBBYTES 16
#endif

/* Size in bytes of the authentication tag appended to the ciphertext. */
#define TAGLEN 16
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k)
Xoodyak_Instance instance;
Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, NULL, 0, NULL, 0);
Xoodyak_Absorb(&instance, npub, CRYPTO_NPUBBYTES);
Xoodyak_Absorb(&instance, ad, (size_t)adlen);
Xoodyak_Encrypt(&instance, m, c, (size_t)mlen);
Xoodyak_Squeeze(&instance, c + mlen, TAGLEN);
*clen = mlen + TAGLEN;
#if 0
unsigned int i;
for (i = 0; i < *clen; ++i )
printf("\\x%02x", c[i] );
return 0;
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k)
Xoodyak_Instance instance;
unsigned char tag[TAGLEN];
unsigned long long mlen_;
*mlen = 0;
if (clen < TAGLEN) {
return -1;
mlen_ = clen - TAGLEN;
Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, NULL, 0, NULL, 0);
Xoodyak_Absorb(&instance, npub, CRYPTO_NPUBBYTES);
Xoodyak_Absorb(&instance, ad, (size_t)adlen);
Xoodyak_Decrypt(&instance, c, m, (size_t)mlen_);
Xoodyak_Squeeze(&instance, tag, TAGLEN);
if (memcmp(tag, c + mlen_, TAGLEN) != 0) {
memset(m, 0, (size_t)mlen_);
return -1;
*mlen = mlen_;
return 0;
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Cyclist_h_
#define _Cyclist_h_

#include <stddef.h> /* size_t, used by KCP_DeclareCyclistFunctions */
#include <stdint.h>
#include "align.h"

/* Values of the "mode" field of an instance. */
#define Cyclist_ModeHash 1
#define Cyclist_ModeKeyed 2

/* Values of the "phase" field of an instance. */
#define Cyclist_PhaseDown 1
#define Cyclist_PhaseUp 2

/*
 * KCP_DeclareCyclistStructure(prefix, size, alignment)
 * Defines the type prefix##_Instance:
 *   state     permutation state, size bytes
 *   phase     Cyclist_PhaseDown or Cyclist_PhaseUp
 *   mode      Cyclist_ModeHash or Cyclist_ModeKeyed
 *   Rabsorb   current absorb block size in bytes
 *   Rsqueeze  current squeeze block size in bytes
 * Builds with OUTPUT defined additionally carry a shadow copy of the state
 * and a stream used to trace intermediate values.
 */
#ifdef OUTPUT
#include <stdio.h>

#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
    ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
        uint8_t state[size]; \
        uint8_t stateShadow[size]; \
        FILE *file; \
        unsigned int phase; \
        unsigned int mode; \
        unsigned int Rabsorb; \
        unsigned int Rsqueeze; \
    } prefix##_Instance;

#else

#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
    ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
        uint8_t state[size]; \
        unsigned int phase; \
        unsigned int mode; \
        unsigned int Rabsorb; \
        unsigned int Rsqueeze; \
    } prefix##_Instance;

#endif

/*
 * KCP_DeclareCyclistFunctions(prefix)
 * Declares the public functions operating on a prefix##_Instance.
 */
#define KCP_DeclareCyclistFunctions(prefix) \
    void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \
    void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \
    void prefix##_Encrypt(prefix##_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen); \
    void prefix##_Decrypt(prefix##_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen); \
    void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \
    void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \
    void prefix##_Ratchet(prefix##_Instance *instance);

#endif
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/* Token pasting in two steps, so that macro arguments such as SnP and prefix
 * are expanded before they are concatenated. */
#define JOIN0(a, b) a ## b
#define JOIN(a, b) JOIN0(a, b)

/* State functions of the permutation selected by the includer through SnP. */
#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize)
#define SnP_Initialize JOIN(SnP, _Initialize)
#define SnP_AddBytes JOIN(SnP, _AddBytes)
#define SnP_AddByte JOIN(SnP, _AddByte)
#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes)
#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes)
#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes)

/* Public names, built from the prefix selected by the includer. */
#define Cyclist_Instance JOIN(prefix, _Instance)
#define Cyclist_Initialize JOIN(prefix, _Initialize)
#define Cyclist_Absorb JOIN(prefix, _Absorb)
#define Cyclist_Encrypt JOIN(prefix, _Encrypt)
#define Cyclist_Decrypt JOIN(prefix, _Decrypt)
#define Cyclist_Squeeze JOIN(prefix, _Squeeze)
#define Cyclist_SqueezeKey JOIN(prefix, _SqueezeKey)
#define Cyclist_Ratchet JOIN(prefix, _Ratchet)

/* File-local helpers. */
#define Cyclist_AbsorbAny JOIN(prefix, _AbsorbAny)
#define Cyclist_AbsorbKey JOIN(prefix, _AbsorbKey)
#define Cyclist_SqueezeAny JOIN(prefix, _SqueezeAny)
#define Cyclist_Down JOIN(prefix, _Down)
#define Cyclist_Up JOIN(prefix, _Up)
#define Cyclist_Crypt JOIN(prefix, _Crypt)

/* Size parameters of the instantiated scheme. */
#define Cyclist_f_bPrime JOIN(prefix, _f_bPrime)
#define Cyclist_Rhash JOIN(prefix, _Rhash)
#define Cyclist_Rkin JOIN(prefix, _Rkin)
#define Cyclist_Rkout JOIN(prefix, _Rkout)
#define Cyclist_lRatchet JOIN(prefix, _lRatchet)

/* Optional multi-block fast paths supplied by an optimised implementation. */
#if defined(CyclistFullBlocks_supported)
#define Cyclist_AbsorbKeyedFullBlocks JOIN(prefix, _AbsorbKeyedFullBlocks)
#define Cyclist_AbsorbHashFullBlocks JOIN(prefix, _AbsorbHashFullBlocks)
#define Cyclist_SqueezeKeyedFullBlocks JOIN(prefix, _SqueezeKeyedFullBlocks)
#define Cyclist_SqueezeHashFullBlocks JOIN(prefix, _SqueezeHashFullBlocks)
#define Cyclist_EncryptFullBlocks JOIN(prefix, _EncryptFullBlocks)
#define Cyclist_DecryptFullBlocks JOIN(prefix, _DecryptFullBlocks)
#endif
/* ------- Cyclist internal interfaces ------- */
/**
 * Down step: add one input block and its framing to the state.
 *
 * XORs XiLen bytes of Xi at offset 0, a 0x01 padding byte right after them,
 * and a colour byte in the last state byte; in hash mode only bit 0 of Cd is
 * used. Leaves the instance in the Down phase.
 *
 * @param instance  Instance to update.
 * @param Xi        Block to add; callers in this file pass NULL with XiLen 0.
 * @param XiLen     Block length in bytes.
 * @param Cd        Colour (domain-separation) byte.
 */
static void Cyclist_Down(Cyclist_Instance *instance, const uint8_t *Xi, unsigned int XiLen, uint8_t Cd)
{
    SnP_AddBytes(instance->state, Xi, 0, XiLen);
    SnP_AddByte(instance->state, 0x01, XiLen);
    SnP_AddByte(instance->state, (instance->mode == Cyclist_ModeHash) ? (Cd & 0x01) : Cd, Cyclist_f_bPrime - 1);
    instance->phase = Cyclist_PhaseDown;
}
/**
 * Up step: apply the permutation and optionally extract output.
 *
 * Outside hash mode the colour byte Cu is first XORed into the last state
 * byte. After the permutation, YiLen bytes are copied from the start of the
 * state into Yi. Leaves the instance in the Up phase.
 *
 * Builds with OUTPUT defined additionally log, when instance->file is set,
 * the data XORed into the state since the previous permutation and the state
 * after the permutation.
 *
 * @param instance  Instance to update.
 * @param Yi        Destination for extracted bytes; callers in this file pass
 *                  NULL with YiLen 0 when nothing is extracted.
 * @param YiLen     Number of bytes to extract.
 * @param Cu        Colour (domain-separation) byte.
 */
static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu)
{
    #if defined(OUTPUT)
    uint8_t s[Cyclist_f_bPrime];
    #endif

    if (instance->mode != Cyclist_ModeHash) {
        SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1);
    }
    #if defined(OUTPUT)
    if (instance->file != NULL) {
        /* s = shadow ^ state: everything added since the last permutation. */
        SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
        SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime );
    }
    #endif
    SnP_Permute( instance->state );
    #if defined(OUTPUT)
    if (instance->file != NULL) {
        memcpy( instance->stateShadow, instance->state, sizeof(instance->state) );
        fprintf( instance->file, "Data XORed" );
        displayByteString( instance->file, "", s, Cyclist_f_bPrime );
        SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
        fprintf( instance->file, "After f() ");
        displayByteString( instance->file, "", s, Cyclist_f_bPrime );
    }
    #endif
    instance->phase = Cyclist_PhaseUp;
    SnP_ExtractBytes( instance->state, Yi, 0, YiLen );
}
/**
 * Absorb XLen bytes of X in blocks of at most r bytes.
 *
 * Each block is preceded by an Up step unless the instance is already in the
 * Up phase. Only the first block carries the colour Cd; later blocks use 0.
 * An empty input still absorbs one empty block.
 *
 * When the optimised full-block routines are available, they take over after
 * the first block for as long as at least one full block remains.
 *
 * @param instance  Instance to update.
 * @param X         Input bytes.
 * @param XLen      Input length in bytes.
 * @param r         Block size in bytes.
 * @param Cd        Colour byte of the first block.
 */
static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd)
{
    unsigned int splitLen;

    do {
        if (instance->phase != Cyclist_PhaseUp) {
            Cyclist_Up(instance, NULL, 0, 0);
        }
        splitLen = MyMin(XLen, r);
        Cyclist_Down(instance, X, splitLen, Cd);
        Cd = 0;
        X += splitLen;
        XLen -= splitLen;
        #if defined(CyclistFullBlocks_supported)
        if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) {
            size_t lenProcessed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen);
            X += lenProcessed;
            XLen -= lenProcessed;
        }
        else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) {
            size_t lenProcessed = Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen);
            X += lenProcessed;
            XLen -= lenProcessed;
        }
        #endif
    } while ( XLen != 0 );
}
/**
 * Switch a fresh hash-mode instance to keyed mode and absorb the key.
 *
 * The absorbed block is K || ID || (one byte holding IDLen), with colour
 * 0x02. A non-empty counter is then absorbed one byte per block.
 *
 * KLen + IDLen must not exceed Cyclist_Rkin - 1. This is only checked with
 * assert(), so it is the caller's responsibility in builds with NDEBUG.
 *
 * @param instance    Instance in hash mode.
 * @param K           Key, KLen bytes.
 * @param KLen        Key length in bytes.
 * @param ID          Key identifier, IDLen bytes; may be NULL when IDLen is 0.
 * @param IDLen       Identifier length in bytes.
 * @param counter     Counter bytes; only read when counterLen is not 0.
 * @param counterLen  Counter length in bytes.
 */
static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
{
    uint8_t KID[Cyclist_Rkin];

    assert(instance->mode == Cyclist_ModeHash);
    assert((KLen + IDLen) <= (Cyclist_Rkin - 1));

    instance->mode = Cyclist_ModeKeyed;
    instance->Rabsorb = Cyclist_Rkin;
    instance->Rsqueeze = Cyclist_Rkout;
    if (KLen != 0) {
        memcpy(KID, K, KLen);
        /* memcpy must not be given a null pointer, even for a length of 0. */
        if (IDLen != 0) {
            memcpy(KID + KLen, ID, IDLen);
        }
        KID[KLen + IDLen] = (uint8_t)IDLen;
        Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02);
        if (counterLen != 0) {
            Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00);
        }
    }
}
/**
 * Produce YLen output bytes in blocks of at most instance->Rsqueeze bytes.
 *
 * The first block is an Up step with colour Cu. Every further block is an
 * empty Down step followed by an Up step with colour 0. A request for 0
 * bytes still performs the first Up step.
 *
 * When the optimised full-block routines are available they are used for
 * further blocks as long as at least one full squeeze block remains. The
 * keyed fast path is gated on Cyclist_Rkout because that is the block size
 * the keyed routine produces.
 *
 * @param instance  Instance to update.
 * @param Y         Output buffer, YLen bytes.
 * @param YLen      Number of bytes to produce.
 * @param Cu        Colour byte of the first Up step.
 */
static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu)
{
    unsigned int len;

    len = MyMin(YLen, instance->Rsqueeze );
    Cyclist_Up(instance, Y, len, Cu);
    Y += len;
    YLen -= len;
    while (YLen != 0) {
        #if defined(CyclistFullBlocks_supported)
        if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkout)) {
            size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen);
            Y += lenProcessed;
            YLen -= lenProcessed;
        }
        else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) {
            size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen);
            Y += lenProcessed;
            YLen -= lenProcessed;
        }
        else
        #endif
        {
            Cyclist_Down(instance, NULL, 0, 0);
            len = MyMin(YLen, instance->Rsqueeze );
            Cyclist_Up(instance, Y, len, 0);
            Y += len;
            YLen -= len;
        }
    }
}
/**
 * Encrypt or decrypt IOLen bytes from I to O in blocks of Cyclist_Rkout.
 *
 * Per block: Up step without extraction, O = state ^ I over the block, then a
 * Down step that absorbs the PLAINTEXT of the block. The first Up step uses
 * colour 0x80, later ones 0. An empty input still processes one empty block.
 *
 * When encrypting, the plaintext is copied to a local buffer before O is
 * written, so that I and O may be the same buffer.
 *
 * When the optimised full-block routines are available they are used from
 * the second block on, as long as at least one full block remains.
 *
 * @param instance  Instance in keyed mode.
 * @param I         Input (plaintext when encrypting, ciphertext otherwise).
 * @param O         Output, IOLen bytes.
 * @param IOLen     Number of bytes to process.
 * @param decrypt   Non-zero to decrypt, zero to encrypt.
 */
static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt)
{
    unsigned int splitLen;
    uint8_t P[Cyclist_Rkout];
    uint8_t Cu = 0x80;

    do {
        if (decrypt != 0) {
            #if defined(CyclistFullBlocks_supported)
            if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
                size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen);
                I += lenProcessed;
                O += lenProcessed;
                IOLen -= lenProcessed;
            }
            else
            #endif
            {
                splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
                Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
                SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
                Cyclist_Down(instance, O, splitLen, 0x00);
                I += splitLen;
                O += splitLen;
                IOLen -= splitLen;
            }
        }
        else {
            #if defined(CyclistFullBlocks_supported)
            if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
                size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen);
                I += lenProcessed;
                O += lenProcessed;
                IOLen -= lenProcessed;
            }
            else
            #endif
            {
                splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
                memcpy(P, I, splitLen);
                Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
                SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
                Cyclist_Down(instance, P, splitLen, 0x00);
                I += splitLen;
                O += splitLen;
                IOLen -= splitLen;
            }
        }
        Cu = 0x00;
    } while ( IOLen != 0 );
}
/* ------- Cyclist interfaces ------- */
/**
 * Initialise an instance.
 *
 * The permutation state is initialised and the instance starts in hash mode
 * (Up phase, absorb and squeeze block size Cyclist_Rhash). When KLen is not
 * 0 the instance is then switched to keyed mode by absorbing the key, the
 * identifier and the counter.
 *
 * @param instance    Instance to initialise.
 * @param K           Key, KLen bytes; ignored when KLen is 0.
 * @param KLen        Key length in bytes; 0 selects hash mode.
 * @param ID          Key identifier, IDLen bytes.
 * @param IDLen       Identifier length in bytes.
 * @param counter     Counter bytes.
 * @param counterLen  Counter length in bytes.
 */
void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
{
    /* The state must be initialised before anything is added to it. */
    SnP_StaticInitialize();
    SnP_Initialize(instance->state);
    instance->phase = Cyclist_PhaseUp;
    instance->mode = Cyclist_ModeHash;
    instance->Rabsorb = Cyclist_Rhash;
    instance->Rsqueeze = Cyclist_Rhash;
    #ifdef OUTPUT
    instance->file = 0;
    SnP_Initialize( instance->stateShadow );
    #endif
    if (KLen != 0) {
        Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, counter, counterLen);
    }
}
/**
 * Absorb XLen bytes of X at the instance's current absorb block size, with
 * colour 0x03 on the first block.
 */
void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, size_t XLen)
{
    Cyclist_AbsorbAny(instance, X, XLen, instance->Rabsorb, 0x03);
}
/**
 * Encrypt PLen bytes of P into C. The instance must be in keyed mode
 * (checked with assert()).
 */
void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen)
{
    assert(instance->mode == Cyclist_ModeKeyed);
    Cyclist_Crypt(instance, P, C, PLen, 0);
}
/**
 * Decrypt CLen bytes of C into P. The instance must be in keyed mode
 * (checked with assert()).
 */
void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen)
{
    assert(instance->mode == Cyclist_ModeKeyed);
    Cyclist_Crypt(instance, C, P, CLen, 1);
}
/**
 * Produce YLen output bytes in Y, with colour 0x40 on the first Up step.
 */
void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen)
{
    Cyclist_SqueezeAny(instance, Y, YLen, 0x40);
}
/**
 * Produce KLen bytes of derived key material in K, with colour 0x20 on the
 * first Up step. The instance must be in keyed mode (checked with assert()).
 */
void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen)
{
    assert(instance->mode == Cyclist_ModeKeyed);
    Cyclist_SqueezeAny(instance, K, KLen, 0x20);
}
/**
 * Ratchet the state: squeeze Cyclist_lRatchet bytes with colour 0x10 and
 * absorb them again with colour 0x00. The instance must be in keyed mode
 * (checked with assert()).
 */
void Cyclist_Ratchet(Cyclist_Instance *instance)
{
    uint8_t buffer[Cyclist_lRatchet];

    assert(instance->mode == Cyclist_ModeKeyed);
    /* Squeeze then absorb is the same as overwriting with zeros */
    Cyclist_SqueezeAny(instance, buffer, sizeof(buffer), 0x10);
    Cyclist_AbsorbAny(instance, buffer, sizeof(buffer), instance->Rabsorb, 0x00);
}
#undef SnP_StaticInitialize
#undef SnP_Initialize
#undef SnP_AddBytes
#undef SnP_AddByte
#undef SnP_OverwriteBytes
#undef SnP_ExtractBytes
#undef SnP_ExtractAndAddBytes
#undef Cyclist_Instance
#undef Cyclist_Initialize
#undef Cyclist_Absorb
#undef Cyclist_Encrypt
#undef Cyclist_Decrypt
#undef Cyclist_Squeeze
#undef Cyclist_SqueezeKey
#undef Cyclist_Ratchet
#undef Cyclist_AbsorbAny
#undef Cyclist_AbsorbKey
#undef Cyclist_SqueezeAny
#undef Cyclist_Down
#undef Cyclist_Up
#undef Cyclist_Crypt
#undef Cyclist_f_bPrime
#undef Cyclist_Rhash
#undef Cyclist_Rkin
#undef Cyclist_Rkout
#undef Cyclist_lRatchet
/* The full-block fast-path aliases only exist when the instantiation enabled
 * them, so they are only removed in that case. */
#if defined(CyclistFullBlocks_supported)
#undef Cyclist_AbsorbKeyedFullBlocks
#undef Cyclist_AbsorbHashFullBlocks
#undef Cyclist_SqueezeKeyedFullBlocks
#undef Cyclist_SqueezeHashFullBlocks
#undef Cyclist_EncryptFullBlocks
#undef Cyclist_DecryptFullBlocks
#endif
The eXtended Keccak Code Package (XKCP)
The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Xoodoo_SnP_h_
#define _Xoodoo_SnP_h_
#include <stddef.h>
#include <stdint.h>
/** For the documentation, see SnP-documentation.h. */
#define Xoodoo_implementation "AVR8 optimized implementation"
#define Xoodoo_stateSizeInBytes (3*4*4)
#define Xoodoo_stateAlignment 1
#define Xoodoo_HasNround
#define Xoodoo_StaticInitialize()
void Xoodoo_Initialize(void *state);
/* XOR the byte argData into the state argS at byte position argOffset.
 * Every argument and the expansion as a whole are parenthesized so that
 * expression arguments (e.g. `base + 1`) are cast and indexed as a unit and
 * the macro can be used inside a larger expression. */
#define Xoodoo_AddByte(argS, argData, argOffset) (((uint8_t*)(argS))[(argOffset)] ^= (argData))
/* Note: the AVR8 implementation of the routines below only looks at the low
 * byte of each `offset`, `length` and `byteCount` argument. */

/* XOR `length` bytes from `data` into the state, starting at byte `offset`. */
void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
/* Replace `length` state bytes, starting at byte `offset`, with the bytes from `data`. */
void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
/* Set the first `byteCount` bytes of the state to zero. */
void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount);
/* Apply `nrounds` rounds of the permutation, using the last `nrounds` entries of the round-constant table. */
void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds);
/* Apply the permutation reduced to 6 rounds. */
void Xoodoo_Permute_6rounds(void *state);
/* Apply the full 12-round permutation. */
void Xoodoo_Permute_12rounds(void *state);
/* Copy `length` state bytes, starting at byte `offset`, into `data`. */
void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length);
/* For each of `length` bytes: output[i] = input[i] XOR state[offset + i]. The state is not modified. */
void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length);
; The eXtended Keccak Code Package (XKCP)
; The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
; Implementation by Ronny Van Keer, hereby denoted as "the implementer".
; For more information, feedback or questions, please refer to the Keccak Team website:
; To the extent possible under law, the implementer has waived all copyright
; and related or neighboring rights to the source code in this file.
; ---
; This file implements Xoodoo in a SnP-compatible way.
; Please refer to SnP-documentation.h for more details.
; This implementation comes with Xoodoo-SnP.h in the same folder.
; Please refer to LowLevel.build for the exact list of other files it must be combined with.
; INFO: Tested on ATmega1280 simulator
; Registers used in all routines
#define zero 1
#define rpState 24
#define rX 26
#define rY 28
#define rZ 30
#define sp 0x3D
; void Xoodoo_StaticInitialize( void )
; This implementation keeps no static data, so there is nothing to set up.
.global Xoodoo_StaticInitialize
Xoodoo_StaticInitialize:
    ret
; void Xoodoo_Initialize(void *state)
; Clears the 48-byte state (3 planes x 4 lanes x 4 bytes).
; argument state is passed in r24:r25
; clobbers r23 and Z
.global Xoodoo_Initialize
Xoodoo_Initialize:
    movw rZ, r24
    ldi r23, 3*4/2 ; clear state (8 bytes / 2 lanes) per iteration
Xoodoo_Initialize_Loop:
    st z+, zero
    st z+, zero
    st z+, zero
    st z+, zero
    st z+, zero
    st z+, zero
    st z+, zero
    st z+, zero
    dec r23
    brne Xoodoo_Initialize_Loop
    ret
; void Xoodoo_AddByte(void *state, unsigned char data, unsigned int offset)
; XORs one byte into the state at byte position offset.
; argument state is passed in r24:r25
; argument data is passed in r22:r23, only LSB (r22) is used
; argument offset is passed in r20:r21, only LSB (r20) is used
; clobbers r0 and Z
.global Xoodoo_AddByte
Xoodoo_AddByte:
    movw rZ, r24
    add rZ, r20 ; Z = state + offset
    adc rZ+1, zero
    ld r0, Z
    eor r0, r22
    st Z, r0
    ret
; void Xoodoo_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
; XORs length bytes from data into the state, starting at byte offset.
; argument state is passed in r24:r25
; argument data is passed in r22:r23
; argument offset is passed in r20:r21, only LSB (r20) is used
; argument length is passed in r18:r19, only LSB (r18) is used
; clobbers r0, r18, r19, r21, X and Z
.global Xoodoo_AddBytes
Xoodoo_AddBytes:
    movw rZ, r24
    add rZ, r20 ; Z = state + offset
    adc rZ+1, zero
    movw rX, r22 ; X = data
    subi r18, 8
    brcs Xoodoo_AddBytes_Byte ; fewer than 8 bytes: skip the unrolled loop
Xoodoo_AddBytes_Loop8:
    ;do 8 bytes per iteration
    ld r21, X+
    ld r0, Z
    eor r0, r21
    st Z+, r0
    ld r21, X+
    ld r0, Z
    eor r0, r21
    st Z+, r0
    ld r21, X+
    ld r0, Z
    eor r0, r21
    st Z+, r0
    ld r21, X+
    ld r0, Z
    eor r0, r21
    st Z+, r0
    ld r21, X+
    ld r0, Z
    eor r0, r21
    st Z+, r0
    ld r21, X+
    ld r0, Z
    eor r0, r21
    st Z+, r0
    ld r21, X+
    ld r0, Z
    eor r0, r21
    st Z+, r0
    ld r21, X+
    ld r0, Z
    eor r0, r21
    st Z+, r0
    subi r18, 8
    brcc Xoodoo_AddBytes_Loop8
Xoodoo_AddBytes_Byte:
    ldi r19, 8 ; undo the last subtraction: r18 = 0..7 bytes left
    add r18, r19
    breq Xoodoo_AddBytes_End
Xoodoo_AddBytes_Loop1:
    ld r21, X+
    ld r0, Z
    eor r0, r21
    st Z+, r0
    dec r18
    brne Xoodoo_AddBytes_Loop1
Xoodoo_AddBytes_End:
    ret
; void Xoodoo_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
; Copies length bytes from data over the state, starting at byte offset.
; argument state is passed in r24:r25
; argument data is passed in r22:r23
; argument offset is passed in r20:r21, only LSB (r20) is used
; argument length is passed in r18:r19, only LSB (r18) is used
; clobbers r0, r18, r19, X and Z
.global Xoodoo_OverwriteBytes
Xoodoo_OverwriteBytes:
    movw rZ, r24
    add rZ, r20 ; Z = state + offset
    adc rZ+1, zero
    movw rX, r22 ; X = data
    subi r18, 8
    brcs Xoodoo_OverwriteBytes_Byte ; fewer than 8 bytes: skip the unrolled loop
Xoodoo_OverwriteBytes_Loop8:
    ;do 8 bytes per iteration
    ld r0, X+
    st Z+, r0
    ld r0, X+
    st Z+, r0
    ld r0, X+
    st Z+, r0
    ld r0, X+
    st Z+, r0
    ld r0, X+
    st Z+, r0
    ld r0, X+
    st Z+, r0
    ld r0, X+
    st Z+, r0
    ld r0, X+
    st Z+, r0
    subi r18, 8
    brcc Xoodoo_OverwriteBytes_Loop8
Xoodoo_OverwriteBytes_Byte:
    ldi r19, 8 ; undo the last subtraction: r18 = 0..7 bytes left
    add r18, r19
    breq Xoodoo_OverwriteBytes_End
Xoodoo_OverwriteBytes_Loop1:
    ld r0, X+
    st Z+, r0
    dec r18
    brne Xoodoo_OverwriteBytes_Loop1
Xoodoo_OverwriteBytes_End:
    ret
; void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount)
; Sets the first byteCount bytes of the state to zero.
; argument state is passed in r24:r25
; argument byteCount is passed in r22:r23, only LSB (r22) is used
; clobbers r22, r23 and Z
.global Xoodoo_OverwriteWithZeroes
Xoodoo_OverwriteWithZeroes:
    movw rZ, r24 ; rZ = state
    mov r23, r22
    lsr r23 ; r23 = byteCount / 8 = number of 8-byte groups
    lsr r23
    lsr r23
    breq Xoodoo_OverwriteWithZeroes_Bytes
Xoodoo_OverwriteWithZeroes_LoopLanes:
    st Z+, r1
    st Z+, r1
    st Z+, r1
    st Z+, r1
    st Z+, r1
    st Z+, r1
    st Z+, r1
    st Z+, r1
    dec r23
    brne Xoodoo_OverwriteWithZeroes_LoopLanes
Xoodoo_OverwriteWithZeroes_Bytes:
    andi r22, 7 ; remaining 0..7 bytes
    breq Xoodoo_OverwriteWithZeroes_End
Xoodoo_OverwriteWithZeroes_LoopBytes:
    st Z+, r1
    dec r22
    brne Xoodoo_OverwriteWithZeroes_LoopBytes
Xoodoo_OverwriteWithZeroes_End:
    ret
; void Xoodoo_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
; Copies length bytes of the state, starting at byte offset, into data.
; argument state is passed in r24:r25
; argument data is passed in r22:r23
; argument offset is passed in r20:r21, only LSB (r20) is used
; argument length is passed in r18:r19, only LSB (r18) is used
; clobbers r0, r18, r19, X and Z
.global Xoodoo_ExtractBytes
Xoodoo_ExtractBytes:
    movw rZ, r24
    add rZ, r20 ; Z = state + offset
    adc rZ+1, zero
    movw rX, r22 ; X = data
    subi r18, 8
    brcs Xoodoo_ExtractBytes_Byte ; fewer than 8 bytes: skip the unrolled loop
Xoodoo_ExtractBytes_Loop8:
    ;do 8 bytes per iteration
    ld r0, Z+
    st X+, r0
    ld r0, Z+
    st X+, r0
    ld r0, Z+
    st X+, r0
    ld r0, Z+
    st X+, r0
    ld r0, Z+
    st X+, r0
    ld r0, Z+
    st X+, r0
    ld r0, Z+
    st X+, r0
    ld r0, Z+
    st X+, r0
    subi r18, 8
    brcc Xoodoo_ExtractBytes_Loop8
Xoodoo_ExtractBytes_Byte:
    ldi r19, 8 ; undo the last subtraction: r18 = 0..7 bytes left
    add r18, r19
    breq Xoodoo_ExtractBytes_End
Xoodoo_ExtractBytes_Loop1:
    ld r0, Z+
    st X+, r0
    dec r18
    brne Xoodoo_ExtractBytes_Loop1
Xoodoo_ExtractBytes_End:
    ret
; void Xoodoo_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
; For each of length bytes: output[i] = input[i] ^ state[offset + i].
; argument state is passed in r24:r25
; argument input is passed in r22:r23
; argument output is passed in r20:r21
; argument offset is passed in r18:r19, only LSB (r18) is used
; argument length is passed in r16:r17, only LSB (r16) is used
; r16 and Y (r28:r29) are saved and restored; clobbers r0, r19, r21, X and Z
.global Xoodoo_ExtractAndAddBytes
Xoodoo_ExtractAndAddBytes:
    tst r16
    breq Xoodoo_ExtractAndAddBytes_End ; nothing to do; taken before anything is pushed
    push r16
    push r28
    push r29
    movw rZ, r24
    add rZ, r18 ; Z = state + offset
    adc rZ+1, zero
    movw rX, r22 ; X = input
    movw rY, r20 ; Y = output
    subi r16, 8
    brcs Xoodoo_ExtractAndAddBytes_Byte ; fewer than 8 bytes: skip the unrolled loop
Xoodoo_ExtractAndAddBytes_LoopLane:
    ld r21, Z+
    ld r0, X+
    eor r0, r21
    st Y+, r0
    ld r21, Z+
    ld r0, X+
    eor r0, r21
    st Y+, r0
    ld r21, Z+
    ld r0, X+
    eor r0, r21
    st Y+, r0
    ld r21, Z+
    ld r0, X+
    eor r0, r21
    st Y+, r0
    ld r21, Z+
    ld r0, X+
    eor r0, r21
    st Y+, r0
    ld r21, Z+
    ld r0, X+
    eor r0, r21
    st Y+, r0
    ld r21, Z+
    ld r0, X+
    eor r0, r21
    st Y+, r0
    ld r21, Z+
    ld r0, X+
    eor r0, r21
    st Y+, r0
    subi r16, 8
    brcc Xoodoo_ExtractAndAddBytes_LoopLane
Xoodoo_ExtractAndAddBytes_Byte:
    ldi r19, 8 ; undo the last subtraction: r16 = 0..7 bytes left
    add r16, r19
    breq Xoodoo_ExtractAndAddBytes_Done
Xoodoo_ExtractAndAddBytes_Loop1:
    ld r21, Z+
    ld r0, X+
    eor r0, r21
    st Y+, r0
    dec r16
    brne Xoodoo_ExtractAndAddBytes_Loop1
Xoodoo_ExtractAndAddBytes_Done:
    pop r29
    pop r28
    pop r16
Xoodoo_ExtractAndAddBytes_End:
    ret
; Round constants, two bytes each (low byte first), stored in the order the
; rounds are executed.  The permutation reads them with lpm and stops at the
; 0xFF terminator, so an entry point selects the number of rounds by choosing
; where in this table it starts.
Xoodoo_RoundConstants_12:
    .BYTE 0x58, 0x00
    .BYTE 0x38, 0x00
    .BYTE 0xC0, 0x03
    .BYTE 0xD0, 0x00
    .BYTE 0x20, 0x01
    .BYTE 0x14, 0x00
Xoodoo_RoundConstants_6:
    .BYTE 0x60, 0x00
    .BYTE 0x2C, 0x00
    .BYTE 0x80, 0x03
    .BYTE 0xF0, 0x00
    .BYTE 0xA0, 0x01
    .BYTE 0x12, 0x00
Xoodoo_RoundConstants_0:
    .BYTE 0xFF, 0 ; terminator
; Register variables used in permutation
#define rC0 2 // 4 regs (2-5)
#define rC1 6 // 4 regs (6-9)
#define rC2 10 // 4 regs (10-13)
#define rC3 14 // 4 regs (14-17)
#define rVv 18 // 4 regs (18-21)
#define rTt 22 // 4 regs (22-25)
// r26-27 free
#define a00 0
#define a01 4
#define a02 8
#define a03 12
#define a10 16
#define a11 20
#define a12 24
#define a13 28
#define a20 32
#define a21 36
#define a22 40
#define a23 44
; void Xoodoo_Permute_Nrounds( void *state, unsigned int nrounds )
; Runs the last nrounds rounds of the permutation.
; argument state is passed in r24:r25
; argument nrounds is passed in r22:r23 (only LSB (r22) is used)
.global Xoodoo_Permute_Nrounds
Xoodoo_Permute_Nrounds:
    mov r26, r22
    ldi rZ+0, lo8(Xoodoo_RoundConstants_0)
    ldi rZ+1, hi8(Xoodoo_RoundConstants_0)
    lsl r26 ; two table bytes per round
    sub rZ, r26 ; Z = terminator address - 2*nrounds
    sbc rZ+1, zero
    rjmp Xoodoo_Permute
; void Xoodoo_Permute_6rounds( void *state )
; Runs the permutation starting at the constant for the sixth-from-last round.
; argument state is passed in r24:r25
.global Xoodoo_Permute_6rounds
Xoodoo_Permute_6rounds:
    ldi rZ+0, lo8(Xoodoo_RoundConstants_6)
    ldi rZ+1, hi8(Xoodoo_RoundConstants_6)
    rjmp Xoodoo_Permute
; void Xoodoo_Permute_12rounds( void *state )
; Runs all 12 rounds; falls through into the shared permutation body.
; argument state is passed in r24:r25
.global Xoodoo_Permute_12rounds
Xoodoo_Permute_12rounds:
    ldi rZ+0, lo8(Xoodoo_RoundConstants_12)
    ldi rZ+1, hi8(Xoodoo_RoundConstants_12)
; Shared body of all Xoodoo_Permute_* entry points.
; On entry: r24:r25 = state, Z = address of the first round constant to use.
; r2-r17 and Y are used as working registers below, so they are saved first.
Xoodoo_Permute:
    push r2
    push r3
    push r4
    push r5
    push r6
    push r7
    push r8
    push r9
    push r10
    push r11
    push r12
    push r13
    push r14
    push r15
    push r16
    push r17
    push r28
    push r29
; Initial Prepare Theta
; Computes the four column parities c0..c3 (rC0..rC3) as the XOR of the three
; planes, walking Y once over the whole state and then rewinding it.
    movw rY, rpState
    ld rC0+0, Y+ ; a00
    ld rC0+1, Y+
    ld rC0+2, Y+
    ld rC0+3, Y+
    ld rC1+0, Y+ ; a01
    ld rC1+1, Y+
    ld rC1+2, Y+
    ld rC1+3, Y+
    ld rC2+0, Y+ ; a02
    ld rC2+1, Y+
    ld rC2+2, Y+
    ld rC2+3, Y+
    ld rC3+0, Y+ ; a03
    ld rC3+1, Y+
    ld rC3+2, Y+
    ld rC3+3, Y+
    ld r0, Y+ ; a10
    eor rC0+0, r0
    ld r0, Y+
    eor rC0+1, r0
    ld r0, Y+
    eor rC0+2, r0
    ld r0, Y+
    eor rC0+3, r0
    ld r0, Y+ ; a11
    eor rC1+0, r0
    ld r0, Y+
    eor rC1+1, r0
    ld r0, Y+
    eor rC1+2, r0
    ld r0, Y+
    eor rC1+3, r0
    ld r0, Y+ ; a12
    eor rC2+0, r0
    ld r0, Y+
    eor rC2+1, r0
    ld r0, Y+
    eor rC2+2, r0
    ld r0, Y+
    eor rC2+3, r0
    ld r0, Y+ ; a13
    eor rC3+0, r0
    ld r0, Y+
    eor rC3+1, r0
    ld r0, Y+
    eor rC3+2, r0
    ld r0, Y+
    eor rC3+3, r0
    ld r0, Y+ ; a20
    eor rC0+0, r0
    ld r0, Y+
    eor rC0+1, r0
    ld r0, Y+
    eor rC0+2, r0
    ld r0, Y+
    eor rC0+3, r0
    ld r0, Y+ ; a21
    eor rC1+0, r0
    ld r0, Y+
    eor rC1+1, r0
    ld r0, Y+
    eor rC1+2, r0
    ld r0, Y+
    eor rC1+3, r0
    ld r0, Y+ ; a22
    eor rC2+0, r0
    ld r0, Y+
    eor rC2+1, r0
    ld r0, Y+
    eor rC2+2, r0
    ld r0, Y+
    eor rC2+3, r0
    ld r0, Y+ ; a23
    eor rC3+0, r0
    ld r0, Y+
    eor rC3+1, r0
    ld r0, Y+
    eor rC3+2, r0
    ld r0, Y+
    eor rC3+3, r0
    sbiw rY, 48 ; Y back to the start of the state
; Each round starts here with the column parities already in rC0..rC3.
Xoodoo_RoundLoop:
; Theta + Rho west
; c0 = ROTL32(c0 ^ ROTL32(c0, 9), 5);
mov rVv+1, rC0+0 ; rol 9
mov rVv+2, rC0+1
mov rVv+3, rC0+2
mov rVv+0, rC0+3
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
eor rVv+0, rC0+0
eor rVv+1, rC0+1
eor rVv+2, rC0+2
eor rVv+3, rC0+3
bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
bst rVv, 0
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
bst rVv, 0
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
mov rC0+0, rVv+3
mov rC0+1, rVv+0
mov rC0+2, rVv+1
mov rC0+3, rVv+2
; c1 = ROTL32(c1 ^ ROTL32(c1, 9), 5);
mov rVv+1, rC1+0 ; rol 9
mov rVv+2, rC1+1
mov rVv+3, rC1+2
mov rVv+0, rC1+3
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
eor rVv+0, rC1+0
eor rVv+1, rC1+1
eor rVv+2, rC1+2
eor rVv+3, rC1+3
bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
bst rVv, 0
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
bst rVv, 0
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
mov rC1+0, rVv+3
mov rC1+1, rVv+0
mov rC1+2, rVv+1
mov rC1+3, rVv+2
; c2 = ROTL32(c2 ^ ROTL32(c2, 9), 5);
mov rVv+1, rC2+0 ; rol 9
mov rVv+2, rC2+1
mov rVv+3, rC2+2
mov rVv+0, rC2+3
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
eor rVv+0, rC2+0
eor rVv+1, rC2+1
eor rVv+2, rC2+2
eor rVv+3, rC2+3
bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
bst rVv, 0
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
bst rVv, 0
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
mov rC2+0, rVv+3
mov rC2+1, rVv+0
mov rC2+2, rVv+1
mov rC2+3, rVv+2
; c3 = ROTL32(c3 ^ ROTL32(c3, 9), 5);
mov rVv+1, rC3+0 ; rol 9
mov rVv+2, rC3+1
mov rVv+3, rC3+2
mov rVv+0, rC3+3
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
eor rVv+0, rC3+0
eor rVv+1, rC3+1
eor rVv+2, rC3+2
eor rVv+3, rC3+3
bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
bst rVv, 0
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
bst rVv, 0
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
mov rC3+0, rVv+3
mov rC3+1, rVv+0
mov rC3+2, rVv+1
mov rC3+3, rVv+2
; v1 = a13;
ldd rVv+0, Y+a13+0
ldd rVv+1, Y+a13+1
ldd rVv+2, Y+a13+2
ldd rVv+3, Y+a13+3
; a13 = a12 ^ c1;
ldd r0, Y+a12+0
eor r0, rC1+0
std Y+a13+0, r0
ldd r0, Y+a12+1
eor r0, rC1+1
std Y+a13+1, r0
ldd r0, Y+a12+2
eor r0, rC1+2
std Y+a13+2, r0
ldd r0, Y+a12+3
eor r0, rC1+3
std Y+a13+3, r0
; a12 = a11 ^ c0;
ldd r0, Y+a11+0
eor r0, rC0+0
std Y+a12+0, r0
ldd r0, Y+a11+1
eor r0, rC0+1
std Y+a12+1, r0
ldd r0, Y+a11+2
eor r0, rC0+2
std Y+a12+2, r0
ldd r0, Y+a11+3
eor r0, rC0+3
std Y+a12+3, r0
; a11 = a10 ^ c3;
ldd r0, Y+a10+0
eor r0, rC3+0
std Y+a11+0, r0
ldd r0, Y+a10+1
eor r0, rC3+1
std Y+a11+1, r0
ldd r0, Y+a10+2
eor r0, rC3+2
std Y+a11+2, r0
ldd r0, Y+a10+3
eor r0, rC3+3
std Y+a11+3, r0
; a10 = v1 ^ c2;
eor rVv+0, rC2+0
std Y+a10+0, rVv+0
eor rVv+1, rC2+1
std Y+a10+1, rVv+1
eor rVv+2, rC2+2
std Y+a10+2, rVv+2
eor rVv+3, rC2+3
std Y+a10+3, rVv+3
; a20 = ROTL32(a20 ^ c3, 11);
ldd rVv+0, Y+a20+3
eor rVv+0, rC3+3
ldd rVv+1, Y+a20+0
eor rVv+1, rC3+0
ldd rVv+2, Y+a20+1
eor rVv+2, rC3+1
ldd rVv+3, Y+a20+2
eor rVv+3, rC3+2
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
std Y+a20+0, rVv+0
std Y+a20+1, rVv+1
std Y+a20+2, rVv+2
std Y+a20+3, rVv+3
; a21 = ROTL32(a21 ^ c0, 11);
ldd rVv+0, Y+a21+3
eor rVv+0, rC0+3
ldd rVv+1, Y+a21+0
eor rVv+1, rC0+0
ldd rVv+2, Y+a21+1
eor rVv+2, rC0+1
ldd rVv+3, Y+a21+2
eor rVv+3, rC0+2
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
std Y+a21+0, rVv+0
std Y+a21+1, rVv+1
std Y+a21+2, rVv+2
std Y+a21+3, rVv+3
; a22 = ROTL32(a22 ^ c1, 11);
ldd rVv+0, Y+a22+3
eor rVv+0, rC1+3
ldd rVv+1, Y+a22+0
eor rVv+1, rC1+0
ldd rVv+2, Y+a22+1
eor rVv+2, rC1+1
ldd rVv+3, Y+a22+2
eor rVv+3, rC1+2
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
std Y+a22+0, rVv+0
std Y+a22+1, rVv+1
std Y+a22+2, rVv+2
std Y+a22+3, rVv+3
; a23 = ROTL32(a23 ^ c2, 11);
ldd rVv+0, Y+a23+3
eor rVv+0, rC2+3
ldd rVv+1, Y+a23+0
eor rVv+1, rC2+0
ldd rVv+2, Y+a23+1
eor rVv+2, rC2+1
ldd rVv+3, Y+a23+2
eor rVv+3, rC2+2
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
std Y+a23+0, rVv+0
std Y+a23+1, rVv+1
std Y+a23+2, rVv+2
std Y+a23+3, rVv+3
; v1 = c3;
movw rVv+0, rC3+0
movw rVv+2, rC3+2
; c3 = a03 ^ c2; /* a03 resides in c3 */
ldd rC3+0, Y+a03+0
eor rC3+0, rC2+0
ldd rC3+1, Y+a03+1
eor rC3+1, rC2+1
ldd rC3+2, Y+a03+2
eor rC3+2, rC2+2
ldd rC3+3, Y+a03+3
eor rC3+3, rC2+3
; c2 = a02 ^ c1; /* a02 resides in c2 */
ldd rC2+0, Y+a02+0
eor rC2+0, rC1+0
ldd rC2+1, Y+a02+1
eor rC2+1, rC1+1
ldd rC2+2, Y+a02+2
eor rC2+2, rC1+2
ldd rC2+3, Y+a02+3
eor rC2+3, rC1+3
; c1 = a01 ^ c0; /* a01 resides in c1 */
ldd rC1+0, Y+a01+0
eor rC1+0, rC0+0
ldd rC1+1, Y+a01+1
eor rC1+1, rC0+1
ldd rC1+2, Y+a01+2
eor rC1+2, rC0+2
ldd rC1+3, Y+a01+3
eor rC1+3, rC0+3
; c0 = a00 ^ v1; /* a00 resides in c0 */
ldd rC0+0, Y+a00+0
eor rC0+0, rVv+0
ldd rC0+1, Y+a00+1
eor rC0+1, rVv+1
ldd rC0+2, Y+a00+2
eor rC0+2, rVv+2
ldd rC0+3, Y+a00+3
eor rC0+3, rVv+3
; c0 ^= __rc; /* +Iota */
lpm rVv+0, Z+
lpm rVv+1, Z+
eor rC0+0, rVv+0
eor rC0+1, rVv+1
; Chi + Rho east + Early Theta
; a00 = c0 ^= ~a10 & a20;
ldd r0, Y+a10+0
com r0
ldd rTt+0, Y+a20+0 ; a20 in rTt
and r0, rTt+0
eor rC0+0, r0
std Y+a00+0, rC0+0
ldd r0, Y+a10+1
com r0
ldd rTt+1, Y+a20+1
and r0, rTt+1
eor rC0+1, r0
std Y+a00+1, rC0+1
ldd r0, Y+a10+2
com r0
ldd rTt+2, Y+a20+2
and r0, rTt+2
eor rC0+2, r0
std Y+a00+2, rC0+2
ldd r0, Y+a10+3
com r0
ldd rTt+3, Y+a20+3
and r0, rTt+3
eor rC0+3, r0
std Y+a00+3, rC0+3
; a10 ^= ~a20 & c0;
com rTt+0
and rTt+0, rC0+0
ldd r0, Y+a10+0
eor rTt+0, r0 ; new a10 in rTt
std Y+a10+0, rTt+0
com rTt+1
and rTt+1, rC0+1
ldd r0, Y+a10+1
eor rTt+1, r0
std Y+a10+1, rTt+1
com rTt+2
and rTt+2, rC0+2
ldd r0, Y+a10+2
eor rTt+2, r0
std Y+a10+2, rTt+2
com rTt+3
and rTt+3, rC0+3
ldd r0, Y+a10+3
eor rTt+3, r0
std Y+a10+3, rTt+3
; v1(a20) = ROTL32(a20 ^ ~c0 & a10, 8);
movw rVv+0, rTt+0 ; a10 in rVv
movw rVv+2, rTt+2
mov r0, rC0+0
com r0
and rTt+0, r0
ldd r0, Y+a20+0
eor rTt+0, r0
mov r0, rC0+1
com r0
and rTt+1, r0
ldd r0, Y+a20+1
eor rTt+1, r0
mov r0, rC0+2
com r0
and rTt+2, r0
ldd r0, Y+a20+2
eor rTt+2, r0
mov r0, rC0+3
com r0
and rTt+3, r0
ldd r0, Y+a20+3
eor rTt+3, r0
std Y+a20+0, rTt+3
std Y+a20+1, rTt+0
std Y+a20+2, rTt+1
std Y+a20+3, rTt+2
; c0 ^= a10 = ROTL32(a10, 1);
lsl rVv+0
rol rVv+1
std Y+a10+1, rVv+1
eor rC0+1, rVv+1
rol rVv+2
std Y+a10+2, rVv+2
eor rC0+2, rVv+2
rol rVv+3
std Y+a10+3, rVv+3
eor rC0+3, rVv+3
adc rVv+0, zero
std Y+a10+0, rVv+0
eor rC0+0, rVv+0
; a02 = c2 ^= ~a12 & a22;
ldd r0, Y+a12+0
com r0
ldd rVv+0, Y+a22+0 ; a22 in rVv
and r0, rVv+0
eor rC2+0, r0
std Y+a02+0, rC2+0
ldd r0, Y+a12+1
com r0
ldd rVv+1, Y+a22+1
and r0, rVv+1
eor rC2+1, r0
std Y+a02+1, rC2+1
ldd r0, Y+a12+2
com r0
ldd rVv+2, Y+a22+2
and r0, rVv+2
eor rC2+2, r0
std Y+a02+2, rC2+2
ldd r0, Y+a12+3
com r0
ldd rVv+3, Y+a22+3
and r0, rVv+3
eor rC2+3, r0
std Y+a02+3, rC2+3
; a12 ^= ~a22 & c2;
mov r0, rVv+0 ; a12 in rTt
com r0
and r0, rC2+0
ldd rTt+0, Y+a12+0
eor rTt+0, r0
std Y+a12+0, rTt+0
mov r0, rVv+1
com r0
and r0, rC2+1
ldd rTt+1, Y+a12+1
eor rTt+1, r0
std Y+a12+1, rTt+1
mov r0, rVv+2
com r0
and r0, rC2+2
ldd rTt+2, Y+a12+2
eor rTt+2, r0
std Y+a12+2, rTt+2
mov r0, rVv+3
com r0
and r0, rC2+3
ldd rTt+3, Y+a12+3
eor rTt+3, r0
std Y+a12+3, rTt+3
; c0 ^= a20 = ROTL32(a22 ^ ~c2 & a12, 8);
mov r0, rC2+0
com r0
and r0, rTt+0
eor r0, rVv+0
ldd rVv+0, Y+a20+1 ; rVv = a22
std Y+a20+1, r0
eor rC0+1, r0
mov r0, rC2+1
com r0
and r0, rTt+1
eor r0, rVv+1
ldd rVv+1, Y+a20+2
std Y+a20+2, r0
eor rC0+2, r0
mov r0, rC2+2
com r0
and r0, rTt+2
eor r0, rVv+2
ldd rVv+2, Y+a20+3
std Y+a20+3, r0
eor rC0+3, r0
mov r0, rC2+3
com r0
and r0, rTt+3
eor r0, rVv+3
ldd rVv+3, Y+a20+0
std Y+a20+0, r0
eor rC0+0, r0
; c2 ^= a12 = ROTL32(a12, 1);
lsl rTt+0
rol rTt+1
eor rC2+1, rTt+1
std Y+a12+1, rTt+1
rol rTt+2
eor rC2+2, rTt+2
std Y+a12+2, rTt+2
rol rTt+3
eor rC2+3, rTt+3
std Y+a12+3, rTt+3
adc rTt+0, zero
eor rC2+0, rTt+0
std Y+a12+0, rTt+0
; a22 = v1;
std Y+a22+0, rVv+3
std Y+a22+1, rVv+0
std Y+a22+2, rVv+1
std Y+a22+3, rVv+2
; c2 ^= v1;
eor rC2+0, rVv+3
eor rC2+1, rVv+0
eor rC2+2, rVv+1
eor rC2+3, rVv+2
; a01 = c1 ^= ~a11 & a21;
ldd rTt+0, Y+a11+0 ;rTt holds a11
mov r0, rTt+0
com r0
ldd rVv+0, Y+a21+0 ;rVv holds a21
and r0, rVv+0
eor rC1+0, r0
std Y+a01+0, rC1+0
ldd rTt+1, Y+a11+1
mov r0, rTt+1
com r0
ldd rVv+1, Y+a21+1
and r0, rVv+1
eor rC1+1, r0
std Y+a01+1, rC1+1
ldd rTt+2, Y+a11+2
mov r0, rTt+2
com r0
ldd rVv+2, Y+a21+2
and r0, rVv+2
eor rC1+2, r0
std Y+a01+2, rC1+2
ldd rTt+3, Y+a11+3
mov r0, rTt+3
com r0
ldd rVv+3, Y+a21+3
and r0, rVv+3
eor rC1+3, r0
std Y+a01+3, rC1+3
; a11 ^= ~a21 & c1;
mov r0, rVv+0
com r0
and r0, rC1+0
eor rTt+0, r0
std Y+a11+0, rTt+0
mov r0, rVv+1
com r0
and r0, rC1+1
eor rTt+1, r0
std Y+a11+1, rTt+1
mov r0, rVv+2
com r0
and r0, rC1+2
eor rTt+2, r0
std Y+a11+2, rTt+2
mov r0, rVv+3
com r0
and r0, rC1+3
eor rTt+3, r0
std Y+a11+3, rTt+3
; v1 = ROTL32(a21 ^ ~c1 & a11, 8);
mov r0, rC1+0
com r0
and r0, rTt+0
eor rVv+0, r0 ; v1 not yet ROTL32'ed(8)
mov r0, rC1+1
com r0
and r0, rTt+1
eor rVv+1, r0
mov r0, rC1+2
com r0
and r0, rTt+2
eor rVv+2, r0
mov r0, rC1+3
com r0
and r0, rTt+3
eor rVv+3, r0
; c1 ^= a11 = ROTL32(a11, 1);
lsl rTt+0
rol rTt+1
eor rC1+1, rTt+1
std Y+a11+1, rTt+1
rol rTt+2
eor rC1+2, rTt+2
std Y+a11+2, rTt+2
rol rTt+3
eor rC1+3, rTt+3
std Y+a11+3, rTt+3
adc rTt+0, zero
eor rC1+0, rTt+0
std Y+a11+0, rTt+0
; a03 = c3 ^= ~a13 & a23;
ldd r0, Y+a13+0
com r0
ldd rTt+0, Y+a23+0 ; a23 in rTt
and r0, rTt+0
eor rC3+0, r0
std Y+a03+0, rC3+0
ldd r0, Y+a13+1
com r0
ldd rTt+1, Y+a23+1
and r0, rTt+1
eor rC3+1, r0
std Y+a03+1, rC3+1
ldd r0, Y+a13+2
com r0
ldd rTt+2, Y+a23+2
and r0, rTt+2
eor rC3+2, r0
std Y+a03+2, rC3+2
ldd r0, Y+a13+3
com r0
ldd rTt+3, Y+a23+3
and r0, rTt+3
eor rC3+3, r0
std Y+a03+3, rC3+3
; a13 ^= ~a23 & c3;
mov r0, rTt+0
com r0
and r0, rC3+0
ldd rTt+0, Y+a13+0 ; a13 in rTt
eor rTt+0, r0
mov r0, rTt+1
com r0
and r0, rC3+1
ldd rTt+1, Y+a13+1
eor rTt+1, r0
mov r0, rTt+2
com r0
and r0, rC3+2
ldd rTt+2, Y+a13+2
eor rTt+2, r0
mov r0, rTt+3
com r0
and r0, rC3+3
ldd rTt+3, Y+a13+3
eor rTt+3, r0
; c1 ^= a21 = ROTL32(a23 ^ ~c3 & a13, 8);
push rVv
mov r0, rC3+0
com r0
and r0, rTt+0
ldd rVv, Y+a23+0
eor r0, rVv
eor rC1+1, r0
std Y+a21+1, r0
mov r0, rC3+1
com r0
and r0, rTt+1
ldd rVv, Y+a23+1
eor r0, rVv
eor rC1+2, r0
std Y+a21+2, r0
mov r0, rC3+2
com r0
and r0, rTt+2
ldd rVv, Y+a23+2
eor r0, rVv
eor rC1+3, r0
std Y+a21+3, r0
mov r0, rC3+3
com r0
and r0, rTt+3
ldd rVv, Y+a23+3
eor r0, rVv
eor rC1+0, r0
std Y+a21+0, r0
pop rVv
; a23 = v1;
std Y+a23+0, rVv+3 ; rol8(rVv)
std Y+a23+1, rVv+0
std Y+a23+2, rVv+1
std Y+a23+3, rVv+2
; c3 ^= v1;
eor rC3+0, rVv+3
eor rC3+1, rVv+0
eor rC3+2, rVv+1
eor rC3+3, rVv+2
; c3 ^= a13 = ROTL32(a13, 1);
lsl rTt+0
rol rTt+1
std Y+a13+1, rTt+1
eor rC3+1, rTt+1
rol rTt+2
std Y+a13+2, rTt+2
eor rC3+2, rTt+2
rol rTt+3
std Y+a13+3, rTt+3
eor rC3+3, rTt+3
adc rTt+0, zero
std Y+a13+0, rTt+0
eor rC3+0, rTt+0
; Check for terminator
; The table ends with 0xFF; incrementing that byte gives zero, which ends the loop.
    lpm r0, Z
    inc r0
    breq Xoodoo_Done
    rjmp Xoodoo_RoundLoop
; Restore the saved registers in reverse push order and return.
Xoodoo_Done:
    pop r29
    pop r28
    pop r17
    pop r16
    pop r15
    pop r14
    pop r13
    pop r12
    pop r11
    pop r10
    pop r9
    pop r8
    pop r7
    pop r6
    pop r5
    pop r4
    pop r3
    pop r2
    ret
The eXtended Keccak Code Package (XKCP)
The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Xoodoo_h_
#define _Xoodoo_h_
#include <stdint.h>
#include <stdlib.h>
#define MAXROUNDS 12
#define NROWS 3
#define NCOLUMS 4
/* Round constants */
#define _rc12 0x00000058
#define _rc11 0x00000038
#define _rc10 0x000003C0
#define _rc9 0x000000D0
#define _rc8 0x00000120
#define _rc7 0x00000014
#define _rc6 0x00000060
#define _rc5 0x0000002C
#define _rc4 0x00000380
#define _rc3 0x000000F0
#define _rc2 0x000001A0
#define _rc1 0x00000012
/* ROTL32(a, offset): rotate the 32-bit value a left by offset bits.
 * A compiler intrinsic is used where one is known; otherwise the portable
 * shift form below is used. */
#if !defined(ROTL32)
    #if defined (__arm__) && !defined(__GNUC__)
        #define ROTL32(a, offset) __ror(a, (32-(offset))%32)
    #elif defined(_MSC_VER)
        #define ROTL32(a, offset) _rotl(a, (offset)%32)
    #else
        /* The two halves are combined with OR: for an offset that is a
         * multiple of 32 both shifts are by 0 and the result must be a itself
         * (XOR would cancel it to 0).  The argument is parenthesized so an
         * expression argument is converted to uint32_t as a whole. */
        #define ROTL32(a, offset) ((((uint32_t)(a)) << ((offset)%32)) | (((uint32_t)(a)) >> ((32-(offset))%32)))
    #endif
#endif
/* READ32_UNALIGNED(argAddress): read a uint32_t from argAddress.
 * NOTE(review): outside the ARM-compiler branch this is a plain dereference
 * of a cast pointer, so it relies on the target tolerating unaligned 32-bit
 * loads and on the pointed-to storage being accessible as uint32_t. */
#if !defined(READ32_UNALIGNED)
    #if defined (__arm__) && !defined(__GNUC__)
        #define READ32_UNALIGNED(argAddress) (*((const __packed uint32_t*)(argAddress)))
    #elif defined(_MSC_VER)
        #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress)))
    #else
        #define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress)))
    #endif
#endif
/* WRITE32_UNALIGNED(argAddress, argData): store argData as a uint32_t at
 * argAddress.
 * NOTE(review): outside the ARM-compiler branch this is a plain store through
 * a cast pointer, so it relies on the target tolerating unaligned 32-bit
 * stores and on the pointed-to storage being accessible as uint32_t. */
#if !defined(WRITE32_UNALIGNED)
    #if defined (__arm__) && !defined(__GNUC__)
        #define WRITE32_UNALIGNED(argAddress, argData) (*((__packed uint32_t*)(argAddress)) = (argData))
    #elif defined(_MSC_VER)
        #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData))
    #else
        #define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData))
    #endif
#endif
/* index(x, y): position of lane (column x, row y) in the state viewed as a
 * flat array of NROWS rows of NCOLUMS lanes; both coordinates wrap around.
 * The parameter names avoid the `__` prefix, which is reserved for the
 * implementation. */
#if !defined(index)
    #define index(x, y) ((((y) % NROWS) * NCOLUMS) + ((x) % NCOLUMS))
#endif
typedef uint32_t tXoodooLane;
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Xoodyak_parameters_h_
#define _Xoodyak_parameters_h_
/* Cyclist parameters for the Xoodyak instance.  All values are presumably
 * byte counts (TODO confirm): 48 equals the Xoodoo state size (3*4*4). */
#define Xoodyak_f_bPrime 48
/* Rate used for both absorbing and squeezing after initialization (hash mode). */
#define Xoodyak_Rhash 16
/* Presumably the absorb rate in keyed mode -- confirm against the key-absorbing code. */
#define Xoodyak_Rkin 44
/* Block size used when encrypting/decrypting in keyed mode. */
#define Xoodyak_Rkout 24
/* Number of bytes squeezed and re-absorbed by the ratchet operation. */
#define Xoodyak_lRatchet 16
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifdef XoodooReference
#include "displayIntermediateValues.h"
#include <assert.h>
#include <string.h>
#include "Xoodyak.h"
#ifdef OUTPUT
#include <stdlib.h>
#include <string.h>
/*
 * displayByteString -- trace helper for OUTPUT builds.
 *
 * Writes "<synopsis>:" to f, then each of the length bytes of data as a space
 * followed by two lower-case hex digits, then a newline.
 */
static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length)
{
    unsigned int i;

    fprintf(f, "%s:", synopsis);
    for (i = 0; i < length; i++) {
        fprintf(f, " %02x", (unsigned int)data[i]);
    }
    fprintf(f, "\n");
}
#define MyMin(a,b) (((a) < (b)) ? (a) : (b))
/* Instantiate the generic Cyclist code for Xoodoo: the template is included
 * with SnP, SnP_Permute and prefix set, and these three helper macros are
 * removed again afterwards. */
#ifdef XKCP_has_Xoodoo
    #include "Xoodoo-SnP.h"

    #define SnP Xoodoo
    #define SnP_Permute Xoodoo_Permute_12rounds
    #define prefix Xoodyak
        /* NOTE(review): the include named no file; "Cyclist.inc" is the
         * template that accompanies Cyclist.h -- confirm the file name. */
        #include "Cyclist.inc"
    #undef prefix
    #undef SnP
    #undef SnP_Permute
#endif
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Xoodyak_h_
#define _Xoodyak_h_

#include "config.h"
#ifdef XKCP_has_Xoodoo

#include <stddef.h>
#include "Cyclist.h"
#include "Xoodoo-SnP.h"
#include "Xoodyak-parameters.h"

/* Declares the Xoodyak_Instance type, sized and aligned for the Xoodoo state. */
KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment)
/* Declares Xoodyak_Initialize, _Absorb, _Encrypt, _Decrypt, _Squeeze,
 * _SqueezeKey and _Ratchet, so callers get prototypes for them. */
KCP_DeclareCyclistFunctions(Xoodyak)

#else
/* Only reached when no Xoodoo implementation was configured. */
#error This requires an implementation of Xoodoo
#endif

#endif
The eXtended Keccak Code Package (XKCP)
Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _align_h_
#define _align_h_

/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. */
#ifdef ALIGN
#undef ALIGN
#endif

/* ALIGN(x): request x-byte alignment for the declaration that follows, using
 * the compiler's own syntax; expands to nothing on unknown compilers. */
#if defined(__GNUC__)
#define ALIGN(x) __attribute__ ((aligned(x)))
#elif defined(_MSC_VER)
#define ALIGN(x) __declspec(align(x))
#elif defined(__ARMCC_VERSION)
#define ALIGN(x) __align(x)
#else
#define ALIGN(x)
#endif

#endif
#define CRYPTO_ABYTES 16
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
1. source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
2. binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation;
3. the name of the copyright holder is not used to endorse products
built using this software without specific written permission.
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
Issue Date: 20/12/2007
Changes for ARM 9/9/2010
#ifndef _BRG_ENDIAN_H
#define _BRG_ENDIAN_H
#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
#if 0
/* Include files where endian defines and byteswap functions may reside */
#if defined( __sun )
# include <sys/isa_defs.h>
#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
# include <sys/endian.h>
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
# include <machine/endian.h>
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
# if !defined( __MINGW32__ ) && !defined( _AIX )
# include <endian.h>
# if !defined( __BEOS__ )
# include <byteswap.h>
# endif
# endif
/* Now attempt to set the define for platform byte order using any */
/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
/* seem to encompass most endian symbol definitions */
#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
# endif
#elif defined( BIG_ENDIAN )
#elif defined( LITTLE_ENDIAN )
#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
# endif
#elif defined( _BIG_ENDIAN )
#elif defined( _LITTLE_ENDIAN )
#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN )
#elif defined( __LITTLE_ENDIAN )
#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
# endif
#elif defined( __BIG_ENDIAN__ )
#elif defined( __LITTLE_ENDIAN__ )
/* if the platform byte order could not be determined, then try to */
/* set this define using common machine defines */
#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
defined( vax ) || defined( vms ) || defined( VMS ) || \
defined( __VMS ) || defined( _M_X64 )
#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \
defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ )
#elif defined(__arm__)
# ifdef __BIG_ENDIAN
# else
# endif
#elif 1 /* **** EDIT HERE IF NECESSARY **** */
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order
/* File generated by ToTargetConfigFile.xsl */
/* Feature switches. Xoodyak.h and the Xoodyak glue code test XKCP_has_Xoodoo */
/* with #ifdef before declaring or instantiating the Xoodyak functions. */
#define XKCP_has_Xoodyak
#define XKCP_has_Xoodoo
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#include "crypto_aead.h"
#include "api.h"
#include "Xoodyak.h"
#include <string.h>
/* NOTE(review): the bodies of the two guards below are not visible here; */
/* presumably they supply fallback values when api.h does not - confirm. */
#if !defined(CRYPTO_KEYBYTES)
#if !defined(CRYPTO_NPUBBYTES)
/* Number of authentication-tag bytes appended after the ciphertext. */
#define TAGLEN 16
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k)
Xoodyak_Instance instance;
Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, NULL, 0, NULL, 0);
Xoodyak_Absorb(&instance, npub, CRYPTO_NPUBBYTES);
Xoodyak_Absorb(&instance, ad, (size_t)adlen);
Xoodyak_Encrypt(&instance, m, c, (size_t)mlen);
Xoodyak_Squeeze(&instance, c + mlen, TAGLEN);
*clen = mlen + TAGLEN;
#if 0
unsigned int i;
for (i = 0; i < *clen; ++i )
printf("\\x%02x", c[i] );
return 0;
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k)
Xoodyak_Instance instance;
unsigned char tag[TAGLEN];
unsigned long long mlen_;
*mlen = 0;
if (clen < TAGLEN) {
return -1;
mlen_ = clen - TAGLEN;
Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, NULL, 0, NULL, 0);
Xoodyak_Absorb(&instance, npub, CRYPTO_NPUBBYTES);
Xoodyak_Absorb(&instance, ad, (size_t)adlen);
Xoodyak_Decrypt(&instance, c, m, (size_t)mlen_);
Xoodyak_Squeeze(&instance, tag, TAGLEN);
if (memcmp(tag, c + mlen_, TAGLEN) != 0) {
memset(m, 0, (size_t)mlen_);
return -1;
*mlen = mlen_;
return 0;
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Cyclist_h_
#define _Cyclist_h_

#include <stddef.h>   /* size_t, used by the prototypes declared below */
#include <stdint.h>
#include "align.h"

/* Values stored in the instance's mode field. */
#define Cyclist_ModeHash    1
#define Cyclist_ModeKeyed   2

/* Values stored in the instance's phase field. */
#define Cyclist_PhaseDown   1
#define Cyclist_PhaseUp     2

/*
 * KCP_DeclareCyclistStructure(prefix, size, alignment)
 *
 * Declares the type prefix##_Instance:
 *   state     - permutation state, size bytes
 *   phase     - Cyclist_PhaseDown or Cyclist_PhaseUp
 *   mode      - Cyclist_ModeHash or Cyclist_ModeKeyed
 *   Rabsorb   - number of bytes absorbed per block in the current mode
 *   Rsqueeze  - number of bytes squeezed per block in the current mode
 * When OUTPUT is defined the structure additionally carries a shadow copy of
 * the state and a FILE pointer used for dumping intermediate values.
 */
#ifdef OUTPUT

#include <stdio.h>

#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
    ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
        uint8_t         state[size]; \
        uint8_t         stateShadow[size]; \
        FILE            *file; \
        unsigned int    phase; \
        unsigned int    mode; \
        unsigned int    Rabsorb; \
        unsigned int    Rsqueeze; \
    } prefix##_Instance;

#else

#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
    ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
        uint8_t         state[size]; \
        unsigned int    phase; \
        unsigned int    mode; \
        unsigned int    Rabsorb; \
        unsigned int    Rsqueeze; \
    } prefix##_Instance;

#endif

/*
 * KCP_DeclareCyclistFunctions(prefix)
 *
 * Declares the public functions operating on a prefix##_Instance.
 */
#define KCP_DeclareCyclistFunctions(prefix) \
    void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \
    void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \
    void prefix##_Encrypt(prefix##_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen); \
    void prefix##_Decrypt(prefix##_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen); \
    void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \
    void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \
    void prefix##_Ratchet(prefix##_Instance *instance);

#endif
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/* Two-level token pasting: JOIN expands its arguments (SnP, prefix) before */
/* JOIN0 concatenates them. */
#define JOIN0(a, b) a ## b
#define JOIN(a, b) JOIN0(a, b)
/* Generic permutation-layer names, built from the includer's SnP macro. */
#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize)
#define SnP_Initialize JOIN(SnP, _Initialize)
#define SnP_AddBytes JOIN(SnP, _AddBytes)
#define SnP_AddByte JOIN(SnP, _AddByte)
#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes)
#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes)
#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes)
/* Public Cyclist names, built from the includer's prefix macro. */
#define Cyclist_Instance JOIN(prefix, _Instance)
#define Cyclist_Initialize JOIN(prefix, _Initialize)
#define Cyclist_Absorb JOIN(prefix, _Absorb)
#define Cyclist_Encrypt JOIN(prefix, _Encrypt)
#define Cyclist_Decrypt JOIN(prefix, _Decrypt)
#define Cyclist_Squeeze JOIN(prefix, _Squeeze)
#define Cyclist_SqueezeKey JOIN(prefix, _SqueezeKey)
#define Cyclist_Ratchet JOIN(prefix, _Ratchet)
/* Names of the file-local helper functions. */
#define Cyclist_AbsorbAny JOIN(prefix, _AbsorbAny)
#define Cyclist_AbsorbKey JOIN(prefix, _AbsorbKey)
#define Cyclist_SqueezeAny JOIN(prefix, _SqueezeAny)
#define Cyclist_Down JOIN(prefix, _Down)
#define Cyclist_Up JOIN(prefix, _Up)
#define Cyclist_Crypt JOIN(prefix, _Crypt)
/* Parameter names; they resolve to <prefix>_f_bPrime, <prefix>_Rhash, ... */
#define Cyclist_f_bPrime JOIN(prefix, _f_bPrime)
#define Cyclist_Rhash JOIN(prefix, _Rhash)
#define Cyclist_Rkin JOIN(prefix, _Rkin)
#define Cyclist_Rkout JOIN(prefix, _Rkout)
#define Cyclist_lRatchet JOIN(prefix, _lRatchet)
/* Whole-block fast paths, mapped only when the permutation layer announces them. */
#if defined(CyclistFullBlocks_supported)
#define Cyclist_AbsorbKeyedFullBlocks JOIN(prefix, _AbsorbKeyedFullBlocks)
#define Cyclist_AbsorbHashFullBlocks JOIN(prefix, _AbsorbHashFullBlocks)
#define Cyclist_SqueezeKeyedFullBlocks JOIN(prefix, _SqueezeKeyedFullBlocks)
#define Cyclist_SqueezeHashFullBlocks JOIN(prefix, _SqueezeHashFullBlocks)
#define Cyclist_EncryptFullBlocks JOIN(prefix, _EncryptFullBlocks)
#define Cyclist_DecryptFullBlocks JOIN(prefix, _DecryptFullBlocks)
/* ------- Cyclist internal interfaces ------- */
/*
 * Down step: XORs the block Xi into the start of the state, XORs the byte
 * 0x01 right after it and XORs a domain byte into the last state byte.
 * In hash mode only the least significant bit of Cd is used.
 * Leaves the instance in the Down phase.
 */
static void Cyclist_Down(Cyclist_Instance *instance, const uint8_t *Xi, unsigned int XiLen, uint8_t Cd)
{
    uint8_t domainByte = Cd;

    if (instance->mode == Cyclist_ModeHash) {
        domainByte &= 0x01;
    }
    SnP_AddBytes(instance->state, Xi, 0, XiLen);
    SnP_AddByte(instance->state, 0x01, XiLen);
    SnP_AddByte(instance->state, domainByte, Cyclist_f_bPrime - 1);
    instance->phase = Cyclist_PhaseDown;
}
/*
 * Up step: in keyed mode XORs the domain byte Cu into the last state byte,
 * applies the permutation and copies the first YiLen state bytes to Yi.
 * Leaves the instance in the Up phase.
 *
 * Yi may be NULL when YiLen is 0 ("Up without extract"); in that case no
 * extraction call is made, so the byte-copy routine never sees a null
 * pointer.
 *
 * When OUTPUT is defined and a dump file is attached, the difference between
 * the state before the permutation and the shadow copy, and the state after
 * the permutation, are written to that file.
 */
static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu)
{
    #if defined(OUTPUT)
    uint8_t s[Cyclist_f_bPrime];
    #endif

    if (instance->mode != Cyclist_ModeHash) {
        SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1);
    }
    #if defined(OUTPUT)
    if (instance->file != NULL) {
        SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
        SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime );
    }
    #endif
    SnP_Permute( instance->state );
    #if defined(OUTPUT)
    if (instance->file != NULL) {
        memcpy( instance->stateShadow, instance->state, sizeof(instance->state) );
        fprintf( instance->file, "Data XORed" );
        displayByteString( instance->file, "", s, Cyclist_f_bPrime );
        SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
        fprintf( instance->file, "After f() ");
        displayByteString( instance->file, "", s, Cyclist_f_bPrime );
    }
    #endif
    instance->phase = Cyclist_PhaseUp;
    if (YiLen != 0) {
        SnP_ExtractBytes( instance->state, Yi, 0, YiLen );
    }
}
/*
 * Absorbs XLen bytes of X in blocks of at most r bytes.
 * The domain byte Cd accompanies the first block only; later blocks use 0.
 * At least one (possibly empty) block is always absorbed.
 * When the whole-block fast paths are available and r equals the keyed or
 * hash absorb rate, runs of full blocks are delegated to them.
 */
static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd)
{
    unsigned int chunkLen;

    for (;;) {
        /* A Down step must be preceded by an Up step. */
        if (instance->phase != Cyclist_PhaseUp) {
            Cyclist_Up(instance, NULL, 0, 0);
        }
        chunkLen = MyMin(XLen, r);
        Cyclist_Down(instance, X, chunkLen, Cd);
        Cd = 0;
        X += chunkLen;
        XLen -= chunkLen;
        #if defined(CyclistFullBlocks_supported)
        if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) {
            size_t consumed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen);
            X += consumed;
            XLen -= consumed;
        }
        else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) {
            size_t consumed = Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen);
            X += consumed;
            XLen -= consumed;
        }
        #endif
        if (XLen == 0) {
            break;
        }
    }
}
/*
 * Switches a hash-mode instance to keyed mode and absorbs
 * K || ID || (one byte holding IDLen) as a single block with domain byte
 * 0x02. A non-empty counter is then absorbed one byte per block.
 *
 * Preconditions (checked with assert only): the instance is in hash mode and
 * KLen + IDLen <= Cyclist_Rkin - 1, so that the block fits the local buffer.
 * ID may be NULL when IDLen is 0; the copy is skipped in that case instead of
 * handing a null pointer to memcpy.
 * Nothing is absorbed when KLen is 0.
 */
static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
{
    uint8_t KID[Cyclist_Rkin];

    assert(instance->mode == Cyclist_ModeHash);
    assert((KLen + IDLen) <= (Cyclist_Rkin - 1));

    instance->mode = Cyclist_ModeKeyed;
    instance->Rabsorb = Cyclist_Rkin;
    instance->Rsqueeze = Cyclist_Rkout;
    if (KLen != 0) {
        memcpy(KID, K, KLen);
        if (IDLen != 0) {
            memcpy(KID + KLen, ID, IDLen);
        }
        KID[KLen + IDLen] = (uint8_t)IDLen;
        Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02);
        if (counterLen != 0) {
            Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00);
        }
    }
}
/*
 * Squeezes YLen bytes into Y.
 * The first block is produced by an Up step carrying the domain byte Cu;
 * every further block is produced by an empty Down step followed by an Up
 * step with domain byte 0.
 *
 * When the whole-block fast paths are available they produce runs of full
 * blocks. The generic Down/Up step is the alternative to the fast path, not
 * a follow-up to it: running it after the fast path has consumed all
 * remaining output would permute the state once more than intended.
 *
 * NOTE(review): the keyed fast path is entered only when YLen >= Cyclist_Rkin
 * although it produces Cyclist_Rkout-byte blocks; this only delays its use.
 */
static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu)
{
    unsigned int len;

    len = MyMin(YLen, instance->Rsqueeze );
    Cyclist_Up(instance, Y, len, Cu);
    Y += len;
    YLen -= len;
    while (YLen != 0) {
        #if defined(CyclistFullBlocks_supported)
        if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkin)) {
            size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen);
            Y += lenProcessed;
            YLen -= lenProcessed;
        }
        else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) {
            size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen);
            Y += lenProcessed;
            YLen -= lenProcessed;
        }
        else
        #endif
        {
            Cyclist_Down(instance, NULL, 0, 0);
            len = MyMin(YLen, instance->Rsqueeze );
            Cyclist_Up(instance, Y, len, 0);
            Y += len;
            YLen -= len;
        }
    }
}
/*
 * Encrypts (decrypt == 0) or decrypts (decrypt != 0) IOLen bytes from I to O
 * in blocks of at most Cyclist_Rkout bytes.
 *
 * Each block: Up step without extraction (domain byte 0x80 for the first
 * block, 0 afterwards), XOR of the state with the input to form the output,
 * then a Down step absorbing the plaintext of that block. At least one
 * (possibly empty) block is always processed.
 *
 * After the first block, runs of full blocks are handed to the whole-block
 * fast paths when available. The generic step is the alternative to the fast
 * path; it must not run after the fast path has consumed the remaining data.
 *
 * For encryption the plaintext block is saved in a local buffer before the
 * output is written, so the plaintext is still available when I and O are
 * the same buffer.
 */
static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt)
{
    unsigned int splitLen;
    uint8_t P[Cyclist_Rkout];
    uint8_t Cu = 0x80;

    do {
        if (decrypt != 0) {
            #if defined(CyclistFullBlocks_supported)
            if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
                size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen);
                I += lenProcessed;
                O += lenProcessed;
                IOLen -= lenProcessed;
            }
            else
            #endif
            {
                splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
                Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
                SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
                Cyclist_Down(instance, O, splitLen, 0x00);
                I += splitLen;
                O += splitLen;
                IOLen -= splitLen;
            }
        }
        else {
            #if defined(CyclistFullBlocks_supported)
            if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
                size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen);
                I += lenProcessed;
                O += lenProcessed;
                IOLen -= lenProcessed;
            }
            else
            #endif
            {
                splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
                if (splitLen != 0) {
                    /* I may be NULL for an empty message; skip the copy then. */
                    memcpy(P, I, splitLen);
                }
                Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
                SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
                Cyclist_Down(instance, P, splitLen, 0x00);
                I += splitLen;
                O += splitLen;
                IOLen -= splitLen;
            }
        }
        Cu = 0x00;
    } while ( IOLen != 0 );
}
/* ------- Cyclist interfaces ------- */
/*
 * Prepares an instance for use.
 * The permutation state is initialized through the permutation layer and the
 * instance starts in hash mode, Up phase, with both rates set to
 * Cyclist_Rhash. When KLen is non-zero the key, identifier and counter are
 * absorbed, which switches the instance to keyed mode.
 * With OUTPUT defined the dump file is cleared and the shadow state is
 * initialized as well.
 */
void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
{
    SnP_StaticInitialize();
    /* The state must start from a defined value: instances are typically */
    /* automatic variables and every later step XORs into this buffer. */
    SnP_Initialize(instance->state);
    instance->phase = Cyclist_PhaseUp;
    instance->mode = Cyclist_ModeHash;
    instance->Rabsorb = Cyclist_Rhash;
    instance->Rsqueeze = Cyclist_Rhash;
    #ifdef OUTPUT
    instance->file = 0;
    SnP_Initialize( instance->stateShadow );
    #endif
    if (KLen != 0) {
        Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, counter, counterLen);
    }
}
/* Absorbs XLen bytes of X at the instance's current absorb rate, using */
/* domain byte 0x03 for the first block. */
void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, size_t XLen)
{
    const unsigned int rate = instance->Rabsorb;

    Cyclist_AbsorbAny(instance, X, XLen, rate, 0x03);
}
/* Encrypts PLen bytes from P into C. The instance must be in keyed mode */
/* (checked with assert). */
void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen)
{
    const int decrypt = 0;

    assert(instance->mode == Cyclist_ModeKeyed);
    Cyclist_Crypt(instance, P, C, PLen, decrypt);
}
/* Decrypts CLen bytes from C into P. The instance must be in keyed mode */
/* (checked with assert). */
void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen)
{
    const int decrypt = 1;

    assert(instance->mode == Cyclist_ModeKeyed);
    Cyclist_Crypt(instance, C, P, CLen, decrypt);
}
/* Produces YLen output bytes in Y; the first block uses domain byte 0x40. */
void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen)
{
    const uint8_t domain = 0x40;

    Cyclist_SqueezeAny(instance, Y, YLen, domain);
}
/* Produces KLen bytes in K, using domain byte 0x20 for the first block. */
/* The instance must be in keyed mode (checked with assert). */
void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen)
{
    const uint8_t domain = 0x20;

    assert(instance->mode == Cyclist_ModeKeyed);
    Cyclist_SqueezeAny(instance, K, KLen, domain);
}
/*
 * Ratchets the state: squeezes Cyclist_lRatchet bytes with domain byte 0x10
 * and absorbs those same bytes again with domain byte 0x00.
 * The instance must be in keyed mode (checked with assert).
 */
void Cyclist_Ratchet(Cyclist_Instance *instance)
{
    uint8_t ratchetBytes[Cyclist_lRatchet];

    assert(instance->mode == Cyclist_ModeKeyed);
    /* Squeeze then absorb is the same as overwriting with zeros */
    Cyclist_SqueezeAny(instance, ratchetBytes, sizeof(ratchetBytes), 0x10);
    Cyclist_AbsorbAny(instance, ratchetBytes, sizeof(ratchetBytes), instance->Rabsorb, 0x00);
}
/* Remove the generic SnP_* and Cyclist_* aliases defined at the top of this */
/* file, so the template does not leak them into the including file. */
#undef SnP_StaticInitialize
#undef SnP_Initialize
#undef SnP_AddBytes
#undef SnP_AddByte
#undef SnP_OverwriteBytes
#undef SnP_ExtractBytes
#undef SnP_ExtractAndAddBytes
#undef Cyclist_Instance
#undef Cyclist_Initialize
#undef Cyclist_Absorb
#undef Cyclist_Encrypt
#undef Cyclist_Decrypt
#undef Cyclist_Squeeze
#undef Cyclist_SqueezeKey
#undef Cyclist_Ratchet
#undef Cyclist_AbsorbAny
#undef Cyclist_AbsorbKey
#undef Cyclist_SqueezeAny
#undef Cyclist_Down
#undef Cyclist_Up
#undef Cyclist_Crypt
#undef Cyclist_f_bPrime
#undef Cyclist_Rhash
#undef Cyclist_Rkin
#undef Cyclist_Rkout
#undef Cyclist_lRatchet
/* The fast-path aliases exist only when the feature macro is defined. */
#if defined(CyclistFullBlocks_supported)
#undef Cyclist_AbsorbKeyedFullBlocks
#undef Cyclist_AbsorbHashFullBlocks
#undef Cyclist_SqueezeKeyedFullBlocks
#undef Cyclist_SqueezeHashFullBlocks
#undef Cyclist_EncryptFullBlocks
#undef Cyclist_DecryptFullBlocks
The eXtended Keccak Code Package (XKCP)
The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Xoodoo_SnP_h_
#define _Xoodoo_SnP_h_

#include <stddef.h>
#include <stdint.h>

/** For the documentation, see SnP-documentation.h.
 */

#define Xoodoo_implementation       "32-bit optimized implementation"
/* Size of the permutation state: 3 x 4 lanes of 4 bytes. */
#define Xoodoo_stateSizeInBytes     (3*4*4)
#define Xoodoo_stateAlignment       4
#define Xoodoo_HasNround

/* Expands to nothing: no static set-up is performed. */
#define Xoodoo_StaticInitialize()
void Xoodoo_Initialize(void *state);
/* XORs argData into the state byte at index argOffset. */
#define Xoodoo_AddByte(argS, argData, argOffset)    (((uint8_t*)(argS))[(argOffset)] ^= (argData))
void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount);
void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds);
void Xoodoo_Permute_6rounds(void *state);
void Xoodoo_Permute_12rounds(void *state);
void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length);
void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length);

//#define Xoodoo_FastXoofff_supported
//void Xoofff_AddIs( uint8_t *output, const uint8_t *input, size_t bitLen);
//size_t Xoofff_CompressFastLoop(uint8_t *k, uint8_t *xAccu, const uint8_t *input, size_t length);
//size_t Xoofff_ExpandFastLoop(uint8_t *yAccu, const uint8_t *kRoll, uint8_t *output, size_t length);

/* Whole-block fast paths used by the Cyclist code when this macro is defined. */
/* Each function returns the number of bytes it processed. */
#define CyclistFullBlocks_supported
size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen);
size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen);
size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen);
size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen);
size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);
size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);

#endif
The eXtended Keccak Code Package (XKCP)
The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#include <stdio.h>
#include <string.h>
#include "Xoodoo.h"
/* Debug verbosity: 0 disables dumps; levels 1, 2 and 3 enable Dump1, Dump2 */
/* and Dump3 respectively (see the conditions below). */
/* NOTE(review): the directives separating each active definition from its */
/* empty fallback are not visible here. */
#define VERBOSE 0
#if (VERBOSE > 0)
/* Prints the label __t followed by the twelve working lanes a00..a23. */
#define Dump(__t) printf(__t "\n"); \
printf("a00 %08x, a01 %08x, a02 %08x, a03 %08x\n", a00, a01, a02, a03 ); \
printf("a10 %08x, a11 %08x, a12 %08x, a13 %08x\n", a10, a11, a12, a13 ); \
printf("a20 %08x, a21 %08x, a22 %08x, a23 %08x\n\n", a20, a21, a22, a23 );
#define Dump(__t)
#if (VERBOSE >= 1)
#define Dump1(__t) Dump(__t)
#define Dump1(__t)
#if (VERBOSE >= 2)
#define Dump2(__t) Dump(__t)
#define Dump2(__t)
#if (VERBOSE >= 3)
#define Dump3(__t) Dump(__t)
#define Dump3(__t)
/* ---------------------------------------------------------------- */
void Xoodoo_Initialize(void *state)
memset(state, 0, NLANES*sizeof(tXoodooLane));
/* ---------------------------------------------------------------- */
/**
 * XORs length bytes of argdata into the state, starting at state byte offset.
 *
 * State byte p is bits 8*(p%4) .. 8*(p%4)+7 of 32-bit lane p/4. The lanes are
 * addressed as integers and the input is read byte by byte, so the result
 * depends neither on the platform byte order nor on the alignment of
 * argdata, and offset is honoured for every length.
 *
 * @param argState State buffer, accessed as an array of uint32_t lanes.
 * @param argdata  Input bytes; not dereferenced when length is 0.
 * @param offset   Index of the first state byte to modify.
 * @param length   Number of bytes; offset + length must not exceed the state size.
 */
void Xoodoo_AddBytes(void *argState, const unsigned char *argdata, unsigned int offset, unsigned int length)
{
    uint32_t *lanes = (uint32_t *)argState;
    unsigned int i;

    for (i = 0; i < length; ++i) {
        const unsigned int pos = offset + i;

        lanes[pos / 4] ^= (uint32_t)argdata[i] << (8 * (pos % 4));
    }
}
/* ---------------------------------------------------------------- */
/**
 * Replaces length state bytes, starting at state byte offset, with argdata.
 *
 * State byte p is bits 8*(p%4) .. 8*(p%4)+7 of 32-bit lane p/4. The input is
 * read byte by byte, so the result depends neither on the platform byte
 * order nor on the alignment of argdata.
 *
 * @param argstate State buffer, accessed as an array of uint32_t lanes.
 * @param argdata  Replacement bytes; not dereferenced when length is 0.
 * @param offset   Index of the first state byte to replace.
 * @param length   Number of bytes; offset + length must not exceed the state size.
 */
void Xoodoo_OverwriteBytes(void *argstate, const unsigned char *argdata, unsigned int offset, unsigned int length)
{
    uint32_t *lanes = (uint32_t *)argstate;
    unsigned int i;

    for (i = 0; i < length; ++i) {
        const unsigned int pos = offset + i;
        const unsigned int shift = 8 * (pos % 4);
        const uint32_t keep = ~((uint32_t)0xFF << shift);

        lanes[pos / 4] = (lanes[pos / 4] & keep) | ((uint32_t)argdata[i] << shift);
    }
}
/* ---------------------------------------------------------------- */
/* Clears the first byteCount bytes of the state buffer. */
/* NOTE(review): the #error below is presumably the fallback branch of a */
/* byte-order conditional whose directives are not visible here - confirm. */
void Xoodoo_OverwriteWithZeroes(void *argstate, unsigned int byteCount)
memset(argstate, 0, byteCount);
#error "Not yet implemented"
/* ---------------------------------------------------------------- */
/**
 * Copies length state bytes, starting at state byte offset, into data.
 *
 * State byte p is bits 8*(p%4) .. 8*(p%4)+7 of 32-bit lane p/4, so the
 * result does not depend on the platform byte order. Nothing is read or
 * written when length is 0, which makes a NULL data pointer acceptable in
 * that case.
 *
 * @param state  State buffer, accessed as an array of uint32_t lanes.
 * @param data   Destination for the extracted bytes.
 * @param offset Index of the first state byte to copy.
 * @param length Number of bytes; offset + length must not exceed the state size.
 */
void Xoodoo_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
{
    const uint32_t *lanes = (const uint32_t *)state;
    unsigned int i;

    for (i = 0; i < length; ++i) {
        const unsigned int pos = offset + i;

        data[i] = (unsigned char)(lanes[pos / 4] >> (8 * (pos % 4)));
    }
}
/* ---------------------------------------------------------------- */
/**
 * Writes output[i] = input[i] XOR (state byte offset + i) for i < length.
 * The state itself is not modified.
 *
 * State byte p is bits 8*(p%4) .. 8*(p%4)+7 of 32-bit lane p/4. Input and
 * output are accessed byte by byte, so the result depends neither on the
 * platform byte order nor on buffer alignment, and input and output may be
 * the same buffer.
 *
 * @param argState State buffer, accessed as an array of uint32_t lanes.
 * @param input    Bytes to combine with the state; not dereferenced when length is 0.
 * @param output   Destination; not dereferenced when length is 0.
 * @param offset   Index of the first state byte to use.
 * @param length   Number of bytes; offset + length must not exceed the state size.
 */
void Xoodoo_ExtractAndAddBytes(const void *argState, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
{
    const uint32_t *lanes = (const uint32_t *)argState;
    unsigned int i;

    for (i = 0; i < length; ++i) {
        const unsigned int pos = offset + i;
        const unsigned char stateByte = (unsigned char)(lanes[pos / 4] >> (8 * (pos % 4)));

        output[i] = (unsigned char)(input[i] ^ stateByte);
    }
}
/* ---------------------------------------------------------------- */
/* Working variables: the twelve state lanes (aYX holds state[4*Y + X]) and */
/* two temporaries used by the step macros. */
#define DeclareVars     uint32_t    a00, a01, a02, a03; \
                        uint32_t    a10, a11, a12, a13; \
                        uint32_t    a20, a21, a22, a23; \
                        uint32_t    v1, v2

/* Loads the twelve lanes from the array named state. */
#define State2Vars      a00 = state[0+0], a01 = state[0+1], a02 = state[0+2], a03 = state[0+3]; \
                        a10 = state[4+0], a11 = state[4+1], a12 = state[4+2], a13 = state[4+3]; \
                        a20 = state[8+0], a21 = state[8+1], a22 = state[8+2], a23 = state[8+3]

/* Stores the twelve lanes back into the array named state. */
#define Vars2State      state[0+0] = a00, state[0+1] = a01, state[0+2] = a02, state[0+3] = a03; \
                        state[4+0] = a10, state[4+1] = a11, state[4+2] = a12, state[4+3] = a13; \
                        state[8+0] = a20, state[8+1] = a21, state[8+2] = a22, state[8+3] = a23

/*
** Theta: Column Parity Mixer
*/
#define Theta()                                     \
                    v1 = a03 ^ a13 ^ a23;           \
                    v2 = a00 ^ a10 ^ a20;           \
                    v1 = ROTL32(v1, 5) ^ ROTL32(v1, 14); \
                    a00 ^= v1;                      \
                    a10 ^= v1;                      \
                    a20 ^= v1;                      \
                    v1 = a01 ^ a11 ^ a21;           \
                    v2 = ROTL32(v2, 5) ^ ROTL32(v2, 14); \
                    a01 ^= v2;                      \
                    a11 ^= v2;                      \
                    a21 ^= v2;                      \
                    v2 = a02 ^ a12 ^ a22;           \
                    v1 = ROTL32(v1, 5) ^ ROTL32(v1, 14); \
                    a02 ^= v1;                      \
                    a12 ^= v1;                      \
                    a22 ^= v1;                      \
                    v2 = ROTL32(v2, 5) ^ ROTL32(v2, 14); \
                    a03 ^= v2;                      \
                    a13 ^= v2;                      \
                    a23 ^= v2

/*
** Rho-west: Plane shift
*/
#define Rho_west()                                  \
                    a20 = ROTL32(a20, 11);          \
                    a21 = ROTL32(a21, 11);          \
                    a22 = ROTL32(a22, 11);          \
                    a23 = ROTL32(a23, 11);          \
                    v1 = a13;                       \
                    a13 = a12;                      \
                    a12 = a11;                      \
                    a11 = a10;                      \
                    a10 = v1

/*
** Iota: Round constants
*/
#define Iota(__rc)  a00 ^= __rc

/*
** Chi: Non linear step, on columns
*/
#define Chi()                                       \
                    a00 ^= ~a10 & a20;              \
                    a10 ^= ~a20 & a00;              \
                    a20 ^= ~a00 & a10;              \
                                                    \
                    a01 ^= ~a11 & a21;              \
                    a11 ^= ~a21 & a01;              \
                    a21 ^= ~a01 & a11;              \
                                                    \
                    a02 ^= ~a12 & a22;              \
                    a12 ^= ~a22 & a02;              \
                    a22 ^= ~a02 & a12;              \
                                                    \
                    a03 ^= ~a13 & a23;              \
                    a13 ^= ~a23 & a03;              \
                    a23 ^= ~a03 & a13

/*
** Rho-east: Plane shift
*/
#define Rho_east()                                  \
                    a10 = ROTL32(a10, 1);           \
                    a11 = ROTL32(a11, 1);           \
                    a12 = ROTL32(a12, 1);           \
                    a13 = ROTL32(a13, 1);           \
                    v1  = ROTL32(a23, 8);           \
                    a23 = ROTL32(a21, 8);           \
                    a21 = v1;                       \
                    v1  = ROTL32(a22, 8);           \
                    a22 = ROTL32(a20, 8);           \
                    a20 = v1

/* One full round with round constant __rc. The macro ends without a */
/* trailing semicolon or line continuation; the caller supplies the ';'. */
#define Round(__rc)                                 \
                    Theta();                        \
                    Dump3("Theta");                 \
                    Rho_west();                     \
                    Dump3("Rho-west");              \
                    Iota(__rc);                     \
                    Dump3("Iota");                  \
                    Chi();                          \
                    Dump3("Chi");                   \
                    Rho_east();                     \
                    Dump3("Rho-east")
/* Round-constant table with MAXROUNDS entries. */
/* NOTE(review): the initialisers of this table are not visible here. */
static const uint32_t RC[MAXROUNDS] = {
/* Permutation with a caller-chosen number of rounds. */
/* NOTE(review): Xoodoo-SnP.h declares the three permutation functions with */
/* a void *state parameter (and unsigned int nrounds); the parameter types */
/* used here differ from those prototypes - confirm this is intended. */
void Xoodoo_Permute_Nrounds( uint32_t * state, uint32_t nr )
uint32_t i;
/* i runs over the last nr indices of the table, MAXROUNDS - nr .. MAXROUNDS - 1. */
/* NOTE(review): the loop body is not visible here. */
for (i = MAXROUNDS - nr; i < MAXROUNDS; ++i ) {
/* NOTE(review): the bodies of the two fixed-round variants are not visible here. */
void Xoodoo_Permute_6rounds( uint32_t * state)
void Xoodoo_Permute_12rounds( uint32_t * state)
The eXtended Keccak Code Package (XKCP)
The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Xoodoo_h_
#define _Xoodoo_h_
#include <stdint.h>
#include <stdlib.h>
#define MAXROUNDS 12
#define NROWS 3
#define NCOLUMS 4
/* Round constants */
#define _rc12 0x00000058
#define _rc11 0x00000038
#define _rc10 0x000003C0
#define _rc9 0x000000D0
#define _rc8 0x00000120
#define _rc7 0x00000014
#define _rc6 0x00000060
#define _rc5 0x0000002C
#define _rc4 0x00000380
#define _rc3 0x000000F0
#define _rc2 0x000001A0
#define _rc1 0x00000012
#if !defined(ROTL32)
#if defined (__arm__) && !defined(__GNUC__)
#define ROTL32(a, offset) __ror(a, (32-(offset))%32)
#elif defined(_MSC_VER)
#define ROTL32(a, offset) _rotl(a, (offset)%32)
#define ROTL32(a, offset) ((((uint32_t)a) << ((offset)%32)) ^ (((uint32_t)a) >> ((32-(offset))%32)))
#if !defined(READ32_UNALIGNED)
#if defined (__arm__) && !defined(__GNUC__)
#define READ32_UNALIGNED(argAddress) (*((const __packed uint32_t*)(argAddress)))
#elif defined(_MSC_VER)
#define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress)))
#define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress)))
#if !defined(WRITE32_UNALIGNED)
#if defined (__arm__) && !defined(__GNUC__)
#define WRITE32_UNALIGNED(argAddress, argData) (*((__packed uint32_t*)(argAddress)) = (argData))
#elif defined(_MSC_VER)
#define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData))
#define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData))
#if !defined(index)
#define index(__x,__y) ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS))
typedef uint32_t tXoodooLane;
The eXtended Keccak Code Package (XKCP)
The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include "Xoodoo-SnP.h"
#include "Xoodyak-parameters.h"
#ifdef OUTPUT
#include <stdio.h>
/* Short SnP_* aliases for the Xoodoo permutation layer, used by the */
/* whole-block functions below; the permutation is the 12-round variant. */
#define SnP_AddByte Xoodoo_AddByte
#define SnP_AddBytes Xoodoo_AddBytes
#define SnP_ExtractBytes Xoodoo_ExtractBytes
#define SnP_ExtractAndAddBytes Xoodoo_ExtractAndAddBytes
#define SnP_Permute Xoodoo_Permute_12rounds
#define SnP_OverwriteBytes Xoodoo_OverwriteBytes
size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen)
size_t initialLength = XLen;
do {
SnP_Permute(state); /* Xoodyak_Up(instance, NULL, 0, 0); */
SnP_AddBytes(state, X, 0, Xoodyak_Rkin); /* Xoodyak_Down(instance, X, Xoodyak_Rkin, 0); */
SnP_AddByte(state, 0x01, Xoodyak_Rkin);
X += Xoodyak_Rkin;
XLen -= Xoodyak_Rkin;
} while (XLen >= Xoodyak_Rkin);
return initialLength - XLen;
size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen)
size_t initialLength = XLen;
do {
SnP_Permute(state); /* Xoodyak_Up(instance, NULL, 0, 0); */
SnP_AddBytes(state, X, 0, Xoodyak_Rhash); /* Xoodyak_Down(instance, X, Xoodyak_Rhash, 0); */
SnP_AddByte(state, 0x01, Xoodyak_Rhash);
X += Xoodyak_Rhash;
XLen -= Xoodyak_Rhash;
} while (XLen >= Xoodyak_Rhash);
return initialLength - XLen;
size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen)
size_t initialLength = YLen;
do {
SnP_AddByte(state, 0x01, 0); /* Xoodyak_Down(instance, NULL, 0, 0); */
SnP_Permute(state); /* Xoodyak_Up(instance, Y, Xoodyak_Rkout, 0); */
SnP_ExtractBytes(state, Y, 0, Xoodyak_Rkout);
Y += Xoodyak_Rkout;
YLen -= Xoodyak_Rkout;
} while (YLen >= Xoodyak_Rkout);
return initialLength - YLen;
size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen)
size_t initialLength = YLen;
do {
SnP_AddByte(state, 0x01, 0); /* Xoodyak_Down(instance, NULL, 0, 0); */
SnP_Permute(state); /* Xoodyak_Up(instance, Y, Xoodyak_Rhash, 0); */
SnP_ExtractBytes(state, Y, 0, Xoodyak_Rhash);
Y += Xoodyak_Rhash;
YLen -= Xoodyak_Rhash;
} while (YLen >= Xoodyak_Rhash);
return initialLength - YLen;
size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen)
size_t initialLength = IOLen;
do {
SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout);
SnP_OverwriteBytes(state, O, 0, Xoodyak_Rkout);
SnP_AddByte(state, 0x01, Xoodyak_Rkout);
I += Xoodyak_Rkout;
O += Xoodyak_Rkout;
IOLen -= Xoodyak_Rkout;
} while (IOLen >= Xoodyak_Rkout);
return initialLength - IOLen;
size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen)
size_t initialLength = IOLen;
do {
SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout);
SnP_AddBytes(state, O, 0, Xoodyak_Rkout);
SnP_AddByte(state, 0x01, Xoodyak_Rkout);
I += Xoodyak_Rkout;
O += Xoodyak_Rkout;
IOLen -= Xoodyak_Rkout;
} while (IOLen >= Xoodyak_Rkout);
return initialLength - IOLen;
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Xoodyak_parameters_h_
#define _Xoodyak_parameters_h_
/* Number of state bytes addressed by the Cyclist code; the domain byte is XORed at index f_bPrime - 1. */
#define Xoodyak_f_bPrime 48
/* Block size in bytes for absorbing and squeezing in hash mode. */
#define Xoodyak_Rhash 16
/* Block size in bytes for absorbing in keyed mode. */
#define Xoodyak_Rkin 44
/* Block size in bytes for squeezing and for encryption/decryption in keyed mode. */
#define Xoodyak_Rkout 24
/* Number of bytes squeezed and re-absorbed by the ratchet operation. */
#define Xoodyak_lRatchet 16
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifdef XoodooReference
#include "displayIntermediateValues.h"
#include <assert.h>
#include <string.h>
#include "Xoodyak.h"
#ifdef OUTPUT
#include <stdlib.h>
#include <string.h>
static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length);

/*
 * Writes one line to f: the synopsis text, a colon, then every byte of data
 * as a space-prefixed two-digit lowercase hexadecimal number.
 */
static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length)
{
    const uint8_t *cursor = data;
    const uint8_t *const end = data + length;

    fprintf(f, "%s:", synopsis);
    while (cursor != end) {
        fprintf(f, " %02x", (unsigned int)*cursor);
        ++cursor;
    }
    fprintf(f, "\n");
}
/* Smaller of a and b. Each argument may be evaluated twice, so pass */
/* expressions without side effects. */
#define MyMin(a,b) (((a) < (b)) ? (a) : (b))
/* Instantiate the generic Cyclist code for Xoodoo: SnP selects the */
/* permutation layer, prefix selects the names of the generated functions. */
#ifdef XKCP_has_Xoodoo
#include "Xoodoo-SnP.h"
#define SnP Xoodoo
#define SnP_Permute Xoodoo_Permute_12rounds
#define prefix Xoodyak
/* NOTE(review): the include target below is empty as shown; presumably it */
/* should name the generic Cyclist implementation file - confirm. */
#include ""
#undef prefix
#undef SnP
#undef SnP_Permute
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Xoodyak_h_
#define _Xoodyak_h_
#include "config.h"
/* The Xoodyak instance type is declared only when the configuration */
/* announces a Xoodoo implementation. */
#ifdef XKCP_has_Xoodoo
#include <stddef.h>
#include "Cyclist.h"
#include "Xoodoo-SnP.h"
#include "Xoodyak-parameters.h"
/* Declares Xoodyak_Instance with a state of Xoodoo_stateSizeInBytes bytes. */
KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment)
/* NOTE(review): presumably the fallback branch taken when XKCP_has_Xoodoo */
/* is not defined; the directive introducing it is not visible here. */
#error This requires an implementation of Xoodoo
The eXtended Keccak Code Package (XKCP)
Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _align_h_
#define _align_h_
/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. */
#ifdef ALIGN
#undef ALIGN
/* ALIGN(x): compiler-specific alignment specifier; expands to nothing when */
/* none of the compilers tested below is detected. */
#if defined(__GNUC__)
#define ALIGN(x) __attribute__ ((aligned(x)))
#elif defined(_MSC_VER)
#define ALIGN(x) __declspec(align(x))
#elif defined(__ARMCC_VERSION)
#define ALIGN(x) __align(x)
#define ALIGN(x)
/* NOTE(review): presumably the number of bytes the AEAD adds to the message
   (the authentication tag); the encrypt code appends a 16-byte tag. Confirm
   against the API definition. */
#define CRYPTO_ABYTES 16
/*
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
1. source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
2. binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation;
3. the name of the copyright holder is not used to endorse products
built using this software without specific written permission.
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
Issue Date: 20/12/2007
Changes for ARM 9/9/2010
*/
/* Include guard for the byte-order detection header. */
#ifndef _BRG_ENDIAN_H
#define _BRG_ENDIAN_H
/* The two values a platform byte-order macro can be compared against. */
#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
/* The system-header probing below is compiled out by this "#if 0". */
#if 0
/* Include files where endian defines and byteswap functions may reside */
#if defined( __sun )
# include <sys/isa_defs.h>
#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
# include <sys/endian.h>
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
# include <machine/endian.h>
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
# if !defined( __MINGW32__ ) && !defined( _AIX )
# include <endian.h>
# if !defined( __BEOS__ )
# include <byteswap.h>
# endif
# endif
/* NOTE(review): in the chains that follow, the statements selected by each
   branch (and the closing #endif lines) are not visible in this view; only
   the conditions remain. Confirm against the complete header before editing. */
/* Now attempt to set the define for platform byte order using any */
/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
/* seem to encompass most endian symbol definitions */
#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
# endif
#elif defined( BIG_ENDIAN )
#elif defined( LITTLE_ENDIAN )
#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
# endif
#elif defined( _BIG_ENDIAN )
#elif defined( _LITTLE_ENDIAN )
#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN )
#elif defined( __LITTLE_ENDIAN )
#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
# endif
#elif defined( __BIG_ENDIAN__ )
#elif defined( __LITTLE_ENDIAN__ )
/* if the platform byte order could not be determined, then try to */
/* set this define using common machine defines */
#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
defined( vax ) || defined( vms ) || defined( VMS ) || \
defined( __VMS ) || defined( _M_X64 )
#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \
defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ )
#elif defined(__arm__)
# ifdef __BIG_ENDIAN
# else
# endif
/* Manual override switches: exactly one of the next two branches is meant to
   be enabled by hand when automatic detection is not possible. */
#elif 1 /* **** EDIT HERE IF NECESSARY **** */
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order
/* File generated by ToTargetConfigFile.xsl */
/* Feature flags describing which components this target was built with. */
#define XKCP_has_Xoodyak
/* Tested with #ifdef by the Xoodyak header and source before they declare or
   instantiate anything. */
#define XKCP_has_Xoodoo
/*
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
https://keccak.team/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
*/
#include "crypto_aead.h"
#include "api.h"
#include "Xoodyak.h"
#include <string.h>
/* NOTE(review): these guards presumably supply fallback values for
   CRYPTO_KEYBYTES and CRYPTO_NPUBBYTES when api.h does not define them; the
   fallback #define lines and the closing #endif are not visible here. */
#if !defined(CRYPTO_KEYBYTES)
#if !defined(CRYPTO_NPUBBYTES)
/* Number of tag bytes squeezed after the ciphertext and checked on decryption. */
#define TAGLEN 16
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k)
Xoodyak_Instance instance;
Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, NULL, 0, NULL, 0);
Xoodyak_Absorb(&instance, npub, CRYPTO_NPUBBYTES);
Xoodyak_Absorb(&instance, ad, (size_t)adlen);
Xoodyak_Encrypt(&instance, m, c, (size_t)mlen);
Xoodyak_Squeeze(&instance, c + mlen, TAGLEN);
*clen = mlen + TAGLEN;
#if 0
unsigned int i;
for (i = 0; i < *clen; ++i )
printf("\\x%02x", c[i] );
return 0;
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k)
Xoodyak_Instance instance;
unsigned char tag[TAGLEN];
unsigned long long mlen_;
*mlen = 0;
if (clen < TAGLEN) {
return -1;
mlen_ = clen - TAGLEN;
Xoodyak_Initialize(&instance, k, CRYPTO_KEYBYTES, NULL, 0, NULL, 0);
Xoodyak_Absorb(&instance, npub, CRYPTO_NPUBBYTES);
Xoodyak_Absorb(&instance, ad, (size_t)adlen);
Xoodyak_Decrypt(&instance, c, m, (size_t)mlen_);
Xoodyak_Squeeze(&instance, tag, TAGLEN);
if (memcmp(tag, c + mlen_, TAGLEN) != 0) {
memset(m, 0, (size_t)mlen_);
return -1;
*mlen = mlen_;
return 0;
/*
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
https://keccak.team/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
*/
/* Include guard for the generic Cyclist declarations. */
#ifndef _Cyclist_h_
#define _Cyclist_h_
#include <stdint.h>
#include "align.h"
/* Values stored in the `mode` field of an instance. */
#define Cyclist_ModeHash 1
#define Cyclist_ModeKeyed 2
/* Values stored in the `phase` field of an instance: which of Down/Up was
   performed last. */
#define Cyclist_PhaseDown 1
#define Cyclist_PhaseUp 2
/* Instance layout used when OUTPUT is defined: in addition to the regular
   fields it carries a shadow copy of the state and a FILE pointer, both used
   by the trace code in the Up operation. */
#ifdef OUTPUT
#include <stdio.h>
#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
uint8_t state[size]; \
uint8_t stateShadow[size]; \
FILE *file; \
unsigned int phase; \
unsigned int mode; \
unsigned int Rabsorb; \
unsigned int Rsqueeze; \
} prefix##_Instance;
/* Regular instance layout: permutation state, current phase and mode, and the
   absorb/squeeze block lengths currently in effect.
   NOTE(review): this second definition presumably lives in the #else branch
   of the OUTPUT test; the #else/#endif lines are not visible here. */
#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
uint8_t state[size]; \
unsigned int phase; \
unsigned int mode; \
unsigned int Rabsorb; \
unsigned int Rsqueeze; \
} prefix##_Instance;
/* Declares the public functions of one Cyclist instantiation; every name is
   built by pasting `prefix` in front of the operation name. */
#define KCP_DeclareCyclistFunctions(prefix) \
void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \
void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \
void prefix##_Encrypt(prefix##_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen); \
void prefix##_Decrypt(prefix##_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen); \
void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \
void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \
void prefix##_Ratchet(prefix##_Instance *instance);
/*
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
https://keccak.team/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
*/
/* Two-level token pasting: JOIN expands its arguments first, so JOIN(SnP, _X)
   yields <value of SnP>_X rather than the literal SnP_X. */
#define JOIN0(a, b) a ## b
#define JOIN(a, b) JOIN0(a, b)
/* Generic names for the permutation interface, resolved through the SnP macro
   that the including file defines. */
#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize)
#define SnP_Initialize JOIN(SnP, _Initialize)
#define SnP_AddBytes JOIN(SnP, _AddBytes)
#define SnP_AddByte JOIN(SnP, _AddByte)
#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes)
#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes)
#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes)
/* Public names of this instantiation, resolved through the `prefix` macro
   that the including file defines. */
#define Cyclist_Instance JOIN(prefix, _Instance)
#define Cyclist_Initialize JOIN(prefix, _Initialize)
#define Cyclist_Absorb JOIN(prefix, _Absorb)
#define Cyclist_Encrypt JOIN(prefix, _Encrypt)
#define Cyclist_Decrypt JOIN(prefix, _Decrypt)
#define Cyclist_Squeeze JOIN(prefix, _Squeeze)
#define Cyclist_SqueezeKey JOIN(prefix, _SqueezeKey)
#define Cyclist_Ratchet JOIN(prefix, _Ratchet)
/* Names of the file-local helper functions. */
#define Cyclist_AbsorbAny JOIN(prefix, _AbsorbAny)
#define Cyclist_AbsorbKey JOIN(prefix, _AbsorbKey)
#define Cyclist_SqueezeAny JOIN(prefix, _SqueezeAny)
#define Cyclist_Down JOIN(prefix, _Down)
#define Cyclist_Up JOIN(prefix, _Up)
#define Cyclist_Crypt JOIN(prefix, _Crypt)
/* Names of the size parameters of this instantiation; the values themselves
   are defined outside this file. */
#define Cyclist_f_bPrime JOIN(prefix, _f_bPrime)
#define Cyclist_Rhash JOIN(prefix, _Rhash)
#define Cyclist_Rkin JOIN(prefix, _Rkin)
#define Cyclist_Rkout JOIN(prefix, _Rkout)
#define Cyclist_lRatchet JOIN(prefix, _lRatchet)
/* Names of the optional bulk routines, mapped only when the permutation
   header announces them. */
#if defined(CyclistFullBlocks_supported)
#define Cyclist_AbsorbKeyedFullBlocks JOIN(prefix, _AbsorbKeyedFullBlocks)
#define Cyclist_AbsorbHashFullBlocks JOIN(prefix, _AbsorbHashFullBlocks)
#define Cyclist_SqueezeKeyedFullBlocks JOIN(prefix, _SqueezeKeyedFullBlocks)
#define Cyclist_SqueezeHashFullBlocks JOIN(prefix, _SqueezeHashFullBlocks)
#define Cyclist_EncryptFullBlocks JOIN(prefix, _EncryptFullBlocks)
#define Cyclist_DecryptFullBlocks JOIN(prefix, _DecryptFullBlocks)
/* ------- Cyclist internal interfaces ------- */
/*
 * Down operation: XORs the XiLen bytes of Xi into the start of the state,
 * XORs the byte 0x01 at position XiLen, and XORs the colour byte into the
 * last byte of the block (index Cyclist_f_bPrime - 1). In hash mode only the
 * least significant bit of Cd is used. Leaves the instance in the Down phase.
 */
static void Cyclist_Down(Cyclist_Instance *instance, const uint8_t *Xi, unsigned int XiLen, uint8_t Cd)
{
    uint8_t colour;

    if (instance->mode == Cyclist_ModeHash) {
        colour = (uint8_t)(Cd & 0x01);
    }
    else {
        colour = Cd;
    }

    SnP_AddBytes(instance->state, Xi, 0, XiLen);
    SnP_AddByte(instance->state, 0x01, XiLen);
    SnP_AddByte(instance->state, colour, Cyclist_f_bPrime - 1);
    instance->phase = Cyclist_PhaseDown;
}
/*
 * Up operation: outside hash mode, XORs the colour byte Cu into the last byte
 * of the block; applies the permutation to the state; marks the instance as
 * being in the Up phase; then copies the first YiLen bytes of the state to Yi.
 * When OUTPUT is defined and a trace file is set, it also prints the data that
 * was XORed into the state since the previous permutation and the state after
 * the permutation.
 */
static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu)
#if defined(OUTPUT)
/* Scratch buffer used only by the trace code. */
uint8_t s[Cyclist_f_bPrime];
if (instance->mode != Cyclist_ModeHash) {
SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1);
#if defined(OUTPUT)
if (instance->file != NULL) {
/* s = shadow state XOR current state, i.e. what was added since the shadow
   copy was taken. */
SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime );
SnP_Permute( instance->state );
#if defined(OUTPUT)
if (instance->file != NULL) {
/* Refresh the shadow copy so the next call diffs against this state. */
memcpy( instance->stateShadow, instance->state, sizeof(instance->state) );
fprintf( instance->file, "Data XORed" );
displayByteString( instance->file, "", s, Cyclist_f_bPrime );
SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
fprintf( instance->file, "After f() ");
displayByteString( instance->file, "", s, Cyclist_f_bPrime );
instance->phase = Cyclist_PhaseUp;
/* Callers that only need the permutation pass Yi = NULL with YiLen = 0. */
SnP_ExtractBytes( instance->state, Yi, 0, YiLen );
/*
 * Absorbs XLen bytes of X in pieces of at most r bytes. Each piece is
 * preceded by an Up (without output) unless the instance is already in the Up
 * phase, and is then fed to Down. Only the first Down receives the colour
 * byte Cd; later ones use 0. The loop body runs at least once, so an empty
 * input still performs one Down with zero bytes.
 */
static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd)
unsigned int splitLen;
do {
if (instance->phase != Cyclist_PhaseUp) {
Cyclist_Up(instance, NULL, 0, 0);
splitLen = MyMin(XLen, r);
Cyclist_Down(instance, X, splitLen, Cd);
Cd = 0;
X += splitLen;
XLen -= splitLen;
#if defined(CyclistFullBlocks_supported)
/* After the first piece, hand runs of whole blocks to the bulk routine that
   matches the rate in use; it returns how many bytes it consumed. */
if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) {
size_t lenProcessed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen);
X += lenProcessed;
XLen -= lenProcessed;
else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) {
size_t lenProcessed = Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen);
X += lenProcessed;
XLen -= lenProcessed;
} while ( XLen != 0 );
/*
 * Switches a fresh (hash-mode) instance to keyed mode and absorbs the key
 * material.
 *
 * The absorbed block is K || ID || (one byte holding IDLen), so
 * KLen + IDLen must not exceed Cyclist_Rkin - 1. When counterLen is non-zero
 * the counter is absorbed afterwards, one byte per block.
 *
 * K, KLen              key bytes; nothing is absorbed when KLen is 0
 * ID, IDLen            optional key identifier; ID may be NULL when IDLen is 0
 * counter, counterLen  optional counter
 */
static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
{
    uint8_t KID[Cyclist_Rkin];

    assert(instance->mode == Cyclist_ModeHash);
    assert((KLen + IDLen) <= (Cyclist_Rkin - 1));

    instance->mode = Cyclist_ModeKeyed;
    instance->Rabsorb = Cyclist_Rkin;
    instance->Rsqueeze = Cyclist_Rkout;
    if (KLen != 0) {
        memcpy(KID, K, KLen);
        /* Callers pass ID == NULL together with IDLen == 0; memcpy requires
           valid pointers even for a zero length, so skip the call then. */
        if (IDLen != 0) {
            memcpy(KID + KLen, ID, IDLen);
        }
        KID[KLen + IDLen] = (uint8_t)IDLen;
        Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02);
        if (counterLen != 0) {
            Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00);
        }
    }
}
/*
 * Produces YLen output bytes in Y. The first block (at most Rsqueeze bytes)
 * comes from an Up with colour byte Cu; every further block is obtained by an
 * empty Down followed by an Up with colour 0.
 */
static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu)
unsigned int len;
len = MyMin(YLen, instance->Rsqueeze );
Cyclist_Up(instance, Y, len, Cu);
Y += len;
YLen -= len;
while (YLen != 0) {
#if defined(CyclistFullBlocks_supported)
/* Hand runs of whole blocks to the bulk routine for the current mode; it
   returns how many bytes it produced. */
if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkin)) {
size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen);
Y += lenProcessed;
YLen -= lenProcessed;
else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) {
size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen);
Y += lenProcessed;
YLen -= lenProcessed;
Cyclist_Down(instance, NULL, 0, 0);
len = MyMin(YLen, instance->Rsqueeze );
Cyclist_Up(instance, Y, len, 0);
Y += len;
YLen -= len;
/*
 * Shared body of Encrypt and Decrypt (keyed mode only).
 *
 * Processes IOLen bytes from I to O in blocks of at most Cyclist_Rkout bytes.
 * For every block: Up (colour 0x80 for the first block, 0 afterwards), XOR
 * the input with the start of the state to produce the output, then Down with
 * the plaintext of that block. The loop body runs at least once, so an empty
 * message still performs one Up/Down pair.
 *
 * decrypt  non-zero: I is ciphertext, O receives plaintext
 *          zero:     I is plaintext,  O receives ciphertext
 */
static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt)
{
    unsigned int splitLen;
    uint8_t P[Cyclist_Rkout];
    uint8_t Cu = 0x80;

    do {
        if (decrypt != 0) {
#if defined(CyclistFullBlocks_supported)
            /* After the first block, whole blocks go to the bulk routine,
               which returns how many bytes it handled. */
            if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
                size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen);
                I += lenProcessed;
                O += lenProcessed;
                IOLen -= lenProcessed;
            }
#endif
            splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
            Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
            /* Use the generic SnP_ name like the rest of this template, so it
               also works when instantiated with another permutation. */
            SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
            Cyclist_Down(instance, O, splitLen, 0x00);
            I += splitLen;
            O += splitLen;
            IOLen -= splitLen;
        }
        else {
#if defined(CyclistFullBlocks_supported)
            if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
                size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen);
                I += lenProcessed;
                O += lenProcessed;
                IOLen -= lenProcessed;
            }
#endif
            splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
            /* Keep a copy of the plaintext: it is absorbed after O has been
               written, and O may be the same buffer as I. */
            memcpy(P, I, splitLen);
            Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
            SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
            Cyclist_Down(instance, P, splitLen, 0x00);
            I += splitLen;
            O += splitLen;
            IOLen -= splitLen;
        }
        Cu = 0x00;
    } while ( IOLen != 0 );
}
/* ------- Cyclist interfaces ------- */
/*
 * Prepares an instance for use.
 *
 * Resets the permutation state, puts the instance in hash mode and Up phase
 * with the hash rate for both absorbing and squeezing, and, when a key is
 * given (KLen != 0), switches to keyed mode by absorbing K, ID and counter.
 *
 * K, KLen              key; pass KLen = 0 for unkeyed (hash) use
 * ID, IDLen            optional key identifier, used only with a key
 * counter, counterLen  optional counter, used only with a key
 */
void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
{
    /* The state must start from a known value: everything absorbed later is
       XORed into it, so stale memory here would corrupt every output. */
    SnP_StaticInitialize();
    SnP_Initialize(instance->state);
    instance->phase = Cyclist_PhaseUp;
    instance->mode = Cyclist_ModeHash;
    instance->Rabsorb = Cyclist_Rhash;
    instance->Rsqueeze = Cyclist_Rhash;
#ifdef OUTPUT
    /* Tracing is off until a file is set; the shadow state starts in step
       with the real one. */
    instance->file = 0;
    SnP_Initialize( instance->stateShadow );
#endif
    if (KLen != 0) {
        Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, counter, counterLen);
    }
}
/* Absorbs the XLen bytes of X at the instance's current absorb rate, using
   colour byte 0x03 for the first block. */
void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, size_t XLen)
{
    const uint8_t absorbColour = 0x03;
    const unsigned int rate = instance->Rabsorb;

    Cyclist_AbsorbAny(instance, X, XLen, rate, absorbColour);
}
/* Encrypts the PLen bytes of P into C. The instance must be in keyed mode
   (checked with assert). */
void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen)
{
    enum { directionEncrypt = 0 };

    assert(instance->mode == Cyclist_ModeKeyed);
    Cyclist_Crypt(instance, P, C, PLen, directionEncrypt);
}
/* Decrypts the CLen bytes of C into P. The instance must be in keyed mode
   (checked with assert). */
void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen)
{
    enum { directionDecrypt = 1 };

    assert(instance->mode == Cyclist_ModeKeyed);
    Cyclist_Crypt(instance, C, P, CLen, directionDecrypt);
}
/* Writes YLen output bytes to Y, using colour byte 0x40 for the first block. */
void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen)
{
    const uint8_t squeezeColour = 0x40;

    Cyclist_SqueezeAny(instance, Y, YLen, squeezeColour);
}
/* Writes KLen output bytes to K, using colour byte 0x20 for the first block.
   The instance must be in keyed mode (checked with assert). */
void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen)
{
    const uint8_t squeezeKeyColour = 0x20;

    assert(instance->mode == Cyclist_ModeKeyed);
    Cyclist_SqueezeAny(instance, K, KLen, squeezeKeyColour);
}
/* Ratchets the state: squeezes Cyclist_lRatchet bytes (colour 0x10) and
   absorbs them straight back (colour 0x00) at the current absorb rate. The
   instance must be in keyed mode (checked with assert). */
void Cyclist_Ratchet(Cyclist_Instance *instance)
{
    uint8_t ratchetBlock[Cyclist_lRatchet];
    const size_t ratchetLen = sizeof(ratchetBlock);

    assert(instance->mode == Cyclist_ModeKeyed);

    /* Squeeze then absorb is the same as overwriting with zeros */
    Cyclist_SqueezeAny(instance, ratchetBlock, ratchetLen, 0x10);
    Cyclist_AbsorbAny(instance, ratchetBlock, ratchetLen, instance->Rabsorb, 0x00);
}
/* Remove every name-mapping macro introduced at the top of this template so
   that it can be included again with a different prefix/SnP pair. */
#undef SnP_StaticInitialize
#undef SnP_Initialize
#undef SnP_AddBytes
#undef SnP_AddByte
#undef SnP_OverwriteBytes
#undef SnP_ExtractBytes
#undef SnP_ExtractAndAddBytes
#undef Cyclist_Instance
#undef Cyclist_Initialize
#undef Cyclist_Absorb
#undef Cyclist_Encrypt
#undef Cyclist_Decrypt
#undef Cyclist_Squeeze
#undef Cyclist_SqueezeKey
#undef Cyclist_Ratchet
#undef Cyclist_AbsorbAny
#undef Cyclist_AbsorbKey
#undef Cyclist_SqueezeAny
#undef Cyclist_Down
#undef Cyclist_Up
#undef Cyclist_Crypt
#undef Cyclist_f_bPrime
#undef Cyclist_Rhash
#undef Cyclist_Rkin
#undef Cyclist_Rkout
#undef Cyclist_lRatchet
/* The bulk-routine names only exist when they were mapped in the first place. */
#if defined(CyclistFullBlocks_supported)
#undef Cyclist_AbsorbKeyedFullBlocks
#undef Cyclist_AbsorbHashFullBlocks
#undef Cyclist_SqueezeKeyedFullBlocks
#undef Cyclist_SqueezeHashFullBlocks
#undef Cyclist_EncryptFullBlocks
#undef Cyclist_DecryptFullBlocks
/*
The eXtended Keccak Code Package (XKCP)
The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
https://keccak.team/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
*/
#ifndef _Xoodoo_SnP_h_
#define _Xoodoo_SnP_h_
#include <stddef.h>
#include <stdint.h>
/** For the documentation, see SnP-documentation.h. */
/* Human-readable description of this implementation. */
#define Xoodoo_implementation "32-bit optimized ARM assembler implementation"
/* State size: 3 * 4 * 4 = 48 bytes. */
#define Xoodoo_stateSizeInBytes (3*4*4)
/* Alignment requested for state buffers, in bytes. */
#define Xoodoo_stateAlignment 4
/* No static set-up is needed: expands to nothing. */
#define Xoodoo_StaticInitialize()
void Xoodoo_Initialize(void *state);
/* XORs the single byte argData into the state byte at index argOffset. */
#define Xoodoo_AddByte(argS, argData, argOffset) ((uint8_t*)argS)[argOffset] ^= (argData)
void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount);
//void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds);
void Xoodoo_Permute_6rounds(void *state);
void Xoodoo_Permute_12rounds(void *state);
void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length);
void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length);
/* Announces the Xoofff helper routines declared below. */
#define Xoodoo_FastXoofff_supported
void Xoofff_AddIs(uint8_t *output, const uint8_t *input, size_t bitLen);
size_t Xoofff_CompressFastLoop(uint8_t *kRoll, uint8_t *xAccu, const uint8_t *input, size_t length);
size_t Xoofff_ExpandFastLoop(uint8_t *yAccu, const uint8_t *kRoll, uint8_t *output, size_t length);
/* Announces the bulk Cyclist routines declared below. The Cyclist template
   advances its data pointer by the value each of them returns. */
#define CyclistFullBlocks_supported
size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen);
size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen);
size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen);
size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen);
size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);
size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);
@ The eXtended Keccak Code Package (XKCP)
@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
@ For more information, feedback or questions, please refer to the Keccak Team website:
@ To the extent possible under law, the implementer has waived all copyright
@ and related or neighboring rights to the source code in this file.
@ WARNING: These functions work only on little endian CPU with ARMv6 architecture (e.g., ARM11).
@ ----------------------------------------------------------------------------
@ void Xoodoo_Initialize(void *state)
@ Clears the state: four zeroed registers are stored three times with
@ post-increment, i.e. 3 x 16 = 48 bytes starting at the address in r0.
.align 4
.global Xoodoo_Initialize
.type Xoodoo_Initialize, %function;
@ r1, r2, r3 and r12 hold the zero words to store.
movs r1, #0
movs r2, #0
movs r3, #0
movs r12, #0
stmia r0!, { r1 - r3, r12 }
stmia r0!, { r1 - r3, r12 }
stmia r0!, { r1 - r3, r12 }
bx lr
@ ----------------------------------------------------------------------------
@ void Xoodoo_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@ XORs `length` bytes of `data` into the state starting at byte `offset`.
@ Registers on entry: r0 = state, r1 = data, r2 = offset, r3 = length.
@ Whole 32-bit words are handled first, then the remaining 0..3 bytes.
@ NOTE(review): the _Bytes, _BytesLoop and _Exit labels targeted by the
@ branches below are not visible in this view.
.align 4
.global Xoodoo_AddBytes
.type Xoodoo_AddBytes, %function;
push {r4,lr}
adds r0, r0, r2 @ state += offset
subs r3, r3, #4 @ .if length >= 4
bcc Xoodoo_AddBytes_Bytes
Xoodoo_AddBytes_LanesLoop: @ then, perform on lanes
ldr r2, [r0]
ldr r4, [r1], #4
eors r2, r2, r4
str r2, [r0], #4
subs r3, r3, #4
bcs Xoodoo_AddBytes_LanesLoop
@ r3 is now (remaining - 4); adding 3 gives (remaining - 1) and leaves the
@ carry clear exactly when nothing remains.
adds r3, r3, #3
bcc Xoodoo_AddBytes_Exit
ldrb r2, [r0]
ldrb r4, [r1], #1
eors r2, r2, r4
strb r2, [r0], #1
subs r3, r3, #1
bcs Xoodoo_AddBytes_BytesLoop
pop {r4,pc}
@ ----------------------------------------------------------------------------
@ void Xoodoo_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@ Copies `length` bytes of `data` over the state starting at byte `offset`.
@ Registers on entry: r0 = state, r1 = data, r2 = offset, r3 = length.
@ Whole 32-bit words are copied first, then the remaining 0..3 bytes.
@ NOTE(review): the _Bytes, _BytesLoop and _Exit labels targeted by the
@ branches below are not visible in this view.
.align 4
.global Xoodoo_OverwriteBytes
.type Xoodoo_OverwriteBytes, %function;
adds r0, r0, r2 @ state += offset
subs r3, r3, #4 @ .if length >= 4
bcc Xoodoo_OverwriteBytes_Bytes
Xoodoo_OverwriteBytes_LanesLoop: @ then, perform on words
ldr r2, [r1], #4
str r2, [r0], #4
subs r3, r3, #4
bcs Xoodoo_OverwriteBytes_LanesLoop
@ r3 becomes (remaining - 1); carry clear means no bytes are left.
adds r3, r3, #3
bcc Xoodoo_OverwriteBytes_Exit
ldrb r2, [r1], #1
strb r2, [r0], #1
subs r3, r3, #1
bcs Xoodoo_OverwriteBytes_BytesLoop
bx lr
@ ----------------------------------------------------------------------------
@ void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount)
@ Sets the first `byteCount` bytes of the state to zero.
@ Registers on entry: r0 = state, r1 = byteCount.
@ NOTE(review): the _LoopLanes, _Bytes, _LoopBytes and _Exit labels targeted
@ by the branches below are not visible in this view.
.align 4
.global Xoodoo_OverwriteWithZeroes
.type Xoodoo_OverwriteWithZeroes, %function;
movs r3, #0
@ r2 = number of whole 32-bit words to clear.
lsrs r2, r1, #2
beq Xoodoo_OverwriteWithZeroes_Bytes
str r3, [r0], #4
subs r2, r2, #1
bne Xoodoo_OverwriteWithZeroes_LoopLanes
@ r1 = number of trailing bytes (byteCount modulo 4).
ands r1, #3
beq Xoodoo_OverwriteWithZeroes_Exit
strb r3, [r0], #1
subs r1, r1, #1
bne Xoodoo_OverwriteWithZeroes_LoopBytes
bx lr
@ ----------------------------------------------------------------------------
@ void Xoodoo_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
@ Copies `length` bytes of the state, starting at byte `offset`, to `data`.
@ The state is only read; `data` is the output buffer.
@ Registers on entry: r0 = state, r1 = data, r2 = offset, r3 = length.
@ NOTE(review): the _Bytes, _BytesLoop and _Exit labels targeted by the
@ branches below are not visible in this view.
.align 4
.global Xoodoo_ExtractBytes
.type Xoodoo_ExtractBytes, %function;
adds r0, r0, r2 @ state += offset
subs r3, r3, #4 @ .if length >= 4
bcc Xoodoo_ExtractBytes_Bytes
Xoodoo_ExtractBytes_LanesLoop: @ then, handle words
ldr r2, [r0], #4
str r2, [r1], #4
subs r3, r3, #4
bcs Xoodoo_ExtractBytes_LanesLoop
@ r3 becomes (remaining - 1); carry clear means no bytes are left.
adds r3, r3, #3
bcc Xoodoo_ExtractBytes_Exit
ldrb r2, [r0], #1
strb r2, [r1], #1
subs r3, r3, #1
bcs Xoodoo_ExtractBytes_BytesLoop
bx lr
@ ----------------------------------------------------------------------------
@ void Xoodoo_ExtractAndAddBytes(void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
@ Writes output[i] = state[offset + i] XOR input[i] for `length` bytes; the
@ state itself is not modified.
@ Registers on entry: r0 = state, r1 = input, r2 = output, r3 = offset; the
@ fifth argument (length) is read from the stack.
@ NOTE(review): the _Bytes, _BytesLoop and _Exit labels targeted by the
@ branches below are not visible in this view.
.align 4
.global Xoodoo_ExtractAndAddBytes
.type Xoodoo_ExtractAndAddBytes, %function;
push {r4,r5}
adds r0, r0, r3 @ state += offset (offset register no longer needed, reuse for length)
@ Two registers were just pushed, so the caller's stack argument is at sp + 8.
ldr r3, [sp, #8] @ get length argument from stack
subs r3, r3, #4 @ .if length >= 4
bcc Xoodoo_ExtractAndAddBytes_Bytes
Xoodoo_ExtractAndAddBytes_LanesLoop: @ then, handle words
ldr r5, [r0], #4
ldr r4, [r1], #4
eors r5, r5, r4
str r5, [r2], #4
subs r3, r3, #4
bcs Xoodoo_ExtractAndAddBytes_LanesLoop
@ r3 becomes (remaining - 1); carry clear means no bytes are left.
adds r3, r3, #3
bcc Xoodoo_ExtractAndAddBytes_Exit
ldrb r5, [r0], #1
ldrb r4, [r1], #1
eors r5, r5, r4
strb r5, [r2], #1
subs r3, r3, #1
bcs Xoodoo_ExtractAndAddBytes_BytesLoop
pop {r4,r5}
bx lr
@ ----------------------------------------------------------------------------
@ Bit-rotation amounts used by the round macros below.
.equ _r0 , 5
.equ _r1 , 14
.equ _t3 , 1
.equ _w1 , 11
.equ _e0 , 2
.equ _e1 , 8
@ Round constants, numbered by the count of rounds still to run.
.equ _rc12 , 0x00000058
.equ _rc11 , 0x00000038
.equ _rc10 , 0x000003C0
.equ _rc9 , 0x000000D0
.equ _rc8 , 0x00000120
.equ _rc7 , 0x00000014
.equ _rc6 , 0x00000060
.equ _rc5 , 0x0000002C
.equ _rc4 , 0x00000380
.equ _rc3 , 0x000000F0
.equ _rc2 , 0x000001A0
.equ _rc1 , 0x00000012
@ _rcNxM is _rcN rotated right by M * _r0 bits (modulo 32); these are the
@ values handed to mRound. This set is used by the 6-round sequences.
.equ _rc6x1, 0x00000003
.equ _rc5x2, 0x0b000000
.equ _rc4x3, 0x07000000
.equ _rc3x4, 0x000f0000
.equ _rc2x5, 0x0000d000
.equ _rc1x6, 0x00000048
@ Same pre-rotated form for the 12-round sequence.
.equ _rc12x1, 0xc0000002
.equ _rc11x2, 0x0e000000
.equ _rc10x3, 0x07800000
.equ _rc9x4 , 0x000d0000
.equ _rc8x5 , 0x00009000
.equ _rc7x6 , 0x00000050
.equ _rc6x7 , 0x0000000c
.equ _rc5x8 , 0x2c000000
.equ _rc4x9 , 0x1c000000
.equ _rc3x10, 0x003c0000
.equ _rc2x11, 0x00034000
.equ _rc1x12, 0x00000120
@ ----------------------------------------------------------------------------
@ NOTE(review): in the macros below each ".if" is followed by two alternative
@ instructions; the .else/.endif/.endm directives separating them are not
@ visible in this view. The first alternative is the plain form, used when the
@ rotation is a multiple of 32; the second applies the rotation through the
@ shifter operand (ROR by 32 - n, i.e. a left rotation by n).

@ mXor3: ro = a0 XOR (a1 rotated left by rho_e1) XOR (a2 rotated left by rho_e2)
.macro mXor3 ro, a0, a1, a2, rho_e1, rho_e2
.if ((\rho_e1)%32) == 0
eors \ro, \a0, \a1
eor \ro, \a0, \a1, ROR #(32-(\rho_e1))%32
.if ((\rho_e2)%32) == 0
eors \ro, \ro, \a2
eor \ro, \ro, \a2, ROR #(32-(\rho_e2))%32
@ mRliXor: ro = ro XOR (ri rotated left by rot); the input operand is rotated.
.macro mRliXor ro, ri, rot
.if ((\rot)%32) == 0
eors \ro, \ro, \ri
eor \ro, \ro, \ri, ROR #(32-(\rot))%32
@ mRloXor: ro = ri XOR (ro rotated left by rot); the output operand is rotated.
.macro mRloXor ro, ri, rot
.if ((\rot)%32) == 0
eors \ro, \ro, \ri
eor \ro, \ri, \ro, ROR #(32-(\rot))%32
@ mChi3: updates the three words a0, a1, a2 in place using BIC (AND with the
@ complement) and EOR, with rotations by _w1; r0 and r1 are scratch registers.
.macro mChi3 a0,a1,a2,r0,r1
bic \r0, \a2, \a1, ROR #_w1
eors \a0, \a0, \r0, ROR #32-_w1
bic \r1, \a0, \a2, ROR #32-_w1
eors \a1, \a1, \r1
bic \r1, \a1, \a0
eors \a2, \a2, \r1, ROR #_w1
@ mRound: one round on the state held in r2-r5 plus the eight registers named
@ by the arguments. The register arguments differ from call to call so that
@ words are renamed between rounds instead of being moved; rho_e1 and rho_we2
@ are the rotations still pending on the two upper rows, and rc is the
@ pre-rotated round constant. r0 and r1 are scratch.
.macro mRound r6i, r7i, r8i, r9i, r6w, r7w, r8w, r9w, r10i, r11i, r12i, lri, rho_e1, rho_we2, rc
@ Theta: Column Parity Mixer (with late Rho-west, Rho-east bit rotations)
mXor3 r0, r5, \r9i, \lri, \rho_e1, \rho_we2
mXor3 r1, r2, \r6i, \r10i, \rho_e1, \rho_we2
mRliXor r0, r0, _r1-_r0
mRloXor r2, r0, 32-_r0
mRloXor \r6i, r0, \rho_e1-_r0
mRloXor \r10i, r0, \rho_we2-_r0
mXor3 r0, r3, \r7i, \r11i, \rho_e1, \rho_we2
mRliXor r1, r1, _r1-_r0
mRloXor r3, r1, 32-_r0
mRloXor \r7i, r1, \rho_e1-_r0
mRloXor \r11i, r1, \rho_we2-_r0
mXor3 r1, r4, \r8i, \r12i, \rho_e1, \rho_we2
mRliXor r0, r0, _r1-_r0
mRloXor r4, r0, 32-_r0
mRloXor \r8i, r0, \rho_e1-_r0
mRloXor \r12i, r0, \rho_we2-_r0
mRliXor r1, r1, _r1-_r0
mRloXor r5, r1, 32-_r0
mRloXor \r9i, r1, \rho_e1-_r0
mRloXor \lri, r1, \rho_we2-_r0
@ After Theta the whole state is rotated -r0
@ from here we must use a1.w instead of a1.i
@ Iota: round constant
@ The constant 0xc0000002 is applied with two EORs (0x00000002 and
@ 0xc0000000); every other constant uses a single EOR.
.if \rc == 0xc0000002
eor r2, r2, #0x00000002
eor r2, r2, #0xc0000000
eor r2, r2, #\rc
@ Chi: non linear step, on colums
mChi3 r2, \r6w, \r10i, r0, r1
mChi3 r3, \r7w, \r11i, r0, r1
mChi3 r4, \r8w, \r12i, r0, r1
mChi3 r5, \r9w, \lri, r0, r1
@ ----------------------------------------------------------------------------
@ void Xoodoo_Permute_6rounds( void *state )
@ Applies six rounds to the 48-byte state at r0, in place.
.align 4
.global Xoodoo_Permute_6rounds
.type Xoodoo_Permute_6rounds, %function;
@ Save the state pointer together with the callee-saved registers.
push {r0,r4-r11,lr}
@ Words 4-5 go to r8-r9 and words 6-7 to r6-r7: the second row is loaded with
@ its register pairs exchanged, matching the register order of the first
@ mRound call below.
ldmia r0!, {r2-r5}
ldmia r0!, {r8-r9}
ldmia r0!, {r6-r7}
ldmia r0, {r10-r12,lr}
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 32, 32, _rc6x1
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6
@ r0 = state pointer again, r1 = the caller's r4 (restored further down).
pop {r0,r1}
@ Apply the rotations that the rounds left pending, one amount per row.
ror r2, r2, #32-(6*_r0)%32
ror r3, r3, #32-(6*_r0)%32
ror r4, r4, #32-(6*_r0)%32
ror r5, r5, #32-(6*_r0)%32
ror r6, r6, #32-(6*_r0+1)%32
ror r7, r7, #32-(6*_r0+1)%32
ror r8, r8, #32-(6*_r0+1)%32
ror r9, r9, #32-(6*_r0+1)%32
ror r10, r10, #32-(6*_r0+_e1+_w1)%32
ror r11, r11, #32-(6*_r0+_e1+_w1)%32
ror r12, r12, #32-(6*_r0+_e1+_w1)%32
ror lr, lr, #32-(6*_r0+_e1+_w1)%32
stmia r0, {r2-r12,lr}
@ r4 held state data until the store above; now give the caller's value back.
mov r4, r1
pop {r5-r11,pc}
@ ----------------------------------------------------------------------------
@ void Xoodoo_Permute_12rounds( void *state )
@ Applies twelve rounds to the 48-byte state at r0, in place.
.align 4
.global Xoodoo_Permute_12rounds
.type Xoodoo_Permute_12rounds, %function;
@ Save the state pointer together with the callee-saved registers, then load
@ all twelve state words into r2-r12 and lr.
push {r0,r4-r11,lr}
ldmia r0, {r2-r12,lr}
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc12x1
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc11x2
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc10x3
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc9x4
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc8x5
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc7x6
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc6x7
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x8
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x9
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x10
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x11
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x12
@ Apply the rotations that the rounds left pending, one amount per row.
ror r2, r2, #32-(12*_r0)%32
ror r3, r3, #32-(12*_r0)%32
ror r4, r4, #32-(12*_r0)%32
ror r5, r5, #32-(12*_r0)%32
ror r6, r6, #32-(12*_r0+1)%32
ror r7, r7, #32-(12*_r0+1)%32
ror r8, r8, #32-(12*_r0+1)%32
ror r9, r9, #32-(12*_r0+1)%32
ror r10, r10, #32-(12*_r0+_e1+_w1)%32
ror r11, r11, #32-(12*_r0+_e1+_w1)%32
ror r12, r12, #32-(12*_r0+_e1+_w1)%32
ror lr, lr, #32-(12*_r0+_e1+_w1)%32
@ r0 = state pointer again, r1 = the caller's r4 (restored after the store).
pop {r0,r1}
stmia r0, {r2-r12,lr}
mov r4, r1
pop {r5-r11,pc}
@ Block size in bytes (48), used by the Xoofff helper routines.
.equ Xoofff_BlockSize , 3*4*4
@ ----------------------------------------------------------------------------
@ void Xoofff_AddIs(BitSequence *output, const BitSequence *input, BitLength bitLen)
@ XORs the first bitLen bits of `input` into `output`.
@ Registers on entry: r0 = output, r1 = input, r2 = bitLen (in bits).
@ The data is processed in progressively smaller units: 48-byte blocks,
@ 16-byte groups, 4-byte words, single bytes, and finally a partial byte.
@ NOTE(review): the loop and section labels targeted by the branches below
@ (_BlockLoop, _LessThanBlock, _16Loop, _LessThan16, _4Loop, _LessThan4,
@ _1Loop, _LessThan1, _Return) are not visible in this view.
.align 4
.global Xoofff_AddIs
.type Xoofff_AddIs, %function;
push {r4-r10,lr}
subs r2, r2, #Xoofff_BlockSize*8
bcc Xoofff_AddIs_LessThanBlock
@ One full block = three groups of four words; first group.
ldr r3, [r0, #0]
ldr r4, [r0, #4]
ldr r5, [r0, #8]
ldr r6, [r0, #12]
ldr r7, [r1], #4
ldr r8, [r1], #4
ldr r9, [r1], #4
ldr r10, [r1], #4
eor r3, r3, r7
eor r4, r4, r8
eor r5, r5, r9
eor r6, r6, r10
str r3, [r0], #4
str r4, [r0], #4
str r5, [r0], #4
str r6, [r0], #4
@ Second group of four words.
ldr r3, [r0, #0]
ldr r4, [r0, #4]
ldr r5, [r0, #8]
ldr r6, [r0, #12]
ldr r7, [r1], #4
ldr r8, [r1], #4
ldr r9, [r1], #4
ldr r10, [r1], #4
eor r3, r3, r7
eor r4, r4, r8
eor r5, r5, r9
eor r6, r6, r10
str r3, [r0], #4
str r4, [r0], #4
str r5, [r0], #4
str r6, [r0], #4
@ Third group of four words.
ldr r3, [r0, #0]
ldr r4, [r0, #4]
ldr r5, [r0, #8]
ldr r6, [r0, #12]
ldr r7, [r1], #4
ldr r8, [r1], #4
ldr r9, [r1], #4
ldr r10, [r1], #4
eor r3, r3, r7
eor r4, r4, r8
eor r5, r5, r9
eor r6, r6, r10
str r3, [r0], #4
str r4, [r0], #4
str r5, [r0], #4
str r6, [r0], #4
subs r2, r2, #Xoofff_BlockSize*8
bcs Xoofff_AddIs_BlockLoop
@ Undo the last subtraction to get the number of bits still to process.
adds r2, r2, #Xoofff_BlockSize*8
beq Xoofff_AddIs_Return
@ 16 bytes at a time.
subs r2, r2, #16*8
bcc Xoofff_AddIs_LessThan16
ldr r3, [r0, #0]
ldr r4, [r0, #4]
ldr r5, [r0, #8]
ldr r6, [r0, #12]
ldr r7, [r1], #4
ldr r8, [r1], #4
ldr r9, [r1], #4
ldr r10, [r1], #4
eor r3, r3, r7
eor r4, r4, r8
eor r5, r5, r9
eor r6, r6, r10
str r3, [r0], #4
str r4, [r0], #4
str r5, [r0], #4
str r6, [r0], #4
subs r2, r2, #16*8
bcs Xoofff_AddIs_16Loop
adds r2, r2, #16*8
beq Xoofff_AddIs_Return
@ 4 bytes at a time.
subs r2, r2, #4*8
bcc Xoofff_AddIs_LessThan4
ldr r3, [r0]
ldr r7, [r1], #4
eors r3, r3, r7
str r3, [r0], #4
subs r2, r2, #4*8
bcs Xoofff_AddIs_4Loop
adds r2, r2, #4*8
beq Xoofff_AddIs_Return
@ 1 byte at a time.
subs r2, r2, #8
bcc Xoofff_AddIs_LessThan1
ldrb r3, [r0]
ldrb r7, [r1], #1
eors r3, r3, r7
strb r3, [r0], #1
subs r2, r2, #8
bcs Xoofff_AddIs_1Loop
adds r2, r2, #8
beq Xoofff_AddIs_Return
@ Final partial byte: r2 holds the 1..7 remaining bits. The XOR result is
@ masked with (1 << r2) - 1, so the bits above the remainder are stored as 0.
ldrb r3, [r0]
ldrb r7, [r1]
movs r1, #1
eors r3, r3, r7
lsls r1, r1, r2
subs r1, r1, #1
ands r3, r3, r1
strb r3, [r0]
pop {r4-r10,pc}
@ ----------------------------------------------------------------------------
@ size_t Xoofff_CompressFastLoop(unsigned char *kRoll, unsigned char *xAccu, const unsigned char *input, size_t length)
@
@ Per iteration: XOR one 12-word input block into the rolled key held in
@ r2-r12,lr, run six rounds (mRound x 6), XOR the result into xAccu, then
@ update kRoll in memory and in registers. Returns in r0 the number of input
@ bytes consumed (current input pointer minus the initial one).
@ Stack frame after the two pushes (offsets from sp, named by the .equ below):
@   0  kRoll pointer (r0)         4  running input pointer (r2)
@   8  xAccu pointer (r1)         12 initial input pointer (r2)
@   16 length - Xoofff_BlockSize (r3), decremented once per iteration
@ NOTE(review): the entry label and Xoofff_CompressFastLoop_Loop are not
@ defined in the text visible here - confirm against the assembled file.
.equ Xoofff_Compress_kRoll , 0
.equ Xoofff_Compress_input , 4
.equ Xoofff_Compress_xAccu , 8
.equ Xoofff_Compress_iInput , 12
.equ Xoofff_Compress_length , 16
.align 4
.global Xoofff_CompressFastLoop
.type Xoofff_CompressFastLoop, %function;
subs r3, #Xoofff_BlockSize @ length must be greater than block size
push {r1-r12,lr}
push {r0,r2}
ldmia r0, {r2-r12,lr} @ get initial kRoll
@ XOR twelve input words into r2-r12,lr; r0 walks the input, r1 is scratch.
ldr r0, [sp, #Xoofff_Compress_input] @ add input
ldr r1, [r0], #4
eors r2, r2, r1
ldr r1, [r0], #4
eors r3, r3, r1
ldr r1, [r0], #4
eors r4, r4, r1
ldr r1, [r0], #4
eors r5, r5, r1
ldr r1, [r0], #4
eors r6, r6, r1
ldr r1, [r0], #4
eors r7, r7, r1
ldr r1, [r0], #4
eors r8, r8, r1
ldr r1, [r0], #4
eors r9, r9, r1
ldr r1, [r0], #4
eors r10, r10, r1
ldr r1, [r0], #4
eors r11, r11, r1
ldr r1, [r0], #4
eors r12, r12, r1
ldr r1, [r0], #4
eors lr, lr, r1
@ Save the advanced input pointer for the next iteration and the return value.
str r0, [sp, #Xoofff_Compress_input]
@ permutation
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc6x1
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6
@ Extract and add into xAccu
@ The rotation arguments (6*_r0 ...) compensate for the rotations left pending
@ by the six rounds; the first four words go through mRloXor, the rest through mRliXor.
ldr r0, [sp, #Xoofff_Compress_xAccu]
ldr r1, [r0]
mRloXor r2, r1, (6*_r0)%32
ldr r1, [r0, #4]
str r2, [r0], #4
mRloXor r3, r1, (6*_r0)%32
ldr r1, [r0, #4]
str r3, [r0], #4
mRloXor r4, r1, (6*_r0)%32
ldr r1, [r0, #4]
str r4, [r0], #4
mRloXor r5, r1, (6*_r0)%32
str r5, [r0], #4
ldm r0, {r2-r5} @ note that r6-r8 and r7-r9 are swapped
mRliXor r2, r8, (6*_r0+1)%32
mRliXor r3, r9, (6*_r0+1)%32
mRliXor r4, r6, (6*_r0+1)%32
mRliXor r5, r7, (6*_r0+1)%32
stm r0!, {r2-r5}
ldm r0, {r2-r5}
mRliXor r2, r10, (6*_r0+_e1+_w1)%32
mRliXor r3, r11, (6*_r0+_e1+_w1)%32
mRliXor r4, r12, (6*_r0+_e1+_w1)%32
mRliXor r5, lr, (6*_r0+_e1+_w1)%32
stm r0!, {r2-r5}
@roll kRoll
@ Reload kRoll as: lr = word 0, r10-r12 = words 1-3, r2-r9 = words 4-11.
@ lr becomes word0 ^ (word0 << 13) ^ (word4 rotated left by 3), and the store
@ writes r2-r12,lr back in that order, so the registers already hold the
@ rolled key in the layout the next iteration expects.
ldr r0, [sp, #Xoofff_Compress_kRoll]
ldr lr, [r0], #4
ldmia r0!, {r10-r12}
ldmia r0!, {r2-r9}
eors lr, lr, lr, LSL #13
eors lr, lr, r2, ROR #32-3
sub r0, #Xoofff_BlockSize
stmia r0, {r2-r12,lr}
@ loop management
@ Continue while subtracting one more block from the stored length does not borrow.
ldr r0, [sp, #Xoofff_Compress_length]
subs r0, #Xoofff_BlockSize
str r0, [sp, #Xoofff_Compress_length]
bcs Xoofff_CompressFastLoop_Loop
@ return number of bytes processed
ldr r0, [sp, #Xoofff_Compress_input]
ldr r1, [sp, #Xoofff_Compress_iInput]
sub r0, r0, r1
@ Drop the two extra stacked words, then restore registers and return.
pop {r1,r2}
pop {r1-r12,pc}
@ ----------------------------------------------------------------------------
@ size_t Xoofff_ExpandFastLoop(unsigned char *yAccu, const unsigned char *kRoll, unsigned char *output, size_t length)
@
@ Per iteration: run six rounds (mRound x 6) on the yAccu value held in
@ r2-r12,lr, XOR the result with the 12 words at kRoll and write them to
@ output, then update yAccu in memory and in registers. Returns in r0 the
@ number of output bytes produced (current output pointer minus the initial one).
@ Stack frame after the two pushes (offsets from sp, named by the .equ below):
@   0  yAccu pointer (r0)         4  running output pointer (r2)
@   8  kRoll pointer (r1)         12 initial output pointer (r2)
@   16 length - Xoofff_BlockSize (r3), decremented once per iteration
@ NOTE(review): the entry label and Xoofff_ExpandFastLoop_Loop are not
@ defined in the text visible here - confirm against the assembled file.
.equ Xoofff_Expand_yAccu , 0
.equ Xoofff_Expand_output , 4
.equ Xoofff_Expand_kRoll , 8
.equ Xoofff_Expand_iOutput , 12
.equ Xoofff_Expand_length , 16
.align 4
.global Xoofff_ExpandFastLoop
.type Xoofff_ExpandFastLoop, %function;
subs r3, #Xoofff_BlockSize @ length must be greater than block size
push {r1-r12,lr}
push {r0,r2}
ldmia r0, {r2-r12,lr} @ get initial yAccu
@ permutation
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc6x1
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6
@ Add k and extract
@ r0 walks kRoll, r1 walks the output; the rotation arguments compensate for
@ the rotations left pending by the six rounds.
ldr r0, [sp, #Xoofff_Expand_kRoll]
ldr r1, [r0], #4
mRloXor r2, r1, (6*_r0)%32
ldr r1, [sp, #Xoofff_Expand_output]
str r2, [r1], #4
ldr r2, [r0], #4
mRloXor r3, r2, (6*_r0)%32
ldr r2, [r0], #4
str r3, [r1], #4
mRloXor r4, r2, (6*_r0)%32
ldr r2, [r0], #4
str r4, [r1], #4
mRloXor r5, r2, (6*_r0)%32
str r5, [r1], #4
ldm r0!, {r2-r5} @ Note that r6-r8 and r7-r9 are swapped
mRliXor r2, r8, (6*_r0+1)%32
str r2, [r1], #4
mRliXor r3, r9, (6*_r0+1)%32
str r3, [r1], #4
mRliXor r4, r6, (6*_r0+1)%32
str r4, [r1], #4
mRliXor r5, r7, (6*_r0+1)%32
str r5, [r1], #4
ldm r0!, {r2-r5}
mRliXor r2, r10, (6*_r0+_e1+_w1)%32
str r2, [r1], #4
mRliXor r3, r11, (6*_r0+_e1+_w1)%32
str r3, [r1], #4
mRliXor r4, r12, (6*_r0+_e1+_w1)%32
str r4, [r1], #4
mRliXor r5, lr, (6*_r0+_e1+_w1)%32
str r5, [r1], #4
@ roll-e yAccu
@ Save the advanced output pointer, then reload yAccu as: lr = word 0,
@ r10-r12 = words 1-3, r2-r9 = words 4-11. lr becomes
@ (word8 AND word4) ^ (word0 rotated left by 5) ^ (word4 rotated left by 13) ^ 7,
@ and the store writes r2-r12,lr back in that order, so the registers already
@ hold the rolled yAccu for the next iteration.
ldr r0, [sp, #Xoofff_Expand_yAccu]
str r1, [sp, #Xoofff_Expand_output]
ldr lr, [r0], #4
ldmia r0!, {r10-r12}
ldmia r0!, {r2-r9}
and r1, r6, r2
eor lr, r1, lr, ROR #32-5
eor lr, lr, r2, ROR #32-13
eor lr, lr, #7
sub r0, #Xoofff_BlockSize
stmia r0, {r2-r12,lr}
@ loop management
@ Continue while subtracting one more block from the stored length does not borrow.
ldr r0, [sp, #Xoofff_Expand_length]
subs r0, #Xoofff_BlockSize
str r0, [sp, #Xoofff_Expand_length]
bcs Xoofff_ExpandFastLoop_Loop
@ return number of bytes processed
ldr r0, [sp, #Xoofff_Expand_output]
ldr r1, [sp, #Xoofff_Expand_iOutput]
sub r0, r0, r1
@ Drop the two extra stacked words, then restore registers and return.
pop {r1,r2}
pop {r1-r12,pc}
The eXtended Keccak Code Package (XKCP)
The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/* Include guard for the Xoodoo definitions. */
#ifndef _Xoodoo_h_
#define _Xoodoo_h_
#include <stdint.h>
#include <stdlib.h>
/* Number of round constants defined in this header (_rc1 .. _rc12). */
#define MAXROUNDS 12
/* Lane grid dimensions used by the index() macro: NROWS rows of NCOLUMS lanes. */
#define NROWS 3
#define NCOLUMS 4
/* Round constants */
#define _rc12 0x00000058
#define _rc11 0x00000038
#define _rc10 0x000003C0
#define _rc9 0x000000D0
#define _rc8 0x00000120
#define _rc7 0x00000014
#define _rc6 0x00000060
#define _rc5 0x0000002C
#define _rc4 0x00000380
#define _rc3 0x000000F0
#define _rc2 0x000001A0
#define _rc1 0x00000012
/* ROTL32(a, offset): 32-bit left rotation, defined only if the includer has not
   already provided one. Compiler-specific variants come first.
   NOTE(review): no #else/#endif lines for this conditional are present in the
   text visible here - confirm against the complete header. */
#if !defined(ROTL32)
#if defined (__arm__) && !defined(__GNUC__)
/* ARM toolchain without __GNUC__: expressed through __ror with the complementary count. */
#define ROTL32(a, offset) __ror(a, (32-(offset))%32)
#elif defined(_MSC_VER)
/* MSVC: uses _rotl. */
#define ROTL32(a, offset) _rotl(a, (offset)%32)
/* Portable fallback: rotate the 32-bit value a left by offset bits, with the
   offset reduced modulo 32. The two shifted halves are combined with OR, so an
   offset that is a multiple of 32 yields a unchanged (combining them with XOR
   would cancel the two identical halves to 0). The right-shift count is derived
   from the already-reduced offset so it stays within 0..31 for offsets above 32.
   Both macro arguments are fully parenthesised. */
#define ROTL32(a, offset) ((((uint32_t)(a)) << ((offset)%32)) | (((uint32_t)(a)) >> ((32-((offset)%32))%32)))
/* READ32_UNALIGNED(argAddress): load a uint32_t from an address that may not be
   4-byte aligned; defined only if the includer has not already provided one.
   NOTE(review): the variants without __packed dereference a plain uint32_t
   pointer, which relies on the target tolerating unaligned word access - confirm
   for each platform. The #else/#endif lines of these conditionals are not
   present in the text visible here. */
#if !defined(READ32_UNALIGNED)
#if defined (__arm__) && !defined(__GNUC__)
#define READ32_UNALIGNED(argAddress) (*((const __packed uint32_t*)(argAddress)))
#elif defined(_MSC_VER)
#define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress)))
#define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress)))
/* WRITE32_UNALIGNED(argAddress, argData): store counterpart of READ32_UNALIGNED,
   with the same structure and the same alignment caveat. */
#if !defined(WRITE32_UNALIGNED)
#if defined (__arm__) && !defined(__GNUC__)
#define WRITE32_UNALIGNED(argAddress, argData) (*((__packed uint32_t*)(argAddress)) = (argData))
#elif defined(_MSC_VER)
#define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData))
#define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData))
/* index(x, y): flat lane index for column x, row y; both coordinates are reduced
   modulo the grid size (NCOLUMS, NROWS), rows being stored contiguously. */
#if !defined(index)
#define index(__x,__y) ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS))
/* One lane of the state is a 32-bit word. */
typedef uint32_t tXoodooLane;
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/* Include guard for the Xoodyak parameter set. The values below are byte counts:
   the Cyclist code uses them as byte lengths and byte offsets into the state. */
#ifndef _Xoodyak_parameters_h_
#define _Xoodyak_parameters_h_
/* Width in bytes used by Cyclist; byte offset (f_bPrime - 1) receives the Cd/Cu domain byte. */
#define Xoodyak_f_bPrime 48
/* Block length in hash mode (absorb and squeeze full-block routines step by this). */
#define Xoodyak_Rhash 16
/* Absorb block length in keyed mode. */
#define Xoodyak_Rkin 44
/* Squeeze / encrypt / decrypt block length in keyed mode. */
#define Xoodyak_Rkout 24
/* NOTE(review): presumably the number of bytes involved in Ratchet(); its use is not visible here - confirm. */
#define Xoodyak_lRatchet 16
@ The eXtended Keccak Code Package (XKCP)
@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
@ For more information, feedback or questions, please refer to the Keccak Team website:
@ To the extent possible under law, the implementer has waived all copyright
@ and related or neighboring rights to the source code in this file.
@ WARNING: These functions work only on little endian CPU with ARMv6 architecture (e.g., ARM11).
@ ----------------------------------------------------------------------------
@ Rotation amounts (in bits) used by the round macros below.
.equ _r0 , 5
.equ _r1 , 14
.equ _t3 , 1
.equ _w1 , 11
.equ _e0 , 2
.equ _e1 , 8
@ Round constants in their plain form.
.equ _rc12 , 0x00000058
.equ _rc11 , 0x00000038
.equ _rc10 , 0x000003C0
.equ _rc9 , 0x000000D0
.equ _rc8 , 0x00000120
.equ _rc7 , 0x00000014
.equ _rc6 , 0x00000060
.equ _rc5 , 0x0000002C
.equ _rc4 , 0x00000380
.equ _rc3 , 0x000000F0
.equ _rc2 , 0x000001A0
.equ _rc1 , 0x00000012
@ Pre-rotated round constants: _rcNxM equals _rcN rotated right by (M * _r0) mod 32
@ bits (e.g. _rc6x1 = 0x60 ror 5 = 0x03, _rc5x2 = 0x2C ror 10 = 0x0b000000).
@ This set is the one used by the 6-round sequences.
.equ _rc6x1 , 0x00000003
.equ _rc5x2 , 0x0b000000
.equ _rc4x3 , 0x07000000
.equ _rc3x4 , 0x000f0000
.equ _rc2x5 , 0x0000d000
.equ _rc1x6 , 0x00000048
@ Pre-rotated constants for the 12-round sequence, same construction.
.equ _rc12x1, 0xc0000002
.equ _rc11x2, 0x0e000000
.equ _rc10x3, 0x07800000
.equ _rc9x4 , 0x000d0000
.equ _rc8x5 , 0x00009000
.equ _rc7x6 , 0x00000050
.equ _rc6x7 , 0x0000000c
.equ _rc5x8 , 0x2c000000
.equ _rc4x9 , 0x1c000000
.equ _rc3x10, 0x003c0000
.equ _rc2x11, 0x00034000
.equ _rc1x12, 0x00000120
@ ----------------------------------------------------------------------------
@ mXor3 ro, a0, a1, a2, rho_e1, rho_e2
@ ro = a0 ^ rotl(a1, rho_e1) ^ rotl(a2, rho_e2), the left rotations being written
@ as ROR by the complementary amount. For each operand, a rotation that is a
@ multiple of 32 is tested with .if and a plain "eors" form is provided for it.
@ NOTE(review): the .else/.endif/.endm lines are not present in the text visible
@ here - confirm against the assembled file.
.macro mXor3 ro, a0, a1, a2, rho_e1, rho_e2
.if ((\rho_e1)%32) == 0
eors \ro, \a0, \a1
eor \ro, \a0, \a1, ROR #(32-(\rho_e1))%32
.if ((\rho_e2)%32) == 0
eors \ro, \ro, \a2
eor \ro, \ro, \a2, ROR #(32-(\rho_e2))%32
@ mRliXor ro, ri, rot
@ "Rotate input, then XOR": ro = ro ^ rotl(ri, rot). A rotation that is a
@ multiple of 32 is tested with .if and a plain "eors" form is provided for it.
@ NOTE(review): the .else/.endif/.endm lines are not present in the text visible here.
.macro mRliXor ro, ri, rot
.if ((\rot)%32) == 0
eors \ro, \ro, \ri
eor \ro, \ro, \ri, ROR #(32-(\rot))%32
@ mRloXor ro, ri, rot
@ "Rotate output, then XOR": ro = ri ^ rotl(ro, rot), i.e. the destination's old
@ value is the operand that gets rotated. A rotation that is a multiple of 32
@ is tested with .if and a plain "eors" form is provided for it.
@ NOTE(review): the .else/.endif/.endm lines are not present in the text visible here.
.macro mRloXor ro, ri, rot
.if ((\rot)%32) == 0
eors \ro, \ro, \ri
eor \ro, \ri, \ro, ROR #(32-(\rot))%32
@ mChi3 a0, a1, a2, r0, r1
@ Non-linear step on one column (a0, a1, a2); r0 and r1 are scratch registers.
@ Each of the three lanes is XORed with (one neighbour AND NOT another), using
@ bic; the ROR #_w1 / ROR #32-_w1 operands fold a rotation by _w1 into the
@ operands instead of rotating the lanes separately.
@ NOTE(review): the closing .endm is not present in the text visible here.
.macro mChi3 a0,a1,a2,r0,r1
bic \r0, \a2, \a1, ROR #_w1
eors \a0, \a0, \r0, ROR #32-_w1
bic \r1, \a0, \a2, ROR #32-_w1
eors \a1, \a1, \r1
bic \r1, \a1, \a0
eors \a2, \a2, \r1, ROR #_w1
@ mRound: one round on the state held in r2-r12,lr, with r0 and r1 as scratch.
@ r2-r5 are always the first four lanes. The arguments name the registers of
@ the other eight lanes for this round:
@   r6i..r9i   second group of four lanes, in the order used by Theta
@   r6w..r9w   the same registers in the order used by Chi (see the
@              "a1.w instead of a1.i" comment below)
@   r10i..lri  third group of four lanes
@   rho_e1, rho_we2  rotations still pending on the second and third groups
@   rc         round constant, already rotated to match the state's pending rotation
@ Callers permute these register names from round to round instead of moving data.
@ NOTE(review): the .else/.endif of the Iota conditional and the closing .endm
@ are not present in the text visible here - confirm against the assembled file.
.macro mRound r6i, r7i, r8i, r9i, r6w, r7w, r8w, r9w, r10i, r11i, r12i, lri, rho_e1, rho_we2, rc
@ Theta: Column Parity Mixer (with late Rho-west, Rho-east bit rotations)
@ r0 and r1 alternate as the parity of one column while the other is being folded in.
mXor3 r0, r5, \r9i, \lri, \rho_e1, \rho_we2
mXor3 r1, r2, \r6i, \r10i, \rho_e1, \rho_we2
mRliXor r0, r0, _r1-_r0
mRloXor r2, r0, 32-_r0
mRloXor \r6i, r0, \rho_e1-_r0
mRloXor \r10i, r0, \rho_we2-_r0
mXor3 r0, r3, \r7i, \r11i, \rho_e1, \rho_we2
mRliXor r1, r1, _r1-_r0
mRloXor r3, r1, 32-_r0
mRloXor \r7i, r1, \rho_e1-_r0
mRloXor \r11i, r1, \rho_we2-_r0
mXor3 r1, r4, \r8i, \r12i, \rho_e1, \rho_we2
mRliXor r0, r0, _r1-_r0
mRloXor r4, r0, 32-_r0
mRloXor \r8i, r0, \rho_e1-_r0
mRloXor \r12i, r0, \rho_we2-_r0
mRliXor r1, r1, _r1-_r0
mRloXor r5, r1, 32-_r0
mRloXor \r9i, r1, \rho_e1-_r0
mRloXor \lri, r1, \rho_we2-_r0
@ After Theta the whole state is rotated -r0
@ from here we must use a1.w instead of a1.i
@ Iota: round constant
@ The constant 0xc0000002 is applied as two separate immediates; any other
@ constant is applied with a single eor.
.if \rc == 0xc0000002
eor r2, r2, #0x00000002
eor r2, r2, #0xc0000000
eor r2, r2, #\rc
@ Chi: non linear step, on colums
mChi3 r2, \r6w, \r10i, r0, r1
mChi3 r3, \r7w, \r11i, r0, r1
mChi3 r4, \r8w, \r12i, r0, r1
mChi3 r5, \r9w, \lri, r0, r1
@ Stack offsets shared by the Xoodyak full-block routines. Those routines do
@ "push {r0-r5}" before branching here, so relative to sp:
@   offsetInstance   = state pointer (r0)
@   offsetInitialLen = initial length (r4)
@   offsetReturn     = address to continue at after the permutation (r5)
.equ offsetInstance , 0
.equ offsetInitialLen , 16
.equ offsetReturn , 20
@ ----------------------------------------------------------------------------
@ Xoodoo_Permute_12roundsAsm: only callable from asm
@ Input and output: the 12 state lanes in r2-r12,lr; r0 and r1 are clobbered
@ by mRound. It is entered with a plain branch, not a call: it returns by
@ loading pc from [sp, #offsetReturn], so the caller must have stored the
@ continuation address there and must not have moved sp since.
@ NOTE(review): the entry label is not present in the text visible here.
.align 4
.type Xoodoo_Permute_12roundsAsm, %function;
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc12x1
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc11x2
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc10x3
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc9x4
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc8x5
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc7x6
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc6x7
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x8
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x9
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x10
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x11
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x12
@ Apply the rotations left pending by the 12 rounds, one amount per group of
@ four lanes, so the registers hold the state in its plain form.
ror r2, r2, #32-(12*_r0)%32
ror r3, r3, #32-(12*_r0)%32
ror r4, r4, #32-(12*_r0)%32
ror r5, r5, #32-(12*_r0)%32
ror r6, r6, #32-(12*_r0+1)%32
ror r7, r7, #32-(12*_r0+1)%32
ror r8, r8, #32-(12*_r0+1)%32
ror r9, r9, #32-(12*_r0+1)%32
ror r10, r10, #32-(12*_r0+_e1+_w1)%32
ror r11, r11, #32-(12*_r0+_e1+_w1)%32
ror r12, r12, #32-(12*_r0+_e1+_w1)%32
ror lr, lr, #32-(12*_r0+_e1+_w1)%32
@ Continue at the address the caller left on the stack.
ldr pc, [sp, #offsetReturn]
@ ----------------------------------------------------------------------------
@ Absorbs full 44-byte blocks in keyed mode and returns the number of bytes
@ consumed. Reference behaviour in C follows; a trailing '@' in the pseudo-code
@ stands where C has ';'.
@ The state stays in r2-r12,lr for the whole loop and is written back once at the end.
@ Stack after "push {r0-r5}": 0 = state, 4 = X (running), 8 = XLen - 44 (running),
@ 16 = initial XLen, 20 = continuation address used by Xoodoo_Permute_12roundsAsm.
@ NOTE(review): the entry label and the _Loop/_Ret labels referenced below are
@ not defined in the text visible here - confirm against the assembled file.
@ size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen)
@ {
@ size_t initialLength = XLen@
@ do {
@ SnP_Permute(state )@ /* Xoodyak_Up(instance, NULL, 0, 0)@ */
@ SnP_AddBytes(state, X, 0, Xoodyak_Rkin)@ /* Xoodyak_Down(instance, X, Xoodyak_Rkin, 0)@ */
@ SnP_AddByte(state, 0x01, Xoodyak_Rkin)@
@ X += Xoodyak_Rkin@
@ XLen -= Xoodyak_Rkin@
@ } while (XLen >= Xoodyak_Rkin)@
@ return initialLength - XLen@
@ }
.equ offsetAbsorbX , 4
.equ offsetAbsorbXLen , 8
.align 4
.global Xoodyak_AbsorbKeyedFullBlocks
.type Xoodyak_AbsorbKeyedFullBlocks, %function;
push {r4-r12,lr}
mov r4, r2 @ r4 initialLength
subs r2, r2, #44
ldr r5, =Xoodyak_AbsorbKeyedFullBlocks_Ret
push {r0-r5}
ldmia r0, {r2-r12,lr}
@ Permute; control continues at the address stored from r5.
b Xoodoo_Permute_12roundsAsm
@ XOR eleven input words (44 bytes) into r2-r12.
ldr r0, [sp, #offsetAbsorbX]
ldr r1, [r0], #4
eors r2, r2, r1
ldr r1, [r0], #4
eors r3, r3, r1
ldr r1, [r0], #4
eors r4, r4, r1
ldr r1, [r0], #4
eors r5, r5, r1
ldr r1, [r0], #4
eors r6, r6, r1
ldr r1, [r0], #4
eors r7, r7, r1
ldr r1, [r0], #4
eors r8, r8, r1
ldr r1, [r0], #4
eors r9, r9, r1
ldr r1, [r0], #4
eors r10, r10, r1
ldr r1, [r0], #4
eors r11, r11, r1
ldr r1, [r0], #4
@ 0x01 at byte offset 44: lowest byte of the twelfth lane (lr).
eors lr, lr, #1
eors r12, r12, r1
@ Save the advanced pointer; loop while another full block remains (no borrow).
ldr r1, [sp, #offsetAbsorbXLen]
str r0, [sp, #offsetAbsorbX]
subs r1, r1, #44
str r1, [sp, #offsetAbsorbXLen]
bcs Xoodyak_AbsorbKeyedFullBlocks_Loop
@ Write the state back, then return initialLength - remaining length.
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
pop {r0-r5}
adds r2, r2, #44
sub r0, r4, r2
pop {r4-r12,pc}
@ ----------------------------------------------------------------------------
@ Absorbs full 16-byte blocks in hash mode and returns the number of bytes
@ consumed. Reference behaviour in C follows; a trailing '@' in the pseudo-code
@ stands where C has ';'.
@ The state stays in r2-r12,lr for the whole loop and is written back once at the end.
@ Stack after "push {r0-r5}": 0 = state, 4 = X (running), 8 = XLen - 16 (running),
@ 16 = initial XLen, 20 = continuation address used by Xoodoo_Permute_12roundsAsm.
@ NOTE(review): the entry label and the _Loop/_Ret labels referenced below are
@ not defined in the text visible here - confirm against the assembled file.
@ size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen)
@ {
@ size_t initialLength = XLen@
@ do {
@ SnP_Permute(state )@ /* Xoodyak_Up(instance, NULL, 0, 0)@ */
@ SnP_AddBytes(state, X, 0, Xoodyak_Rhash)@ /* Xoodyak_Down(instance, X, Xoodyak_Rhash, 0)@ */
@ SnP_AddByte(state, 0x01, Xoodyak_Rhash)@
@ X += Xoodyak_Rhash@
@ XLen -= Xoodyak_Rhash@
@ } while (XLen >= Xoodyak_Rhash)@
@ return initialLength - XLen@
@ }
.align 4
.global Xoodyak_AbsorbHashFullBlocks
.type Xoodyak_AbsorbHashFullBlocks, %function;
push {r4-r12,lr}
mov r4, r2 @ r4 initialLength
subs r2, r2, #16
ldr r5, =Xoodyak_AbsorbHashFullBlocks_Ret
push {r0-r5}
ldmia r0, {r2-r12,lr}
@ Permute; control continues at the address stored from r5.
b Xoodoo_Permute_12roundsAsm
@ XOR four input words (16 bytes) into r2-r5.
ldr r0, [sp, #offsetAbsorbX]
ldr r1, [r0], #4
eors r2, r2, r1
ldr r1, [r0], #4
eors r3, r3, r1
ldr r1, [r0], #4
eors r4, r4, r1
ldr r1, [r0], #4
@ 0x01 at byte offset 16: lowest byte of the fifth lane (r6).
eors r6, r6, #1
eors r5, r5, r1
@ Save the advanced pointer; loop while another full block remains (no borrow).
ldr r1, [sp, #offsetAbsorbXLen]
str r0, [sp, #offsetAbsorbX]
subs r1, r1, #16
str r1, [sp, #offsetAbsorbXLen]
bcs Xoodyak_AbsorbHashFullBlocks_Loop
@ Write the state back, then return initialLength - remaining length.
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
pop {r0-r5}
adds r2, r2, #16
sub r0, r4, r2
pop {r4-r12,pc}
@ ----------------------------------------------------------------------------
@ Squeezes full 24-byte blocks in keyed mode and returns the number of bytes
@ produced. Reference behaviour in C follows; a trailing '@' in the pseudo-code
@ stands where C has ';'.
@ The state stays in r2-r12,lr for the whole loop and is written back once at the end.
@ Stack after "push {r0-r5}": 0 = state, 4 = Y (running), 8 = YLen - 24 (running),
@ 16 = initial YLen, 20 = continuation address used by Xoodoo_Permute_12roundsAsm.
@ NOTE(review): the entry label and the _Loop/_Ret labels referenced below are
@ not defined in the text visible here - confirm against the assembled file.
@ size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen)
@ {
@ size_t initialLength = YLen@
@ do {
@ SnP_AddByte(state, 0x01, 0)@ /* Xoodyak_Down(instance, NULL, 0, 0)@ */
@ SnP_Permute(state )@ /* Xoodyak_Up(instance, Y, Xoodyak_Rkout, 0)@ */
@ SnP_ExtractBytes(state, Y, 0, Xoodyak_Rkout)@
@ Y += Xoodyak_Rkout@
@ YLen -= Xoodyak_Rkout@
@ } while (YLen >= Xoodyak_Rkout)@
@ return initialLength - YLen@
@ }
.equ offsetSqueezeY , 4
.equ offsetSqueezeYLen , 8
.align 4
.global Xoodyak_SqueezeKeyedFullBlocks
.type Xoodyak_SqueezeKeyedFullBlocks, %function;
push {r4-r12,lr}
mov r4, r2 @ r4 initialLength
subs r2, r2, #24
ldr r5, =Xoodyak_SqueezeKeyedFullBlocks_Ret
push {r0-r5}
ldmia r0, {r2-r12,lr}
@ 0x01 at byte offset 0: lowest byte of the first lane (r2).
eors r2, r2, #1
@ Permute; control continues at the address stored from r5.
b Xoodoo_Permute_12roundsAsm
@ Write the first six lanes (24 bytes) to the output.
ldr r0, [sp, #offsetSqueezeY]
str r2, [r0], #4
str r3, [r0], #4
str r4, [r0], #4
str r5, [r0], #4
str r6, [r0], #4
str r7, [r0], #4
@ Save the advanced pointer; loop while another full block remains (no borrow).
ldr r1, [sp, #offsetSqueezeYLen]
str r0, [sp, #offsetSqueezeY]
subs r1, r1, #24
str r1, [sp, #offsetSqueezeYLen]
bcs Xoodyak_SqueezeKeyedFullBlocks_Loop
@ Write the state back, then return initialLength - remaining length.
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
pop {r0-r5}
adds r2, r2, #24
sub r0, r4, r2
pop {r4-r12,pc}
@ ----------------------------------------------------------------------------
@ Squeezes full 16-byte blocks in hash mode and returns the number of bytes
@ produced. Reference behaviour in C follows; a trailing '@' in the pseudo-code
@ stands where C has ';'.
@ The state stays in r2-r12,lr for the whole loop and is written back once at the end.
@ Uses offsetSqueezeY / offsetSqueezeYLen for the running pointer and length
@ on the stack, and offsetReturn for the continuation address.
@ NOTE(review): the entry label and the _Loop/_Ret labels referenced below are
@ not defined in the text visible here - confirm against the assembled file.
@ size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen)
@ {
@ size_t initialLength = YLen@
@ do {
@ SnP_AddByte(state, 0x01, 0)@ /* Xoodyak_Down(instance, NULL, 0, 0)@ */
@ SnP_Permute(state)@ /* Xoodyak_Up(instance, Y, Xoodyak_Rhash, 0)@ */
@ SnP_ExtractBytes(state, Y, 0, Xoodyak_Rhash)@
@ Y += Xoodyak_Rhash@
@ YLen -= Xoodyak_Rhash@
@ } while (YLen >= Xoodyak_Rhash)@
@ return initialLength - YLen@
@ }
.align 4
.global Xoodyak_SqueezeHashFullBlocks
.type Xoodyak_SqueezeHashFullBlocks, %function;
push {r4-r12,lr}
mov r4, r2 @ r4 initialLength
subs r2, r2, #16
ldr r5, =Xoodyak_SqueezeHashFullBlocks_Ret
push {r0-r5}
ldmia r0, {r2-r12,lr}
@ 0x01 at byte offset 0: lowest byte of the first lane (r2).
eors r2, r2, #1
@ Permute; control continues at the address stored from r5.
b Xoodoo_Permute_12roundsAsm
@ Write the first four lanes (16 bytes) to the output.
ldr r0, [sp, #offsetSqueezeY]
str r2, [r0], #4
str r3, [r0], #4
str r4, [r0], #4
str r5, [r0], #4
@ Save the advanced pointer; loop while another full block remains (no borrow).
ldr r1, [sp, #offsetSqueezeYLen]
str r0, [sp, #offsetSqueezeY]
subs r1, r1, #16
str r1, [sp, #offsetSqueezeYLen]
bcs Xoodyak_SqueezeHashFullBlocks_Loop
@ Write the state back, then return initialLength - remaining length.
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
pop {r0-r5}
adds r2, r2, #16
sub r0, r4, r2
pop {r4-r12,pc}
@ ----------------------------------------------------------------------------
@ Encrypts full 24-byte blocks and returns the number of bytes processed.
@ Reference behaviour in C follows; a trailing '@' in the pseudo-code stands
@ where C has ';'.
@ The state stays in r2-r12,lr for the whole loop and is written back once at the end.
@ Stack after "push {r0-r5}": 0 = state, 4 = I, 8 = O, 12 = IOLen - 24 (running),
@ 16 = initial IOLen, 20 = continuation address. offsetCryptI and offsetCryptO
@ include an extra 8 because they are only used while r10 and r11 are
@ additionally pushed; offsetCryptIOLen is used after those are popped.
@ NOTE(review): the entry label and the _Loop/_Ret labels referenced below are
@ not defined in the text visible here - confirm against the assembled file.
@ size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen)
@ {
@ size_t initialLength = IOLen@
@ do {
@ SnP_Permute(state)@
@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@
@ SnP_OverwriteBytes(state, O, 0, Xoodyak_Rkout)@
@ SnP_AddByte(state, 0x01, Xoodyak_Rkout)@
@ I += Xoodyak_Rkout@
@ O += Xoodyak_Rkout@
@ IOLen -= Xoodyak_Rkout@
@ } while (IOLen >= Xoodyak_Rkout)@
@ return initialLength - IOLen@
@ }
.equ offsetCryptI , 4+8
.equ offsetCryptO , 8+8
.equ offsetCryptIOLen , 12
.align 4
.global Xoodyak_EncryptFullBlocks
.type Xoodyak_EncryptFullBlocks, %function;
push {r4-r12,lr}
mov r4, r3 @ r4 initialLength
subs r3, r3, #24
ldr r5, =Xoodyak_EncryptFullBlocks_Ret
push {r0-r5}
ldmia r0, {r2-r12,lr}
@ Permute; control continues at the address stored from r5.
b Xoodoo_Permute_12roundsAsm
@ Free r10/r11 (state lanes) temporarily to use them as O and I pointers.
push {r10, r11}
ldr r11, [sp, #offsetCryptI]
ldr r10, [sp, #offsetCryptO]
@ For each of the six lanes r2-r7: lane ^= input word; the result is both the
@ output word and the new state lane. Loads alternate between r0 and r1.
ldr r0, [r11], #4
ldr r1, [r11], #4
eors r2, r2, r0
str r2, [r10], #4
eors r3, r3, r1
ldr r0, [r11], #4
str r3, [r10], #4
eors r4, r4, r0
ldr r1, [r11], #4
str r4, [r10], #4
eors r5, r5, r1
ldr r0, [r11], #4
str r5, [r10], #4
eors r6, r6, r0
ldr r1, [r11], #4
str r6, [r10], #4
eors r7, r7, r1
str r7, [r10], #4
@ Save the advanced pointers and restore the two state lanes.
str r10, [sp, #offsetCryptO]
str r11, [sp, #offsetCryptI]
pop {r10, r11}
ldr r0, [sp, #offsetCryptIOLen]
@ 0x01 at byte offset 24: lowest byte of the seventh lane (r8).
eors r8, r8, #1
@ Loop while another full block remains (no borrow).
subs r0, r0, #24
str r0, [sp, #offsetCryptIOLen]
bcs Xoodyak_EncryptFullBlocks_Loop
@ Write the state back, then return initialLength - remaining length.
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
pop {r0-r5}
adds r3, r3, #24
sub r0, r4, r3
pop {r4-r12,pc}
@ ----------------------------------------------------------------------------
@ Decrypts full 24-byte blocks and returns the number of bytes processed.
@ Reference behaviour in C follows; a trailing '@' in the pseudo-code stands
@ where C has ';'.
@ The state stays in r2-r12,lr for the whole loop and is written back once at the end.
@ Uses the same stack layout and offsetCrypt* offsets as the encrypt routine:
@ offsetCryptI / offsetCryptO are valid while r10 and r11 are additionally
@ pushed, offsetCryptIOLen after they are popped.
@ NOTE(review): the entry label and the _Loop/_Ret labels referenced below are
@ not defined in the text visible here - confirm against the assembled file.
@ size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen)
@ {
@ size_t initialLength = IOLen@
@ do {
@ SnP_Permute(state)@
@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout)@
@ SnP_AddBytes(state, O, 0, Xoodyak_Rkout)@
@ SnP_AddByte(state, 0x01, Xoodyak_Rkout)@
@ I += Xoodyak_Rkout@
@ O += Xoodyak_Rkout@
@ IOLen -= Xoodyak_Rkout@
@ } while (IOLen >= Xoodyak_Rkout)@
@ return initialLength - IOLen@
@ }
.align 4
.global Xoodyak_DecryptFullBlocks
.type Xoodyak_DecryptFullBlocks, %function;
push {r4-r12,lr}
mov r4, r3 @ r4 initialLength
subs r3, r3, #24
ldr r5, =Xoodyak_DecryptFullBlocks_Ret
push {r0-r5}
ldmia r0, {r2-r12,lr}
@ Permute; control continues at the address stored from r5.
b Xoodoo_Permute_12roundsAsm
@ Free r10/r11 (state lanes) temporarily to use them as O and I pointers.
push {r10, r11}
ldr r11, [sp, #offsetCryptI]
ldr r10, [sp, #offsetCryptO]
@ For each of the six lanes r2-r7: output word = lane ^ input word, then the
@ lane is replaced by the input word itself (the "mov"), which equals adding
@ the output back into the state as in the pseudo-code.
ldr r0, [r11], #4
ldr r1, [r11], #4
eors r2, r2, r0
str r2, [r10], #4
mov r2, r0
eors r3, r3, r1
ldr r0, [r11], #4
str r3, [r10], #4
mov r3, r1
eors r4, r4, r0
ldr r1, [r11], #4
str r4, [r10], #4
mov r4, r0
eors r5, r5, r1
ldr r0, [r11], #4
str r5, [r10], #4
mov r5, r1
eors r6, r6, r0
ldr r1, [r11], #4
str r6, [r10], #4
mov r6, r0
eors r7, r7, r1
str r7, [r10], #4
mov r7, r1
@ Save the advanced pointers and restore the two state lanes.
str r10, [sp, #offsetCryptO]
str r11, [sp, #offsetCryptI]
pop {r10, r11}
ldr r0, [sp, #offsetCryptIOLen]
@ 0x01 at byte offset 24: lowest byte of the seventh lane (r8).
eors r8, r8, #1
@ Loop while another full block remains (no borrow).
subs r0, r0, #24
str r0, [sp, #offsetCryptIOLen]
bcs Xoodyak_DecryptFullBlocks_Loop
@ Write the state back, then return initialLength - remaining length.
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
pop {r0-r5}
adds r3, r3, #24
sub r0, r4, r3
pop {r4-r12,pc}
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifdef XoodooReference
#include "displayIntermediateValues.h"
#include <assert.h>
#include <string.h>
#include "Xoodyak.h"
#ifdef OUTPUT
#include <stdlib.h>
#include <string.h>
static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length);

/*
 * Writes one line to f: the synopsis text followed by ':', then each of the
 * length bytes of data as a space-prefixed two-digit lowercase hex value, then
 * a newline. Example: synopsis "key", bytes {0x00, 0xab} gives "key: 00 ab\n".
 *
 * f        - open output stream
 * synopsis - NUL-terminated label printed before the bytes
 * data     - bytes to print; not accessed when length is 0
 * length   - number of bytes to print
 */
static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length)
{
    unsigned int i;

    fprintf(f, "%s:", synopsis);
    for(i=0; i<length; i++) {
        fprintf(f, " %02x", (unsigned int)data[i]);
    }
    fprintf(f, "\n");
}
/* Smaller of a and b. Function-like macro: each argument may be evaluated twice,
   so do not pass expressions with side effects. */
#define MyMin(a,b) (((a) < (b)) ? (a) : (b))
/* Instantiate the generic Cyclist code for Xoodyak: SnP selects the Xoodoo
   state functions, SnP_Permute the 12-round permutation, and prefix the name
   prefix of the generated functions. The three macros are undefined again
   after the include.
   NOTE(review): the file name in the #include directive below is empty in this
   text - confirm the intended template file. The #endif matching the #ifdef is
   not present in the text visible here. */
#ifdef XKCP_has_Xoodoo
#include "Xoodoo-SnP.h"
#define SnP Xoodoo
#define SnP_Permute Xoodoo_Permute_12rounds
#define prefix Xoodyak
#include ""
#undef prefix
#undef SnP
#undef SnP_Permute
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/* Include guard for the Xoodyak public header. */
#ifndef _Xoodyak_h_
#define _Xoodyak_h_
#include "config.h"
/* The instance type is only declared when config.h announces a Xoodoo implementation. */
#ifdef XKCP_has_Xoodoo
#include <stddef.h>
#include "Cyclist.h"
#include "Xoodoo-SnP.h"
#include "Xoodyak-parameters.h"
/* Declares struct Xoodyak_CyclistInstanceStruct and its typedef Xoodyak_Instance,
   sized and aligned for a Xoodoo state. */
KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment)
/* NOTE(review): the #else that should precede this #error, and the closing
   #endif lines, are not present in the text visible here - confirm. */
#error This requires an implementation of Xoodoo
The eXtended Keccak Code Package (XKCP)
Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/* Include guard. This header defines ALIGN(x), a compiler-specific alignment
   qualifier, falling back to an empty expansion on unknown compilers. */
#ifndef _align_h_
#define _align_h_
/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. */
#ifdef ALIGN
#undef ALIGN
/* NOTE(review): the #else before the empty fallback and the closing #endif
   lines are not present in the text visible here - confirm. */
#if defined(__GNUC__)
#define ALIGN(x) __attribute__ ((aligned(x)))
#elif defined(_MSC_VER)
#define ALIGN(x) __declspec(align(x))
#elif defined(__ARMCC_VERSION)
#define ALIGN(x) __align(x)
#define ALIGN(x)
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
1. source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
2. binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation;
3. the name of the copyright holder is not used to endorse products
built using this software without specific written permission.
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
Issue Date: 20/12/2007
Changes for ARM 9/9/2010
/* Include guard. This header names the two byte orders and then tries, through
   a series of preprocessor tests, to determine the platform's byte order.
   NOTE(review): the lines that actually define the result inside each branch,
   and most #endif lines, are not present in the text visible here - the
   comments below describe only what each visible test checks. */
#ifndef _BRG_ENDIAN_H
#define _BRG_ENDIAN_H
#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
/* Disabled with "#if 0": selection of a system header by operating system. */
#if 0
/* Include files where endian defines and byteswap functions may reside */
#if defined( __sun )
# include <sys/isa_defs.h>
#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
# include <sys/endian.h>
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
# include <machine/endian.h>
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
# if !defined( __MINGW32__ ) && !defined( _AIX )
# include <endian.h>
# if !defined( __BEOS__ )
# include <byteswap.h>
# endif
# endif
/* Now attempt to set the define for platform byte order using any */
/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
/* seem to encompass most endian symbol definitions */
/* Form 1: BIG_ENDIAN / LITTLE_ENDIAN / BYTE_ORDER. */
#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
# endif
#elif defined( BIG_ENDIAN )
#elif defined( LITTLE_ENDIAN )
/* Form 2: the same symbols with one leading underscore. */
#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
# endif
#elif defined( _BIG_ENDIAN )
#elif defined( _LITTLE_ENDIAN )
/* Form 3: two leading underscores. */
#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN )
#elif defined( __LITTLE_ENDIAN )
/* Form 4: two leading and two trailing underscores. */
#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
# endif
#elif defined( __BIG_ENDIAN__ )
#elif defined( __LITTLE_ENDIAN__ )
/* if the platform byte order could not be determined, then try to */
/* set this define using common machine defines */
/* First list: x86, Alpha, VAX/VMS and similar machine macros. */
#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
defined( vax ) || defined( vms ) || defined( VMS ) || \
defined( __VMS ) || defined( _M_X64 )
/* Second list: 68k, SPARC, PA-RISC, s390 and similar machine macros. */
#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \
defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ )
/* ARM: decided by whether __BIG_ENDIAN is defined. */
#elif defined(__arm__)
# ifdef __BIG_ENDIAN
# else
# endif
/* Manual override points, followed by a hard error when nothing matched. */
#elif 1 /* **** EDIT HERE IF NECESSARY **** */
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order
/* File generated by ToTargetConfigFile.xsl */
/* Feature flags for this build. XKCP_has_Xoodoo is tested by Xoodyak.h and
   Xoodyak.c before they declare or instantiate anything. */
#define XKCP_has_Xoodyak
#define XKCP_has_Xoodoo
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#include "crypto_hash.h"
/* Digest length in bytes, unless the build already provides one. */
#ifndef crypto_hash_BYTES
#define crypto_hash_BYTES 32
#endif
#include "Xoodyak.h"

/*
 * Hashes inlen bytes at in with Xoodyak in hash mode (initialised without key,
 * ID or counter) and writes crypto_hash_BYTES bytes of digest to out.
 *
 * out   - destination buffer of at least crypto_hash_BYTES bytes
 * in    - message bytes
 * inlen - message length in bytes; converted to size_t for the absorb call
 *
 * Always returns 0.
 */
int crypto_hash(unsigned char *out, const unsigned char *in, unsigned long long inlen)
{
    Xoodyak_Instance instance;

    Xoodyak_Initialize(&instance, NULL, 0, NULL, 0, NULL, 0);
    Xoodyak_Absorb(&instance, in, (size_t)inlen);
    Xoodyak_Squeeze(&instance, out, crypto_hash_BYTES);
    /* Disabled debug dump of the digest as \x-escaped hex. */
#if 0
    {
        unsigned int i;
        for (i = 0; i < crypto_hash_BYTES; ++i ) {
            printf("\\x%02x", out[i] );
        }
    }
#endif
    return 0;
}
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/* Include guard for the generic Cyclist declarations. */
#ifndef _Cyclist_h_
#define _Cyclist_h_
#include <stdint.h>
#include "align.h"
/* Values stored in the instance's mode field. */
#define Cyclist_ModeHash 1
#define Cyclist_ModeKeyed 2
/* Values stored in the instance's phase field (set by the Down and Up steps). */
#define Cyclist_PhaseDown 1
#define Cyclist_PhaseUp 2
/* KCP_DeclareCyclistStructure(prefix, size, alignment):
   declares struct prefix_CyclistInstanceStruct and its typedef prefix_Instance,
   holding a state of size bytes plus the phase, mode and the two rate fields.
   The OUTPUT variant additionally carries a shadow copy of the state and a
   FILE pointer used for tracing.
   NOTE(review): the #else between the two variants and the closing #endif are
   not present in the text visible here - confirm. */
#ifdef OUTPUT
#include <stdio.h>
#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
uint8_t state[size]; \
uint8_t stateShadow[size]; \
FILE *file; \
unsigned int phase; \
unsigned int mode; \
unsigned int Rabsorb; \
unsigned int Rsqueeze; \
} prefix##_Instance;
#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
uint8_t state[size]; \
unsigned int phase; \
unsigned int mode; \
unsigned int Rabsorb; \
unsigned int Rsqueeze; \
} prefix##_Instance;
/* KCP_DeclareCyclistFunctions(prefix): prototypes of the public operations on a
   prefix_Instance. The prototypes use size_t, so <stddef.h> (or an equivalent)
   must have been included before this macro is expanded. */
#define KCP_DeclareCyclistFunctions(prefix) \
void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \
void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \
void prefix##_Encrypt(prefix##_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen); \
void prefix##_Decrypt(prefix##_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen); \
void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \
void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \
void prefix##_Ratchet(prefix##_Instance *instance);
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/* Token pasting in two steps, so that macro arguments (SnP, prefix) are
   expanded before being joined. */
#define JOIN0(a, b) a ## b
#define JOIN(a, b) JOIN0(a, b)
/* State-and-permutation functions, named from the SnP macro supplied by the
   including file. */
#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize)
#define SnP_Initialize JOIN(SnP, _Initialize)
#define SnP_AddBytes JOIN(SnP, _AddBytes)
#define SnP_AddByte JOIN(SnP, _AddByte)
#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes)
#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes)
#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes)
/* Public Cyclist type and operations, named from the prefix macro supplied by
   the including file. */
#define Cyclist_Instance JOIN(prefix, _Instance)
#define Cyclist_Initialize JOIN(prefix, _Initialize)
#define Cyclist_Absorb JOIN(prefix, _Absorb)
#define Cyclist_Encrypt JOIN(prefix, _Encrypt)
#define Cyclist_Decrypt JOIN(prefix, _Decrypt)
#define Cyclist_Squeeze JOIN(prefix, _Squeeze)
#define Cyclist_SqueezeKey JOIN(prefix, _SqueezeKey)
#define Cyclist_Ratchet JOIN(prefix, _Ratchet)
/* Internal helpers. */
#define Cyclist_AbsorbAny JOIN(prefix, _AbsorbAny)
#define Cyclist_AbsorbKey JOIN(prefix, _AbsorbKey)
#define Cyclist_SqueezeAny JOIN(prefix, _SqueezeAny)
#define Cyclist_Down JOIN(prefix, _Down)
#define Cyclist_Up JOIN(prefix, _Up)
#define Cyclist_Crypt JOIN(prefix, _Crypt)
/* Parameters of the instantiated scheme. */
#define Cyclist_f_bPrime JOIN(prefix, _f_bPrime)
#define Cyclist_Rhash JOIN(prefix, _Rhash)
#define Cyclist_Rkin JOIN(prefix, _Rkin)
#define Cyclist_Rkout JOIN(prefix, _Rkout)
#define Cyclist_lRatchet JOIN(prefix, _lRatchet)
/* Optional full-block routines, mapped only when the including file declares
   them available.
   NOTE(review): the matching #endif is not present in the text visible here. */
#if defined(CyclistFullBlocks_supported)
#define Cyclist_AbsorbKeyedFullBlocks JOIN(prefix, _AbsorbKeyedFullBlocks)
#define Cyclist_AbsorbHashFullBlocks JOIN(prefix, _AbsorbHashFullBlocks)
#define Cyclist_SqueezeKeyedFullBlocks JOIN(prefix, _SqueezeKeyedFullBlocks)
#define Cyclist_SqueezeHashFullBlocks JOIN(prefix, _SqueezeHashFullBlocks)
#define Cyclist_EncryptFullBlocks JOIN(prefix, _EncryptFullBlocks)
#define Cyclist_DecryptFullBlocks JOIN(prefix, _DecryptFullBlocks)
/* ------- Cyclist internal interfaces ------- */
/*
 * Cyclist_Down: XOR an input block into the state and enter the "down" phase.
 *
 *   instance  Cyclist instance to update.
 *   Xi        block to add (XiLen bytes, added at state offset 0).
 *   XiLen     length of Xi in bytes; a padding byte 0x01 is added right after it.
 *   Cd        domain-separation byte added to the last state byte; in hash
 *             mode only its least significant bit is kept.
 */
static void Cyclist_Down(Cyclist_Instance *instance, const uint8_t *Xi, unsigned int XiLen, uint8_t Cd)
{
    uint8_t domainByte = Cd;

    if (instance->mode == Cyclist_ModeHash) {
        /* Hash mode keeps only bit 0 of the domain byte. */
        domainByte = (uint8_t)(Cd & 0x01);
    }

    SnP_AddBytes(instance->state, Xi, 0, XiLen);
    SnP_AddByte(instance->state, 0x01, XiLen);
    SnP_AddByte(instance->state, domainByte, Cyclist_f_bPrime - 1);

    instance->phase = Cyclist_PhaseDown;
}
/*
 * Cyclist_Up: apply the permutation and optionally extract output bytes.
 *
 *   instance  Cyclist instance to update.
 *   Yi        destination for the first YiLen state bytes after the permutation.
 *   YiLen     number of bytes to extract (callers in this file pass 0 with Yi == NULL
 *             when no output is wanted).
 *   Cu        domain-separation byte added to the last state byte before the
 *             permutation; skipped in hash mode.
 *
 * When OUTPUT is defined and instance->file is set, the data XORed since the
 * previous permutation and the state after the permutation are logged.
 * NOTE(review): the #endif lines and braces delimiting the OUTPUT sections are
 * not visible in this text - confirm the exact extent of each conditional.
 */
static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu)
#if defined(OUTPUT)
uint8_t s[Cyclist_f_bPrime];
if (instance->mode != Cyclist_ModeHash) {
SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1);
#if defined(OUTPUT)
if (instance->file != NULL) {
/* s = shadow state XOR current state, i.e. what was added since the last permutation */
SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime );
SnP_Permute( instance->state );
#if defined(OUTPUT)
if (instance->file != NULL) {
/* remember the post-permutation state for the next difference computation */
memcpy( instance->stateShadow, instance->state, sizeof(instance->state) );
fprintf( instance->file, "Data XORed" );
displayByteString( instance->file, "", s, Cyclist_f_bPrime );
SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
fprintf( instance->file, "After f() ");
displayByteString( instance->file, "", s, Cyclist_f_bPrime );
instance->phase = Cyclist_PhaseUp;
SnP_ExtractBytes( instance->state, Yi, 0, YiLen );
/*
 * Cyclist_AbsorbAny: absorb XLen bytes of X in blocks of at most r bytes.
 *
 *   instance  Cyclist instance to update.
 *   X, XLen   input data and its length in bytes.
 *   r         absorbing rate (maximum block size) in bytes.
 *   Cd        domain byte for the first block; later blocks use 0.
 *
 * Each iteration makes sure the instance is in the "up" phase (calling
 * Cyclist_Up without output if it is not) and then adds one block with
 * Cyclist_Down. The do/while shape means an empty input still performs one
 * Cyclist_Down with a zero-length block.
 * When CyclistFullBlocks_supported is defined, remaining whole blocks are
 * handed to the keyed or hash full-block routine, selected by comparing r
 * with Cyclist_Rkin / Cyclist_Rhash.
 * NOTE(review): closing braces and the #endif of the full-block section are
 * not visible in this text - confirm the block structure.
 */
static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd)
unsigned int splitLen;
do {
if (instance->phase != Cyclist_PhaseUp) {
Cyclist_Up(instance, NULL, 0, 0);
splitLen = MyMin(XLen, r);
Cyclist_Down(instance, X, splitLen, Cd);
Cd = 0; /* only the first block carries the caller's domain byte */
X += splitLen;
XLen -= splitLen;
#if defined(CyclistFullBlocks_supported)
if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) {
size_t lenProcessed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen);
X += lenProcessed;
XLen -= lenProcessed;
else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) {
size_t lenProcessed = Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen);
X += lenProcessed;
XLen -= lenProcessed;
} while ( XLen != 0 );
/*
 * Cyclist_AbsorbKey: switch the instance from hash mode to keyed mode and
 * absorb the key material.
 *
 *   instance             Cyclist instance; must currently be in hash mode.
 *   K, KLen              key bytes and length.
 *   ID, IDLen            optional key identifier; ID may be NULL when IDLen is 0.
 *   counter, counterLen  optional counter, absorbed one byte per block after the key.
 *
 * The key block is K || ID || (uint8_t)IDLen and must fit in Cyclist_Rkin bytes
 * (checked with assert only). Nothing is absorbed when KLen is 0.
 */
static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
{
    uint8_t KID[Cyclist_Rkin];

    assert(instance->mode == Cyclist_ModeHash);
    assert((KLen + IDLen) <= (Cyclist_Rkin - 1));

    instance->mode = Cyclist_ModeKeyed;
    instance->Rabsorb = Cyclist_Rkin;
    instance->Rsqueeze = Cyclist_Rkout;
    if (KLen != 0) {
        memcpy(KID, K, KLen);
        /* memcpy with a null source is undefined even for a zero length, so skip it when there is no ID. */
        if (IDLen != 0) {
            memcpy(KID + KLen, ID, IDLen);
        }
        KID[KLen + IDLen] = (uint8_t)IDLen;
        Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02);
        if (counterLen != 0) {
            /* The counter is absorbed with a rate of one byte per block. */
            Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00);
        }
    }
}
/*
 * Cyclist_SqueezeAny: produce YLen output bytes in blocks of at most
 * instance->Rsqueeze bytes.
 *
 *   instance  Cyclist instance to update.
 *   Y, YLen   output buffer and number of bytes requested.
 *   Cu        domain byte for the first Cyclist_Up; later ones use 0.
 *
 * The first block always goes through Cyclist_Up (even when YLen is 0).
 * Every further block is preceded by an empty Cyclist_Down.
 * When CyclistFullBlocks_supported is defined, whole blocks are handed to the
 * keyed or hash full-block routine according to instance->mode.
 * NOTE(review): the keyed fast path is gated on YLen >= Cyclist_Rkin although
 * the keyed squeezing rate set elsewhere is Cyclist_Rkout - confirm intended.
 * NOTE(review): closing braces and the #endif of the full-block section are
 * not visible in this text - confirm the block structure.
 */
static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu)
unsigned int len;
len = MyMin(YLen, instance->Rsqueeze );
Cyclist_Up(instance, Y, len, Cu);
Y += len;
YLen -= len;
while (YLen != 0) {
#if defined(CyclistFullBlocks_supported)
if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkin)) {
size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen);
Y += lenProcessed;
YLen -= lenProcessed;
else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) {
size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen);
Y += lenProcessed;
YLen -= lenProcessed;
Cyclist_Down(instance, NULL, 0, 0); /* empty block between consecutive output blocks */
len = MyMin(YLen, instance->Rsqueeze );
Cyclist_Up(instance, Y, len, 0);
Y += len;
YLen -= len;
/*
 * Cyclist_Crypt: shared implementation of encryption and decryption.
 *
 *   instance  Cyclist instance; callers assert that it is in keyed mode.
 *   I         input (plaintext when encrypting, ciphertext when decrypting).
 *   O         output buffer of IOLen bytes.
 *   IOLen     number of bytes to process; a zero length still performs one
 *             Up/Down pair because of the do/while shape.
 *   decrypt   non-zero to decrypt, zero to encrypt.
 *
 * Each block is processed as: Cyclist_Up (domain byte 0x80 for the first
 * block, 0 afterwards), XOR of the key stream with the input, then
 * Cyclist_Down of the plaintext block. When CyclistFullBlocks_supported is
 * defined, whole blocks after the first are delegated to the full-block
 * routines.
 */
static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt)
{
    unsigned int splitLen;
    uint8_t P[Cyclist_Rkout];
    uint8_t Cu = 0x80;

    do {
        if (decrypt != 0) {
            #if defined(CyclistFullBlocks_supported)
            if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
                size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen);
                I += lenProcessed;
                O += lenProcessed;
                IOLen -= lenProcessed;
            }
            #endif
            splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
            Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
            /* Go through the SnP alias like the rest of this template instead of naming Xoodoo directly. */
            SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
            /* O now holds the plaintext, which is what gets absorbed. */
            Cyclist_Down(instance, O, splitLen, 0x00);
            I += splitLen;
            O += splitLen;
            IOLen -= splitLen;
        }
        else {
            #if defined(CyclistFullBlocks_supported)
            if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
                size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen);
                I += lenProcessed;
                O += lenProcessed;
                IOLen -= lenProcessed;
            }
            #endif
            splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
            /* Keep a copy of the plaintext: I and O may be the same buffer. */
            memcpy(P, I, splitLen);
            Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
            SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
            Cyclist_Down(instance, P, splitLen, 0x00);
            I += splitLen;
            O += splitLen;
            IOLen -= splitLen;
        }
        Cu = 0x00;
    } while ( IOLen != 0 );
}
/* ------- Cyclist interfaces ------- */
/*
 * Cyclist_Initialize: set up an instance in hash mode and, when a key is
 * given (KLen != 0), switch it to keyed mode via Cyclist_AbsorbKey.
 *
 *   instance             instance to initialize.
 *   K, KLen              optional key.
 *   ID, IDLen            optional key identifier (only used with a key).
 *   counter, counterLen  optional counter (only used with a key).
 *
 * NOTE(review): no statement initializing instance->state is visible in this
 * text (only stateShadow is, under OUTPUT) - confirm the state is initialized
 * before first use.
 * NOTE(review): the #endif of the OUTPUT section and the braces are not
 * visible here - confirm.
 */
void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
instance->phase = Cyclist_PhaseUp;
instance->mode = Cyclist_ModeHash;
instance->Rabsorb = Cyclist_Rhash;
instance->Rsqueeze = Cyclist_Rhash;
#ifdef OUTPUT
instance->file = 0; /* logging disabled until a file is assigned */
SnP_Initialize( instance->stateShadow );
if (KLen != 0) {
Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, counter, counterLen);
/*
 * Cyclist_Absorb: absorb XLen bytes of X at the instance's current absorbing
 * rate, using domain byte 0x03 for the first block.
 */
void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, size_t XLen)
{
    const uint8_t absorbDomain = 0x03;

    Cyclist_AbsorbAny(instance, X, XLen, instance->Rabsorb, absorbDomain);
}
/*
 * Cyclist_Encrypt: encrypt PLen bytes of plaintext P into C.
 * The instance must be in keyed mode (checked with assert).
 */
void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen)
{
    const int decryptFlag = 0;

    assert(instance->mode == Cyclist_ModeKeyed);
    Cyclist_Crypt(instance, P, C, PLen, decryptFlag);
}
/*
 * Cyclist_Decrypt: decrypt CLen bytes of ciphertext C into P.
 * The instance must be in keyed mode (checked with assert).
 */
void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen)
{
    const int decryptFlag = 1;

    assert(instance->mode == Cyclist_ModeKeyed);
    Cyclist_Crypt(instance, C, P, CLen, decryptFlag);
}
/*
 * Cyclist_Squeeze: write YLen output bytes to Y, using domain byte 0x40 for
 * the first permutation call.
 */
void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen)
{
    const uint8_t squeezeDomain = 0x40;

    Cyclist_SqueezeAny(instance, Y, YLen, squeezeDomain);
}
/*
 * Cyclist_SqueezeKey: write KLen bytes of derived key material to K, using
 * domain byte 0x20. The instance must be in keyed mode (checked with assert).
 */
void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen)
{
    const uint8_t squeezeKeyDomain = 0x20;

    assert(instance->mode == Cyclist_ModeKeyed);
    Cyclist_SqueezeAny(instance, K, KLen, squeezeKeyDomain);
}
/*
 * Cyclist_Ratchet: squeeze Cyclist_lRatchet bytes (domain byte 0x10) and
 * absorb them straight back (domain byte 0x00).
 * The instance must be in keyed mode (checked with assert).
 */
void Cyclist_Ratchet(Cyclist_Instance *instance)
{
    uint8_t ratchetBlock[Cyclist_lRatchet];

    assert(instance->mode == Cyclist_ModeKeyed);

    /* Squeeze then absorb is the same as overwriting with zeros */
    Cyclist_SqueezeAny(instance, ratchetBlock, sizeof(ratchetBlock), 0x10);
    Cyclist_AbsorbAny(instance, ratchetBlock, sizeof(ratchetBlock), instance->Rabsorb, 0x00);
}
/*
 * Remove the template's name-mapping macros so that it can be included again
 * with another SnP/prefix pair.
 * NOTE(review): JOIN0/JOIN are not undefined here, and the #endif closing the
 * CyclistFullBlocks_supported conditional is not visible in this text - confirm.
 */
#undef SnP_StaticInitialize
#undef SnP_Initialize
#undef SnP_AddBytes
#undef SnP_AddByte
#undef SnP_OverwriteBytes
#undef SnP_ExtractBytes
#undef SnP_ExtractAndAddBytes
#undef Cyclist_Instance
#undef Cyclist_Initialize
#undef Cyclist_Absorb
#undef Cyclist_Encrypt
#undef Cyclist_Decrypt
#undef Cyclist_Squeeze
#undef Cyclist_SqueezeKey
#undef Cyclist_Ratchet
#undef Cyclist_AbsorbAny
#undef Cyclist_AbsorbKey
#undef Cyclist_SqueezeAny
#undef Cyclist_Down
#undef Cyclist_Up
#undef Cyclist_Crypt
#undef Cyclist_f_bPrime
#undef Cyclist_Rhash
#undef Cyclist_Rkin
#undef Cyclist_Rkout
#undef Cyclist_lRatchet
#if defined(CyclistFullBlocks_supported)
#undef Cyclist_AbsorbKeyedFullBlocks
#undef Cyclist_AbsorbHashFullBlocks
#undef Cyclist_SqueezeKeyedFullBlocks
#undef Cyclist_SqueezeHashFullBlocks
#undef Cyclist_EncryptFullBlocks
#undef Cyclist_DecryptFullBlocks
The eXtended Keccak Code Package (XKCP)
The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/* SnP ("state and permutation") interface of the Xoodoo assembler implementation. */
/* NOTE(review): the #endif closing this include guard is not visible in this text - confirm. */
#ifndef _Xoodoo_SnP_h_
#define _Xoodoo_SnP_h_
#include <stddef.h>
#include <stdint.h>
/** For the documentation, see SnP-documentation.h. */
#define Xoodoo_implementation "32-bit optimized ARM assembler implementation"
/* State size: 3*4*4 = 48 bytes. */
#define Xoodoo_stateSizeInBytes (3*4*4)
#define Xoodoo_stateAlignment 4
/* No static initialization is needed: expands to nothing. */
#define Xoodoo_StaticInitialize()
/* Sets the state to all zeroes. */
void Xoodoo_Initialize(void *state);
/* XORs one byte into the state at byte offset argOffset. */
#define Xoodoo_AddByte(argS, argData, argOffset) ((uint8_t*)argS)[argOffset] ^= (argData)
/* XORs `length` bytes of `data` into the state starting at byte `offset`. */
void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
/* Copies `length` bytes of `data` over the state starting at byte `offset`. */
void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
/* Zeroes the first `byteCount` bytes of the state. */
void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount);
/* Declaration intentionally disabled; NOTE(review): the assembler file still exports this symbol - confirm it should stay private. */
//void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds);
void Xoodoo_Permute_6rounds(void *state);
void Xoodoo_Permute_12rounds(void *state);
/* Copies `length` state bytes starting at byte `offset` into `data`. */
void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length);
/* output[i] = input[i] XOR state[offset + i] for `length` bytes. */
void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length);
/* Advertises the Xoofff fast loops below. */
#define Xoodoo_FastXoofff_supported
/* XORs the first bitLen bits of input into output. */
void Xoofff_AddIs(uint8_t *output, const uint8_t *input, size_t bitLen);
/* Both fast loops return the number of bytes they processed. */
size_t Xoofff_CompressFastLoop(uint8_t *kRoll, uint8_t *xAccu, const uint8_t *input, size_t length);
size_t Xoofff_ExpandFastLoop(uint8_t *yAccu, const uint8_t *kRoll, uint8_t *output, size_t length);
/* Advertises the Xoodyak multi-block routines used by the Cyclist template. */
/* NOTE(review): presumably each returns the number of bytes processed (the Cyclist code advances its pointers by the result) - confirm. */
#define CyclistFullBlocks_supported
size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen);
size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen);
size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen);
size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen);
size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);
size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);
@ The eXtended Keccak Code Package (XKCP)
@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
@ For more information, feedback or questions, please refer to the Keccak Team website:
@ To the extent possible under law, the implementer has waived all copyright
@ and related or neighboring rights to the source code in this file.
@ WARNING: These functions work only on little-endian CPUs with the ARMv6-M architecture (Cortex-M0, ...).
.syntax unified
@ ----------------------------------------------------------------------------
@ void Xoodoo_Initialize(void *state)
@
@ Zeroes the state: r1-r3 are cleared and stored four times, i.e.
@ 4 x 3 words = 48 bytes are written starting at the address in r0.
@ NOTE(review): the entry label of this routine is not visible in this text - confirm.
.align 4
.global Xoodoo_Initialize
.type Xoodoo_Initialize, %function;
movs r1, #0
movs r2, #0
movs r3, #0
stmia r0!, { r1 - r3 } @ words 0..2
stmia r0!, { r1 - r3 } @ words 3..5
stmia r0!, { r1 - r3 } @ words 6..8
stmia r0!, { r1 - r3 } @ words 9..11
bx lr
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
@ XORs `length` bytes of `data` into the state starting at byte `offset`.
@ In: r0 = state, r1 = data, r2 = offset, r3 = length.
@ Whole words are used when length >= 4 and both pointers are 4-byte aligned;
@ the remaining bytes are processed one at a time from the highest index down.
@ NOTE(review): the entry label and the _Bytes/_BytesLoop/_Exit labels targeted
@ below are not visible in this text - confirm their positions.
.global Xoodoo_AddBytes
.type Xoodoo_AddBytes, %function;
push {r4,lr}
adds r0, r0, r2 @ state += offset
subs r3, r3, #4 @ if length >= 4
bcc Xoodoo_AddBytes_Bytes
movs r2, r0 @ and data pointer and offset both 32-bit aligned
orrs r2, r2, r1
lsls r2, #30 @ non-zero when either pointer has one of its two low bits set
bne Xoodoo_AddBytes_Bytes
Xoodoo_AddBytes_LanesLoop: @ then, perform on words
ldr r2, [r0]
ldmia r1!, {r4}
eors r2, r2, r4
stmia r0!, {r2}
subs r3, r3, #4
bcs Xoodoo_AddBytes_LanesLoop
adds r3, r3, #4 @ undo the bias: r3 = number of bytes left
beq Xoodoo_AddBytes_Exit
subs r3, r3, #1 @ r3 = index of the last remaining byte
ldrb r2, [r0, r3]
ldrb r4, [r1, r3]
eors r2, r2, r4
strb r2, [r0, r3]
subs r3, r3, #1
bcs Xoodoo_AddBytes_BytesLoop @ continue until the index drops below zero
pop {r4,pc}
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
@ Copies `length` bytes of `data` over the state starting at byte `offset`.
@ In: r0 = state, r1 = data, r2 = offset, r3 = length.
@ Same word/byte split as Xoodoo_AddBytes, without the XOR.
@ NOTE(review): the entry label and the _Bytes/_BytesLoop/_Exit labels targeted
@ below are not visible in this text - confirm their positions.
.global Xoodoo_OverwriteBytes
.type Xoodoo_OverwriteBytes, %function;
adds r0, r0, r2 @ state += offset
subs r3, r3, #4 @ if length >= 4
bcc Xoodoo_OverwriteBytes_Bytes
movs r2, r0 @ and data pointer and offset both 32-bit aligned
orrs r2, r2, r1
lsls r2, #30 @ non-zero when either pointer has one of its two low bits set
bne Xoodoo_OverwriteBytes_Bytes
Xoodoo_OverwriteBytes_LanesLoop: @ then, perform on words
ldmia r1!, {r2}
stmia r0!, {r2}
subs r3, r3, #4
bcs Xoodoo_OverwriteBytes_LanesLoop
adds r3, r3, #4 @ undo the bias: r3 = number of bytes left
beq Xoodoo_OverwriteBytes_Exit
subs r3, r3, #1 @ r3 = index of the last remaining byte
ldrb r2, [r1, r3]
strb r2, [r0, r3]
subs r3, r3, #1
bcs Xoodoo_OverwriteBytes_BytesLoop @ continue until the index drops below zero
bx lr
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount)
@
@ Zeroes the first byteCount bytes of the state.
@ In: r0 = state, r1 = byteCount.
@ byteCount / 4 whole words are stored first, then byteCount % 4 single bytes.
@ No alignment test is made before the word stores.
@ NOTE(review): the entry label and the _LoopLanes/_Bytes/_LoopBytes/_Exit
@ labels targeted below are not visible in this text - confirm their positions.
.global Xoodoo_OverwriteWithZeroes
.type Xoodoo_OverwriteWithZeroes, %function;
movs r3, #0
lsrs r2, r1, #2 @ r2 = number of whole words
beq Xoodoo_OverwriteWithZeroes_Bytes
stm r0!, { r3 }
subs r2, r2, #1
bne Xoodoo_OverwriteWithZeroes_LoopLanes
lsls r1, r1, #32-2 @ keep only the two low bits of byteCount
beq Xoodoo_OverwriteWithZeroes_Exit
lsrs r1, r1, #32-2 @ r1 = byteCount % 4
subs r1, r1, #1
strb r3, [r0, r1] @ strb leaves the flags from subs untouched
bne Xoodoo_OverwriteWithZeroes_LoopBytes
bx lr
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
@
@ Copies `length` state bytes starting at byte `offset` into `data`.
@ In: r0 = state, r1 = data, r2 = offset, r3 = length.
@ Same word/byte split as Xoodoo_AddBytes, copying from the state to data.
@ NOTE(review): the entry label and the _Bytes/_BytesLoop/_Exit labels targeted
@ below are not visible in this text - confirm their positions.
.global Xoodoo_ExtractBytes
.type Xoodoo_ExtractBytes, %function;
adds r0, r0, r2 @ state += offset
subs r3, r3, #4 @ if length >= 4
bcc Xoodoo_ExtractBytes_Bytes
movs r2, r0 @ and data pointer and offset both 32-bit aligned
orrs r2, r2, r1
lsls r2, #30 @ non-zero when either pointer has one of its two low bits set
bne Xoodoo_ExtractBytes_Bytes
Xoodoo_ExtractBytes_LanesLoop: @ then, perform on words
ldmia r0!, {r2}
stmia r1!, {r2}
subs r3, r3, #4
bcs Xoodoo_ExtractBytes_LanesLoop
adds r3, r3, #4 @ undo the bias: r3 = number of bytes left
beq Xoodoo_ExtractBytes_Exit
subs r3, r3, #1 @ r3 = index of the last remaining byte
ldrb r2, [r0, r3]
strb r2, [r1, r3]
subs r3, r3, #1
bcs Xoodoo_ExtractBytes_BytesLoop @ continue until the index drops below zero
bx lr
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
@
@ output[i] = input[i] XOR state[offset + i] for `length` bytes.
@ In: r0 = state, r1 = input, r2 = output, r3 = offset, length on the stack.
@ Whole words are used when length >= 4 and all three pointers are 4-byte
@ aligned; remaining bytes are processed one at a time from the highest index down.
@ NOTE(review): the entry label and the _Bytes/_BytesLoop/_Exit labels targeted
@ below are not visible in this text - confirm their positions.
.global Xoodoo_ExtractAndAddBytes
.type Xoodoo_ExtractAndAddBytes, %function;
push {r4,r5}
adds r0, r0, r3 @ state += offset (offset register no longer needed, reuse for length)
ldr r3, [sp, #8] @ get length argument from stack
subs r3, r3, #4 @ if length >= 4
bcc Xoodoo_ExtractAndAddBytes_Bytes
movs r5, r0 @ and input/output/state pointer all 32-bit aligned
orrs r5, r5, r1
orrs r5, r5, r2
lsls r5, #30 @ non-zero when any pointer has one of its two low bits set
bne Xoodoo_ExtractAndAddBytes_Bytes
Xoodoo_ExtractAndAddBytes_LanesLoop: @ then, perform on words
ldmia r0!, {r5}
ldmia r1!, {r4}
eors r5, r5, r4
stmia r2!, {r5}
subs r3, r3, #4
bcs Xoodoo_ExtractAndAddBytes_LanesLoop
adds r3, r3, #4 @ undo the bias: r3 = number of bytes left
beq Xoodoo_ExtractAndAddBytes_Exit
subs r3, r3, #1 @ r3 = index of the last remaining byte
ldrb r5, [r0, r3]
ldrb r4, [r1, r3]
eors r5, r5, r4
strb r5, [r2, r3]
subs r3, r3, #1
bcs Xoodoo_ExtractAndAddBytes_BytesLoop @ continue until the index drops below zero
pop {r4,r5}
bx lr
.align 4
@ ----------------------------------------------------------------------------
@ offsets in RAM state
@ (byte offset of each of the twelve 32-bit lanes, in storage order)
.equ _oA00 , 0*4
.equ _oA01 , 1*4
.equ _oA02 , 2*4
.equ _oA03 , 3*4
.equ _oA10 , 4*4
.equ _oA11 , 5*4
.equ _oA12 , 6*4
.equ _oA13 , 7*4
.equ _oA20 , 8*4
.equ _oA21 , 9*4
.equ _oA22 , 10*4
.equ _oA23 , 11*4
@ possible locations of state lanes
@ (selector passed as the `loc` argument of the macros below; going by the
@ names and their use: low register, high register, stack slot)
.equ locRegL , 1
.equ locRegH , 2
.equ locMem , 3
@ ----------------------------------------------------------------------------
@ Rotation amounts used by mRound (theta: _r0/_r1, rho-west: _w1, rho-east: _e1).
@ NOTE(review): _r2 and _e0 are not referenced in the visible code; rho-east
@ uses a literal 1 instead.
.equ _r0 , 5
.equ _r1 , 14
.equ _r2 , 1
.equ _w1 , 11
.equ _e0 , 2
.equ _e1 , 8
@ mXor3: ro = a0 ^ a1 ^ a2.
@ a1 and a2 are registers; a0 is selected by `loc`: a low register (XORed
@ directly), a high register (copied through tt) or a stack offset (loaded
@ through tt).
@ NOTE(review): the .else/.endif/.endm lines of this macro are not visible in
@ this text - confirm the exact conditional structure.
.macro mXor3 ro, a0, a1, a2, loc, tt
mov \ro, \a1
eors \ro, \ro, \a2
.if \loc == locRegL
eors \ro, \ro, \a0
.if \loc == locRegH
mov \tt, \a0
ldr \tt, [sp, #\a0]
eors \ro, \ro, \tt
@ mXor: ro ^= ri.
@ ro is selected by `loc`: a low register (XORed in place), a high register
@ (round trip through tt) or a stack offset (load, XOR, store through tt).
@ NOTE(review): the .else/.endif/.endm lines of this macro are not visible in
@ this text - confirm the exact conditional structure.
.macro mXor ro, ri, tt, loc
.if \loc == locRegL
eors \ro, \ro, \ri
.if \loc == locRegH
mov \tt, \ro
eors \tt, \tt, \ri
mov \ro, \tt
ldr \tt, [sp, #\ro]
eors \tt, \tt, \ri
str \tt, [sp, #\ro]
@ mChi3: non-linear update of one column (a0, a1, a2), using r0/r1 as scratch.
@   a0 ^= a2 & ~a1
@   a1 ^= a0 & ~a2      (a0 is the updated value)
@   a2 ^= a1 & ~a0      (a1 and a0 are the updated values)
@ a0 is worked on in a low register; when `loc` is not locRegL the updated a0
@ is also written back to a0s (a high register, or a stack offset).
@ NOTE(review): the .else/.endif/.endm lines of this macro are not visible in
@ this text - confirm the exact conditional structure.
.macro mChi3 a0,a1,a2,r0,r1,a0s,loc
mov \r1, \a2
mov \r0, \a1
bics \r1, \r1, \r0
eors \a0, \a0, \r1
.if \loc != locRegL
.if \loc == locRegH
mov \a0s, \a0
str \a0, [sp, #\a0s]
mov \r0, \a0
bics \r0, \r0, \a2
mov \r1, \a1
eors \r1, \r1, \r0
mov \a1, \r1
bics \r1, \r1, \a0
eors \a2, \a2, \r1
@ mRound: one round on a state held partly in registers and partly on the stack.
@   offsetRC   stack offset of the pointer to the next round constant
@   offsetA03  stack offset of state word 3
@ Register allocation, as set up by the callers' load sequences:
@   words 0..3  : r3, r8, r9, [sp, #offsetA03]
@   words 4..7  : r10, r11, r12, lr
@   words 8..11 : r4, r5, r6, r7
@ r0-r2 are scratch. The round-constant pointer on the stack is advanced by
@ one word per round.
@ NOTE(review): the closing .endm is not visible in this text - confirm.
.macro mRound offsetRC, offsetA03
@ Theta: Column Parity Mixer
@ For each column: r0 = XOR of its three lanes, r1 = rotl(r0, _r0) ^ rotl(r0, _r1);
@ r1 is then XORed into the three lanes of the next column handled.
mXor3 r0, \offsetA03, lr, r7, locMem, r2
mov r1, r0
movs r2, #32-(_r1-_r0)
rors r1, r1, r2
eors r1, r1, r0
movs r2, #32-_r0
rors r1, r1, r2
mXor3 r0, r3, r10, r4, locRegL, r2
mXor r3, r1, r2, locRegL
mXor r10, r1, r2, locRegH
mXor r4, r1, r2, locRegL
mov r1, r0
movs r2, #32-(_r1-_r0)
rors r1, r1, r2
eors r1, r1, r0
movs r2, #32-_r0
rors r1, r1, r2
mXor3 r0, r8, r11, r5, locRegH, r2
mXor r8, r1, r2, locRegH
mXor r11, r1, r2, locRegH
mXor r5, r1, r2, locRegL
mov r1, r0
movs r2, #32-(_r1-_r0)
rors r1, r1, r2
eors r1, r1, r0
movs r2, #32-_r0
rors r1, r1, r2
mXor3 r0, r9, r12, r6, locRegH, r2
mXor r9, r1, r2, locRegH
mXor r12, r1, r2, locRegH
mXor r6, r1, r2, locRegL
mov r1, r0
movs r2, #32-(_r1-_r0)
rors r1, r1, r2
eors r1, r1, r0
movs r2, #32-_r0
rors r1, r1, r2
mXor \offsetA03, r1, r2, locMem
mXor lr, r1, r2, locRegH
mXor r7, r1, r2, locRegL
@ Rho-west: Plane shift
@ r4-r7 are rotated left by _w1 bits; r10, r11, r12, lr move up by one register
@ (lr wraps around to r10).
movs r0, #32-_w1
rors r4, r4, r0
rors r5, r5, r0
rors r6, r6, r0
rors r7, r7, r0
mov r0, lr
mov lr, r12
mov r12, r11
mov r11, r10
mov r10, r0
@ Iota: round constant
@ Load the constant through the pointer kept on the stack, post-increment the
@ pointer, and XOR the constant into word 0 (r3).
ldr r0, [sp, #\offsetRC]
ldmia r0!, {r1}
str r0, [sp, #\offsetRC]
eors r3, r3, r1
@ Chi: non linear step, on colums
@ Lanes kept in high registers or on the stack are first copied into r2.
mChi3 r3, r10, r4, r0, r1, r3, locRegL
mov r2, r8
mChi3 r2, r11, r5, r0, r1, r8, locRegH
mov r2, r9
mChi3 r2, r12, r6, r0, r1, r9, locRegH
ldr r2, [sp, #\offsetA03]
mChi3 r2, lr, r7, r0, r1, \offsetA03, locMem
@ Rho-east: Plane shift
@ r10, r11, r12, lr are rotated left by 1 bit; r4-r7 are rotated left by _e1
@ bits and then swapped pairwise (r4 <-> r6, r5 <-> r7).
movs r0, #32-1
mov r1, r10
rors r1, r1, r0
mov r10, r1
mov r1, r11
rors r1, r1, r0
mov r11, r1
mov r1, r12
rors r1, r1, r0
mov r12, r1
mov r1, lr
rors r1, r1, r0
mov lr, r1
movs r0, #32-_e1
rors r4, r4, r0
rors r5, r5, r0
rors r6, r6, r0
rors r7, r7, r0
mov r0, r4
mov r4, r6
mov r6, r0
mov r0, r5
mov r5, r7
mov r7, r0
@ ----------------------------------------------------------------------------
@ void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds)
@
@ In: r0 = state, r1 = nrounds.
@ Loads the 12 state words into the register/stack layout expected by mRound,
@ applies rounds until the next round constant read is zero, then stores the
@ state back. r4-r11 are preserved.
@ NOTE(review): the round-constant pointer is computed as the address of
@ Xoodoo_Permute_RoundConstants12 minus 4*nrounds. That only selects the last
@ `nrounds` constants if the label sits at the zero terminator of the table.
@ The label itself is not visible in this text - confirm.
@ NOTE(review): the entry label and the loop/done labels (spelled
@ Xoodoo_Permute_Nrouds_* in the branches) are not visible here either.
@ offsets on stack
.equ Xoodoo_Permute_Nrounds_offsetA03 , 0
.equ Xoodoo_Permute_Nrounds_offsetRC , 4
.equ Xoodoo_Permute_Nrounds_SAS , 8
.equ Xoodoo_Permute_Nrounds_offsetState , Xoodoo_Permute_Nrounds_SAS
.global Xoodoo_Permute_Nrounds
.type Xoodoo_Permute_Nrounds, %function;
push {r4-r6,lr}
mov r2, r8
mov r3, r9
mov r4, r10
mov r5, r11
push {r0,r2-r5,r7} @ saves the state pointer, r8-r11 (via r2-r5) and r7
sub sp, #Xoodoo_Permute_Nrounds_SAS @ room for word 3 and the round-constant pointer
adr r2, Xoodoo_Permute_RoundConstants12
lsls r1, r1, #2
subs r2, r2, r1
str r2, [sp, #Xoodoo_Permute_Nrounds_offsetRC]
ldm r0!, {r3,r5,r6,r7} @ words 0..3
mov r8, r5
mov r9, r6
str r7, [sp, #Xoodoo_Permute_Nrounds_offsetA03]
ldm r0!, {r4,r5,r6,r7} @ words 4..7
mov r10, r4
mov r11, r5
mov r12, r6
mov lr, r7
ldm r0!, {r4,r5,r6,r7} @ words 8..11 stay in r4-r7
mRound Xoodoo_Permute_Nrounds_offsetRC, Xoodoo_Permute_Nrounds_offsetA03
ldr r0, [sp, #Xoodoo_Permute_Nrounds_offsetRC]
ldr r0, [r0] @ peek at the next round constant
cmp r0, #0 @ a zero constant terminates the table
beq Xoodoo_Permute_Nrouds_Done
b Xoodoo_Permute_Nrouds_Loop
ldr r0, [sp, #Xoodoo_Permute_Nrounds_offsetState] @ state pointer saved by the second push
stm r0!, {r3}
mov r1, r8
mov r2, r9
ldr r3, [sp, #Xoodoo_Permute_Nrounds_offsetA03]
stm r0!, {r1,r2,r3}
mov r1, r10
mov r2, r11
mov r3, r12
stm r0!, {r1,r2,r3}
mov r1, lr
stm r0!, {r1,r4,r5,r6,r7}
add sp, #Xoodoo_Permute_Nrounds_SAS
pop {r0-r4,r7} @ r1-r4 receive the saved r8-r11
mov r8, r1
mov r9, r2
mov r10, r3
mov r11, r4
pop {r4-r6,pc}
.align 4
@ Round-constant table: twelve constants followed by a zero word. The round
@ loop of Xoodoo_Permute_Nrounds stops when it reads the zero.
@ NOTE(review): the label(s) naming this table are not visible in this text - confirm.
.long 0x00000058
.long 0x00000038
.long 0x000003C0
.long 0x000000D0
.long 0x00000120
.long 0x00000014
.long 0x00000060
.long 0x0000002C
.long 0x00000380
.long 0x000000F0
.long 0x000001A0
.long 0x00000012
.long 0
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_Permute_6rounds( void *state )
@
@ Sets r1 = 6 and branches to Xoodoo_Permute_Nrounds, which returns directly
@ to the caller. r0 (state) is passed through unchanged.
@ NOTE(review): the entry label is not visible in this text - confirm.
.global Xoodoo_Permute_6rounds
.type Xoodoo_Permute_6rounds, %function;
movs r1, #6
b Xoodoo_Permute_Nrounds
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_Permute_12rounds( void *state )
@
@ Sets r1 = 12 and branches to Xoodoo_Permute_Nrounds, which returns directly
@ to the caller. r0 (state) is passed through unchanged.
@ NOTE(review): the entry label is not visible in this text - confirm.
.global Xoodoo_Permute_12rounds
.type Xoodoo_Permute_12rounds, %function;
movs r1, #12
b Xoodoo_Permute_Nrounds
.align 4
@ Block size of the Xoofff fast loops in bytes: 3*4*4 = 48, one full state.
.equ Xoofff_BlockSize , 3*4*4
@ ----------------------------------------------------------------------------
@ void Xoofff_AddIs(BitSequence *output, const BitSequence *input, BitLength bitLen)
@
@ XORs the first bitLen bits of input into output.
@ In: r0 = output, r1 = input, r2 = bitLen (in bits).
@ When both pointers are 4-byte aligned, data is processed in 16-byte chunks,
@ then 4-byte words; the rest goes byte by byte. A final partial byte is
@ written as (output ^ input) masked to its low (bitLen % 8) bits.
@ NOTE(review): the entry label and the _16Loop/_LessThan16/_4Loop/_LessThan4/
@ _Bytes/_1Loop/_LessThan1/_Return labels targeted below are not visible in
@ this text - confirm their positions.
.global Xoofff_AddIs
.type Xoofff_AddIs, %function;
push {r4-r6,lr}
movs r3, r0 @ check input and output pointer both 32-bit aligned
orrs r3, r3, r1
lsls r3, r3, #30
bne Xoofff_AddIs_Bytes
subs r2, r2, #16*8 @ at least 16 bytes (128 bits) left?
bcc Xoofff_AddIs_LessThan16
ldr r3, [r0, #0] @ first 8 bytes of the 16-byte chunk
ldr r4, [r0, #4]
ldmia r1!, {r5,r6}
eors r3, r3, r5
eors r4, r4, r6
stmia r0!, {r3,r4}
ldr r3, [r0, #0] @ second 8 bytes of the 16-byte chunk
ldr r4, [r0, #4]
ldmia r1!, {r5,r6}
eors r3, r3, r5
eors r4, r4, r6
stmia r0!, {r3,r4}
subs r2, r2, #16*8
bcs Xoofff_AddIs_16Loop
adds r2, r2, #16*8 @ undo the bias: r2 = bits left
beq Xoofff_AddIs_Return
subs r2, r2, #4*8 @ at least one 32-bit word left?
bcc Xoofff_AddIs_LessThan4
ldr r3, [r0]
ldmia r1!, {r4}
eors r3, r3, r4
stmia r0!, {r3}
subs r2, r2, #4*8
bcs Xoofff_AddIs_4Loop
adds r2, r2, #4*8 @ undo the bias: r2 = bits left
beq Xoofff_AddIs_Return
subs r2, r2, #8 @ at least one whole byte left?
bcc Xoofff_AddIs_LessThan1
ldrb r3, [r0]
ldrb r4, [r1]
adds r1, r1, #1
eors r3, r3, r4
strb r3, [r0]
adds r0, r0, #1
subs r2, r2, #8
bcs Xoofff_AddIs_1Loop
adds r2, r2, #8 @ undo the bias: r2 = bits left in the last byte
beq Xoofff_AddIs_Return
ldrb r3, [r0]
ldrb r4, [r1]
movs r1, #1
eors r3, r3, r4
lsls r1, r1, r2
subs r1, r1, #1 @ r1 = (1 << r2) - 1, mask of the remaining low bits
ands r3, r3, r1
strb r3, [r0]
pop {r4-r6,pc}
.align 4
@ mLdu: load a 32-bit little-endian word from a possibly unaligned address.
@   rv  destination register
@   ri  address register, advanced by 4 afterwards
@   tt  scratch register
@ NOTE(review): the closing .endm is not visible in this text - confirm.
.macro mLdu rv, ri, tt
ldrb \rv, [\ri, #3]
lsls \rv, \rv, #8
ldrb \tt, [\ri, #2]
orrs \rv, \rv, \tt
lsls \rv, \rv, #8
ldrb \tt, [\ri, #1]
orrs \rv, \rv, \tt
lsls \rv, \rv, #8
ldrb \tt, [\ri, #0]
orrs \rv, \rv, \tt
adds \ri, \ri, #4
@ ----------------------------------------------------------------------------
@ size_t Xoofff_CompressFastLoop(unsigned char *kRoll, unsigned char *xAccu, const unsigned char *input, size_t length)
@
@ In: r0 = kRoll, r1 = xAccu, r2 = input, r3 = length.
@ Per 48-byte block: load kRoll, XOR one input block into it, apply rounds
@ until the zero terminator of the 6-entry constant table, XOR the result into
@ xAccu, then update kRoll in memory.
@ Returns the number of input bytes consumed (current input pointer minus the
@ initial one). r4-r11 are preserved.
@ NOTE(review): the entry label and the _Loop/_Unaligned/_Permute/_PermuteLoop/
@ _PermuteDone/_Done/_RoundConstants6 labels are not visible in this text -
@ confirm their positions.
@ offsets on stack
@ (after the two pushes and the SAS reservation: SAS+0 = r0, SAS+4 = r2 used
@ as running input pointer, SAS+8..SAS+23 = saved r8-r11, then r1, the initial
@ r2 and the biased r3 from the first push)
.equ Xoofff_CompressFastLoop_offsetA03 , 0
.equ Xoofff_CompressFastLoop_offsetRC , 4
.equ Xoofff_CompressFastLoop_SAS , 8
.equ Xoofff_CompressFastLoop_kRoll , Xoofff_CompressFastLoop_SAS+0
.equ Xoofff_CompressFastLoop_input , Xoofff_CompressFastLoop_SAS+4
.equ Xoofff_CompressFastLoop_xAccu , Xoofff_CompressFastLoop_SAS+8+16
.equ Xoofff_CompressFastLoop_iInput , Xoofff_CompressFastLoop_SAS+12+16
.equ Xoofff_CompressFastLoop_length , Xoofff_CompressFastLoop_SAS+16+16
.global Xoofff_CompressFastLoop
.type Xoofff_CompressFastLoop, %function;
subs r3, #Xoofff_BlockSize @ length must be greater than block size
push {r1-r7,lr}
mov r4, r8
mov r5, r9
mov r6, r10
mov r7, r11
push {r0,r2,r4-r7}
sub sp, #Xoofff_CompressFastLoop_SAS
ldm r0!, {r3,r5,r6,r7} @ get initial kRoll
mov r8, r5
mov r9, r6
str r7, [sp, #Xoofff_CompressFastLoop_offsetA03]
ldm r0!, {r4,r5,r6,r7}
mov r10, r4
mov r11, r5
mov r12, r6
mov lr, r7
ldm r0!, {r4,r5,r6,r7}
adr r1, Xoofff_CompressFastLoop_RoundConstants6
str r1, [sp, #Xoofff_CompressFastLoop_offsetRC]
ldr r0, [sp, #Xoofff_CompressFastLoop_input] @ add input
lsls r1, r0, #30 @ non-zero when the input pointer is not 4-byte aligned
bne Xoofff_CompressFastLoop_Unaligned
@ aligned input: XOR twelve words into the state with word loads
ldmia r0!, {r1}
eors r3, r3, r1
ldmia r0!, {r1}
mov r2, r8
eors r2, r2, r1
mov r8, r2
ldmia r0!, {r1}
mov r2, r9
eors r2, r2, r1
mov r9, r2
ldmia r0!, {r1}
ldr r2, [sp, #Xoofff_CompressFastLoop_offsetA03]
eors r2, r2, r1
str r2, [sp, #Xoofff_CompressFastLoop_offsetA03]
ldmia r0!, {r1}
mov r2, r10
eors r2, r2, r1
mov r10, r2
ldmia r0!, {r1}
mov r2, r11
eors r2, r2, r1
mov r11, r2
ldmia r0!, {r1}
mov r2, r12
eors r2, r2, r1
mov r12, r2
ldmia r0!, {r1}
mov r2, lr
eors r2, r2, r1
mov lr, r2
ldmia r0!, {r1,r2}
eors r4, r4, r1
eors r5, r5, r2
ldmia r0!, {r1,r2}
eors r6, r6, r1
eors r7, r7, r2
b Xoofff_CompressFastLoop_Permute
.align 4
@ six round constants followed by a zero terminator
.long 0x00000060
.long 0x0000002C
.long 0x00000380
.long 0x000000F0
.long 0x000001A0
.long 0x00000012
.long 0
@ unaligned input: same XOR sequence, using byte-wise loads (mLdu)
mLdu r1, r0, r2
eors r3, r3, r1
mLdu r1, r0, r2
mov r2, r8
eors r2, r2, r1
mov r8, r2
mLdu r1, r0, r2
mov r2, r9
eors r2, r2, r1
mov r9, r2
mLdu r1, r0, r2
ldr r2, [sp, #Xoofff_CompressFastLoop_offsetA03]
eors r2, r2, r1
str r2, [sp, #Xoofff_CompressFastLoop_offsetA03]
mLdu r1, r0, r2
mov r2, r10
eors r2, r2, r1
mov r10, r2
mLdu r1, r0, r2
mov r2, r11
eors r2, r2, r1
mov r11, r2
mLdu r1, r0, r2
mov r2, r12
eors r2, r2, r1
mov r12, r2
mLdu r1, r0, r2
mov r2, lr
eors r2, r2, r1
mov lr, r2
mLdu r1, r0, r2
eors r4, r4, r1
mLdu r1, r0, r2
eors r5, r5, r1
mLdu r1, r0, r2
eors r6, r6, r1
mLdu r1, r0, r2
eors r7, r7, r1
str r0, [sp, #Xoofff_CompressFastLoop_input] @ remember the advanced input pointer
mRound Xoofff_CompressFastLoop_offsetRC, Xoofff_CompressFastLoop_offsetA03
ldr r0, [sp, #Xoofff_CompressFastLoop_offsetRC]
ldr r0, [r0] @ peek at the next round constant
cmp r0, #0 @ a zero constant terminates the table
beq Xoofff_CompressFastLoop_PermuteDone
b Xoofff_CompressFastLoop_PermuteLoop
@ Extract and add into xAccu
ldr r0, [sp, #Xoofff_CompressFastLoop_xAccu]
ldr r1, [r0]
eors r1, r1, r3
stmia r0!, {r1}
ldr r1, [r0]
mov r2, r8
eors r1, r1, r2
stmia r0!, {r1}
ldr r1, [r0]
mov r2, r9
eors r1, r1, r2
stmia r0!, {r1}
ldr r1, [r0]
ldr r2, [sp, #Xoofff_CompressFastLoop_offsetA03]
eors r1, r1, r2
stmia r0!, {r1}
ldr r1, [r0]
mov r2, r10
eors r1, r1, r2
stmia r0!, {r1}
ldr r1, [r0]
mov r2, r11
eors r1, r1, r2
stmia r0!, {r1}
ldr r1, [r0]
mov r2, r12
eors r1, r1, r2
stmia r0!, {r1}
ldr r1, [r0]
mov r2, lr
eors r1, r1, r2
stmia r0!, {r1}
ldr r1, [r0, #0]
ldr r2, [r0, #4]
ldr r3, [r0, #8]
eors r1, r1, r4
ldr r4, [r0, #12]
eors r2, r2, r5
eors r3, r3, r6
eors r4, r4, r7
stm r0!, {r1,r2,r3,r4}
@roll kRoll-c
@ The twelve kRoll words are reloaded shifted by one position (old word 0 goes
@ to r7, old words 1..3 to r4-r6, old word 4 to r3, and so on). r7 becomes
@ w0 ^ (w0 << 13) ^ rotl(w4, 3), and the shifted words plus the new r7 are
@ written back over kRoll. The registers then hold the updated kRoll.
ldr r0, [sp, #Xoofff_CompressFastLoop_kRoll]
ldmia r0!, {r7}
ldmia r0!, {r4-r6}
ldmia r0!, {r3}
ldmia r0!, {r1,r2}
mov r8, r1
mov r9, r2
ldmia r0!, {r1,r2}
str r1, [sp, #Xoofff_CompressFastLoop_offsetA03]
mov r10, r2
ldmia r0!, {r1,r2}
mov r11, r1
mov r12, r2
ldmia r0!, {r1}
mov lr, r1
lsls r1, r7, #13
eors r7, r7, r1
mov r1, r3
movs r2, #32-3
rors r1, r1, r2
eors r7, r7, r1
subs r0, r0, #Xoofff_BlockSize @ rewind to the start of kRoll
stmia r0!, {r3}
mov r1, r8
mov r2, r9
stmia r0!, {r1,r2}
ldr r1, [sp, #Xoofff_CompressFastLoop_offsetA03]
mov r2, r10
stmia r0!, {r1,r2}
mov r1, r11
mov r2, r12
stmia r0!, {r1,r2}
mov r1, lr
stmia r0!, {r1,r4-r7}
@ loop management
ldr r0, [sp, #Xoofff_CompressFastLoop_length]
subs r0, #Xoofff_BlockSize
str r0, [sp, #Xoofff_CompressFastLoop_length]
bcc Xoofff_CompressFastLoop_Done @ borrow: less than one block left
b Xoofff_CompressFastLoop_Loop
@ return number of bytes processed
ldr r0, [sp, #Xoofff_CompressFastLoop_input]
ldr r1, [sp, #Xoofff_CompressFastLoop_iInput]
subs r0, r0, r1
add sp, #Xoofff_CompressFastLoop_SAS+8 @ also drops the two pointer words of the second push
pop {r4-r7} @ saved r8-r11
mov r8, r4
mov r9, r5
mov r10, r6
mov r11, r7
pop {r1-r7,pc}
.align 4
@ mStu: store a 32-bit word in little-endian byte order to a possibly
@ unaligned address.
@   rv  value register (shifted right by 24 in total while storing)
@   ro  address register, advanced by 4 afterwards
@ NOTE(review): the closing .endm is not visible in this text - confirm.
.macro mStu rv, ro
strb \rv, [\ro, #0]
lsrs \rv, \rv, #8
strb \rv, [\ro, #1]
lsrs \rv, \rv, #8
strb \rv, [\ro, #2]
lsrs \rv, \rv, #8
strb \rv, [\ro, #3]
adds \ro, \ro, #4
@ ----------------------------------------------------------------------------
@ size_t Xoofff_ExpandFastLoop(unsigned char *yAccu, const unsigned char *kRoll, unsigned char *output, size_t length)
@
@ In: r0 = yAccu, r1 = kRoll, r2 = output, r3 = length.
@ Per 48-byte block: load yAccu, apply rounds until the zero terminator of the
@ 6-entry constant table, write (state XOR kRoll) to output, then update yAccu
@ in memory.
@ Returns the number of output bytes produced (current output pointer minus
@ the initial one). r4-r11 are preserved.
@ NOTE(review): the entry label and the _Loop/_PermuteLoop/_PermuteDone/
@ _Unaligned/_ExtractDone/_Done/_RoundConstants6 labels are not visible in
@ this text - confirm their positions.
@ offsets on stack
@ (after the two pushes and the SAS reservation: SAS+0 = r0, SAS+4 = r2 used
@ as running output pointer, SAS+8..SAS+23 = saved r8-r11, then r1, the
@ initial r2 and the biased r3 from the first push)
.equ Xoofff_ExpandFastLoop_offsetA03, 0
.equ Xoofff_ExpandFastLoop_offsetRC , 4
.equ Xoofff_ExpandFastLoop_SAS , 8
.equ Xoofff_ExpandFastLoop_yAccu , Xoofff_ExpandFastLoop_SAS+0
.equ Xoofff_ExpandFastLoop_output , Xoofff_ExpandFastLoop_SAS+4
.equ Xoofff_ExpandFastLoop_kRoll , Xoofff_ExpandFastLoop_SAS+8+16
.equ Xoofff_ExpandFastLoop_iOutput , Xoofff_ExpandFastLoop_SAS+12+16
.equ Xoofff_ExpandFastLoop_length , Xoofff_ExpandFastLoop_SAS+16+16
.global Xoofff_ExpandFastLoop
.type Xoofff_ExpandFastLoop, %function;
subs r3, #Xoofff_BlockSize @ length must be greater than block size
push {r1-r7,lr}
mov r4, r8
mov r5, r9
mov r6, r10
mov r7, r11
push {r0,r2,r4-r7}
sub sp, #Xoofff_ExpandFastLoop_SAS
ldm r0!, {r3,r5,r6,r7} @ get initial yAccu
mov r8, r5
mov r9, r6
str r7, [sp, #Xoofff_ExpandFastLoop_offsetA03]
ldm r0!, {r4,r5,r6,r7}
mov r10, r4
mov r11, r5
mov r12, r6
mov lr, r7
ldm r0!, {r4,r5,r6,r7}
adr r1, Xoofff_ExpandFastLoop_RoundConstants6
str r1, [sp, #Xoofff_ExpandFastLoop_offsetRC]
mRound Xoofff_ExpandFastLoop_offsetRC, Xoofff_ExpandFastLoop_offsetA03
ldr r0, [sp, #Xoofff_ExpandFastLoop_offsetRC]
ldr r0, [r0] @ peek at the next round constant
cmp r0, #0 @ a zero constant terminates the table
beq Xoofff_ExpandFastLoop_PermuteDone
b Xoofff_ExpandFastLoop_PermuteLoop
@ six round constants followed by a zero terminator
.long 0x00000060
.long 0x0000002C
.long 0x00000380
.long 0x000000F0
.long 0x000001A0
.long 0x00000012
.long 0
@ Add k and extract
ldr r0, [sp, #Xoofff_ExpandFastLoop_kRoll]
ldr r1, [sp, #Xoofff_ExpandFastLoop_output] @ current output pointer
lsls r2, r1, #30 @ non-zero when the output pointer is not 4-byte aligned
bne Xoofff_ExpandFastLoop_Unaligned
@ aligned output: write twelve words of (kRoll XOR state) with word stores
ldmia r0!, {r2}
eors r2, r2, r3
stmia r1!, {r2}
ldmia r0!, {r2}
mov r3, r8
eors r2, r2, r3
stmia r1!, {r2}
ldmia r0!, {r2}
mov r3, r9
eors r2, r2, r3
stmia r1!, {r2}
ldmia r0!, {r2}
ldr r3, [sp, #Xoofff_ExpandFastLoop_offsetA03]
eors r2, r2, r3
stmia r1!, {r2}
ldmia r0!, {r2}
mov r3, r10
eors r2, r2, r3
stmia r1!, {r2}
ldmia r0!, {r2}
mov r3, r11
eors r2, r2, r3
stmia r1!, {r2}
ldmia r0!, {r2}
mov r3, r12
eors r2, r2, r3
stmia r1!, {r2}
ldmia r0!, {r2}
mov r3, lr
eors r2, r2, r3
stmia r1!, {r2}
ldmia r0!, {r2,r3}
eors r2, r2, r4
eors r3, r3, r5
stmia r1!, {r2,r3}
ldmia r0!, {r2,r3}
eors r2, r2, r6
eors r3, r3, r7
stmia r1!, {r2,r3}
b Xoofff_ExpandFastLoop_ExtractDone
@ unaligned output: same sequence, using byte-wise stores (mStu)
ldmia r0!, {r2}
eors r2, r2, r3
mStu r2, r1
ldmia r0!, {r2}
mov r3, r8
eors r2, r2, r3
mStu r2, r1
ldmia r0!, {r2}
mov r3, r9
eors r2, r2, r3
mStu r2, r1
ldmia r0!, {r2}
ldr r3, [sp, #Xoofff_ExpandFastLoop_offsetA03]
eors r2, r2, r3
mStu r2, r1
ldmia r0!, {r2}
mov r3, r10
eors r2, r2, r3
mStu r2, r1
ldmia r0!, {r2}
mov r3, r11
eors r2, r2, r3
mStu r2, r1
ldmia r0!, {r2}
mov r3, r12
eors r2, r2, r3
mStu r2, r1
ldmia r0!, {r2}
mov r3, lr
eors r2, r2, r3
mStu r2, r1
ldmia r0!, {r2,r3}
eors r2, r2, r4
mStu r2, r1
eors r3, r3, r5
mStu r3, r1
ldmia r0!, {r2,r3}
eors r2, r2, r6
mStu r2, r1
eors r3, r3, r7
mStu r3, r1
str r1, [sp, #Xoofff_ExpandFastLoop_output] @ remember the advanced output pointer
@ roll-e yAccu
@ The twelve yAccu words are reloaded shifted by one position (old word 0 goes
@ to r7, old words 1..3 to r4-r6, old word 4 to r3, old word 8 to r10, ...).
@ r7 becomes rotl(w0, 5) ^ (w8 & w4) ^ rotl(w4, 13) ^ 7, and the shifted words
@ plus the new r7 are written back over yAccu.
ldr r0, [sp, #Xoofff_ExpandFastLoop_yAccu]
ldmia r0!, {r7}
ldmia r0!, {r4-r6}
ldmia r0!, {r3}
ldmia r0!, {r1,r2}
mov r8, r1
mov r9, r2
ldmia r0!, {r1,r2}
str r1, [sp, #Xoofff_ExpandFastLoop_offsetA03]
mov r10, r2
ldmia r0!, {r1,r2}
mov r11, r1
mov r12, r2
ldmia r0!, {r1}
mov lr, r1
mov r1, r10
ands r1, r1, r3
movs r2, #32-5
rors r7, r7, r2
eors r7, r7, r1
movs r2, #32-13
mov r1, r3
rors r1, r1, r2
eors r7, r7, r1
movs r1, #7
eors r7, r7, r1
subs r0, r0, #Xoofff_BlockSize @ rewind to the start of yAccu
stmia r0!, {r3}
mov r1, r8
mov r2, r9
stmia r0!, {r1,r2}
ldr r1, [sp, #Xoofff_ExpandFastLoop_offsetA03]
mov r2, r10
stmia r0!, {r1,r2}
mov r1, r11
mov r2, r12
stmia r0!, {r1,r2}
mov r1, lr
stmia r0!, {r1,r4-r7}
@ loop management
ldr r0, [sp, #Xoofff_ExpandFastLoop_length]
subs r0, #Xoofff_BlockSize
str r0, [sp, #Xoofff_ExpandFastLoop_length]
bcc Xoofff_ExpandFastLoop_Done @ borrow: less than one block left
b Xoofff_ExpandFastLoop_Loop
@ return number of bytes processed
ldr r0, [sp, #Xoofff_ExpandFastLoop_output]
ldr r1, [sp, #Xoofff_ExpandFastLoop_iOutput]
subs r0, r0, r1
add sp, #Xoofff_ExpandFastLoop_SAS+8 @ also drops the two pointer words of the second push
pop {r4-r7} @ saved r8-r11
mov r8, r4
mov r9, r5
mov r10, r6
mov r11, r7
pop {r1-r7,pc}
.align 4
The eXtended Keccak Code Package (XKCP)
The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/* Common Xoodoo definitions: state dimensions, round constants, helper macros. */
/* NOTE(review): the #endif closing this include guard is not visible in this text - confirm. */
#ifndef _Xoodoo_h_
#define _Xoodoo_h_
#include <stdint.h>
#include <stdlib.h>
/* Maximum number of rounds and state dimensions (NROWS x NCOLUMS lanes). */
#define MAXROUNDS 12
#define NROWS 3
#define NCOLUMS 4
/* Round constants */
/* _rcN is numbered from 12 down to 1; the assembler tables list the same values in the same order. */
#define _rc12 0x00000058
#define _rc11 0x00000038
#define _rc10 0x000003C0
#define _rc9 0x000000D0
#define _rc8 0x00000120
#define _rc7 0x00000014
#define _rc6 0x00000060
#define _rc5 0x0000002C
#define _rc4 0x00000380
#define _rc3 0x000000F0
#define _rc2 0x000001A0
#define _rc1 0x00000012
#if !defined(ROTL32)
#if defined (__arm__) && !defined(__GNUC__)
#define ROTL32(a, offset) __ror(a, (32-(offset))%32)
#elif defined(_MSC_VER)
#define ROTL32(a, offset) _rotl(a, (offset)%32)
#define ROTL32(a, offset) ((((uint32_t)a) << ((offset)%32)) ^ (((uint32_t)a) >> ((32-(offset))%32)))
#if !defined(READ32_UNALIGNED)
#if defined (__arm__) && !defined(__GNUC__)
#define READ32_UNALIGNED(argAddress) (*((const __packed uint32_t*)(argAddress)))
#elif defined(_MSC_VER)
#define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress)))
#define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress)))
#if !defined(WRITE32_UNALIGNED)
#if defined (__arm__) && !defined(__GNUC__)
#define WRITE32_UNALIGNED(argAddress, argData) (*((__packed uint32_t*)(argAddress)) = (argData))
#elif defined(_MSC_VER)
#define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData))
#define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData))
#if !defined(index)
#define index(__x,__y) ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS))
typedef uint32_t tXoodooLane;
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Xoodyak_parameters_h_
#define _Xoodyak_parameters_h_
/* Sizes in bytes. */
/* 48 = 12 lanes of 4 bytes. NOTE(review): presumably the permutation width -- confirm. */
#define Xoodyak_f_bPrime 48
/* Block size absorbed and squeezed per permutation call by the hash-mode full-block routines. */
#define Xoodyak_Rhash 16
/* Block size absorbed per permutation call by the keyed-mode absorb routine. */
#define Xoodyak_Rkin 44
/* Block size produced per permutation call by the keyed squeeze, encrypt and decrypt routines. */
#define Xoodyak_Rkout 24
/* NOTE(review): presumably the number of bytes involved in a ratchet operation -- not used in the visible code; confirm. */
#define Xoodyak_lRatchet 16
@ The eXtended Keccak Code Package (XKCP)
@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
@ For more information, feedback or questions, please refer to the Keccak Team website:
@ To the extent possible under law, the implementer has waived all copyright
@ and related or neighboring rights to the source code in this file.
@ WARNING: These functions work only on little endian CPU with ARMv6m architecture (e.g., Cortex-M0).
.syntax unified
@ ----------------------------------------------------------------------------
@ offsets in RAM state
@ Byte offset of lane A<y><x> (plane y, column x) in the in-memory state:
@ twelve 32-bit lanes stored plane by plane.
.equ _oA00 , 0*4
.equ _oA01 , 1*4
.equ _oA02 , 2*4
.equ _oA03 , 3*4
.equ _oA10 , 4*4
.equ _oA11 , 5*4
.equ _oA12 , 6*4
.equ _oA13 , 7*4
.equ _oA20 , 8*4
.equ _oA21 , 9*4
.equ _oA22 , 10*4
.equ _oA23 , 11*4
@ possible locations of state lanes
@ (passed as the loc argument of the macros below: low register, high register, or stack slot)
.equ locRegL , 1
.equ locRegH , 2
.equ locMem , 3
@ ----------------------------------------------------------------------------
@ Rotation amounts, in bits. mRound uses _r0 and _r1 in theta, _w1 in rho-west
@ and _e1 in rho-east; _r2 and _e0 are not referenced by mRound.
.equ _r0 , 5
.equ _r1 , 14
.equ _r2 , 1
.equ _w1 , 11
.equ _e0 , 2
.equ _e1 , 8
@ ----------------------------------------------------------------------------
@ mLoadU r, p, o, t
@ Loads the 32-bit little-endian word at [p + o] into r one byte at a time,
@ so the address does not have to be word aligned.
@   r : destination register
@   p : base address register
@   o : constant byte offset
@   t : scratch register (clobbered)
.macro mLoadU r, p, o, t
ldrb \r, [\p, #\o+0]
ldrb \t, [\p, #\o+1]
lsls \t, \t, #8
orrs \r, \r, \t
ldrb \t, [\p, #\o+2]
lsls \t, \t, #16
orrs \r, \r, \t
ldrb \t, [\p, #\o+3]
lsls \t, \t, #24
orrs \r, \r, \t
@ mStoreU p, o, s, t, loc
@ Stores the 32-bit value s to [p + o] one byte at a time, least significant
@ byte first, so the address does not have to be word aligned.
@   p   : base address register
@   o   : constant byte offset
@   s   : source register (left unchanged)
@   t   : scratch register (clobbered)
@   loc : locRegL when s can be used directly by strb; for any other value
@         s is first copied into t
.macro mStoreU p, o, s, t, loc
.if \loc == locRegL
@ s is a low register: store byte 0 straight from it, then shift into t
strb \s, [\p, #\o+0]
lsrs \t, \s, #8
@ other locations: work on a copy of s held in t
mov \t, \s
strb \t, [\p, #\o+0]
lsrs \t, \t, #8
@ common tail: t holds the remaining bytes 1..3
strb \t, [\p, #\o+1]
lsrs \t, \t, #8
strb \t, [\p, #\o+2]
lsrs \t, \t, #8
strb \t, [\p, #\o+3]
@ mXor3 ro, a0, a1, a2, loc, tt
@ Computes ro = a0 ^ a1 ^ a2.
@   ro  : destination (low register)
@   a1  : register operand
@   a2  : low register operand
@   a0  : third operand, whose form is selected by loc:
@           locRegL - low register, XORed directly
@           locRegH - high register, copied into tt first
@           otherwise - constant stack offset, loaded from [sp + a0] into tt
@   tt  : scratch register (clobbered unless loc is locRegL)
.macro mXor3 ro, a0, a1, a2, loc, tt
mov \ro, \a1
eors \ro, \ro, \a2
.if \loc == locRegL
eors \ro, \ro, \a0
.if \loc == locRegH
mov \tt, \a0
ldr \tt, [sp, #\a0]
eors \ro, \ro, \tt
@ mXor ro, ri, tt, loc
@ Computes ro ^= ri, where the form of ro is selected by loc:
@   locRegL   - ro is a low register, XORed in place
@   locRegH   - ro is a high register, updated through tt
@   otherwise - ro is a constant stack offset; the word at [sp + ro] is
@               loaded into tt, XORed and stored back
@   ri : low register operand (unchanged)
@   tt : scratch register (clobbered unless loc is locRegL)
.macro mXor ro, ri, tt, loc
.if \loc == locRegL
eors \ro, \ro, \ri
.if \loc == locRegH
mov \tt, \ro
eors \tt, \tt, \ri
mov \ro, \tt
ldr \tt, [sp, #\ro]
eors \tt, \tt, \ri
str \tt, [sp, #\ro]
@ mChi3 a0, a1, a2, r0, r1, a0s, loc
@ Applies the chi step to one column (a0, a1, a2), updating the three lanes
@ one after the other:
@   a0 ^= a2 & ~a1
@   a1 ^= a0 & ~a2      (using the updated a0)
@   a2 ^= a1 & ~a0      (using the updated a0 and a1)
@   a0     : low register holding (a working copy of) the plane-0 lane
@   a1     : register holding the plane-1 lane (accessed with mov only)
@   a2     : low register holding the plane-2 lane
@   r0, r1 : scratch registers (clobbered)
@   a0s    : home of the plane-0 lane when loc is not locRegL: a high
@            register (locRegH) or a constant stack offset
.macro mChi3 a0,a1,a2,r0,r1,a0s,loc
mov \r1, \a2
mov \r0, \a1
bics \r1, \r1, \r0
eors \a0, \a0, \r1
@ write the updated plane-0 lane back to its home location
.if \loc != locRegL
.if \loc == locRegH
mov \a0s, \a0
str \a0, [sp, #\a0s]
mov \r0, \a0
bics \r0, \r0, \a2
mov \r1, \a1
eors \r1, \r1, \r0
mov \a1, \r1
bics \r1, \r1, \a0
eors \a2, \a2, \r1
@ mRound offsetRC, offsetA03
@ One Xoodoo round on a state held in registers and one stack slot:
@   plane 0: r3, r8, r9, [sp + offsetA03]
@   plane 1: r10, r11, r12, lr
@   plane 2: r4, r5, r6, r7
@   offsetRC  : stack offset of the pointer to the next round constant;
@               the pointer is advanced by one word
@   offsetA03 : stack offset of lane A03
@ r0, r1 and r2 are used as scratch registers.
.macro mRound offsetRC, offsetA03
@ Theta: Column Parity Mixer
@ For each column parity P (held in r0), r1 = rotl(P, _r0) ^ rotl(P, _r1);
@ that value is then XORed into the three lanes of the following column,
@ and the value derived from column 2 goes into column 3 at the end.
mXor3 r0, \offsetA03, lr, r7, locMem, r2
mov r1, r0
movs r2, #32-(_r1-_r0)
rors r1, r1, r2
eors r1, r1, r0
movs r2, #32-_r0
rors r1, r1, r2
mXor3 r0, r3, r10, r4, locRegL, r2
mXor r3, r1, r2, locRegL
mXor r10, r1, r2, locRegH
mXor r4, r1, r2, locRegL
mov r1, r0
movs r2, #32-(_r1-_r0)
rors r1, r1, r2
eors r1, r1, r0
movs r2, #32-_r0
rors r1, r1, r2
mXor3 r0, r8, r11, r5, locRegH, r2
mXor r8, r1, r2, locRegH
mXor r11, r1, r2, locRegH
mXor r5, r1, r2, locRegL
mov r1, r0
movs r2, #32-(_r1-_r0)
rors r1, r1, r2
eors r1, r1, r0
movs r2, #32-_r0
rors r1, r1, r2
mXor3 r0, r9, r12, r6, locRegH, r2
mXor r9, r1, r2, locRegH
mXor r12, r1, r2, locRegH
mXor r6, r1, r2, locRegL
mov r1, r0
movs r2, #32-(_r1-_r0)
rors r1, r1, r2
eors r1, r1, r0
movs r2, #32-_r0
rors r1, r1, r2
mXor \offsetA03, r1, r2, locMem
mXor lr, r1, r2, locRegH
mXor r7, r1, r2, locRegL
@ Rho-west: Plane shift
@ plane 2: rotate every lane left by _w1 bits
movs r0, #32-_w1
rors r4, r4, r0
rors r5, r5, r0
rors r6, r6, r0
rors r7, r7, r0
@ plane 1: move each lane one column over (r10 <- lr, r11 <- r10, r12 <- r11, lr <- r12)
mov r0, lr
mov lr, r12
mov r12, r11
mov r11, r10
mov r10, r0
@ Iota: round constant
@ fetch the next constant, advance the pointer, XOR the constant into A00
ldr r0, [sp, #\offsetRC]
ldmia r0!, {r1}
str r0, [sp, #\offsetRC]
eors r3, r3, r1
@ Chi: non linear step, on columns
@ plane-0 lanes that live in high registers or on the stack go through r2
mChi3 r3, r10, r4, r0, r1, r3, locRegL
mov r2, r8
mChi3 r2, r11, r5, r0, r1, r8, locRegH
mov r2, r9
mChi3 r2, r12, r6, r0, r1, r9, locRegH
ldr r2, [sp, #\offsetA03]
mChi3 r2, lr, r7, r0, r1, \offsetA03, locMem
@ Rho-east: Plane shift
@ plane 1: rotate every lane left by 1 bit (via r1, the lanes are in high registers)
movs r0, #32-1
mov r1, r10
rors r1, r1, r0
mov r10, r1
mov r1, r11
rors r1, r1, r0
mov r11, r1
mov r1, r12
rors r1, r1, r0
mov r12, r1
mov r1, lr
rors r1, r1, r0
mov lr, r1
@ plane 2: rotate every lane left by _e1 bits, then swap r4 with r6 and r5 with r7
movs r0, #32-_e1
rors r4, r4, r0
rors r5, r5, r0
rors r6, r6, r0
rors r7, r7, r0
mov r0, r4
mov r4, r6
mov r6, r0
mov r0, r5
mov r5, r7
mov r7, r0
@ ----------------------------------------------------------------------------
@ Xoodoo_Permute_12roundsAsm
@
@ Internal routine, not callable from C: the state is expected in registers
@ as described for mRound, and the routine does not return through lr (which
@ holds a state lane). It ends with a bx to the address the caller stored at
@ [sp + Xoodoo_Permute_12rounds_offsetReturn].
@ The caller must reserve Xoodoo_Permute_12rounds_SAS bytes at the bottom of
@ its stack frame for the three slots below.
@ Clobbers r0, r1, r2.
@
@ offsets on stack
.equ Xoodoo_Permute_12rounds_offsetA03 , 0
.equ Xoodoo_Permute_12rounds_offsetRC , 4
.equ Xoodoo_Permute_12rounds_offsetReturn, 8
.equ Xoodoo_Permute_12rounds_SAS , 12
.align 4
.type Xoodoo_Permute_12roundsAsm, %function;
@ point the round-constant cursor at the start of the table
adr r2, Xoodoo_Permute_RoundConstants12
str r2, [sp, #Xoodoo_Permute_12rounds_offsetRC]
mRound Xoodoo_Permute_12rounds_offsetRC, Xoodoo_Permute_12rounds_offsetA03
@ mRound advanced the cursor; a zero word marks the end of the table
ldr r0, [sp, #Xoodoo_Permute_12rounds_offsetRC]
ldr r0, [r0]
cmp r0, #0
beq Xoodoo_Permute_12rounds_Done
b Xoodoo_Permute_12rounds_Loop
@ return to the address supplied by the caller
ldr r0, [sp, #Xoodoo_Permute_12rounds_offsetReturn]
bx r0
.align 4
@ twelve round constants followed by a zero terminator
.long 0x00000058
.long 0x00000038
.long 0x000003C0
.long 0x000000D0
.long 0x00000120
.long 0x00000014
.long 0x00000060
.long 0x0000002C
.long 0x00000380
.long 0x000000F0
.long 0x000001A0
.long 0x00000012
.long 0
@ ----------------------------------------------------------------------------
@ Xoodyak_AbsorbKeyedFullBlocks
@
@ Absorbs full 44-byte (Xoodyak_Rkin) blocks. Equivalent C:
@
@ size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen)
@ {
@     size_t initialLength = XLen;
@     do {
@         SnP_Permute(state );                       /* Xoodyak_Up(instance, NULL, 0, 0); */
@         SnP_AddBytes(state, X, 0, Xoodyak_Rkin);   /* Xoodyak_Down(instance, X, Xoodyak_Rkin, 0); */
@         SnP_AddByte(state, 0x01, Xoodyak_Rkin);
@         X += Xoodyak_Rkin;
@         XLen -= Xoodyak_Rkin;
@     } while (XLen >= Xoodyak_Rkin);
@     return initialLength - XLen;
@ }
@
@ In:  r0 = state, r1 = X, r2 = XLen
@ Out: r0 = number of bytes absorbed
@ The state is kept in registers for the whole loop:
@   r3, r8, r9, [sp + offsetA03] = A00..A03; r10, r11, r12, lr = A10..A13;
@   r4..r7 = A20..A23.
@ Stack frame: the permutation's slots come first, followed by the locals below.
.equ XoodyakAbsorb_offsetState , (Xoodoo_Permute_12rounds_SAS+0)
.equ XoodyakAbsorb_offsetX , (Xoodoo_Permute_12rounds_SAS+4)
.equ XoodyakAbsorb_offsetXLen , (Xoodoo_Permute_12rounds_SAS+8)
.equ XoodyakAbsorb_offsetInitialLen , (Xoodoo_Permute_12rounds_SAS+12)
.equ XoodyakAbsorb_SAS , (Xoodoo_Permute_12rounds_SAS+20)
.align 4
.global Xoodyak_AbsorbKeyedFullBlocks
.type Xoodyak_AbsorbKeyedFullBlocks, %function;
@ save r3-r7, lr and r8-r11 (the high registers are pushed through r4-r7)
push {r3-r7,lr}
mov r4, r8
mov r5, r9
mov r6, r10
mov r7, r11
push {r4-r7}
sub sp, #XoodyakAbsorb_SAS
str r0, [sp, #XoodyakAbsorb_offsetState] @ setup variables on stack
str r1, [sp, #XoodyakAbsorb_offsetX]
str r2, [sp, #XoodyakAbsorb_offsetInitialLen]
@ XLen is stored one block short, so the carry of the subtraction at the
@ end of the loop tells whether another full block is available
subs r2, r2, #44
str r2, [sp, #XoodyakAbsorb_offsetXLen]
@ address the permutation branches back to (+1 sets the Thumb bit for bx)
ldr r5, =Xoodyak_AbsorbKeyedFullBlocks_Ret+1
str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn]
ldm r0!, {r3,r5,r6,r7} @ state in registers
mov r8, r5
mov r9, r6
str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03]
ldm r0!, {r4,r5,r6,r7}
mov r10, r4
mov r11, r5
mov r12, r6
mov lr, r7
ldm r0!, {r4,r5,r6,r7}
@ run the permutation; it comes back through the stored return address
ldr r0, =Xoodoo_Permute_12roundsAsm
bx r0
.align 4
@ after the permutation: XOR the next 44 input bytes into lanes A00..A22
ldr r0, [sp, #XoodyakAbsorb_offsetX]
@ low two bits of X non-zero -> use the byte-wise path
lsls r1, r0, #30
bne Xoodyak_AbsorbKeyedFullBlocks_Unaligned
ldmia r0!, {r1}
eors r3, r3, r1
ldmia r0!, {r1}
mov r2, r8
eors r2, r2, r1
mov r8, r2
ldmia r0!, {r1}
mov r2, r9
eors r2, r2, r1
mov r9, r2
ldmia r0!, {r1}
ldr r2, [sp, #Xoodoo_Permute_12rounds_offsetA03]
eors r2, r2, r1
str r2, [sp, #Xoodoo_Permute_12rounds_offsetA03]
ldmia r0!, {r1}
mov r2, r10
eors r2, r2, r1
mov r10, r2
ldmia r0!, {r1}
mov r2, r11
eors r2, r2, r1
mov r11, r2
ldmia r0!, {r1}
mov r2, r12
eors r2, r2, r1
mov r12, r2
ldmia r0!, {r1}
mov r2, lr
eors r2, r2, r1
mov lr, r2
ldmia r0!, {r1}
eors r4, r4, r1
ldmia r0!, {r1}
eors r5, r5, r1
ldmia r0!, {r1}
eors r6, r6, r1
str r0, [sp, #XoodyakAbsorb_offsetX]
@ XOR 0x01 into the byte at offset 44 (low byte of A23)
movs r2, #1
eors r7, r7, r2
@ loop management: carry set means at least one more full block remains
ldr r1, [sp, #XoodyakAbsorb_offsetXLen]
subs r1, r1, #44
str r1, [sp, #XoodyakAbsorb_offsetXLen]
bcs Xoodyak_AbsorbKeyedFullBlocks_Loop
@ write the state back to memory
ldr r0, [sp, #XoodyakAbsorb_offsetState]
stm r0!, {r3}
mov r1, r8
mov r2, r9
ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03]
stm r0!, {r1,r2,r3}
mov r1, r10
mov r2, r11
mov r3, r12
stm r0!, {r1,r2,r3}
mov r1, lr
stm r0!, {r1,r4,r5,r6,r7}
@ return initialLength - remaining length (undo the one-block bias first)
ldr r0, [sp, #XoodyakAbsorb_offsetInitialLen]
ldr r2, [sp, #XoodyakAbsorb_offsetXLen]
adds r2, r2, #44
subs r0, r0, r2
add sp, #XoodyakAbsorb_SAS
pop {r4-r7}
mov r8, r4
mov r9, r5
mov r10, r6
mov r11, r7
pop {r3-r7,pc}
@ byte-wise variant of the XOR above for an X that is not word aligned;
@ r0 still holds X, words are assembled with mLoadU
mLoadU r1, r0, 0, r2
eors r3, r3, r1
mLoadU r1, r0, 4, r2
mov r2, r8
eors r2, r2, r1
mov r8, r2
mLoadU r1, r0, 8, r2
mov r2, r9
eors r2, r2, r1
mov r9, r2
mLoadU r1, r0, 12, r2
ldr r2, [sp, #Xoodoo_Permute_12rounds_offsetA03]
eors r2, r2, r1
str r2, [sp, #Xoodoo_Permute_12rounds_offsetA03]
mLoadU r1, r0, 16, r2
mov r2, r10
eors r2, r2, r1
mov r10, r2
mLoadU r1, r0, 20, r2
mov r2, r11
eors r2, r2, r1
mov r11, r2
mLoadU r1, r0, 24, r2
mov r2, r12
eors r2, r2, r1
mov r12, r2
mLoadU r1, r0, 28, r2
mov r2, lr
eors r2, r2, r1
mov lr, r2
@ advance the base and restart the offsets from 0 for the last three words
adds r0, r0, #32
mLoadU r1, r0, 0, r2
eors r4, r4, r1
mLoadU r1, r0, 4, r2
eors r5, r5, r1
mLoadU r1, r0, 8, r2
eors r6, r6, r1
adds r0, r0, #12
b Xoodyak_AbsorbKeyedFullBlocks_EndLoop
@ ----------------------------------------------------------------------------
@ Xoodyak_AbsorbHashFullBlocks
@
@ Absorbs full 16-byte (Xoodyak_Rhash) blocks. Equivalent C:
@
@ size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen)
@ {
@     size_t initialLength = XLen;
@     do {
@         SnP_Permute(state );                        /* Xoodyak_Up(instance, NULL, 0, 0); */
@         SnP_AddBytes(state, X, 0, Xoodyak_Rhash);   /* Xoodyak_Down(instance, X, Xoodyak_Rhash, 0); */
@         SnP_AddByte(state, 0x01, Xoodyak_Rhash);
@         X += Xoodyak_Rhash;
@         XLen -= Xoodyak_Rhash;
@     } while (XLen >= Xoodyak_Rhash);
@     return initialLength - XLen;
@ }
@
@ In:  r0 = state, r1 = X, r2 = XLen
@ Out: r0 = number of bytes absorbed
@ Uses the XoodyakAbsorb_* stack layout and keeps the state in registers:
@   r3, r8, r9, [sp + offsetA03] = A00..A03; r10, r11, r12, lr = A10..A13;
@   r4..r7 = A20..A23.
.align 4
.global Xoodyak_AbsorbHashFullBlocks
.type Xoodyak_AbsorbHashFullBlocks, %function;
@ save r3-r7, lr and r8-r11 (the high registers are pushed through r4-r7)
push {r3-r7,lr}
mov r4, r8
mov r5, r9
mov r6, r10
mov r7, r11
push {r4-r7}
sub sp, #XoodyakAbsorb_SAS
str r0, [sp, #XoodyakAbsorb_offsetState] @ setup variables on stack
str r1, [sp, #XoodyakAbsorb_offsetX]
str r2, [sp, #XoodyakAbsorb_offsetInitialLen]
@ XLen is stored one block short, so the carry of the subtraction at the
@ end of the loop tells whether another full block is available
subs r2, r2, #16
str r2, [sp, #XoodyakAbsorb_offsetXLen]
@ address the permutation branches back to (+1 sets the Thumb bit for bx)
ldr r5, =Xoodyak_AbsorbHashFullBlocks_Ret+1
str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn]
ldm r0!, {r3,r5,r6,r7} @ state in registers
mov r8, r5
mov r9, r6
str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03]
ldm r0!, {r4,r5,r6,r7}
mov r10, r4
mov r11, r5
mov r12, r6
mov lr, r7
ldm r0!, {r4,r5,r6,r7}
@ run the permutation; it comes back through the stored return address
ldr r0, =Xoodoo_Permute_12roundsAsm
bx r0
.align 4
@ after the permutation: XOR the next 16 input bytes into lanes A00..A03
ldr r0, [sp, #XoodyakAbsorb_offsetX]
@ low two bits of X non-zero -> use the byte-wise path
lsls r1, r0, #30
bne Xoodyak_AbsorbHashFullBlocks_Unaligned
ldmia r0!, {r1}
eors r3, r3, r1
ldmia r0!, {r1}
mov r2, r8
eors r2, r2, r1
mov r8, r2
ldmia r0!, {r1}
mov r2, r9
eors r2, r2, r1
mov r9, r2
ldmia r0!, {r1}
ldr r2, [sp, #Xoodoo_Permute_12rounds_offsetA03]
eors r2, r2, r1
str r2, [sp, #Xoodoo_Permute_12rounds_offsetA03]
str r0, [sp, #XoodyakAbsorb_offsetX]
@ XOR 0x01 into the byte at offset 16 (low byte of A10)
movs r2, #1
mov r1, r10
eors r1, r1, r2
mov r10, r1
@ loop management: carry set means at least one more full block remains
ldr r1, [sp, #XoodyakAbsorb_offsetXLen]
subs r1, r1, #16
str r1, [sp, #XoodyakAbsorb_offsetXLen]
bcs Xoodyak_AbsorbHashFullBlocks_Loop
@ write the state back to memory
ldr r0, [sp, #XoodyakAbsorb_offsetState]
stm r0!, {r3}
mov r1, r8
mov r2, r9
ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03]
stm r0!, {r1,r2,r3}
mov r1, r10
mov r2, r11
mov r3, r12
stm r0!, {r1,r2,r3}
mov r1, lr
stm r0!, {r1,r4,r5,r6,r7}
@ return initialLength - remaining length (undo the one-block bias first)
ldr r0, [sp, #XoodyakAbsorb_offsetInitialLen]
ldr r2, [sp, #XoodyakAbsorb_offsetXLen]
adds r2, r2, #16
subs r0, r0, r2
add sp, #XoodyakAbsorb_SAS
pop {r4-r7}
mov r8, r4
mov r9, r5
mov r10, r6
mov r11, r7
pop {r3-r7,pc}
@ byte-wise variant of the XOR above for an X that is not word aligned;
@ r0 still holds X, words are assembled with mLoadU
mLoadU r1, r0, 0, r2
eors r3, r3, r1
mLoadU r1, r0, 4, r2
mov r2, r8
eors r2, r2, r1
mov r8, r2
mLoadU r1, r0, 8, r2
mov r2, r9
eors r2, r2, r1
mov r9, r2
mLoadU r1, r0, 12, r2
ldr r2, [sp, #Xoodoo_Permute_12rounds_offsetA03]
eors r2, r2, r1
str r2, [sp, #Xoodoo_Permute_12rounds_offsetA03]
adds r0, r0, #16
b Xoodyak_AbsorbHashFullBlocks_EndLoop
@ ----------------------------------------------------------------------------
@ Xoodyak_SqueezeKeyedFullBlocks
@
@ Produces full 24-byte (Xoodyak_Rkout) output blocks. Equivalent C:
@
@ size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen)
@ {
@     size_t initialLength = YLen;
@     do {
@         SnP_AddByte(state, 0x01, 0);                   /* Xoodyak_Down(instance, NULL, 0, 0); */
@         SnP_Permute(state );                           /* Xoodyak_Up(instance, Y, Xoodyak_Rkout, 0); */
@         SnP_ExtractBytes(state, Y, 0, Xoodyak_Rkout);
@         Y += Xoodyak_Rkout;
@         YLen -= Xoodyak_Rkout;
@     } while (YLen >= Xoodyak_Rkout);
@     return initialLength - YLen;
@ }
@
@ In:  r0 = state, r1 = Y, r2 = YLen
@ Out: r0 = number of bytes produced
@ The state is kept in registers for the whole loop:
@   r3, r8, r9, [sp + offsetA03] = A00..A03; r10, r11, r12, lr = A10..A13;
@   r4..r7 = A20..A23.
@ Stack frame: the permutation's slots come first, followed by the locals below.
.equ XoodyakSqueeze_offsetState , (Xoodoo_Permute_12rounds_SAS+0)
.equ XoodyakSqueeze_offsetY , (Xoodoo_Permute_12rounds_SAS+4)
.equ XoodyakSqueeze_offsetYLen , (Xoodoo_Permute_12rounds_SAS+8)
.equ XoodyakSqueeze_offsetInitialLen , (Xoodoo_Permute_12rounds_SAS+12)
.equ XoodyakSqueeze_SAS , (Xoodoo_Permute_12rounds_SAS+20)
.align 4
.global Xoodyak_SqueezeKeyedFullBlocks
.type Xoodyak_SqueezeKeyedFullBlocks, %function;
@ save r3-r7, lr and r8-r11 (the high registers are pushed through r4-r7)
push {r3-r7,lr}
mov r4, r8
mov r5, r9
mov r6, r10
mov r7, r11
push {r4-r7}
sub sp, #XoodyakSqueeze_SAS
str r0, [sp, #XoodyakSqueeze_offsetState] @ setup variables on stack
str r1, [sp, #XoodyakSqueeze_offsetY]
str r2, [sp, #XoodyakSqueeze_offsetInitialLen]
@ YLen is stored one block short, so the carry of the subtraction at the
@ end of the loop tells whether another full block is wanted
subs r2, r2, #24
str r2, [sp, #XoodyakSqueeze_offsetYLen]
@ address the permutation branches back to (+1 sets the Thumb bit for bx)
ldr r5, =Xoodyak_SqueezeKeyedFullBlocks_Ret+1
str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn]
ldm r0!, {r3,r5,r6,r7} @ state in registers
mov r8, r5
mov r9, r6
str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03]
ldm r0!, {r4,r5,r6,r7}
mov r10, r4
mov r11, r5
mov r12, r6
mov lr, r7
ldm r0!, {r4,r5,r6,r7}
@ XOR 0x01 into byte 0 of the state (low byte of A00), then permute
movs r0, #1
eors r3, r3, r0
ldr r0, =Xoodoo_Permute_12roundsAsm
bx r0
.align 4
@ after the permutation: copy the first 24 state bytes (A00..A11) to Y
ldr r0, [sp, #XoodyakSqueeze_offsetY]
@ low two bits of Y non-zero -> use the byte-wise path
lsls r1, r0, #30
bne Xoodyak_SqueezeKeyedFullBlocks_Unaligned
stmia r0!, {r3}
mov r1, r8
mov r2, r9
stmia r0!, {r1, r2}
ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03]
mov r2, r10
stmia r0!, {r1, r2}
mov r1, r11
stmia r0!, {r1}
str r0, [sp, #XoodyakSqueeze_offsetY]
@ loop management: carry set means at least one more full block is wanted
ldr r1, [sp, #XoodyakSqueeze_offsetYLen]
subs r1, r1, #24
str r1, [sp, #XoodyakSqueeze_offsetYLen]
bcs Xoodyak_SqueezeKeyedFullBlocks_Loop
ldr r0, [sp, #XoodyakSqueeze_offsetState] @ Save state
stm r0!, {r3}
mov r1, r8
mov r2, r9
ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03]
stm r0!, {r1,r2,r3}
mov r1, r10
mov r2, r11
mov r3, r12
stm r0!, {r1,r2,r3}
mov r1, lr
stm r0!, {r1,r4,r5,r6,r7}
ldr r0, [sp, #XoodyakSqueeze_offsetInitialLen] @ Compute processed length
ldr r2, [sp, #XoodyakSqueeze_offsetYLen]
adds r2, r2, #24
subs r0, r0, r2
add sp, #XoodyakSqueeze_SAS @ Free stack and pop
pop {r4-r7}
mov r8, r4
mov r9, r5
mov r10, r6
mov r11, r7
pop {r3-r7,pc}
@ byte-wise variant of the copy above for a Y that is not word aligned;
@ r0 still holds Y, r1 and r2 are scratch
mStoreU r0, 0, r3, r2, locRegL
mStoreU r0, 4, r8, r2, locRegH
mStoreU r0, 8, r9, r2, locRegH
ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03]
mStoreU r0, 12, r1, r2, locRegL
mStoreU r0, 16, r10, r2, locRegH
mStoreU r0, 20, r11, r2, locRegH
adds r0, r0, #24
b Xoodyak_SqueezeKeyedFullBlocks_EndLoop
@ ----------------------------------------------------------------------------
@ Xoodyak_SqueezeHashFullBlocks
@
@ Produces full 16-byte (Xoodyak_Rhash) output blocks. Equivalent C:
@
@ size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen)
@ {
@     size_t initialLength = YLen;
@     do {
@         SnP_AddByte(state, 0x01, 0);                   /* Xoodyak_Down(instance, NULL, 0, 0); */
@         SnP_Permute(state);                            /* Xoodyak_Up(instance, Y, Xoodyak_Rhash, 0); */
@         SnP_ExtractBytes(state, Y, 0, Xoodyak_Rhash);
@         Y += Xoodyak_Rhash;
@         YLen -= Xoodyak_Rhash;
@     } while (YLen >= Xoodyak_Rhash);
@     return initialLength - YLen;
@ }
@
@ In:  r0 = state, r1 = Y, r2 = YLen
@ Out: r0 = number of bytes produced
@ Uses the XoodyakSqueeze_* stack layout and keeps the state in registers:
@   r3, r8, r9, [sp + offsetA03] = A00..A03; r10, r11, r12, lr = A10..A13;
@   r4..r7 = A20..A23.
.align 4
.global Xoodyak_SqueezeHashFullBlocks
.type Xoodyak_SqueezeHashFullBlocks, %function;
@ save r3-r7, lr and r8-r11 (the high registers are pushed through r4-r7)
push {r3-r7,lr}
mov r4, r8
mov r5, r9
mov r6, r10
mov r7, r11
push {r4-r7}
sub sp, #XoodyakSqueeze_SAS
str r0, [sp, #XoodyakSqueeze_offsetState] @ setup variables on stack
str r1, [sp, #XoodyakSqueeze_offsetY]
str r2, [sp, #XoodyakSqueeze_offsetInitialLen]
@ YLen is stored one block short, so the carry of the subtraction at the
@ end of the loop tells whether another full block is wanted
subs r2, r2, #16
str r2, [sp, #XoodyakSqueeze_offsetYLen]
@ address the permutation branches back to (+1 sets the Thumb bit for bx)
ldr r5, =Xoodyak_SqueezeHashFullBlocks_Ret+1
str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn]
ldm r0!, {r3,r5,r6,r7} @ state in registers
mov r8, r5
mov r9, r6
str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03]
ldm r0!, {r4,r5,r6,r7}
mov r10, r4
mov r11, r5
mov r12, r6
mov lr, r7
ldm r0!, {r4,r5,r6,r7}
@ XOR 0x01 into byte 0 of the state (low byte of A00), then permute
movs r0, #1
eors r3, r3, r0
ldr r0, =Xoodoo_Permute_12roundsAsm
bx r0
.align 4
@ after the permutation: copy the first 16 state bytes (A00..A03) to Y
ldr r0, [sp, #XoodyakSqueeze_offsetY]
@ low two bits of Y non-zero -> use the byte-wise path
lsls r1, r0, #30
bne Xoodyak_SqueezeHashFullBlocks_Unaligned
stmia r0!, {r3}
mov r1, r8
mov r2, r9
stmia r0!, {r1, r2}
ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03]
stmia r0!, {r1}
str r0, [sp, #XoodyakSqueeze_offsetY]
@ loop management: carry set means at least one more full block is wanted
ldr r1, [sp, #XoodyakSqueeze_offsetYLen]
subs r1, r1, #16
str r1, [sp, #XoodyakSqueeze_offsetYLen]
bcs Xoodyak_SqueezeHashFullBlocks_Loop
ldr r0, [sp, #XoodyakSqueeze_offsetState] @ Save state
stm r0!, {r3}
mov r1, r8
mov r2, r9
ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03]
stm r0!, {r1,r2,r3}
mov r1, r10
mov r2, r11
mov r3, r12
stm r0!, {r1,r2,r3}
mov r1, lr
stm r0!, {r1,r4,r5,r6,r7}
ldr r0, [sp, #XoodyakSqueeze_offsetInitialLen] @ Compute processed length
ldr r2, [sp, #XoodyakSqueeze_offsetYLen]
adds r2, r2, #16
subs r0, r0, r2
add sp, #XoodyakSqueeze_SAS @ Free stack and pop
pop {r4-r7}
mov r8, r4
mov r9, r5
mov r10, r6
mov r11, r7
pop {r3-r7,pc}
@ byte-wise variant of the copy above for a Y that is not word aligned;
@ r0 still holds Y, r1 and r2 are scratch
mStoreU r0, 0, r3, r2, locRegL
mStoreU r0, 4, r8, r2, locRegH
mStoreU r0, 8, r9, r2, locRegH
ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03]
mStoreU r0, 12, r1, r2, locRegL
adds r0, r0, #16
b Xoodyak_SqueezeHashFullBlocks_EndLoop
@ ----------------------------------------------------------------------------
@ Xoodyak_EncryptFullBlocks
@
@ Encrypts full 24-byte (Xoodyak_Rkout) blocks. Equivalent C:
@
@ size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen)
@ {
@     size_t initialLength = IOLen;
@     do {
@         SnP_Permute(state);
@         SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout);
@         SnP_OverwriteBytes(state, O, 0, Xoodyak_Rkout);
@         SnP_AddByte(state, 0x01, Xoodyak_Rkout);
@         I += Xoodyak_Rkout;
@         O += Xoodyak_Rkout;
@         IOLen -= Xoodyak_Rkout;
@     } while (IOLen >= Xoodyak_Rkout);
@     return initialLength - IOLen;
@ }
@
@ In:  r0 = state, r1 = I, r2 = O, r3 = IOLen
@ Out: r0 = number of bytes processed
@ The state is kept in registers for the whole loop:
@   r3, r8, r9, [sp + offsetA03] = A00..A03; r10, r11, r12, lr = A10..A13;
@   r4..r7 = A20..A23.
@ I and O may have any alignment: the word-wise path is used only when both
@ pointers are 4-byte aligned, otherwise the byte-wise path is taken.
@ Stack frame: the permutation's slots come first, followed by the locals below.
.equ XoodyakCrypt_offsetState , (Xoodoo_Permute_12rounds_SAS+0)
.equ XoodyakCrypt_offsetI , (Xoodoo_Permute_12rounds_SAS+4)
.equ XoodyakCrypt_offsetO , (Xoodoo_Permute_12rounds_SAS+8)
.equ XoodyakCrypt_offsetIOLen , (Xoodoo_Permute_12rounds_SAS+12)
.equ XoodyakCrypt_offsetInitialLen , (Xoodoo_Permute_12rounds_SAS+16)
.equ XoodyakCrypt_SAS , (Xoodoo_Permute_12rounds_SAS+20)
.align 4
.global Xoodyak_EncryptFullBlocks
.type Xoodyak_EncryptFullBlocks, %function;
@ save r3-r7, lr and r8-r11 (the high registers are pushed through r4-r7)
push {r3-r7,lr}
mov r4, r8
mov r5, r9
mov r6, r10
mov r7, r11
push {r4-r7}
sub sp, #XoodyakCrypt_SAS
str r0, [sp, #XoodyakCrypt_offsetState] @ setup variables on stack
str r1, [sp, #XoodyakCrypt_offsetI]
str r2, [sp, #XoodyakCrypt_offsetO]
str r3, [sp, #XoodyakCrypt_offsetInitialLen]
@ IOLen is stored one block short, so the carry of the subtraction at the
@ end of the loop tells whether another full block is available
subs r3, r3, #24
str r3, [sp, #XoodyakCrypt_offsetIOLen]
@ address the permutation branches back to (+1 sets the Thumb bit for bx)
ldr r5, =Xoodyak_EncryptFullBlocks_Ret+1
str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn]
ldm r0!, {r3,r5,r6,r7} @ state in registers
mov r8, r5
mov r9, r6
str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03]
ldm r0!, {r4,r5,r6,r7}
mov r10, r4
mov r11, r5
mov r12, r6
mov lr, r7
ldm r0!, {r4,r5,r6,r7}
@ run the permutation; it comes back through the stored return address
ldr r0, =Xoodoo_Permute_12roundsAsm
bx r0
.align 4
@ after the permutation: r4/r5 are borrowed as O/I cursors, so the two state
@ lanes they hold are parked on the stack and every sp-relative offset below
@ is biased by 8 until the matching pop
push {r4, r5}
ldr r5, [sp, #XoodyakCrypt_offsetI+8]
ldr r4, [sp, #XoodyakCrypt_offsetO+8]
@ The fast path uses ldmia on I and stmia on O, so BOTH pointers must be
@ word aligned. OR the pointers together so that a low bit set in either
@ one selects the byte-wise path (an AND would miss cases such as
@ I & 3 == 1 with O & 3 == 2, or one aligned and one unaligned pointer).
mov r0, r4
orrs r0, r0, r5
lsls r0, r0, #30
bne Xoodyak_EncryptFullBlocks_Unaligned
@ per lane: lane ^= input word; output word = lane (the lane now holds the ciphertext)
ldmia r5!, {r0}
eors r3, r3, r0
stmia r4!, {r3}
ldmia r5!, {r0}
mov r1, r8
eors r1, r1, r0
stmia r4!, {r1}
mov r8, r1
ldmia r5!, {r0}
mov r1, r9
eors r1, r1, r0
stmia r4!, {r1}
mov r9, r1
ldmia r5!, {r0}
ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8]
eors r1, r1, r0
stmia r4!, {r1}
str r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8]
ldmia r5!, {r0}
mov r1, r10
eors r1, r1, r0
stmia r4!, {r1}
mov r10, r1
ldmia r5!, {r0}
mov r1, r11
eors r1, r1, r0
stmia r4!, {r1}
mov r11, r1
@ XOR 0x01 into the byte at offset 24 (low byte of A12)
movs r0, #1
mov r1, r12
eors r1, r1, r0
mov r12, r1
@ save the advanced cursors and restore the two parked state lanes
str r5, [sp, #XoodyakCrypt_offsetI+8]
str r4, [sp, #XoodyakCrypt_offsetO+8]
pop {r4, r5}
@ loop management: carry set means at least one more full block remains
ldr r1, [sp, #XoodyakCrypt_offsetIOLen]
subs r1, r1, #24
str r1, [sp, #XoodyakCrypt_offsetIOLen]
bcs Xoodyak_EncryptFullBlocks_Loop
ldr r0, [sp, #XoodyakCrypt_offsetState] @ Save state
stm r0!, {r3}
mov r1, r8
mov r2, r9
ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03]
stm r0!, {r1,r2,r3}
mov r1, r10
mov r2, r11
mov r3, r12
stm r0!, {r1,r2,r3}
mov r1, lr
stm r0!, {r1,r4,r5,r6,r7}
ldr r0, [sp, #XoodyakCrypt_offsetInitialLen] @ Compute processed length
ldr r2, [sp, #XoodyakCrypt_offsetIOLen]
adds r2, r2, #24
subs r0, r0, r2
add sp, #XoodyakCrypt_SAS @ Free stack and pop
pop {r4-r7}
mov r8, r4
mov r9, r5
mov r10, r6
mov r11, r7
pop {r3-r7,pc}
@ byte-wise variant of the per-lane processing above, used when I or O is
@ not word aligned; r5 = I, r4 = O, r0/r1/r2 are scratch
mLoadU r0, r5, 0, r2
eors r3, r3, r0
mStoreU r4, 0, r3, r2, locRegL
mLoadU r0, r5, 4, r2
mov r1, r8
eors r1, r1, r0
mStoreU r4, 4, r1, r2, locRegL
mov r8, r1
mLoadU r0, r5, 8, r2
mov r1, r9
eors r1, r1, r0
mStoreU r4, 8, r1, r2, locRegL
mov r9, r1
mLoadU r0, r5, 12, r2
ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8]
eors r1, r1, r0
mStoreU r4, 12, r1, r2, locRegL
str r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8]
mLoadU r0, r5, 16, r2
mov r1, r10
eors r1, r1, r0
mStoreU r4, 16, r1, r2, locRegL
mov r10, r1
mLoadU r0, r5, 20, r2
mov r1, r11
eors r1, r1, r0
mStoreU r4, 20, r1, r2, locRegL
mov r11, r1
@ advance both cursors by one block and rejoin the common tail
adds r4, r4, #24
adds r5, r5, #24
b Xoodyak_EncryptFullBlocks_EndLoop
@ ----------------------------------------------------------------------------
@ Xoodyak_DecryptFullBlocks
@
@ Decrypts full 24-byte (Xoodyak_Rkout) blocks. Equivalent C:
@
@ size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen)
@ {
@     size_t initialLength = IOLen;
@     do {
@         SnP_Permute(state);
@         SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout);
@         SnP_AddBytes(state, O, 0, Xoodyak_Rkout);
@         SnP_AddByte(state, 0x01, Xoodyak_Rkout);
@         I += Xoodyak_Rkout;
@         O += Xoodyak_Rkout;
@         IOLen -= Xoodyak_Rkout;
@     } while (IOLen >= Xoodyak_Rkout);
@     return initialLength - IOLen;
@ }
@
@ In:  r0 = state, r1 = I, r2 = O, r3 = IOLen
@ Out: r0 = number of bytes processed
@ Uses the XoodyakCrypt_* stack layout and keeps the state in registers:
@   r3, r8, r9, [sp + offsetA03] = A00..A03; r10, r11, r12, lr = A10..A13;
@   r4..r7 = A20..A23.
@ I and O may have any alignment: the word-wise path is used only when both
@ pointers are 4-byte aligned, otherwise the byte-wise path is taken.
.align 4
.global Xoodyak_DecryptFullBlocks
.type Xoodyak_DecryptFullBlocks, %function;
@ save r3-r7, lr and r8-r11 (the high registers are pushed through r4-r7)
push {r3-r7,lr}
mov r4, r8
mov r5, r9
mov r6, r10
mov r7, r11
push {r4-r7}
sub sp, #XoodyakCrypt_SAS
str r0, [sp, #XoodyakCrypt_offsetState] @ setup variables on stack
str r1, [sp, #XoodyakCrypt_offsetI]
str r2, [sp, #XoodyakCrypt_offsetO]
str r3, [sp, #XoodyakCrypt_offsetInitialLen]
@ IOLen is stored one block short, so the carry of the subtraction at the
@ end of the loop tells whether another full block is available
subs r3, r3, #24
str r3, [sp, #XoodyakCrypt_offsetIOLen]
@ address the permutation branches back to (+1 sets the Thumb bit for bx)
ldr r5, =Xoodyak_DecryptFullBlocks_Ret+1
str r5, [sp, #Xoodoo_Permute_12rounds_offsetReturn]
ldm r0!, {r3,r5,r6,r7} @ state in registers
mov r8, r5
mov r9, r6
str r7, [sp, #Xoodoo_Permute_12rounds_offsetA03]
ldm r0!, {r4,r5,r6,r7}
mov r10, r4
mov r11, r5
mov r12, r6
mov lr, r7
ldm r0!, {r4,r5,r6,r7}
@ run the permutation; it comes back through the stored return address
ldr r0, =Xoodoo_Permute_12roundsAsm
bx r0
.align 4
@ after the permutation: r4/r5 are borrowed as O/I cursors, so the two state
@ lanes they hold are parked on the stack and every sp-relative offset below
@ is biased by 8 until the matching pop
push {r4, r5}
ldr r5, [sp, #XoodyakCrypt_offsetI+8]
ldr r4, [sp, #XoodyakCrypt_offsetO+8]
@ The fast path uses ldmia on I and stmia on O, so BOTH pointers must be
@ word aligned. OR the pointers together so that a low bit set in either
@ one selects the byte-wise path (an AND would miss cases such as
@ I & 3 == 1 with O & 3 == 2, or one aligned and one unaligned pointer).
mov r0, r4
orrs r0, r0, r5
lsls r0, r0, #30
bne Xoodyak_DecryptFullBlocks_Unaligned
@ per lane: output word = lane ^ input word; the lane is then replaced by
@ the input (ciphertext) word
ldmia r5!, {r0}
eors r3, r3, r0
stmia r4!, {r3}
mov r3, r0
ldmia r5!, {r0}
mov r1, r8
eors r1, r1, r0
stmia r4!, {r1}
mov r8, r0
ldmia r5!, {r0}
mov r1, r9
eors r1, r1, r0
stmia r4!, {r1}
mov r9, r0
ldmia r5!, {r0}
ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8]
eors r1, r1, r0
stmia r4!, {r1}
str r0, [sp, #Xoodoo_Permute_12rounds_offsetA03+8]
ldmia r5!, {r0}
mov r1, r10
eors r1, r1, r0
stmia r4!, {r1}
mov r10, r0
ldmia r5!, {r0}
mov r1, r11
eors r1, r1, r0
stmia r4!, {r1}
mov r11, r0
@ XOR 0x01 into the byte at offset 24 (low byte of A12)
movs r0, #1
mov r1, r12
eors r1, r1, r0
mov r12, r1
@ save the advanced cursors and restore the two parked state lanes
str r5, [sp, #XoodyakCrypt_offsetI+8]
str r4, [sp, #XoodyakCrypt_offsetO+8]
pop {r4, r5}
@ loop management: carry set means at least one more full block remains
ldr r1, [sp, #XoodyakCrypt_offsetIOLen]
subs r1, r1, #24
str r1, [sp, #XoodyakCrypt_offsetIOLen]
bcs Xoodyak_DecryptFullBlocks_Loop
ldr r0, [sp, #XoodyakCrypt_offsetState] @ Save state
stm r0!, {r3}
mov r1, r8
mov r2, r9
ldr r3, [sp, #Xoodoo_Permute_12rounds_offsetA03]
stm r0!, {r1,r2,r3}
mov r1, r10
mov r2, r11
mov r3, r12
stm r0!, {r1,r2,r3}
mov r1, lr
stm r0!, {r1,r4,r5,r6,r7}
ldr r0, [sp, #XoodyakCrypt_offsetInitialLen] @ Compute processed length
ldr r2, [sp, #XoodyakCrypt_offsetIOLen]
adds r2, r2, #24
subs r0, r0, r2
add sp, #XoodyakCrypt_SAS @ Free stack and pop
pop {r4-r7}
mov r8, r4
mov r9, r5
mov r10, r6
mov r11, r7
pop {r3-r7,pc}
@ byte-wise variant of the per-lane processing above, used when I or O is
@ not word aligned; r5 = I, r4 = O, r0/r1/r2 are scratch
mLoadU r0, r5, 0, r2
eors r3, r3, r0
mStoreU r4, 0, r3, r2, locRegL
mov r3, r0
mLoadU r0, r5, 4, r2
mov r1, r8
eors r1, r1, r0
mStoreU r4, 4, r1, r2, locRegL
mov r8, r0
mLoadU r0, r5, 8, r2
mov r1, r9
eors r1, r1, r0
mStoreU r4, 8, r1, r2, locRegL
mov r9, r0
mLoadU r0, r5, 12, r2
ldr r1, [sp, #Xoodoo_Permute_12rounds_offsetA03+8]
eors r1, r1, r0
mStoreU r4, 12, r1, r2, locRegL
str r0, [sp, #Xoodoo_Permute_12rounds_offsetA03+8]
mLoadU r0, r5, 16, r2
mov r1, r10
eors r1, r1, r0
mStoreU r4, 16, r1, r2, locRegL
mov r10, r0
mLoadU r0, r5, 20, r2
mov r1, r11
eors r1, r1, r0
mStoreU r4, 20, r1, r2, locRegL
mov r11, r0
@ advance both cursors by one block and rejoin the common tail
adds r4, r4, #24
adds r5, r5, #24
b Xoodyak_DecryptFullBlocks_EndLoop
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifdef XoodooReference
#include "displayIntermediateValues.h"
#include <assert.h>
#include <string.h>
#include "Xoodyak.h"
#ifdef OUTPUT
#include <stdlib.h>
#include <string.h>
/*
 * Debug helper: writes `synopsis`, a colon, then each of the `length` bytes
 * of `data` as a space-prefixed two-digit lowercase hex value, followed by a
 * newline, to stream `f`.
 */
static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length);
static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length)
unsigned int i;
fprintf(f, "%s:", synopsis);
for(i=0; i<length; i++)
fprintf(f, " %02x", (unsigned int)data[i]);
fprintf(f, "\n");
/* Smaller of two values; each argument may be evaluated twice. */
#define MyMin(a,b) (((a) < (b)) ? (a) : (b))
#ifdef XKCP_has_Xoodoo
#include "Xoodoo-SnP.h"
/* Parameters for the included template: SnP names the permutation, SnP_Permute
   its permute function, and prefix the generated symbol prefix. All three are
   undefined again once the template has been included. */
#define SnP Xoodoo
#define SnP_Permute Xoodoo_Permute_12rounds
#define prefix Xoodyak
/* NOTE(review): the file name in the include below is empty. It presumably
   should name the Cyclist template source that consumes the three macros
   above -- confirm against the upstream file. */
#include ""
#undef prefix
#undef SnP
#undef SnP_Permute
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/* Public header for Xoodyak: declares the instance type built on the Xoodoo permutation. */
#ifndef _Xoodyak_h_
#define _Xoodyak_h_
#include "config.h"
#ifdef XKCP_has_Xoodoo
#include <stddef.h>
#include "Cyclist.h"
#include "Xoodoo-SnP.h"
#include "Xoodyak-parameters.h"
/* Expands to the typedef of struct Xoodyak_CyclistInstanceStruct as Xoodyak_Instance,
   with a state array of Xoodoo_stateSizeInBytes bytes and the requested alignment. */
KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment)
/* Reported when the configuration does not define XKCP_has_Xoodoo. */
#error This requires an implementation of Xoodoo
The eXtended Keccak Code Package (XKCP)
Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/* ALIGN(x): compiler-specific attribute requesting x-byte alignment; expands
   to nothing on compilers that are not recognised. */
#ifndef _align_h_
#define _align_h_
/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. */
#ifdef ALIGN
#undef ALIGN
#if defined(__GNUC__)
#define ALIGN(x) __attribute__ ((aligned(x)))
#elif defined(_MSC_VER)
#define ALIGN(x) __declspec(align(x))
#elif defined(__ARMCC_VERSION)
#define ALIGN(x) __align(x)
#define ALIGN(x)
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
1. source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
2. binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation;
3. the name of the copyright holder is not used to endorse products
built using this software without specific written permission.
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
Issue Date: 20/12/2007
Changes for ARM 9/9/2010
/* Byte-order detection header: defines the two marker values IS_BIG_ENDIAN and
   IS_LITTLE_ENDIAN, then tests a series of compiler and platform macros as
   described by the comments below. */
#ifndef _BRG_ENDIAN_H
#define _BRG_ENDIAN_H
#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
#if 0
/* Include files where endian defines and byteswap functions may reside */
#if defined( __sun )
# include <sys/isa_defs.h>
#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
# include <sys/endian.h>
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
# include <machine/endian.h>
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
# if !defined( __MINGW32__ ) && !defined( _AIX )
# include <endian.h>
# if !defined( __BEOS__ )
# include <byteswap.h>
# endif
# endif
/* Now attempt to set the define for platform byte order using any */
/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
/* seem to encompass most endian symbol definitions */
#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
# endif
#elif defined( BIG_ENDIAN )
#elif defined( LITTLE_ENDIAN )
#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
# endif
#elif defined( _BIG_ENDIAN )
#elif defined( _LITTLE_ENDIAN )
#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN )
#elif defined( __LITTLE_ENDIAN )
#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
# endif
#elif defined( __BIG_ENDIAN__ )
#elif defined( __LITTLE_ENDIAN__ )
/* if the platform byte order could not be determined, then try to */
/* set this define using common machine defines */
#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
defined( vax ) || defined( vms ) || defined( VMS ) || \
defined( __VMS ) || defined( _M_X64 )
#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \
defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ )
#elif defined(__arm__)
# ifdef __BIG_ENDIAN
# else
# endif
#elif 1 /* **** EDIT HERE IF NECESSARY **** */
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
/* Last resort: no branch above matched, so the build is stopped. */
# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order
/* File generated by ToTargetConfigFile.xsl */
/* Configuration flags defined for this target: both the Xoodyak scheme and */
/* the Xoodoo permutation are marked as present. */
#define XKCP_has_Xoodyak
#define XKCP_has_Xoodoo
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#include "crypto_hash.h"
#ifndef crypto_hash_BYTES
#define crypto_hash_BYTES 32
#include "Xoodyak.h"
/*
 * One-shot hash: absorb the `inlen` bytes at `in` into a fresh Xoodyak
 * instance and write crypto_hash_BYTES bytes of output to `out`.
 *
 * The instance is initialised with no key, no ID and no counter (all
 * NULL / 0). Always returns 0.
 */
int crypto_hash(unsigned char *out, const unsigned char *in, unsigned long long inlen)
Xoodyak_Instance instance;
Xoodyak_Initialize(&instance, NULL, 0, NULL, 0, NULL, 0);
/* NOTE(review): inlen is narrowed to size_t; on a target where size_t is */
/* narrower than unsigned long long a larger length would be truncated --  */
/* confirm callers never pass such a length. */
Xoodyak_Absorb(&instance, in, (size_t)inlen);
Xoodyak_Squeeze(&instance, out, crypto_hash_BYTES);
/* Disabled debug aid: would print every output byte as a \xNN escape. */
#if 0
unsigned int i;
for (i = 0; i < crypto_hash_BYTES; ++i )
printf("\\x%02x", out[i] );
return 0;
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Cyclist_h_
#define _Cyclist_h_
#include <stdint.h>
#include "align.h"
/* Values stored in the `mode` field of a Cyclist instance. */
#define Cyclist_ModeHash 1
#define Cyclist_ModeKeyed 2
/* Values stored in the `phase` field of a Cyclist instance. */
#define Cyclist_PhaseDown 1
#define Cyclist_PhaseUp 2
/*
 * KCP_DeclareCyclistStructure(prefix, size, alignment)
 *
 * Declares `struct prefix_CyclistInstanceStruct` and its typedef
 * `prefix_Instance`, aligned with ALIGN(alignment):
 *   state     - `size` bytes of permutation state
 *   phase     - one of Cyclist_PhaseDown / Cyclist_PhaseUp
 *   mode      - one of Cyclist_ModeHash / Cyclist_ModeKeyed
 *   Rabsorb   - current absorb rate
 *   Rsqueeze  - current squeeze rate
 *
 * Two variants follow. The first one, selected by OUTPUT, adds a shadow copy
 * of the state and a FILE pointer used for tracing.
 * NOTE(review): the #else / #endif separating the two variants is not
 * visible in this text.
 */
#ifdef OUTPUT
#include <stdio.h>
#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
uint8_t state[size]; \
uint8_t stateShadow[size]; \
FILE *file; \
unsigned int phase; \
unsigned int mode; \
unsigned int Rabsorb; \
unsigned int Rsqueeze; \
} prefix##_Instance;
/* Variant without the tracing fields. */
#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
uint8_t state[size]; \
unsigned int phase; \
unsigned int mode; \
unsigned int Rabsorb; \
unsigned int Rsqueeze; \
} prefix##_Instance;
/*
 * KCP_DeclareCyclistFunctions(prefix)
 *
 * Emits the prototypes of the public Cyclist interface for one instantiation:
 * prefix_Initialize, prefix_Absorb, prefix_Encrypt, prefix_Decrypt,
 * prefix_Squeeze, prefix_SqueezeKey and prefix_Ratchet, all operating on a
 * prefix_Instance.
 * NOTE(review): the prototypes use size_t, but only <stdint.h> and "align.h"
 * are included by this header as far as visible here -- confirm size_t is
 * declared before the macro is expanded.
 */
#define KCP_DeclareCyclistFunctions(prefix) \
void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \
void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \
void prefix##_Encrypt(prefix##_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen); \
void prefix##_Decrypt(prefix##_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen); \
void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \
void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \
void prefix##_Ratchet(prefix##_Instance *instance);
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/*
 * Name-mangling layer of the Cyclist template.
 *
 * JOIN pastes two tokens after macro expansion, so every generic name below
 * resolves to a symbol built from the macros `SnP` (permutation back end) and
 * `prefix` (Cyclist instantiation).
 * NOTE(review): `SnP` and `prefix` are not defined in this part of the file;
 * presumably the including file defines them before this template is used.
 */
#define JOIN0(a, b) a ## b
#define JOIN(a, b) JOIN0(a, b)
/* State-and-permutation primitives of the back end. */
#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize)
#define SnP_Initialize JOIN(SnP, _Initialize)
#define SnP_AddBytes JOIN(SnP, _AddBytes)
#define SnP_AddByte JOIN(SnP, _AddByte)
#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes)
#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes)
#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes)
/* Public Cyclist interface of this instantiation. */
#define Cyclist_Instance JOIN(prefix, _Instance)
#define Cyclist_Initialize JOIN(prefix, _Initialize)
#define Cyclist_Absorb JOIN(prefix, _Absorb)
#define Cyclist_Encrypt JOIN(prefix, _Encrypt)
#define Cyclist_Decrypt JOIN(prefix, _Decrypt)
#define Cyclist_Squeeze JOIN(prefix, _Squeeze)
#define Cyclist_SqueezeKey JOIN(prefix, _SqueezeKey)
#define Cyclist_Ratchet JOIN(prefix, _Ratchet)
/* Internal (static) helpers. */
#define Cyclist_AbsorbAny JOIN(prefix, _AbsorbAny)
#define Cyclist_AbsorbKey JOIN(prefix, _AbsorbKey)
#define Cyclist_SqueezeAny JOIN(prefix, _SqueezeAny)
#define Cyclist_Down JOIN(prefix, _Down)
#define Cyclist_Up JOIN(prefix, _Up)
#define Cyclist_Crypt JOIN(prefix, _Crypt)
/* Size and rate parameters of the instantiation. */
#define Cyclist_f_bPrime JOIN(prefix, _f_bPrime)
#define Cyclist_Rhash JOIN(prefix, _Rhash)
#define Cyclist_Rkin JOIN(prefix, _Rkin)
#define Cyclist_Rkout JOIN(prefix, _Rkout)
#define Cyclist_lRatchet JOIN(prefix, _lRatchet)
/* Optional full-block fast paths supplied by the back end. */
#if defined(CyclistFullBlocks_supported)
#define Cyclist_AbsorbKeyedFullBlocks JOIN(prefix, _AbsorbKeyedFullBlocks)
#define Cyclist_AbsorbHashFullBlocks JOIN(prefix, _AbsorbHashFullBlocks)
#define Cyclist_SqueezeKeyedFullBlocks JOIN(prefix, _SqueezeKeyedFullBlocks)
#define Cyclist_SqueezeHashFullBlocks JOIN(prefix, _SqueezeHashFullBlocks)
#define Cyclist_EncryptFullBlocks JOIN(prefix, _EncryptFullBlocks)
#define Cyclist_DecryptFullBlocks JOIN(prefix, _DecryptFullBlocks)
/* ------- Cyclist internal interfaces ------- */
/*
 * Down step: XOR one input block and its framing bytes into the state.
 *
 *   Xi, XiLen - block to XOR into the state starting at byte 0
 *   Cd        - colour byte XORed into the last state byte; in hash mode only
 *               its least significant bit is used
 *
 * A 0x01 byte is XORed at offset XiLen, right after the data, and the phase
 * is set to Cyclist_PhaseDown.
 */
static void Cyclist_Down(Cyclist_Instance *instance, const uint8_t *Xi, unsigned int XiLen, uint8_t Cd)
SnP_AddBytes(instance->state, Xi, 0, XiLen);
SnP_AddByte(instance->state, 0x01, XiLen);
SnP_AddByte(instance->state, (instance->mode == Cyclist_ModeHash) ? (Cd & 0x01) : Cd, Cyclist_f_bPrime - 1);
instance->phase = Cyclist_PhaseDown;
/*
 * Up step: apply the permutation and optionally extract output.
 *
 *   Yi, YiLen - receives the first YiLen bytes of the permuted state
 *               (callers in this file pass NULL / 0 when nothing is wanted)
 *   Cu        - colour byte XORed into the last state byte before the
 *               permutation; skipped in hash mode
 *
 * Sets the phase to Cyclist_PhaseUp. When OUTPUT is defined and a trace file
 * is attached, the data XORed since the previous permutation and the state
 * after the permutation are written to that file.
 */
static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu)
#if defined(OUTPUT)
uint8_t s[Cyclist_f_bPrime];
if (instance->mode != Cyclist_ModeHash) {
SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1);
#if defined(OUTPUT)
if (instance->file != NULL) {
/* s = stateShadow XOR state: what was XORed in since the last snapshot. */
SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime );
SnP_Permute( instance->state );
#if defined(OUTPUT)
if (instance->file != NULL) {
/* Take a new snapshot of the state right after the permutation. */
memcpy( instance->stateShadow, instance->state, sizeof(instance->state) );
fprintf( instance->file, "Data XORed" );
displayByteString( instance->file, "", s, Cyclist_f_bPrime );
SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
fprintf( instance->file, "After f() ");
displayByteString( instance->file, "", s, Cyclist_f_bPrime );
instance->phase = Cyclist_PhaseUp;
SnP_ExtractBytes( instance->state, Yi, 0, YiLen );
/*
 * Absorb XLen bytes from X in blocks of at most r bytes.
 *
 *   r  - block size (rate) to use
 *   Cd - colour byte passed to the Down step of the first block only; the
 *        following blocks use 0
 *
 * An Up step is performed before a block whenever the instance is not already
 * in the Up phase. The loop is a do/while, so one (possibly empty) block is
 * absorbed even when XLen is 0.
 */
static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd)
unsigned int splitLen;
do {
if (instance->phase != Cyclist_PhaseUp) {
Cyclist_Up(instance, NULL, 0, 0);
splitLen = MyMin(XLen, r);
Cyclist_Down(instance, X, splitLen, Cd);
Cd = 0;
X += splitLen;
XLen -= splitLen;
/* Optional fast path: after the first block, hand runs of full blocks to */
/* the back end, which reports how many bytes it consumed. */
#if defined(CyclistFullBlocks_supported)
if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) {
size_t lenProcessed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen);
X += lenProcessed;
XLen -= lenProcessed;
else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) {
size_t lenProcessed = Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen);
X += lenProcessed;
XLen -= lenProcessed;
} while ( XLen != 0 );
/*
 * Switch the instance from hash mode to keyed mode and absorb the key material.
 *
 *   K, KLen             - key
 *   ID, IDLen           - identifier appended to the key
 *   counter, counterLen - optional counter, absorbed one byte per block
 *
 * Requires (asserted) that the instance is in hash mode and that
 * KLen + IDLen <= Cyclist_Rkin - 1, so that K || ID || (uint8_t)IDLen fits in
 * the KID buffer. The rates are switched to Cyclist_Rkin / Cyclist_Rkout.
 * NOTE(review): ID is passed to memcpy even when IDLen is 0; confirm callers
 * never pass ID == NULL in that case.
 */
static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
uint8_t KID[Cyclist_Rkin];
assert(instance->mode == Cyclist_ModeHash);
assert((KLen + IDLen) <= (Cyclist_Rkin - 1));
instance->mode = Cyclist_ModeKeyed;
instance->Rabsorb = Cyclist_Rkin;
instance->Rsqueeze = Cyclist_Rkout;
if (KLen != 0) {
/* Build K || ID || len(ID) and absorb it with colour 0x02. */
memcpy(KID, K, KLen);
memcpy(KID + KLen, ID, IDLen);
KID[KLen + IDLen] = (uint8_t)IDLen;
Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02);
if (counterLen != 0) {
/* The counter is absorbed with a block size of 1 byte. */
Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00);
/*
 * Squeeze YLen bytes of output into Y.
 *
 *   Cu - colour byte passed to the first Up step; later Up steps use 0
 *
 * The first block (at most Rsqueeze bytes) comes from an Up step. Each
 * further block is produced by an empty Down step followed by an Up step.
 */
static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu)
unsigned int len;
len = MyMin(YLen, instance->Rsqueeze );
Cyclist_Up(instance, Y, len, Cu);
Y += len;
YLen -= len;
while (YLen != 0) {
/* Optional fast path: let the back end produce runs of full blocks. */
/* NOTE(review): the keyed test compares against Cyclist_Rkin although the */
/* keyed squeeze rate set elsewhere in this file is Cyclist_Rkout -- confirm */
/* this threshold is intended. */
#if defined(CyclistFullBlocks_supported)
if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkin)) {
size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen);
Y += lenProcessed;
YLen -= lenProcessed;
else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) {
size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen);
Y += lenProcessed;
YLen -= lenProcessed;
Cyclist_Down(instance, NULL, 0, 0);
len = MyMin(YLen, instance->Rsqueeze );
Cyclist_Up(instance, Y, len, 0);
Y += len;
YLen -= len;
/*
 * Shared body of Encrypt and Decrypt.
 *
 *   I, O, IOLen - input buffer, output buffer and their common length
 *   decrypt     - non-zero to decrypt, zero to encrypt
 *
 * For each block of at most Cyclist_Rkout bytes: an Up step (colour 0x80 for
 * the first block, 0x00 afterwards), O = I XOR state, then a Down step that
 * absorbs the plaintext. When decrypting, the plaintext is O; when
 * encrypting, it is a copy of I taken before O is written.
 * The loop is a do/while, so one Up/Down pair runs even when IOLen is 0.
 * NOTE(review): this function calls Xoodoo_ExtractAndAddBytes directly,
 * whereas the rest of the template goes through the SnP_ aliases.
 */
static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt)
unsigned int splitLen;
uint8_t P[Cyclist_Rkout];
uint8_t Cu = 0x80;
do {
if (decrypt != 0) {
/* Fast path, only once the first block (Cu != 0) has been handled. */
#if defined(CyclistFullBlocks_supported)
if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen);
I += lenProcessed;
O += lenProcessed;
IOLen -= lenProcessed;
splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
Xoodoo_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
Cyclist_Down(instance, O, splitLen, 0x00);
I += splitLen;
O += splitLen;
IOLen -= splitLen;
else {
/* Fast path, only once the first block (Cu != 0) has been handled. */
#if defined(CyclistFullBlocks_supported)
if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen);
I += lenProcessed;
O += lenProcessed;
IOLen -= lenProcessed;
splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
/* Keep a copy of the plaintext so it can still be absorbed after O has */
/* been written. */
memcpy(P, I, splitLen);
Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
Xoodoo_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
Cyclist_Down(instance, P, splitLen, 0x00);
I += splitLen;
O += splitLen;
IOLen -= splitLen;
Cu = 0x00;
} while ( IOLen != 0 );
/* ------- Cyclist interfaces ------- */
/*
 * Initialise a Cyclist instance.
 *
 * The instance starts in hash mode, Up phase, with both rates set to
 * Cyclist_Rhash. If KLen is non-zero, Cyclist_AbsorbKey is then called with
 * K, ID and counter, which switches the instance to keyed mode.
 * With OUTPUT defined, the trace file pointer is cleared and the shadow state
 * is initialised.
 * NOTE(review): no statement initialising instance->state itself is visible
 * in this text -- confirm the state is initialised before first use.
 */
void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
instance->phase = Cyclist_PhaseUp;
instance->mode = Cyclist_ModeHash;
instance->Rabsorb = Cyclist_Rhash;
instance->Rsqueeze = Cyclist_Rhash;
#ifdef OUTPUT
instance->file = 0;
SnP_Initialize( instance->stateShadow );
if (KLen != 0) {
Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, counter, counterLen);
/* Absorb XLen bytes from X at the instance's current absorb rate, using */
/* colour 0x03 for the first block. */
void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, size_t XLen)
Cyclist_AbsorbAny(instance, X, XLen, instance->Rabsorb, 0x03);
/* Encrypt PLen bytes of plaintext P into C. The instance must be in keyed */
/* mode (asserted). */
void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen)
assert(instance->mode == Cyclist_ModeKeyed);
Cyclist_Crypt(instance, P, C, PLen, 0);
/* Decrypt CLen bytes of ciphertext C into P. The instance must be in keyed */
/* mode (asserted). */
void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen)
assert(instance->mode == Cyclist_ModeKeyed);
Cyclist_Crypt(instance, C, P, CLen, 1);
/* Squeeze YLen output bytes into Y, using colour 0x40 for the first Up step. */
void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen)
Cyclist_SqueezeAny(instance, Y, YLen, 0x40);
/* Squeeze KLen bytes into K, using colour 0x20 for the first Up step. The */
/* instance must be in keyed mode (asserted). */
void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen)
assert(instance->mode == Cyclist_ModeKeyed);
Cyclist_SqueezeAny(instance, K, KLen, 0x20);
/*
 * Ratchet the keyed state: squeeze Cyclist_lRatchet bytes with colour 0x10
 * and absorb those same bytes back with colour 0x00. The instance must be in
 * keyed mode (asserted).
 */
void Cyclist_Ratchet(Cyclist_Instance *instance)
uint8_t buffer[Cyclist_lRatchet];
assert(instance->mode == Cyclist_ModeKeyed);
/* Squeeze then absorb is the same as overwriting with zeros */
Cyclist_SqueezeAny(instance, buffer, sizeof(buffer), 0x10);
Cyclist_AbsorbAny(instance, buffer, sizeof(buffer), instance->Rabsorb, 0x00);
/* Remove the template's alias macros again, so that they do not remain */
/* defined after the template and can be defined afresh later. */
#undef SnP_StaticInitialize
#undef SnP_Initialize
#undef SnP_AddBytes
#undef SnP_AddByte
#undef SnP_OverwriteBytes
#undef SnP_ExtractBytes
#undef SnP_ExtractAndAddBytes
#undef Cyclist_Instance
#undef Cyclist_Initialize
#undef Cyclist_Absorb
#undef Cyclist_Encrypt
#undef Cyclist_Decrypt
#undef Cyclist_Squeeze
#undef Cyclist_SqueezeKey
#undef Cyclist_Ratchet
#undef Cyclist_AbsorbAny
#undef Cyclist_AbsorbKey
#undef Cyclist_SqueezeAny
#undef Cyclist_Down
#undef Cyclist_Up
#undef Cyclist_Crypt
#undef Cyclist_f_bPrime
#undef Cyclist_Rhash
#undef Cyclist_Rkin
#undef Cyclist_Rkout
#undef Cyclist_lRatchet
/* The full-block aliases only exist when the back end advertises them. */
#if defined(CyclistFullBlocks_supported)
#undef Cyclist_AbsorbKeyedFullBlocks
#undef Cyclist_AbsorbHashFullBlocks
#undef Cyclist_SqueezeKeyedFullBlocks
#undef Cyclist_SqueezeHashFullBlocks
#undef Cyclist_EncryptFullBlocks
#undef Cyclist_DecryptFullBlocks
The eXtended Keccak Code Package (XKCP)
The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Xoodoo_SnP_h_
#define _Xoodoo_SnP_h_
#include <stddef.h>
#include <stdint.h>
/** For the documentation, see SnP-documentation.h. */
/* Description string and state geometry of this Xoodoo back end: the state */
/* is 3*4*4 = 48 bytes and requires 4-byte alignment. */
#define Xoodoo_implementation "32-bit optimized ARM assembler implementation"
#define Xoodoo_stateSizeInBytes (3*4*4)
#define Xoodoo_stateAlignment 4
/* No static initialisation is needed: the macro expands to nothing. */
#define Xoodoo_StaticInitialize()
void Xoodoo_Initialize(void *state);
/* XOR one byte into the state at byte offset argOffset. */
#define Xoodoo_AddByte(argS, argData, argOffset) ((uint8_t*)argS)[argOffset] ^= (argData)
/* Byte-range primitives on the state; `offset` and `length` are in bytes. */
void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount);
//void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds);
void Xoodoo_Permute_6rounds(void *state);
void Xoodoo_Permute_12rounds(void *state);
void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length);
/* output[i] = input[i] XOR state[offset + i] for i in [0, length). */
void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length);
/* Xoofff fast loops provided by this back end. */
#define Xoodoo_FastXoofff_supported
void Xoofff_AddIs(uint8_t *output, const uint8_t *input, size_t bitLen);
size_t Xoofff_CompressFastLoop(uint8_t *kRoll, uint8_t *xAccu, const uint8_t *input, size_t length);
size_t Xoofff_ExpandFastLoop(uint8_t *yAccu, const uint8_t *kRoll, uint8_t *output, size_t length);
/* Cyclist full-block fast paths; each returns a byte count that the Cyclist */
/* template uses to advance its buffers. */
/* NOTE(review): no definition of these six functions is visible in this part */
/* of the file -- confirm they are provided elsewhere. */
#define CyclistFullBlocks_supported
size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen);
size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen);
size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen);
size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen);
size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);
size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);
@ The eXtended Keccak Code Package (XKCP)
@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
@ For more information, feedback or questions, please refer to the Keccak Team website:
@ To the extent possible under law, the implementer has waived all copyright
@ and related or neighboring rights to the source code in this file.
@ WARNING: These functions work only on little endian CPU with ARMv7m architecture (Cortex-M3, ...).
.syntax unified
@ ----------------------------------------------------------------------------
@ void Xoodoo_Initialize(void *state)
@
@ Sets the whole state to zero: four zeroed registers are stored three times
@ with write-back, i.e. 12 words (48 bytes) starting at r0 (state).
@ NOTE(review): the function's entry label is not visible in this text.
.align 4
.global Xoodoo_Initialize
.type Xoodoo_Initialize, %function;
movs r1, #0
movs r2, #0
movs r3, #0
movs r12, #0
stmia r0!, { r1 - r3, r12 }
stmia r0!, { r1 - r3, r12 }
stmia r0!, { r1 - r3, r12 }
bx lr
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
@ XORs `length` bytes from data into the state, starting at byte `offset`.
@ In:  r0 = state, r1 = data, r2 = offset, r3 = length
@ Whole 32-bit words are processed first, then the remaining 0..3 bytes.
@ NOTE(review): the labels targeted by the branches below (entry, _Bytes,
@ _BytesLoop, _Exit) are not visible in this text.
.global Xoodoo_AddBytes
.type Xoodoo_AddBytes, %function;
push {r4,lr}
adds r0, r0, r2 @ state += offset
subs r3, r3, #4 @ .if length >= 4
bcc Xoodoo_AddBytes_Bytes
Xoodoo_AddBytes_LanesLoop: @ then, perform on lanes
ldr r2, [r0]
ldr r4, [r1], #4
eors r2, r2, r4
str r2, [r0], #4
subs r3, r3, #4
bcs Xoodoo_AddBytes_LanesLoop
@ r3 is now (remaining - 4); adding 3 leaves carry clear only when no byte
@ remains, otherwise r3 = remaining - 1 for the byte loop.
adds r3, r3, #3
bcc Xoodoo_AddBytes_Exit
ldrb r2, [r0]
ldrb r4, [r1], #1
eors r2, r2, r4
strb r2, [r0], #1
subs r3, r3, #1
bcs Xoodoo_AddBytes_BytesLoop
pop {r4,pc}
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
@ Copies `length` bytes from data over the state, starting at byte `offset`.
@ In:  r0 = state, r1 = data, r2 = offset, r3 = length
@ Whole 32-bit words are copied first, then the remaining 0..3 bytes.
@ NOTE(review): the labels targeted by the branches below (entry, _Bytes,
@ _BytesLoop, _Exit) are not visible in this text.
.global Xoodoo_OverwriteBytes
.type Xoodoo_OverwriteBytes, %function;
adds r0, r0, r2 @ state += offset
subs r3, r3, #4 @ .if length >= 4
bcc Xoodoo_OverwriteBytes_Bytes
Xoodoo_OverwriteBytes_LanesLoop: @ then, perform on words
ldr r2, [r1], #4
str r2, [r0], #4
subs r3, r3, #4
bcs Xoodoo_OverwriteBytes_LanesLoop
@ carry clear after the add means no trailing byte is left
adds r3, r3, #3
bcc Xoodoo_OverwriteBytes_Exit
ldrb r2, [r1], #1
strb r2, [r0], #1
subs r3, r3, #1
bcs Xoodoo_OverwriteBytes_BytesLoop
bx lr
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount)
@
@ Writes zero over the first byteCount bytes of the state.
@ In:  r0 = state, r1 = byteCount
@ byteCount / 4 words are cleared first, then byteCount % 4 single bytes.
@ NOTE(review): the labels targeted by the branches below (entry, _LoopLanes,
@ _Bytes, _LoopBytes, _Exit) are not visible in this text.
.global Xoodoo_OverwriteWithZeroes
.type Xoodoo_OverwriteWithZeroes, %function;
movs r3, #0
lsrs r2, r1, #2
beq Xoodoo_OverwriteWithZeroes_Bytes
str r3, [r0], #4
subs r2, r2, #1
bne Xoodoo_OverwriteWithZeroes_LoopLanes
ands r1, #3
beq Xoodoo_OverwriteWithZeroes_Exit
strb r3, [r0], #1
subs r1, r1, #1
bne Xoodoo_OverwriteWithZeroes_LoopBytes
bx lr
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
@
@ Copies `length` bytes of the state, starting at byte `offset`, into data.
@ In:  r0 = state (only read), r1 = data (written), r2 = offset, r3 = length
@ Whole 32-bit words are copied first, then the remaining 0..3 bytes.
@ NOTE(review): the labels targeted by the branches below (entry, _Bytes,
@ _BytesLoop, _Exit) are not visible in this text.
.global Xoodoo_ExtractBytes
.type Xoodoo_ExtractBytes, %function;
adds r0, r0, r2 @ state += offset
subs r3, r3, #4 @ .if length >= 4
bcc Xoodoo_ExtractBytes_Bytes
Xoodoo_ExtractBytes_LanesLoop: @ then, handle words
ldr r2, [r0], #4
str r2, [r1], #4
subs r3, r3, #4
bcs Xoodoo_ExtractBytes_LanesLoop
@ carry clear after the add means no trailing byte is left
adds r3, r3, #3
bcc Xoodoo_ExtractBytes_Exit
ldrb r2, [r0], #1
strb r2, [r1], #1
subs r3, r3, #1
bcs Xoodoo_ExtractBytes_BytesLoop
bx lr
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_ExtractAndAddBytes(void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
@
@ output[i] = state[offset + i] XOR input[i] for `length` bytes; the state is
@ only read.
@ In:  r0 = state, r1 = input, r2 = output, r3 = offset, [sp] = length
@ The length is read at [sp, #8] because two registers have just been pushed.
@ NOTE(review): the labels targeted by the branches below (entry, _Bytes,
@ _BytesLoop, _Exit) are not visible in this text.
.global Xoodoo_ExtractAndAddBytes
.type Xoodoo_ExtractAndAddBytes, %function;
push {r4,r5}
adds r0, r0, r3 @ state += offset (offset register no longer needed, reuse for length)
ldr r3, [sp, #8] @ get length argument from stack
subs r3, r3, #4 @ .if length >= 4
bcc Xoodoo_ExtractAndAddBytes_Bytes
Xoodoo_ExtractAndAddBytes_LanesLoop: @ then, handle words
ldr r5, [r0], #4
ldr r4, [r1], #4
eors r5, r5, r4
str r5, [r2], #4
subs r3, r3, #4
bcs Xoodoo_ExtractAndAddBytes_LanesLoop
@ carry clear after the add means no trailing byte is left
adds r3, r3, #3
bcc Xoodoo_ExtractAndAddBytes_Exit
ldrb r5, [r0], #1
ldrb r4, [r1], #1
eors r5, r5, r4
strb r5, [r2], #1
subs r3, r3, #1
bcs Xoodoo_ExtractAndAddBytes_BytesLoop
pop {r4,r5}
bx lr
.align 4
@ ----------------------------------------------------------------------------
@ Rotation amounts used by the round macros: _r0 and _r1 appear in the Theta
@ part of mRound, _w1 in mChi3 and in the rho_we2 arguments (_e1+_w1).
@ NOTE(review): _t3 and _e0 are not referenced in the code visible here.
.equ _r0 , 5
.equ _r1 , 14
.equ _t3 , 1
.equ _w1 , 11
.equ _e0 , 2
.equ _e1 , 8
@ Plain round constants, _rc12 down to _rc1.
.equ _rc12 , 0x00000058
.equ _rc11 , 0x00000038
.equ _rc10 , 0x000003C0
.equ _rc9 , 0x000000D0
.equ _rc8 , 0x00000120
.equ _rc7 , 0x00000014
.equ _rc6 , 0x00000060
.equ _rc5 , 0x0000002C
.equ _rc4 , 0x00000380
.equ _rc3 , 0x000000F0
.equ _rc2 , 0x000001A0
.equ _rc1 , 0x00000012
@ Pre-rotated constants for the 6-round sequence: _rcNxK equals _rcN rotated
@ right by (K * _r0) mod 32 bits, K being the position of the round in the
@ sequence. mRound leaves the state rotated by -_r0 after every round.
.equ _rc6x1, 0x00000003
.equ _rc5x2, 0x0b000000
.equ _rc4x3, 0x07000000
.equ _rc3x4, 0x000f0000
.equ _rc2x5, 0x0000d000
.equ _rc1x6, 0x00000048
@ Pre-rotated constants for the 12-round sequence, same construction.
.equ _rc12x1, 0xc0000002
.equ _rc11x2, 0x0e000000
.equ _rc10x3, 0x07800000
.equ _rc9x4 , 0x000d0000
.equ _rc8x5 , 0x00009000
.equ _rc7x6 , 0x00000050
.equ _rc6x7 , 0x0000000c
.equ _rc5x8 , 0x2c000000
.equ _rc4x9 , 0x1c000000
.equ _rc3x10, 0x003c0000
.equ _rc2x11, 0x00034000
.equ _rc1x12, 0x00000120
@ ----------------------------------------------------------------------------
@ mXor3 ro, a0, a1, a2, rho_e1, rho_e2
@ ro = a0 XOR rotl(a1, rho_e1) XOR rotl(a2, rho_e2). The rotations are
@ expressed as ROR #(32-n)%32, and a plain eors is used when n is a multiple
@ of 32.
@ NOTE(review): the .else / .endif / .endm lines of this macro are not
@ visible in this text.
.macro mXor3 ro, a0, a1, a2, rho_e1, rho_e2
.if ((\rho_e1)%32) == 0
eors \ro, \a0, \a1
eor \ro, \a0, \a1, ROR #(32-(\rho_e1))%32
.if ((\rho_e2)%32) == 0
eors \ro, \ro, \a2
eor \ro, \ro, \a2, ROR #(32-(\rho_e2))%32
@ mRliXor ro, ri, rot
@ ro = ro XOR rotl(ri, rot): the rotation is applied to the input operand ri.
@ NOTE(review): the .else / .endif / .endm lines of this macro are not
@ visible in this text.
.macro mRliXor ro, ri, rot
.if ((\rot)%32) == 0
eors \ro, \ro, \ri
eor \ro, \ro, \ri, ROR #(32-(\rot))%32
@ mRloXor ro, ri, rot
@ ro = ri XOR rotl(ro, rot): the rotation is applied to the old value of the
@ output operand ro.
@ NOTE(review): the .else / .endif / .endm lines of this macro are not
@ visible in this text.
.macro mRloXor ro, ri, rot
.if ((\rot)%32) == 0
eors \ro, \ro, \ri
eor \ro, \ri, \ro, ROR #(32-(\rot))%32
@ mChi3 a0, a1, a2, r0, r1
@ Non-linear step on one column (a0, a1, a2), using r0 and r1 as scratch.
@ Each lane is XORed with a BIC (and-not) of the two other lanes. a2 is
@ consumed and produced with a pending rotation of _w1, which is why ROR #_w1
@ and ROR #32-_w1 appear on the operands.
@ NOTE(review): the .endm line of this macro is not visible in this text.
.macro mChi3 a0,a1,a2,r0,r1
bic \r0, \a2, \a1, ROR #_w1
eors \a0, \a0, \r0, ROR #32-_w1
bic \r1, \a0, \a2, ROR #32-_w1
eors \a1, \a1, \r1
bic \r1, \a1, \a0
eors \a2, \a2, \r1, ROR #_w1
@ mRound: one Xoodoo round on a state held entirely in registers.
@   r2-r5                  first plane (fixed registers)
@   r6i..r9i / r6w..r9w    registers of the second plane as read by Theta
@                          (i) and as used by Chi (w)
@   r10i, r11i, r12i, lri  registers of the third plane
@   rho_e1, rho_we2        rotations still pending on the second and third
@                          plane from the previous round
@   rc                     round constant, already rotated for this round
@ r0 and r1 are used as scratch.
@ NOTE(review): the .else / .endif / .endm lines of this macro are not
@ visible in this text.
.macro mRound r6i, r7i, r8i, r9i, r6w, r7w, r8w, r9w, r10i, r11i, r12i, lri, rho_e1, rho_we2, rc
@ Theta: Column Parity Mixer (with late Rho-west, Rho-east bit rotations)
mXor3 r0, r5, \r9i, \lri, \rho_e1, \rho_we2
mXor3 r1, r2, \r6i, \r10i, \rho_e1, \rho_we2
mRliXor r0, r0, _r1-_r0
mRloXor r2, r0, 32-_r0
mRloXor \r6i, r0, \rho_e1-_r0
mRloXor \r10i, r0, \rho_we2-_r0
mXor3 r0, r3, \r7i, \r11i, \rho_e1, \rho_we2
mRliXor r1, r1, _r1-_r0
mRloXor r3, r1, 32-_r0
mRloXor \r7i, r1, \rho_e1-_r0
mRloXor \r11i, r1, \rho_we2-_r0
mXor3 r1, r4, \r8i, \r12i, \rho_e1, \rho_we2
mRliXor r0, r0, _r1-_r0
mRloXor r4, r0, 32-_r0
mRloXor \r8i, r0, \rho_e1-_r0
mRloXor \r12i, r0, \rho_we2-_r0
mRliXor r1, r1, _r1-_r0
mRloXor r5, r1, 32-_r0
mRloXor \r9i, r1, \rho_e1-_r0
mRloXor \lri, r1, \rho_we2-_r0
@ After Theta the whole state is rotated -r0
@ from here we must use a1.w instead of a1.i
@ Iota: round constant
@ 0xc0000002 is applied as two separate eor instructions, presumably because
@ it cannot be encoded as a single immediate operand.
.if \rc == 0xc0000002
eor r2, r2, #0x00000002
eor r2, r2, #0xc0000000
eor r2, r2, #\rc
@ Chi: non linear step, on columns
mChi3 r2, \r6w, \r10i, r0, r1
mChi3 r3, \r7w, \r11i, r0, r1
mChi3 r4, \r8w, \r12i, r0, r1
mChi3 r5, \r9w, \lri, r0, r1
@ ----------------------------------------------------------------------------
@ void Xoodoo_Permute_6rounds( void *state )
@
@ Applies six rounds (constants _rc6x1 .. _rc1x6) to the 48-byte state at r0.
@ The state is loaded into r2-r12 and lr, processed in registers and stored
@ back. r4-r11 are preserved for the caller.
@ NOTE(review): the function's entry label is not visible in this text.
.global Xoodoo_Permute_6rounds
.type Xoodoo_Permute_6rounds, %function;
push {r0,r4-r11,lr}
ldmia r0!, {r2-r5}
@ words 4,5 go to r8,r9 and words 6,7 to r6,r7, matching the register order
@ given to the first mRound below
ldmia r0!, {r8-r9}
ldmia r0!, {r6-r7}
ldmia r0, {r10-r12,lr}
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 32, 32, _rc6x1
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6
@ r0 = state pointer again, r1 = caller's r4 (restored into r4 further down)
pop {r0,r1}
@ apply the rotations that the rounds left pending, one amount per plane
ror r2, r2, #32-(6*_r0)%32
ror r3, r3, #32-(6*_r0)%32
ror r4, r4, #32-(6*_r0)%32
ror r5, r5, #32-(6*_r0)%32
ror r6, r6, #32-(6*_r0+1)%32
ror r7, r7, #32-(6*_r0+1)%32
ror r8, r8, #32-(6*_r0+1)%32
ror r9, r9, #32-(6*_r0+1)%32
ror r10, r10, #32-(6*_r0+_e1+_w1)%32
ror r11, r11, #32-(6*_r0+_e1+_w1)%32
ror r12, r12, #32-(6*_r0+_e1+_w1)%32
ror lr, lr, #32-(6*_r0+_e1+_w1)%32
stmia r0, {r2-r12,lr}
mov r4, r1
pop {r5-r11,pc}
.align 4
@ ----------------------------------------------------------------------------
@ void Xoodoo_Permute_12rounds( void *state )
@
@ Applies twelve rounds (constants _rc12x1 .. _rc1x12) to the 48-byte state
@ at r0. The state is loaded into r2-r12 and lr, processed in registers and
@ stored back. r4-r11 are preserved for the caller.
@ NOTE(review): the function's entry label is not visible in this text.
.global Xoodoo_Permute_12rounds
.type Xoodoo_Permute_12rounds, %function;
push {r0,r4-r11,lr}
ldmia r0, {r2-r12,lr}
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc12x1
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc11x2
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc10x3
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc9x4
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc8x5
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc7x6
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc6x7
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x8
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x9
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x10
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x11
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x12
@ apply the rotations that the rounds left pending, one amount per plane
ror r2, r2, #32-(12*_r0)%32
ror r3, r3, #32-(12*_r0)%32
ror r4, r4, #32-(12*_r0)%32
ror r5, r5, #32-(12*_r0)%32
ror r6, r6, #32-(12*_r0+1)%32
ror r7, r7, #32-(12*_r0+1)%32
ror r8, r8, #32-(12*_r0+1)%32
ror r9, r9, #32-(12*_r0+1)%32
ror r10, r10, #32-(12*_r0+_e1+_w1)%32
ror r11, r11, #32-(12*_r0+_e1+_w1)%32
ror r12, r12, #32-(12*_r0+_e1+_w1)%32
ror lr, lr, #32-(12*_r0+_e1+_w1)%32
@ r0 = state pointer again, r1 = caller's r4 (restored into r4 below)
pop {r0,r1}
stmia r0, {r2-r12,lr}
mov r4, r1
pop {r5-r11,pc}
.align 4
.equ Xoofff_BlockSize , 3*4*4
@ ----------------------------------------------------------------------------
@ void Xoofff_AddIs(BitSequence *output, const BitSequence *input, BitLength bitLen)
@
@ XORs bitLen bits of input into output, in place.
@ In:  r0 = output, r1 = input, r2 = bitLen (in bits)
@ The data is handled in decreasing granularity: 48-byte blocks, 16-byte
@ groups, 4-byte words, single bytes, and finally one partial byte.
@ NOTE(review): the labels targeted by the branches below (entry, the loop
@ heads, the _LessThan* labels and _Return) are not visible in this text.
.global Xoofff_AddIs
.type Xoofff_AddIs, %function;
push {r4-r10,lr}
subs r2, r2, #Xoofff_BlockSize*8
bcc Xoofff_AddIs_LessThanBlock
@ one full block: three groups of four words
ldr r3, [r0, #0]
ldr r4, [r0, #4]
ldr r5, [r0, #8]
ldr r6, [r0, #12]
ldr r7, [r1], #4
ldr r8, [r1], #4
ldr r9, [r1], #4
ldr r10, [r1], #4
eor r3, r3, r7
eor r4, r4, r8
eor r5, r5, r9
eor r6, r6, r10
str r3, [r0], #4
str r4, [r0], #4
str r5, [r0], #4
str r6, [r0], #4
ldr r3, [r0, #0]
ldr r4, [r0, #4]
ldr r5, [r0, #8]
ldr r6, [r0, #12]
ldr r7, [r1], #4
ldr r8, [r1], #4
ldr r9, [r1], #4
ldr r10, [r1], #4
eor r3, r3, r7
eor r4, r4, r8
eor r5, r5, r9
eor r6, r6, r10
str r3, [r0], #4
str r4, [r0], #4
str r5, [r0], #4
str r6, [r0], #4
ldr r3, [r0, #0]
ldr r4, [r0, #4]
ldr r5, [r0, #8]
ldr r6, [r0, #12]
ldr r7, [r1], #4
ldr r8, [r1], #4
ldr r9, [r1], #4
ldr r10, [r1], #4
eor r3, r3, r7
eor r4, r4, r8
eor r5, r5, r9
eor r6, r6, r10
str r3, [r0], #4
str r4, [r0], #4
str r5, [r0], #4
str r6, [r0], #4
subs r2, r2, #Xoofff_BlockSize*8
bcs Xoofff_AddIs_BlockLoop
@ undo the last subtraction to get the number of bits still to process
adds r2, r2, #Xoofff_BlockSize*8
beq Xoofff_AddIs_Return
subs r2, r2, #16*8
bcc Xoofff_AddIs_LessThan16
@ groups of 16 bytes
ldr r3, [r0, #0]
ldr r4, [r0, #4]
ldr r5, [r0, #8]
ldr r6, [r0, #12]
ldr r7, [r1], #4
ldr r8, [r1], #4
ldr r9, [r1], #4
ldr r10, [r1], #4
eor r3, r3, r7
eor r4, r4, r8
eor r5, r5, r9
eor r6, r6, r10
str r3, [r0], #4
str r4, [r0], #4
str r5, [r0], #4
str r6, [r0], #4
subs r2, r2, #16*8
bcs Xoofff_AddIs_16Loop
adds r2, r2, #16*8
beq Xoofff_AddIs_Return
subs r2, r2, #4*8
bcc Xoofff_AddIs_LessThan4
@ single 32-bit words
ldr r3, [r0]
ldr r7, [r1], #4
eors r3, r3, r7
str r3, [r0], #4
subs r2, r2, #4*8
bcs Xoofff_AddIs_4Loop
adds r2, r2, #4*8
beq Xoofff_AddIs_Return
subs r2, r2, #8
bcc Xoofff_AddIs_LessThan1
@ single bytes
ldrb r3, [r0]
ldrb r7, [r1], #1
eors r3, r3, r7
strb r3, [r0], #1
subs r2, r2, #8
bcs Xoofff_AddIs_1Loop
adds r2, r2, #8
beq Xoofff_AddIs_Return
@ last partial byte: r2 = number of remaining bits (1..7). The XOR result is
@ masked with (1 << r2) - 1, so the upper bits of the stored byte are zero.
ldrb r3, [r0]
ldrb r7, [r1]
movs r1, #1
eors r3, r3, r7
lsls r1, r1, r2
subs r1, r1, #1
ands r3, r3, r1
strb r3, [r0]
pop {r4-r10,pc}
.align 4
@ ----------------------------------------------------------------------------
@ size_t Xoofff_CompressFastLoop(unsigned char *kRoll, unsigned char *xAccu, const unsigned char *input, size_t length)
@
@ For every full 48-byte input block: XOR the block into the current kRoll
@ value, apply six Xoodoo rounds, XOR the result into xAccu, then roll kRoll
@ in place. Returns the number of input bytes consumed.
@ In:  r0 = kRoll, r1 = xAccu, r2 = input, r3 = length
@ The offsets below describe the stack after the two pushes: [sp,#0] = kRoll,
@ [sp,#4] = running input pointer, [sp,#8] = xAccu, [sp,#12] = initial input
@ pointer, [sp,#16] = remaining length (already reduced by one block).
@ NOTE(review): the entry label and the _Loop label are not visible in this
@ text.
.equ Xoofff_Compress_kRoll , 0
.equ Xoofff_Compress_input , 4
.equ Xoofff_Compress_xAccu , 8
.equ Xoofff_Compress_iInput , 12
.equ Xoofff_Compress_length , 16
.global Xoofff_CompressFastLoop
.type Xoofff_CompressFastLoop, %function;
subs r3, #Xoofff_BlockSize @ length must be greater than block size
push {r1-r12,lr}
push {r0,r2}
ldmia r0, {r2-r12,lr} @ get initial kRoll
ldr r0, [sp, #Xoofff_Compress_input] @ add input
ldr r1, [r0], #4
eors r2, r2, r1
ldr r1, [r0], #4
eors r3, r3, r1
ldr r1, [r0], #4
eors r4, r4, r1
ldr r1, [r0], #4
eors r5, r5, r1
ldr r1, [r0], #4
eors r6, r6, r1
ldr r1, [r0], #4
eors r7, r7, r1
ldr r1, [r0], #4
eors r8, r8, r1
ldr r1, [r0], #4
eors r9, r9, r1
ldr r1, [r0], #4
eors r10, r10, r1
ldr r1, [r0], #4
eors r11, r11, r1
ldr r1, [r0], #4
eors r12, r12, r1
ldr r1, [r0], #4
eors lr, lr, r1
str r0, [sp, #Xoofff_Compress_input]
@ permutation
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc6x1
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6
@ Extract and add into xAccu
@ (the pending rotations of the permuted state are applied while XORing)
ldr r0, [sp, #Xoofff_Compress_xAccu]
ldr r1, [r0]
mRloXor r2, r1, (6*_r0)%32
ldr r1, [r0, #4]
str r2, [r0], #4
mRloXor r3, r1, (6*_r0)%32
ldr r1, [r0, #4]
str r3, [r0], #4
mRloXor r4, r1, (6*_r0)%32
ldr r1, [r0, #4]
str r4, [r0], #4
mRloXor r5, r1, (6*_r0)%32
str r5, [r0], #4
ldm r0, {r2-r5} @ note that r6-r8 and r7-r9 are swapped
mRliXor r2, r8, (6*_r0+1)%32
mRliXor r3, r9, (6*_r0+1)%32
mRliXor r4, r6, (6*_r0+1)%32
mRliXor r5, r7, (6*_r0+1)%32
stm r0!, {r2-r5}
ldm r0, {r2-r5}
mRliXor r2, r10, (6*_r0+_e1+_w1)%32
mRliXor r3, r11, (6*_r0+_e1+_w1)%32
mRliXor r4, r12, (6*_r0+_e1+_w1)%32
mRliXor r5, lr, (6*_r0+_e1+_w1)%32
stm r0!, {r2-r5}
@roll kRoll
@ new words 0..7 = old words 4..11, new words 8..10 = old words 1..3,
@ new word 11 = w0 ^ (w0 << 13) ^ rotl(old word 4, 3)
ldr r0, [sp, #Xoofff_Compress_kRoll]
ldr lr, [r0], #4
ldmia r0!, {r10-r12}
ldmia r0!, {r2-r9}
eors lr, lr, lr, LSL #13
eors lr, lr, r2, ROR #32-3
sub r0, #Xoofff_BlockSize
stmia r0, {r2-r12,lr}
@ loop management
ldr r0, [sp, #Xoofff_Compress_length]
subs r0, #Xoofff_BlockSize
str r0, [sp, #Xoofff_Compress_length]
bcs Xoofff_CompressFastLoop_Loop
@ return number of bytes processed
ldr r0, [sp, #Xoofff_Compress_input]
ldr r1, [sp, #Xoofff_Compress_iInput]
sub r0, r0, r1
@ drop the two words of the second push, then restore registers and return
pop {r1,r2}
pop {r1-r12,pc}
.align 4
@ ----------------------------------------------------------------------------
@ size_t Xoofff_ExpandFastLoop(unsigned char *yAccu, const unsigned char *kRoll, unsigned char *output, size_t length)
@
@ For every full 48-byte output block: apply six Xoodoo rounds to the current
@ yAccu value, XOR the result with kRoll and write it to output, then roll
@ yAccu in place. Returns the number of output bytes produced.
@ In:  r0 = yAccu, r1 = kRoll, r2 = output, r3 = length
@ The offsets below describe the stack after the two pushes: [sp,#0] = yAccu,
@ [sp,#4] = running output pointer, [sp,#8] = kRoll, [sp,#12] = initial
@ output pointer, [sp,#16] = remaining length (already reduced by one block).
@ NOTE(review): the entry label and the _Loop label are not visible in this
@ text.
.equ Xoofff_Expand_yAccu , 0
.equ Xoofff_Expand_output , 4
.equ Xoofff_Expand_kRoll , 8
.equ Xoofff_Expand_iOutput , 12
.equ Xoofff_Expand_length , 16
.global Xoofff_ExpandFastLoop
.type Xoofff_ExpandFastLoop, %function;
subs r3, #Xoofff_BlockSize @ length must be greater than block size
push {r1-r12,lr}
push {r0,r2}
ldmia r0, {r2-r12,lr} @ get initial yAccu
@ permutation
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc6x1
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc5x2
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc4x3
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc3x4
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc2x5
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc1x6
@ Add k and extract
@ (the pending rotations of the permuted state are applied while XORing)
ldr r0, [sp, #Xoofff_Expand_kRoll]
ldr r1, [r0], #4
mRloXor r2, r1, (6*_r0)%32
ldr r1, [sp, #Xoofff_Expand_output]
str r2, [r1], #4
ldr r2, [r0], #4
mRloXor r3, r2, (6*_r0)%32
ldr r2, [r0], #4
str r3, [r1], #4
mRloXor r4, r2, (6*_r0)%32
ldr r2, [r0], #4
str r4, [r1], #4
mRloXor r5, r2, (6*_r0)%32
str r5, [r1], #4
ldm r0!, {r2-r5} @ Note that r6-r8 and r7-r9 are swapped
mRliXor r2, r8, (6*_r0+1)%32
str r2, [r1], #4
mRliXor r3, r9, (6*_r0+1)%32
str r3, [r1], #4
mRliXor r4, r6, (6*_r0+1)%32
str r4, [r1], #4
mRliXor r5, r7, (6*_r0+1)%32
str r5, [r1], #4
ldm r0!, {r2-r5}
mRliXor r2, r10, (6*_r0+_e1+_w1)%32
str r2, [r1], #4
mRliXor r3, r11, (6*_r0+_e1+_w1)%32
str r3, [r1], #4
mRliXor r4, r12, (6*_r0+_e1+_w1)%32
str r4, [r1], #4
mRliXor r5, lr, (6*_r0+_e1+_w1)%32
str r5, [r1], #4
@ roll-e yAccu
@ new words 0..7 = old words 4..11, new words 8..10 = old words 1..3,
@ new word 11 = (old word 8 & old word 4) ^ rotl(w0, 5) ^ rotl(old word 4, 13) ^ 7
ldr r0, [sp, #Xoofff_Expand_yAccu]
str r1, [sp, #Xoofff_Expand_output]
ldr lr, [r0], #4
ldmia r0!, {r10-r12}
ldmia r0!, {r2-r9}
and r1, r6, r2
eor lr, r1, lr, ROR #32-5
eor lr, lr, r2, ROR #32-13
eor lr, lr, #7
sub r0, #Xoofff_BlockSize
stmia r0, {r2-r12,lr}
@ loop management
ldr r0, [sp, #Xoofff_Expand_length]
subs r0, #Xoofff_BlockSize
str r0, [sp, #Xoofff_Expand_length]
bcs Xoofff_ExpandFastLoop_Loop
@ return number of bytes processed
ldr r0, [sp, #Xoofff_Expand_output]
ldr r1, [sp, #Xoofff_Expand_iOutput]
sub r0, r0, r1
@ drop the two words of the second push, then restore registers and return
pop {r1,r2}
pop {r1-r12,pc}
.align 4
The eXtended Keccak Code Package (XKCP)
The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/* Include guard and basic parameters shared by the Xoodoo implementations. */
#ifndef _Xoodoo_h_
#define _Xoodoo_h_
#include <stdint.h>
#include <stdlib.h>
/* Number of round constants defined below, and the state geometry:
 * NROWS x NCOLUMS lanes (see the index() macro and tXoodooLane). */
#define MAXROUNDS 12
#define NROWS 3
#define NCOLUMS 4
/* Round constants */
#define _rc12 0x00000058
#define _rc11 0x00000038
#define _rc10 0x000003C0
#define _rc9 0x000000D0
#define _rc8 0x00000120
#define _rc7 0x00000014
#define _rc6 0x00000060
#define _rc5 0x0000002C
#define _rc4 0x00000380
#define _rc3 0x000000F0
#define _rc2 0x000001A0
#define _rc1 0x00000012
/* ROTL32(a, offset): rotate the 32-bit value a left by offset bits (mod 32).
 * Only defined here if the including code has not already provided it.
 * Compiler-specific intrinsics are used for non-GNU ARM compilers and MSVC. */
#if !defined(ROTL32)
#if defined (__arm__) && !defined(__GNUC__)
#define ROTL32(a, offset) __ror(a, (32-(offset))%32)
#elif defined(_MSC_VER)
#define ROTL32(a, offset) _rotl(a, (offset)%32)
#define ROTL32(a, offset) ((((uint32_t)a) << ((offset)%32)) ^ (((uint32_t)a) >> ((32-(offset))%32)))
/* READ32_UNALIGNED(argAddress): load a uint32_t from argAddress.
 * The non-GNU ARM variant reads through a __packed pointer; the other
 * variants dereference a plain uint32_t pointer.
 * NOTE(review): the plain-pointer variants presumably rely on the target
 * tolerating unaligned 32-bit accesses -- confirm for each platform. */
#if !defined(READ32_UNALIGNED)
#if defined (__arm__) && !defined(__GNUC__)
#define READ32_UNALIGNED(argAddress) (*((const __packed uint32_t*)(argAddress)))
#elif defined(_MSC_VER)
#define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress)))
#define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress)))
/* WRITE32_UNALIGNED(argAddress, argData): store argData as a uint32_t at
 * argAddress; same compiler split as READ32_UNALIGNED. */
#if !defined(WRITE32_UNALIGNED)
#if defined (__arm__) && !defined(__GNUC__)
#define WRITE32_UNALIGNED(argAddress, argData) (*((__packed uint32_t*)(argAddress)) = (argData))
#elif defined(_MSC_VER)
#define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData))
#define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData))
/* index(x, y): linear lane index of column x, row y, with both coordinates
 * wrapped (x mod NCOLUMS, y mod NROWS). Skipped if index is already a macro. */
#if !defined(index)
#define index(__x,__y) ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS))
/* One lane of the Xoodoo state. */
typedef uint32_t tXoodooLane;
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/* Size and rate parameters (in bytes) used to instantiate Cyclist as Xoodyak.
 * They are picked up by the Cyclist code through the Cyclist_f_bPrime,
 * Cyclist_Rhash, Cyclist_Rkin, Cyclist_Rkout and Cyclist_lRatchet mappings. */
#ifndef _Xoodyak_parameters_h_
#define _Xoodyak_parameters_h_
/* State width; the last state byte (index f_bPrime - 1) receives the
 * domain-separation byte in Cyclist_Down / Cyclist_Up. */
#define Xoodyak_f_bPrime 48
/* Absorb and squeeze block size in hash mode. */
#define Xoodyak_Rhash 16
/* Absorb block size in keyed mode. */
#define Xoodyak_Rkin 44
/* Squeeze / encrypt / decrypt block size in keyed mode. */
#define Xoodyak_Rkout 24
/* Size of the scratch buffer used by the ratchet operation. */
#define Xoodyak_lRatchet 16
@ The eXtended Keccak Code Package (XKCP)
@ The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
@ For more information, feedback or questions, please refer to the Keccak Team website:
@ To the extent possible under law, the implementer has waived all copyright
@ and related or neighboring rights to the source code in this file.
@ WARNING: These functions work only on little-endian CPUs with the ARMv7-M architecture (Cortex-M3, ...).
.syntax unified
@ ----------------------------------------------------------------------------
@ Rotation amounts (in bits) used by the macros below.
.equ _r0 , 5
.equ _r1 , 14
.equ _t3 , 1
.equ _w1 , 11
.equ _e0 , 2
.equ _e1 , 8
@ Plain round constants.
.equ _rc12 , 0x00000058
.equ _rc11 , 0x00000038
.equ _rc10 , 0x000003C0
.equ _rc9 , 0x000000D0
.equ _rc8 , 0x00000120
.equ _rc7 , 0x00000014
.equ _rc6 , 0x00000060
.equ _rc5 , 0x0000002C
.equ _rc4 , 0x00000380
.equ _rc3 , 0x000000F0
.equ _rc2 , 0x000001A0
.equ _rc1 , 0x00000012
@ Pre-rotated round constants: _rcNxM equals _rcN rotated right by M*_r0 bits
@ (e.g. _rc12x1 = ROR(0x58, 5) = 0xc0000002), matching the rotation that
@ mRound leaves pending on the state after M rounds.
@ 6-round schedule (used by Xoofff_ExpandFastLoop).
.equ _rc6x1 , 0x00000003
.equ _rc5x2 , 0x0b000000
.equ _rc4x3 , 0x07000000
.equ _rc3x4 , 0x000f0000
.equ _rc2x5 , 0x0000d000
.equ _rc1x6 , 0x00000048
@ 12-round schedule (used by Xoodoo_Permute_12roundsAsm).
.equ _rc12x1, 0xc0000002
.equ _rc11x2, 0x0e000000
.equ _rc10x3, 0x07800000
.equ _rc9x4 , 0x000d0000
.equ _rc8x5 , 0x00009000
.equ _rc7x6 , 0x00000050
.equ _rc6x7 , 0x0000000c
.equ _rc5x8 , 0x2c000000
.equ _rc4x9 , 0x1c000000
.equ _rc3x10, 0x003c0000
.equ _rc2x11, 0x00034000
.equ _rc1x12, 0x00000120
@ ----------------------------------------------------------------------------
@ mXor3 ro, a0, a1, a2, rho_e1, rho_e2
@ ro = a0 ^ ROL(a1, rho_e1) ^ ROL(a2, rho_e2), the left rotations being
@ expressed as ROR by (32 - amount) mod 32.
@ For each operand, a plain eors is used when the rotation is 0 mod 32 and a
@ rotated eor otherwise.
@ NOTE(review): the .else/.endif/.endm lines that separate the two forms are
@ not visible in this view -- confirm against the full file.
.macro mXor3 ro, a0, a1, a2, rho_e1, rho_e2
.if ((\rho_e1)%32) == 0
eors \ro, \a0, \a1
eor \ro, \a0, \a1, ROR #(32-(\rho_e1))%32
.if ((\rho_e2)%32) == 0
eors \ro, \ro, \a2
eor \ro, \ro, \a2, ROR #(32-(\rho_e2))%32
@ mRliXor ro, ri, rot
@ ro = ro ^ ROL(ri, rot): the rotation is applied to the input operand ri.
@ A plain eors is used when rot is 0 mod 32.
@ NOTE(review): .else/.endif/.endm are not visible in this view.
.macro mRliXor ro, ri, rot
.if ((\rot)%32) == 0
eors \ro, \ro, \ri
eor \ro, \ro, \ri, ROR #(32-(\rot))%32
@ mRloXor ro, ri, rot
@ ro = ri ^ ROL(ro, rot): the rotation is applied to the previous value of the
@ output operand ro, not to ri. A plain eors is used when rot is 0 mod 32.
@ NOTE(review): .else/.endif/.endm are not visible in this view.
.macro mRloXor ro, ri, rot
.if ((\rot)%32) == 0
eors \ro, \ro, \ri
eor \ro, \ri, \ro, ROR #(32-(\rot))%32
@ mChi3 a0, a1, a2, r0, r1
@ Non-linear step on one column (three lanes a0, a1, a2); r0 and r1 are
@ scratch registers. Each lane is XORed with an AND-NOT (bic) of the other
@ two; the ROR #_w1 / ROR #32-_w1 operands account for a1 and a2 being held
@ in different rotations (by _w1 bits) relative to a0.
@ Note that a1 is updated using the already-updated a0, and a2 using the
@ already-updated a0 and a1, exactly as the instruction order below shows.
.macro mChi3 a0,a1,a2,r0,r1
bic \r0, \a2, \a1, ROR #_w1
eors \a0, \a0, \r0, ROR #32-_w1
bic \r1, \a0, \a2, ROR #32-_w1
eors \a1, \a1, \r1
bic \r1, \a1, \a0
eors \a2, \a2, \r1, ROR #_w1
@ mRound r6i, r7i, r8i, r9i, r6w, r7w, r8w, r9w, r10i, r11i, r12i, lri, rho_e1, rho_we2, rc
@ One permutation round on the state held in r2-r5 (first row) plus the eight
@ registers named by the arguments; r0 and r1 are scratch.
@   r6i..r9i   : registers holding the second row as input to Theta
@   r6w..r9w   : the same row as seen by Chi (register roles shifted by one)
@   r10i..lri  : registers holding the third row
@   rho_e1, rho_we2 : rotations still pending on the second / third row from
@                the previous round; they are folded into the XORs here
@                instead of being applied by separate rotate instructions
@   rc         : round constant, pre-rotated to match the state (_rcNxM)
@ Callers permute the register arguments from round to round rather than
@ moving data between registers.
.macro mRound r6i, r7i, r8i, r9i, r6w, r7w, r8w, r9w, r10i, r11i, r12i, lri, rho_e1, rho_we2, rc
@ Theta: Column Parity Mixer (with late Rho-west, Rho-east bit rotations)
mXor3 r0, r5, \r9i, \lri, \rho_e1, \rho_we2
mXor3 r1, r2, \r6i, \r10i, \rho_e1, \rho_we2
mRliXor r0, r0, _r1-_r0
mRloXor r2, r0, 32-_r0
mRloXor \r6i, r0, \rho_e1-_r0
mRloXor \r10i, r0, \rho_we2-_r0
mXor3 r0, r3, \r7i, \r11i, \rho_e1, \rho_we2
mRliXor r1, r1, _r1-_r0
mRloXor r3, r1, 32-_r0
mRloXor \r7i, r1, \rho_e1-_r0
mRloXor \r11i, r1, \rho_we2-_r0
mXor3 r1, r4, \r8i, \r12i, \rho_e1, \rho_we2
mRliXor r0, r0, _r1-_r0
mRloXor r4, r0, 32-_r0
mRloXor \r8i, r0, \rho_e1-_r0
mRloXor \r12i, r0, \rho_we2-_r0
mRliXor r1, r1, _r1-_r0
mRloXor r5, r1, 32-_r0
mRloXor \r9i, r1, \rho_e1-_r0
mRloXor \lri, r1, \rho_we2-_r0
@ After Theta the whole state is rotated -r0
@ from here we must use a1.w instead of a1.i
@ Iota: round constant
@ 0xc0000002 is XORed in two steps; presumably because it cannot be encoded
@ as a single immediate operand -- TODO confirm. The .else/.endif separating
@ this from the generic single eor are not visible in this view.
.if \rc == 0xc0000002
eor r2, r2, #0x00000002
eor r2, r2, #0xc0000000
eor r2, r2, #\rc
@ Chi: non linear step, on colums
mChi3 r2, \r6w, \r10i, r0, r1
mChi3 r3, \r7w, \r11i, r0, r1
mChi3 r4, \r8w, \r12i, r0, r1
mChi3 r5, \r9w, \lri, r0, r1
@ Stack frame shared by the Xoodyak_*FullBlocks routines below, as laid out by
@ their "push {r0-r5}":
@   offsetInstance   : saved r0, the state pointer
@   offsetInitialLen : saved r4, the initial length
@   offsetReturn     : saved r5, the address Xoodoo_Permute_12roundsAsm
@                      returns to
.equ offsetInstance , 0
.equ offsetInitialLen , 16
.equ offsetReturn , 20
@ ----------------------------------------------------------------------------
@ Xoodoo_Permute_12roundsAsm: only callable from asm
@ In/out: the 12 state words in r2-r12 and lr. Clobbers r0 and r1.
@ Entered with a plain branch; returns by loading pc from [sp, #offsetReturn],
@ so the caller must have stored its continuation address in that stack slot.
.type Xoodoo_Permute_12roundsAsm, %function;
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 32, 32, _rc12x1
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc11x2
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc10x3
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc9x4
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc8x5
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc7x6
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc6x7
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc5x8
mRound r6, r7, r8, r9, r9, r6, r7, r8, r10, r11, r12, lr, 1, _e1+_w1, _rc4x9
mRound r9, r6, r7, r8, r8, r9, r6, r7, r12, lr, r10, r11, 1, _e1+_w1, _rc3x10
mRound r8, r9, r6, r7, r7, r8, r9, r6, r10, r11, r12, lr, 1, _e1+_w1, _rc2x11
mRound r7, r8, r9, r6, r6, r7, r8, r9, r12, lr, r10, r11, 1, _e1+_w1, _rc1x12
@ Apply the rotations that the rounds left pending, one amount per row, so
@ that every word is back in its normal bit position.
ror r2, r2, #32-(12*_r0)%32
ror r3, r3, #32-(12*_r0)%32
ror r4, r4, #32-(12*_r0)%32
ror r5, r5, #32-(12*_r0)%32
ror r6, r6, #32-(12*_r0+1)%32
ror r7, r7, #32-(12*_r0+1)%32
ror r8, r8, #32-(12*_r0+1)%32
ror r9, r9, #32-(12*_r0+1)%32
ror r10, r10, #32-(12*_r0+_e1+_w1)%32
ror r11, r11, #32-(12*_r0+_e1+_w1)%32
ror r12, r12, #32-(12*_r0+_e1+_w1)%32
ror lr, lr, #32-(12*_r0+_e1+_w1)%32
ldr pc, [sp, #offsetReturn]
.align 4
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen)
@ {
@ size_t initialLength = XLen;
@ do {
@ SnP_Permute(state ); /* Xoodyak_Up(instance, NULL, 0, 0); */
@ SnP_AddBytes(state, X, 0, Xoodyak_Rkin); /* Xoodyak_Down(instance, X, Xoodyak_Rkin, 0); */
@ SnP_AddByte(state, 0x01, Xoodyak_Rkin);
@ X += Xoodyak_Rkin;
@ XLen -= Xoodyak_Rkin;
@ } while (XLen >= Xoodyak_Rkin);
@ return initialLength - XLen;
@ }
@
@ In: r0 = state, r1 = X, r2 = XLen (the caller guarantees at least one full
@ 44-byte block). Out: r0 = number of bytes absorbed.
@ Frame after "push {r0-r5}": [sp+0] state, [sp+4] X, [sp+8] XLen - 44,
@ [sp+16] initial length, [sp+20] continuation address for the permutation.
@ NOTE(review): the entry, _Loop and _Ret labels are not visible in this view.
.equ offsetAbsorbX , 4
.equ offsetAbsorbXLen , 8
.global Xoodyak_AbsorbKeyedFullBlocks
.type Xoodyak_AbsorbKeyedFullBlocks, %function;
push {r4-r12,lr}
mov r4, r2 @ r4 initialLength
subs r2, r2, #44
ldr r5, =Xoodyak_AbsorbKeyedFullBlocks_Ret+1
push {r0-r5}
ldmia r0, {r2-r12,lr}
b.w Xoodoo_Permute_12roundsAsm
@ XOR eleven input words (44 bytes) into state words 0-10 (r2-r12).
ldr r0, [sp, #offsetAbsorbX]
ldr r1, [r0], #4
eors r2, r2, r1
ldr r1, [r0], #4
eors r3, r3, r1
ldr r1, [r0], #4
eors r4, r4, r1
ldr r1, [r0], #4
eors r5, r5, r1
ldr r1, [r0], #4
eors r6, r6, r1
ldr r1, [r0], #4
eors r7, r7, r1
ldr r1, [r0], #4
eors r8, r8, r1
ldr r1, [r0], #4
eors r9, r9, r1
ldr r1, [r0], #4
eors r10, r10, r1
ldr r1, [r0], #4
eors r11, r11, r1
ldr r1, [r0], #4
eors lr, lr, #1 @ padding byte 0x01 at byte offset 44 (low byte of word 11)
eors r12, r12, r1
@ Save the advanced input pointer and count down the remaining length; loop
@ while no borrow occurs.
ldr r1, [sp, #offsetAbsorbXLen]
str r0, [sp, #offsetAbsorbX]
subs r1, r1, #44
str r1, [sp, #offsetAbsorbXLen]
bcs Xoodyak_AbsorbKeyedFullBlocks_Loop
@ Write the state back and return initialLength - remaining length.
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
pop {r0-r5}
adds r2, r2, #44 @ undo the look-ahead subtraction to get the true remainder
sub r0, r4, r2
pop {r4-r12,pc}
.align 4
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen)
@ {
@ size_t initialLength = XLen;
@ do {
@ SnP_Permute(state ); /* Xoodyak_Up(instance, NULL, 0, 0); */
@ SnP_AddBytes(state, X, 0, Xoodyak_Rhash); /* Xoodyak_Down(instance, X, Xoodyak_Rhash, 0); */
@ SnP_AddByte(state, 0x01, Xoodyak_Rhash);
@ X += Xoodyak_Rhash;
@ XLen -= Xoodyak_Rhash;
@ } while (XLen >= Xoodyak_Rhash);
@ return initialLength - XLen;
@ }
@
@ In: r0 = state, r1 = X, r2 = XLen (at least one full 16-byte block).
@ Out: r0 = number of bytes absorbed.
@ Uses the offsetAbsorbX / offsetAbsorbXLen frame slots and the same frame
@ layout as Xoodyak_AbsorbKeyedFullBlocks.
@ NOTE(review): the entry, _Loop and _Ret labels are not visible in this view.
.global Xoodyak_AbsorbHashFullBlocks
.type Xoodyak_AbsorbHashFullBlocks, %function;
push {r4-r12,lr}
mov r4, r2 @ r4 initialLength
subs r2, r2, #16
ldr r5, =Xoodyak_AbsorbHashFullBlocks_Ret+1
push {r0-r5}
ldmia r0, {r2-r12,lr}
b.w Xoodoo_Permute_12roundsAsm
@ XOR four input words (16 bytes) into state words 0-3 (r2-r5).
ldr r0, [sp, #offsetAbsorbX]
ldr r1, [r0], #4
eors r2, r2, r1
ldr r1, [r0], #4
eors r3, r3, r1
ldr r1, [r0], #4
eors r4, r4, r1
ldr r1, [r0], #4
eors r6, r6, #1 @ padding byte 0x01 at byte offset 16 (low byte of word 4)
eors r5, r5, r1
ldr r1, [sp, #offsetAbsorbXLen]
str r0, [sp, #offsetAbsorbX]
subs r1, r1, #16
str r1, [sp, #offsetAbsorbXLen]
bcs Xoodyak_AbsorbHashFullBlocks_Loop
@ Write the state back and return initialLength - remaining length.
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
pop {r0-r5}
adds r2, r2, #16 @ undo the look-ahead subtraction to get the true remainder
sub r0, r4, r2
pop {r4-r12,pc}
.align 4
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen)
@ {
@ size_t initialLength = YLen;
@ do {
@ SnP_AddByte(state, 0x01, 0); /* Xoodyak_Down(instance, NULL, 0, 0); */
@ SnP_Permute(state ); /* Xoodyak_Up(instance, Y, Xoodyak_Rkout, 0); */
@ SnP_ExtractBytes(state, Y, 0, Xoodyak_Rkout);
@ Y += Xoodyak_Rkout;
@ YLen -= Xoodyak_Rkout;
@ } while (YLen >= Xoodyak_Rkout);
@ return initialLength - YLen;
@ }
@
@ In: r0 = state, r1 = Y, r2 = YLen (at least one full 24-byte block).
@ Out: r0 = number of bytes produced.
@ Frame after "push {r0-r5}": [sp+0] state, [sp+4] Y, [sp+8] YLen - 24,
@ [sp+16] initial length, [sp+20] continuation address for the permutation.
@ NOTE(review): the entry, _Loop and _Ret labels are not visible in this view;
@ per the pseudo-code the loop presumably restarts at the "eors r2, r2, #1".
.equ offsetSqueezeY , 4
.equ offsetSqueezeYLen , 8
.global Xoodyak_SqueezeKeyedFullBlocks
.type Xoodyak_SqueezeKeyedFullBlocks, %function;
push {r4-r12,lr}
mov r4, r2 @ r4 initialLength
subs r2, r2, #24
ldr r5, =Xoodyak_SqueezeKeyedFullBlocks_Ret+1
push {r0-r5}
ldmia r0, {r2-r12,lr}
eors r2, r2, #1 @ byte 0x01 at byte offset 0 (empty Down)
b.w Xoodoo_Permute_12roundsAsm
@ Store state words 0-5 (24 bytes) to the output.
ldr r0, [sp, #offsetSqueezeY]
str r2, [r0], #4
str r3, [r0], #4
str r4, [r0], #4
str r5, [r0], #4
str r6, [r0], #4
str r7, [r0], #4
ldr r1, [sp, #offsetSqueezeYLen]
str r0, [sp, #offsetSqueezeY]
subs r1, r1, #24
str r1, [sp, #offsetSqueezeYLen]
bcs Xoodyak_SqueezeKeyedFullBlocks_Loop
@ Write the state back and return initialLength - remaining length.
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
pop {r0-r5}
adds r2, r2, #24 @ undo the look-ahead subtraction to get the true remainder
sub r0, r4, r2
pop {r4-r12,pc}
.align 4
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen)
@ {
@ size_t initialLength = YLen;
@ do {
@ SnP_AddByte(state, 0x01, 0); /* Xoodyak_Down(instance, NULL, 0, 0); */
@ SnP_Permute(state); /* Xoodyak_Up(instance, Y, Xoodyak_Rhash, 0); */
@ SnP_ExtractBytes(state, Y, 0, Xoodyak_Rhash);
@ Y += Xoodyak_Rhash;
@ YLen -= Xoodyak_Rhash;
@ } while (YLen >= Xoodyak_Rhash);
@ return initialLength - YLen;
@ }
@
@ In: r0 = state, r1 = Y, r2 = YLen (at least one full 16-byte block).
@ Out: r0 = number of bytes produced.
@ Uses the offsetSqueezeY / offsetSqueezeYLen frame slots and the same frame
@ layout as Xoodyak_SqueezeKeyedFullBlocks.
@ NOTE(review): the entry, _Loop and _Ret labels are not visible in this view.
.global Xoodyak_SqueezeHashFullBlocks
.type Xoodyak_SqueezeHashFullBlocks, %function;
push {r4-r12,lr}
mov r4, r2 @ r4 initialLength
subs r2, r2, #16
ldr r5, =Xoodyak_SqueezeHashFullBlocks_Ret+1
push {r0-r5}
ldmia r0, {r2-r12,lr}
eors r2, r2, #1 @ byte 0x01 at byte offset 0 (empty Down)
b.w Xoodoo_Permute_12roundsAsm
@ Store state words 0-3 (16 bytes) to the output.
ldr r0, [sp, #offsetSqueezeY]
str r2, [r0], #4
str r3, [r0], #4
str r4, [r0], #4
str r5, [r0], #4
ldr r1, [sp, #offsetSqueezeYLen]
str r0, [sp, #offsetSqueezeY]
subs r1, r1, #16
str r1, [sp, #offsetSqueezeYLen]
bcs Xoodyak_SqueezeHashFullBlocks_Loop
@ Write the state back and return initialLength - remaining length.
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
pop {r0-r5}
adds r2, r2, #16 @ undo the look-ahead subtraction to get the true remainder
sub r0, r4, r2
pop {r4-r12,pc}
.align 4
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen)
@ {
@ size_t initialLength = IOLen;
@ do {
@ SnP_Permute(state);
@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout);
@ SnP_OverwriteBytes(state, O, 0, Xoodyak_Rkout);
@ SnP_AddByte(state, 0x01, Xoodyak_Rkout);
@ I += Xoodyak_Rkout;
@ O += Xoodyak_Rkout;
@ IOLen -= Xoodyak_Rkout;
@ } while (IOLen >= Xoodyak_Rkout);
@ return initialLength - IOLen;
@ }
@
@ In: r0 = state, r1 = I, r2 = O, r3 = IOLen (at least one full 24-byte
@ block). Out: r0 = number of bytes processed.
@ Frame after "push {r0-r5}": [sp+0] state, [sp+4] I, [sp+8] O,
@ [sp+12] IOLen - 24, [sp+16] initial length, [sp+20] continuation address.
@ offsetCryptI and offsetCryptO include an extra 8 because they are only used
@ while r10 and r11 are additionally pushed; offsetCryptIOLen is used after
@ those two registers have been popped again.
@ NOTE(review): the entry, _Loop and _Ret labels are not visible in this view.
.equ offsetCryptI , 4+8
.equ offsetCryptO , 8+8
.equ offsetCryptIOLen , 12
.global Xoodyak_EncryptFullBlocks
.type Xoodyak_EncryptFullBlocks, %function;
push {r4-r12,lr}
mov r4, r3 @ r4 initialLength
subs r3, r3, #24
ldr r5, =Xoodyak_EncryptFullBlocks_Ret+1
push {r0-r5}
ldmia r0, {r2-r12,lr}
b.w Xoodoo_Permute_12roundsAsm
@ r10/r11 hold state words; save them so they can serve as O and I pointers.
push {r10, r11}
ldr r11, [sp, #offsetCryptI]
ldr r10, [sp, #offsetCryptO]
@ For state words 0-5: word ^= input word; the result is both written to the
@ output and kept as the new state word.
ldr r0, [r11], #4
ldr r1, [r11], #4
eors r2, r2, r0
str r2, [r10], #4
eors r3, r3, r1
ldr r0, [r11], #4
str r3, [r10], #4
eors r4, r4, r0
ldr r1, [r11], #4
str r4, [r10], #4
eors r5, r5, r1
ldr r0, [r11], #4
str r5, [r10], #4
eors r6, r6, r0
ldr r1, [r11], #4
str r6, [r10], #4
eors r7, r7, r1
str r7, [r10], #4
str r10, [sp, #offsetCryptO]
str r11, [sp, #offsetCryptI]
pop {r10, r11}
ldr r0, [sp, #offsetCryptIOLen]
eors r8, r8, #1 @ padding byte 0x01 at byte offset 24 (low byte of word 6)
subs r0, r0, #24
str r0, [sp, #offsetCryptIOLen]
bcs Xoodyak_EncryptFullBlocks_Loop
@ Write the state back and return initialLength - remaining length.
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
pop {r0-r5}
adds r3, r3, #24 @ undo the look-ahead subtraction to get the true remainder
sub r0, r4, r3
pop {r4-r12,pc}
.align 4
@ ----------------------------------------------------------------------------
@ size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen)
@ {
@ size_t initialLength = IOLen;
@ do {
@ SnP_Permute(state);
@ SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout);
@ SnP_AddBytes(state, O, 0, Xoodyak_Rkout);
@ SnP_AddByte(state, 0x01, Xoodyak_Rkout);
@ I += Xoodyak_Rkout;
@ O += Xoodyak_Rkout;
@ IOLen -= Xoodyak_Rkout;
@ } while (IOLen >= Xoodyak_Rkout);
@ return initialLength - IOLen;
@ }
@
@ In: r0 = state, r1 = I, r2 = O, r3 = IOLen (at least one full 24-byte
@ block). Out: r0 = number of bytes processed.
@ Uses the offsetCryptI / offsetCryptO / offsetCryptIOLen frame slots and the
@ same frame layout as Xoodyak_EncryptFullBlocks.
@ NOTE(review): the entry, _Loop and _Ret labels are not visible in this view.
.global Xoodyak_DecryptFullBlocks
.type Xoodyak_DecryptFullBlocks, %function;
push {r4-r12,lr}
mov r4, r3 @ r4 initialLength
subs r3, r3, #24
ldr r5, =Xoodyak_DecryptFullBlocks_Ret+1
push {r0-r5}
ldmia r0, {r2-r12,lr}
b.w Xoodoo_Permute_12roundsAsm
@ r10/r11 hold state words; save them so they can serve as O and I pointers.
push {r10, r11}
ldr r11, [sp, #offsetCryptI]
ldr r10, [sp, #offsetCryptO]
@ For state words 0-5: output word = state word ^ input word; the state word
@ is then replaced by the input word (the "mov" after each store), which
@ equals state ^ output as in the pseudo-code above.
ldr r0, [r11], #4
ldr r1, [r11], #4
eors r2, r2, r0
str r2, [r10], #4
mov r2, r0
eors r3, r3, r1
ldr r0, [r11], #4
str r3, [r10], #4
mov r3, r1
eors r4, r4, r0
ldr r1, [r11], #4
str r4, [r10], #4
mov r4, r0
eors r5, r5, r1
ldr r0, [r11], #4
str r5, [r10], #4
mov r5, r1
eors r6, r6, r0
ldr r1, [r11], #4
str r6, [r10], #4
mov r6, r0
eors r7, r7, r1
str r7, [r10], #4
mov r7, r1
str r10, [sp, #offsetCryptO]
str r11, [sp, #offsetCryptI]
pop {r10, r11}
ldr r0, [sp, #offsetCryptIOLen]
eors r8, r8, #1 @ padding byte 0x01 at byte offset 24 (low byte of word 6)
subs r0, r0, #24
str r0, [sp, #offsetCryptIOLen]
bcs Xoodyak_DecryptFullBlocks_Loop
@ Write the state back and return initialLength - remaining length.
ldr r0, [sp, #offsetInstance]
stmia r0, {r2-r12,lr}
pop {r0-r5}
adds r3, r3, #24 @ undo the look-ahead subtraction to get the true remainder
sub r0, r4, r3
pop {r4-r12,pc}
.align 4
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifdef XoodooReference
#include "displayIntermediateValues.h"
#include <assert.h>
#include <string.h>
#include "Xoodyak.h"
#ifdef OUTPUT
#include <stdlib.h>
#include <string.h>
/* Debug helper (compiled only when OUTPUT is defined): writes one line to f
 * consisting of synopsis, a colon, then each of the length bytes of data as
 * " xx" (two lowercase hex digits), followed by a newline. */
static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length);
static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length)
unsigned int i;
fprintf(f, "%s:", synopsis);
for(i=0; i<length; i++)
fprintf(f, " %02x", (unsigned int)data[i]);
fprintf(f, "\n");
/* Yields the smaller of a and b (b when they compare equal or unordered).
 * Function-like macro: each argument is expanded twice, so do not pass
 * expressions with side effects. */
#define MyMin(a,b) (((b) > (a)) ? (a) : (b))
/* Instantiate the generic Cyclist code for Xoodyak: SnP selects the Xoodoo
 * state-and-permutation functions, SnP_Permute the 12-round permutation, and
 * prefix the public name prefix. The three macros are undefined again after
 * the template has been included.
 * NOTE(review): the file name in the #include below is empty in this view;
 * presumably it names the Cyclist template file -- confirm. */
#ifdef XKCP_has_Xoodoo
#include "Xoodoo-SnP.h"
#define SnP Xoodoo
#define SnP_Permute Xoodoo_Permute_12rounds
#define prefix Xoodyak
#include ""
#undef prefix
#undef SnP
#undef SnP_Permute
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/* Public header for Xoodyak: declares the Xoodyak_Instance structure via the
 * Cyclist declaration macro, sized and aligned for the Xoodoo state.
 * Requires XKCP_has_Xoodoo from config.h; otherwise compilation stops with
 * the #error below. */
#ifndef _Xoodyak_h_
#define _Xoodyak_h_
#include "config.h"
#ifdef XKCP_has_Xoodoo
#include <stddef.h>
#include "Cyclist.h"
#include "Xoodoo-SnP.h"
#include "Xoodyak-parameters.h"
KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment)
#error This requires an implementation of Xoodoo
The eXtended Keccak Code Package (XKCP)
Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/* ALIGN(x): compiler-specific alignment specifier requesting x-byte
 * alignment. Any pre-existing ALIGN definition is removed first. The last
 * definition expands to nothing, i.e. no alignment request is made for
 * compilers that are not recognised. */
#ifndef _align_h_
#define _align_h_
/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. */
#ifdef ALIGN
#undef ALIGN
#if defined(__GNUC__)
#define ALIGN(x) __attribute__ ((aligned(x)))
#elif defined(_MSC_VER)
#define ALIGN(x) __declspec(align(x))
#elif defined(__ARMCC_VERSION)
#define ALIGN(x) __align(x)
#define ALIGN(x)
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
1. source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
2. binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation;
3. the name of the copyright holder is not used to endorse products
built using this software without specific written permission.
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
Issue Date: 20/12/2007
Changes for ARM 9/9/2010
/* Platform byte-order detection. IS_BIG_ENDIAN and IS_LITTLE_ENDIAN are the
 * two values a byte-order setting can be compared against.
 * NOTE(review): the lines that actually assign the platform byte order inside
 * the branches below are not visible in this view, so the branches appear
 * empty here; confirm against the full header. */
#ifndef _BRG_ENDIAN_H
#define _BRG_ENDIAN_H
#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
/* The system-header probing below is disabled by this #if 0. */
#if 0
/* Include files where endian defines and byteswap functions may reside */
#if defined( __sun )
# include <sys/isa_defs.h>
#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
# include <sys/endian.h>
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
# include <machine/endian.h>
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
# if !defined( __MINGW32__ ) && !defined( _AIX )
# include <endian.h>
# if !defined( __BEOS__ )
# include <byteswap.h>
# endif
# endif
/* Now attempt to set the define for platform byte order using any */
/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
/* seem to encompass most endian symbol definitions */
#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
# endif
#elif defined( BIG_ENDIAN )
#elif defined( LITTLE_ENDIAN )
#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
# endif
#elif defined( _BIG_ENDIAN )
#elif defined( _LITTLE_ENDIAN )
#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN )
#elif defined( __LITTLE_ENDIAN )
#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
# endif
#elif defined( __BIG_ENDIAN__ )
#elif defined( __LITTLE_ENDIAN__ )
/* if the platform byte order could not be determined, then try to */
/* set this define using common machine defines */
#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
defined( vax ) || defined( vms ) || defined( VMS ) || \
defined( __VMS ) || defined( _M_X64 )
#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \
defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ )
#elif defined(__arm__)
# ifdef __BIG_ENDIAN
# else
# endif
/* Manual fallback: exactly one of the next two branches is meant to be
 * enabled by editing the constant; as written the first one is active. */
#elif 1 /* **** EDIT HERE IF NECESSARY **** */
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order
/* File generated by ToTargetConfigFile.xsl */
/* Feature switches for this build: XKCP_has_Xoodoo is tested by the Xoodyak
 * header and source before the Xoodoo-based code is compiled. */
#define XKCP_has_Xoodyak
#define XKCP_has_Xoodoo
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#include "crypto_hash.h"
/* Default digest length in bytes when the build does not provide one. */
#ifndef crypto_hash_BYTES
#define crypto_hash_BYTES 32
#include "Xoodyak.h"
/*
 * Hashes inlen bytes at in and writes crypto_hash_BYTES bytes to out.
 * Uses Xoodyak with no key, no ID and no counter: one Absorb of the whole
 * input followed by one Squeeze. Always returns 0.
 * Note: inlen is narrowed to size_t by the cast below, so on targets where
 * size_t is narrower than unsigned long long, longer inputs are truncated.
 * The "#if 0" section is a disabled debug dump of the digest.
 */
int crypto_hash(unsigned char *out, const unsigned char *in, unsigned long long inlen)
Xoodyak_Instance instance;
Xoodyak_Initialize(&instance, NULL, 0, NULL, 0, NULL, 0);
Xoodyak_Absorb(&instance, in, (size_t)inlen);
Xoodyak_Squeeze(&instance, out, crypto_hash_BYTES);
#if 0
unsigned int i;
for (i = 0; i < crypto_hash_BYTES; ++i )
printf("\\x%02x", out[i] );
return 0;
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Cyclist_h_
#define _Cyclist_h_
#include <stdint.h>
#include "align.h"
/* Values stored in the instance's mode field: hash (unkeyed) or keyed. */
#define Cyclist_ModeHash 1
#define Cyclist_ModeKeyed 2
/* Values stored in the instance's phase field: which of Down / Up was
 * performed last. */
#define Cyclist_PhaseDown 1
#define Cyclist_PhaseUp 2
/* KCP_DeclareCyclistStructure(prefix, size, alignment)
 * Declares the type prefix##_Instance: a state buffer of size bytes with the
 * requested alignment, the current phase and mode, and the absorb / squeeze
 * rates in bytes. The variant selected when OUTPUT is defined additionally
 * carries a shadow copy of the state and a FILE pointer, both used for
 * tracing intermediate values.
 * NOTE(review): the #else/#endif between the two variants are not visible in
 * this view. */
#ifdef OUTPUT
#include <stdio.h>
#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
uint8_t state[size]; \
uint8_t stateShadow[size]; \
FILE *file; \
unsigned int phase; \
unsigned int mode; \
unsigned int Rabsorb; \
unsigned int Rsqueeze; \
} prefix##_Instance;
#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
uint8_t state[size]; \
unsigned int phase; \
unsigned int mode; \
unsigned int Rabsorb; \
unsigned int Rsqueeze; \
} prefix##_Instance;
/* KCP_DeclareCyclistFunctions(prefix)
 * Declares the prototypes of the public Cyclist functions for the given name
 * prefix: Initialize, Absorb, Encrypt, Decrypt, Squeeze, SqueezeKey and
 * Ratchet. All operate on a prefix##_Instance and return nothing. */
#define KCP_DeclareCyclistFunctions(prefix) \
void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \
void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \
void prefix##_Encrypt(prefix##_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen); \
void prefix##_Decrypt(prefix##_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen); \
void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \
void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \
void prefix##_Ratchet(prefix##_Instance *instance);
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/* Token pasting with macro expansion of the arguments: JOIN expands a and b
 * first and then pastes them via JOIN0. */
#define JOIN0(a, b) a ## b
#define JOIN(a, b) JOIN0(a, b)
/* State-and-permutation primitives, resolved through the SnP macro that the
 * including file defines (e.g. SnP = Xoodoo gives Xoodoo_AddBytes). */
#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize)
#define SnP_Initialize JOIN(SnP, _Initialize)
#define SnP_AddBytes JOIN(SnP, _AddBytes)
#define SnP_AddByte JOIN(SnP, _AddByte)
#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes)
#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes)
#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes)
/* Public Cyclist names, resolved through the prefix macro that the including
 * file defines. */
#define Cyclist_Instance JOIN(prefix, _Instance)
#define Cyclist_Initialize JOIN(prefix, _Initialize)
#define Cyclist_Absorb JOIN(prefix, _Absorb)
#define Cyclist_Encrypt JOIN(prefix, _Encrypt)
#define Cyclist_Decrypt JOIN(prefix, _Decrypt)
#define Cyclist_Squeeze JOIN(prefix, _Squeeze)
#define Cyclist_SqueezeKey JOIN(prefix, _SqueezeKey)
#define Cyclist_Ratchet JOIN(prefix, _Ratchet)
/* Internal (static) helpers. */
#define Cyclist_AbsorbAny JOIN(prefix, _AbsorbAny)
#define Cyclist_AbsorbKey JOIN(prefix, _AbsorbKey)
#define Cyclist_SqueezeAny JOIN(prefix, _SqueezeAny)
#define Cyclist_Down JOIN(prefix, _Down)
#define Cyclist_Up JOIN(prefix, _Up)
#define Cyclist_Crypt JOIN(prefix, _Crypt)
/* Size and rate parameters supplied by the instantiation. */
#define Cyclist_f_bPrime JOIN(prefix, _f_bPrime)
#define Cyclist_Rhash JOIN(prefix, _Rhash)
#define Cyclist_Rkin JOIN(prefix, _Rkin)
#define Cyclist_Rkout JOIN(prefix, _Rkout)
#define Cyclist_lRatchet JOIN(prefix, _lRatchet)
/* Optional optimized routines that process several full blocks per call;
 * only mapped when the build defines CyclistFullBlocks_supported. */
#if defined(CyclistFullBlocks_supported)
#define Cyclist_AbsorbKeyedFullBlocks JOIN(prefix, _AbsorbKeyedFullBlocks)
#define Cyclist_AbsorbHashFullBlocks JOIN(prefix, _AbsorbHashFullBlocks)
#define Cyclist_SqueezeKeyedFullBlocks JOIN(prefix, _SqueezeKeyedFullBlocks)
#define Cyclist_SqueezeHashFullBlocks JOIN(prefix, _SqueezeHashFullBlocks)
#define Cyclist_EncryptFullBlocks JOIN(prefix, _EncryptFullBlocks)
#define Cyclist_DecryptFullBlocks JOIN(prefix, _DecryptFullBlocks)
/* ------- Cyclist internal interfaces ------- */
/* ------- Cyclist internal interfaces ------- */
/*
 * Down step: XORs the XiLen bytes at Xi into the start of the state, XORs the
 * padding byte 0x01 at offset XiLen, and XORs the domain byte Cd into the
 * last state byte (in hash mode only bit 0 of Cd is used). Records that the
 * instance is now in the Down phase.
 * Xi may be NULL when XiLen is 0 (as the callers in this file do).
 */
static void Cyclist_Down(Cyclist_Instance *instance, const uint8_t *Xi, unsigned int XiLen, uint8_t Cd)
SnP_AddBytes(instance->state, Xi, 0, XiLen);
SnP_AddByte(instance->state, 0x01, XiLen);
SnP_AddByte(instance->state, (instance->mode == Cyclist_ModeHash) ? (Cd & 0x01) : Cd, Cyclist_f_bPrime - 1);
instance->phase = Cyclist_PhaseDown;
/*
 * Up step: outside hash mode, XORs the domain byte Cu into the last state
 * byte; then applies the permutation, records the Up phase, and copies the
 * first YiLen state bytes to Yi. Callers in this file pass Yi = NULL with
 * YiLen = 0 when no output is wanted.
 * When OUTPUT is defined and instance->file is set, the data XORed into the
 * state since the last trace (state XOR shadow) and the state after the
 * permutation are written to that file, and the shadow copy is refreshed.
 * NOTE(review): closing braces and #endif lines are not visible in this view.
 */
static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu)
#if defined(OUTPUT)
uint8_t s[Cyclist_f_bPrime];
if (instance->mode != Cyclist_ModeHash) {
SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1);
#if defined(OUTPUT)
if (instance->file != NULL) {
SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime );
SnP_Permute( instance->state );
#if defined(OUTPUT)
if (instance->file != NULL) {
memcpy( instance->stateShadow, instance->state, sizeof(instance->state) );
fprintf( instance->file, "Data XORed" );
displayByteString( instance->file, "", s, Cyclist_f_bPrime );
SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
fprintf( instance->file, "After f() ");
displayByteString( instance->file, "", s, Cyclist_f_bPrime );
instance->phase = Cyclist_PhaseUp;
SnP_ExtractBytes( instance->state, Yi, 0, YiLen );
/*
 * Absorbs XLen bytes from X in blocks of at most r bytes. Each block is
 * preceded by an Up (without output) unless the instance is already in the
 * Up phase, and is absorbed with a Down. Only the first Down carries the
 * domain byte Cd; subsequent blocks use 0. The loop body runs at least once,
 * so an empty input still performs one Down with a zero-length block.
 * When CyclistFullBlocks_supported is defined, remaining data of at least one
 * full Rkin (or Rhash) block is handed to the optimized full-block routine,
 * which reports how many bytes it consumed.
 * NOTE(review): closing braces and #endif lines are not visible in this view.
 */
static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd)
unsigned int splitLen;
do {
if (instance->phase != Cyclist_PhaseUp) {
Cyclist_Up(instance, NULL, 0, 0);
splitLen = MyMin(XLen, r);
Cyclist_Down(instance, X, splitLen, Cd);
Cd = 0;
X += splitLen;
XLen -= splitLen;
#if defined(CyclistFullBlocks_supported)
if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) {
size_t lenProcessed = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen);
X += lenProcessed;
XLen -= lenProcessed;
else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) {
size_t lenProcessed = Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen);
X += lenProcessed;
XLen -= lenProcessed;
} while ( XLen != 0 );
/*
 * Switches the instance from hash mode to keyed mode (rates Rkin / Rkout) and,
 * if KLen is non-zero, absorbs the string K || ID || (byte)IDLen with domain
 * byte 0x02. A non-empty counter is then absorbed one byte per block
 * (r = 1) with domain byte 0x00.
 * Preconditions (checked with assert only): the instance is in hash mode and
 * KLen + IDLen <= Rkin - 1, so the string fits the local KID buffer.
 * NOTE(review): with assertions disabled nothing bounds the copies into KID;
 * callers must respect the length limit. Closing braces are not visible in
 * this view, so the exact nesting of the counter branch should be confirmed.
 */
static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
uint8_t KID[Cyclist_Rkin];
assert(instance->mode == Cyclist_ModeHash);
assert((KLen + IDLen) <= (Cyclist_Rkin - 1));
instance->mode = Cyclist_ModeKeyed;
instance->Rabsorb = Cyclist_Rkin;
instance->Rsqueeze = Cyclist_Rkout;
if (KLen != 0) {
memcpy(KID, K, KLen);
memcpy(KID + KLen, ID, IDLen);
KID[KLen + IDLen] = (uint8_t)IDLen;
Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02);
if (counterLen != 0) {
Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00);
/*
 * Produces YLen output bytes at Y.
 * The first block (at most Rsqueeze bytes) comes from an Up carrying the
 * domain byte Cu. Every further block is produced by an empty Down followed
 * by an Up with domain byte 0.
 * When CyclistFullBlocks_supported is defined, remaining output of at least
 * one full squeeze block is delegated to the optimized full-block routine for
 * the current mode, which reports how many bytes it produced. The keyed
 * routine works in Rkout-sized blocks, so the guard compares against Rkout
 * (the squeeze rate), not the absorb rate Rkin.
 */
static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu)
{
    unsigned int len;

    len = MyMin(YLen, instance->Rsqueeze );
    Cyclist_Up(instance, Y, len, Cu);
    Y += len;
    YLen -= len;
    while (YLen != 0) {
#if defined(CyclistFullBlocks_supported)
        if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkout)) {
            size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen);
            Y += lenProcessed;
            YLen -= lenProcessed;
        }
        else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) {
            size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen);
            Y += lenProcessed;
            YLen -= lenProcessed;
        }
        else
#endif
        {
            Cyclist_Down(instance, NULL, 0, 0);
            len = MyMin(YLen, instance->Rsqueeze );
            Cyclist_Up(instance, Y, len, 0);
            Y += len;
            YLen -= len;
        }
    }
}
/*
 * Common implementation of Encrypt (decrypt == 0) and Decrypt (decrypt != 0).
 * Processes IOLen bytes from I to O in blocks of at most Rkout bytes. For
 * each block: Up without output (domain byte 0x80 for the first block, 0
 * afterwards), O = I XOR key stream taken from the state, then Down with the
 * plaintext of that block. The loop body runs at least once, so an empty
 * message still performs one Up/Down pair.
 * When encrypting, the plaintext block is copied to a local buffer before O
 * is written, so I and O may be the same buffer.
 * When CyclistFullBlocks_supported is defined, every block after the first
 * that is at least Rkout bytes long is delegated to the optimized full-block
 * routine, which reports how many bytes it processed.
 * The state primitives are reached through the SnP_ name mapping, like the
 * rest of this template, rather than through a hard-coded permutation name.
 */
static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt)
{
    unsigned int splitLen;
    uint8_t P[Cyclist_Rkout];
    uint8_t Cu = 0x80;

    do {
        if (decrypt != 0) {
#if defined(CyclistFullBlocks_supported)
            if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
                size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen);
                I += lenProcessed;
                O += lenProcessed;
                IOLen -= lenProcessed;
            }
            else
#endif
            {
                splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
                Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
                SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
                Cyclist_Down(instance, O, splitLen, 0x00);
                I += splitLen;
                O += splitLen;
                IOLen -= splitLen;
            }
        }
        else {
#if defined(CyclistFullBlocks_supported)
            if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
                size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen);
                I += lenProcessed;
                O += lenProcessed;
                IOLen -= lenProcessed;
            }
            else
#endif
            {
                splitLen = MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
                memcpy(P, I, splitLen); /* keep the plaintext: O may alias I */
                Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
                SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
                Cyclist_Down(instance, P, splitLen, 0x00);
                I += splitLen;
                O += splitLen;
                IOLen -= splitLen;
            }
        }
        Cu = 0x00;
    } while ( IOLen != 0 );
}
/* ------- Cyclist interfaces ------- */
/**
 * Cyclist_Initialize - put a Cyclist instance into its starting state.
 *
 * The permutation state is reset first, then the instance is placed in hash
 * mode (phase Up, absorb and squeeze rates both Cyclist_Rhash). When KLen is
 * non-zero the key material is handed to Cyclist_AbsorbKey together with ID
 * and counter, which switches the instance to keyed mode. When KLen is zero,
 * ID and counter are not used.
 *
 * @param instance   instance to initialize; every field is overwritten.
 * @param K          key bytes (may be unused when KLen is 0).
 * @param KLen       key length in bytes; 0 selects unkeyed use.
 * @param ID         identifier bytes forwarded to Cyclist_AbsorbKey.
 * @param IDLen      identifier length in bytes.
 * @param counter    counter bytes forwarded to Cyclist_AbsorbKey.
 * @param counterLen counter length in bytes.
 */
void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
{
    /* The state must hold a defined value before anything is absorbed into
       it; without this reset the first absorb reads uninitialized memory. */
    SnP_Initialize(instance->state);
    instance->phase = Cyclist_PhaseUp;
    instance->mode = Cyclist_ModeHash;
    instance->Rabsorb = Cyclist_Rhash;
    instance->Rsqueeze = Cyclist_Rhash;
#ifdef OUTPUT
    /* Debug build only: no trace file yet, and a cleared shadow state. */
    instance->file = 0;
    SnP_Initialize( instance->stateShadow );
#endif
    if (KLen != 0) {
        Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, counter, counterLen);
    }
}
/* Cyclist_Absorb: absorb the XLen bytes at X into the instance. Forwards to
   Cyclist_AbsorbAny with the instance's current absorb rate (Rabsorb) and the
   constant 0x03 as the last argument. */
void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, size_t XLen)
Cyclist_AbsorbAny(instance, X, XLen, instance->Rabsorb, 0x03);
/* Cyclist_Encrypt: encrypt PLen bytes from P into C. Keyed mode is required;
   this is checked with assert only. Forwards to Cyclist_Crypt with decrypt = 0. */
void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen)
assert(instance->mode == Cyclist_ModeKeyed);
Cyclist_Crypt(instance, P, C, PLen, 0);
/* Cyclist_Decrypt: decrypt CLen bytes from C into P. Keyed mode is required;
   this is checked with assert only. Forwards to Cyclist_Crypt with decrypt = 1. */
void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen)
assert(instance->mode == Cyclist_ModeKeyed);
Cyclist_Crypt(instance, C, P, CLen, 1);
/* Cyclist_Squeeze: produce YLen output bytes in Y. Forwards to
   Cyclist_SqueezeAny with 0x40 as the Cu argument. */
void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen)
Cyclist_SqueezeAny(instance, Y, YLen, 0x40);
/* Cyclist_SqueezeKey: produce KLen bytes in K. Keyed mode is required; this
   is checked with assert only. Forwards to Cyclist_SqueezeAny with 0x20 as
   the Cu argument. */
void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen)
assert(instance->mode == Cyclist_ModeKeyed);
Cyclist_SqueezeAny(instance, K, KLen, 0x20);
/* Cyclist_Ratchet: squeeze Cyclist_lRatchet bytes (Cu = 0x10) into a local
   buffer and absorb those same bytes again (last argument 0x00). Keyed mode
   is required; this is checked with assert only.
   NOTE(review): the local buffer is not cleared before returning - confirm
   whether that matters for the callers. */
void Cyclist_Ratchet(Cyclist_Instance *instance)
uint8_t buffer[Cyclist_lRatchet];
assert(instance->mode == Cyclist_ModeKeyed);
/* Squeeze then absorb is the same as overwriting with zeros */
Cyclist_SqueezeAny(instance, buffer, sizeof(buffer), 0x10);
Cyclist_AbsorbAny(instance, buffer, sizeof(buffer), instance->Rabsorb, 0x00);
/* Remove every helper name used by the Cyclist code above: the SnP_* aliases,
   the Cyclist_* function and type names, and the rate parameters. Presumably
   this lets the code be included again with a different prefix - confirm. */
#undef SnP_StaticInitialize
#undef SnP_Initialize
#undef SnP_AddBytes
#undef SnP_AddByte
#undef SnP_OverwriteBytes
#undef SnP_ExtractBytes
#undef SnP_ExtractAndAddBytes
/* Public interface names. */
#undef Cyclist_Instance
#undef Cyclist_Initialize
#undef Cyclist_Absorb
#undef Cyclist_Encrypt
#undef Cyclist_Decrypt
#undef Cyclist_Squeeze
#undef Cyclist_SqueezeKey
#undef Cyclist_Ratchet
/* Internal helper names. */
#undef Cyclist_AbsorbAny
#undef Cyclist_AbsorbKey
#undef Cyclist_SqueezeAny
#undef Cyclist_Down
#undef Cyclist_Up
#undef Cyclist_Crypt
/* Parameter names. */
#undef Cyclist_f_bPrime
#undef Cyclist_Rhash
#undef Cyclist_Rkin
#undef Cyclist_Rkout
#undef Cyclist_lRatchet
/* Full-block helper names exist only when the feature is enabled.
   NOTE(review): the matching #endif is not visible in this copy of the file. */
#if defined(CyclistFullBlocks_supported)
#undef Cyclist_AbsorbKeyedFullBlocks
#undef Cyclist_AbsorbHashFullBlocks
#undef Cyclist_SqueezeKeyedFullBlocks
#undef Cyclist_SqueezeHashFullBlocks
#undef Cyclist_EncryptFullBlocks
#undef Cyclist_DecryptFullBlocks
The eXtended Keccak Code Package (XKCP)
The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Xoodoo_SnP_h_
#define _Xoodoo_SnP_h_
#include <stddef.h>
#include <stdint.h>
/** For the documentation, see SnP-documentation.h. */
/* Human-readable name of this implementation. */
#define Xoodoo_implementation "AVR8 optimized implementation"
/* State size: 3 * 4 * 4 = 48 bytes. */
#define Xoodoo_stateSizeInBytes (3*4*4)
/* Required alignment of the state, in bytes. */
#define Xoodoo_stateAlignment 1
/* Advertises that Xoodoo_Permute_Nrounds is available. */
#define Xoodoo_HasNround
/* Static initialization expands to nothing in this implementation. */
#define Xoodoo_StaticInitialize()
/* Fills the state with zero bytes. */
void Xoodoo_Initialize(void *state);
/* XORs one byte into the state at the given byte offset. The arguments are
   expanded textually, so pass expressions without side effects. */
#define Xoodoo_AddByte(argS, argData, argOffset) ((uint8_t*)argS)[argOffset] ^= (argData)
/* XORs length bytes of data into the state starting at byte offset. */
void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
/* Copies length bytes of data over the state starting at byte offset. */
void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
/* Sets the first byteCount bytes of the state to zero. */
void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount);
/* Applies the permutation with the requested number of rounds. */
void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds);
/* Applies the permutation with 6 rounds. */
void Xoodoo_Permute_6rounds(void *state);
/* Applies the permutation with 12 rounds. */
void Xoodoo_Permute_12rounds(void *state);
/* Copies length bytes of the state, starting at byte offset, into data. */
void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length);
/* Writes output[i] = input[i] XOR state[offset + i] for length bytes. */
void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length);
; The eXtended Keccak Code Package (XKCP)
; The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
; Implementation by Ronny Van Keer, hereby denoted as "the implementer".
; For more information, feedback or questions, please refer to the Keccak Team website:
; To the extent possible under law, the implementer has waived all copyright
; and related or neighboring rights to the source code in this file.
; ---
; This file implements Xoodoo in a SnP-compatible way.
; Please refer to SnP-documentation.h for more details.
; This implementation comes with Xoodoo-SnP.h in the same folder.
; Please refer to LowLevel.build for the exact list of other files it must be combined with.
; INFO: Tested on ATmega1280 simulator
; Registers used in all routines
; The values are AVR register numbers. The routines below rely on r1 holding 0
; (it is stored wherever a zero byte is needed). rpState is the register pair
; in which the state pointer argument arrives (r24:r25). rX, rY and rZ are the
; low registers of the X, Y and Z pointer pairs.
; NOTE(review): sp is not referenced by the routines visible in this excerpt.
#define zero 1
#define rpState 24
#define rX 26
#define rY 28
#define rZ 30
#define sp 0x3D
; void Xoodoo_StaticInitialize( void )
; NOTE(review): no instructions for this symbol are visible in this excerpt, and
; Xoodoo-SnP.h defines Xoodoo_StaticInitialize() as an empty macro.
.global Xoodoo_StaticInitialize
; void Xoodoo_Initialize(void *state)
; Fills the whole 48-byte state with zero bytes.
; argument state is passed in r24:r25
; NOTE(review): labels and the return instruction are not visible in this excerpt.
.global Xoodoo_Initialize
movw rZ, r24
; 3*4/2 = 6 iterations of 8 stores each = 48 bytes
ldi r23, 3*4/2 ; clear state (8 bytes / 2 lanes) per iteration
st z+, zero
st z+, zero
st z+, zero
st z+, zero
st z+, zero
st z+, zero
st z+, zero
st z+, zero
dec r23
brne Xoodoo_Initialize_Loop
; void Xoodoo_AddByte(void *state, unsigned char data, unsigned int offset)
; XORs the byte data into state[offset].
; argument state is passed in r24:r25
; argument data is passed in r22:r23, only LSB (r22) is used
; argument offset is passed in r20:r21, only LSB (r20) is used
; NOTE(review): Xoodoo-SnP.h also defines Xoodoo_AddByte as a macro, so C code
; that includes that header uses the macro rather than this routine - confirm.
.global Xoodoo_AddByte
movw rZ, r24
; Z = state + offset (carry propagated into the high byte)
add rZ, r20
adc rZ+1, zero
; state[offset] ^= data
ld r0, Z
eor r0, r22
st Z, r0
; void Xoodoo_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
; XORs length bytes read from data into the state, starting at byte offset.
; argument state is passed in r24:r25
; argument data is passed in r22:r23
; argument offset is passed in r20:r21, only LSB (r20) is used
; argument length is passed in r18:r19, only LSB (r18) is used
; Z walks the state, X walks the data, r21 and r0 are scratch.
; NOTE(review): labels and the return instruction are not visible in this excerpt.
.global Xoodoo_AddBytes
movw rZ, r24
add rZ, r20
adc rZ+1, zero
movw rX, r22
; fewer than 8 bytes: skip the unrolled loop
subi r18, 8
brcs Xoodoo_AddBytes_Byte
;do 8 bytes per iteration
ld r21, X+
ld r0, Z
eor r0, r21
st Z+, r0
ld r21, X+
ld r0, Z
eor r0, r21
st Z+, r0
ld r21, X+
ld r0, Z
eor r0, r21
st Z+, r0
ld r21, X+
ld r0, Z
eor r0, r21
st Z+, r0
ld r21, X+
ld r0, Z
eor r0, r21
st Z+, r0
ld r21, X+
ld r0, Z
eor r0, r21
st Z+, r0
ld r21, X+
ld r0, Z
eor r0, r21
st Z+, r0
ld r21, X+
ld r0, Z
eor r0, r21
st Z+, r0
; repeat while at least 8 bytes remain
subi r18, 8
brcc Xoodoo_AddBytes_Loop8
; undo the last subtraction: r18 = number of leftover bytes (0..7)
ldi r19, 8
add r18, r19
breq Xoodoo_AddBytes_End
; leftover bytes, one per iteration
ld r21, X+
ld r0, Z
eor r0, r21
st Z+, r0
dec r18
brne Xoodoo_AddBytes_Loop1
; void Xoodoo_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
; Copies length bytes from data over the state, starting at byte offset.
; argument state is passed in r24:r25
; argument data is passed in r22:r23
; argument offset is passed in r20:r21, only LSB (r20) is used
; argument length is passed in r18:r19, only LSB (r18) is used
; Z walks the state, X walks the data, r0 is scratch.
; NOTE(review): labels and the return instruction are not visible in this excerpt.
.global Xoodoo_OverwriteBytes
movw rZ, r24
add rZ, r20
adc rZ+1, zero
movw rX, r22
; fewer than 8 bytes: skip the unrolled loop
subi r18, 8
brcs Xoodoo_OverwriteBytes_Byte
;do 8 bytes per iteration
ld r0, X+
st Z+, r0
ld r0, X+
st Z+, r0
ld r0, X+
st Z+, r0
ld r0, X+
st Z+, r0
ld r0, X+
st Z+, r0
ld r0, X+
st Z+, r0
ld r0, X+
st Z+, r0
ld r0, X+
st Z+, r0
; repeat while at least 8 bytes remain
subi r18, 8
brcc Xoodoo_OverwriteBytes_Loop8
; undo the last subtraction: r18 = number of leftover bytes (0..7)
ldi r19, 8
add r18, r19
breq Xoodoo_OverwriteBytes_End
; leftover bytes, one per iteration
ld r0, X+
st Z+, r0
dec r18
brne Xoodoo_OverwriteBytes_Loop1
; void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount)
; Sets the first byteCount bytes of the state to zero.
; argument state is passed in r24:r25
; argument byteCount is passed in r22:r23, only LSB (r22) is used
; NOTE(review): labels and the return instruction are not visible in this excerpt.
.global Xoodoo_OverwriteWithZeroes
movw rZ, r24 ; rZ = state
; r23 = byteCount / 8 = number of 8-byte groups
mov r23, r22
lsr r23
lsr r23
lsr r23
breq Xoodoo_OverwriteWithZeroes_Bytes
; clear 8 bytes per iteration
st Z+, r1
st Z+, r1
st Z+, r1
st Z+, r1
st Z+, r1
st Z+, r1
st Z+, r1
st Z+, r1
dec r23
brne Xoodoo_OverwriteWithZeroes_LoopLanes
; r22 = byteCount mod 8 = leftover bytes
andi r22, 7
breq Xoodoo_OverwriteWithZeroes_End
st Z+, r1
dec r22
brne Xoodoo_OverwriteWithZeroes_LoopBytes
; void Xoodoo_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
; Copies length bytes of the state, starting at byte offset, into data.
; argument state is passed in r24:r25
; argument data is passed in r22:r23
; argument offset is passed in r20:r21, only LSB (r20) is used
; argument length is passed in r18:r19, only LSB (r18) is used
; Z walks the state (source), X walks data (destination), r0 is scratch.
; NOTE(review): labels and the return instruction are not visible in this excerpt.
.global Xoodoo_ExtractBytes
movw rZ, r24
add rZ, r20
adc rZ+1, zero
movw rX, r22
; fewer than 8 bytes: skip the unrolled loop
subi r18, 8
brcs Xoodoo_ExtractBytes_Byte
;do 8 bytes per iteration
ld r0, Z+
st X+, r0
ld r0, Z+
st X+, r0
ld r0, Z+
st X+, r0
ld r0, Z+
st X+, r0
ld r0, Z+
st X+, r0
ld r0, Z+
st X+, r0
ld r0, Z+
st X+, r0
ld r0, Z+
st X+, r0
; repeat while at least 8 bytes remain
subi r18, 8
brcc Xoodoo_ExtractBytes_Loop8
; undo the last subtraction: r18 = number of leftover bytes (0..7)
ldi r19, 8
add r18, r19
breq Xoodoo_ExtractBytes_End
; leftover bytes, one per iteration
ld r0, Z+
st X+, r0
dec r18
brne Xoodoo_ExtractBytes_Loop1
; void Xoodoo_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
; Writes output[i] = input[i] XOR state[offset + i] for length bytes.
; argument state is passed in r24:r25
; argument input is passed in r22:r23
; argument output is passed in r20:r21
; argument offset is passed in r18:r19, only LSB (r18) is used
; argument length is passed in r16:r17, only LSB (r16) is used
; Z walks the state, X walks input, Y walks output. r21 is reused as scratch
; once output has been copied into Y.
; NOTE(review): labels and the return instruction are not visible in this excerpt.
.global Xoodoo_ExtractAndAddBytes
; nothing to do for length 0; this exit is taken before anything is pushed
tst r16
breq Xoodoo_ExtractAndAddBytes_End
; r16 and the Y pair are modified below, so save them
push r16
push r28
push r29
movw rZ, r24
add rZ, r18
adc rZ+1, zero
movw rX, r22
movw rY, r20
; fewer than 8 bytes: skip the unrolled loop
subi r16, 8
brcs Xoodoo_ExtractAndAddBytes_Byte
; 8 bytes per iteration
ld r21, Z+
ld r0, X+
eor r0, r21
st Y+, r0
ld r21, Z+
ld r0, X+
eor r0, r21
st Y+, r0
ld r21, Z+
ld r0, X+
eor r0, r21
st Y+, r0
ld r21, Z+
ld r0, X+
eor r0, r21
st Y+, r0
ld r21, Z+
ld r0, X+
eor r0, r21
st Y+, r0
ld r21, Z+
ld r0, X+
eor r0, r21
st Y+, r0
ld r21, Z+
ld r0, X+
eor r0, r21
st Y+, r0
ld r21, Z+
ld r0, X+
eor r0, r21
st Y+, r0
; repeat while at least 8 bytes remain
subi r16, 8
brcc Xoodoo_ExtractAndAddBytes_LoopLane
; undo the last subtraction: r16 = number of leftover bytes (0..7)
ldi r19, 8
add r16, r19
breq Xoodoo_ExtractAndAddBytes_Done
; leftover bytes, one per iteration
ld r21, Z+
ld r0, X+
eor r0, r21
st Y+, r0
dec r16
brne Xoodoo_ExtractAndAddBytes_Loop1
; restore the saved registers in reverse order
pop r29
pop r28
pop r16
; Round constant table: one 16-bit constant per round, low byte first. The
; values match _rc12 down to _rc1 in Xoodoo.h, in that order. The permutation
; reads two bytes per round through Z and stops when the next low byte is 0xFF.
; NOTE(review): the table labels (Xoodoo_RoundConstants_12, _6, _0) referenced
; by the entry points are not visible in this excerpt.
.BYTE 0x58, 0x00
.BYTE 0x38, 0x00
.BYTE 0xC0, 0x03
.BYTE 0xD0, 0x00
.BYTE 0x20, 0x01
.BYTE 0x14, 0x00
.BYTE 0x60, 0x00
.BYTE 0x2C, 0x00
.BYTE 0x80, 0x03
.BYTE 0xF0, 0x00
.BYTE 0xA0, 0x01
.BYTE 0x12, 0x00
.BYTE 0xFF, 0 ; terminator
; Register variables used in permutation
; Each name is the first of four consecutive registers holding one 32-bit
; value, least significant byte in the lowest register.
#define rC0 2 // 4 regs (2-5)
#define rC1 6 // 4 regs (6-9)
#define rC2 10 // 4 regs (10-13)
#define rC3 14 // 4 regs (14-17)
#define rVv 18 // 4 regs (18-21)
#define rTt 22 // 4 regs (22-25)
// r26-27 free
// Byte offsets of the twelve lanes within the state: a<y><x> = 4 * (x + 4 * y)
#define a00 0
#define a01 4
#define a02 8
#define a03 12
#define a10 16
#define a11 20
#define a12 24
#define a13 28
#define a20 32
#define a21 36
#define a22 40
#define a23 44
; void Xoodoo_Permute_Nrounds( void *state, unsigned int nrounds )
; Points Z at the round constant that lies nrounds entries (2 bytes each)
; before Xoodoo_RoundConstants_0, then jumps to the shared permutation code.
; argument state is passed in r24:r25
; argument nrounds is passed in r22:r23 (only LSB (r22) is used)
; NOTE(review): presumably Xoodoo_RoundConstants_0 marks the terminator entry
; of the table; the label is not visible in this excerpt - confirm.
.global Xoodoo_Permute_Nrounds
mov r26, r22
ldi rZ+0, lo8(Xoodoo_RoundConstants_0)
ldi rZ+1, hi8(Xoodoo_RoundConstants_0)
; Z -= 2 * nrounds
lsl r26
sub rZ, r26
sbc rZ+1, zero
rjmp Xoodoo_Permute
; void Xoodoo_Permute_6rounds( void *state )
; Points Z at Xoodoo_RoundConstants_6 and jumps to the shared permutation code.
; argument state is passed in r24:r25
.global Xoodoo_Permute_6rounds
ldi rZ+0, lo8(Xoodoo_RoundConstants_6)
ldi rZ+1, hi8(Xoodoo_RoundConstants_6)
rjmp Xoodoo_Permute
; void Xoodoo_Permute_12rounds( void *state )
; Points Z at Xoodoo_RoundConstants_12. There is no jump here: execution
; continues into the code that follows.
; argument state is passed in r24:r25
.global Xoodoo_Permute_12rounds
ldi rZ+0, lo8(Xoodoo_RoundConstants_12)
ldi rZ+1, hi8(Xoodoo_RoundConstants_12)
; Shared permutation code. On entry r24:r25 (rpState) holds the state pointer
; and Z points at the first round constant to use.
; Save r2-r17 and the Y pair; all of them are overwritten below.
push r2
push r3
push r4
push r5
push r6
push r7
push r8
push r9
push r10
push r11
push r12
push r13
push r14
push r15
push r16
push r17
push r28
push r29
; Initial Prepare Theta
; rC0..rC3 = XOR of the three lanes of each column (a0x ^ a1x ^ a2x)
movw rY, rpState
ld rC0+0, Y+ ; a00
ld rC0+1, Y+
ld rC0+2, Y+
ld rC0+3, Y+
ld rC1+0, Y+ ; a01
ld rC1+1, Y+
ld rC1+2, Y+
ld rC1+3, Y+
ld rC2+0, Y+ ; a02
ld rC2+1, Y+
ld rC2+2, Y+
ld rC2+3, Y+
ld rC3+0, Y+ ; a03
ld rC3+1, Y+
ld rC3+2, Y+
ld rC3+3, Y+
ld r0, Y+ ; a10
eor rC0+0, r0
ld r0, Y+
eor rC0+1, r0
ld r0, Y+
eor rC0+2, r0
ld r0, Y+
eor rC0+3, r0
ld r0, Y+ ; a11
eor rC1+0, r0
ld r0, Y+
eor rC1+1, r0
ld r0, Y+
eor rC1+2, r0
ld r0, Y+
eor rC1+3, r0
ld r0, Y+ ; a12
eor rC2+0, r0
ld r0, Y+
eor rC2+1, r0
ld r0, Y+
eor rC2+2, r0
ld r0, Y+
eor rC2+3, r0
ld r0, Y+ ; a13
eor rC3+0, r0
ld r0, Y+
eor rC3+1, r0
ld r0, Y+
eor rC3+2, r0
ld r0, Y+
eor rC3+3, r0
ld r0, Y+ ; a20
eor rC0+0, r0
ld r0, Y+
eor rC0+1, r0
ld r0, Y+
eor rC0+2, r0
ld r0, Y+
eor rC0+3, r0
ld r0, Y+ ; a21
eor rC1+0, r0
ld r0, Y+
eor rC1+1, r0
ld r0, Y+
eor rC1+2, r0
ld r0, Y+
eor rC1+3, r0
ld r0, Y+ ; a22
eor rC2+0, r0
ld r0, Y+
eor rC2+1, r0
ld r0, Y+
eor rC2+2, r0
ld r0, Y+
eor rC2+3, r0
ld r0, Y+ ; a23
eor rC3+0, r0
ld r0, Y+
eor rC3+1, r0
ld r0, Y+
eor rC3+2, r0
ld r0, Y+
eor rC3+3, r0
; Y advanced over all 48 state bytes; rewind it to the start of the state
sbiw rY, 48
; Theta + Rho west
; Each column parity c is replaced by ROTL32(c ^ ROTL32(c, 9), 5).
; The rotate by 9 is a byte-wise move (rotate by 8) followed by a one-bit
; rotate left, where adc folds the carried-out top bit into bit 0. The rotate
; by 5 is three one-bit rotates right (bst/bld carry bit 0 around to bit 31)
; followed by a byte-wise move when the result is copied back.
; NOTE(review): presumably the round loop label sits here, since the end of the
; round jumps back to Xoodoo_RoundLoop; the label is not visible - confirm.
; c0 = ROTL32(c0 ^ ROTL32(c0, 9), 5);
mov rVv+1, rC0+0 ; rol 9
mov rVv+2, rC0+1
mov rVv+3, rC0+2
mov rVv+0, rC0+3
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
eor rVv+0, rC0+0
eor rVv+1, rC0+1
eor rVv+2, rC0+2
eor rVv+3, rC0+3
bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
bst rVv, 0
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
bst rVv, 0
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
mov rC0+0, rVv+3
mov rC0+1, rVv+0
mov rC0+2, rVv+1
mov rC0+3, rVv+2
; c1 = ROTL32(c1 ^ ROTL32(c1, 9), 5);
mov rVv+1, rC1+0 ; rol 9
mov rVv+2, rC1+1
mov rVv+3, rC1+2
mov rVv+0, rC1+3
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
eor rVv+0, rC1+0
eor rVv+1, rC1+1
eor rVv+2, rC1+2
eor rVv+3, rC1+3
bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
bst rVv, 0
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
bst rVv, 0
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
mov rC1+0, rVv+3
mov rC1+1, rVv+0
mov rC1+2, rVv+1
mov rC1+3, rVv+2
; c2 = ROTL32(c2 ^ ROTL32(c2, 9), 5);
mov rVv+1, rC2+0 ; rol 9
mov rVv+2, rC2+1
mov rVv+3, rC2+2
mov rVv+0, rC2+3
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
eor rVv+0, rC2+0
eor rVv+1, rC2+1
eor rVv+2, rC2+2
eor rVv+3, rC2+3
bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
bst rVv, 0
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
bst rVv, 0
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
mov rC2+0, rVv+3
mov rC2+1, rVv+0
mov rC2+2, rVv+1
mov rC2+3, rVv+2
; c3 = ROTL32(c3 ^ ROTL32(c3, 9), 5);
mov rVv+1, rC3+0 ; rol 9
mov rVv+2, rC3+1
mov rVv+3, rC3+2
mov rVv+0, rC3+3
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
eor rVv+0, rC3+0
eor rVv+1, rC3+1
eor rVv+2, rC3+2
eor rVv+3, rC3+3
bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
bst rVv, 0
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
bst rVv, 0
ror rVv+3
ror rVv+2
ror rVv+1
ror rVv
bld rVv+3, 7
mov rC3+0, rVv+3
mov rC3+1, rVv+0
mov rC3+2, rVv+1
mov rC3+3, rVv+2
; Plane 1: every lane receives its left neighbour XORed with a column value
; (a13 <- a12 ^ c1, a12 <- a11 ^ c0, a11 <- a10 ^ c3, a10 <- old a13 ^ c2).
; The old a13 is parked in rVv because a13 is overwritten first.
; v1 = a13;
ldd rVv+0, Y+a13+0
ldd rVv+1, Y+a13+1
ldd rVv+2, Y+a13+2
ldd rVv+3, Y+a13+3
; a13 = a12 ^ c1;
ldd r0, Y+a12+0
eor r0, rC1+0
std Y+a13+0, r0
ldd r0, Y+a12+1
eor r0, rC1+1
std Y+a13+1, r0
ldd r0, Y+a12+2
eor r0, rC1+2
std Y+a13+2, r0
ldd r0, Y+a12+3
eor r0, rC1+3
std Y+a13+3, r0
; a12 = a11 ^ c0;
ldd r0, Y+a11+0
eor r0, rC0+0
std Y+a12+0, r0
ldd r0, Y+a11+1
eor r0, rC0+1
std Y+a12+1, r0
ldd r0, Y+a11+2
eor r0, rC0+2
std Y+a12+2, r0
ldd r0, Y+a11+3
eor r0, rC0+3
std Y+a12+3, r0
; a11 = a10 ^ c3;
ldd r0, Y+a10+0
eor r0, rC3+0
std Y+a11+0, r0
ldd r0, Y+a10+1
eor r0, rC3+1
std Y+a11+1, r0
ldd r0, Y+a10+2
eor r0, rC3+2
std Y+a11+2, r0
ldd r0, Y+a10+3
eor r0, rC3+3
std Y+a11+3, r0
; a10 = v1 ^ c2;
eor rVv+0, rC2+0
std Y+a10+0, rVv+0
eor rVv+1, rC2+1
std Y+a10+1, rVv+1
eor rVv+2, rC2+2
std Y+a10+2, rVv+2
eor rVv+3, rC2+3
std Y+a10+3, rVv+3
; Plane 2: each lane is XORed with a column value and rotated left by 11.
; The bytes are loaded one position up (rotate by 8) and the value is then
; rotated left by one bit three times.
; a20 = ROTL32(a20 ^ c3, 11);
ldd rVv+0, Y+a20+3
eor rVv+0, rC3+3
ldd rVv+1, Y+a20+0
eor rVv+1, rC3+0
ldd rVv+2, Y+a20+1
eor rVv+2, rC3+1
ldd rVv+3, Y+a20+2
eor rVv+3, rC3+2
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
std Y+a20+0, rVv+0
std Y+a20+1, rVv+1
std Y+a20+2, rVv+2
std Y+a20+3, rVv+3
; a21 = ROTL32(a21 ^ c0, 11);
ldd rVv+0, Y+a21+3
eor rVv+0, rC0+3
ldd rVv+1, Y+a21+0
eor rVv+1, rC0+0
ldd rVv+2, Y+a21+1
eor rVv+2, rC0+1
ldd rVv+3, Y+a21+2
eor rVv+3, rC0+2
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
std Y+a21+0, rVv+0
std Y+a21+1, rVv+1
std Y+a21+2, rVv+2
std Y+a21+3, rVv+3
; a22 = ROTL32(a22 ^ c1, 11);
ldd rVv+0, Y+a22+3
eor rVv+0, rC1+3
ldd rVv+1, Y+a22+0
eor rVv+1, rC1+0
ldd rVv+2, Y+a22+1
eor rVv+2, rC1+1
ldd rVv+3, Y+a22+2
eor rVv+3, rC1+2
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
std Y+a22+0, rVv+0
std Y+a22+1, rVv+1
std Y+a22+2, rVv+2
std Y+a22+3, rVv+3
; a23 = ROTL32(a23 ^ c2, 11);
ldd rVv+0, Y+a23+3
eor rVv+0, rC2+3
ldd rVv+1, Y+a23+0
eor rVv+1, rC2+0
ldd rVv+2, Y+a23+1
eor rVv+2, rC2+1
ldd rVv+3, Y+a23+2
eor rVv+3, rC2+2
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
lsl rVv+0
rol rVv+1
rol rVv+2
rol rVv+3
adc rVv+0, zero
std Y+a23+0, rVv+0
std Y+a23+1, rVv+1
std Y+a23+2, rVv+2
std Y+a23+3, rVv+3
; Plane 0: the rC registers are reused to hold the updated lanes a00..a03.
; c3 is parked in rVv first because rC3 is overwritten before it is needed
; for a00.
; v1 = c3;
movw rVv+0, rC3+0
movw rVv+2, rC3+2
; c3 = a03 ^ c2; /* a03 resides in c3 */
ldd rC3+0, Y+a03+0
eor rC3+0, rC2+0
ldd rC3+1, Y+a03+1
eor rC3+1, rC2+1
ldd rC3+2, Y+a03+2
eor rC3+2, rC2+2
ldd rC3+3, Y+a03+3
eor rC3+3, rC2+3
; c2 = a02 ^ c1; /* a02 resides in c2 */
ldd rC2+0, Y+a02+0
eor rC2+0, rC1+0
ldd rC2+1, Y+a02+1
eor rC2+1, rC1+1
ldd rC2+2, Y+a02+2
eor rC2+2, rC1+2
ldd rC2+3, Y+a02+3
eor rC2+3, rC1+3
; c1 = a01 ^ c0; /* a01 resides in c1 */
ldd rC1+0, Y+a01+0
eor rC1+0, rC0+0
ldd rC1+1, Y+a01+1
eor rC1+1, rC0+1
ldd rC1+2, Y+a01+2
eor rC1+2, rC0+2
ldd rC1+3, Y+a01+3
eor rC1+3, rC0+3
; c0 = a00 ^ v1; /* a00 resides in c0 */
ldd rC0+0, Y+a00+0
eor rC0+0, rVv+0
ldd rC0+1, Y+a00+1
eor rC0+1, rVv+1
ldd rC0+2, Y+a00+2
eor rC0+2, rVv+2
ldd rC0+3, Y+a00+3
eor rC0+3, rVv+3
; c0 ^= __rc; /* +Iota */
; The round constant is read from program memory through Z, two bytes per
; round; only the two low bytes of a00 are affected.
lpm rVv+0, Z+
lpm rVv+1, Z+
eor rC0+0, rVv+0
eor rC0+1, rVv+1
; Chi + Rho east + Early Theta
; Columns are processed in the order 0, 2, 1, 3. For each column the three
; lanes are combined with the and-not pattern shown in the step comments, the
; plane 1 lane is rotated left by one bit, and the plane 2 result is rotated
; left by 8 (done by storing the bytes one position up).
; The plane 2 results of columns 0 and 2 end up in each other's slots, and
; likewise for columns 1 and 3.
; While the new lanes are written back they are also XORed into rC0..rC3, so
; these registers hold the column parities for the next round when the
; section ends.
; a00 = c0 ^= ~a10 & a20;
ldd r0, Y+a10+0
com r0
ldd rTt+0, Y+a20+0 ; a20 in rTt
and r0, rTt+0
eor rC0+0, r0
std Y+a00+0, rC0+0
ldd r0, Y+a10+1
com r0
ldd rTt+1, Y+a20+1
and r0, rTt+1
eor rC0+1, r0
std Y+a00+1, rC0+1
ldd r0, Y+a10+2
com r0
ldd rTt+2, Y+a20+2
and r0, rTt+2
eor rC0+2, r0
std Y+a00+2, rC0+2
ldd r0, Y+a10+3
com r0
ldd rTt+3, Y+a20+3
and r0, rTt+3
eor rC0+3, r0
std Y+a00+3, rC0+3
; a10 ^= ~a20 & c0;
com rTt+0
and rTt+0, rC0+0
ldd r0, Y+a10+0
eor rTt+0, r0 ; new a10 in rTt
std Y+a10+0, rTt+0
com rTt+1
and rTt+1, rC0+1
ldd r0, Y+a10+1
eor rTt+1, r0
std Y+a10+1, rTt+1
com rTt+2
and rTt+2, rC0+2
ldd r0, Y+a10+2
eor rTt+2, r0
std Y+a10+2, rTt+2
com rTt+3
and rTt+3, rC0+3
ldd r0, Y+a10+3
eor rTt+3, r0
std Y+a10+3, rTt+3
; v1(a20) = ROTL32(a20 ^ ~c0 & a10, 8);
; The result is parked in the a20 slot, already byte-rotated, and is picked
; up again while column 2 is processed.
movw rVv+0, rTt+0 ; a10 in rVv
movw rVv+2, rTt+2
mov r0, rC0+0
com r0
and rTt+0, r0
ldd r0, Y+a20+0
eor rTt+0, r0
mov r0, rC0+1
com r0
and rTt+1, r0
ldd r0, Y+a20+1
eor rTt+1, r0
mov r0, rC0+2
com r0
and rTt+2, r0
ldd r0, Y+a20+2
eor rTt+2, r0
mov r0, rC0+3
com r0
and rTt+3, r0
ldd r0, Y+a20+3
eor rTt+3, r0
std Y+a20+0, rTt+3
std Y+a20+1, rTt+0
std Y+a20+2, rTt+1
std Y+a20+3, rTt+2
; c0 ^= a10 = ROTL32(a10, 1);
lsl rVv+0
rol rVv+1
std Y+a10+1, rVv+1
eor rC0+1, rVv+1
rol rVv+2
std Y+a10+2, rVv+2
eor rC0+2, rVv+2
rol rVv+3
std Y+a10+3, rVv+3
eor rC0+3, rVv+3
adc rVv+0, zero
std Y+a10+0, rVv+0
eor rC0+0, rVv+0
; a02 = c2 ^= ~a12 & a22;
ldd r0, Y+a12+0
com r0
ldd rVv+0, Y+a22+0 ; a22 in rVv
and r0, rVv+0
eor rC2+0, r0
std Y+a02+0, rC2+0
ldd r0, Y+a12+1
com r0
ldd rVv+1, Y+a22+1
and r0, rVv+1
eor rC2+1, r0
std Y+a02+1, rC2+1
ldd r0, Y+a12+2
com r0
ldd rVv+2, Y+a22+2
and r0, rVv+2
eor rC2+2, r0
std Y+a02+2, rC2+2
ldd r0, Y+a12+3
com r0
ldd rVv+3, Y+a22+3
and r0, rVv+3
eor rC2+3, r0
std Y+a02+3, rC2+3
; a12 ^= ~a22 & c2;
mov r0, rVv+0 ; a12 in rTt
com r0
and r0, rC2+0
ldd rTt+0, Y+a12+0
eor rTt+0, r0
std Y+a12+0, rTt+0
mov r0, rVv+1
com r0
and r0, rC2+1
ldd rTt+1, Y+a12+1
eor rTt+1, r0
std Y+a12+1, rTt+1
mov r0, rVv+2
com r0
and r0, rC2+2
ldd rTt+2, Y+a12+2
eor rTt+2, r0
std Y+a12+2, rTt+2
mov r0, rVv+3
com r0
and r0, rC2+3
ldd rTt+3, Y+a12+3
eor rTt+3, r0
std Y+a12+3, rTt+3
; c0 ^= a20 = ROTL32(a22 ^ ~c2 & a12, 8);
; Each store into the a20 slot is preceded by a load of the byte parked there
; by column 0, so rVv collects that parked value for the a22 slot.
mov r0, rC2+0
com r0
and r0, rTt+0
eor r0, rVv+0
ldd rVv+0, Y+a20+1 ; rVv = a22
std Y+a20+1, r0
eor rC0+1, r0
mov r0, rC2+1
com r0
and r0, rTt+1
eor r0, rVv+1
ldd rVv+1, Y+a20+2
std Y+a20+2, r0
eor rC0+2, r0
mov r0, rC2+2
com r0
and r0, rTt+2
eor r0, rVv+2
ldd rVv+2, Y+a20+3
std Y+a20+3, r0
eor rC0+3, r0
mov r0, rC2+3
com r0
and r0, rTt+3
eor r0, rVv+3
ldd rVv+3, Y+a20+0
std Y+a20+0, r0
eor rC0+0, r0
; c2 ^= a12 = ROTL32(a12, 1);
lsl rTt+0
rol rTt+1
eor rC2+1, rTt+1
std Y+a12+1, rTt+1
rol rTt+2
eor rC2+2, rTt+2
std Y+a12+2, rTt+2
rol rTt+3
eor rC2+3, rTt+3
std Y+a12+3, rTt+3
adc rTt+0, zero
eor rC2+0, rTt+0
std Y+a12+0, rTt+0
; a22 = v1;
std Y+a22+0, rVv+3
std Y+a22+1, rVv+0
std Y+a22+2, rVv+1
std Y+a22+3, rVv+2
; c2 ^= v1;
eor rC2+0, rVv+3
eor rC2+1, rVv+0
eor rC2+2, rVv+1
eor rC2+3, rVv+2
; a01 = c1 ^= ~a11 & a21;
ldd rTt+0, Y+a11+0 ;rTt holds a11
mov r0, rTt+0
com r0
ldd rVv+0, Y+a21+0 ;rVv holds a21
and r0, rVv+0
eor rC1+0, r0
std Y+a01+0, rC1+0
ldd rTt+1, Y+a11+1
mov r0, rTt+1
com r0
ldd rVv+1, Y+a21+1
and r0, rVv+1
eor rC1+1, r0
std Y+a01+1, rC1+1
ldd rTt+2, Y+a11+2
mov r0, rTt+2
com r0
ldd rVv+2, Y+a21+2
and r0, rVv+2
eor rC1+2, r0
std Y+a01+2, rC1+2
ldd rTt+3, Y+a11+3
mov r0, rTt+3
com r0
ldd rVv+3, Y+a21+3
and r0, rVv+3
eor rC1+3, r0
std Y+a01+3, rC1+3
; a11 ^= ~a21 & c1;
mov r0, rVv+0
com r0
and r0, rC1+0
eor rTt+0, r0
std Y+a11+0, rTt+0
mov r0, rVv+1
com r0
and r0, rC1+1
eor rTt+1, r0
std Y+a11+1, rTt+1
mov r0, rVv+2
com r0
and r0, rC1+2
eor rTt+2, r0
std Y+a11+2, rTt+2
mov r0, rVv+3
com r0
and r0, rC1+3
eor rTt+3, r0
std Y+a11+3, rTt+3
; v1 = ROTL32(a21 ^ ~c1 & a11, 8);
mov r0, rC1+0
com r0
and r0, rTt+0
eor rVv+0, r0 ; v1 not yet ROTL32'ed(8)
mov r0, rC1+1
com r0
and r0, rTt+1
eor rVv+1, r0
mov r0, rC1+2
com r0
and r0, rTt+2
eor rVv+2, r0
mov r0, rC1+3
com r0
and r0, rTt+3
eor rVv+3, r0
; c1 ^= a11 = ROTL32(a11, 1);
lsl rTt+0
rol rTt+1
eor rC1+1, rTt+1
std Y+a11+1, rTt+1
rol rTt+2
eor rC1+2, rTt+2
std Y+a11+2, rTt+2
rol rTt+3
eor rC1+3, rTt+3
std Y+a11+3, rTt+3
adc rTt+0, zero
eor rC1+0, rTt+0
std Y+a11+0, rTt+0
; a03 = c3 ^= ~a13 & a23;
ldd r0, Y+a13+0
com r0
ldd rTt+0, Y+a23+0 ; a23 in rTt
and r0, rTt+0
eor rC3+0, r0
std Y+a03+0, rC3+0
ldd r0, Y+a13+1
com r0
ldd rTt+1, Y+a23+1
and r0, rTt+1
eor rC3+1, r0
std Y+a03+1, rC3+1
ldd r0, Y+a13+2
com r0
ldd rTt+2, Y+a23+2
and r0, rTt+2
eor rC3+2, r0
std Y+a03+2, rC3+2
ldd r0, Y+a13+3
com r0
ldd rTt+3, Y+a23+3
and r0, rTt+3
eor rC3+3, r0
std Y+a03+3, rC3+3
; a13 ^= ~a23 & c3;
; The new a13 is kept in rTt only; it is written to memory after its rotation
; at the end of this section.
mov r0, rTt+0
com r0
and r0, rC3+0
ldd rTt+0, Y+a13+0 ; a13 in rTt
eor rTt+0, r0
mov r0, rTt+1
com r0
and r0, rC3+1
ldd rTt+1, Y+a13+1
eor rTt+1, r0
mov r0, rTt+2
com r0
and r0, rC3+2
ldd rTt+2, Y+a13+2
eor rTt+2, r0
mov r0, rTt+3
com r0
and r0, rC3+3
ldd rTt+3, Y+a13+3
eor rTt+3, r0
; c1 ^= a21 = ROTL32(a23 ^ ~c3 & a13, 8);
; The first register of rVv is needed as scratch here, so the byte of v1 it
; holds is saved on the stack around this step.
push rVv
mov r0, rC3+0
com r0
and r0, rTt+0
ldd rVv, Y+a23+0
eor r0, rVv
eor rC1+1, r0
std Y+a21+1, r0
mov r0, rC3+1
com r0
and r0, rTt+1
ldd rVv, Y+a23+1
eor r0, rVv
eor rC1+2, r0
std Y+a21+2, r0
mov r0, rC3+2
com r0
and r0, rTt+2
ldd rVv, Y+a23+2
eor r0, rVv
eor rC1+3, r0
std Y+a21+3, r0
mov r0, rC3+3
com r0
and r0, rTt+3
ldd rVv, Y+a23+3
eor r0, rVv
eor rC1+0, r0
std Y+a21+0, r0
pop rVv
; a23 = v1;
std Y+a23+0, rVv+3 ; rol8(rVv)
std Y+a23+1, rVv+0
std Y+a23+2, rVv+1
std Y+a23+3, rVv+2
; c3 ^= v1;
eor rC3+0, rVv+3
eor rC3+1, rVv+0
eor rC3+2, rVv+1
eor rC3+3, rVv+2
; c3 ^= a13 = ROTL32(a13, 1);
lsl rTt+0
rol rTt+1
std Y+a13+1, rTt+1
eor rC3+1, rTt+1
rol rTt+2
std Y+a13+2, rTt+2
eor rC3+2, rTt+2
rol rTt+3
std Y+a13+3, rTt+3
eor rC3+3, rTt+3
adc rTt+0, zero
std Y+a13+0, rTt+0
eor rC3+0, rTt+0
; Check for terminator
; Z points at the low byte of the next round constant. The terminator entry
; starts with 0xFF, which becomes 0 after inc, so breq leaves the loop.
lpm r0, Z
inc r0
breq Xoodoo_Done
rjmp Xoodoo_RoundLoop
; Restore the registers saved on entry, in reverse push order.
; NOTE(review): the Xoodoo_Done label and the return instruction are not
; visible in this excerpt.
pop r29
pop r28
pop r17
pop r16
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
pop r9
pop r8
pop r7
pop r6
pop r5
pop r4
pop r3
pop r2
The eXtended Keccak Code Package (XKCP)
The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Xoodoo_h_
#define _Xoodoo_h_
#include <stdint.h>
#include <stdlib.h>
/* Largest round count for which a round constant is defined below. */
#define MAXROUNDS 12
/* State dimensions in lanes: NROWS rows of NCOLUMS lanes each. */
#define NROWS 3
#define NCOLUMS 4
/* Round constants */
/* _rcN carries the number N used to select it; the same values appear as
   16-bit entries in the assembly round constant table. */
#define _rc12 0x00000058
#define _rc11 0x00000038
#define _rc10 0x000003C0
#define _rc9 0x000000D0
#define _rc8 0x00000120
#define _rc7 0x00000014
#define _rc6 0x00000060
#define _rc5 0x0000002C
#define _rc4 0x00000380
#define _rc3 0x000000F0
#define _rc2 0x000001A0
#define _rc1 0x00000012
/*
 * ROTL32(a, offset): rotate the 32-bit value a left by offset bits.
 *
 * A definition supplied by the including file takes precedence. Otherwise a
 * compiler intrinsic is used where one is known, with a portable expression
 * as the fallback.
 *
 * The portable form joins the two shifted halves with |, so an offset that is
 * a multiple of 32 returns a unchanged; joining them with ^ would return 0 in
 * that case. The argument is parenthesized before the cast so that a compound
 * expression is converted as a whole. Both arguments are evaluated more than
 * once; pass expressions without side effects.
 */
#if !defined(ROTL32)
    #if defined (__arm__) && !defined(__GNUC__)
        #define ROTL32(a, offset) __ror(a, (32-(offset))%32)
    #elif defined(_MSC_VER)
        #define ROTL32(a, offset) _rotl(a, (offset)%32)
    #else
        #define ROTL32(a, offset) ((((uint32_t)(a)) << ((offset)%32)) | (((uint32_t)(a)) >> ((32-(offset))%32)))
    #endif
#endif
/* READ32_UNALIGNED(addr): read a 32-bit value from addr. A definition
   supplied by the including file takes precedence.
   NOTE(review): the #else/#endif lines are not visible in this copy of the
   file. The plain-cast variants dereference a uint32_t pointer at whatever
   address is passed; confirm that they are only used on targets that
   tolerate unaligned 32-bit accesses. */
#if !defined(READ32_UNALIGNED)
#if defined (__arm__) && !defined(__GNUC__)
#define READ32_UNALIGNED(argAddress) (*((const __packed uint32_t*)(argAddress)))
#elif defined(_MSC_VER)
#define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress)))
#define READ32_UNALIGNED(argAddress) (*((const uint32_t*)(argAddress)))
/* WRITE32_UNALIGNED(addr, data): store the 32-bit value data at addr; the
   same remarks as for READ32_UNALIGNED apply. */
#if !defined(WRITE32_UNALIGNED)
#if defined (__arm__) && !defined(__GNUC__)
#define WRITE32_UNALIGNED(argAddress, argData) (*((__packed uint32_t*)(argAddress)) = (argData))
#elif defined(_MSC_VER)
#define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData))
#define WRITE32_UNALIGNED(argAddress, argData) (*((uint32_t*)(argAddress)) = (argData))
/* index(x, y): position of lane (x, y) in a row-major array of NROWS rows by
   NCOLUMS lanes. Both coordinates are reduced modulo the dimensions first.
   NOTE(review): the matching #endif is not visible in this copy of the file. */
#if !defined(index)
#define index(__x,__y) ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS))
/* One lane of the state: a 32-bit unsigned value. */
typedef uint32_t tXoodooLane;
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Xoodyak_parameters_h_
#define _Xoodyak_parameters_h_
/* Xoodyak parameters, all in bytes. The comments describe how the Cyclist
   code uses the correspondingly named Cyclist_* values; presumably those are
   mapped onto these definitions through the prefix - confirm. */
/* Equals the 48-byte Xoodoo state size. */
#define Xoodyak_f_bPrime 48
/* Absorb and squeeze rate in hash mode. */
#define Xoodyak_Rhash 16
/* Absorb rate in keyed mode. */
#define Xoodyak_Rkin 44
/* Squeeze rate in keyed mode; also the piece size for encryption and decryption. */
#define Xoodyak_Rkout 24
/* Number of bytes squeezed and re-absorbed by the ratchet operation. */
#define Xoodyak_lRatchet 16
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifdef XoodooReference
#include "displayIntermediateValues.h"
#include <assert.h>
#include <string.h>
#include "Xoodyak.h"
#ifdef OUTPUT
#include <stdlib.h>
#include <string.h>
/* Forward declaration of the debug helper defined right below. */
static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length);
/*
 * displayByteString: print a labelled hex dump on one line.
 *
 * f         stream to print to.
 * synopsis  label printed first, followed by a colon.
 * data      bytes to print.
 * length    number of bytes to print.
 *
 * Each byte is printed as a space followed by two lowercase hex digits, and
 * the line is ended with a newline.
 * NOTE(review): the braces of this function are not visible in this copy of
 * the file; presumably only the per-byte fprintf belongs to the loop.
 */
static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length)
unsigned int i;
fprintf(f, "%s:", synopsis);
for(i=0; i<length; i++)
fprintf(f, " %02x", (unsigned int)data[i]);
fprintf(f, "\n");
/* MyMin(a, b): the smaller of a and b; b when they compare equal. The selected
   argument is evaluated twice, so pass expressions without side effects. */
#define MyMin(a,b) (((a) < (b)) ? (a) : (b))
/* Build the Xoodyak functions only when a Xoodoo implementation is present.
   NOTE(review): the matching #endif is not visible in this copy of the file. */
#ifdef XKCP_has_Xoodoo
#include "Xoodoo-SnP.h"
/* Parameters consumed by the file included below: the permutation family,
   the permutation entry point (12 rounds) and the public name prefix. */
#define SnP Xoodoo
#define SnP_Permute Xoodoo_Permute_12rounds
#define prefix Xoodyak
/* NOTE(review): the include path below is empty in this copy of the file.
   Presumably it should name the generic Cyclist implementation that uses
   SnP, SnP_Permute and prefix - restore the file name before building. */
#include ""
/* Drop the parameters again so they do not leak into later code. */
#undef prefix
#undef SnP
#undef SnP_Permute
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Xoodyak_h_
#define _Xoodyak_h_
#include "config.h"
/* The Xoodyak interface is available only when a Xoodoo implementation is
   present.
   NOTE(review): the #else/#endif lines are not visible in this copy of the
   file; presumably the #error below belongs to the branch taken when
   XKCP_has_Xoodoo is not defined. */
#ifdef XKCP_has_Xoodoo
#include <stddef.h>
#include "Cyclist.h"
#include "Xoodoo-SnP.h"
#include "Xoodyak-parameters.h"
/* Declares the Xoodyak_Instance structure type, sized and aligned for the
   Xoodoo state.
   NOTE(review): no KCP_DeclareCyclistFunctions(Xoodyak) line is visible here;
   confirm where the Xoodyak_* function prototypes come from. */
KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment)
#error This requires an implementation of Xoodoo
The eXtended Keccak Code Package (XKCP)
Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _align_h_
#define _align_h_
/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. */
/* Drop any earlier definition, then define ALIGN(x) with the alignment syntax
   of the compiler in use. The last definition expands to nothing.
   NOTE(review): the #else/#endif lines are not visible in this copy of the
   file; presumably the empty definition is the fallback for other compilers. */
#ifdef ALIGN
#undef ALIGN
#if defined(__GNUC__)
#define ALIGN(x) __attribute__ ((aligned(x)))
#elif defined(_MSC_VER)
#define ALIGN(x) __declspec(align(x))
#elif defined(__ARMCC_VERSION)
#define ALIGN(x) __align(x)
#define ALIGN(x)
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
1. source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
2. binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation;
3. the name of the copyright holder is not used to endorse products
built using this software without specific written permission.
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
Issue Date: 20/12/2007
Changes for ARM 9/9/2010
#ifndef _BRG_ENDIAN_H
#define _BRG_ENDIAN_H
/* Distinct marker values for the two byte orders. The digits spell out the
   order in which the bytes of a 32-bit value are stored. */
#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
#if 0
/* Include files where endian defines and byteswap functions may reside */
#if defined( __sun )
# include <sys/isa_defs.h>
#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
# include <sys/endian.h>
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
# include <machine/endian.h>
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
# if !defined( __MINGW32__ ) && !defined( _AIX )
# include <endian.h>
# if !defined( __BEOS__ )
# include <byteswap.h>
# endif
# endif
/* Now attempt to set the define for platform byte order using any */
/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
/* seem to encompass most endian symbol definitions */
#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
# endif
#elif defined( BIG_ENDIAN )
#elif defined( LITTLE_ENDIAN )
#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
# endif
#elif defined( _BIG_ENDIAN )
#elif defined( _LITTLE_ENDIAN )
#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN )
#elif defined( __LITTLE_ENDIAN )
#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
# endif
#elif defined( __BIG_ENDIAN__ )
#elif defined( __LITTLE_ENDIAN__ )
/* if the platform byte order could not be determined, then try to */
/* set this define using common machine defines */
#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
defined( vax ) || defined( vms ) || defined( VMS ) || \
defined( __VMS ) || defined( _M_X64 )
#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \
defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ )
#elif defined(__arm__)
# ifdef __BIG_ENDIAN
# else
# endif
#elif 1 /* **** EDIT HERE IF NECESSARY **** */
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order
/* File generated by ToTargetConfigFile.xsl */
/* Build-configuration switches. XKCP_has_Xoodoo is tested with #ifdef by the */
/* Xoodyak header and source before they pull in the Xoodoo-based code. */
#define XKCP_has_Xoodyak
#define XKCP_has_Xoodoo
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#include <stddef.h>
#include <stdint.h>
#include "crypto_hash.h"
#ifndef crypto_hash_BYTES
    #define crypto_hash_BYTES 32
#endif
#include "Xoodyak.h"

/*
 * Hash `inlen` bytes at `in` with Xoodyak in hash mode and write
 * crypto_hash_BYTES bytes of digest to `out`.
 *
 * The instance is initialised without key, ID or counter, the whole message is
 * absorbed in one Xoodyak_Absorb call, and the digest is produced by one
 * Xoodyak_Squeeze call.
 *
 * Returns 0 on success, or -1 when `inlen` cannot be represented as a size_t
 * (the message cannot be handed to Xoodyak_Absorb in a single call, and a
 * silent truncation would hash only a prefix of the input).
 */
int crypto_hash(unsigned char *out, const unsigned char *in, unsigned long long inlen)
{
    Xoodyak_Instance instance;

    if (inlen > (unsigned long long)SIZE_MAX) {
        return -1;
    }

    Xoodyak_Initialize(&instance, NULL, 0, NULL, 0, NULL, 0);
    Xoodyak_Absorb(&instance, in, (size_t)inlen);
    Xoodyak_Squeeze(&instance, out, crypto_hash_BYTES);
    #if 0
    {
        /* Debug aid: dump the digest as a C string escape sequence. */
        unsigned int i;
        for (i = 0; i < crypto_hash_BYTES; ++i )
            printf("\\x%02x", out[i] );
    }
    #endif
    return 0;
}
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Cyclist_h_
#define _Cyclist_h_

#include <stddef.h>
#include <stdint.h>
#include "align.h"

/* Values stored in the `mode` field of an instance. */
#define Cyclist_ModeHash    1
#define Cyclist_ModeKeyed   2

/* Values stored in the `phase` field of an instance. */
#define Cyclist_PhaseDown   1
#define Cyclist_PhaseUp     2

/*
 * KCP_DeclareCyclistStructure(prefix, size, alignment)
 * Declares the type prefix##_Instance: a `size`-byte state buffer followed by
 * the current phase, the mode and the two block lengths Rabsorb / Rsqueeze.
 * When OUTPUT is defined the structure additionally carries a shadow copy of
 * the state and a FILE pointer used for tracing.
 */
#ifdef OUTPUT

#include <stdio.h>

#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
    ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
        uint8_t         state[size]; \
        uint8_t         stateShadow[size]; \
        FILE            *file; \
        unsigned int    phase; \
        unsigned int    mode; \
        unsigned int    Rabsorb; \
        unsigned int    Rsqueeze; \
    } prefix##_Instance;

#else

#define KCP_DeclareCyclistStructure(prefix, size, alignment) \
    ALIGN(alignment) typedef struct prefix##_CyclistInstanceStruct { \
        uint8_t         state[size]; \
        unsigned int    phase; \
        unsigned int    mode; \
        unsigned int    Rabsorb; \
        unsigned int    Rsqueeze; \
    } prefix##_Instance;

#endif

/*
 * KCP_DeclareCyclistFunctions(prefix)
 * Declares the public entry points operating on a prefix##_Instance.
 */
#define KCP_DeclareCyclistFunctions(prefix) \
    void prefix##_Initialize(prefix##_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen); \
    void prefix##_Absorb(prefix##_Instance *instance, const uint8_t *X, size_t XLen); \
    void prefix##_Encrypt(prefix##_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen); \
    void prefix##_Decrypt(prefix##_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen); \
    void prefix##_Squeeze(prefix##_Instance *instance, uint8_t *Y, size_t YLen); \
    void prefix##_SqueezeKey(prefix##_Instance *instance, uint8_t *K, size_t KLen); \
    void prefix##_Ratchet(prefix##_Instance *instance);

#endif
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
/*
 * Name mapping for the Cyclist template.
 *
 * The including file defines `SnP` (permutation prefix) and `prefix` (public
 * name prefix) before including this template. JOIN pastes those prefixes
 * onto the generic names; the extra JOIN0 level lets the prefix macro expand
 * before the paste happens.
 */
#define JOIN0(a, b)                     a ## b
#define JOIN(a, b)                      JOIN0(a, b)

/* Permutation-level primitives, resolved through the SnP prefix. */
#define SnP_StaticInitialize            JOIN(SnP, _StaticInitialize)
#define SnP_Initialize                  JOIN(SnP, _Initialize)
#define SnP_AddBytes                    JOIN(SnP, _AddBytes)
#define SnP_AddByte                     JOIN(SnP, _AddByte)
#define SnP_OverwriteBytes              JOIN(SnP, _OverwriteBytes)
#define SnP_ExtractBytes                JOIN(SnP, _ExtractBytes)
#define SnP_ExtractAndAddBytes          JOIN(SnP, _ExtractAndAddBytes)

/* Public Cyclist interface, resolved through the instance prefix. */
#define Cyclist_Instance                JOIN(prefix, _Instance)
#define Cyclist_Initialize              JOIN(prefix, _Initialize)
#define Cyclist_Absorb                  JOIN(prefix, _Absorb)
#define Cyclist_Encrypt                 JOIN(prefix, _Encrypt)
#define Cyclist_Decrypt                 JOIN(prefix, _Decrypt)
#define Cyclist_Squeeze                 JOIN(prefix, _Squeeze)
#define Cyclist_SqueezeKey              JOIN(prefix, _SqueezeKey)
#define Cyclist_Ratchet                 JOIN(prefix, _Ratchet)

/* File-local helpers. */
#define Cyclist_AbsorbAny               JOIN(prefix, _AbsorbAny)
#define Cyclist_AbsorbKey               JOIN(prefix, _AbsorbKey)
#define Cyclist_SqueezeAny              JOIN(prefix, _SqueezeAny)
#define Cyclist_Down                    JOIN(prefix, _Down)
#define Cyclist_Up                      JOIN(prefix, _Up)
#define Cyclist_Crypt                   JOIN(prefix, _Crypt)

/* Per-instantiation parameters. */
#define Cyclist_f_bPrime                JOIN(prefix, _f_bPrime)
#define Cyclist_Rhash                   JOIN(prefix, _Rhash)
#define Cyclist_Rkin                    JOIN(prefix, _Rkin)
#define Cyclist_Rkout                   JOIN(prefix, _Rkout)
#define Cyclist_lRatchet                JOIN(prefix, _lRatchet)

/* Optional whole-block fast paths supplied by the permutation back end. */
#if defined(CyclistFullBlocks_supported)
#define Cyclist_AbsorbKeyedFullBlocks   JOIN(prefix, _AbsorbKeyedFullBlocks)
#define Cyclist_AbsorbHashFullBlocks    JOIN(prefix, _AbsorbHashFullBlocks)
#define Cyclist_SqueezeKeyedFullBlocks  JOIN(prefix, _SqueezeKeyedFullBlocks)
#define Cyclist_SqueezeHashFullBlocks   JOIN(prefix, _SqueezeHashFullBlocks)
#define Cyclist_EncryptFullBlocks       JOIN(prefix, _EncryptFullBlocks)
#define Cyclist_DecryptFullBlocks       JOIN(prefix, _DecryptFullBlocks)
#endif
/* ------- Cyclist internal interfaces ------- */
/*
 * Down step: XOR the XiLen bytes of Xi into the start of the state, XOR the
 * byte 0x01 in at position XiLen, and XOR the domain byte into the last byte
 * of the state. In hash mode only bit 0 of Cd is used. Leaves the instance in
 * the "down" phase.
 */
static void Cyclist_Down(Cyclist_Instance *instance, const uint8_t *Xi, unsigned int XiLen, uint8_t Cd)
{
    uint8_t domainByte = Cd;

    if (instance->mode == Cyclist_ModeHash) {
        domainByte &= 0x01;
    }

    SnP_AddBytes(instance->state, Xi, 0, XiLen);
    SnP_AddByte(instance->state, 0x01, XiLen);
    SnP_AddByte(instance->state, domainByte, Cyclist_f_bPrime - 1);
    instance->phase = Cyclist_PhaseDown;
}
/*
 * Up step: outside hash mode XOR the domain byte Cu into the last byte of the
 * state, apply the permutation, mark the instance as being in the "up" phase
 * and copy the first YiLen state bytes to Yi.
 *
 * Callers pass Yi == NULL together with YiLen == 0 when they only need the
 * permutation; the extraction is skipped in that case so that a null pointer
 * is never handed to the copy routine.
 *
 * With OUTPUT defined and a trace file set, the data XORed into the state
 * since the previous permutation and the state after the permutation are
 * written to the trace file.
 */
static void Cyclist_Up(Cyclist_Instance *instance, uint8_t *Yi, unsigned int YiLen, uint8_t Cu)
{
    #if defined(OUTPUT)
    uint8_t s[Cyclist_f_bPrime];
    #endif

    if (instance->mode != Cyclist_ModeHash) {
        SnP_AddByte(instance->state, Cu, Cyclist_f_bPrime - 1);
    }
    #if defined(OUTPUT)
    if (instance->file != NULL) {
        /* s = shadow XOR state, i.e. what was added since the last permutation */
        SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
        SnP_ExtractAndAddBytes( instance->state, s, s, 0, Cyclist_f_bPrime );
    }
    #endif
    SnP_Permute( instance->state );
    #if defined(OUTPUT)
    if (instance->file != NULL) {
        memcpy( instance->stateShadow, instance->state, sizeof(instance->state) );
        fprintf( instance->file, "Data XORed" );
        displayByteString( instance->file, "", s, Cyclist_f_bPrime );
        SnP_ExtractBytes( instance->stateShadow, s, 0, Cyclist_f_bPrime );
        fprintf( instance->file, "After f() ");
        displayByteString( instance->file, "", s, Cyclist_f_bPrime );
    }
    #endif
    instance->phase = Cyclist_PhaseUp;
    if (YiLen != 0) {
        SnP_ExtractBytes( instance->state, Yi, 0, YiLen );
    }
}
/*
 * Absorb XLen bytes of X in blocks of at most r bytes.
 *
 * Each block is preceded by an Up step unless the instance is already in the
 * "up" phase, then injected with a Down step. The domain byte Cd accompanies
 * the first block only; later blocks use 0. At least one (possibly empty)
 * block is always processed. When the back end offers whole-block routines
 * and r matches the keyed or hash absorb rate, complete blocks that remain
 * after a generic block are delegated to those routines.
 */
static void Cyclist_AbsorbAny(Cyclist_Instance *instance, const uint8_t *X, size_t XLen, unsigned int r, uint8_t Cd)
{
    for (;;) {
        unsigned int chunk;

        if (instance->phase != Cyclist_PhaseUp) {
            Cyclist_Up(instance, NULL, 0, 0);
        }
        chunk = MyMin(XLen, r);
        Cyclist_Down(instance, X, chunk, Cd);
        Cd = 0;
        X += chunk;
        XLen -= chunk;
        #if defined(CyclistFullBlocks_supported)
        if ((r == Cyclist_Rkin) && (XLen >= Cyclist_Rkin)) {
            size_t done = Cyclist_AbsorbKeyedFullBlocks(instance->state, X, XLen);
            X += done;
            XLen -= done;
        }
        else if ((r == Cyclist_Rhash) && (XLen >= Cyclist_Rhash)) {
            size_t done = Cyclist_AbsorbHashFullBlocks(instance->state, X, XLen);
            X += done;
            XLen -= done;
        }
        #endif
        if (XLen == 0) {
            break;
        }
    }
}
/*
 * Switch a hash-mode instance to keyed mode and absorb the key material.
 *
 * The absorb and squeeze block lengths are set to the keyed values. When KLen
 * is non-zero, the string K || ID || (byte)IDLen is absorbed as one block
 * with domain byte 0x02, followed - when counterLen is non-zero - by the
 * counter absorbed one byte per block with domain byte 0x00.
 *
 * Preconditions (checked with assert only): the instance is in hash mode and
 * KLen + IDLen <= Cyclist_Rkin - 1, so that the string fits in the local
 * buffer. ID may be NULL when IDLen is 0.
 */
static void Cyclist_AbsorbKey(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
{
    uint8_t KID[Cyclist_Rkin];

    assert(instance->mode == Cyclist_ModeHash);
    assert((KLen + IDLen) <= (Cyclist_Rkin - 1));

    instance->mode = Cyclist_ModeKeyed;
    instance->Rabsorb = Cyclist_Rkin;
    instance->Rsqueeze = Cyclist_Rkout;
    if (KLen != 0) {
        memcpy(KID, K, KLen);
        if (IDLen != 0) {
            /* guarded: an absent ID is passed as NULL with length 0 */
            memcpy(KID + KLen, ID, IDLen);
        }
        KID[KLen + IDLen] = (uint8_t)IDLen;
        Cyclist_AbsorbAny(instance, KID, KLen + IDLen + 1, instance->Rabsorb, 0x02);
        if (counterLen != 0) {
            Cyclist_AbsorbAny(instance, counter, counterLen, 1, 0x00);
        }
    }
}
/*
 * Produce YLen output bytes into Y.
 *
 * The first block (at most Rsqueeze bytes) comes from an Up step carrying the
 * domain byte Cu. Every further block is obtained by an empty Down step
 * followed by an Up step with domain byte 0. When the back end offers
 * whole-block routines they are used INSTEAD of the generic Down/Up pair for
 * that loop iteration; running both would apply an extra permutation whenever
 * the fast path consumes all remaining bytes.
 */
static void Cyclist_SqueezeAny(Cyclist_Instance *instance, uint8_t *Y, size_t YLen, uint8_t Cu)
{
    unsigned int len;

    len = (unsigned int)MyMin(YLen, instance->Rsqueeze );
    Cyclist_Up(instance, Y, len, Cu);
    Y += len;
    YLen -= len;
    while (YLen != 0) {
        #if defined(CyclistFullBlocks_supported)
        if ((instance->mode == Cyclist_ModeKeyed) && (YLen >= Cyclist_Rkin)) {
            size_t lenProcessed = Cyclist_SqueezeKeyedFullBlocks(instance->state, Y, YLen);
            Y += lenProcessed;
            YLen -= lenProcessed;
        }
        else if ((instance->mode == Cyclist_ModeHash) && (YLen >= Cyclist_Rhash)) {
            size_t lenProcessed = Cyclist_SqueezeHashFullBlocks(instance->state, Y, YLen);
            Y += lenProcessed;
            YLen -= lenProcessed;
        }
        else
        #endif
        {
            Cyclist_Down(instance, NULL, 0, 0);
            len = (unsigned int)MyMin(YLen, instance->Rsqueeze );
            Cyclist_Up(instance, Y, len, 0);
            Y += len;
            YLen -= len;
        }
    }
}
/*
 * Shared body of Encrypt and Decrypt: process IOLen bytes from I into O.
 *
 * For each block of at most Cyclist_Rkout bytes: an Up step (domain byte 0x80
 * for the first block, 0 afterwards), O = I XOR leading state bytes, then a
 * Down step that absorbs the plaintext of the block. When decrypting the
 * plaintext is O; when encrypting it is I, which is copied to a local buffer
 * first because the XOR may overwrite it when I and O are the same buffer.
 *
 * Whole-block routines, when available, replace the generic sequence for that
 * loop iteration once the first block has been processed (Cu == 0). At least
 * one, possibly empty, block is always processed.
 */
static void Cyclist_Crypt(Cyclist_Instance *instance, const uint8_t *I, uint8_t *O, size_t IOLen, int decrypt)
{
    unsigned int splitLen;
    uint8_t P[Cyclist_Rkout];
    uint8_t Cu = 0x80;

    do {
        if (decrypt != 0) {
            #if defined(CyclistFullBlocks_supported)
            if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
                size_t lenProcessed = Cyclist_DecryptFullBlocks(instance->state, I, O, IOLen);
                I += lenProcessed;
                O += lenProcessed;
                IOLen -= lenProcessed;
            }
            else
            #endif
            {
                splitLen = (unsigned int)MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
                Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
                SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
                Cyclist_Down(instance, O, splitLen, 0x00);
                I += splitLen;
                O += splitLen;
                IOLen -= splitLen;
            }
        }
        else {
            #if defined(CyclistFullBlocks_supported)
            if ((Cu == 0) && (IOLen >= Cyclist_Rkout)) {
                size_t lenProcessed = Cyclist_EncryptFullBlocks(instance->state, I, O, IOLen);
                I += lenProcessed;
                O += lenProcessed;
                IOLen -= lenProcessed;
            }
            else
            #endif
            {
                splitLen = (unsigned int)MyMin(IOLen, Cyclist_Rkout); /* use Rkout instead of Rsqueeze, this function is only called in keyed mode */
                if (splitLen != 0) {
                    /* keep the plaintext: O may alias I */
                    memcpy(P, I, splitLen);
                }
                Cyclist_Up(instance, NULL, 0, Cu); /* Up without extract */
                SnP_ExtractAndAddBytes(instance->state, I, O, 0, splitLen); /* Extract from Up and Add */
                Cyclist_Down(instance, P, splitLen, 0x00);
                I += splitLen;
                O += splitLen;
                IOLen -= splitLen;
            }
        }
        Cu = 0x00;
    } while ( IOLen != 0 );
}
/* ------- Cyclist interfaces ------- */
/*
 * Prepare an instance for use.
 *
 * The permutation state is reset through the SnP initialisers, the instance
 * is placed in hash mode in the "up" phase with both block lengths set to the
 * hash rate, and - when KLen is non-zero - the key, ID and counter are
 * absorbed, which switches the instance to keyed mode.
 *
 * Resetting the state here matters: callers declare the instance as an
 * uninitialised automatic variable, and every later step XORs into or
 * permutes that state.
 */
void Cyclist_Initialize(Cyclist_Instance *instance, const uint8_t *K, size_t KLen, const uint8_t *ID, size_t IDLen, const uint8_t *counter, size_t counterLen)
{
    SnP_StaticInitialize();
    SnP_Initialize(instance->state);
    instance->phase = Cyclist_PhaseUp;
    instance->mode = Cyclist_ModeHash;
    instance->Rabsorb = Cyclist_Rhash;
    instance->Rsqueeze = Cyclist_Rhash;
    #ifdef OUTPUT
    instance->file = 0;
    SnP_Initialize( instance->stateShadow );
    #endif
    if (KLen != 0) {
        Cyclist_AbsorbKey(instance, K, KLen, ID, IDLen, counter, counterLen);
    }
}
/*
 * Absorb XLen bytes of X using the instance's current absorb block length
 * and domain byte 0x03.
 */
void Cyclist_Absorb(Cyclist_Instance *instance, const uint8_t *X, size_t XLen)
{
    const uint8_t absorbDomain = 0x03;

    Cyclist_AbsorbAny(instance, X, XLen, instance->Rabsorb, absorbDomain);
}
/*
 * Encrypt PLen bytes of plaintext P into C. The instance must be in keyed
 * mode (checked with assert).
 */
void Cyclist_Encrypt(Cyclist_Instance *instance, const uint8_t *P, uint8_t *C, size_t PLen)
{
    const int decryptFlag = 0;

    assert(instance->mode == Cyclist_ModeKeyed);
    Cyclist_Crypt(instance, P, C, PLen, decryptFlag);
}
/*
 * Decrypt CLen bytes of ciphertext C into P. The instance must be in keyed
 * mode (checked with assert).
 */
void Cyclist_Decrypt(Cyclist_Instance *instance, const uint8_t *C, uint8_t *P, size_t CLen)
{
    const int decryptFlag = 1;

    assert(instance->mode == Cyclist_ModeKeyed);
    Cyclist_Crypt(instance, C, P, CLen, decryptFlag);
}
/*
 * Squeeze YLen output bytes into Y, using domain byte 0x40 for the first
 * block.
 */
void Cyclist_Squeeze(Cyclist_Instance *instance, uint8_t *Y, size_t YLen)
{
    const uint8_t squeezeDomain = 0x40;

    Cyclist_SqueezeAny(instance, Y, YLen, squeezeDomain);
}
/*
 * Squeeze KLen bytes of derived key into K, using domain byte 0x20 for the
 * first block. The instance must be in keyed mode (checked with assert).
 */
void Cyclist_SqueezeKey(Cyclist_Instance *instance, uint8_t *K, size_t KLen)
{
    const uint8_t squeezeKeyDomain = 0x20;

    assert(instance->mode == Cyclist_ModeKeyed);
    Cyclist_SqueezeAny(instance, K, KLen, squeezeKeyDomain);
}
/*
 * Ratchet the keyed state: squeeze Cyclist_lRatchet bytes with domain byte
 * 0x10 and absorb those same bytes again with domain byte 0x00. The instance
 * must be in keyed mode (checked with assert).
 */
void Cyclist_Ratchet(Cyclist_Instance *instance)
{
    uint8_t ratchetBlock[Cyclist_lRatchet];
    const size_t ratchetLen = sizeof(ratchetBlock);

    assert(instance->mode == Cyclist_ModeKeyed);
    /* Squeeze then absorb is the same as overwriting with zeros */
    Cyclist_SqueezeAny(instance, ratchetBlock, ratchetLen, 0x10);
    Cyclist_AbsorbAny(instance, ratchetBlock, ratchetLen, instance->Rabsorb, 0x00);
}
/*
 * Remove the template's name mappings so that the template can be included
 * again with a different SnP / prefix pair.
 */
#undef SnP_StaticInitialize
#undef SnP_Initialize
#undef SnP_AddBytes
#undef SnP_AddByte
#undef SnP_OverwriteBytes
#undef SnP_ExtractBytes
#undef SnP_ExtractAndAddBytes

#undef Cyclist_Instance
#undef Cyclist_Initialize
#undef Cyclist_Absorb
#undef Cyclist_Encrypt
#undef Cyclist_Decrypt
#undef Cyclist_Squeeze
#undef Cyclist_SqueezeKey
#undef Cyclist_Ratchet

#undef Cyclist_AbsorbAny
#undef Cyclist_AbsorbKey
#undef Cyclist_SqueezeAny
#undef Cyclist_Down
#undef Cyclist_Up
#undef Cyclist_Crypt

#undef Cyclist_f_bPrime
#undef Cyclist_Rhash
#undef Cyclist_Rkin
#undef Cyclist_Rkout
#undef Cyclist_lRatchet

#if defined(CyclistFullBlocks_supported)
#undef Cyclist_AbsorbKeyedFullBlocks
#undef Cyclist_AbsorbHashFullBlocks
#undef Cyclist_SqueezeKeyedFullBlocks
#undef Cyclist_SqueezeHashFullBlocks
#undef Cyclist_EncryptFullBlocks
#undef Cyclist_DecryptFullBlocks
#endif
The eXtended Keccak Code Package (XKCP)
The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Xoodoo_SnP_h_
#define _Xoodoo_SnP_h_

#include <stddef.h>
#include <stdint.h>

/** For the documentation, see SnP-documentation.h.
 */

#define Xoodoo_implementation       "32-bit optimized implementation"
#define Xoodoo_stateSizeInBytes     (3*4*4)
#define Xoodoo_stateAlignment       4
#define Xoodoo_HasNround

/* No global tables to set up: expands to nothing. */
#define Xoodoo_StaticInitialize()
void Xoodoo_Initialize(void *state);
/* XOR one byte into the state at byte position argOffset. */
#define Xoodoo_AddByte(argS, argData, argOffset)    ((uint8_t*)(argS))[(argOffset)] ^= (argData)
void Xoodoo_AddBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
void Xoodoo_OverwriteBytes(void *state, const uint8_t *data, unsigned int offset, unsigned int length);
void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount);
void Xoodoo_Permute_Nrounds(void *state, unsigned int nrounds);
void Xoodoo_Permute_6rounds(void *state);
void Xoodoo_Permute_12rounds(void *state);
void Xoodoo_ExtractBytes(const void *state, uint8_t *data, unsigned int offset, unsigned int length);
void Xoodoo_ExtractAndAddBytes(const void *state, const uint8_t *input, uint8_t *output, unsigned int offset, unsigned int length);

//#define Xoodoo_FastXoofff_supported
//void Xoofff_AddIs( uint8_t *output, const uint8_t *input, size_t bitLen);
//size_t Xoofff_CompressFastLoop(uint8_t *k, uint8_t *xAccu, const uint8_t *input, size_t length);
//size_t Xoofff_ExpandFastLoop(uint8_t *yAccu, const uint8_t *kRoll, uint8_t *output, size_t length);

/*
 * Whole-block fast paths used by the Cyclist template. Each routine processes
 * complete blocks while at least one full block remains and returns the
 * number of bytes it consumed or produced.
 */
#define CyclistFullBlocks_supported
size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen);
size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen);
size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen);
size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen);
size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);
size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen);

#endif
The eXtended Keccak Code Package (XKCP)
The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#include <stdio.h>
#include <string.h>
#include "Xoodoo.h"
/*
 * Trace level: 0 = silent, 1 = Dump1 active, 2 = Dump1 and Dump2 active,
 * 3 = Dump1, Dump2 and Dump3 active.
 */
#define VERBOSE         0

/* Dump(label): print the label and the twelve lane variables a00..a23. */
#if (VERBOSE > 0)
    #define Dump(__t)   printf(__t "\n"); \
                        printf("a00 %08x, a01 %08x, a02 %08x, a03 %08x\n", (unsigned int)a00, (unsigned int)a01, (unsigned int)a02, (unsigned int)a03 ); \
                        printf("a10 %08x, a11 %08x, a12 %08x, a13 %08x\n", (unsigned int)a10, (unsigned int)a11, (unsigned int)a12, (unsigned int)a13 ); \
                        printf("a20 %08x, a21 %08x, a22 %08x, a23 %08x\n\n", (unsigned int)a20, (unsigned int)a21, (unsigned int)a22, (unsigned int)a23 );
#else
    #define Dump(__t)
#endif

#if (VERBOSE >= 1)
    #define Dump1(__t)  Dump(__t)
#else
    #define Dump1(__t)
#endif

#if (VERBOSE >= 2)
    #define Dump2(__t)  Dump(__t)
#else
    #define Dump2(__t)
#endif

#if (VERBOSE >= 3)
    #define Dump3(__t)  Dump(__t)
#else
    #define Dump3(__t)
#endif
/* ---------------------------------------------------------------- */
void Xoodoo_Initialize(void *state)
memset(state, 0, NLANES*sizeof(tXoodooLane));
/* ---------------------------------------------------------------- */
/*
 * XOR `length` bytes from `argdata` into the state, starting at byte position
 * `offset` of the state. The caller guarantees offset + length does not
 * exceed the state size; `argdata` may be NULL when `length` is 0.
 *
 * The data is processed four bytes at a time through memcpy, so neither the
 * state nor the data pointer needs any particular alignment and no object is
 * accessed through an incompatible pointer type.
 */
void Xoodoo_AddBytes(void *argState, const unsigned char *argdata, unsigned int offset, unsigned int length)
{
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
    /* state bytes are laid out as little-endian lanes */
    #error "Not yet implemented"
#endif
    unsigned char *stateBytes = (unsigned char *)argState + offset;
    const unsigned char *curData = argdata;
    unsigned int sizeLeft = length;

    while (sizeLeft >= 4) {
        uint32_t lane;
        uint32_t word;

        memcpy(&lane, stateBytes, sizeof(lane));
        memcpy(&word, curData, sizeof(word));
        lane ^= word;
        memcpy(stateBytes, &lane, sizeof(lane));
        stateBytes += 4;
        curData += 4;
        sizeLeft -= 4;
    }
    while (sizeLeft != 0) {
        *stateBytes++ ^= *curData++;
        --sizeLeft;
    }
}
/* ---------------------------------------------------------------- */
/*
 * Replace `length` bytes of the state, starting at byte position `offset`,
 * with the bytes at `argdata`. The caller guarantees offset + length does not
 * exceed the state size and that `argdata` does not overlap the state;
 * `argdata` may be NULL when `length` is 0.
 */
void Xoodoo_OverwriteBytes(void *argstate, const unsigned char *argdata, unsigned int offset, unsigned int length)
{
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
    /* state bytes are laid out as little-endian lanes */
    #error "Not yet implemented"
#endif
    if (length != 0) {
        memcpy((unsigned char*)argstate+offset, argdata, length);
    }
}
/* ---------------------------------------------------------------- */
/*
 * Set the first `byteCount` bytes of the state to zero.
 */
void Xoodoo_OverwriteWithZeroes(void *argstate, unsigned int byteCount)
{
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
    /* state bytes are laid out as little-endian lanes */
    #error "Not yet implemented"
#endif
    memset(argstate, 0, byteCount);
}
/* ---------------------------------------------------------------- */
/*
 * Copy `length` bytes of the state, starting at byte position `offset`, to
 * `data`. The caller guarantees offset + length does not exceed the state
 * size. `data` may be NULL when `length` is 0; nothing is copied then.
 */
void Xoodoo_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
{
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
    /* state bytes are laid out as little-endian lanes */
    #error "Not yet implemented"
#endif
    if (length != 0) {
        memcpy(data, (const unsigned char*)state+offset, length);
    }
}
/* ---------------------------------------------------------------- */
/*
 * For i in [0, length): output[i] = input[i] XOR (state byte at offset + i).
 * The state is not modified. The caller guarantees offset + length does not
 * exceed the state size. `input` and `output` may be the same buffer; both
 * may be NULL when `length` is 0.
 *
 * The data is processed four bytes at a time through memcpy, so none of the
 * three pointers needs any particular alignment. Each input chunk is read in
 * full before the matching output chunk is written, which keeps the in-place
 * case correct.
 */
void Xoodoo_ExtractAndAddBytes(const void *argState, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
{
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
    /* state bytes are laid out as little-endian lanes */
    #error "Not yet implemented"
#endif
    const unsigned char *stateBytes = (const unsigned char *)argState + offset;
    const unsigned char *curInput = input;
    unsigned char *curOutput = output;
    unsigned int sizeLeft = length;

    while (sizeLeft >= 4) {
        uint32_t lane;
        uint32_t word;

        memcpy(&lane, stateBytes, sizeof(lane));
        memcpy(&word, curInput, sizeof(word));
        word ^= lane;
        memcpy(curOutput, &word, sizeof(word));
        stateBytes += 4;
        curInput += 4;
        curOutput += 4;
        sizeLeft -= 4;
    }
    while (sizeLeft != 0) {
        *curOutput++ = (unsigned char)(*curInput++ ^ *stateBytes++);
        --sizeLeft;
    }
}
/* ---------------------------------------------------------------- */
/* Declares the twelve lane variables aRC (R = row/plane 0..2, C = column 0..3) */
/* and two scratch words v1, v2 used by the step macros. */
#define DeclareVars uint32_t a00, a01, a02, a03; \
uint32_t a10, a11, a12, a13; \
uint32_t a20, a21, a22, a23; \
uint32_t v1, v2
/* Loads the lane variables from the array `state`: lanes 0..3 are row 0, */
/* lanes 4..7 row 1, lanes 8..11 row 2. */
#define State2Vars a00 = state[0+0], a01 = state[0+1], a02 = state[0+2], a03 = state[0+3]; \
a10 = state[4+0], a11 = state[4+1], a12 = state[4+2], a13 = state[4+3]; \
a20 = state[8+0], a21 = state[8+1], a22 = state[8+2], a23 = state[8+3]
/* Stores the lane variables back into `state`, using the same layout. */
#define Vars2State state[0+0] = a00, state[0+1] = a01, state[0+2] = a02, state[0+3] = a03; \
state[4+0] = a10, state[4+1] = a11, state[4+2] = a12, state[4+3] = a13; \
state[8+0] = a20, state[8+1] = a21, state[8+2] = a22, state[8+3] = a23
/* Theta: Column Parity Mixer */
/* For each column c, XORs into all three lanes of that column the value */
/* ROTL32(p, 5) ^ ROTL32(p, 14), where p is the XOR of the three lanes of */
/* column c-1 (column 3 for c = 0). Every parity is read before the lanes of */
/* its column are updated, so all parities are those of the incoming state. */
#define Theta() \
v1 = a03 ^ a13 ^ a23; \
v2 = a00 ^ a10 ^ a20; \
v1 = ROTL32(v1, 5) ^ ROTL32(v1, 14); \
a00 ^= v1; \
a10 ^= v1; \
a20 ^= v1; \
v1 = a01 ^ a11 ^ a21; \
v2 = ROTL32(v2, 5) ^ ROTL32(v2, 14); \
a01 ^= v2; \
a11 ^= v2; \
a21 ^= v2; \
v2 = a02 ^ a12 ^ a22; \
v1 = ROTL32(v1, 5) ^ ROTL32(v1, 14); \
a02 ^= v1; \
a12 ^= v1; \
a22 ^= v1; \
v2 = ROTL32(v2, 5) ^ ROTL32(v2, 14); \
a03 ^= v2; \
a13 ^= v2; \
a23 ^= v2
/* Rho-west: Plane shift */
/* Rotates every lane of row 2 left by 11 bits and moves the lanes of row 1 */
/* one column up (a10 -> a11 -> a12 -> a13 -> a10), using v1 as temporary. */
#define Rho_west() \
a20 = ROTL32(a20, 11); \
a21 = ROTL32(a21, 11); \
a22 = ROTL32(a22, 11); \
a23 = ROTL32(a23, 11); \
v1 = a13; \
a13 = a12; \
a12 = a11; \
a11 = a10; \
a10 = v1
/* Iota: Round constants */
/* XORs the round constant __rc into lane a00 only. */
#define Iota(__rc) a00 ^= __rc
/* Chi: Non-linear step, on columns */
/* Applied independently to each column (a0c, a1c, a2c). The three updates are */
/* done in place and in this order: the second and third statements of each */
/* group read lanes already updated by the preceding ones, so the statement */
/* order must not be changed. */
#define Chi() \
a00 ^= ~a10 & a20; \
a10 ^= ~a20 & a00; \
a20 ^= ~a00 & a10; \
a01 ^= ~a11 & a21; \
a11 ^= ~a21 & a01; \
a21 ^= ~a01 & a11; \
a02 ^= ~a12 & a22; \
a12 ^= ~a22 & a02; \
a22 ^= ~a02 & a12; \
a03 ^= ~a13 & a23; \
a13 ^= ~a23 & a03; \
a23 ^= ~a03 & a13
/* Rho-east: Plane shift */
/* Rotates every lane of row 1 left by 1 bit. Rotates every lane of row 2 left */
/* by 8 bits and exchanges them two columns apart (a20 <-> a22, a21 <-> a23), */
/* using v1 as temporary. */
#define Rho_east() \
a10 = ROTL32(a10, 1); \
a11 = ROTL32(a11, 1); \
a12 = ROTL32(a12, 1); \
a13 = ROTL32(a13, 1); \
v1 = ROTL32(a23, 8); \
a23 = ROTL32(a21, 8); \
a21 = v1; \
v1 = ROTL32(a22, 8); \
a22 = ROTL32(a20, 8); \
a20 = v1
#define Round(__rc) \
Theta(); \
Dump3("Theta"); \
Rho_west(); \
Dump3("Rho-west"); \
Iota(__rc); \
Dump3("Iota"); \
Chi(); \
Dump3("Chi"); \
Rho_east(); \
static const uint32_t RC[MAXROUNDS] = {
void Xoodoo_Permute_Nrounds( uint32_t * state, uint32_t nr )
uint32_t i;
for (i = MAXROUNDS - nr; i < MAXROUNDS; ++i ) {
void Xoodoo_Permute_6rounds( uint32_t * state)
void Xoodoo_Permute_12rounds( uint32_t * state)
The eXtended Keccak Code Package (XKCP)
The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Xoodoo_h_
#define _Xoodoo_h_

#include <stdint.h>
#include <stdlib.h>

/* Geometry of the state: NROWS rows of NCOLUMS 32-bit lanes. */
#define MAXROUNDS   12
#define NROWS       3
#define NCOLUMS     4

/* Round constants */
#define _rc12   0x00000058
#define _rc11   0x00000038
#define _rc10   0x000003C0
#define _rc9    0x000000D0
#define _rc8    0x00000120
#define _rc7    0x00000014
#define _rc6    0x00000060
#define _rc5    0x0000002C
#define _rc4    0x00000380
#define _rc3    0x000000F0
#define _rc2    0x000001A0
#define _rc1    0x00000012

/*
 * ROTL32(a, offset): rotate the 32-bit value a left by offset bits.
 * The portable form combines the two halves with OR so that an offset of 0
 * (or any multiple of 32) returns a unchanged; with XOR both halves would be
 * the unshifted value and cancel to 0.
 */
#if !defined(ROTL32)
    #if defined (__arm__) && !defined(__GNUC__)
        #define ROTL32(a, offset)   __ror(a, (32-(offset))%32)
    #elif defined(_MSC_VER)
        #define ROTL32(a, offset)   _rotl(a, (offset)%32)
    #else
        #define ROTL32(a, offset)   ((((uint32_t)(a)) << ((offset)%32)) | (((uint32_t)(a)) >> ((32-(offset))%32)))
    #endif
#endif

/* READ32_UNALIGNED(p): read a 32-bit word through the address p. */
#if !defined(READ32_UNALIGNED)
    #if defined (__arm__) && !defined(__GNUC__)
        #define READ32_UNALIGNED(argAddress)    (*((const __packed uint32_t*)(argAddress)))
    #elif defined(_MSC_VER)
        #define READ32_UNALIGNED(argAddress)    (*((const uint32_t*)(argAddress)))
    #else
        #define READ32_UNALIGNED(argAddress)    (*((const uint32_t*)(argAddress)))
    #endif
#endif

/* WRITE32_UNALIGNED(p, v): store the 32-bit word v through the address p. */
#if !defined(WRITE32_UNALIGNED)
    #if defined (__arm__) && !defined(__GNUC__)
        #define WRITE32_UNALIGNED(argAddress, argData)  (*((__packed uint32_t*)(argAddress)) = (argData))
    #elif defined(_MSC_VER)
        #define WRITE32_UNALIGNED(argAddress, argData)  (*((uint32_t*)(argAddress)) = (argData))
    #else
        #define WRITE32_UNALIGNED(argAddress, argData)  (*((uint32_t*)(argAddress)) = (argData))
    #endif
#endif

/* index(x, y): position of lane (column x, row y) in the state array, with wrap-around. */
#if !defined(index)
    #define index(__x,__y)  ((((__y) % NROWS) * NCOLUMS) + ((__x) % NCOLUMS))
#endif

typedef uint32_t tXoodooLane;

#endif
The eXtended Keccak Code Package (XKCP)
The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include "Xoodoo-SnP.h"
#include "Xoodyak-parameters.h"
#ifdef OUTPUT
#include <stdio.h>
/* Bind the generic SnP_* names used by the whole-block routines below to the */
/* Xoodoo primitives; SnP_Permute is the 12-round permutation. */
#define SnP_AddByte Xoodoo_AddByte
#define SnP_AddBytes Xoodoo_AddBytes
#define SnP_ExtractBytes Xoodoo_ExtractBytes
#define SnP_ExtractAndAddBytes Xoodoo_ExtractAndAddBytes
#define SnP_Permute Xoodoo_Permute_12rounds
#define SnP_OverwriteBytes Xoodoo_OverwriteBytes
size_t Xoodyak_AbsorbKeyedFullBlocks(void *state, const uint8_t *X, size_t XLen)
size_t initialLength = XLen;
do {
SnP_Permute(state); /* Xoodyak_Up(instance, NULL, 0, 0); */
SnP_AddBytes(state, X, 0, Xoodyak_Rkin); /* Xoodyak_Down(instance, X, Xoodyak_Rkin, 0); */
SnP_AddByte(state, 0x01, Xoodyak_Rkin);
X += Xoodyak_Rkin;
XLen -= Xoodyak_Rkin;
} while (XLen >= Xoodyak_Rkin);
return initialLength - XLen;
size_t Xoodyak_AbsorbHashFullBlocks(void *state, const uint8_t *X, size_t XLen)
size_t initialLength = XLen;
do {
SnP_Permute(state); /* Xoodyak_Up(instance, NULL, 0, 0); */
SnP_AddBytes(state, X, 0, Xoodyak_Rhash); /* Xoodyak_Down(instance, X, Xoodyak_Rhash, 0); */
SnP_AddByte(state, 0x01, Xoodyak_Rhash);
X += Xoodyak_Rhash;
XLen -= Xoodyak_Rhash;
} while (XLen >= Xoodyak_Rhash);
return initialLength - XLen;
size_t Xoodyak_SqueezeKeyedFullBlocks(void *state, uint8_t *Y, size_t YLen)
size_t initialLength = YLen;
do {
SnP_AddByte(state, 0x01, 0); /* Xoodyak_Down(instance, NULL, 0, 0); */
SnP_Permute(state); /* Xoodyak_Up(instance, Y, Xoodyak_Rkout, 0); */
SnP_ExtractBytes(state, Y, 0, Xoodyak_Rkout);
Y += Xoodyak_Rkout;
YLen -= Xoodyak_Rkout;
} while (YLen >= Xoodyak_Rkout);
return initialLength - YLen;
size_t Xoodyak_SqueezeHashFullBlocks(void *state, uint8_t *Y, size_t YLen)
size_t initialLength = YLen;
do {
SnP_AddByte(state, 0x01, 0); /* Xoodyak_Down(instance, NULL, 0, 0); */
SnP_Permute(state); /* Xoodyak_Up(instance, Y, Xoodyak_Rhash, 0); */
SnP_ExtractBytes(state, Y, 0, Xoodyak_Rhash);
Y += Xoodyak_Rhash;
YLen -= Xoodyak_Rhash;
} while (YLen >= Xoodyak_Rhash);
return initialLength - YLen;
size_t Xoodyak_EncryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen)
size_t initialLength = IOLen;
do {
SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout);
SnP_OverwriteBytes(state, O, 0, Xoodyak_Rkout);
SnP_AddByte(state, 0x01, Xoodyak_Rkout);
I += Xoodyak_Rkout;
O += Xoodyak_Rkout;
IOLen -= Xoodyak_Rkout;
} while (IOLen >= Xoodyak_Rkout);
return initialLength - IOLen;
size_t Xoodyak_DecryptFullBlocks(void *state, const uint8_t *I, uint8_t *O, size_t IOLen)
size_t initialLength = IOLen;
do {
SnP_ExtractAndAddBytes(state, I, O, 0, Xoodyak_Rkout);
SnP_AddBytes(state, O, 0, Xoodyak_Rkout);
SnP_AddByte(state, 0x01, Xoodyak_Rkout);
I += Xoodyak_Rkout;
O += Xoodyak_Rkout;
IOLen -= Xoodyak_Rkout;
} while (IOLen >= Xoodyak_Rkout);
return initialLength - IOLen;
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Xoodyak_parameters_h_
#define _Xoodyak_parameters_h_

/* All values are byte counts. */
#define Xoodyak_f_bPrime    48  /* used as the state width: the last state byte is at index f_bPrime - 1 */
#define Xoodyak_Rhash       16  /* block length for absorbing and squeezing in hash mode */
#define Xoodyak_Rkin        44  /* block length for absorbing in keyed mode */
#define Xoodyak_Rkout       24  /* block length for squeezing and en/decrypting in keyed mode */
#define Xoodyak_lRatchet    16  /* number of bytes squeezed and re-absorbed by Ratchet */

#endif
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifdef XoodooReference
#include "displayIntermediateValues.h"
#include <assert.h>
#include <string.h>
#include "Xoodyak.h"
#ifdef OUTPUT
#include <stdlib.h>
#include <string.h>
static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length);
static void displayByteString(FILE *f, const char* synopsis, const uint8_t *data, unsigned int length)
unsigned int i;
fprintf(f, "%s:", synopsis);
for(i=0; i<length; i++)
fprintf(f, " %02x", (unsigned int)data[i]);
fprintf(f, "\n");
#define MyMin(a,b) (((a) < (b)) ? (a) : (b))
#ifdef XKCP_has_Xoodoo
#include "Xoodoo-SnP.h"
#define SnP Xoodoo
#define SnP_Permute Xoodoo_Permute_12rounds
#define prefix Xoodyak
#include ""
#undef prefix
#undef SnP
#undef SnP_Permute
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _Xoodyak_h_
#define _Xoodyak_h_

#include "config.h"
#ifdef XKCP_has_Xoodoo

#include <stddef.h>
#include "Cyclist.h"
#include "Xoodoo-SnP.h"
#include "Xoodyak-parameters.h"

/*
 * Declares the Xoodyak_Instance type (state sized and aligned for Xoodoo)
 * and the public functions Xoodyak_Initialize, Xoodyak_Absorb,
 * Xoodyak_Encrypt, Xoodyak_Decrypt, Xoodyak_Squeeze, Xoodyak_SqueezeKey and
 * Xoodyak_Ratchet.
 */
KCP_DeclareCyclistStructure(Xoodyak, Xoodoo_stateSizeInBytes, Xoodoo_stateAlignment)
KCP_DeclareCyclistFunctions(Xoodyak)

#else
#error This requires an implementation of Xoodoo
#endif

#endif
The eXtended Keccak Code Package (XKCP)
Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#ifndef _align_h_
#define _align_h_

/*
 * ALIGN(x): compiler-specific annotation requesting x-byte alignment for the
 * declaration it is attached to. On compilers not recognised below it
 * expands to nothing, i.e. no alignment is requested.
 */

/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. */
#ifdef ALIGN
#undef ALIGN
#endif

#if defined(__GNUC__)
#define ALIGN(x) __attribute__ ((aligned(x)))
#elif defined(_MSC_VER)
#define ALIGN(x) __declspec(align(x))
#elif defined(__ARMCC_VERSION)
#define ALIGN(x) __align(x)
#else
#define ALIGN(x)
#endif

#endif /* _align_h_ */
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
1. source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
2. binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation;
3. the name of the copyright holder is not used to endorse products
built using this software without specific written permission.
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
Issue Date: 20/12/2007
Changes for ARM 9/9/2010
/*
 * Byte-order selection header. Defines the two tag values IS_BIG_ENDIAN and
 * IS_LITTLE_ENDIAN, then walks through the endian macros that compilers and
 * system headers commonly provide, followed by a list of machine macros.
 *
 * NOTE(review): in this copy every branch of the conditionals below is empty
 * (no "# define" of the resulting byte-order macro is visible) and most of
 * the closing #endif / #else lines are absent. As written the header cannot
 * preprocess; restore it from the upstream brg_endian.h before use.
 */
#ifndef _BRG_ENDIAN_H
#define _BRG_ENDIAN_H
#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
/* The "#if 0" below compiles out the system-header probing that follows it. */
/* NOTE(review): the #endif that ends this disabled region is not visible, so its extent cannot be told from here. */
#if 0
/* Include files where endian defines and byteswap functions may reside */
#if defined( __sun )
# include <sys/isa_defs.h>
#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
# include <sys/endian.h>
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
# include <machine/endian.h>
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
# if !defined( __MINGW32__ ) && !defined( _AIX )
# include <endian.h>
# if !defined( __BEOS__ )
# include <byteswap.h>
# endif
# endif
/* Now attempt to set the define for platform byte order using any */
/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
/* seem to encompass most endian symbol definitions */
/* Form 1: BIG_ENDIAN / LITTLE_ENDIAN / BYTE_ORDER */
#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
# endif
#elif defined( BIG_ENDIAN )
#elif defined( LITTLE_ENDIAN )
/* Form 2: _BIG_ENDIAN / _LITTLE_ENDIAN / _BYTE_ORDER */
#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
# endif
#elif defined( _BIG_ENDIAN )
#elif defined( _LITTLE_ENDIAN )
/* Form 3: __BIG_ENDIAN / __LITTLE_ENDIAN / __BYTE_ORDER */
#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN )
#elif defined( __LITTLE_ENDIAN )
/* Form 4: __BIG_ENDIAN__ / __LITTLE_ENDIAN__ / __BYTE_ORDER__ */
#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
# endif
#elif defined( __BIG_ENDIAN__ )
#elif defined( __LITTLE_ENDIAN__ )
/* if the platform byte order could not be determined, then try to */
/* set this define using common machine defines */
#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
defined( vax ) || defined( vms ) || defined( VMS ) || \
defined( __VMS ) || defined( _M_X64 )
#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \
defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ )
#elif defined(__arm__)
# ifdef __BIG_ENDIAN
# else
# endif
/* Manual override: the two "#elif <constant>" lines are the switches a porter is expected to edit. */
/* NOTE(review): the line numbers quoted in the #error text presumably refer to the upstream file layout, not to this copy - confirm. */
#elif 1 /* **** EDIT HERE IF NECESSARY **** */
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order
/* File generated by ToTargetConfigFile.xsl */
#define XKCP_has_Xoodyak
#define XKCP_has_Xoodoo
The eXtended Keccak Code Package (XKCP)
Xoodyak, designed by Joan Daemen, Seth Hoffert, Michaël Peeters, Gilles Van Assche and Ronny Van Keer.
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to the Keccak Team website:
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
#include "crypto_hash.h"
/* Digest length in bytes; 32 unless the build already defines another value. */
#ifndef crypto_hash_BYTES
#define crypto_hash_BYTES 32
#endif

#include "Xoodyak.h"

/*
 * Hash a message with Xoodyak.
 *
 * out   - receives crypto_hash_BYTES bytes of digest
 * in    - message bytes to absorb
 * inlen - number of message bytes; it is converted to size_t before being
 *         passed on, so it must be representable as a size_t
 *
 * Returns 0 unconditionally.
 */
int crypto_hash(unsigned char *out, const unsigned char *in, unsigned long long inlen)
{
    Xoodyak_Instance instance;

    /* No key, ID or counter is supplied to the instance. */
    Xoodyak_Initialize(&instance, NULL, 0, NULL, 0, NULL, 0);
    Xoodyak_Absorb(&instance, in, (size_t)inlen);
    Xoodyak_Squeeze(&instance, out, crypto_hash_BYTES);
#if 0
    /* Disabled debug aid: prints the digest as a sequence of \xNN escapes. */
    {
        unsigned int i;

        for (i = 0; i < crypto_hash_BYTES; ++i )
            printf("\\x%02x", out[i] );
    }
#endif
    return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment