From e3e8b784ec4ccf2dbc3e354ed5fd46a0c3e79d97 Mon Sep 17 00:00:00 2001 From: Alexandre Adomnicai Date: Thu, 12 Mar 2020 11:21:38 +0000 Subject: [PATCH] giftcofb128v1.armcortexm --- gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/api.h | 5 ----- gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/encrypt.c | 154 ---------------------------------------------------------------------------------------------------------------------------------------------------------- gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/giftb128.h | 19 ------------------- gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/giftb128.s | 2022 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/giftcofb128v1.h | 81 --------------------------------------------------------------------------------- gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/api.h | 5 +++++ gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/cofb.h | 20 ++++++++++++++++++++ gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/encrypt.c | 191 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/giftb128.h | 13 +++++++++++++ gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/giftb128.s | 768 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/api.h | 5 +++++ gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/cofb.h | 20 ++++++++++++++++++++ gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/encrypt.c | 191 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/giftb128.h | 13 +++++++++++++ gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/giftb128.s | 512 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 
gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/api.h | 5 +++++ gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/cofb.h | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/encrypt.c | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/giftb128.h | 13 +++++++++++++ gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/giftb128.s | 2044 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 20 files changed, 4007 insertions(+), 2281 deletions(-) delete mode 100644 gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/api.h delete mode 100644 gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/encrypt.c delete mode 100644 gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/giftb128.h delete mode 100644 gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/giftb128.s delete mode 100644 gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/giftcofb128v1.h create mode 100644 gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/api.h create mode 100644 gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/cofb.h create mode 100644 gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/encrypt.c create mode 100644 gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/giftb128.h create mode 100644 gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/giftb128.s create mode 100644 gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/api.h create mode 100644 gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/cofb.h create mode 100644 gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/encrypt.c 
create mode 100644 gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/giftb128.h create mode 100644 gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/giftb128.s create mode 100644 gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/api.h create mode 100644 gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/cofb.h create mode 100644 gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/encrypt.c create mode 100644 gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/giftb128.h create mode 100644 gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/giftb128.s diff --git a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/api.h b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/api.h deleted file mode 100644 index fb1d58b..0000000 --- a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/api.h +++ /dev/null @@ -1,5 +0,0 @@ -#define CRYPTO_KEYBYTES 16 -#define CRYPTO_NSECBYTES 0 -#define CRYPTO_NPUBBYTES 16 -#define CRYPTO_ABYTES 16 -#define CRYPTO_NOOVERLAP 1 diff --git a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/encrypt.c b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/encrypt.c deleted file mode 100644 index 80ebb21..0000000 --- a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/encrypt.c +++ /dev/null @@ -1,154 +0,0 @@ -/******************************************************************************* -* Constant-time 32-bit implementation of the GIFT-COFB authenticated cipher. 
-* -* @author Alexandre Adomnicai, Nanyang Technological University, -* alexandre.adomnicai@ntu.edu.sg -* @date January 2020 -*******************************************************************************/ -#include -#include "giftb128.h" -#include "giftcofb128v1.h" - -static inline void padding(u32* dest, const u32* src, const u32 no_of_bytes) { - u32 i; - if (no_of_bytes == 0) { - dest[0] = 0x00000080; // little-endian - dest[1] = 0x00000000; - dest[2] = 0x00000000; - dest[3] = 0x00000000; - } - else if (no_of_bytes < GIFT128_BLOCK_SIZE) { - for (i = 0; i < no_of_bytes/4+1; i++) - dest[i] = src[i]; - dest[i-1] &= ~(0xffffffffL << (no_of_bytes % 4)*8); - dest[i-1] |= 0x00000080L << (no_of_bytes % 4)*8; - for (; i < 4; i++) - dest[i] = 0x00000000; - } - else { - dest[0] = src[0]; - dest[1] = src[1]; - dest[2] = src[2]; - dest[3] = src[3]; - } -} - -/**************************************************************************** -* Constant-time implementation of the GIFT-COFB authenticated cipher based on -* fixsliced GIFTb-128. Encryption/decryption is handled by the same function, -* depending on the 'mode' parameter (1/0). 
-****************************************************************************/ -int giftcofb_crypt(u8* out, const u8* key, const u8* nonce, const u8* ad, - u32 ad_len, const u8* in, u32 in_len, const int encrypting) { - - u32 tmp0, tmp1, emptyA, emptyM, offset[2]; - u32 input[4], rkey[80]; - u8 Y[GIFT128_BLOCK_SIZE]; - - if (!encrypting) { - if (in_len < TAG_SIZE) - return -1; - in_len -= TAG_SIZE; - } - - if(ad_len == 0) - emptyA = 1; - else - emptyA = 0; - - if(in_len == 0) - emptyM =1; - else - emptyM = 0; - - gift128_keyschedule(key, rkey); - giftb128_encrypt_block(Y, rkey, nonce); - offset[0] = ((u32*)Y)[0]; - offset[1] = ((u32*)Y)[1]; - - while(ad_len > GIFT128_BLOCK_SIZE){ - RHO1(input, (u32*)Y, (u32*)ad, GIFT128_BLOCK_SIZE); - DOUBLE_HALF_BLOCK(offset); - XOR_TOP_BAR_BLOCK(input, offset); - giftb128_encrypt_block(Y, rkey, (u8*)input); - ad += GIFT128_BLOCK_SIZE; - ad_len -= GIFT128_BLOCK_SIZE; - } - - TRIPLE_HALF_BLOCK(offset); - if((ad_len % GIFT128_BLOCK_SIZE != 0) || (emptyA)) - TRIPLE_HALF_BLOCK(offset); - if(emptyM) { - TRIPLE_HALF_BLOCK(offset); - TRIPLE_HALF_BLOCK(offset); - } - - RHO1(input, (u32*)Y, (u32*)ad, ad_len); - XOR_TOP_BAR_BLOCK(input, offset); - giftb128_encrypt_block(Y, rkey, (u8*)input); - - while (in_len > GIFT128_BLOCK_SIZE){ - DOUBLE_HALF_BLOCK(offset); - if (encrypting) - RHO((u32*)Y, (u32*)in, input, (u32*)out, GIFT128_BLOCK_SIZE); - else - RHO_PRIME((u32*)Y, (u32*)in, input, (u32*)out, GIFT128_BLOCK_SIZE); - XOR_TOP_BAR_BLOCK(input, offset); - giftb128_encrypt_block(Y, rkey, (u8*)input); - in += GIFT128_BLOCK_SIZE; - out += GIFT128_BLOCK_SIZE; - in_len -= GIFT128_BLOCK_SIZE; - } - - if(!emptyM){ - TRIPLE_HALF_BLOCK(offset); - if(in_len % GIFT128_BLOCK_SIZE != 0) - TRIPLE_HALF_BLOCK(offset); - if (encrypting) { - RHO((u32*)Y, (u32*)in, input, (u32*)out, in_len); - out += in_len; - } - else { - RHO_PRIME((u32*)Y, (u32*)in, input, (u32*)out, in_len); - in += in_len; - } - XOR_TOP_BAR_BLOCK(input, offset); - giftb128_encrypt_block(Y, 
rkey, (u8*)input); - } - - if (encrypting) { // encryption mode - memcpy(out, Y, TAG_SIZE); - return 0; - } - // decrypting - tmp0 = 0; - for(tmp1 = 0; tmp1 < TAG_SIZE; tmp1++) - tmp0 |= in[tmp1] ^ Y[tmp1]; - return tmp0; -} - -/**************************************************************************** -* API required by the NIST for the LWC competition. -****************************************************************************/ -int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, - const unsigned char* m, unsigned long long mlen, - const unsigned char* ad, unsigned long long adlen, - const unsigned char* nsec, const unsigned char* npub, - const unsigned char* k) { - (void)nsec; - *clen = mlen + TAG_SIZE; - return giftcofb_crypt(c, k, npub, ad, adlen, m, mlen, COFB_ENCRYPT); -} - -/**************************************************************************** -* API required by the NIST for the LWC competition. -****************************************************************************/ -int crypto_aead_decrypt(unsigned char* m, unsigned long long *mlen, - unsigned char* nsec, const unsigned char* c, - unsigned long long clen, const unsigned char* ad, - unsigned long long adlen, const unsigned char* npub, - const unsigned char *k) { - (void)nsec; - *mlen = clen - TAG_SIZE; - return giftcofb_crypt(m, k, npub, ad, adlen, c, clen, COFB_DECRYPT); -} diff --git a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/giftb128.h b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/giftb128.h deleted file mode 100644 index a68d0ef..0000000 --- a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/giftb128.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef GIFT128_H_ -#define GIFT128_H_ - -#define KEY_SIZE 16 -#define GIFT128_BLOCK_SIZE 16 - -typedef unsigned char u8; -typedef unsigned int u32; - -typedef struct param_128 { - u32 ctr; - u8 nonce[12]; - u32 rkey[80]; -} param_128; - -extern void gift128_keyschedule(const u8* key, 
u32* rkey); -extern void giftb128_encrypt_block(u8* out_block, const u32* rkey, const u8* in_block); - -#endif // GIFT128_H_ \ No newline at end of file diff --git a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/giftb128.s b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/giftb128.s deleted file mode 100644 index 550dcdf..0000000 --- a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/giftb128.s +++ /dev/null @@ -1,2022 +0,0 @@ -/******************************************************************************* -* Constant-time ARM assembly implementation of the GIFTb-128 block cipher used -* in the GIFT-COFB authenticated cipher. -* -* @author Alexandre Adomnicai, Nanyang Technological University, -* alexandre.adomnicai@ntu.edu.sg -* @date January 2020 -*******************************************************************************/ - -.syntax unified -.thumb - -/***************************************************************************** -* Fully unrolled ARM assembly implementation of the GIFT-128 key schedule -* according to the fixsliced representation. 
-*****************************************************************************/ -@ void gift128_keyschedule(const u8* key, u32* rkey) -.global gift128_keyschedule -.type gift128_keyschedule,%function -gift128_keyschedule: - push {r2-r12, r14} - // load key - ldm r0, {r4-r7} - rev r4, r4 - rev r5, r5 - rev r6, r6 - rev r7, r7 - strd r7, r5, [r1] - strd r6, r4, [r1, #8] - // keyschedule using classical representation for the first 20 rounds - // masks for key update - movw r12, #0x3fff - lsl r12, r12, #16 - movw r11, #0x0003 - lsl r11, r11, #16 - movw r10, #0x000f - movw r9, #0x0fff - // 1st classical key update - and r2, r10, r7, lsr #12 - and r3, r7, r9 - orr r2, r2, r3, lsl #4 - and r3, r12, r7, lsr #2 - orr r2, r2, r3 - and r7, r7, r11 - orr r7, r2, r7, lsl #14 - strd r5, r7, [r1, #16] - // 2nd classical key update - and r2, r10, r6, lsr #12 - and r3, r6, r9 - orr r2, r2, r3, lsl #4 - and r3, r12, r6, lsr #2 - orr r2, r2, r3 - and r6, r6, r11 - orr r6, r2, r6, lsl #14 - strd r4, r6, [r1, #24] - // 3rd classical key update - and r2, r10, r5, lsr #12 - and r3, r5, r9 - orr r2, r2, r3, lsl #4 - and r3, r12, r5, lsr #2 - orr r2, r2, r3 - and r5, r5, r11 - orr r5, r2, r5, lsl #14 - strd r7, r5, [r1, #32] - // 4th classical key update - and r2, r10, r4, lsr #12 - and r3, r4, r9 - orr r2, r2, r3, lsl #4 - and r3, r12, r4, lsr #2 - orr r2, r2, r3 - and r4, r4, r11 - orr r4, r2, r4, lsl #14 - strd r6, r4, [r1, #40] - // 5th classical key update - and r2, r10, r7, lsr #12 - and r3, r7, r9 - orr r2, r2, r3, lsl #4 - and r3, r12, r7, lsr #2 - orr r2, r2, r3 - and r7, r7, r11 - orr r7, r2, r7, lsl #14 - strd r5, r7, [r1, #48] - // 6th classical key update - and r2, r10, r6, lsr #12 - and r3, r6, r9 - orr r2, r2, r3, lsl #4 - and r3, r12, r6, lsr #2 - orr r2, r2, r3 - and r6, r6, r11 - orr r6, r2, r6, lsl #14 - strd r4, r6, [r1, #56] - // 7th classical key update - and r2, r10, r5, lsr #12 - and r3, r5, r9 - orr r2, r2, r3, lsl #4 - and r3, r12, r5, lsr #2 - orr r2, r2, r3 - 
and r5, r5, r11 - orr r5, r2, r5, lsl #14 - strd r7, r5, [r1, #64] - // 8th classical key update - and r2, r10, r4, lsr #12 - and r3, r4, r9 - orr r2, r2, r3, lsl #4 - and r3, r12, r4, lsr #2 - orr r2, r2, r3 - and r4, r4, r11 - orr r4, r2, r4, lsl #14 - strd r6, r4, [r1, #72] - // rearrange the rkeys to their respective new representations - // REARRANGE_RKEY_0 - movw r3, #0x0055 - movt r3, #0x0055 - movw r10, #0x3333 - movw r11, #0x000f - movt r11, #0x000f - ldrd r6, r4, [r1] - eor r12, r6, r6, lsr #9 - and r12, r12, r3 - eor r6, r12 - eor r6, r6, r12, lsl #9 //SWAPMOVE(r6, r6, 0x00550055, 9); - eor r12, r4, r4, lsr #9 - and r12, r12, r3 - eor r4, r12 - eor r4, r4, r12, lsl #9 //SWAPMOVE(r4, r4, 0x00550055, 9); - eor r12, r6, r6, lsr #18 - and r12, r12, r10 - eor r6, r12 - eor r6, r6, r12, lsl #18 //SWAPMOVE(r6, r6, 0x3333, 18); - eor r12, r4, r4, lsr #18 - and r12, r12, r10 - eor r4, r12 - eor r4, r4, r12, lsl #18 //SWAPMOVE(r4, r4, 0x3333, 18); - eor r12, r6, r6, lsr #12 - and r12, r12, r11 - eor r6, r12 - eor r6, r6, r12, lsl #12 //SWAPMOVE(r6, r6, 0x000f000f, 12); - eor r12, r4, r4, lsr #12 - and r12, r12, r11 - eor r4, r12 - eor r4, r4, r12, lsl #12 //SWAPMOVE(r4, r4, 0x000f000f, 12); - eor r12, r6, r6, lsr #24 - and r12, r12, #0xff - eor r6, r12 - eor r6, r6, r12, lsl #24 //SWAPMOVE(r6, r6, 0x000000ff, 24); - eor r12, r4, r4, lsr #24 - and r12, r12, #0xff - eor r4, r12 - eor r4, r4, r12, lsl #24 //SWAPMOVE(r4, r4, 0x000000ff, 24); - strd r6, r4, [r1] - ldrd r6, r4, [r1, #40] - eor r12, r6, r6, lsr #9 - and r12, r12, r3 - eor r6, r12 - eor r6, r6, r12, lsl #9 //SWAPMOVE(r6, r6, 0x00550055, 9); - eor r12, r4, r4, lsr #9 - and r12, r12, r3 - eor r4, r12 - eor r4, r4, r12, lsl #9 //SWAPMOVE(r4, r4, 0x00550055, 9); - eor r12, r6, r6, lsr #18 - and r12, r12, r10 - eor r6, r12 - eor r6, r6, r12, lsl #18 //SWAPMOVE(r6, r6, 0x3333, 18); - eor r12, r4, r4, lsr #18 - and r12, r12, r10 - eor r4, r12 - eor r4, r4, r12, lsl #18 //SWAPMOVE(r4, r4, 0x3333, 18); - eor r12, 
r6, r6, lsr #12 - and r12, r12, r11 - eor r6, r12 - eor r6, r6, r12, lsl #12 //SWAPMOVE(r6, r6, 0x000f000f, 12); - eor r12, r4, r4, lsr #12 - and r12, r12, r11 - eor r4, r12 - eor r4, r4, r12, lsl #12 //SWAPMOVE(r4, r4, 0x000f000f, 12); - eor r12, r6, r6, lsr #24 - and r12, r12, #0xff - eor r6, r12 - eor r6, r6, r12, lsl #24 //SWAPMOVE(r6, r6, 0x000000ff, 24); - eor r12, r4, r4, lsr #24 - and r12, r12, #0xff - eor r4, r12 - eor r4, r4, r12, lsl #24 //SWAPMOVE(r4, r4, 0x000000ff, 24); - strd r6, r4, [r1, #40] - // REARRANGE_RKEY_1 - movw r3, #0x1111 - movt r3, #0x1111 - movw r10, #0x0303 - movt r10, #0x0303 - ldrd r5, r7, [r1, #8] - eor r8, r7, r7, lsr #3 - and r8, r8, r3 - eor r7, r8 - eor r7, r7, r8, lsl #3 //SWAPMOVE(r7, r7, 0x11111111, 3); - eor r8, r5, r5, lsr #3 - and r8, r8, r3 - eor r5, r8 - eor r5, r5, r8, lsl #3 //SWAPMOVE(r5, r5, 0x11111111, 3); - eor r8, r7, r7, lsr #6 - and r8, r8, r10 - eor r7, r8 - eor r7, r7, r8, lsl #6 //SWAPMOVE(r7, r7, 0x03030303, 6); - eor r8, r5, r5, lsr #6 - and r8, r8, r10 - eor r5, r8 - eor r5, r5, r8, lsl #6 //SWAPMOVE(r5, r5, 0x03030303, 6); - eor r8, r7, r7, lsr #12 - and r8, r8, r11 - eor r7, r8 - eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12); - eor r8, r5, r5, lsr #12 - and r8, r8, r11 - eor r5, r8 - eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12); - eor r8, r7, r7, lsr #24 - and r8, r8, #0xff - eor r7, r8 - eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x000000ff, 24); - eor r8, r5, r5, lsr #24 - and r8, r8, #0xff - eor r5, r8 - eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24); - strd r5, r7, [r1, #8] - ldrd r5, r7, [r1, #48] - eor r8, r7, r7, lsr #3 - and r8, r8, r3 - eor r7, r8 - eor r7, r7, r8, lsl #3 //SWAPMOVE(r7, r7, 0x11111111, 3); - eor r8, r5, r5, lsr #3 - and r8, r8, r3 - eor r5, r8 - eor r5, r5, r8, lsl #3 //SWAPMOVE(r5, r5, 0x11111111, 3); - eor r8, r7, r7, lsr #6 - and r8, r8, r10 - eor r7, r8 - eor r7, r7, r8, lsl #6 //SWAPMOVE(r7, r7, 0x03030303, 6); - eor r8, r5, r5, lsr #6 
- and r8, r8, r10 - eor r5, r8 - eor r5, r5, r8, lsl #6 //SWAPMOVE(r5, r5, 0x03030303, 6); - eor r8, r7, r7, lsr #12 - and r8, r8, r11 - eor r7, r8 - eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12); - eor r8, r5, r5, lsr #12 - and r8, r8, r11 - eor r5, r8 - eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12); - eor r8, r7, r7, lsr #24 - and r8, r8, #0xff - eor r7, r8 - eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x000000ff, 24); - eor r8, r5, r5, lsr #24 - and r8, r8, #0xff - eor r5, r8 - eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24); - strd r5, r7, [r1, #48] - // REARRANGE_RKEY_2 - movw r3, #0xaaaa - movw r10, #0x3333 - movw r11, #0xf0f0 - ldrd r5, r7, [r1, #16] - eor r8, r7, r7, lsr #15 - and r8, r8, r3 - eor r7, r8 - eor r7, r7, r8, lsl #15 //SWAPMOVE(r7, r7, 0x0000aaaa, 15); - eor r8, r5, r5, lsr #15 - and r8, r8, r3 - eor r5, r8 - eor r5, r5, r8, lsl #15 //SWAPMOVE(r5, r5, 0x0000aaaa, 15); - eor r8, r7, r7, lsr #18 - and r8, r8, r10 - eor r7, r8 - eor r7, r7, r8, lsl #18 //SWAPMOVE(r7, r7, 0x00003333, 18); - eor r8, r5, r5, lsr #18 - and r8, r8, r10 - eor r5, r8 - eor r5, r5, r8, lsl #18 //SWAPMOVE(r5, r5, 0x00003333, 18); - eor r8, r7, r7, lsr #12 - and r8, r8, r11 - eor r7, r8 - eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12); - eor r8, r5, r5, lsr #12 - and r8, r8, r11 - eor r5, r8 - eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12); - eor r8, r7, r7, lsr #24 - and r8, r8, #0xff - eor r7, r8 - eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x00000ff, 24); - eor r8, r5, r5, lsr #24 - and r8, r8, #0xff - eor r5, r8 - eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24); - strd r5, r7, [r1, #16] - ldrd r5, r7, [r1, #56] - eor r8, r7, r7, lsr #15 - and r8, r8, r3 - eor r7, r8 - eor r7, r7, r8, lsl #15 //SWAPMOVE(r7, r7, 0x0000aaaa, 15); - eor r8, r5, r5, lsr #15 - and r8, r8, r3 - eor r5, r8 - eor r5, r5, r8, lsl #15 //SWAPMOVE(r5, r5, 0x0000aaaa, 15); - eor r8, r7, r7, lsr #18 - and r8, r8, r10 - eor r7, r8 - 
eor r7, r7, r8, lsl #18 //SWAPMOVE(r7, r7, 0x00003333, 18); - eor r8, r5, r5, lsr #18 - and r8, r8, r10 - eor r5, r8 - eor r5, r5, r8, lsl #18 //SWAPMOVE(r5, r5, 0x00003333, 18); - eor r8, r7, r7, lsr #12 - and r8, r8, r11 - eor r7, r8 - eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12); - eor r8, r5, r5, lsr #12 - and r8, r8, r11 - eor r5, r8 - eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12); - eor r8, r7, r7, lsr #24 - and r8, r8, #0xff - eor r7, r8 - eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x000000ff, 24); - eor r8, r5, r5, lsr #24 - and r8, r8, #0xff - eor r5, r8 - eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24); - strd r5, r7, [r1, #56] - // REARRANGE_RKEY_3 - movw r3, #0x0a0a - movt r3, #0x0a0a - movw r10, #0x00cc - movt r10, #0x00cc - ldrd r5, r7, [r1, #24] - eor r8, r7, r7, lsr #3 - and r8, r8, r3 - eor r7, r8 - eor r7, r7, r8, lsl #3 //SWAPMOVE(r7, r7, 0x0a0a0a0a, 3); - eor r8, r5, r5, lsr #3 - and r8, r8, r3 - eor r5, r8 - eor r5, r5, r8, lsl #3 //SWAPMOVE(r5, r5, 0x0a0a0a0a, 3); - eor r8, r7, r7, lsr #6 - and r8, r8, r10 - eor r7, r8 - eor r7, r7, r8, lsl #6 //SWAPMOVE(r7, r7, 0x00cc00cc, 6); - eor r8, r5, r5, lsr #6 - and r8, r8, r10 - eor r5, r8 - eor r5, r5, r8, lsl #6 //SWAPMOVE(r5, r5, 0x00cc00cc, 6); - eor r8, r7, r7, lsr #12 - and r8, r8, r11 - eor r7, r8 - eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12); - eor r8, r5, r5, lsr #12 - and r8, r8, r11 - eor r5, r8 - eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12); - eor r8, r7, r7, lsr #24 - and r8, r8, #0xff - eor r7, r8 - eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x000000ff, 24); - eor r8, r5, r5, lsr #24 - and r8, r8, #0xff - eor r5, r8 - eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24); - strd r5, r7, [r1, #24] - ldrd r5, r7, [r1, #64] - eor r8, r7, r7, lsr #3 - and r8, r8, r3 - eor r7, r8 - eor r7, r7, r8, lsl #3 //SWAPMOVE(r7, r7, 0x0a0a0a0a, 3); - eor r8, r5, r5, lsr #3 - and r8, r8, r3 - eor r5, r8 - eor r5, r5, r8, lsl #3 
//SWAPMOVE(r5, r5, 0x0a0a0a0a, 3); - eor r8, r7, r7, lsr #6 - and r8, r8, r10 - eor r7, r8 - eor r7, r7, r8, lsl #6 //SWAPMOVE(r7, r7, 0x00cc00cc, 6); - eor r8, r5, r5, lsr #6 - and r8, r8, r10 - eor r5, r8 - eor r5, r5, r8, lsl #6 //SWAPMOVE(r5, r5, 0x00cc00cc, 6); - eor r8, r7, r7, lsr #12 - and r8, r8, r11 - eor r7, r8 - eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12); - eor r8, r5, r5, lsr #12 - and r8, r8, r11 - eor r5, r8 - eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12); - eor r8, r7, r7, lsr #24 - and r8, r8, #0xff - eor r7, r8 - eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x0000ff00, 24); - eor r8, r5, r5, lsr #24 - and r8, r8, #0xff - eor r5, r8 - eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x0000ff00, 24); - strd r5, r7, [r1, #64] - //keyschedule according to the new representations - // KEY_DOULBE/TRIPLE_UPDATE_0 - // masks - movw r12, #0x3333 - movt r12, #0x3333 - mvn r11, r12 - movw r10, #0x3333 - movw r9, #0x4444 - movt r9, #0x5555 - movw r8, #0x1100 - movt r8, #0x5555 - ldrd r4, r5, [r1] - and r2, r12, r4, ror #24 - and r4, r4, r11 - orr r4, r2, r4, ror #16 //KEY_TRIPLE_UPDATE_1(r4) - eor r2, r4, r4, lsr #1 - and r2, r2, r8 - eor r4, r4, r2 - eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x55551100, 1) - eor r2, r5, r5, lsr #16 - and r2, r2, r10 - eor r5, r5, r2 - eor r5, r5, r2, lsl #16 //SWAPMOVE(r5, r5, 0x00003333, 16) - eor r2, r5, r5, lsr #1 - and r2, r2, r9 - eor r5, r5, r2 - eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x555544444, 1) - strd r5, r4, [r1, #80] - and r2, r12, r5, ror #24 - and r5, r5, r11 - orr r5, r2, r5, ror #16 //KEY_TRIPLE_UPDATE_1(r5) - eor r2, r5, r5, lsr #1 - and r2, r2, r8 - eor r5, r5, r2 - eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x55551100, 1) - eor r2, r4, r4, lsr #16 - and r2, r2, r10 - eor r4, r4, r2 - eor r4, r4, r2, lsl #16 //SWAPMOVE(r4, r4, 0x00003333, 16) - eor r2, r4, r4, lsr #1 - and r2, r2, r9 - eor r4, r4, r2 - eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x555544444, 1) - strd r4, r5, [r1, 
#160] - and r2, r12, r4, ror #24 - and r4, r4, r11 - orr r4, r2, r4, ror #16 //KEY_TRIPLE_UPDATE_1(r4) - eor r2, r4, r4, lsr #1 - and r2, r2, r8 - eor r4, r4, r2 - eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x55551100, 1) - eor r2, r5, r5, lsr #16 - and r2, r2, r10 - eor r5, r5, r2 - eor r5, r5, r2, lsl #16 //SWAPMOVE(r5, r5, 0x00003333, 16) - eor r2, r5, r5, lsr #1 - and r2, r2, r9 - eor r5, r5, r2 - eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x555544444, 1) - strd r5, r4, [r1, #240] - ldrd r4, r5, [r1, #40] - and r2, r12, r4, ror #24 - and r4, r4, r11 - orr r4, r2, r4, ror #16 //KEY_TRIPLE_UPDATE_1(r4) - eor r2, r4, r4, lsr #1 - and r2, r2, r8 - eor r4, r4, r2 - eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x55551100, 1) - eor r2, r5, r5, lsr #16 - and r2, r2, r10 - eor r5, r5, r2 - eor r5, r5, r2, lsl #16 //SWAPMOVE(r5, r5, 0x00003333, 16) - eor r2, r5, r5, lsr #1 - and r2, r2, r9 - eor r5, r5, r2 - eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x555544444, 1) - strd r5, r4, [r1, #120] - and r2, r12, r5, ror #24 - and r5, r5, r11 - orr r5, r2, r5, ror #16 //KEY_TRIPLE_UPDATE_1(r5) - eor r2, r5, r5, lsr #1 - and r2, r2, r8 - eor r5, r5, r2 - eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x55551100, 1) - eor r2, r4, r4, lsr #16 - and r2, r2, r10 - eor r4, r4, r2 - eor r4, r4, r2, lsl #16 //SWAPMOVE(r4, r4, 0x00003333, 16) - eor r2, r4, r4, lsr #1 - and r2, r2, r9 - eor r4, r4, r2 - eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x555544444, 1) - strd r4, r5, [r1, #200] - and r2, r12, r4, ror #24 - and r4, r4, r11 - orr r4, r2, r4, ror #16 //KEY_TRIPLE_UPDATE_1(r4) - eor r2, r4, r4, lsr #1 - and r2, r2, r8 - eor r4, r4, r2 - eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x55551100, 1) - eor r2, r5, r5, lsr #16 - and r2, r2, r10 - eor r5, r5, r2 - eor r5, r5, r2, lsl #16 //SWAPMOVE(r5, r5, 0x00003333, 16) - eor r2, r5, r5, lsr #1 - and r2, r2, r9 - eor r5, r5, r2 - eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x555544444, 1) - strd r5, r4, [r1, #280] - // KEY_DOULBE/TRIPLE_UPDATE_2 - // 
masks - movw r12, #0x0f00 - movt r12, #0x0f00 - movw r11, #0x0003 - movt r11, #0x0003 - movw r10, #0x003f - movt r10, #0x003f - lsl r9, r11, #8 //0x03000300 - movw r8, #0x0007 - movt r8, #0x0007 - movw r7, #0x001f - movt r7, #0x001f - ldrd r4, r5, [r1, #8] - and r2, r9, r4, lsr #6 - and r3, r4, r10, lsl #8 - orr r2, r2, r3, lsl #2 - and r3, r8, r4, lsr #5 - orr r2, r2, r3 - and r4, r4, r7 - orr r4, r2, r4, lsl #3 //KEY_TRIPLE_UPDATE_2(r4) - and r2, r12, r5, lsr #4 - and r3, r5, r12 - orr r2, r2, r3, lsl #4 - and r3, r11, r5, lsr #6 - orr r2, r2, r3 - and r5, r5, r10 - orr r5, r2, r5, lsl #2 //KEY_DOUBLE_UPDATE_2(r5) - strd r5, r4, [r1, #88] - and r2, r9, r5, lsr #6 - and r3, r5, r10, lsl #8 - orr r2, r2, r3, lsl #2 - and r3, r8, r5, lsr #5 - orr r2, r2, r3 - and r5, r5, r7 - orr r5, r2, r5, lsl #3 //KEY_TRIPLE_UPDATE_2(r5) - and r2, r12, r4, lsr #4 - and r3, r4, r12 - orr r2, r2, r3, lsl #4 - and r3, r11, r4, lsr #6 - orr r2, r2, r3 - and r4, r4, r10 - orr r4, r2, r4, lsl #2 //KEY_DOUBLE_UPDATE_2(r4) - strd r4, r5, [r1, #168] - and r2, r9, r4, lsr #6 - and r3, r4, r10, lsl #8 - orr r2, r2, r3, lsl #2 - and r3, r8, r4, lsr #5 - orr r2, r2, r3 - and r4, r4, r7 - orr r4, r2, r4, lsl #3 //KEY_TRIPLE_UPDATE_2(r4) - and r2, r12, r5, lsr #4 - and r3, r5, r12 - orr r2, r2, r3, lsl #4 - and r3, r11, r5, lsr #6 - orr r2, r2, r3 - and r5, r5, r10 - orr r5, r2, r5, lsl#2 //KEY_DOUBLE_UPDATE_2(r5) - strd r5, r4, [r1, #248] - ldrd r4, r5, [r1, #48] - and r2, r9, r4, lsr #6 - and r3, r4, r10, lsl #8 - orr r2, r2, r3, lsl #2 - and r3, r8, r4, lsr #5 - orr r2, r2, r3 - and r4, r4, r7 - orr r4, r2, r4, lsl #3 //KEY_TRIPLE_UPDATE_2(r4) - and r2, r12, r5, lsr #4 - and r3, r5, r12 - orr r2, r2, r3, lsl #4 - and r3, r11, r5, lsr #6 - orr r2, r2, r3 - and r5, r5, r10 - orr r5, r2, r5, lsl #2 //KEY_DOUBLE_UPDATE_2(r5) - strd r5, r4, [r1, #128] - and r2, r9, r5, lsr #6 - and r3, r5, r10, lsl #8 - orr r2, r2, r3, lsl #2 - and r3, r8, r5, lsr #5 - orr r2, r2, r3 - and r5, r5, r7 - orr r5, 
r2, r5, lsl #3 //KEY_TRIPLE_UPDATE_2(r5) - and r2, r12, r4, lsr #4 - and r3, r4, r12 - orr r2, r2, r3, lsl #4 - and r3, r11, r4, lsr #6 - orr r2, r2, r3 - and r4, r4, r10 - orr r4, r2, r4, lsl #2 //KEY_DOUBLE_UPDATE_2(r4) - strd r4, r5, [r1, #208] - and r2, r9, r4, lsr #6 - and r3, r4, r10, lsl #8 - orr r2, r2, r3, lsl #2 - and r3, r8, r4, lsr #5 - orr r2, r2, r3 - and r4, r4, r7 - orr r4, r2, r4, lsl #3 //KEY_TRIPLE_UPDATE_2(r4) - and r2, r12, r5, lsr #4 - and r3, r5, r12 - orr r2, r2, r3, lsl #4 - and r3, r11, r5, lsr #6 - orr r2, r2, r3 - and r5, r5, r10 - orr r5, r2, r5, lsl#2 //KEY_DOUBLE_UPDATE_2(r5) - strd r5, r4, [r1, #288] - // KEY_DOULBE/TRIPLE_UPDATE_2 - // masks - movw r12, #0x5555 - movt r12, #0x5555 - mvn r11, r12 - ldrd r4, r5, [r1, #16] - and r2, r12, r4, ror #24 - and r4, r11, r4, ror #20 - orr r4, r4, r2 //KEY_TRIPLE_UPDATE_2(r4) - and r2, r11, r5, ror #24 - and r5, r12, r5, ror #16 - orr r5, r5, r2 //KEY_DOUBLE_UPDATE_2(r5) - strd r5, r4, [r1, #96] - and r2, r12, r5, ror #24 - and r5, r11, r5, ror #20 - orr r5, r5, r2 //KEY_TRIPLE_UPDATE_2(r5) - and r2, r11, r4, ror #24 - and r4, r12, r4, ror #16 - orr r4, r4, r2 //KEY_DOUBLE_UPDATE_2(r4) - strd r4, r5, [r1, #176] - and r2, r12, r4, ror #24 - and r4, r11, r4, ror #20 - orr r4, r4, r2 //KEY_TRIPLE_UPDATE_2(r4) - and r2, r11, r5, ror #24 - and r5, r12, r5, ror #16 - orr r5, r5, r2 //KEY_DOUBLE_UPDATE_2(r5) - strd r5, r4, [r1, #256] - ldrd r4, r5, [r1, #56] - and r2, r12, r4, ror #24 - and r4, r11, r4, ror #20 - orr r4, r4, r2 //KEY_TRIPLE_UPDATE_2(r5) - and r2, r11, r5, ror #24 - and r5, r12, r5, ror #16 - orr r5, r5, r2 //KEY_DOUBLE_UPDATE_2(r4) - strd r5, r4, [r1, #136] - and r2, r12, r5, ror #24 - and r5, r11, r5, ror #20 - orr r5, r5, r2 //KEY_TRIPLE_UPDATE_2(r4) - and r2, r11, r4, ror #24 - and r4, r12, r4, ror #16 - orr r4, r4, r2 //KEY_DOUBLE_UPDATE_2(r5) - strd r4, r5, [r1, #216] - and r2, r12, r4, ror #24 - and r4, r11, r4, ror #20 - orr r4, r4, r2 //KEY_TRIPLE_UPDATE_2(r5) - and r2, r11, 
r5, ror #24 - and r5, r12, r5, ror #16 - orr r5, r5, r2 //KEY_DOUBLE_UPDATE_2(r4) - strd r5, r4, [r1, #296] - // KEY_DOULBE/TRIPLE_UPDATE_3 - // masks - movw r12, #0x0707 - movt r12, #0x0707 - movw r11, #0xc0c0 - movw r10, #0x3030 - movw r9, #0x0303 - movt r9, #0x0303 - lsl r8, r12, #4 - movw r7, #0x1010 - movt r7, #0x1010 - movw r6, #0xf0f0 - ldrd r4, r5, [r1, #24] - and r2, r10, r4, lsr #18 - and r3, r4, r7, lsr #4 - orr r2, r2, r3, lsl #3 - and r3, r11, r4, lsr #14 - orr r2, r2, r3 - and r3, r4, r12, lsr #11 - orr r2, r2, r3, lsl #15 - and r3, r12, r4, lsr #1 - orr r2, r2, r3 - and r4, r4, r7, lsr #16 - orr r4, r2, r4, lsl #19 //KEY_TRIPLE_UPDATE_4(r4) - and r2, r9, r5, lsr #2 - and r3, r9, r5 - orr r2, r2, r3, lsl #2 - and r3, r8, r5, lsr #1 - orr r2, r2, r3 - and r5, r5, r7 - orr r5, r2, r5, lsl #3 //KEY_DOUBLE_UPDATE_4(r5) - strd r5, r4, [r1, #104] - and r2, r10, r5, lsr #18 - and r3, r5, r7, lsr #4 - orr r2, r2, r3, lsl #3 - and r3, r11, r5, lsr #14 - orr r2, r2, r3 - and r3, r5, r12, lsr #11 - orr r2, r2, r3, lsl #15 - and r3, r12, r5, lsr #1 - orr r2, r2, r3 - and r5, r5, r7, lsr #16 - orr r5, r2, r5, lsl #19 //KEY_TRIPLE_UPDATE_4(r5) - and r2, r9, r4, lsr #2 - and r3, r9, r4 - orr r2, r2, r3, lsl #2 - and r3, r8, r4, lsr #1 - orr r2, r2, r3 - and r4, r4, r7 - orr r4, r2, r4, lsl #3 //KEY_DOUBLE_UPDATE_4(r4) - strd r4, r5, [r1, #184] - and r2, r10, r4, lsr #18 - and r3, r4, r7, lsr #4 - orr r2, r2, r3, lsl #3 - and r3, r11, r4, lsr #14 - orr r2, r2, r3 - and r3, r4, r12, lsr #11 - orr r2, r2, r3, lsl #15 - and r3, r12, r4, lsr #1 - orr r2, r2, r3 - and r4, r4, r7, lsr #16 - orr r4, r2, r4, lsl #19 //KEY_TRIPLE_UPDATE_4(r4) - and r2, r9, r5, lsr #2 - and r3, r9, r5 - orr r2, r2, r3, lsl #2 - and r3, r8, r5, lsr #1 - orr r2, r2, r3 - and r5, r5, r7 - orr r5, r2, r5, lsl #3 //KEY_DOUBLE_UPDATE_4(r5) - strd r5, r4, [r1, #264] - ldrd r4, r5, [r1, #64] - and r2, r10, r4, lsr #18 - and r3, r4, r7, lsr #4 - orr r2, r2, r3, lsl #3 - and r3, r11, r4, lsr #14 - orr 
r2, r2, r3 - and r3, r4, r12, lsr #11 - orr r2, r2, r3, lsl #15 - and r3, r12, r4, lsr #1 - orr r2, r2, r3 - and r4, r4, r7, lsr #16 - orr r4, r2, r4, lsl #19 //KEY_TRIPLE_UPDATE_4(r4) - and r2, r9, r5, lsr #2 - and r3, r9, r5 - orr r2, r2, r3, lsl #2 - and r3, r8, r5, lsr #1 - orr r2, r2, r3 - and r5, r5, r7 - orr r5, r2, r5, lsl #3 //KEY_DOUBLE_UPDATE_4(r5) - strd r5, r4, [r1, #144] - and r2, r10, r5, lsr #18 - and r3, r5, r7, lsr #4 - orr r2, r2, r3, lsl #3 - and r3, r11, r5, lsr #14 - orr r2, r2, r3 - and r3, r5, r12, lsr #11 - orr r2, r2, r3, lsl #15 - and r3, r12, r5, lsr #1 - orr r2, r2, r3 - and r5, r5, r7, lsr #16 - orr r5, r2, r5, lsl #19 //KEY_TRIPLE_UPDATE_4(r5) - and r2, r9, r4, lsr #2 - and r3, r9, r4 - orr r2, r2, r3, lsl #2 - and r3, r8, r4, lsr #1 - orr r2, r2, r3 - and r4, r4, r7 - orr r4, r2, r4, lsl #3 //KEY_DOUBLE_UPDATE_4(r4) - strd r4, r5, [r1, #224] - and r2, r10, r4, lsr #18 - and r3, r4, r7, lsr #4 - orr r2, r2, r3, lsl #3 - and r3, r11, r4, lsr #14 - orr r2, r2, r3 - and r3, r4, r12, lsr #11 - orr r2, r2, r3, lsl #15 - and r3, r12, r4, lsr #1 - orr r2, r2, r3 - and r4, r4, r7, lsr #16 - orr r4, r2, r4, lsl #19 //KEY_TRIPLE_UPDATE_4(r4) - and r2, r9, r5, lsr #2 - and r3, r9, r5 - orr r2, r2, r3, lsl #2 - and r3, r8, r5, lsr #1 - orr r2, r2, r3 - and r5, r5, r7 - orr r5, r2, r5, lsl #3 //KEY_DOUBLE_UPDATE_4(r5) - strd r5, r4, [r1, #304] - // KEY_DOULBE/TRIPLE_UPDATE_4 - // masks - movw r12, #0x0fff - movw r11, #0x000f - lsl r10, r12, #16 - movw r9, #0x003f - movw r8, #0x00ff - movw r7, #0x03ff - lsl r7, r7, #16 - ldrd r4, r5, [r1, #32] - and r2, r7, r4, lsr #6 - and r3, r4, r9, lsl #16 - orr r2, r2, r3, lsl #10 - and r3, r12, r4, lsr #4 - orr r2, r2, r3 - and r4, r4, r11 - orr r4, r2, r4, lsl #12 //KEY_TRIPLE_UPDATE_4(r4) - and r2, r10, r5, lsr #4 - and r3, r5, r11, lsl #16 - orr r2, r2, r3, lsl #12 - and r3, r8, r5, lsr #8 - orr r2, r2, r3 - and r5, r5, r8 - orr r5, r2, r5, lsl #8 //KEY_DOUBLE_UPDATE_4(r5) - strd r5, r4, [r1, #112] - and 
r2, r7, r5, lsr #6 - and r3, r5, r9, lsl #16 - orr r2, r2, r3, lsl #10 - and r3, r12, r5, lsr #4 - orr r2, r2, r3 - and r5, r5, r11 - orr r5, r2, r5, lsl #12 //KEY_TRIPLE_UPDATE_4(r5) - and r2, r10, r4, lsr #4 - and r3, r4, r11, lsl #16 - orr r2, r2, r3, lsl #12 - and r3, r8, r4, lsr #8 - orr r2, r2, r3 - and r4, r4, r8 - orr r4, r2, r4, lsl #8 //KEY_DOUBLE_UPDATE_4(r4) - strd r4, r5, [r1, #192] - and r2, r7, r4, lsr #6 - and r3, r4, r9, lsl #16 - orr r2, r2, r3, lsl #10 - and r3, r12, r4, lsr #4 - orr r2, r2, r3 - and r4, r4, r11 - orr r4, r2, r4, lsl #12 //KEY_TRIPLE_UPDATE_4(r4) - and r2, r10, r5, lsr #4 - and r3, r5, r11, lsl #16 - orr r2, r2, r3, lsl #12 - and r3, r8, r5, lsr #8 - orr r2, r2, r3 - and r5, r5, r8 - orr r5, r2, r5, lsl #8 //KEY_DOUBLE_UPDATE_4(r5) - strd r5, r4, [r1, #272] - ldrd r4, r5, [r1, #72] - and r2, r7, r4, lsr #6 - and r3, r4, r9, lsl #16 - orr r2, r2, r3, lsl #10 - and r3, r12, r4, lsr #4 - orr r2, r2, r3 - and r4, r4, r11 - orr r4, r2, r4, lsl #12 //KEY_TRIPLE_UPDATE_4(r4) - and r2, r10, r5, lsr #4 - and r3, r5, r11, lsl #16 - orr r2, r2, r3, lsl #12 - and r3, r8, r5, lsr #8 - orr r2, r2, r3 - and r5, r5, r8 - orr r5, r2, r5, lsl #8 //KEY_DOUBLE_UPDATE_4(r5) - strd r5, r4, [r1, #152] - and r2, r7, r5, lsr #6 - and r3, r5, r9, lsl #16 - orr r2, r2, r3, lsl #10 - and r3, r12, r5, lsr #4 - orr r2, r2, r3 - and r5, r5, r11 - orr r5, r2, r5, lsl #12 //KEY_TRIPLE_UPDATE_4(r5) - and r2, r10, r4, lsr #4 - and r3, r4, r11, lsl #16 - orr r2, r2, r3, lsl #12 - and r3, r8, r4, lsr #8 - orr r2, r2, r3 - and r4, r4, r8 - orr r4, r2, r4, lsl #8 //KEY_DOUBLE_UPDATE_4(r4) - strd r4, r5, [r1, #232] - and r2, r7, r4, lsr #6 - and r3, r4, r9, lsl #16 - orr r2, r2, r3, lsl #10 - and r3, r12, r4, lsr #4 - orr r2, r2, r3 - and r4, r4, r11 - orr r4, r2, r4, lsl #12 //KEY_TRIPLE_UPDATE_4(r4) - and r2, r10, r5, lsr #4 - and r3, r5, r11, lsl #16 - orr r2, r2, r3, lsl #12 - and r3, r8, r5, lsr #8 - orr r2, r2, r3 - and r5, r5, r8 - orr r5, r2, r5, lsl #8 
//KEY_DOUBLE_UPDATE_4(r5) - strd r5, r4, [r1, #312] - pop {r2-r12,r14} - bx lr - - -/***************************************************************************** -* Fully unrolled ARM assembly implementation of the GIFTb-128 block cipher. -* This function simply encrypts a 128-bit block, without any operation mode. -*****************************************************************************/ -@ void giftb128_encrypt_block(u8 *out, const u32* rkey, const u8 *block) -.global giftb128_encrypt_block -.type giftb128_encrypt_block,%function -giftb128_encrypt_block: - push {r2-r12,r14} - - // load plaintext blocks - ldm r2, {r9-r12} - // endianness - rev r9, r9 - rev r10, r10 - rev r11, r11 - rev r12, r12 - - // masks for HALF/BYTE/NIBBLE rotations - movw r2, #0x1111 - movt r2, #0x1111 //for NIBBLE_ROR - movw r3, #0x000f - movt r3, #0x000f //for HALF_ROR - mvn r4, r2, lsl #3 //0x7777777 for NIBBLE_ROR - - // ------------------ 1st QUINTUPLE ROUND ------------------ - // 1st round - movw r5, 0x0008 - movt r5, 0x1000 //load rconst - ldrd r6, r7, [r1] //load rkey - and r8, r9, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r12 - eor r9, r9, r8 - orr r8, r9, r10 - eor r11, r11, r8 - eor r12, r12, r11 - eor r10, r10, r12 - and r8, r9, r10 - eor r11, r11, r8 - mvn r12, r12 - and r8, r4, r12, lsr #1 - and r12, r12, r2 - orr r12, r8, r12, lsl #3 //NIBBLE_ROR(r12, 1) - and r8, r4, r11 - and r11, r2, r11, lsr #3 - orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 3) - orr r14, r2, r2, lsl #1 //0x33333333 for NIBBLE_ROR - and r8, r14, r10, lsr #2 - and r10, r10, r14 - orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2) - eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r9, r9, r5 //add rconst - // 2nd round - movw r5, 0x8000 - movt r5, 0x8001 //load rconst - ldrd r6, r7, [r1, #8] //load rkey - and r8, r12, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r9 - eor r12, r12, r8 - orr r8, r12, r10 - eor r11, r11, r8 - eor r9, r9, r11 - eor r10, r10, r9 - and 
r8, r12, r10 - eor r11, r11, r8 - mvn r9, r9 - mvn r14, r3, lsl #12 //0x0fff0fff for HALF_ROR - and r8, r14, r9, lsr #4 - and r9, r9, r3 - orr r9, r8, r9, lsl #12 //HALF_ROR(r9, 4) - and r8, r3, r11, lsr #12 - and r11, r11, r14 - orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12) - rev16 r10, r10 //HALF_ROR(r10, 8) - eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r12, r12, r5 //add rconst - // 3rd round - movw r5, 0x0002 - movt r5, 0x5400 //load rconst - ldrd r6, r7, [r1, #16] //load rkey - and r8, r9, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r12 - eor r9, r9, r8 - orr r8, r9, r10 - eor r11, r11, r8 - eor r12, r12, r11 - eor r10, r10, r12 - and r8, r9, r10 - eor r11, r11, r8 - mvn r12, r12 - orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE - eor r8, r10, r10, lsr #1 - and r8, r8, r14 - eor r10, r10, r8 - eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1) - eor r8, r12, r12, lsr #1 - and r8, r8, r14, lsr #16 - eor r12, r12, r8 - eor r12, r12, r8, lsl #1 //SWAPMOVE(r12, r12, 0x55550000, 1) - eor r8, r11, r11, lsr #1 - and r8, r8, r14, lsl #16 - eor r11, r11, r8 - eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x00005555, 1) - eor r10, r10, r6 //add 1st keyword - eor r11, r7, r11, ror #16 //add 2nd keyword - eor r9, r9, r5 //add rconst - // 4th round - movw r5, 0x0181 - movt r5, 0x0101 //load rconst - ldrd r6, r7, [r1, #24] //load rkey - and r8, r11, r12, ror #16 //sbox layer - eor r10, r10, r8 - and r8, r10, r9 - eor r12, r8, r12, ror #16 - orr r8, r12, r10 - eor r11, r11, r8 - eor r9, r9, r11 - eor r10, r10, r9 - and r8, r12, r10 - eor r11, r11, r8 - mvn r9, r9 - eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR - and r8, r14, r10, lsr #4 - and r10, r10, r14 - orr r10, r8, r10, lsl #4 //BYTE_ROR(r10, 4) - orr r14, r14, r14, lsl #2 //0x3f3f3f3f for BYTE_ROR - mvn r8, r14 - and r8, r8, r11, lsl #6 - and r11, r14, r11, lsr #2 - orr r11, r11, r8 //BYTE_ROR(r11, 2) - mvn r8, r14, lsr #6 - and r8, r8, r9, lsr #6 - and r9, r14, 
r9 - orr r9, r8, r9, lsl #2 //BYTE_ROR(r9, 6) - eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r12, r12, r5 //add rconst - // 5th round - movw r5, 0x001f - movt r5, 0x8000 //load rconst - ldrd r6, r7, [r1, #32] //load rkey - and r8, r9, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r12 - eor r9, r9, r8 - orr r8, r9, r10 - eor r11, r11, r8 - eor r12, r12, r11 - eor r10, r10, r12 - and r8, r9, r10 - eor r11, r11, r8 - mvn r12, r12 - eor r10, r6, r10, ror #16 //add 1st keyword - eor r11, r7, r11, ror #8 //add 2nd keyword - eor r9, r9, r5 //add rconst - - // ------------------ 2nd QUINTUPLE ROUND ------------------ - // 1st round - movw r5, 0x8880 - movt r5, 0x1088 //load rconst - ldrd r6, r7, [r1, #40] //load rkey - and r8, r11, r12, ror #24 //sbox layer - eor r10, r10, r8 - and r8, r10, r9 - eor r12, r8, r12, ror #24 - orr r8, r12, r10 - eor r11, r11, r8 - eor r9, r9, r11 - eor r10, r10, r9 - and r8, r12, r10 - eor r11, r11, r8 - mvn r9, r9 - and r8, r4, r9, lsr #1 - and r9, r9, r2 - orr r9, r8, r9, lsl #3 //NIBBLE_ROR(r9, 1) - and r8, r4, r11 - and r11, r2, r11, lsr #3 - orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 3) - orr r14, r2, r2, lsl #1 //0x33333333 for NIBBLE_ROR - and r8, r14, r10, lsr #2 - and r10, r10, r14 - orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2) - eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r12, r12, r5 //add rconst - // 2nd round - movw r5, 0xe000 - movt r5, 0x6001 //load rconst - ldrd r6, r7, [r1, #48] //load rkey - and r8, r9, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r12 - eor r9, r9, r8 - orr r8, r9, r10 - eor r11, r11, r8 - eor r12, r12, r11 - eor r10, r10, r12 - and r8, r9, r10 - eor r11, r11, r8 - mvn r12, r12 - mvn r14, r3, lsl #12 //0x0fff0fff for HALF_ROR - and r8, r14, r12, lsr #4 - and r12, r12, r3 - orr r12, r8, r12, lsl #12 //HALF_ROR(r12, 4) - and r8, r3, r11, lsr #12 - and r11, r11, r14 - orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12) - rev16 r10, r10 
//HALF_ROR(r10, 8) - eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r9, r9, r5 //add rconst - // 3rd round - movw r5, 0x0002 - movt r5, 0x5150 //load rconst - ldrd r6, r7, [r1, #56] //load rkey - and r8, r12, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r9 - eor r12, r12, r8 - orr r8, r12, r10 - eor r11, r11, r8 - eor r9, r9, r11 - eor r10, r10, r9 - and r8, r12, r10 - eor r11, r11, r8 - mvn r9, r9 - orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE - eor r8, r10, r10, lsr #1 - and r8, r8, r14 - eor r10, r10, r8 - eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1) - eor r8, r9, r9, lsr #1 - and r8, r8, r14, lsr #16 - eor r9, r9, r8 - eor r9, r9, r8, lsl #1 //SWAPMOVE(r9, r9, 0x00005555, 1) - eor r8, r11, r11, lsr #1 - and r8, r8, r14, lsl #16 - eor r11, r11, r8 - eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x55550000, 1) - eor r10, r10, r6 //add 1st keyword - eor r11, r7, r11, ror #16 //add 2nd keyword - eor r12, r12, r5 //add rconst - // 4th round - movw r5, 0x0180 - movt r5, 0x0303 //load rconst - ldrd r6, r7, [r1, #64] //load rkey - and r8, r11, r9, ror #16 //sbox layer - eor r10, r10, r8 - and r8, r10, r12 - eor r9, r8, r9, ror #16 - orr r8, r9, r10 - eor r11, r11, r8 - eor r12, r12, r11 - eor r10, r10, r12 - and r8, r9, r10 - eor r11, r11, r8 - mvn r12, r12 - eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR - and r8, r14, r10, lsr #4 - and r10, r10, r14 - orr r10, r8, r10, lsl #4 //BYTE_ROR(r10, 4) - orr r14, r14, r14, lsl #2 //0x3f3f3f3f for BYTE_ROR - mvn r8, r14 - and r8, r8, r11, lsl #6 - and r11, r14, r11, lsr #2 - orr r11, r11, r8 //BYTE_ROR(r11, 2) - mvn r8, r14, lsr #6 - and r8, r8, r12, lsr #6 - and r12, r14, r12 - orr r12, r8, r12, lsl #2 //BYTE_ROR(r12, 6) - eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r9, r9, r5 //add rconst - // 5th round - movw r5, 0x002f - movt r5, 0x8000 //load rconst - ldrd r6, r7, [r1, #72] //load rkey - and r8, r12, r11 //sbox layer - eor r10, 
r10, r8 - and r8, r10, r9 - eor r12, r12, r8 - orr r8, r12, r10 - eor r11, r11, r8 - eor r9, r9, r11 - eor r10, r10, r9 - and r8, r12, r10 - eor r11, r11, r8 - mvn r9, r9 - eor r10, r6, r10, ror #16 //add 1st keyword - eor r11, r7, r11, ror #8 //add 2nd keyword - eor r12, r12, r5 //add rconst - - // ------------------ 3rd QUINTUPLE ROUND ------------------ - // 1st round - movw r5, 0x8880 - movt r5, 0x1008 //load rconst - ldrd r6, r7, [r1, #80] //load rkey - and r8, r11, r9, ror #24 //sbox layer - eor r10, r10, r8 - and r8, r10, r12 - eor r9, r8, r9, ror #24 - orr r8, r9, r10 - eor r11, r11, r8 - eor r12, r12, r11 - eor r10, r10, r12 - and r8, r9, r10 - eor r11, r11, r8 - mvn r12, r12 - and r8, r4, r12, lsr #1 - and r12, r12, r2 - orr r12, r8, r12, lsl #3 //NIBBLE_ROR(r12, 1) - and r8, r4, r11 - and r11, r2, r11, lsr #3 - orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 3) - orr r14, r2, r2, lsl #1 //0x33333333 for NIBBLE_ROR - and r8, r14, r10, lsr #2 - and r10, r10, r14 - orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2) - eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r9, r9, r5 //add rconst - // 2nd round - movw r5, 0x6000 - movt r5, 0x6001 //load rconst - ldrd r6, r7, [r1, #88] //load rkey - and r8, r12, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r9 - eor r12, r12, r8 - orr r8, r12, r10 - eor r11, r11, r8 - eor r9, r9, r11 - eor r10, r10, r9 - and r8, r12, r10 - eor r11, r11, r8 - mvn r9, r9 - mvn r14, r3, lsl #12 //0x0fff0fff for HALF_ROR - and r8, r14, r9, lsr #4 - and r9, r9, r3 - orr r9, r8, r9, lsl #12 //HALF_ROR(r9, 4) - and r8, r3, r11, lsr #12 - and r11, r11, r14 - orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12) - rev16 r10, r10 //HALF_ROR(r10, 8) - eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r12, r12, r5 //add rconst - // 3rd round - movw r5, 0x0002 - movt r5, 0x4150 //load rconst - ldrd r6, r7, [r1, #96] //load rkey - and r8, r9, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r12 - eor 
r9, r9, r8 - orr r8, r9, r10 - eor r11, r11, r8 - eor r12, r12, r11 - eor r10, r10, r12 - and r8, r9, r10 - eor r11, r11, r8 - mvn r12, r12 - orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE - eor r8, r10, r10, lsr #1 - and r8, r8, r14 - eor r10, r10, r8 - eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1) - eor r8, r12, r12, lsr #1 - and r8, r8, r14, lsr #16 - eor r12, r12, r8 - eor r12, r12, r8, lsl #1 //SWAPMOVE(r12, r12, 0x00005555, 1) - eor r8, r11, r11, lsr #1 - and r8, r8, r14, lsl #16 - eor r11, r11, r8 - eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x55550000, 1) - eor r10, r10, r6 //add 1st keyword - eor r11, r7, r11, ror #16 //add 2nd keyword - eor r9, r9, r5 //add rconst - // 4th round - movw r5, 0x0080 - movt r5, 0x0303 //load rconst - ldrd r6, r7, [r1, #104] //load rkey - and r8, r11, r12, ror #16 //sbox layer - eor r10, r10, r8 - and r8, r10, r9 - eor r12, r8, r12, ror #16 - orr r8, r12, r10 - eor r11, r11, r8 - eor r9, r9, r11 - eor r10, r10, r9 - and r8, r12, r10 - eor r11, r11, r8 - mvn r9, r9 - eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR - and r8, r14, r10, lsr #4 - and r10, r10, r14 - orr r10, r8, r10, lsl #4 //BYTE_ROR(r10, 4) - orr r14, r14, r14, lsl #2 //0x3f3f3f3f for BYTE_ROR - mvn r8, r14 - and r8, r8, r11, lsl #6 - and r11, r14, r11, lsr #2 - orr r11, r11, r8 //BYTE_ROR(r11, 2) - mvn r8, r14, lsr #6 - and r8, r8, r9, lsr #6 - and r9, r14, r9 - orr r9, r8, r9, lsl #2 //BYTE_ROR(r9, 6) - eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r12, r12, r5 //add rconst - // 5th round - movw r5, 0x0027 - movt r5, 0x8000 //load rconst - ldrd r6, r7, [r1, #112] //load rkey - and r8, r9, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r12 - eor r9, r9, r8 - orr r8, r9, r10 - eor r11, r11, r8 - eor r12, r12, r11 - eor r10, r10, r12 - and r8, r9, r10 - eor r11, r11, r8 - mvn r12, r12 - eor r10, r6, r10, ror #16 //add 1st keyword - eor r11, r7, r11, ror #8 //add 2nd keyword - eor r9, r9, r5 //add rconst - 
- // ------------------ 4th QUINTUPLE ROUND ------------------ - // 1st round - movw r5, 0x8880 - movt r5, 0x1000 //load rconst - ldrd r6, r7, [r1, #120] //load rkey - and r8, r11, r12, ror #24 //sbox layer - eor r10, r10, r8 - and r8, r10, r9 - eor r12, r8, r12, ror #24 - orr r8, r12, r10 - eor r11, r11, r8 - eor r9, r9, r11 - eor r10, r10, r9 - and r8, r12, r10 - eor r11, r11, r8 - mvn r9, r9 - and r8, r4, r9, lsr #1 - and r9, r9, r2 - orr r9, r8, r9, lsl #3 //NIBBLE_ROR(r9, 1) - and r8, r4, r11 - and r11, r2, r11, lsr #3 - orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 3) - orr r14, r2, r2, lsl #1 //0x33333333 for NIBBLE_ROR - and r8, r14, r10, lsr #2 - and r10, r10, r14 - orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2) - eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r12, r12, r5 //add rconst - // 2nd round - movw r5, 0xe000 - movt r5, 0x4001 //load rconst - ldrd r6, r7, [r1, #128] //load rkey - and r8, r9, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r12 - eor r9, r9, r8 - orr r8, r9, r10 - eor r11, r11, r8 - eor r12, r12, r11 - eor r10, r10, r12 - and r8, r9, r10 - eor r11, r11, r8 - mvn r12, r12 - mvn r14, r3, lsl #12 //0x0fff0fff for HALF_ROR - and r8, r14, r12, lsr #4 - and r12, r12, r3 - orr r12, r8, r12, lsl #12 //HALF_ROR(r12, 4) - and r8, r3, r11, lsr #12 - and r11, r11, r14 - orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12) - rev16 r10, r10 //HALF_ROR(r10, 8) - eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r9, r9, r5 //add rconst - // 3rd round - movw r5, 0x0002 - movt r5, 0x1150 //load rconst - ldrd r6, r7, [r1, #136] //load rkey - and r8, r12, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r9 - eor r12, r12, r8 - orr r8, r12, r10 - eor r11, r11, r8 - eor r9, r9, r11 - eor r10, r10, r9 - and r8, r12, r10 - eor r11, r11, r8 - mvn r9, r9 - orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE - eor r8, r10, r10, lsr #1 - and r8, r8, r14 - eor r10, r10, r8 - eor r10, r10, r8, lsl #1 
//SWAPMOVE(r10, r10, 0x55555555, 1) - eor r8, r9, r9, lsr #1 - and r8, r8, r14, lsr #16 - eor r9, r9, r8 - eor r9, r9, r8, lsl #1 //SWAPMOVE(r9, r9, 0x00005555, 1) - eor r8, r11, r11, lsr #1 - and r8, r8, r14, lsl #16 - eor r11, r11, r8 - eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x55550000, 1) - eor r10, r10, r6 //add 1st keyword - eor r11, r7, r11, ror #16 //add 2nd keyword - eor r12, r12, r5 //add rconst - // 4th round - movw r5, 0x0180 - movt r5, 0x0302 //load rconst - ldrd r6, r7, [r1, #144] //load rkey - and r8, r11, r9, ror #16 //sbox layer - eor r10, r10, r8 - and r8, r10, r12 - eor r9, r8, r9, ror #16 - orr r8, r9, r10 - eor r11, r11, r8 - eor r12, r12, r11 - eor r10, r10, r12 - and r8, r9, r10 - eor r11, r11, r8 - mvn r12, r12 - eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR - and r8, r14, r10, lsr #4 - and r10, r10, r14 - orr r10, r8, r10, lsl #4 //BYTE_ROR(r10, 4) - orr r14, r14, r14, lsl #2 //0x3f3f3f3f for BYTE_ROR - mvn r8, r14 - and r8, r8, r11, lsl #6 - and r11, r14, r11, lsr #2 - orr r11, r11, r8 //BYTE_ROR(r11, 2) - mvn r8, r14, lsr #6 - and r8, r8, r12, lsr #6 - and r12, r14, r12 - orr r12, r8, r12, lsl #2 //BYTE_ROR(r12, 6) - eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r9, r9, r5 //add rconst - // 5th round - movw r5, 0x002b - movt r5, 0x8000 //load rconst - ldrd r6, r7, [r1, #152] //load rkey - and r8, r12, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r9 - eor r12, r12, r8 - orr r8, r12, r10 - eor r11, r11, r8 - eor r9, r9, r11 - eor r10, r10, r9 - and r8, r12, r10 - eor r11, r11, r8 - mvn r9, r9 - eor r10, r6, r10, ror #16 //add 1st keyword - eor r11, r7, r11, ror #8 //add 2nd keyword - eor r12, r12, r5 //add rconst - - // ------------------ 5th QUINTUPLE ROUND ------------------ - // 1st round - movw r5, 0x0880 - movt r5, 0x1008 //load rconst - ldrd r6, r7, [r1, #160] //load rkey - and r8, r11, r9, ror #24 //sbox layer - eor r10, r10, r8 - and r8, r10, r12 - eor r9, r8, r9, ror #24 - orr r8, r9, 
r10 - eor r11, r11, r8 - eor r12, r12, r11 - eor r10, r10, r12 - and r8, r9, r10 - eor r11, r11, r8 - mvn r12, r12 - and r8, r4, r12, lsr #1 - and r12, r12, r2 - orr r12, r8, r12, lsl #3 //NIBBLE_ROR(r12, 1) - and r8, r4, r11 - and r11, r2, r11, lsr #3 - orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 3) - orr r14, r2, r2, lsl #1 //0x33333333 for NIBBLE_ROR - and r8, r14, r10, lsr #2 - and r10, r10, r14 - orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2) - eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r9, r9, r5 //add rconst - // 2nd round - movw r5, 0x4000 - movt r5, 0x6001 //load rconst - ldrd r6, r7, [r1, #168] //load rkey - and r8, r12, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r9 - eor r12, r12, r8 - orr r8, r12, r10 - eor r11, r11, r8 - eor r9, r9, r11 - eor r10, r10, r9 - and r8, r12, r10 - eor r11, r11, r8 - mvn r9, r9 - mvn r14, r3, lsl #12 //0x0fff0fff for HALF_ROR - and r8, r14, r9, lsr #4 - and r9, r9, r3 - orr r9, r8, r9, lsl #12 //HALF_ROR(r9, 4) - and r8, r3, r11, lsr #12 - and r11, r11, r14 - orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12) - rev16 r10, r10 //HALF_ROR(r10, 8) - eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r12, r12, r5 //add rconst - // 3rd round - movw r5, 0x0002 - movt r5, 0x0140 //load rconst - ldrd r6, r7, [r1, #176] //load rkey - and r8, r9, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r12 - eor r9, r9, r8 - orr r8, r9, r10 - eor r11, r11, r8 - eor r12, r12, r11 - eor r10, r10, r12 - and r8, r9, r10 - eor r11, r11, r8 - mvn r12, r12 - orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE - eor r8, r10, r10, lsr #1 - and r8, r8, r14 - eor r10, r10, r8 - eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1) - eor r8, r12, r12, lsr #1 - and r8, r8, r14, lsr #16 - eor r12, r12, r8 - eor r12, r12, r8, lsl #1 //SWAPMOVE(r12, r12, 0x00005555, 1) - eor r8, r11, r11, lsr #1 - and r8, r8, r14, lsl #16 - eor r11, r11, r8 - eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 
0x55550000, 1) - eor r10, r10, r6 //add 1st keyword - eor r11, r7, r11, ror #16 //add 2nd keyword - eor r9, r9, r5 //add rconst - // 4th round - movw r5, 0x0080 - movt r5, 0x0202 //load rconst - ldrd r6, r7, [r1, #184] //load rkey - and r8, r11, r12, ror #16 //sbox layer - eor r10, r10, r8 - and r8, r10, r9 - eor r12, r8, r12, ror #16 - orr r8, r12, r10 - eor r11, r11, r8 - eor r9, r9, r11 - eor r10, r10, r9 - and r8, r12, r10 - eor r11, r11, r8 - mvn r9, r9 - eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR - and r8, r14, r10, lsr #4 - and r10, r10, r14 - orr r10, r8, r10, lsl #4 //BYTE_ROR(r10, 4) - orr r14, r14, r14, lsl #2 //0x3f3f3f3f for BYTE_ROR - mvn r8, r14 - and r8, r8, r11, lsl #6 - and r11, r14, r11, lsr #2 - orr r11, r11, r8 //BYTE_ROR(r11, 2) - mvn r8, r14, lsr #6 - and r8, r8, r9, lsr #6 - and r9, r14, r9 - orr r9, r8, r9, lsl #2 //BYTE_ROR(r9, 6) - eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r12, r12, r5 //add rconst - // 5th round - movw r5, 0x0021 - movt r5, 0x8000 //load rconst - ldrd r6, r7, [r1, #192] //load rkey - and r8, r9, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r12 - eor r9, r9, r8 - orr r8, r9, r10 - eor r11, r11, r8 - eor r12, r12, r11 - eor r10, r10, r12 - and r8, r9, r10 - eor r11, r11, r8 - mvn r12, r12 - eor r10, r6, r10, ror #16 //add 1st keyword - eor r11, r7, r11, ror #8 //add 2nd keyword - eor r9, r9, r5 //add rconst - - // ------------------ 6th QUINTUPLE ROUND ------------------ - // 1st round - movw r5, 0x0080 - movt r5, 0x1000 //load rconst - ldrd r6, r7, [r1, #200] //load rkey - and r8, r11, r12, ror #24 //sbox layer - eor r10, r10, r8 - and r8, r10, r9 - eor r12, r8, r12, ror #24 - orr r8, r12, r10 - eor r11, r11, r8 - eor r9, r9, r11 - eor r10, r10, r9 - and r8, r12, r10 - eor r11, r11, r8 - mvn r9, r9 - and r8, r4, r9, lsr #1 - and r9, r9, r2 - orr r9, r8, r9, lsl #3 //NIBBLE_ROR(r9, 1) - and r8, r4, r11 - and r11, r2, r11, lsr #3 - orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 
3) - orr r14, r2, r2, lsl #1 //0x33333333 for NIBBLE_ROR - and r8, r14, r10, lsr #2 - and r10, r10, r14 - orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2) - eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r12, r12, r5 //add rconst - // 2nd round - movw r5, 0xc000 - movt r5, 0x0001 //load rconst - ldrd r6, r7, [r1, #208] //load rkey - and r8, r9, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r12 - eor r9, r9, r8 - orr r8, r9, r10 - eor r11, r11, r8 - eor r12, r12, r11 - eor r10, r10, r12 - and r8, r9, r10 - eor r11, r11, r8 - mvn r12, r12 - mvn r14, r3, lsl #12 //0x0fff0fff for HALF_ROR - and r8, r14, r12, lsr #4 - and r12, r12, r3 - orr r12, r8, r12, lsl #12 //HALF_ROR(r12, 4) - and r8, r3, r11, lsr #12 - and r11, r11, r14 - orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12) - rev16 r10, r10 //HALF_ROR(r10, 8) - eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r9, r9, r5 //add rconst - // 3rd round - movw r5, 0x0002 - movt r5, 0x5100 //load rconst - ldrd r6, r7, [r1, #216] //load rkey - and r8, r12, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r9 - eor r12, r12, r8 - orr r8, r12, r10 - eor r11, r11, r8 - eor r9, r9, r11 - eor r10, r10, r9 - and r8, r12, r10 - eor r11, r11, r8 - mvn r9, r9 - orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE - eor r8, r10, r10, lsr #1 - and r8, r8, r14 - eor r10, r10, r8 - eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1) - eor r8, r9, r9, lsr #1 - and r8, r8, r14, lsr #16 - eor r9, r9, r8 - eor r9, r9, r8, lsl #1 //SWAPMOVE(r9, r9, 0x00005555, 1) - eor r8, r11, r11, lsr #1 - and r8, r8, r14, lsl #16 - eor r11, r11, r8 - eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x55550000, 1) - eor r10, r10, r6 //add 1st keyword - eor r11, r7, r11, ror #16 //add 2nd keyword - eor r12, r12, r5 //add rconst - // 4th round - movw r5, 0x0180 - movt r5, 0x0301 //load rconst - ldrd r6, r7, [r1, #224] //load rkey - and r8, r11, r9, ror #16 //sbox layer - eor r10, r10, r8 - and r8, 
r10, r12 - eor r9, r8, r9, ror #16 - orr r8, r9, r10 - eor r11, r11, r8 - eor r12, r12, r11 - eor r10, r10, r12 - and r8, r9, r10 - eor r11, r11, r8 - mvn r12, r12 - eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR - and r8, r14, r10, lsr #4 - and r10, r10, r14 - orr r10, r8, r10, lsl #4 //BYTE_ROR(r10, 4) - orr r14, r14, r14, lsl #2 //0x3f3f3f3f for BYTE_ROR - mvn r8, r14 - and r8, r8, r11, lsl #6 - and r11, r14, r11, lsr #2 - orr r11, r11, r8 //BYTE_ROR(r11, 2) - mvn r8, r14, lsr #6 - and r8, r8, r12, lsr #6 - and r12, r14, r12 - orr r12, r8, r12, lsl #2 //BYTE_ROR(r12, 6) - eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r9, r9, r5 //add rconst - // 5th round - movw r5, 0x002e - movt r5, 0x8000 //load rconst - ldrd r6, r7, [r1, #232] //load rkey - and r8, r12, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r9 - eor r12, r12, r8 - orr r8, r12, r10 - eor r11, r11, r8 - eor r9, r9, r11 - eor r10, r10, r9 - and r8, r12, r10 - eor r11, r11, r8 - mvn r9, r9 - eor r10, r6, r10, ror #16 //add 1st keyword - eor r11, r7, r11, ror #8 //add 2nd keyword - eor r12, r12, r5 //add rconst - - - // ------------------ 7th QUINTUPLE ROUND ------------------ - // 1st round - movw r5, 0x8800 - movt r5, 0x1008 //load rconst - ldrd r6, r7, [r1, #240] //load rkey - and r8, r11, r9, ror #24 //sbox layer - eor r10, r10, r8 - and r8, r10, r12 - eor r9, r8, r9, ror #24 - orr r8, r9, r10 - eor r11, r11, r8 - eor r12, r12, r11 - eor r10, r10, r12 - and r8, r9, r10 - eor r11, r11, r8 - mvn r12, r12 - and r8, r4, r12, lsr #1 - and r12, r12, r2 - orr r12, r8, r12, lsl #3 //NIBBLE_ROR(r12, 1) - and r8, r4, r11 - and r11, r2, r11, lsr #3 - orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 3) - orr r14, r2, r2, lsl #1 //0x33333333 for NIBBLE_ROR - and r8, r14, r10, lsr #2 - and r10, r10, r14 - orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2) - eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r9, r9, r5 //add rconst - // 2nd round - movw r5, 
0x2000 - movt r5, 0x6001 //load rconst - ldrd r6, r7, [r1, #248] //load rkey - and r8, r12, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r9 - eor r12, r12, r8 - orr r8, r12, r10 - eor r11, r11, r8 - eor r9, r9, r11 - eor r10, r10, r9 - and r8, r12, r10 - eor r11, r11, r8 - mvn r9, r9 - mvn r14, r3, lsl #12 //0x0fff0fff for HALF_ROR - and r8, r14, r9, lsr #4 - and r9, r9, r3 - orr r9, r8, r9, lsl #12 //HALF_ROR(r9, 4) - and r8, r3, r11, lsr #12 - and r11, r11, r14 - orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12) - rev16 r10, r10 //HALF_ROR(r10, 8) - eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r12, r12, r5 //add rconst - // 3rd round - movw r5, 0x0002 - movt r5, 0x4050 //load rconst - ldrd r6, r7, [r1, #256] //load rkey - and r8, r9, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r12 - eor r9, r9, r8 - orr r8, r9, r10 - eor r11, r11, r8 - eor r12, r12, r11 - eor r10, r10, r12 - and r8, r9, r10 - eor r11, r11, r8 - mvn r12, r12 - orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE - eor r8, r10, r10, lsr #1 - and r8, r8, r14 - eor r10, r10, r8 - eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1) - eor r8, r12, r12, lsr #1 - and r8, r8, r14, lsr #16 - eor r12, r12, r8 - eor r12, r12, r8, lsl #1 //SWAPMOVE(r12, r12, 0x00005555, 1) - eor r8, r11, r11, lsr #1 - and r8, r8, r14, lsl #16 - eor r11, r11, r8 - eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x55550000, 1) - eor r10, r10, r6 //add 1st keyword - eor r11, r7, r11, ror #16 //add 2nd keyword - eor r9, r9, r5 //add rconst - // 4th round - movw r5, 0x0080 - movt r5, 0x0103 //load rconst - ldrd r6, r7, [r1, #264] //load rkey - and r8, r11, r12, ror #16 //sbox layer - eor r10, r10, r8 - and r8, r10, r9 - eor r12, r8, r12, ror #16 - orr r8, r12, r10 - eor r11, r11, r8 - eor r9, r9, r11 - eor r10, r10, r9 - and r8, r12, r10 - eor r11, r11, r8 - mvn r9, r9 - eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR - and r8, r14, r10, lsr #4 - and r10, r10, r14 - orr r10, r8, r10, 
lsl #4 //BYTE_ROR(r10, 4) - orr r14, r14, r14, lsl #2 //0x3f3f3f3f for BYTE_ROR - mvn r8, r14 - and r8, r8, r11, lsl #6 - and r11, r14, r11, lsr #2 - orr r11, r11, r8 //BYTE_ROR(r11, 2) - mvn r8, r14, lsr #6 - and r8, r8, r9, lsr #6 - and r9, r14, r9 - orr r9, r8, r9, lsl #2 //BYTE_ROR(r9, 6) - eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r12, r12, r5 //add rconst - // 5th round - movw r5, 0x0006 - movt r5, 0x8000 //load rconst - ldrd r6, r7, [r1, #272] //load rkey - and r8, r9, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r12 - eor r9, r9, r8 - orr r8, r9, r10 - eor r11, r11, r8 - eor r12, r12, r11 - eor r10, r10, r12 - and r8, r9, r10 - eor r11, r11, r8 - mvn r12, r12 - eor r10, r6, r10, ror #16 //add 1st keyword - eor r11, r7, r11, ror #8 //add 2nd keyword - eor r9, r9, r5 //add rconst - - // ------------------ 8th QUINTUPLE ROUND ------------------ - // 1st round - movw r5, 0x8808 - movt r5, 0x1000 //load rconst - ldrd r6, r7, [r1, #280] //load rkey - and r8, r11, r12, ror #24 //sbox layer - eor r10, r10, r8 - and r8, r10, r9 - eor r12, r8, r12, ror #24 - orr r8, r12, r10 - eor r11, r11, r8 - eor r9, r9, r11 - eor r10, r10, r9 - and r8, r12, r10 - eor r11, r11, r8 - mvn r9, r9 - and r8, r4, r9, lsr #1 - and r9, r9, r2 - orr r9, r8, r9, lsl #3 //NIBBLE_ROR(r9, 1) - and r8, r4, r11 - and r11, r2, r11, lsr #3 - orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 3) - orr r14, r2, r2, lsl #1 //0x33333333 for NIBBLE_ROR - and r8, r14, r10, lsr #2 - and r10, r10, r14 - orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2) - eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r12, r12, r5 //add rconst - // 2nd round - movw r5, 0xa000 - movt r5, 0xc001 //load rconst - ldrd r6, r7, [r1, #288] //load rkey - and r8, r9, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r12 - eor r9, r9, r8 - orr r8, r9, r10 - eor r11, r11, r8 - eor r12, r12, r11 - eor r10, r10, r12 - and r8, r9, r10 - eor r11, r11, r8 - mvn r12, r12 - mvn 
r14, r3, lsl #12 //0x0fff0fff for HALF_ROR - and r8, r14, r12, lsr #4 - and r12, r12, r3 - orr r12, r8, r12, lsl #12 //HALF_ROR(r12, 4) - and r8, r3, r11, lsr #12 - and r11, r11, r14 - orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12) - rev16 r10, r10 //HALF_ROR(r10, 8) - eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r9, r9, r5 //add rconst - // 3rd round - movw r5, 0x0002 - movt r5, 0x1450 //load rconst - ldrd r6, r7, [r1, #296] //load rkey - and r8, r12, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r9 - eor r12, r12, r8 - orr r8, r12, r10 - eor r11, r11, r8 - eor r9, r9, r11 - eor r10, r10, r9 - and r8, r12, r10 - eor r11, r11, r8 - mvn r9, r9 - orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE - eor r8, r10, r10, lsr #1 - and r8, r8, r14 - eor r10, r10, r8 - eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1) - eor r8, r9, r9, lsr #1 - and r8, r8, r14, lsr #16 - eor r9, r9, r8 - eor r9, r9, r8, lsl #1 //SWAPMOVE(r9, r9, 0x00005555, 1) - eor r8, r11, r11, lsr #1 - and r8, r8, r14, lsl #16 - eor r11, r11, r8 - eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x55550000, 1) - eor r10, r10, r6 //add 1st keyword - eor r11, r7, r11, ror #16 //add 2nd keyword - eor r12, r12, r5 //add rconst - // 4th round - movw r5, 0x0181 - movt r5, 0x0102 //load rconst - ldrd r6, r7, [r1, #304] //load rkey - and r8, r11, r9, ror #16 //sbox layer - eor r10, r10, r8 - and r8, r10, r12 - eor r9, r8, r9, ror #16 - orr r8, r9, r10 - eor r11, r11, r8 - eor r12, r12, r11 - eor r10, r10, r12 - and r8, r9, r10 - eor r11, r11, r8 - mvn r12, r12 - eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR - and r8, r14, r10, lsr #4 - and r10, r10, r14 - orr r10, r8, r10, lsl #4 //BYTE_ROR(r10, 4) - orr r14, r14, r14, lsl #2 //0x3f3f3f3f for BYTE_ROR - mvn r8, r14 - and r8, r8, r11, lsl #6 - and r11, r14, r11, lsr #2 - orr r11, r11, r8 //BYTE_ROR(r11, 2) - mvn r8, r14, lsr #6 - and r8, r8, r12, lsr #6 - and r12, r14, r12 - orr r12, r8, r12, lsl #2 //BYTE_ROR(r12, 6) 
- eor r10, r10, r6 //add 1st keyword - eor r11, r11, r7 //add 2nd keyword - eor r9, r9, r5 //add rconst - // 5th round - movw r5, 0x001a - movt r5, 0x8000 //load rconst - ldrd r6, r7, [r1, #312] //load rkey - and r8, r12, r11 //sbox layer - eor r10, r10, r8 - and r8, r10, r9 - eor r12, r12, r8 - orr r8, r12, r10 - eor r11, r11, r8 - eor r9, r9, r11 - eor r10, r10, r9 - and r8, r12, r10 - eor r11, r11, r8 - mvn r9, r9, ror #24 - eor r10, r6, r10, ror #16 //add 1st keyword - eor r11, r7, r11, ror #8 //add 2nd keyword - eor r12, r12, r5 //add rconst - - // endianness - rev r9, r9 - rev r10, r10 - rev r11, r11 - rev r12, r12 - - stm r0, {r9-r12} - pop {r2-r12,r14} - bx lr - \ No newline at end of file diff --git a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/giftcofb128v1.h b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/giftcofb128v1.h deleted file mode 100644 index 23c82db..0000000 --- a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm/giftcofb128v1.h +++ /dev/null @@ -1,81 +0,0 @@ -#ifndef GIFT_COFB_H_ -#define GIFT_COFB_H_ - -#define TAG_SIZE 16 -#define COFB_ENCRYPT 1 -#define COFB_DECRYPT 0 - -typedef unsigned char u8; -typedef unsigned int u32; - -int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, - const unsigned char* m, unsigned long long mlen, - const unsigned char* ad, unsigned long long adlen, - const unsigned char* nsec, const unsigned char* npub, - const unsigned char* k); - -int crypto_aead_decrypt(unsigned char* m, unsigned long long *mlen, - unsigned char* nsec, const unsigned char* c, - unsigned long long clen, const unsigned char* ad, - unsigned long long adlen, const unsigned char* npub, - const unsigned char *k); - -#define DOUBLE_HALF_BLOCK(x) ({ \ - tmp0 = (x)[0]; \ - (x)[0] = (((x)[0] & 0x7f7f7f7f) << 1) | (((x)[0] & 0x80808080) >> 15); \ - (x)[0] |= ((x)[1] & 0x80808080) << 17; \ - (x)[1] = (((x)[1] & 0x7f7f7f7f) << 1) | (((x)[1] & 0x80808080) >> 15); \ - (x)[1] ^= (((tmp0 >> 7) 
& 1) * 27) << 24; \ -}) - -#define TRIPLE_HALF_BLOCK(x) ({ \ - tmp0 = (x)[0]; \ - tmp1 = (x)[1]; \ - (x)[0] = (((x)[0] & 0x7f7f7f7f) << 1) | (((x)[0] & 0x80808080) >> 15); \ - (x)[0] |= ((x)[1] & 0x80808080) << 17; \ - (x)[1] = (((x)[1] & 0x7f7f7f7f) << 1) | (((x)[1] & 0x80808080) >> 15); \ - (x)[1] ^= (((tmp0 >> 7) & 1) * 27) << 24; \ - (x)[0] ^= tmp0; \ - (x)[1] ^= tmp1; \ -}) - -#define G(x) ({ \ - tmp0 = (x)[0]; \ - tmp1 = (x)[1]; \ - (x)[0] = (x)[2]; \ - (x)[1] = (x)[3]; \ - (x)[2] = ((tmp0 & 0x7f7f7f7f) << 1) | ((tmp0 & 0x80808080) >> 15); \ - (x)[2] |= ((tmp1 & 0x80808080) << 17); \ - (x)[3] = ((tmp1 & 0x7f7f7f7f) << 1) | ((tmp1 & 0x80808080) >> 15); \ - (x)[3] |= ((tmp0 & 0x80808080) << 17); \ -}) - -#define XOR_BLOCK(x, y, z) ({ \ - (x)[0] = (y)[0] ^ (z)[0]; \ - (x)[1] = (y)[1] ^ (z)[1]; \ - (x)[2] = (y)[2] ^ (z)[2]; \ - (x)[3] = (y)[3] ^ (z)[3]; \ -}) - -#define XOR_TOP_BAR_BLOCK(x, y) ({ \ - (x)[0] ^= (y)[0]; \ - (x)[1] ^= (y)[1]; \ -}) - -#define RHO1(d, y, m, n) ({ \ - G(y); \ - padding(d,m,n); \ - XOR_BLOCK(d, d, y); \ -}) - -#define RHO(y, m, x, c, n) ({ \ - XOR_BLOCK(c, y, m); \ - RHO1(x, y, m, n); \ -}) - -#define RHO_PRIME(y, c, x, m, n) ({ \ - XOR_BLOCK(m, y, c); \ - RHO1(x, y, m, n); \ -}) - -#endif // GIFT_COFB_H_ diff --git a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/api.h b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/api.h new file mode 100644 index 0000000..fb1d58b --- /dev/null +++ b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/cofb.h b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/cofb.h new file mode 100644 index 0000000..143c7d3 --- /dev/null +++ 
b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/cofb.h @@ -0,0 +1,20 @@ +#ifndef GIFT_COFB_H_ +#define GIFT_COFB_H_ + +#define TAG_SIZE 16 +#define COFB_ENCRYPT 1 +#define COFB_DECRYPT 0 + +#define XOR_BLOCK(x, y, z) ({ \ + (x)[0] = (y)[0] ^ (z)[0]; \ + (x)[1] = (y)[1] ^ (z)[1]; \ + (x)[2] = (y)[2] ^ (z)[2]; \ + (x)[3] = (y)[3] ^ (z)[3]; \ +}) + +#define XOR_TOP_BAR_BLOCK(x, y) ({ \ + (x)[0] ^= (y)[0]; \ + (x)[1] ^= (y)[1]; \ +}) + +#endif // GIFT_COFB_H_ \ No newline at end of file diff --git a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/encrypt.c b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/encrypt.c new file mode 100644 index 0000000..3ac3cff --- /dev/null +++ b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/encrypt.c @@ -0,0 +1,191 @@ +#include +#include "api.h" +#include "cofb.h" +#include "giftb128.h" + +static inline void padding(u32* d, const u32* s, const u32 no_of_bytes){ + u32 i; + if (no_of_bytes == 0) { + d[0] = 0x00000080; // little-endian + d[1] = 0x00000000; + d[2] = 0x00000000; + d[3] = 0x00000000; + } + else if (no_of_bytes < GIFT128_BLOCK_SIZE) { + for (i = 0; i < no_of_bytes/4+1; i++) + d[i] = s[i]; + d[i-1] &= ~(0xffffffffL << (no_of_bytes % 4)*8); + d[i-1] |= 0x00000080L << (no_of_bytes % 4)*8; + for (; i < 4; i++) + d[i] = 0x00000000; + } + else { + d[0] = s[0]; + d[1] = s[1]; + d[2] = s[2]; + d[3] = s[3]; + } +} + +static inline void double_half_block(u32* x) { + u32 tmp0; + tmp0 = (x)[0]; + (x)[0] = (((x)[0] & 0x7f7f7f7f) << 1) | (((x)[0] & 0x80808080) >> 15); + (x)[0] |= ((x)[1] & 0x80808080) << 17; + (x)[1] = (((x)[1] & 0x7f7f7f7f) << 1) | (((x)[1] & 0x80808080) >> 15); + (x)[1] ^= (((tmp0 >> 7) & 1) * 27) << 24; +} + +static inline void triple_half_block(u32* x) { + u32 tmp0, tmp1; + tmp0 = (x)[0]; + tmp1 = (x)[1]; + (x)[0] = (((x)[0] & 0x7f7f7f7f) << 1) | (((x)[0] & 0x80808080) >> 15); + (x)[0] |= ((x)[1] & 0x80808080) << 17; + 
(x)[1] = (((x)[1] & 0x7f7f7f7f) << 1) | (((x)[1] & 0x80808080) >> 15); + (x)[1] ^= (((tmp0 >> 7) & 1) * 27) << 24; + (x)[0] ^= tmp0; + (x)[1] ^= tmp1; +} + +static inline void g(u32 *x) { + u32 tmp0, tmp1; + tmp0 = (x)[0]; + tmp1 = (x)[1]; + (x)[0] = (x)[2]; + (x)[1] = (x)[3]; + (x)[2] = ((tmp0 & 0x7f7f7f7f) << 1) | ((tmp0 & 0x80808080) >> 15); + (x)[2] |= ((tmp1 & 0x80808080) << 17); + (x)[3] = ((tmp1 & 0x7f7f7f7f) << 1) | ((tmp1 & 0x80808080) >> 15); + (x)[3] |= ((tmp0 & 0x80808080) << 17); +} + +static inline void rho1(u32* d, u32* y, u32* m, u32 n) { + g(y); + padding(d,m,n); + XOR_BLOCK(d, d, y); +} + +static inline void rho(u32* y, u32* m, u32* x, u32* c, u32 n) { + XOR_BLOCK(c, y, m); + rho1(x, y, m, n); +} + +static inline void rho_prime(u32* y, u32*c, u32* m, u32* x, u32 n) { + XOR_BLOCK(m, y, c); + rho1(x, y, m, n); +} + +/**************************************************************************** +* Constant-time implementation of the GIFT-COFB authenticated cipher based on +* fixsliced GIFTb-128. Encryption/decryption is handled by the same function, +* depending on the 'mode' parameter (1/0). 
+****************************************************************************/ +int giftcofb_crypt(u8* out, const u8* key, const u8* nonce, const u8* ad, + u32 ad_len, const u8* in, u32 in_len, const int encrypting) { + + u32 tmp0, tmp1, emptyA, emptyM, offset[2]; + u32 input[4], rkey[80]; + u8 Y[GIFT128_BLOCK_SIZE]; + + if (!encrypting) { + if (in_len < TAG_SIZE) + return -1; + in_len -= TAG_SIZE; + } + + if(ad_len == 0) + emptyA = 1; + else + emptyA = 0; + + if(in_len == 0) + emptyM =1; + else + emptyM = 0; + + gift128_keyschedule(key, rkey); + giftb128_encrypt_block(Y, rkey, nonce); + offset[0] = ((u32*)Y)[0]; + offset[1] = ((u32*)Y)[1]; + + while(ad_len > GIFT128_BLOCK_SIZE){ + rho1(input, (u32*)Y, (u32*)ad, GIFT128_BLOCK_SIZE); + double_half_block(offset); + XOR_TOP_BAR_BLOCK(input, offset); + giftb128_encrypt_block(Y, rkey, (u8*)input); + ad += GIFT128_BLOCK_SIZE; + ad_len -= GIFT128_BLOCK_SIZE; + } + + triple_half_block(offset); + if((ad_len % GIFT128_BLOCK_SIZE != 0) || (emptyA)) + triple_half_block(offset); + if(emptyM) { + triple_half_block(offset); + triple_half_block(offset); + } + + rho1(input, (u32*)Y, (u32*)ad, ad_len); + XOR_TOP_BAR_BLOCK(input, offset); + giftb128_encrypt_block(Y, rkey, (u8*)input); + + while (in_len > GIFT128_BLOCK_SIZE){ + double_half_block(offset); + if (encrypting) + rho((u32*)Y, (u32*)in, input, (u32*)out, GIFT128_BLOCK_SIZE); + else + rho_prime((u32*)Y, (u32*)in, input, (u32*)out, GIFT128_BLOCK_SIZE); + XOR_TOP_BAR_BLOCK(input, offset); + giftb128_encrypt_block(Y, rkey, (u8*)input); + in += GIFT128_BLOCK_SIZE; + out += GIFT128_BLOCK_SIZE; + in_len -= GIFT128_BLOCK_SIZE; + } + + if(!emptyM){ + triple_half_block(offset); + if(in_len % GIFT128_BLOCK_SIZE != 0) + triple_half_block(offset); + if (encrypting) { + rho((u32*)Y, (u32*)in, input, (u32*)out, in_len); + out += in_len; + } + else { + rho_prime((u32*)Y, (u32*)in, input, (u32*)out, in_len); + in += in_len; + } + XOR_TOP_BAR_BLOCK(input, offset); + giftb128_encrypt_block(Y, 
rkey, (u8*)input); + } + + if (encrypting) { // encryption mode + memcpy(out, Y, TAG_SIZE); + return 0; + } + // decrypting + tmp0 = 0; + for(tmp1 = 0; tmp1 < TAG_SIZE; tmp1++) + tmp0 |= in[tmp1] ^ Y[tmp1]; + return tmp0; +} + +int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, const unsigned char* npub, + const unsigned char* k) { + (void)nsec; + *clen = mlen + TAG_SIZE; + return giftcofb_crypt(c, k, npub, ad, adlen, m, mlen, COFB_ENCRYPT); +} + +int crypto_aead_decrypt(unsigned char* m, unsigned long long *mlen, + unsigned char* nsec, const unsigned char* c, + unsigned long long clen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char *k) { + (void)nsec; + *mlen = clen - TAG_SIZE; + return giftcofb_crypt(m, k, npub, ad, adlen, c, clen, COFB_DECRYPT); +} \ No newline at end of file diff --git a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/giftb128.h b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/giftb128.h new file mode 100644 index 0000000..bcb4f36 --- /dev/null +++ b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/giftb128.h @@ -0,0 +1,13 @@ +#ifndef GIFT128_H_ +#define GIFT128_H_ + +#define KEY_SIZE 16 +#define GIFT128_BLOCK_SIZE 16 + +typedef unsigned char u8; +typedef unsigned int u32; + +extern void gift128_keyschedule(const u8* key, u32* rkey); +extern void giftb128_encrypt_block(u8* out_block, const u32* rkey, const u8* in_block); + +#endif // GIFT128_H_ \ No newline at end of file diff --git a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/giftb128.s b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/giftb128.s new file mode 100644 index 0000000..5e2b48f --- /dev/null +++ 
b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_balanced/giftb128.s @@ -0,0 +1,768 @@ +/**************************************************************************** +* Balanced ARM assembly implementation of the GIFT-128 block cipher. This +* implementation provides efficiency with limited impact on the code size. +* See "Fixslicing: A New GIFT Representation" paper available at +* https:// for more details. +****************************************************************************/ + +.syntax unified +.thumb + +/***************************************************************************** +* Round constants look-up table according to the fixsliced representation. +*****************************************************************************/ +.align 2 +.type rconst,%object +rconst: +.word 0x10000008, 0x80018000, 0x54000002, 0x01010181 +.word 0x8000001f, 0x10888880, 0x6001e000, 0x51500002 +.word 0x03030180, 0x8000002f, 0x10088880, 0x60016000 +.word 0x41500002, 0x03030080, 0x80000027, 0x10008880 +.word 0x4001e000, 0x11500002, 0x03020180, 0x8000002b +.word 0x10080880, 0x60014000, 0x01400002, 0x02020080 +.word 0x80000021, 0x10000080, 0x0001c000, 0x51000002 +.word 0x03010180, 0x8000002e, 0x10088800, 0x60012000 +.word 0x40500002, 0x01030080, 0x80000006, 0x10008808 +.word 0xc001a000, 0x14500002, 0x01020181, 0x8000001a + +.align 2 +classical_key_update: + and r2, r10, r7, lsr #12 + and r3, r7, r9 + orr r2, r2, r3, lsl #4 + and r3, r12, r7, lsr #2 + orr r2, r2, r3 + and r7, r7, #0x00030000 + orr r7, r2, r7, lsl #14 + str.w r7, [r1, #4] //1st classical key update + str.w r5, [r1], #8 //1st classical key update + and r2, r10, r6, lsr #12 + and r3, r6, r9 + orr r2, r2, r3, lsl #4 + and r3, r12, r6, lsr #2 + orr r2, r2, r3 + and r6, r6, #0x00030000 + orr r6, r2, r6, lsl #14 + str.w r6, [r1, #4] //2nd classical key update + str.w r4, [r1], #8 //2nd classical key update + and r2, r10, r5, lsr #12 + and r3, r5, r9 + orr r2, r2, r3, lsl #4 + and r3, r12, r5, 
lsr #2 + orr r2, r2, r3 + and r5, r5, #0x00030000 + orr r5, r2, r5, lsl #14 + str.w r5, [r1, #4] //3rd classical key update + str.w r7, [r1], #8 //3rd classical key update + and r2, r10, r4, lsr #12 + and r3, r4, r9 + orr r2, r2, r3, lsl #4 + and r3, r12, r4, lsr #2 + orr r2, r2, r3 + and r4, r4, #0x00030000 + orr r4, r2, r4, lsl #14 + str.w r4, [r1, #4] //4th classical key update + str.w r6, [r1], #8 //4th classical key update + bx lr + +.align 2 +rearrange_rkey_0: + ldrd r6, r4, [r1] + eor r12, r6, r6, lsr #9 + and r12, r12, r3 + eor r6, r12 + eor r6, r6, r12, lsl #9 //SWAPMOVE(r6, r6, 0x00550055, 9); + eor r12, r4, r4, lsr #9 + and r12, r12, r3 + eor r4, r12 + eor r4, r4, r12, lsl #9 //SWAPMOVE(r4, r4, 0x00550055, 9); + eor r12, r6, r6, lsr #18 + and r12, r12, r10 + eor r6, r12 + eor r6, r6, r12, lsl #18 //SWAPMOVE(r6, r6, 0x3333, 18); + eor r12, r4, r4, lsr #18 + and r12, r12, r10 + eor r4, r12 + eor r4, r4, r12, lsl #18 //SWAPMOVE(r4, r4, 0x3333, 18); + eor r12, r6, r6, lsr #12 + and r12, r12, r11 + eor r6, r12 + eor r6, r6, r12, lsl #12 //SWAPMOVE(r6, r6, 0x000f000f, 12); + eor r12, r4, r4, lsr #12 + and r12, r12, r11 + eor r4, r12 + eor r4, r4, r12, lsl #12 //SWAPMOVE(r4, r4, 0x000f000f, 12); + eor r12, r6, r6, lsr #24 + and r12, r12, #0xff + eor r6, r12 + eor r6, r6, r12, lsl #24 //SWAPMOVE(r6, r6, 0x000000ff, 24); + eor r12, r4, r4, lsr #24 + and r12, r12, #0xff + eor r4, r12 + eor r4, r4, r12, lsl #24 //SWAPMOVE(r4, r4, 0x000000ff, 24); + str.w r6, [r1] + str.w r4, [r1, #4] + bx lr + +.align 2 +rearrange_rkey_1: + ldrd r5, r7, [r1] + eor r8, r7, r7, lsr #3 + and r8, r8, r3 + eor r7, r8 + eor r7, r7, r8, lsl #3 //SWAPMOVE(r7, r7, 0x11111111, 3); + eor r8, r5, r5, lsr #3 + and r8, r8, r3 + eor r5, r8 + eor r5, r5, r8, lsl #3 //SWAPMOVE(r5, r5, 0x11111111, 3); + eor r8, r7, r7, lsr #6 + and r8, r8, r10 + eor r7, r8 + eor r7, r7, r8, lsl #6 //SWAPMOVE(r7, r7, 0x03030303, 6); + eor r8, r5, r5, lsr #6 + and r8, r8, r10 + eor r5, r8 + eor r5, r5, r8, lsl #6 
//SWAPMOVE(r5, r5, 0x03030303, 6); + eor r8, r7, r7, lsr #12 + and r8, r8, r11 + eor r7, r8 + eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12); + eor r8, r5, r5, lsr #12 + and r8, r8, r11 + eor r5, r8 + eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12); + eor r8, r7, r7, lsr #24 + and r8, r8, #0xff + eor r7, r8 + eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x000000ff, 24); + eor r8, r5, r5, lsr #24 + and r8, r8, #0xff + eor r5, r8 + eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24); + str.w r5, [r1] + str.w r7, [r1, #4] + bx lr + +.align 2 +rearrange_rkey_2: + ldrd r5, r7, [r1] + eor r8, r7, r7, lsr #15 + and r8, r8, r3 + eor r7, r8 + eor r7, r7, r8, lsl #15 //SWAPMOVE(r7, r7, 0x0000aaaa, 15); + eor r8, r5, r5, lsr #15 + and r8, r8, r3 + eor r5, r8 + eor r5, r5, r8, lsl #15 //SWAPMOVE(r5, r5, 0x0000aaaa, 15); + eor r8, r7, r7, lsr #18 + and r8, r8, r10 + eor r7, r8 + eor r7, r7, r8, lsl #18 //SWAPMOVE(r7, r7, 0x00003333, 18); + eor r8, r5, r5, lsr #18 + and r8, r8, r10 + eor r5, r8 + eor r5, r5, r8, lsl #18 //SWAPMOVE(r5, r5, 0x00003333, 18); + eor r8, r7, r7, lsr #12 + and r8, r8, r11 + eor r7, r8 + eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12); + eor r8, r5, r5, lsr #12 + and r8, r8, r11 + eor r5, r8 + eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12); + eor r8, r7, r7, lsr #24 + and r8, r8, #0xff + eor r7, r8 + eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x00000ff, 24); + eor r8, r5, r5, lsr #24 + and r8, r8, #0xff + eor r5, r8 + eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24); + str.w r5, [r1] + str.w r7, [r1, #4] + bx lr + +.align 2 +rearrange_rkey_3: + ldrd r5, r7, [r1] + eor r8, r7, r7, lsr #3 + and r8, r8, r3 + eor r7, r8 + eor r7, r7, r8, lsl #3 //SWAPMOVE(r7, r7, 0x0a0a0a0a, 3); + eor r8, r5, r5, lsr #3 + and r8, r8, r3 + eor r5, r8 + eor r5, r5, r8, lsl #3 //SWAPMOVE(r5, r5, 0x0a0a0a0a, 3); + eor r8, r7, r7, lsr #6 + and r8, r8, r10 + eor r7, r8 + eor r7, r7, r8, lsl #6 //SWAPMOVE(r7, r7, 
0x00cc00cc, 6); + eor r8, r5, r5, lsr #6 + and r8, r8, r10 + eor r5, r8 + eor r5, r5, r8, lsl #6 //SWAPMOVE(r5, r5, 0x00cc00cc, 6); + eor r8, r7, r7, lsr #12 + and r8, r8, r11 + eor r7, r8 + eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12); + eor r8, r5, r5, lsr #12 + and r8, r8, r11 + eor r5, r8 + eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12); + eor r8, r7, r7, lsr #24 + and r8, r8, #0xff + eor r7, r8 + eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x000000ff, 24); + eor r8, r5, r5, lsr #24 + and r8, r8, #0xff + eor r5, r8 + eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24); + str.w r5, [r1] + str.w r7, [r1, #4] + bx lr + +.align 2 +key_update_0: + ldrd r4, r5, [r1], #80 + and r2, r12, r4, ror #24 + and r4, r4, r11 + orr r4, r2, r4, ror #16 //KEY_TRIPLE_UPDATE_1(r4) + eor r2, r4, r4, lsr #1 + and r2, r2, r8 + eor r4, r4, r2 + eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x55551100, 1) + eor r2, r5, r5, lsr #16 + and r2, r2, r10 + eor r5, r5, r2 + eor r5, r5, r2, lsl #16 //SWAPMOVE(r5, r5, 0x00003333, 16) + eor r2, r5, r5, lsr #1 + and r2, r2, r9 + eor r5, r5, r2 + eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x555544444, 1) + str.w r4, [r1, #4] + str.w r5, [r1], #80 + and r2, r12, r5, ror #24 + and r5, r5, r11 + orr r5, r2, r5, ror #16 //KEY_TRIPLE_UPDATE_1(r5) + eor r2, r5, r5, lsr #1 + and r2, r2, r8 + eor r5, r5, r2 + eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x55551100, 1) + eor r2, r4, r4, lsr #16 + and r2, r2, r10 + eor r4, r4, r2 + eor r4, r4, r2, lsl #16 //SWAPMOVE(r4, r4, 0x00003333, 16) + eor r2, r4, r4, lsr #1 + and r2, r2, r9 + eor r4, r4, r2 + eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x555544444, 1) + str.w r5, [r1, #4] + str.w r4, [r1], #80 + and r2, r12, r4, ror #24 + and r4, r4, r11 + orr r4, r2, r4, ror #16 //KEY_TRIPLE_UPDATE_1(r4) + eor r2, r4, r4, lsr #1 + and r2, r2, r8 + eor r4, r4, r2 + eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x55551100, 1) + eor r2, r5, r5, lsr #16 + and r2, r2, r10 + eor r5, r5, r2 + eor r5, r5, 
r2, lsl #16 //SWAPMOVE(r5, r5, 0x00003333, 16) + eor r2, r5, r5, lsr #1 + and r2, r2, r9 + eor r5, r5, r2 + eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x555544444, 1) + str.w r4, [r1, #4] + str.w r5, [r1], #80 + bx lr + +.align 2 +key_update_1: + ldrd r4, r5, [r1], #80 + and r2, r9, r4, lsr #6 + and r3, r4, r10, lsl #8 + orr r2, r2, r3, lsl #2 + and r3, r8, r4, lsr #5 + orr r2, r2, r3 + and r4, r4, r7 + orr r4, r2, r4, lsl #3 //KEY_TRIPLE_UPDATE_2(r4) + and r2, r12, r5, lsr #4 + and r3, r5, r12 + orr r2, r2, r3, lsl #4 + and r3, r11, r5, lsr #6 + orr r2, r2, r3 + and r5, r5, r10 + orr r5, r2, r5, lsl #2 //KEY_DOUBLE_UPDATE_2(r5) + str.w r4, [r1, #4] + str.w r5, [r1], #80 + and r2, r9, r5, lsr #6 + and r3, r5, r10, lsl #8 + orr r2, r2, r3, lsl #2 + and r3, r8, r5, lsr #5 + orr r2, r2, r3 + and r5, r5, r7 + orr r5, r2, r5, lsl #3 //KEY_TRIPLE_UPDATE_2(r5) + and r2, r12, r4, lsr #4 + and r3, r4, r12 + orr r2, r2, r3, lsl #4 + and r3, r11, r4, lsr #6 + orr r2, r2, r3 + and r4, r4, r10 + orr r4, r2, r4, lsl #2 //KEY_DOUBLE_UPDATE_2(r4) + str.w r5, [r1, #4] + str.w r4, [r1], #80 + and r2, r9, r4, lsr #6 + and r3, r4, r10, lsl #8 + orr r2, r2, r3, lsl #2 + and r3, r8, r4, lsr #5 + orr r2, r2, r3 + and r4, r4, r7 + orr r4, r2, r4, lsl #3 //KEY_TRIPLE_UPDATE_2(r4) + and r2, r12, r5, lsr #4 + and r3, r5, r12 + orr r2, r2, r3, lsl #4 + and r3, r11, r5, lsr #6 + orr r2, r2, r3 + and r5, r5, r10 + orr r5, r2, r5, lsl#2 //KEY_DOUBLE_UPDATE_2(r5) + str.w r4, [r1, #4] + str.w r5, [r1], #80 + bx lr + +.align 2 +key_update_2: + ldrd r4, r5, [r1], #80 + and r2, r12, r4, ror #24 + and r4, r11, r4, ror #20 + orr r4, r4, r2 //KEY_TRIPLE_UPDATE_2(r4) + and r2, r11, r5, ror #24 + and r5, r12, r5, ror #16 + orr r5, r5, r2 //KEY_DOUBLE_UPDATE_2(r5) + str.w r4, [r1, #4] + str.w r5, [r1], #80 + and r2, r12, r5, ror #24 + and r5, r11, r5, ror #20 + orr r5, r5, r2 //KEY_TRIPLE_UPDATE_2(r5) + and r2, r11, r4, ror #24 + and r4, r12, r4, ror #16 + orr r4, r4, r2 //KEY_DOUBLE_UPDATE_2(r4) + str.w 
r5, [r1, #4] + str.w r4, [r1], #80 + and r2, r12, r4, ror #24 + and r4, r11, r4, ror #20 + orr r4, r4, r2 //KEY_TRIPLE_UPDATE_2(r4) + and r2, r11, r5, ror #24 + and r5, r12, r5, ror #16 + orr r5, r5, r2 //KEY_DOUBLE_UPDATE_2(r5) + str.w r4, [r1, #4] + str.w r5, [r1], #80 + bx lr + +.align 2 +key_update_3: + ldrd r4, r5, [r1], #80 + and r2, r10, r4, lsr #18 + and r3, r4, r7, lsr #4 + orr r2, r2, r3, lsl #3 + and r3, r11, r4, lsr #14 + orr r2, r2, r3 + and r3, r4, r12, lsr #11 + orr r2, r2, r3, lsl #15 + and r3, r12, r4, lsr #1 + orr r2, r2, r3 + and r4, r4, r7, lsr #16 + orr r4, r2, r4, lsl #19 //KEY_TRIPLE_UPDATE_4(r4) + and r2, r9, r5, lsr #2 + and r3, r9, r5 + orr r2, r2, r3, lsl #2 + and r3, r8, r5, lsr #1 + orr r2, r2, r3 + and r5, r5, r7 + orr r5, r2, r5, lsl #3 //KEY_DOUBLE_UPDATE_4(r5) + str.w r4, [r1, #4] + str.w r5, [r1], #80 + and r2, r10, r5, lsr #18 + and r3, r5, r7, lsr #4 + orr r2, r2, r3, lsl #3 + and r3, r11, r5, lsr #14 + orr r2, r2, r3 + and r3, r5, r12, lsr #11 + orr r2, r2, r3, lsl #15 + and r3, r12, r5, lsr #1 + orr r2, r2, r3 + and r5, r5, r7, lsr #16 + orr r5, r2, r5, lsl #19 //KEY_TRIPLE_UPDATE_4(r5) + and r2, r9, r4, lsr #2 + and r3, r9, r4 + orr r2, r2, r3, lsl #2 + and r3, r8, r4, lsr #1 + orr r2, r2, r3 + and r4, r4, r7 + orr r4, r2, r4, lsl #3 //KEY_DOUBLE_UPDATE_4(r4) + str.w r5, [r1, #4] + str.w r4, [r1], #80 + and r2, r10, r4, lsr #18 + and r3, r4, r7, lsr #4 + orr r2, r2, r3, lsl #3 + and r3, r11, r4, lsr #14 + orr r2, r2, r3 + and r3, r4, r12, lsr #11 + orr r2, r2, r3, lsl #15 + and r3, r12, r4, lsr #1 + orr r2, r2, r3 + and r4, r4, r7, lsr #16 + orr r4, r2, r4, lsl #19 //KEY_TRIPLE_UPDATE_4(r4) + and r2, r9, r5, lsr #2 + and r3, r9, r5 + orr r2, r2, r3, lsl #2 + and r3, r8, r5, lsr #1 + orr r2, r2, r3 + and r5, r5, r7 + orr r5, r2, r5, lsl #3 //KEY_DOUBLE_UPDATE_4(r5) + str.w r4, [r1, #4] + str.w r5, [r1], #80 + bx lr + +.align 2 +key_update_4: + ldrd r4, r5, [r1], #80 + and r2, r7, r4, lsr #6 + and r3, r4, #0x003f0000 + orr r2, 
r2, r3, lsl #10 + and r3, r12, r4, lsr #4 + orr r2, r2, r3 + and r4, r4, #0x000f + orr r4, r2, r4, lsl #12 //KEY_TRIPLE_UPDATE_4(r4) + and r2, r10, r5, lsr #4 + and r3, r5, #0x000f0000 + orr r2, r2, r3, lsl #12 + and r3, r8, r5, lsr #8 + orr r2, r2, r3 + and r5, r5, r8 + orr r5, r2, r5, lsl #8 //KEY_DOUBLE_UPDATE_4(r5) + str.w r4, [r1, #4] + str.w r5, [r1], #80 + and r2, r7, r5, lsr #6 + and r3, r5, #0x003f0000 + orr r2, r2, r3, lsl #10 + and r3, r12, r5, lsr #4 + orr r2, r2, r3 + and r5, r5, #0x000f + orr r5, r2, r5, lsl #12 //KEY_TRIPLE_UPDATE_4(r5) + and r2, r10, r4, lsr #4 + and r3, r4, #0x000f0000 + orr r2, r2, r3, lsl #12 + and r3, r8, r4, lsr #8 + orr r2, r2, r3 + and r4, r4, r8 + orr r4, r2, r4, lsl #8 //KEY_DOUBLE_UPDATE_4(r4) + str.w r5, [r1, #4] + str.w r4, [r1], #80 + and r2, r7, r4, lsr #6 + and r3, r4, #0x003f0000 + orr r2, r2, r3, lsl #10 + and r3, r12, r4, lsr #4 + orr r2, r2, r3 + and r4, r4, #0x000f + orr r4, r2, r4, lsl #12 //KEY_TRIPLE_UPDATE_4(r4) + and r2, r10, r5, lsr #4 + and r3, r5, #0x000f0000 + orr r2, r2, r3, lsl #12 + and r3, r8, r5, lsr #8 + orr r2, r2, r3 + and r5, r5, r8 + orr r5, r2, r5, lsl #8 //KEY_DOUBLE_UPDATE_4(r5) + str.w r4, [r1, #4] + str.w r5, [r1], #80 + bx lr + +/***************************************************************************** +* Balanced implementation of the GIFT-128 key schedule according to the +* fixsliced representation. 
+*****************************************************************************/ +.align 2 +@ void gift128_keyschedule(const u8* key, u32* rkey) { +.global gift128_keyschedule +.type gift128_keyschedule,%function +gift128_keyschedule: + push {r1-r12, r14} + ldm r0, {r4-r7} //load key words + rev r4, r4 //endianness (could be skipped with another representation) + rev r5, r5 //endianness (could be skipped with another representation) + rev r6, r6 //endianness (could be skipped with another representation) + rev r7, r7 //endianness (could be skipped with another representation) + str.w r5, [r1, #4] + str.w r7, [r1], #8 //the first rkeys are not updated + str.w r4, [r1, #4] + str.w r6, [r1], #8 //the first rkeys are not updated + movw r12, #0x3fff + lsl r12, r12, #16 //r12<- 0x3fff0000 + movw r10, #0x000f //r10<- 0x0000000f + movw r9, #0x0fff //r9 <- 0x00000fff + bl classical_key_update //keyschedule using classical representation (10 rounds) + bl classical_key_update //keyschedule using classical representation (20 rounds) + sub.w r1, r1, #80 + movw r3, #0x0055 + movt r3, #0x0055 //r3 <- 0x00550055 + movw r10, #0x3333 //r10<- 0x00003333 + movw r11, #0x000f + movt r11, #0x000f //r11<- 0x000f000f + bl rearrange_rkey_0 //fixslice the rkeys + add.w r1, r1, #40 + bl rearrange_rkey_0 //fixslice the rkeys + sub.w r1, r1, #32 + movw r3, #0x1111 + movt r3, #0x1111 //r3 <- 0x11111111 + movw r10, #0x0303 + movt r10, #0x0303 //r10<- 0x03030303 + bl rearrange_rkey_1 //fixslice the rkeys + add.w r1, r1, #40 + bl rearrange_rkey_1 //fixslice the rkeys + sub.w r1, r1, #32 + movw r3, #0xaaaa //r3 <- 0x0000aaaa + movw r10, #0x3333 //r10<- 0x00003333 + movw r11, #0xf0f0 //r11<- 0x0000f0f0 + bl rearrange_rkey_2 //fixslice the rkeys + add.w r1, r1, #40 + bl rearrange_rkey_2 //fixslice the rkeys + sub.w r1, r1, #32 + movw r3, #0x0a0a + movt r3, #0x0a0a //r3 <- 0x0a0a0a0a + movw r10, #0x00cc + movt r10, #0x00cc //r10<- 0x00cc00cc + bl rearrange_rkey_3 //fixslice the rkeys + add.w r1, r1, #40 
+ bl rearrange_rkey_3 //fixslice the rkeys + sub.w r1, r1, #64 + movw r10, #0x3333 //r10<- 0x00003333 + eor r12, r10, r10, lsl #16 //r12<- 0w33333333 + mvn r11, r12 //r11<- 0xcccccccc + movw r9, #0x4444 + movt r9, #0x5555 //r9 <- 0x55554444 + movw r8, #0x1100 + movt r8, #0x5555 //r8 <- 0x55551100 + bl key_update_0 //keyschedule according to fixslicing + sub.w r1, r1, #280 + bl key_update_0 //keyschedule according to fixslicing + sub.w r1, r1, #352 + movw r12, #0x0f00 + movt r12, #0x0f00 //r12<- 0x0f000f00 + movw r11, #0x0003 + movt r11, #0x0003 //r11<- 0x00030003 + movw r10, #0x003f + movt r10, #0x003f //r10<- 0x003f003f + lsl r9, r11, #8 //r9 <- 0x03000300 + and r8, r10, r10, lsr #3 //r8 <- 0x00070007 + orr r7, r8, r8, lsl #2 //r7 <- 0x001f001f + bl key_update_1 //keyschedule according to fixslicing + sub.w r1, r1, #280 + bl key_update_1 //keyschedule according to fixslicing + sub.w r1, r1, #352 + movw r12, #0x5555 + movt r12, #0x5555 //r12<- 0x55555555 + mvn r11, r12 //r11<- 0xaaaaaaaa + bl key_update_2 //keyschedule according to fixslicing + sub.w r1, r1, #280 + bl key_update_2 //keyschedule according to fixslicing + sub.w r1, r1, #352 + orr r12, r8, r8, lsl #8 //r12<- 0x07070707 + movw r11, #0xc0c0 //r11<- 0x0000c0c0 + movw r10, #0x3030 //r10<- 0x00003030 + and r9, r12, r12, lsr #1 //r9 <- 0x03030303 + lsl r8, r12, #4 //r8 <- 0x70707070 + eor r7, r8, r9, lsl #5 //r7 <- 0x10101010 + movw r6, #0xf0f0 //r6 <- 0x0000f0f0 + bl key_update_3 //keyschedule according to fixslicing + sub.w r1, r1, #280 + bl key_update_3 //keyschedule according to fixslicing + sub.w r1, r1, #352 + movw r12, #0x0fff + lsl r10, r12, #16 + movw r8, #0x00ff //r8 <- 0x000000ff + movw r7, #0x03ff //r7 <- 0x000003ff + lsl r7, r7, #16 + bl key_update_4 //keyschedule according to fixslicing + sub.w r1, r1, #280 + bl key_update_4 //keyschedule according to fixslicing + pop {r1-r12,r14} + bx lr + +.align 2 +quintuple_round: + str.w r14, [sp] + ldr.w r5, [r0], #4 + ldr.w r6, [r1], #4 //load rkey + 
ldr.w r7, [r1], #4 //load rkey + and r8, r11, r9 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r9, r8 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + and r8, r4, r12, lsr #1 //permutation layer + and r12, r12, r2 + orr r12, r8, r12, lsl #3 //r12<- NIBBLE_ROR(r12, 1) + and r8, r4, r11 + and r11, r2, r11, lsr #3 + orr r11, r11, r8, lsl #1 //r11<- NIBBLE_ROR(r11, 3) + orr r14, r2, r2, lsl #1 //r14 <- 0x33333333 + and r8, r14, r10, lsr #2 + and r10, r10, r14 + orr r10, r8, r10, lsl #2 //r10<- NIBBLE_ROR(r10, 2) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r9, r9, r5 //add rconst + ldr.w r5, [r0], #4 + ldr.w r6, [r1], #4 //load rkey + ldr.w r7, [r1], #4 //load rkey + and r8, r12, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r12, r8 + orr r8, r12, r10 + eor r11, r11, r8 + eor r9, r9, r11 + eor r10, r10, r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9 + mvn r14, r3, lsl #12 //r0 <- 0x0fff0fff + and r8, r14, r9, lsr #4 + and r9, r9, r3 + orr r9, r8, r9, lsl #12 //r9 <- HALF_ROR(r9, 4) + and r8, r3, r11, lsr #12 + and r11, r11, r14 + orr r11, r8, r11, lsl #4 //r11<- HALF_ROR(r11, 12) + rev16 r10, r10 //r10<- HALF_ROR(r10, 8) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r12, r12, r5 //add rconst + ldr.w r5, [r0], #4 + ldr.w r6, [r1], #4 //load rkey + ldr.w r7, [r1], #4 //load rkey + and r8, r9, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r9, r8 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + orr r14, r2, r2, lsl #2 //r14 <- 0x55555555 for SWAPMOVE + eor r8, r10, r10, lsr #1 + and r8, r8, r14 + eor r10, r10, r8 + eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1) + eor r8, r12, r12, lsr #1 + and r8, r8, r14, lsr #16 + eor r12, r12, r8 + eor r12, r12, r8, lsl #1 
//SWAPMOVE(r12, r12, 0x55550000, 1) + eor r8, r11, r11, lsr #1 + and r8, r8, r14, lsl #16 + eor r11, r11, r8 + eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x00005555, 1) + eor r10, r10, r6 //add 1st keyword + eor r11, r7, r11, ror #16 //add 2nd keyword + eor r9, r9, r5 //add rconst + ldr.w r5, [r0], #4 + ldr.w r6, [r1], #4 //load rkey + ldr.w r7, [r1], #4 //load rkey + and r8, r11, r12, ror #16 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r8, r12, ror #16 + orr r8, r12, r10 + eor r11, r11, r8 + eor r9, r9, r11 + eor r10, r10, r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9 + eor r14, r3, r3, lsl #8 //r14 <- 0x0f0f0f0f for BYTE_ROR + and r8, r14, r10, lsr #4 + and r10, r10, r14 + orr r10, r8, r10, lsl #4 //r10<- BYTE_ROR(r10, 4) + orr r14, r14, r14, lsl #2 //r14 <- 0x3f3f3f3f for BYTE_ROR + mvn r8, r14 //r8 <- 0xc0c0c0c0 for BYTE_ROR + and r8, r8, r11, lsl #6 + and r11, r14, r11, lsr #2 + orr r11, r11, r8 //r11<- BYTE_ROR(r11, 2) + mvn r8, r14, lsr #6 + and r8, r8, r9, lsr #6 + and r9, r14, r9 + orr r9, r8, r9, lsl #2 //r9 <- BYTE_ROR(r9, 6) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r12, r12, r5 //add rconst + ldr.w r5, [r0], #4 + ldr.w r6, [r1], #4 //load rkey + ldr.w r7, [r1], #4 //load rkey + ldr.w lr, [sp] //restore link register + and r8, r9, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r9, r8 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12, ror #24 + eor r10, r6, r10, ror #16 //add 1st keyword + eor r11, r7, r11, ror #8 //add 2nd keyword + eor r9, r9, r5 //add rconst + eor r9, r9, r12 //swap r9 with r12 + eor r12, r12, r9 //swap r9 with r12 + eor r9, r9, r12 //swap r9 with r12 + bx lr + +/***************************************************************************** +* Balanced ARM assembly implementation of the GIFTb-128 block cipher. 
+* This function simply encrypts a 128-bit block, without any operation mode. +*****************************************************************************/ +.align 2 +@ void giftb128_encrypt_block(u8 *out, const u32* rkey, const u8 *block) +.global giftb128_encrypt_block +.type giftb128_encrypt_block,%function +giftb128_encrypt_block: + push {r0,r2-r12,r14} + sub.w sp, #4 //to store 'lr' when calling 'quintuple_round' + ldm r2, {r9-r12} // load plaintext words + rev r9, r9 + rev r10, r10 + rev r11, r11 + rev r12, r12 + movw r2, #0x1111 + movt r2, #0x1111 //r2 <- 0x11111111 (for NIBBLE_ROR) + movw r3, #0x000f + movt r3, #0x000f //r3 <- 0x000f000f (for HALF_ROR) + mvn r4, r2, lsl #3 //r4 <- 0x7777777 (for NIBBLE_ROR) + adr r0, rconst //r0 <- 'rconst' address + bl quintuple_round + bl quintuple_round + bl quintuple_round + bl quintuple_round + bl quintuple_round + bl quintuple_round + bl quintuple_round + bl quintuple_round + ldr.w r0, [sp ,#4] //restore 'ctext' address + rev r9, r9 + rev r10, r10 + rev r11, r11 + rev r12, r12 + stm r0, {r9-r12} + add.w sp, #4 + pop {r0,r2-r12,r14} + bx lr + \ No newline at end of file diff --git a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/api.h b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/api.h new file mode 100644 index 0000000..fb1d58b --- /dev/null +++ b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/cofb.h b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/cofb.h new file mode 100644 index 0000000..143c7d3 --- /dev/null +++ b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/cofb.h @@ -0,0 +1,20 @@ +#ifndef GIFT_COFB_H_ +#define GIFT_COFB_H_ + +#define TAG_SIZE 16 
+#define COFB_ENCRYPT 1 +#define COFB_DECRYPT 0 + +#define XOR_BLOCK(x, y, z) ({ \ + (x)[0] = (y)[0] ^ (z)[0]; \ + (x)[1] = (y)[1] ^ (z)[1]; \ + (x)[2] = (y)[2] ^ (z)[2]; \ + (x)[3] = (y)[3] ^ (z)[3]; \ +}) + +#define XOR_TOP_BAR_BLOCK(x, y) ({ \ + (x)[0] ^= (y)[0]; \ + (x)[1] ^= (y)[1]; \ +}) + +#endif // GIFT_COFB_H_ \ No newline at end of file diff --git a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/encrypt.c b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/encrypt.c new file mode 100644 index 0000000..7fbe2db --- /dev/null +++ b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/encrypt.c @@ -0,0 +1,191 @@ +#include +#include "api.h" +#include "cofb.h" +#include "giftb128.h" + +static inline void padding(u32* d, const u32* s, const u32 no_of_bytes){ + u32 i; + if (no_of_bytes == 0) { + d[0] = 0x00000080; // little-endian + d[1] = 0x00000000; + d[2] = 0x00000000; + d[3] = 0x00000000; + } + else if (no_of_bytes < GIFT128_BLOCK_SIZE) { + for (i = 0; i < no_of_bytes/4+1; i++) + d[i] = s[i]; + d[i-1] &= ~(0xffffffffL << (no_of_bytes % 4)*8); + d[i-1] |= 0x00000080L << (no_of_bytes % 4)*8; + for (; i < 4; i++) + d[i] = 0x00000000; + } + else { + d[0] = s[0]; + d[1] = s[1]; + d[2] = s[2]; + d[3] = s[3]; + } +} + +static inline void double_half_block(u32* x) { + u32 tmp0; + tmp0 = (x)[0]; + (x)[0] = (((x)[0] & 0x7f7f7f7f) << 1) | (((x)[0] & 0x80808080) >> 15); + (x)[0] |= ((x)[1] & 0x80808080) << 17; + (x)[1] = (((x)[1] & 0x7f7f7f7f) << 1) | (((x)[1] & 0x80808080) >> 15); + (x)[1] ^= (((tmp0 >> 7) & 1) * 27) << 24; +} + +static inline void triple_half_block(u32* x) { + u32 tmp0, tmp1; + tmp0 = (x)[0]; + tmp1 = (x)[1]; + (x)[0] = (((x)[0] & 0x7f7f7f7f) << 1) | (((x)[0] & 0x80808080) >> 15); + (x)[0] |= ((x)[1] & 0x80808080) << 17; + (x)[1] = (((x)[1] & 0x7f7f7f7f) << 1) | (((x)[1] & 0x80808080) >> 15); + (x)[1] ^= (((tmp0 >> 7) & 1) * 27) << 24; + (x)[0] ^= tmp0; + (x)[1] ^= tmp1; +} + +static inline 
void g(u32 *x) { + u32 tmp0, tmp1; + tmp0 = (x)[0]; + tmp1 = (x)[1]; + (x)[0] = (x)[2]; + (x)[1] = (x)[3]; + (x)[2] = ((tmp0 & 0x7f7f7f7f) << 1) | ((tmp0 & 0x80808080) >> 15); + (x)[2] |= ((tmp1 & 0x80808080) << 17); + (x)[3] = ((tmp1 & 0x7f7f7f7f) << 1) | ((tmp1 & 0x80808080) >> 15); + (x)[3] |= ((tmp0 & 0x80808080) << 17); +} + +static inline void rho1(u32* d, u32* y, u32* m, u32 n) { + g(y); + padding(d,m,n); + XOR_BLOCK(d, d, y); +} + +static inline void rho(u32* y, u32* m, u32* x, u32* c, u32 n) { + XOR_BLOCK(c, y, m); + rho1(x, y, m, n); +} + +static inline void rho_prime(u32* y, u32*c, u32* m, u32* x, u32 n) { + XOR_BLOCK(m, y, c); + rho1(x, y, m, n); +} + +/**************************************************************************** +* Constant-time implementation of the GIFT-COFB authenticated cipher based on +* fixsliced GIFTb-128. Encryption/decryption is handled by the same function, +* depending on the 'mode' parameter (1/0). +****************************************************************************/ +int giftcofb_crypt(u8* out, const u8* key, const u8* nonce, const u8* ad, + u32 ad_len, const u8* in, u32 in_len, const int encrypting) { + + u32 tmp0, tmp1, emptyA, emptyM, offset[2]; + u32 input[4], rkey[80]; + u8 Y[GIFT128_BLOCK_SIZE]; + + if (!encrypting) { + if (in_len < TAG_SIZE) + return -1; + in_len -= TAG_SIZE; + } + + if(ad_len == 0) + emptyA = 1; + else + emptyA = 0; + + if(in_len == 0) + emptyM =1; + else + emptyM = 0; + + gift128_keyschedule(key, rkey); + giftb128_encrypt_block(Y, rkey, nonce); + offset[0] = ((u32*)Y)[0]; + offset[1] = ((u32*)Y)[1]; + + while(ad_len > GIFT128_BLOCK_SIZE){ + rho1(input, (u32*)Y, (u32*)ad, GIFT128_BLOCK_SIZE); + double_half_block(offset); + XOR_TOP_BAR_BLOCK(input, offset); + giftb128_encrypt_block(Y, rkey, (u8*)input); + ad += GIFT128_BLOCK_SIZE; + ad_len -= GIFT128_BLOCK_SIZE; + } + + triple_half_block(offset); + if((ad_len % GIFT128_BLOCK_SIZE != 0) || (emptyA)) + triple_half_block(offset); + if(emptyM) { 
+ triple_half_block(offset); + triple_half_block(offset); + } + + rho1(input, (u32*)Y, (u32*)ad, ad_len); + XOR_TOP_BAR_BLOCK(input, offset); + giftb128_encrypt_block(Y, rkey, (u8*)input); + + while (in_len > GIFT128_BLOCK_SIZE){ + double_half_block(offset); + if (encrypting) + rho((u32*)Y, (u32*)in, input, (u32*)out, GIFT128_BLOCK_SIZE); + else + rho_prime((u32*)Y, (u32*)in, input, (u32*)out, GIFT128_BLOCK_SIZE); + XOR_TOP_BAR_BLOCK(input, offset); + giftb128_encrypt_block(Y, rkey, (u8*)input); + in += GIFT128_BLOCK_SIZE; + out += GIFT128_BLOCK_SIZE; + in_len -= GIFT128_BLOCK_SIZE; + } + + if(!emptyM){ + triple_half_block(offset); + if(in_len % GIFT128_BLOCK_SIZE != 0) + triple_half_block(offset); + if (encrypting) { + rho((u32*)Y, (u32*)in, input, (u32*)out, in_len); + out += in_len; + } + else { + rho_prime((u32*)Y, (u32*)in, input, (u32*)out, in_len); + in += in_len; + } + XOR_TOP_BAR_BLOCK(input, offset); + giftb128_encrypt_block(Y, rkey, (u8*)input); + } + + if (encrypting) { // encryption mode + memcpy(out, Y, TAG_SIZE); + return 0; + } + // decrypting + tmp0 = 0; + for(tmp1 = 0; tmp1 < TAG_SIZE; tmp1++) + tmp0 |= in[tmp1] ^ Y[tmp1]; + return tmp0; +} + +int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, const unsigned char* npub, + const unsigned char* k) { + (void)nsec; + *clen = mlen + TAG_SIZE; + return giftcofb_crypt(c, k, npub, ad, adlen, m, mlen, COFB_ENCRYPT); +} + +int crypto_aead_decrypt(unsigned char* m, unsigned long long *mlen, + unsigned char* nsec, const unsigned char* c, + unsigned long long clen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char *k) { + (void)nsec; + *mlen = clen - TAG_SIZE; + return giftcofb_crypt(m, k, npub, ad, adlen, c, clen, COFB_DECRYPT); +} diff --git 
a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/giftb128.h b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/giftb128.h new file mode 100644 index 0000000..bcb4f36 --- /dev/null +++ b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/giftb128.h @@ -0,0 +1,13 @@ +#ifndef GIFT128_H_ +#define GIFT128_H_ + +#define KEY_SIZE 16 +#define GIFT128_BLOCK_SIZE 16 + +typedef unsigned char u8; +typedef unsigned int u32; + +extern void gift128_keyschedule(const u8* key, u32* rkey); +extern void giftb128_encrypt_block(u8* out_block, const u32* rkey, const u8* in_block); + +#endif // GIFT128_H_ \ No newline at end of file diff --git a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/giftb128.s b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/giftb128.s new file mode 100644 index 0000000..1cd3901 --- /dev/null +++ b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_compact/giftb128.s @@ -0,0 +1,512 @@ +/**************************************************************************** +* Compact ARM assembly implementation of the GIFT-128 block cipher. This +* implementation focuses on code size rather than speed. +* See "Fixslicing: A New GIFT Representation" paper available at +* https:// for more details. +****************************************************************************/ + +.syntax unified +.thumb + +/***************************************************************************** +* Round constants look-up table according to the fixsliced representation. 
+*****************************************************************************/ +.align 2 +.type rconst,%object +rconst: +.word 0x10000008, 0x80018000, 0x54000002, 0x01010181 +.word 0x8000001f, 0x10888880, 0x6001e000, 0x51500002 +.word 0x03030180, 0x8000002f, 0x10088880, 0x60016000 +.word 0x41500002, 0x03030080, 0x80000027, 0x10008880 +.word 0x4001e000, 0x11500002, 0x03020180, 0x8000002b +.word 0x10080880, 0x60014000, 0x01400002, 0x02020080 +.word 0x80000021, 0x10000080, 0x0001c000, 0x51000002 +.word 0x03010180, 0x8000002e, 0x10088800, 0x60012000 +.word 0x40500002, 0x01030080, 0x80000006, 0x10008808 +.word 0xc001a000, 0x14500002, 0x01020181, 0x8000001a + +.align 2 +key_update: + and r2, r10, r7, lsr #12 + and r3, r7, r9 + orr r2, r2, r3, lsl #4 + and r3, r12, r7, lsr #2 + orr r2, r2, r3 + and r7, r7, #0x00030000 + orr r7, r2, r7, lsl #14 + strd r5, r7, [r1], #8 //store rkeys after 1st key update + and r2, r10, r6, lsr #12 + and r3, r6, r9 + orr r2, r2, r3, lsl #4 + and r3, r12, r6, lsr #2 + orr r2, r2, r3 + and r6, r6, #0x00030000 + orr r6, r2, r6, lsl #14 + strd r4, r6, [r1], #8 //store rkeys after 2nd key update + and r2, r10, r5, lsr #12 + and r3, r5, r9 + orr r2, r2, r3, lsl #4 + and r3, r12, r5, lsr #2 + orr r2, r2, r3 + and r5, r5, #0x00030000 + orr r5, r2, r5, lsl #14 + strd r7, r5, [r1], #8 //store rkeys after 3rd key update + and r2, r10, r4, lsr #12 + and r3, r4, r9 + orr r2, r2, r3, lsl #4 + and r3, r12, r4, lsr #2 + orr r2, r2, r3 + and r4, r4, #0x00030000 + orr r4, r2, r4, lsl #14 + strd r6, r4, [r1], #8 //store rkeys after 4th key update + bx lr + + +.align 2 +rearrange_rkey_0: + ldrd r6, r4, [r1] + eor r12, r6, r6, lsr #9 + and r12, r12, r3 + eor r6, r12 + eor r6, r6, r12, lsl #9 //SWAPMOVE(r6, r6, 0x00550055, 9); + eor r12, r4, r4, lsr #9 + and r12, r12, r3 + eor r4, r12 + eor r4, r4, r12, lsl #9 //SWAPMOVE(r4, r4, 0x00550055, 9); + eor r12, r6, r6, lsr #18 + and r12, r12, r10 + eor r6, r12 + eor r6, r6, r12, lsl #18 //SWAPMOVE(r6, r6, 0x3333, 18); 
+ eor r12, r4, r4, lsr #18 + and r12, r12, r10 + eor r4, r12 + eor r4, r4, r12, lsl #18 //SWAPMOVE(r4, r4, 0x3333, 18); + eor r12, r6, r6, lsr #12 + and r12, r12, r11 + eor r6, r12 + eor r6, r6, r12, lsl #12 //SWAPMOVE(r6, r6, 0x000f000f, 12); + eor r12, r4, r4, lsr #12 + and r12, r12, r11 + eor r4, r12 + eor r4, r4, r12, lsl #12 //SWAPMOVE(r4, r4, 0x000f000f, 12); + eor r12, r6, r6, lsr #24 + and r12, r12, #0xff + eor r6, r12 + eor r6, r6, r12, lsl #24 //SWAPMOVE(r6, r6, 0x000000ff, 24); + eor r12, r4, r4, lsr #24 + and r12, r12, #0xff + eor r4, r12 + eor r4, r4, r12, lsl #24 //SWAPMOVE(r4, r4, 0x000000ff, 24); + strd r6, r4, [r1] + bx lr + +.align 2 +rearrange_rkey_1: + ldrd r5, r7, [r1] + eor r8, r7, r7, lsr #3 + and r8, r8, r3 + eor r7, r8 + eor r7, r7, r8, lsl #3 //SWAPMOVE(r7, r7, 0x11111111, 3); + eor r8, r5, r5, lsr #3 + and r8, r8, r3 + eor r5, r8 + eor r5, r5, r8, lsl #3 //SWAPMOVE(r5, r5, 0x11111111, 3); + eor r8, r7, r7, lsr #6 + and r8, r8, r10 + eor r7, r8 + eor r7, r7, r8, lsl #6 //SWAPMOVE(r7, r7, 0x03030303, 6); + eor r8, r5, r5, lsr #6 + and r8, r8, r10 + eor r5, r8 + eor r5, r5, r8, lsl #6 //SWAPMOVE(r5, r5, 0x03030303, 6); + eor r8, r7, r7, lsr #12 + and r8, r8, r11 + eor r7, r8 + eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12); + eor r8, r5, r5, lsr #12 + and r8, r8, r11 + eor r5, r8 + eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12); + eor r8, r7, r7, lsr #24 + and r8, r8, #0xff + eor r7, r8 + eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x000000ff, 24); + eor r8, r5, r5, lsr #24 + and r8, r8, #0xff + eor r5, r8 + eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24); + strd r5, r7, [r1] + bx lr + +.align 2 +rearrange_rkey_2: + ldrd r5, r7, [r1] + eor r8, r7, r7, lsr #15 + and r8, r8, r3 + eor r7, r8 + eor r7, r7, r8, lsl #15 //SWAPMOVE(r7, r7, 0x0000aaaa, 15); + eor r8, r5, r5, lsr #15 + and r8, r8, r3 + eor r5, r8 + eor r5, r5, r8, lsl #15 //SWAPMOVE(r5, r5, 0x0000aaaa, 15); + eor r8, r7, r7, lsr #18 + and r8, r8, r10 
+ eor r7, r8 + eor r7, r7, r8, lsl #18 //SWAPMOVE(r7, r7, 0x00003333, 18); + eor r8, r5, r5, lsr #18 + and r8, r8, r10 + eor r5, r8 + eor r5, r5, r8, lsl #18 //SWAPMOVE(r5, r5, 0x00003333, 18); + eor r8, r7, r7, lsr #12 + and r8, r8, r11 + eor r7, r8 + eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12); + eor r8, r5, r5, lsr #12 + and r8, r8, r11 + eor r5, r8 + eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12); + eor r8, r7, r7, lsr #24 + and r8, r8, #0xff + eor r7, r8 + eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x00000ff, 24); + eor r8, r5, r5, lsr #24 + and r8, r8, #0xff + eor r5, r8 + eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24); + strd r5, r7, [r1] + bx lr + +.align 2 +rearrange_rkey_3: + ldrd r5, r7, [r1] + eor r8, r7, r7, lsr #3 + and r8, r8, r3 + eor r7, r8 + eor r7, r7, r8, lsl #3 //SWAPMOVE(r7, r7, 0x0a0a0a0a, 3); + eor r8, r5, r5, lsr #3 + and r8, r8, r3 + eor r5, r8 + eor r5, r5, r8, lsl #3 //SWAPMOVE(r5, r5, 0x0a0a0a0a, 3); + eor r8, r7, r7, lsr #6 + and r8, r8, r10 + eor r7, r8 + eor r7, r7, r8, lsl #6 //SWAPMOVE(r7, r7, 0x00cc00cc, 6); + eor r8, r5, r5, lsr #6 + and r8, r8, r10 + eor r5, r8 + eor r5, r5, r8, lsl #6 //SWAPMOVE(r5, r5, 0x00cc00cc, 6); + eor r8, r7, r7, lsr #12 + and r8, r8, r11 + eor r7, r8 + eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12); + eor r8, r5, r5, lsr #12 + and r8, r8, r11 + eor r5, r8 + eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12); + eor r8, r7, r7, lsr #24 + and r8, r8, #0xff + eor r7, r8 + eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x000000ff, 24); + eor r8, r5, r5, lsr #24 + and r8, r8, #0xff + eor r5, r8 + eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24); + strd r5, r7, [r1] + bx lr + +/***************************************************************************** +* Code size optimized implementation of the GIFTb-128 key schedule. 
+* Compute the key schedule in the normal representation and then rearrange all +* the round keys in their respective fixsliced representations. +*****************************************************************************/ +.align 2 +@ void gift128_keyschedule(const u8* key, u32* rkey) +.global gift128_keyschedule +.type gift128_keyschedule,%function +gift128_keyschedule: + push {r2-r12, r14} + ldm r0, {r4-r7} //load key words + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 + strd r7, r5, [r1], #8 //the first rkeys are not updated + strd r6, r4, [r1], #8 //the first rkeys are not updated + // keyschedule using classical representation for the first 20 rounds + movw r12, #0x3fff + lsl r12, r12, #16 //r12<- 0x3fff0000 + movw r10, #0x000f //r10<- 0x0000000f + movw r9, #0x0fff //r9 <- 0x00000fff + bl key_update + bl key_update + bl key_update + bl key_update + bl key_update + bl key_update + bl key_update + bl key_update + bl key_update + bl key_update + sub.w r1, r1, #336 + // rearrange the rkeys to their respective new representations + movw r3, #0x0055 + movt r3, #0x0055 //r3 <- 0x00550055 + movw r10, #0x3333 //r10<- 0x00003333 + movw r11, #0x000f + movt r11, #0x000f //r11<- 0x000f000f + bl rearrange_rkey_0 + add.w r1, r1, #40 + bl rearrange_rkey_0 + add.w r1, r1, #40 + bl rearrange_rkey_0 + add.w r1, r1, #40 + bl rearrange_rkey_0 + add.w r1, r1, #40 + bl rearrange_rkey_0 + add.w r1, r1, #40 + bl rearrange_rkey_0 + add.w r1, r1, #40 + bl rearrange_rkey_0 + add.w r1, r1, #40 + bl rearrange_rkey_0 + sub.w r1, r1, #272 + movw r3, #0x1111 + movt r3, #0x1111 //r3 <- 0x11111111 + movw r10, #0x0303 + movt r10, #0x0303 //r10<- 0x03030303 + bl rearrange_rkey_1 + add.w r1, r1, #40 + bl rearrange_rkey_1 + add.w r1, r1, #40 + bl rearrange_rkey_1 + add.w r1, r1, #40 + bl rearrange_rkey_1 + add.w r1, r1, #40 + bl rearrange_rkey_1 + add.w r1, r1, #40 + bl rearrange_rkey_1 + add.w r1, r1, #40 + bl rearrange_rkey_1 + add.w r1, r1, #40 + bl rearrange_rkey_1 + sub.w r1, r1, #272 
+ movw r3, #0xaaaa //r3 <- 0x0000aaaa + movw r10, #0x3333 //r10<- 0x00003333 + movw r11, #0xf0f0 //r11<- 0x0000f0f0 + bl rearrange_rkey_2 + add.w r1, r1, #40 + bl rearrange_rkey_2 + add.w r1, r1, #40 + bl rearrange_rkey_2 + add.w r1, r1, #40 + bl rearrange_rkey_2 + add.w r1, r1, #40 + bl rearrange_rkey_2 + add.w r1, r1, #40 + bl rearrange_rkey_2 + add.w r1, r1, #40 + bl rearrange_rkey_2 + add.w r1, r1, #40 + bl rearrange_rkey_2 + sub.w r1, r1, #272 + movw r3, #0x0a0a + movt r3, #0x0a0a //r3 <- 0x0a0a0a0a + movw r10, #0x00cc + movt r10, #0x00cc //r10<- 0x00cc00cc + bl rearrange_rkey_3 + add.w r1, r1, #40 + bl rearrange_rkey_3 + add.w r1, r1, #40 + bl rearrange_rkey_3 + add.w r1, r1, #40 + bl rearrange_rkey_3 + add.w r1, r1, #40 + bl rearrange_rkey_3 + add.w r1, r1, #40 + bl rearrange_rkey_3 + add.w r1, r1, #40 + bl rearrange_rkey_3 + add.w r1, r1, #40 + bl rearrange_rkey_3 + sub.w r1, r1, #312 + pop {r2-r12, r14} + bx lr + +.align 2 +quintuple_round: + str.w r14, [sp] + ldr.w r5, [r0], #4 + ldr.w r6, [r1], #4 //load rkey + ldr.w r7, [r1], #4 //load rkey + and r8, r11, r9 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r9, r8 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + and r8, r4, r12, lsr #1 //permutation layer + and r12, r12, r2 + orr r12, r8, r12, lsl #3 //r12<- NIBBLE_ROR(r12, 1) + and r8, r4, r11 + and r11, r2, r11, lsr #3 + orr r11, r11, r8, lsl #1 //r11<- NIBBLE_ROR(r11, 3) + orr r14, r2, r2, lsl #1 //r14 <- 0x33333333 + and r8, r14, r10, lsr #2 + and r10, r10, r14 + orr r10, r8, r10, lsl #2 //r10<- NIBBLE_ROR(r10, 2) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r9, r9, r5 //add rconst + ldr.w r5, [r0], #4 + ldr.w r6, [r1], #4 //load rkey + ldr.w r7, [r1], #4 //load rkey + and r8, r12, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r12, r8 + orr r8, r12, r10 + eor r11, r11, r8 + eor r9, r9, r11 + eor r10, r10, 
r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9 + mvn r14, r3, lsl #12 //r0 <- 0x0fff0fff + and r8, r14, r9, lsr #4 + and r9, r9, r3 + orr r9, r8, r9, lsl #12 //r9 <- HALF_ROR(r9, 4) + and r8, r3, r11, lsr #12 + and r11, r11, r14 + orr r11, r8, r11, lsl #4 //r11<- HALF_ROR(r11, 12) + rev16 r10, r10 //r10<- HALF_ROR(r10, 8) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r12, r12, r5 //add rconst + ldr.w r5, [r0], #4 + ldr.w r6, [r1], #4 //load rkey + ldr.w r7, [r1], #4 //load rkey + and r8, r9, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r9, r8 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + orr r14, r2, r2, lsl #2 //r14 <- 0x55555555 for SWAPMOVE + eor r8, r10, r10, lsr #1 + and r8, r8, r14 + eor r10, r10, r8 + eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1) + eor r8, r12, r12, lsr #1 + and r8, r8, r14, lsr #16 + eor r12, r12, r8 + eor r12, r12, r8, lsl #1 //SWAPMOVE(r12, r12, 0x55550000, 1) + eor r8, r11, r11, lsr #1 + and r8, r8, r14, lsl #16 + eor r11, r11, r8 + eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x00005555, 1) + eor r10, r10, r6 //add 1st keyword + eor r11, r7, r11, ror #16 //add 2nd keyword + eor r9, r9, r5 //add rconst + ldr.w r5, [r0], #4 + ldr.w r6, [r1], #4 //load rkey + ldr.w r7, [r1], #4 //load rkey + and r8, r11, r12, ror #16 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r8, r12, ror #16 + orr r8, r12, r10 + eor r11, r11, r8 + eor r9, r9, r11 + eor r10, r10, r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9 + eor r14, r3, r3, lsl #8 //r14 <- 0x0f0f0f0f for BYTE_ROR + and r8, r14, r10, lsr #4 + and r10, r10, r14 + orr r10, r8, r10, lsl #4 //r10<- BYTE_ROR(r10, 4) + orr r14, r14, r14, lsl #2 //r14 <- 0x3f3f3f3f for BYTE_ROR + mvn r8, r14 //r8 <- 0xc0c0c0c0 for BYTE_ROR + and r8, r8, r11, lsl #6 + and r11, r14, r11, lsr #2 + orr r11, r11, r8 //r11<- BYTE_ROR(r11, 2) + mvn 
r8, r14, lsr #6 + and r8, r8, r9, lsr #6 + and r9, r14, r9 + orr r9, r8, r9, lsl #2 //r9 <- BYTE_ROR(r9, 6) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r12, r12, r5 //add rconst + ldr.w r5, [r0], #4 + ldr.w r6, [r1], #4 //load rkey + ldr.w r7, [r1], #4 //load rkey + ldr.w lr, [sp] //restore link register + and r8, r9, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r9, r8 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12, ror #24 + eor r10, r6, r10, ror #16 //add 1st keyword + eor r11, r7, r11, ror #8 //add 2nd keyword + eor r9, r9, r5 //add rconst + eor r9, r9, r12 //swap r9 with r12 + eor r12, r12, r9 //swap r9 with r12 + eor r9, r9, r12 //swap r9 with r12 + bx lr + +/***************************************************************************** +* Code size optimized implementation of the GIFTb-128 block cipher. +* This function simply encrypts a 128-bit block, without any operation mode. 
+*****************************************************************************/ +.align 2 +@ void giftb128_encrypt_block(u8 *out, const u32* rkey, const u8 *block) +.global giftb128_encrypt_block +.type giftb128_encrypt_block,%function +giftb128_encrypt_block: + push {r0,r2-r12,r14} + sub.w sp, #4 //to store 'lr' when calling 'quintuple_round' + ldm r2, {r9-r12} // load plaintext words + rev r9, r9 + rev r10, r10 + rev r11, r11 + rev r12, r12 + movw r2, #0x1111 + movt r2, #0x1111 //r2 <- 0x11111111 (for NIBBLE_ROR) + movw r3, #0x000f + movt r3, #0x000f //r3 <- 0x000f000f (for HALF_ROR) + mvn r4, r2, lsl #3 //r4 <- 0x7777777 (for NIBBLE_ROR) + adr r0, rconst //r0 <- 'rconst' address + bl quintuple_round + bl quintuple_round + bl quintuple_round + bl quintuple_round + bl quintuple_round + bl quintuple_round + bl quintuple_round + bl quintuple_round + ldr.w r0, [sp ,#4] //restore 'ctext' address + rev r9, r9 + rev r10, r10 + rev r11, r11 + rev r12, r12 + stm r0, {r9-r12} + add.w sp, #4 + pop {r0,r2-r12,r14} + bx lr + \ No newline at end of file diff --git a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/api.h b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/api.h new file mode 100644 index 0000000..fb1d58b --- /dev/null +++ b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/cofb.h b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/cofb.h new file mode 100644 index 0000000..c580057 --- /dev/null +++ b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/cofb.h @@ -0,0 +1,66 @@ +#ifndef GIFT_COFB_H_ +#define GIFT_COFB_H_ + +#define TAG_SIZE 16 +#define COFB_ENCRYPT 1 +#define COFB_DECRYPT 0 + +#define DOUBLE_HALF_BLOCK(x) ({ \ + tmp0 = 
(x)[0]; \ + (x)[0] = (((x)[0] & 0x7f7f7f7f) << 1) | (((x)[0] & 0x80808080) >> 15); \ + (x)[0] |= ((x)[1] & 0x80808080) << 17; \ + (x)[1] = (((x)[1] & 0x7f7f7f7f) << 1) | (((x)[1] & 0x80808080) >> 15); \ + (x)[1] ^= (((tmp0 >> 7) & 1) * 27) << 24; \ +}) + +#define TRIPLE_HALF_BLOCK(x) ({ \ + tmp0 = (x)[0]; \ + tmp1 = (x)[1]; \ + (x)[0] = (((x)[0] & 0x7f7f7f7f) << 1) | (((x)[0] & 0x80808080) >> 15); \ + (x)[0] |= ((x)[1] & 0x80808080) << 17; \ + (x)[1] = (((x)[1] & 0x7f7f7f7f) << 1) | (((x)[1] & 0x80808080) >> 15); \ + (x)[1] ^= (((tmp0 >> 7) & 1) * 27) << 24; \ + (x)[0] ^= tmp0; \ + (x)[1] ^= tmp1; \ +}) + +#define G(x) ({ \ + tmp0 = (x)[0]; \ + tmp1 = (x)[1]; \ + (x)[0] = (x)[2]; \ + (x)[1] = (x)[3]; \ + (x)[2] = ((tmp0 & 0x7f7f7f7f) << 1) | ((tmp0 & 0x80808080) >> 15); \ + (x)[2] |= ((tmp1 & 0x80808080) << 17); \ + (x)[3] = ((tmp1 & 0x7f7f7f7f) << 1) | ((tmp1 & 0x80808080) >> 15); \ + (x)[3] |= ((tmp0 & 0x80808080) << 17); \ +}) + +#define XOR_BLOCK(x, y, z) ({ \ + (x)[0] = (y)[0] ^ (z)[0]; \ + (x)[1] = (y)[1] ^ (z)[1]; \ + (x)[2] = (y)[2] ^ (z)[2]; \ + (x)[3] = (y)[3] ^ (z)[3]; \ +}) + +#define XOR_TOP_BAR_BLOCK(x, y) ({ \ + (x)[0] ^= (y)[0]; \ + (x)[1] ^= (y)[1]; \ +}) + +#define RHO1(d, y, m, n) ({ \ + G(y); \ + padding(d,m,n); \ + XOR_BLOCK(d, d, y); \ +}) + +#define RHO(y, m, x, c, n) ({ \ + XOR_BLOCK(c, y, m); \ + RHO1(x, y, m, n); \ +}) + +#define RHO_PRIME(y, c, x, m, n) ({ \ + XOR_BLOCK(m, y, c); \ + RHO1(x, y, m, n); \ +}) + +#endif // GIFT_COFB_H_ \ No newline at end of file diff --git a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/encrypt.c b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/encrypt.c new file mode 100644 index 0000000..8eed961 --- /dev/null +++ b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/encrypt.c @@ -0,0 +1,141 @@ +#include <string.h> +#include "cofb.h" +#include "giftb128.h" + +static inline void padding(u32* d, const u32* s, const u32 no_of_bytes){ + u32 i; + if (no_of_bytes
== 0) { + d[0] = 0x00000080; // little-endian + d[1] = 0x00000000; + d[2] = 0x00000000; + d[3] = 0x00000000; + } + else if (no_of_bytes < GIFT128_BLOCK_SIZE) { + for (i = 0; i < no_of_bytes/4+1; i++) + d[i] = s[i]; + d[i-1] &= ~(0xffffffffL << (no_of_bytes % 4)*8); + d[i-1] |= 0x00000080L << (no_of_bytes % 4)*8; + for (; i < 4; i++) + d[i] = 0x00000000; + } + else { + d[0] = s[0]; + d[1] = s[1]; + d[2] = s[2]; + d[3] = s[3]; + } +} + +/**************************************************************************** +* Constant-time implementation of the GIFT-COFB authenticated cipher based on +* fixsliced GIFTb-128. Encryption/decryption is handled by the same function, +* depending on the 'mode' parameter (1/0). + ***************************************************************************/ +int giftcofb_crypt(u8* out, const u8* key, const u8* nonce, const u8* ad, + u32 ad_len, const u8* in, u32 in_len, const int encrypting) { + + u32 tmp0, tmp1, emptyA, emptyM, offset[2]; + u32 input[4], rkey[80]; + u8 Y[GIFT128_BLOCK_SIZE]; + + if (!encrypting) { + if (in_len < TAG_SIZE) + return -1; + in_len -= TAG_SIZE; + } + + if(ad_len == 0) + emptyA = 1; + else + emptyA = 0; + + if(in_len == 0) + emptyM =1; + else + emptyM = 0; + + gift128_keyschedule(key, rkey); + giftb128_encrypt_block(Y, rkey, nonce); + offset[0] = ((u32*)Y)[0]; + offset[1] = ((u32*)Y)[1]; + + while(ad_len > GIFT128_BLOCK_SIZE){ + RHO1(input, (u32*)Y, (u32*)ad, GIFT128_BLOCK_SIZE); + DOUBLE_HALF_BLOCK(offset); + XOR_TOP_BAR_BLOCK(input, offset); + giftb128_encrypt_block(Y, rkey, (u8*)input); + ad += GIFT128_BLOCK_SIZE; + ad_len -= GIFT128_BLOCK_SIZE; + } + + TRIPLE_HALF_BLOCK(offset); + if((ad_len % GIFT128_BLOCK_SIZE != 0) || (emptyA)) + TRIPLE_HALF_BLOCK(offset); + if(emptyM) { + TRIPLE_HALF_BLOCK(offset); + TRIPLE_HALF_BLOCK(offset); + } + + RHO1(input, (u32*)Y, (u32*)ad, ad_len); + XOR_TOP_BAR_BLOCK(input, offset); + giftb128_encrypt_block(Y, rkey, (u8*)input); + + while (in_len > GIFT128_BLOCK_SIZE){ + 
DOUBLE_HALF_BLOCK(offset); + if (encrypting) + RHO((u32*)Y, (u32*)in, input, (u32*)out, GIFT128_BLOCK_SIZE); + else + RHO_PRIME((u32*)Y, (u32*)in, input, (u32*)out, GIFT128_BLOCK_SIZE); + XOR_TOP_BAR_BLOCK(input, offset); + giftb128_encrypt_block(Y, rkey, (u8*)input); + in += GIFT128_BLOCK_SIZE; + out += GIFT128_BLOCK_SIZE; + in_len -= GIFT128_BLOCK_SIZE; + } + + if(!emptyM){ + TRIPLE_HALF_BLOCK(offset); + if(in_len % GIFT128_BLOCK_SIZE != 0) + TRIPLE_HALF_BLOCK(offset); + if (encrypting) { + RHO((u32*)Y, (u32*)in, input, (u32*)out, in_len); + out += in_len; + } + else { + RHO_PRIME((u32*)Y, (u32*)in, input, (u32*)out, in_len); + in += in_len; + } + XOR_TOP_BAR_BLOCK(input, offset); + giftb128_encrypt_block(Y, rkey, (u8*)input); + } + + if (encrypting) { // encryption mode + memcpy(out, Y, TAG_SIZE); + return 0; + } + // decrypting + tmp0 = 0; + for(tmp1 = 0; tmp1 < TAG_SIZE; tmp1++) + tmp0 |= in[tmp1] ^ Y[tmp1]; + return tmp0; +} + +int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, const unsigned char* npub, + const unsigned char* k) { + (void)nsec; + *clen = mlen + TAG_SIZE; + return giftcofb_crypt(c, k, npub, ad, adlen, m, mlen, COFB_ENCRYPT); +} + +int crypto_aead_decrypt(unsigned char* m, unsigned long long *mlen, + unsigned char* nsec, const unsigned char* c, + unsigned long long clen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char *k) { + (void)nsec; + *mlen = clen - TAG_SIZE; + return giftcofb_crypt(m, k, npub, ad, adlen, c, clen, COFB_DECRYPT); +} diff --git a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/giftb128.h b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/giftb128.h new file mode 100644 index 0000000..8c904b6 --- /dev/null +++ 
b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/giftb128.h @@ -0,0 +1,13 @@ +#ifndef GIFT128_H_ +#define GIFT128_H_ + +#define KEY_SIZE 16 +#define GIFT128_BLOCK_SIZE 16 + +typedef unsigned char u8; +typedef unsigned int u32; + +extern void gift128_keyschedule(const u8* key, u32* rkey); +extern void giftb128_encrypt_block(u8* out_block, const u32* rkey, const u8* in_block); + +#endif // GIFT128_H_ \ No newline at end of file diff --git a/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/giftb128.s b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/giftb128.s new file mode 100644 index 0000000..0d5d8e0 --- /dev/null +++ b/gift-cofb/Implementations/crypto_aead/giftcofb128v1/armcortexm_fast/giftb128.s @@ -0,0 +1,2044 @@ +/**************************************************************************** +* Fully unrolled ARM assembly implementation of the GIFT-128 block cipher. +* This implementation focuses on speed, at the cost of a large code size. +* See "Fixslicing: A New GIFT Representation" paper available at +* https://eprint.iacr.org/2020/412 for more details. +* +* @author Alexandre Adomnicai, Nanyang Technological University, +* alexandre.adomnicai@ntu.edu.sg +* @date March 2020 +****************************************************************************/ + +.syntax unified +.thumb +/***************************************************************************** +* Fully unrolled implementation of the GIFT-128 key schedule according to the +* fixsliced representation.
+*****************************************************************************/ +@ void gift128_keyschedule(const u8* key, u32* rkey) +.global gift128_keyschedule +.type gift128_keyschedule,%function +gift128_keyschedule: + push {r2-r12, r14} + ldm r0, {r4-r7} //load key words + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 + str.w r6, [r1, #8] + str.w r4, [r1, #12] + str.w r7, [r1] + str.w r5, [r1, #4] + // keyschedule using classical representation for the first 20 rounds + movw r12, #0x3fff + lsl r12, r12, #16 //r12<- 0x3fff0000 + movw r10, #0x000f //r10<- 0x0000000f + movw r9, #0x0fff //r9 <- 0x00000fff + // 1st classical key update + and r2, r10, r7, lsr #12 + and r3, r7, r9 + orr r2, r2, r3, lsl #4 + and r3, r12, r7, lsr #2 + orr r2, r2, r3 + and r7, r7, #0x00030000 + orr r7, r2, r7, lsl #14 + str.w r5, [r1, #16] + str.w r7, [r1, #20] + // 2nd classical key update + and r2, r10, r6, lsr #12 + and r3, r6, r9 + orr r2, r2, r3, lsl #4 + and r3, r12, r6, lsr #2 + orr r2, r2, r3 + and r6, r6, #0x00030000 + orr r6, r2, r6, lsl #14 + str.w r4, [r1, #24] + str.w r6, [r1, #28] + // 3rd classical key update + and r2, r10, r5, lsr #12 + and r3, r5, r9 + orr r2, r2, r3, lsl #4 + and r3, r12, r5, lsr #2 + orr r2, r2, r3 + and r5, r5, #0x00030000 + orr r5, r2, r5, lsl #14 + str.w r7, [r1, #32] + str.w r5, [r1, #36] + // 4th classical key update + and r2, r10, r4, lsr #12 + and r3, r4, r9 + orr r2, r2, r3, lsl #4 + and r3, r12, r4, lsr #2 + orr r2, r2, r3 + and r4, r4, #0x00030000 + orr r4, r2, r4, lsl #14 + str.w r6, [r1, #40] + str.w r4, [r1, #44] + // 5th classical key update + and r2, r10, r7, lsr #12 + and r3, r7, r9 + orr r2, r2, r3, lsl #4 + and r3, r12, r7, lsr #2 + orr r2, r2, r3 + and r7, r7, #0x00030000 + orr r7, r2, r7, lsl #14 + str.w r5, [r1, #48] + str.w r7, [r1, #52] + // 6th classical key update + and r2, r10, r6, lsr #12 + and r3, r6, r9 + orr r2, r2, r3, lsl #4 + and r3, r12, r6, lsr #2 + orr r2, r2, r3 + and r6, r6, #0x00030000 + orr r6, r2, r6, lsl 
#14 + str.w r4, [r1, #56] + str.w r6, [r1, #60] + // 7th classical key update + and r2, r10, r5, lsr #12 + and r3, r5, r9 + orr r2, r2, r3, lsl #4 + and r3, r12, r5, lsr #2 + orr r2, r2, r3 + and r5, r5, #0x00030000 + orr r5, r2, r5, lsl #14 + str.w r7, [r1, #64] + str.w r5, [r1, #68] + // 8th classical key update + and r2, r10, r4, lsr #12 + and r3, r4, r9 + orr r2, r2, r3, lsl #4 + and r3, r12, r4, lsr #2 + orr r2, r2, r3 + and r4, r4, #0x00030000 + orr r4, r2, r4, lsl #14 + str.w r6, [r1, #72] + str.w r4, [r1, #76] + // rearrange the rkeys to their respective new representations + // REARRANGE_RKEY_0 + movw r3, #0x0055 + movt r3, #0x0055 //r3 <- 0x00550055 + movw r10, #0x3333 //r10<- 0x00003333 + movw r11, #0x000f + movt r11, #0x000f //r11<- 0x000f000f + ldrd r6, r4, [r1] + eor r12, r6, r6, lsr #9 + and r12, r12, r3 + eor r6, r12 + eor r6, r6, r12, lsl #9 //SWAPMOVE(r6, r6, 0x00550055, 9); + eor r12, r4, r4, lsr #9 + and r12, r12, r3 + eor r4, r12 + eor r4, r4, r12, lsl #9 //SWAPMOVE(r4, r4, 0x00550055, 9); + eor r12, r6, r6, lsr #18 + and r12, r12, r10 + eor r6, r12 + eor r6, r6, r12, lsl #18 //SWAPMOVE(r6, r6, 0x3333, 18); + eor r12, r4, r4, lsr #18 + and r12, r12, r10 + eor r4, r12 + eor r4, r4, r12, lsl #18 //SWAPMOVE(r4, r4, 0x3333, 18); + eor r12, r6, r6, lsr #12 + and r12, r12, r11 + eor r6, r12 + eor r6, r6, r12, lsl #12 //SWAPMOVE(r6, r6, 0x000f000f, 12); + eor r12, r4, r4, lsr #12 + and r12, r12, r11 + eor r4, r12 + eor r4, r4, r12, lsl #12 //SWAPMOVE(r4, r4, 0x000f000f, 12); + eor r12, r6, r6, lsr #24 + and r12, r12, #0xff + eor r6, r12 + eor r6, r6, r12, lsl #24 //SWAPMOVE(r6, r6, 0x000000ff, 24); + eor r12, r4, r4, lsr #24 + and r12, r12, #0xff + eor r4, r12 + eor r4, r4, r12, lsl #24 //SWAPMOVE(r4, r4, 0x000000ff, 24); + strd r6, r4, [r1] + ldrd r6, r4, [r1, #40] + eor r12, r6, r6, lsr #9 + and r12, r12, r3 + eor r6, r12 + eor r6, r6, r12, lsl #9 //SWAPMOVE(r6, r6, 0x00550055, 9); + eor r12, r4, r4, lsr #9 + and r12, r12, r3 + eor r4, r12 + eor r4, 
r4, r12, lsl #9 //SWAPMOVE(r4, r4, 0x00550055, 9); + eor r12, r6, r6, lsr #18 + and r12, r12, r10 + eor r6, r12 + eor r6, r6, r12, lsl #18 //SWAPMOVE(r6, r6, 0x3333, 18); + eor r12, r4, r4, lsr #18 + and r12, r12, r10 + eor r4, r12 + eor r4, r4, r12, lsl #18 //SWAPMOVE(r4, r4, 0x3333, 18); + eor r12, r6, r6, lsr #12 + and r12, r12, r11 + eor r6, r12 + eor r6, r6, r12, lsl #12 //SWAPMOVE(r6, r6, 0x000f000f, 12); + eor r12, r4, r4, lsr #12 + and r12, r12, r11 + eor r4, r12 + eor r4, r4, r12, lsl #12 //SWAPMOVE(r4, r4, 0x000f000f, 12); + eor r12, r6, r6, lsr #24 + and r12, r12, #0xff + eor r6, r12 + eor r6, r6, r12, lsl #24 //SWAPMOVE(r6, r6, 0x000000ff, 24); + eor r12, r4, r4, lsr #24 + and r12, r12, #0xff + eor r4, r12 + eor r4, r4, r12, lsl #24 //SWAPMOVE(r4, r4, 0x000000ff, 24); + str.w r6, [r1, #40] + str.w r4, [r1, #44] + // REARRANGE_RKEY_1 + movw r3, #0x1111 + movt r3, #0x1111 + movw r10, #0x0303 + movt r10, #0x0303 + ldrd r5, r7, [r1, #8] + eor r8, r7, r7, lsr #3 + and r8, r8, r3 + eor r7, r8 + eor r7, r7, r8, lsl #3 //SWAPMOVE(r7, r7, 0x11111111, 3); + eor r8, r5, r5, lsr #3 + and r8, r8, r3 + eor r5, r8 + eor r5, r5, r8, lsl #3 //SWAPMOVE(r5, r5, 0x11111111, 3); + eor r8, r7, r7, lsr #6 + and r8, r8, r10 + eor r7, r8 + eor r7, r7, r8, lsl #6 //SWAPMOVE(r7, r7, 0x03030303, 6); + eor r8, r5, r5, lsr #6 + and r8, r8, r10 + eor r5, r8 + eor r5, r5, r8, lsl #6 //SWAPMOVE(r5, r5, 0x03030303, 6); + eor r8, r7, r7, lsr #12 + and r8, r8, r11 + eor r7, r8 + eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12); + eor r8, r5, r5, lsr #12 + and r8, r8, r11 + eor r5, r8 + eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12); + eor r8, r7, r7, lsr #24 + and r8, r8, #0xff + eor r7, r8 + eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x000000ff, 24); + eor r8, r5, r5, lsr #24 + and r8, r8, #0xff + eor r5, r8 + eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24); + ldr.w r12, [r1, #48] + ldr.w r14, [r1, #52] + str.w r5, [r1, #8] + str.w r7, [r1, #12] + eor r8, 
r14, r14, lsr #3 + and r8, r8, r3 + eor r14, r8 + eor r14, r14, r8, lsl #3 //SWAPMOVE(r7, r7, 0x11111111, 3); + eor r8, r12, r12, lsr #3 + and r8, r8, r3 + eor r12, r8 + eor r12, r12, r8, lsl #3 //SWAPMOVE(r5, r5, 0x11111111, 3); + eor r8, r14, r14, lsr #6 + and r8, r8, r10 + eor r14, r8 + eor r14, r14, r8, lsl #6 //SWAPMOVE(r7, r7, 0x03030303, 6); + eor r8, r12, r12, lsr #6 + and r8, r8, r10 + eor r12, r8 + eor r12, r12, r8, lsl #6 //SWAPMOVE(r5, r5, 0x03030303, 6); + eor r8, r14, r14, lsr #12 + and r8, r8, r11 + eor r14, r8 + eor r14, r14, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12); + eor r8, r12, r12, lsr #12 + and r8, r8, r11 + eor r12, r8 + eor r12, r12, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12); + eor r8, r14, r14, lsr #24 + and r8, r8, #0xff + eor r14, r8 + eor r14, r14, r8, lsl #24 //SWAPMOVE(r7, r7, 0x000000ff, 24); + eor r8, r12, r12, lsr #24 + and r8, r8, #0xff + eor r12, r8 + eor r12, r12, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24); + str.w r12, [r1, #48] + str.w r14, [r1, #52] + // REARRANGE_RKEY_2 + movw r3, #0xaaaa + movw r10, #0x3333 + movw r11, #0xf0f0 + ldrd r5, r7, [r1, #16] + eor r8, r7, r7, lsr #15 + and r8, r8, r3 + eor r7, r8 + eor r7, r7, r8, lsl #15 //SWAPMOVE(r7, r7, 0x0000aaaa, 15); + eor r8, r5, r5, lsr #15 + and r8, r8, r3 + eor r5, r8 + eor r5, r5, r8, lsl #15 //SWAPMOVE(r5, r5, 0x0000aaaa, 15); + eor r8, r7, r7, lsr #18 + and r8, r8, r10 + eor r7, r8 + eor r7, r7, r8, lsl #18 //SWAPMOVE(r7, r7, 0x00003333, 18); + eor r8, r5, r5, lsr #18 + and r8, r8, r10 + eor r5, r8 + eor r5, r5, r8, lsl #18 //SWAPMOVE(r5, r5, 0x00003333, 18); + eor r8, r7, r7, lsr #12 + and r8, r8, r11 + eor r7, r8 + eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12); + eor r8, r5, r5, lsr #12 + and r8, r8, r11 + eor r5, r8 + eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12); + eor r8, r7, r7, lsr #24 + and r8, r8, #0xff + eor r7, r8 + eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x00000ff, 24); + eor r8, r5, r5, lsr #24 + and r8, r8, 
#0xff + eor r5, r8 + eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24); + strd r5, r7, [r1, #16] + ldrd r5, r7, [r1, #56] + eor r8, r7, r7, lsr #15 + and r8, r8, r3 + eor r7, r8 + eor r7, r7, r8, lsl #15 //SWAPMOVE(r7, r7, 0x0000aaaa, 15); + eor r8, r5, r5, lsr #15 + and r8, r8, r3 + eor r5, r8 + eor r5, r5, r8, lsl #15 //SWAPMOVE(r5, r5, 0x0000aaaa, 15); + eor r8, r7, r7, lsr #18 + and r8, r8, r10 + eor r7, r8 + eor r7, r7, r8, lsl #18 //SWAPMOVE(r7, r7, 0x00003333, 18); + eor r8, r5, r5, lsr #18 + and r8, r8, r10 + eor r5, r8 + eor r5, r5, r8, lsl #18 //SWAPMOVE(r5, r5, 0x00003333, 18); + eor r8, r7, r7, lsr #12 + and r8, r8, r11 + eor r7, r8 + eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12); + eor r8, r5, r5, lsr #12 + and r8, r8, r11 + eor r5, r8 + eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12); + eor r8, r7, r7, lsr #24 + and r8, r8, #0xff + eor r7, r8 + eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x000000ff, 24); + eor r8, r5, r5, lsr #24 + and r8, r8, #0xff + eor r5, r8 + eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24); + str.w r5, [r1, #56] + str.w r7, [r1, #60] + // REARRANGE_RKEY_3 + movw r3, #0x0a0a + movt r3, #0x0a0a //r3 <- 0x0a0a0a0a + movw r10, #0x00cc + movt r10, #0x00cc //r10<- 0x00cc00cc + ldrd r5, r7, [r1, #24] + eor r8, r7, r7, lsr #3 + and r8, r8, r3 + eor r7, r8 + eor r7, r7, r8, lsl #3 //SWAPMOVE(r7, r7, 0x0a0a0a0a, 3); + eor r8, r5, r5, lsr #3 + and r8, r8, r3 + eor r5, r8 + eor r5, r5, r8, lsl #3 //SWAPMOVE(r5, r5, 0x0a0a0a0a, 3); + eor r8, r7, r7, lsr #6 + and r8, r8, r10 + eor r7, r8 + eor r7, r7, r8, lsl #6 //SWAPMOVE(r7, r7, 0x00cc00cc, 6); + eor r8, r5, r5, lsr #6 + and r8, r8, r10 + eor r5, r8 + eor r5, r5, r8, lsl #6 //SWAPMOVE(r5, r5, 0x00cc00cc, 6); + eor r8, r7, r7, lsr #12 + and r8, r8, r11 + eor r7, r8 + eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12); + eor r8, r5, r5, lsr #12 + and r8, r8, r11 + eor r5, r8 + eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12); + eor 
r8, r7, r7, lsr #24 + and r8, r8, #0xff + eor r7, r8 + eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x000000ff, 24); + eor r8, r5, r5, lsr #24 + and r8, r8, #0xff + eor r5, r8 + eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x000000ff, 24); + strd r5, r7, [r1, #24] + ldrd r5, r7, [r1, #64] + eor r8, r7, r7, lsr #3 + and r8, r8, r3 + eor r7, r8 + eor r7, r7, r8, lsl #3 //SWAPMOVE(r7, r7, 0x0a0a0a0a, 3); + eor r8, r5, r5, lsr #3 + and r8, r8, r3 + eor r5, r8 + eor r5, r5, r8, lsl #3 //SWAPMOVE(r5, r5, 0x0a0a0a0a, 3); + eor r8, r7, r7, lsr #6 + and r8, r8, r10 + eor r7, r8 + eor r7, r7, r8, lsl #6 //SWAPMOVE(r7, r7, 0x00cc00cc, 6); + eor r8, r5, r5, lsr #6 + and r8, r8, r10 + eor r5, r8 + eor r5, r5, r8, lsl #6 //SWAPMOVE(r5, r5, 0x00cc00cc, 6); + eor r8, r7, r7, lsr #12 + and r8, r8, r11 + eor r7, r8 + eor r7, r7, r8, lsl #12 //SWAPMOVE(r7, r7, 0x000f000f, 12); + eor r8, r5, r5, lsr #12 + and r8, r8, r11 + eor r5, r8 + eor r5, r5, r8, lsl #12 //SWAPMOVE(r5, r5, 0x000f000f, 12); + eor r8, r7, r7, lsr #24 + and r8, r8, #0xff + eor r7, r8 + eor r7, r7, r8, lsl #24 //SWAPMOVE(r7, r7, 0x0000ff00, 24); + eor r8, r5, r5, lsr #24 + and r8, r8, #0xff + eor r5, r8 + eor r5, r5, r8, lsl #24 //SWAPMOVE(r5, r5, 0x0000ff00, 24); + str.w r5, [r1, #64] + str.w r7, [r1, #68] + //keyschedule according to the new representations + // KEY_DOULBE/TRIPLE_UPDATE_0 + movw r10, #0x3333 + eor r12, r10, r10, lsl #16 + mvn r11, r12 + movw r9, #0x4444 + movt r9, #0x5555 + movw r8, #0x1100 + movt r8, #0x5555 + ldrd r4, r5, [r1] + and r2, r12, r4, ror #24 + and r4, r4, r11 + orr r4, r2, r4, ror #16 //KEY_TRIPLE_UPDATE_1(r4) + eor r2, r4, r4, lsr #1 + and r2, r2, r8 + eor r4, r4, r2 + eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x55551100, 1) + eor r2, r5, r5, lsr #16 + and r2, r2, r10 + eor r5, r5, r2 + eor r5, r5, r2, lsl #16 //SWAPMOVE(r5, r5, 0x00003333, 16) + eor r2, r5, r5, lsr #1 + and r2, r2, r9 + eor r5, r5, r2 + eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x555544444, 1) + str.w r5, [r1, #80] + 
str.w r4, [r1, #84] + and r2, r12, r5, ror #24 + and r5, r5, r11 + orr r5, r2, r5, ror #16 //KEY_TRIPLE_UPDATE_1(r5) + eor r2, r5, r5, lsr #1 + and r2, r2, r8 + eor r5, r5, r2 + eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x55551100, 1) + eor r2, r4, r4, lsr #16 + and r2, r2, r10 + eor r4, r4, r2 + eor r4, r4, r2, lsl #16 //SWAPMOVE(r4, r4, 0x00003333, 16) + eor r2, r4, r4, lsr #1 + and r2, r2, r9 + eor r4, r4, r2 + eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x555544444, 1) + str.w r4, [r1, #160] + str.w r5, [r1, #164] + and r2, r12, r4, ror #24 + and r4, r4, r11 + orr r4, r2, r4, ror #16 //KEY_TRIPLE_UPDATE_1(r4) + eor r2, r4, r4, lsr #1 + and r2, r2, r8 + eor r4, r4, r2 + eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x55551100, 1) + eor r2, r5, r5, lsr #16 + and r2, r2, r10 + eor r5, r5, r2 + eor r5, r5, r2, lsl #16 //SWAPMOVE(r5, r5, 0x00003333, 16) + eor r2, r5, r5, lsr #1 + and r2, r2, r9 + eor r5, r5, r2 + eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x555544444, 1) + strd r5, r4, [r1, #240] + ldrd r4, r5, [r1, #40] + and r2, r12, r4, ror #24 + and r4, r4, r11 + orr r4, r2, r4, ror #16 //KEY_TRIPLE_UPDATE_1(r4) + eor r2, r4, r4, lsr #1 + and r2, r2, r8 + eor r4, r4, r2 + eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x55551100, 1) + eor r2, r5, r5, lsr #16 + and r2, r2, r10 + eor r5, r5, r2 + eor r5, r5, r2, lsl #16 //SWAPMOVE(r5, r5, 0x00003333, 16) + eor r2, r5, r5, lsr #1 + and r2, r2, r9 + eor r5, r5, r2 + eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x555544444, 1) + str.w r5, [r1, #120] + str.w r4, [r1, #124] + and r2, r12, r5, ror #24 + and r5, r5, r11 + orr r5, r2, r5, ror #16 //KEY_TRIPLE_UPDATE_1(r5) + eor r2, r5, r5, lsr #1 + and r2, r2, r8 + eor r5, r5, r2 + eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x55551100, 1) + eor r2, r4, r4, lsr #16 + and r2, r2, r10 + eor r4, r4, r2 + eor r4, r4, r2, lsl #16 //SWAPMOVE(r4, r4, 0x00003333, 16) + eor r2, r4, r4, lsr #1 + and r2, r2, r9 + eor r4, r4, r2 + eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x555544444, 1) + str.w r4, 
[r1, #200] + str.w r5, [r1, #204] + and r2, r12, r4, ror #24 + and r4, r4, r11 + orr r4, r2, r4, ror #16 //KEY_TRIPLE_UPDATE_1(r4) + eor r2, r4, r4, lsr #1 + and r2, r2, r8 + eor r4, r4, r2 + eor r4, r4, r2, lsl #1 //SWAPMOVE(r4, r4, 0x55551100, 1) + eor r2, r5, r5, lsr #16 + and r2, r2, r10 + eor r5, r5, r2 + eor r5, r5, r2, lsl #16 //SWAPMOVE(r5, r5, 0x00003333, 16) + eor r2, r5, r5, lsr #1 + and r2, r2, r9 + eor r5, r5, r2 + eor r5, r5, r2, lsl #1 //SWAPMOVE(r5, r5, 0x555544444, 1) + str.w r5, [r1, #280] + str.w r4, [r1, #284] + // KEY_DOULBE/TRIPLE_UPDATE_2 + // masks + movw r12, #0x0f00 + movt r12, #0x0f00 + movw r11, #0x0003 + movt r11, #0x0003 + movw r10, #0x003f + movt r10, #0x003f + lsl r9, r11, #8 //r9 <- 0x03000300 + and r8, r10, r10, lsr #3 //r8 <- 0x00070007 + orr r7, r8, r8, lsl #2 //r7 <- 0x001f001f + ldrd r4, r5, [r1, #8] + and r2, r9, r4, lsr #6 + and r3, r4, r10, lsl #8 + orr r2, r2, r3, lsl #2 + and r3, r8, r4, lsr #5 + orr r2, r2, r3 + and r4, r4, r7 + orr r4, r2, r4, lsl #3 //KEY_TRIPLE_UPDATE_2(r4) + and r2, r12, r5, lsr #4 + and r3, r5, r12 + orr r2, r2, r3, lsl #4 + and r3, r11, r5, lsr #6 + orr r2, r2, r3 + and r5, r5, r10 + orr r5, r2, r5, lsl #2 //KEY_DOUBLE_UPDATE_2(r5) + str.w r5, [r1, #88] + str.w r4, [r1, #92] + and r2, r9, r5, lsr #6 + and r3, r5, r10, lsl #8 + orr r2, r2, r3, lsl #2 + and r3, r8, r5, lsr #5 + orr r2, r2, r3 + and r5, r5, r7 + orr r5, r2, r5, lsl #3 //KEY_TRIPLE_UPDATE_2(r5) + and r2, r12, r4, lsr #4 + and r3, r4, r12 + orr r2, r2, r3, lsl #4 + and r3, r11, r4, lsr #6 + orr r2, r2, r3 + and r4, r4, r10 + orr r4, r2, r4, lsl #2 //KEY_DOUBLE_UPDATE_2(r4) + str.w r4, [r1, #168] + str.w r5, [r1, #172] + and r2, r9, r4, lsr #6 + and r3, r4, r10, lsl #8 + orr r2, r2, r3, lsl #2 + and r3, r8, r4, lsr #5 + orr r2, r2, r3 + and r4, r4, r7 + orr r4, r2, r4, lsl #3 //KEY_TRIPLE_UPDATE_2(r4) + and r2, r12, r5, lsr #4 + and r3, r5, r12 + orr r2, r2, r3, lsl #4 + and r3, r11, r5, lsr #6 + orr r2, r2, r3 + and r5, r5, r10 + orr r5, 
r2, r5, lsl#2 //KEY_DOUBLE_UPDATE_2(r5) + strd r5, r4, [r1, #248] + ldrd r4, r5, [r1, #48] + and r2, r9, r4, lsr #6 + and r3, r4, r10, lsl #8 + orr r2, r2, r3, lsl #2 + and r3, r8, r4, lsr #5 + orr r2, r2, r3 + and r4, r4, r7 + orr r4, r2, r4, lsl #3 //KEY_TRIPLE_UPDATE_2(r4) + and r2, r12, r5, lsr #4 + and r3, r5, r12 + orr r2, r2, r3, lsl #4 + and r3, r11, r5, lsr #6 + orr r2, r2, r3 + and r5, r5, r10 + orr r5, r2, r5, lsl #2 //KEY_DOUBLE_UPDATE_2(r5) + str.w r5, [r1, #128] + str.w r4, [r1, #132] + and r2, r9, r5, lsr #6 + and r3, r5, r10, lsl #8 + orr r2, r2, r3, lsl #2 + and r3, r8, r5, lsr #5 + orr r2, r2, r3 + and r5, r5, r7 + orr r5, r2, r5, lsl #3 //KEY_TRIPLE_UPDATE_2(r5) + and r2, r12, r4, lsr #4 + and r3, r4, r12 + orr r2, r2, r3, lsl #4 + and r3, r11, r4, lsr #6 + orr r2, r2, r3 + and r4, r4, r10 + orr r4, r2, r4, lsl #2 //KEY_DOUBLE_UPDATE_2(r4) + str.w r4, [r1, #208] + str.w r5, [r1, #212] + and r2, r9, r4, lsr #6 + and r3, r4, r10, lsl #8 + orr r2, r2, r3, lsl #2 + and r3, r8, r4, lsr #5 + orr r2, r2, r3 + and r4, r4, r7 + orr r4, r2, r4, lsl #3 //KEY_TRIPLE_UPDATE_2(r4) + and r2, r12, r5, lsr #4 + and r3, r5, r12 + orr r2, r2, r3, lsl #4 + and r3, r11, r5, lsr #6 + orr r2, r2, r3 + and r5, r5, r10 + orr r5, r2, r5, lsl#2 //KEY_DOUBLE_UPDATE_2(r5) + str.w r5, [r1, #288] + str.w r4, [r1, #292] + // KEY_DOULBE/TRIPLE_UPDATE_2 + // masks + movw r12, #0x5555 + movt r12, #0x5555 + mvn r11, r12 + ldrd r4, r5, [r1, #16] + and r2, r12, r4, ror #24 + and r4, r11, r4, ror #20 + orr r4, r4, r2 //KEY_TRIPLE_UPDATE_2(r4) + and r2, r11, r5, ror #24 + and r5, r12, r5, ror #16 + orr r5, r5, r2 //KEY_DOUBLE_UPDATE_2(r5) + str.w r5, [r1, #96] + str.w r4, [r1, #100] + and r2, r12, r5, ror #24 + and r5, r11, r5, ror #20 + orr r5, r5, r2 //KEY_TRIPLE_UPDATE_2(r5) + and r2, r11, r4, ror #24 + and r4, r12, r4, ror #16 + orr r4, r4, r2 //KEY_DOUBLE_UPDATE_2(r4) + str.w r4, [r1, #176] + str.w r5, [r1, #180] + and r2, r12, r4, ror #24 + and r4, r11, r4, ror #20 + orr r4, r4, 
r2 //KEY_TRIPLE_UPDATE_2(r4) + and r2, r11, r5, ror #24 + and r5, r12, r5, ror #16 + orr r5, r5, r2 //KEY_DOUBLE_UPDATE_2(r5) + strd r5, r4, [r1, #256] + ldrd r4, r5, [r1, #56] + and r2, r12, r4, ror #24 + and r4, r11, r4, ror #20 + orr r4, r4, r2 //KEY_TRIPLE_UPDATE_2(r5) + and r2, r11, r5, ror #24 + and r5, r12, r5, ror #16 + orr r5, r5, r2 //KEY_DOUBLE_UPDATE_2(r4) + str.w r5, [r1, #136] + str.w r4, [r1, #140] + and r2, r12, r5, ror #24 + and r5, r11, r5, ror #20 + orr r5, r5, r2 //KEY_TRIPLE_UPDATE_2(r4) + and r2, r11, r4, ror #24 + and r4, r12, r4, ror #16 + orr r4, r4, r2 //KEY_DOUBLE_UPDATE_2(r5) + str.w r4, [r1, #216] + str.w r5, [r1, #220] + and r2, r12, r4, ror #24 + and r4, r11, r4, ror #20 + orr r4, r4, r2 //KEY_TRIPLE_UPDATE_2(r5) + and r2, r11, r5, ror #24 + and r5, r12, r5, ror #16 + orr r5, r5, r2 //KEY_DOUBLE_UPDATE_2(r4) + str.w r5, [r1, #296] + str.w r4, [r1, #300] + // KEY_DOULBE/TRIPLE_UPDATE_3 + // masks + orr r12, r8, r8, lsl #8 //r12<- 0x07070707 + movw r11, #0xc0c0 + movw r10, #0x3030 + and r9, r12, r12, lsr #1 //r9 <- 0x03030303 + lsl r8, r12, #4 + eor r7, r8, r9, lsl #5 + movw r6, #0xf0f0 + ldrd r4, r5, [r1, #24] + and r2, r10, r4, lsr #18 + and r3, r4, r7, lsr #4 + orr r2, r2, r3, lsl #3 + and r3, r11, r4, lsr #14 + orr r2, r2, r3 + and r3, r4, r12, lsr #11 + orr r2, r2, r3, lsl #15 + and r3, r12, r4, lsr #1 + orr r2, r2, r3 + and r4, r4, r7, lsr #16 + orr r4, r2, r4, lsl #19 //KEY_TRIPLE_UPDATE_4(r4) + and r2, r9, r5, lsr #2 + and r3, r9, r5 + orr r2, r2, r3, lsl #2 + and r3, r8, r5, lsr #1 + orr r2, r2, r3 + and r5, r5, r7 + orr r5, r2, r5, lsl #3 //KEY_DOUBLE_UPDATE_4(r5) + str.w r5, [r1, #104] + str.w r4, [r1, #108] + and r2, r10, r5, lsr #18 + and r3, r5, r7, lsr #4 + orr r2, r2, r3, lsl #3 + and r3, r11, r5, lsr #14 + orr r2, r2, r3 + and r3, r5, r12, lsr #11 + orr r2, r2, r3, lsl #15 + and r3, r12, r5, lsr #1 + orr r2, r2, r3 + and r5, r5, r7, lsr #16 + orr r5, r2, r5, lsl #19 //KEY_TRIPLE_UPDATE_4(r5) + and r2, r9, r4, lsr #2 + 
and r3, r9, r4 + orr r2, r2, r3, lsl #2 + and r3, r8, r4, lsr #1 + orr r2, r2, r3 + and r4, r4, r7 + orr r4, r2, r4, lsl #3 //KEY_DOUBLE_UPDATE_4(r4) + str.w r4, [r1, #184] + str.w r5, [r1, #188] + and r2, r10, r4, lsr #18 + and r3, r4, r7, lsr #4 + orr r2, r2, r3, lsl #3 + and r3, r11, r4, lsr #14 + orr r2, r2, r3 + and r3, r4, r12, lsr #11 + orr r2, r2, r3, lsl #15 + and r3, r12, r4, lsr #1 + orr r2, r2, r3 + and r4, r4, r7, lsr #16 + orr r4, r2, r4, lsl #19 //KEY_TRIPLE_UPDATE_4(r4) + and r2, r9, r5, lsr #2 + and r3, r9, r5 + orr r2, r2, r3, lsl #2 + and r3, r8, r5, lsr #1 + orr r2, r2, r3 + and r5, r5, r7 + orr r5, r2, r5, lsl #3 //KEY_DOUBLE_UPDATE_4(r5) + strd r5, r4, [r1, #264] + ldrd r4, r5, [r1, #64] + and r2, r10, r4, lsr #18 + and r3, r4, r7, lsr #4 + orr r2, r2, r3, lsl #3 + and r3, r11, r4, lsr #14 + orr r2, r2, r3 + and r3, r4, r12, lsr #11 + orr r2, r2, r3, lsl #15 + and r3, r12, r4, lsr #1 + orr r2, r2, r3 + and r4, r4, r7, lsr #16 + orr r4, r2, r4, lsl #19 //KEY_TRIPLE_UPDATE_4(r4) + and r2, r9, r5, lsr #2 + and r3, r9, r5 + orr r2, r2, r3, lsl #2 + and r3, r8, r5, lsr #1 + orr r2, r2, r3 + and r5, r5, r7 + orr r5, r2, r5, lsl #3 //KEY_DOUBLE_UPDATE_4(r5) + str.w r5, [r1, #144] + str.w r4, [r1, #148] + and r2, r10, r5, lsr #18 + and r3, r5, r7, lsr #4 + orr r2, r2, r3, lsl #3 + and r3, r11, r5, lsr #14 + orr r2, r2, r3 + and r3, r5, r12, lsr #11 + orr r2, r2, r3, lsl #15 + and r3, r12, r5, lsr #1 + orr r2, r2, r3 + and r5, r5, r7, lsr #16 + orr r5, r2, r5, lsl #19 //KEY_TRIPLE_UPDATE_4(r5) + and r2, r9, r4, lsr #2 + and r3, r9, r4 + orr r2, r2, r3, lsl #2 + and r3, r8, r4, lsr #1 + orr r2, r2, r3 + and r4, r4, r7 + orr r4, r2, r4, lsl #3 //KEY_DOUBLE_UPDATE_4(r4) + str.w r4, [r1, #224] + str.w r5, [r1, #228] + and r2, r10, r4, lsr #18 + and r3, r4, r7, lsr #4 + orr r2, r2, r3, lsl #3 + and r3, r11, r4, lsr #14 + orr r2, r2, r3 + and r3, r4, r12, lsr #11 + orr r2, r2, r3, lsl #15 + and r3, r12, r4, lsr #1 + orr r2, r2, r3 + and r4, r4, r7, lsr #16 + 
orr r4, r2, r4, lsl #19 //KEY_TRIPLE_UPDATE_4(r4) + and r2, r9, r5, lsr #2 + and r3, r9, r5 + orr r2, r2, r3, lsl #2 + and r3, r8, r5, lsr #1 + orr r2, r2, r3 + and r5, r5, r7 + orr r5, r2, r5, lsl #3 //KEY_DOUBLE_UPDATE_4(r5) + str.w r5, [r1, #304] + str.w r4, [r1, #308] + // KEY_DOULBE/TRIPLE_UPDATE_4 + // masks + movw r12, #0x0fff + lsl r10, r12, #16 + movw r8, #0x00ff + movw r7, #0x03ff + lsl r7, r7, #16 + ldrd r4, r5, [r1, #32] + and r2, r7, r4, lsr #6 + and r3, r4, #0x003f0000 + orr r2, r2, r3, lsl #10 + and r3, r12, r4, lsr #4 + orr r2, r2, r3 + and r4, r4, #0x000f + orr r4, r2, r4, lsl #12 //KEY_TRIPLE_UPDATE_4(r4) + and r2, r10, r5, lsr #4 + and r3, r5, #0x000f0000 + orr r2, r2, r3, lsl #12 + and r3, r8, r5, lsr #8 + orr r2, r2, r3 + and r5, r5, r8 + orr r5, r2, r5, lsl #8 //KEY_DOUBLE_UPDATE_4(r5) + str.w r5, [r1, #112] + str.w r4, [r1, #116] + and r2, r7, r5, lsr #6 + and r3, r5, #0x003f0000 + orr r2, r2, r3, lsl #10 + and r3, r12, r5, lsr #4 + orr r2, r2, r3 + and r5, r5, #0x000f + orr r5, r2, r5, lsl #12 //KEY_TRIPLE_UPDATE_4(r5) + and r2, r10, r4, lsr #4 + and r3, r4, #0x000f0000 + orr r2, r2, r3, lsl #12 + and r3, r8, r4, lsr #8 + orr r2, r2, r3 + and r4, r4, r8 + orr r4, r2, r4, lsl #8 //KEY_DOUBLE_UPDATE_4(r4) + str.w r4, [r1, #192] + str.w r5, [r1, #196] + and r2, r7, r4, lsr #6 + and r3, r4, #0x003f0000 + orr r2, r2, r3, lsl #10 + and r3, r12, r4, lsr #4 + orr r2, r2, r3 + and r4, r4, #0x000f + orr r4, r2, r4, lsl #12 //KEY_TRIPLE_UPDATE_4(r4) + and r2, r10, r5, lsr #4 + and r3, r5, #0x000f0000 + orr r2, r2, r3, lsl #12 + and r3, r8, r5, lsr #8 + orr r2, r2, r3 + and r5, r5, r8 + orr r5, r2, r5, lsl #8 //KEY_DOUBLE_UPDATE_4(r5) + strd r5, r4, [r1, #272] + ldrd r4, r5, [r1, #72] + and r2, r7, r4, lsr #6 + and r3, r4, #0x003f0000 + orr r2, r2, r3, lsl #10 + and r3, r12, r4, lsr #4 + orr r2, r2, r3 + and r4, r4, #0x000f + orr r4, r2, r4, lsl #12 //KEY_TRIPLE_UPDATE_4(r4) + and r2, r10, r5, lsr #4 + and r3, r5, #0x000f0000 + orr r2, r2, r3, lsl #12 + 
and r3, r8, r5, lsr #8 + orr r2, r2, r3 + and r5, r5, r8 + orr r5, r2, r5, lsl #8 //KEY_DOUBLE_UPDATE_4(r5) + str.w r5, [r1, #152] + str.w r4, [r1, #156] + and r2, r7, r5, lsr #6 + and r3, r5, #0x003f0000 + orr r2, r2, r3, lsl #10 + and r3, r12, r5, lsr #4 + orr r2, r2, r3 + and r5, r5, #0x000f + orr r5, r2, r5, lsl #12 //KEY_TRIPLE_UPDATE_4(r5) + and r2, r10, r4, lsr #4 + and r3, r4, #0x000f0000 + orr r2, r2, r3, lsl #12 + and r3, r8, r4, lsr #8 + orr r2, r2, r3 + and r4, r4, r8 + orr r4, r2, r4, lsl #8 //KEY_DOUBLE_UPDATE_4(r4) + str.w r4, [r1, #232] + str.w r5, [r1, #236] + and r2, r7, r4, lsr #6 + and r3, r4, #0x003f0000 + orr r2, r2, r3, lsl #10 + and r3, r12, r4, lsr #4 + orr r2, r2, r3 + and r4, r4, #0x000f + orr r4, r2, r4, lsl #12 //KEY_TRIPLE_UPDATE_4(r4) + and r2, r10, r5, lsr #4 + and r3, r5, #0x000f0000 + orr r2, r2, r3, lsl #12 + and r3, r8, r5, lsr #8 + orr r2, r2, r3 + and r5, r5, r8 + orr r5, r2, r5, lsl #8 //KEY_DOUBLE_UPDATE_4(r5) + str.w r5, [r1, #312] + str.w r4, [r1, #316] + pop {r2-r12,r14} + bx lr + +/***************************************************************************** +* Fully unrolled ARM assembly implementation of the GIFTb-128 block cipher. +* This function simply encrypts a 128-bit block, without any operation mode. 
+*****************************************************************************/ +@ void giftb128_encrypt_block(u8 *out, const u32* rkey, const u8 *block) +.global giftb128_encrypt_block +.type giftb128_encrypt_block,%function +giftb128_encrypt_block: + push {r2-r12,r14} + // load plaintext blocks + ldm r2, {r9-r12} + // endianness + rev r9, r9 + rev r10, r10 + rev r11, r11 + rev r12, r12 + // masks for HALF/BYTE/NIBBLE rotations + movw r2, #0x1111 + movt r2, #0x1111 //for NIBBLE_ROR + movw r3, #0x000f + movt r3, #0x000f //for HALF_ROR + mvn r4, r2, lsl #3 //0x7777777 for NIBBLE_ROR + // ------------------ 1st QUINTUPLE ROUND ------------------ + // 1st round + movw r5, 0x0008 + movt r5, 0x1000 //load rconst + ldrd r6, r7, [r1] //load rkey + and r8, r9, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r9, r8 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + and r8, r4, r12, lsr #1 + and r12, r12, r2 + orr r12, r8, r12, lsl #3 //NIBBLE_ROR(r12, 1) + and r8, r4, r11 + and r11, r2, r11, lsr #3 + orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 3) + orr r14, r2, r2, lsl #1 //0x33333333 for NIBBLE_ROR + and r8, r14, r10, lsr #2 + and r10, r10, r14 + orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r9, r9, r5 //add rconst + // 2nd round + movw r5, 0x8000 + movt r5, 0x8001 //load rconst + ldrd r6, r7, [r1, #8] //load rkey + and r8, r12, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r12, r8 + orr r8, r12, r10 + eor r11, r11, r8 + eor r9, r9, r11 + eor r10, r10, r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9 + mvn r14, r3, lsl #12 //0x0fff0fff for HALF_ROR + and r8, r14, r9, lsr #4 + and r9, r9, r3 + orr r9, r8, r9, lsl #12 //HALF_ROR(r9, 4) + and r8, r3, r11, lsr #12 + and r11, r11, r14 + orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12) + rev16 r10, r10 //HALF_ROR(r10, 8) + eor r10, r10, 
r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r12, r12, r5 //add rconst + // 3rd round + movw r5, 0x0002 + movt r5, 0x5400 //load rconst + ldrd r6, r7, [r1, #16] //load rkey + and r8, r9, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r9, r8 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE + eor r8, r10, r10, lsr #1 + and r8, r8, r14 + eor r10, r10, r8 + eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1) + eor r8, r12, r12, lsr #1 + and r8, r8, r14, lsr #16 + eor r12, r12, r8 + eor r12, r12, r8, lsl #1 //SWAPMOVE(r12, r12, 0x55550000, 1) + eor r8, r11, r11, lsr #1 + and r8, r8, r14, lsl #16 + eor r11, r11, r8 + eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x00005555, 1) + eor r10, r10, r6 //add 1st keyword + eor r11, r7, r11, ror #16 //add 2nd keyword + eor r9, r9, r5 //add rconst + // 4th round + movw r5, 0x0181 + movt r5, 0x0101 //load rconst + ldrd r6, r7, [r1, #24] //load rkey + and r8, r11, r12, ror #16 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r8, r12, ror #16 + orr r8, r12, r10 + eor r11, r11, r8 + eor r9, r9, r11 + eor r10, r10, r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9 + eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR + and r8, r14, r10, lsr #4 + and r10, r10, r14 + orr r10, r8, r10, lsl #4 //BYTE_ROR(r10, 4) + orr r14, r14, r14, lsl #2 //0x3f3f3f3f for BYTE_ROR + mvn r8, r14 + and r8, r8, r11, lsl #6 + and r11, r14, r11, lsr #2 + orr r11, r11, r8 //BYTE_ROR(r11, 2) + mvn r8, r14, lsr #6 + and r8, r8, r9, lsr #6 + and r9, r14, r9 + orr r9, r8, r9, lsl #2 //BYTE_ROR(r9, 6) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r12, r12, r5 //add rconst + // 5th round + movw r5, 0x001f + movt r5, 0x8000 //load rconst + ldrd r6, r7, [r1, #32] //load rkey + and r8, r9, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + 
eor r9, r9, r8 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + eor r10, r6, r10, ror #16 //add 1st keyword + eor r11, r7, r11, ror #8 //add 2nd keyword + eor r9, r9, r5 //add rconst + + // ------------------ 2nd QUINTUPLE ROUND ------------------ + // 1st round + movw r5, 0x8880 + movt r5, 0x1088 //load rconst + ldrd r6, r7, [r1, #40] //load rkey + and r8, r11, r12, ror #24 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r8, r12, ror #24 + orr r8, r12, r10 + eor r11, r11, r8 + eor r9, r9, r11 + eor r10, r10, r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9 + and r8, r4, r9, lsr #1 + and r9, r9, r2 + orr r9, r8, r9, lsl #3 //NIBBLE_ROR(r9, 1) + and r8, r4, r11 + and r11, r2, r11, lsr #3 + orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 3) + orr r14, r2, r2, lsl #1 //0x33333333 for NIBBLE_ROR + and r8, r14, r10, lsr #2 + and r10, r10, r14 + orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r12, r12, r5 //add rconst + // 2nd round + movw r5, 0xe000 + movt r5, 0x6001 //load rconst + ldrd r6, r7, [r1, #48] //load rkey + and r8, r9, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r9, r8 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + mvn r14, r3, lsl #12 //0x0fff0fff for HALF_ROR + and r8, r14, r12, lsr #4 + and r12, r12, r3 + orr r12, r8, r12, lsl #12 //HALF_ROR(r12, 4) + and r8, r3, r11, lsr #12 + and r11, r11, r14 + orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12) + rev16 r10, r10 //HALF_ROR(r10, 8) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r9, r9, r5 //add rconst + // 3rd round + movw r5, 0x0002 + movt r5, 0x5150 //load rconst + ldrd r6, r7, [r1, #56] //load rkey + and r8, r12, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r12, r8 + orr r8, r12, 
r10 + eor r11, r11, r8 + eor r9, r9, r11 + eor r10, r10, r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9 + orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE + eor r8, r10, r10, lsr #1 + and r8, r8, r14 + eor r10, r10, r8 + eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1) + eor r8, r9, r9, lsr #1 + and r8, r8, r14, lsr #16 + eor r9, r9, r8 + eor r9, r9, r8, lsl #1 //SWAPMOVE(r9, r9, 0x00005555, 1) + eor r8, r11, r11, lsr #1 + and r8, r8, r14, lsl #16 + eor r11, r11, r8 + eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x55550000, 1) + eor r10, r10, r6 //add 1st keyword + eor r11, r7, r11, ror #16 //add 2nd keyword + eor r12, r12, r5 //add rconst + // 4th round + movw r5, 0x0180 + movt r5, 0x0303 //load rconst + ldrd r6, r7, [r1, #64] //load rkey + and r8, r11, r9, ror #16 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r8, r9, ror #16 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR + and r8, r14, r10, lsr #4 + and r10, r10, r14 + orr r10, r8, r10, lsl #4 //BYTE_ROR(r10, 4) + orr r14, r14, r14, lsl #2 //0x3f3f3f3f for BYTE_ROR + mvn r8, r14 + and r8, r8, r11, lsl #6 + and r11, r14, r11, lsr #2 + orr r11, r11, r8 //BYTE_ROR(r11, 2) + mvn r8, r14, lsr #6 + and r8, r8, r12, lsr #6 + and r12, r14, r12 + orr r12, r8, r12, lsl #2 //BYTE_ROR(r12, 6) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r9, r9, r5 //add rconst + // 5th round + movw r5, 0x002f + movt r5, 0x8000 //load rconst + ldrd r6, r7, [r1, #72] //load rkey + and r8, r12, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r12, r8 + orr r8, r12, r10 + eor r11, r11, r8 + eor r9, r9, r11 + eor r10, r10, r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9 + eor r10, r6, r10, ror #16 //add 1st keyword + eor r11, r7, r11, ror #8 //add 2nd keyword + eor r12, r12, r5 //add rconst + + // ------------------ 3rd 
QUINTUPLE ROUND ------------------ + // 1st round + movw r5, 0x8880 + movt r5, 0x1008 //load rconst + ldrd r6, r7, [r1, #80] //load rkey + and r8, r11, r9, ror #24 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r8, r9, ror #24 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + and r8, r4, r12, lsr #1 + and r12, r12, r2 + orr r12, r8, r12, lsl #3 //NIBBLE_ROR(r12, 1) + and r8, r4, r11 + and r11, r2, r11, lsr #3 + orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 3) + orr r14, r2, r2, lsl #1 //0x33333333 for NIBBLE_ROR + and r8, r14, r10, lsr #2 + and r10, r10, r14 + orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r9, r9, r5 //add rconst + // 2nd round + movw r5, 0x6000 + movt r5, 0x6001 //load rconst + ldrd r6, r7, [r1, #88] //load rkey + and r8, r12, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r12, r8 + orr r8, r12, r10 + eor r11, r11, r8 + eor r9, r9, r11 + eor r10, r10, r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9 + mvn r14, r3, lsl #12 //0x0fff0fff for HALF_ROR + and r8, r14, r9, lsr #4 + and r9, r9, r3 + orr r9, r8, r9, lsl #12 //HALF_ROR(r9, 4) + and r8, r3, r11, lsr #12 + and r11, r11, r14 + orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12) + rev16 r10, r10 //HALF_ROR(r10, 8) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r12, r12, r5 //add rconst + // 3rd round + movw r5, 0x0002 + movt r5, 0x4150 //load rconst + ldrd r6, r7, [r1, #96] //load rkey + and r8, r9, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r9, r8 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE + eor r8, r10, r10, lsr #1 + and r8, r8, r14 + eor r10, r10, r8 + eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1) + eor 
r8, r12, r12, lsr #1 + and r8, r8, r14, lsr #16 + eor r12, r12, r8 + eor r12, r12, r8, lsl #1 //SWAPMOVE(r12, r12, 0x00005555, 1) + eor r8, r11, r11, lsr #1 + and r8, r8, r14, lsl #16 + eor r11, r11, r8 + eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x55550000, 1) + eor r10, r10, r6 //add 1st keyword + eor r11, r7, r11, ror #16 //add 2nd keyword + eor r9, r9, r5 //add rconst + // 4th round + movw r5, 0x0080 + movt r5, 0x0303 //load rconst + ldrd r6, r7, [r1, #104] //load rkey + and r8, r11, r12, ror #16 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r8, r12, ror #16 + orr r8, r12, r10 + eor r11, r11, r8 + eor r9, r9, r11 + eor r10, r10, r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9 + eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR + and r8, r14, r10, lsr #4 + and r10, r10, r14 + orr r10, r8, r10, lsl #4 //BYTE_ROR(r10, 4) + orr r14, r14, r14, lsl #2 //0x3f3f3f3f for BYTE_ROR + mvn r8, r14 + and r8, r8, r11, lsl #6 + and r11, r14, r11, lsr #2 + orr r11, r11, r8 //BYTE_ROR(r11, 2) + mvn r8, r14, lsr #6 + and r8, r8, r9, lsr #6 + and r9, r14, r9 + orr r9, r8, r9, lsl #2 //BYTE_ROR(r9, 6) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r12, r12, r5 //add rconst + // 5th round + movw r5, 0x0027 + movt r5, 0x8000 //load rconst + ldrd r6, r7, [r1, #112] //load rkey + and r8, r9, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r9, r8 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + eor r10, r6, r10, ror #16 //add 1st keyword + eor r11, r7, r11, ror #8 //add 2nd keyword + eor r9, r9, r5 //add rconst + + // ------------------ 4th QUINTUPLE ROUND ------------------ + // 1st round + movw r5, 0x8880 + movt r5, 0x1000 //load rconst + ldrd r6, r7, [r1, #120] //load rkey + and r8, r11, r12, ror #24 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r8, r12, ror #24 + orr r8, r12, r10 + eor r11, r11, r8 + eor r9, r9, 
r11 + eor r10, r10, r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9 + and r8, r4, r9, lsr #1 + and r9, r9, r2 + orr r9, r8, r9, lsl #3 //NIBBLE_ROR(r9, 1) + and r8, r4, r11 + and r11, r2, r11, lsr #3 + orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 3) + orr r14, r2, r2, lsl #1 //0x33333333 for NIBBLE_ROR + and r8, r14, r10, lsr #2 + and r10, r10, r14 + orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r12, r12, r5 //add rconst + // 2nd round + movw r5, 0xe000 + movt r5, 0x4001 //load rconst + ldrd r6, r7, [r1, #128] //load rkey + and r8, r9, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r9, r8 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + mvn r14, r3, lsl #12 //0x0fff0fff for HALF_ROR + and r8, r14, r12, lsr #4 + and r12, r12, r3 + orr r12, r8, r12, lsl #12 //HALF_ROR(r12, 4) + and r8, r3, r11, lsr #12 + and r11, r11, r14 + orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12) + rev16 r10, r10 //HALF_ROR(r10, 8) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r9, r9, r5 //add rconst + // 3rd round + movw r5, 0x0002 + movt r5, 0x1150 //load rconst + ldrd r6, r7, [r1, #136] //load rkey + and r8, r12, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r12, r8 + orr r8, r12, r10 + eor r11, r11, r8 + eor r9, r9, r11 + eor r10, r10, r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9 + orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE + eor r8, r10, r10, lsr #1 + and r8, r8, r14 + eor r10, r10, r8 + eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1) + eor r8, r9, r9, lsr #1 + and r8, r8, r14, lsr #16 + eor r9, r9, r8 + eor r9, r9, r8, lsl #1 //SWAPMOVE(r9, r9, 0x00005555, 1) + eor r8, r11, r11, lsr #1 + and r8, r8, r14, lsl #16 + eor r11, r11, r8 + eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x55550000, 1) + eor r10, r10, r6 //add 1st keyword 
+ eor r11, r7, r11, ror #16 //add 2nd keyword + eor r12, r12, r5 //add rconst + // 4th round + movw r5, 0x0180 + movt r5, 0x0302 //load rconst + ldrd r6, r7, [r1, #144] //load rkey + and r8, r11, r9, ror #16 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r8, r9, ror #16 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR + and r8, r14, r10, lsr #4 + and r10, r10, r14 + orr r10, r8, r10, lsl #4 //BYTE_ROR(r10, 4) + orr r14, r14, r14, lsl #2 //0x3f3f3f3f for BYTE_ROR + mvn r8, r14 + and r8, r8, r11, lsl #6 + and r11, r14, r11, lsr #2 + orr r11, r11, r8 //BYTE_ROR(r11, 2) + mvn r8, r14, lsr #6 + and r8, r8, r12, lsr #6 + and r12, r14, r12 + orr r12, r8, r12, lsl #2 //BYTE_ROR(r12, 6) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r9, r9, r5 //add rconst + // 5th round + movw r5, 0x002b + movt r5, 0x8000 //load rconst + ldrd r6, r7, [r1, #152] //load rkey + and r8, r12, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r12, r8 + orr r8, r12, r10 + eor r11, r11, r8 + eor r9, r9, r11 + eor r10, r10, r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9 + eor r10, r6, r10, ror #16 //add 1st keyword + eor r11, r7, r11, ror #8 //add 2nd keyword + eor r12, r12, r5 //add rconst + + // ------------------ 5th QUINTUPLE ROUND ------------------ + // 1st round + movw r5, 0x0880 + movt r5, 0x1008 //load rconst + ldrd r6, r7, [r1, #160] //load rkey + and r8, r11, r9, ror #24 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r8, r9, ror #24 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + and r8, r4, r12, lsr #1 + and r12, r12, r2 + orr r12, r8, r12, lsl #3 //NIBBLE_ROR(r12, 1) + and r8, r4, r11 + and r11, r2, r11, lsr #3 + orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 3) + orr r14, r2, r2, lsl #1 
//0x33333333 for NIBBLE_ROR + and r8, r14, r10, lsr #2 + and r10, r10, r14 + orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r9, r9, r5 //add rconst + // 2nd round + movw r5, 0x4000 + movt r5, 0x6001 //load rconst + ldrd r6, r7, [r1, #168] //load rkey + and r8, r12, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r12, r8 + orr r8, r12, r10 + eor r11, r11, r8 + eor r9, r9, r11 + eor r10, r10, r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9 + mvn r14, r3, lsl #12 //0x0fff0fff for HALF_ROR + and r8, r14, r9, lsr #4 + and r9, r9, r3 + orr r9, r8, r9, lsl #12 //HALF_ROR(r9, 4) + and r8, r3, r11, lsr #12 + and r11, r11, r14 + orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12) + rev16 r10, r10 //HALF_ROR(r10, 8) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r12, r12, r5 //add rconst + // 3rd round + movw r5, 0x0002 + movt r5, 0x0140 //load rconst + ldrd r6, r7, [r1, #176] //load rkey + and r8, r9, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r9, r8 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE + eor r8, r10, r10, lsr #1 + and r8, r8, r14 + eor r10, r10, r8 + eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1) + eor r8, r12, r12, lsr #1 + and r8, r8, r14, lsr #16 + eor r12, r12, r8 + eor r12, r12, r8, lsl #1 //SWAPMOVE(r12, r12, 0x00005555, 1) + eor r8, r11, r11, lsr #1 + and r8, r8, r14, lsl #16 + eor r11, r11, r8 + eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x55550000, 1) + eor r10, r10, r6 //add 1st keyword + eor r11, r7, r11, ror #16 //add 2nd keyword + eor r9, r9, r5 //add rconst + // 4th round + movw r5, 0x0080 + movt r5, 0x0202 //load rconst + ldrd r6, r7, [r1, #184] //load rkey + and r8, r11, r12, ror #16 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r8, r12, 
ror #16 + orr r8, r12, r10 + eor r11, r11, r8 + eor r9, r9, r11 + eor r10, r10, r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9 + eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR + and r8, r14, r10, lsr #4 + and r10, r10, r14 + orr r10, r8, r10, lsl #4 //BYTE_ROR(r10, 4) + orr r14, r14, r14, lsl #2 //0x3f3f3f3f for BYTE_ROR + mvn r8, r14 + and r8, r8, r11, lsl #6 + and r11, r14, r11, lsr #2 + orr r11, r11, r8 //BYTE_ROR(r11, 2) + mvn r8, r14, lsr #6 + and r8, r8, r9, lsr #6 + and r9, r14, r9 + orr r9, r8, r9, lsl #2 //BYTE_ROR(r9, 6) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r12, r12, r5 //add rconst + // 5th round + movw r5, 0x0021 + movt r5, 0x8000 //load rconst + ldrd r6, r7, [r1, #192] //load rkey + and r8, r9, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r9, r8 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + eor r10, r6, r10, ror #16 //add 1st keyword + eor r11, r7, r11, ror #8 //add 2nd keyword + eor r9, r9, r5 //add rconst + + // ------------------ 6th QUINTUPLE ROUND ------------------ + // 1st round + movw r5, 0x0080 + movt r5, 0x1000 //load rconst + ldrd r6, r7, [r1, #200] //load rkey + and r8, r11, r12, ror #24 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r8, r12, ror #24 + orr r8, r12, r10 + eor r11, r11, r8 + eor r9, r9, r11 + eor r10, r10, r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9 + and r8, r4, r9, lsr #1 + and r9, r9, r2 + orr r9, r8, r9, lsl #3 //NIBBLE_ROR(r9, 1) + and r8, r4, r11 + and r11, r2, r11, lsr #3 + orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 3) + orr r14, r2, r2, lsl #1 //0x33333333 for NIBBLE_ROR + and r8, r14, r10, lsr #2 + and r10, r10, r14 + orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r12, r12, r5 //add rconst + // 2nd round + movw r5, 0xc000 + movt r5, 0x0001 //load rconst + ldrd 
r6, r7, [r1, #208] //load rkey + and r8, r9, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r9, r8 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + mvn r14, r3, lsl #12 //0x0fff0fff for HALF_ROR + and r8, r14, r12, lsr #4 + and r12, r12, r3 + orr r12, r8, r12, lsl #12 //HALF_ROR(r12, 4) + and r8, r3, r11, lsr #12 + and r11, r11, r14 + orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12) + rev16 r10, r10 //HALF_ROR(r10, 8) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r9, r9, r5 //add rconst + // 3rd round + movw r5, 0x0002 + movt r5, 0x5100 //load rconst + ldrd r6, r7, [r1, #216] //load rkey + and r8, r12, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r12, r8 + orr r8, r12, r10 + eor r11, r11, r8 + eor r9, r9, r11 + eor r10, r10, r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9 + orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE + eor r8, r10, r10, lsr #1 + and r8, r8, r14 + eor r10, r10, r8 + eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1) + eor r8, r9, r9, lsr #1 + and r8, r8, r14, lsr #16 + eor r9, r9, r8 + eor r9, r9, r8, lsl #1 //SWAPMOVE(r9, r9, 0x00005555, 1) + eor r8, r11, r11, lsr #1 + and r8, r8, r14, lsl #16 + eor r11, r11, r8 + eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x55550000, 1) + eor r10, r10, r6 //add 1st keyword + eor r11, r7, r11, ror #16 //add 2nd keyword + eor r12, r12, r5 //add rconst + // 4th round + movw r5, 0x0180 + movt r5, 0x0301 //load rconst + ldrd r6, r7, [r1, #224] //load rkey + and r8, r11, r9, ror #16 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r8, r9, ror #16 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR + and r8, r14, r10, lsr #4 + and r10, r10, r14 + orr r10, r8, r10, lsl #4 //BYTE_ROR(r10, 4) + orr r14, r14, r14, 
lsl #2 //0x3f3f3f3f for BYTE_ROR + mvn r8, r14 + and r8, r8, r11, lsl #6 + and r11, r14, r11, lsr #2 + orr r11, r11, r8 //BYTE_ROR(r11, 2) + mvn r8, r14, lsr #6 + and r8, r8, r12, lsr #6 + and r12, r14, r12 + orr r12, r8, r12, lsl #2 //BYTE_ROR(r12, 6) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r9, r9, r5 //add rconst + // 5th round + movw r5, 0x002e + movt r5, 0x8000 //load rconst + ldrd r6, r7, [r1, #232] //load rkey + and r8, r12, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r12, r8 + orr r8, r12, r10 + eor r11, r11, r8 + eor r9, r9, r11 + eor r10, r10, r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9 + eor r10, r6, r10, ror #16 //add 1st keyword + eor r11, r7, r11, ror #8 //add 2nd keyword + eor r12, r12, r5 //add rconst + + + // ------------------ 7th QUINTUPLE ROUND ------------------ + // 1st round + movw r5, 0x8800 + movt r5, 0x1008 //load rconst + ldrd r6, r7, [r1, #240] //load rkey + and r8, r11, r9, ror #24 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r8, r9, ror #24 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + and r8, r4, r12, lsr #1 + and r12, r12, r2 + orr r12, r8, r12, lsl #3 //NIBBLE_ROR(r12, 1) + and r8, r4, r11 + and r11, r2, r11, lsr #3 + orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 3) + orr r14, r2, r2, lsl #1 //0x33333333 for NIBBLE_ROR + and r8, r14, r10, lsr #2 + and r10, r10, r14 + orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r9, r9, r5 //add rconst + // 2nd round + movw r5, 0x2000 + movt r5, 0x6001 //load rconst + ldrd r6, r7, [r1, #248] //load rkey + and r8, r12, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r12, r8 + orr r8, r12, r10 + eor r11, r11, r8 + eor r9, r9, r11 + eor r10, r10, r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9 + mvn r14, r3, lsl #12 //0x0fff0fff for 
HALF_ROR + and r8, r14, r9, lsr #4 + and r9, r9, r3 + orr r9, r8, r9, lsl #12 //HALF_ROR(r9, 4) + and r8, r3, r11, lsr #12 + and r11, r11, r14 + orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12) + rev16 r10, r10 //HALF_ROR(r10, 8) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r12, r12, r5 //add rconst + // 3rd round + movw r5, 0x0002 + movt r5, 0x4050 //load rconst + ldrd r6, r7, [r1, #256] //load rkey + and r8, r9, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r9, r8 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE + eor r8, r10, r10, lsr #1 + and r8, r8, r14 + eor r10, r10, r8 + eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1) + eor r8, r12, r12, lsr #1 + and r8, r8, r14, lsr #16 + eor r12, r12, r8 + eor r12, r12, r8, lsl #1 //SWAPMOVE(r12, r12, 0x00005555, 1) + eor r8, r11, r11, lsr #1 + and r8, r8, r14, lsl #16 + eor r11, r11, r8 + eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x55550000, 1) + eor r10, r10, r6 //add 1st keyword + eor r11, r7, r11, ror #16 //add 2nd keyword + eor r9, r9, r5 //add rconst + // 4th round + movw r5, 0x0080 + movt r5, 0x0103 //load rconst + ldrd r6, r7, [r1, #264] //load rkey + and r8, r11, r12, ror #16 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r8, r12, ror #16 + orr r8, r12, r10 + eor r11, r11, r8 + eor r9, r9, r11 + eor r10, r10, r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9 + eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR + and r8, r14, r10, lsr #4 + and r10, r10, r14 + orr r10, r8, r10, lsl #4 //BYTE_ROR(r10, 4) + orr r14, r14, r14, lsl #2 //0x3f3f3f3f for BYTE_ROR + mvn r8, r14 + and r8, r8, r11, lsl #6 + and r11, r14, r11, lsr #2 + orr r11, r11, r8 //BYTE_ROR(r11, 2) + mvn r8, r14, lsr #6 + and r8, r8, r9, lsr #6 + and r9, r14, r9 + orr r9, r8, r9, lsl #2 //BYTE_ROR(r9, 6) + eor r10, r10, r6 //add 1st keyword 
+ eor r11, r11, r7 //add 2nd keyword + eor r12, r12, r5 //add rconst + // 5th round + movw r5, 0x0006 + movt r5, 0x8000 //load rconst + ldrd r6, r7, [r1, #272] //load rkey + and r8, r9, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r9, r8 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + eor r10, r6, r10, ror #16 //add 1st keyword + eor r11, r7, r11, ror #8 //add 2nd keyword + eor r9, r9, r5 //add rconst + + // ------------------ 8th QUINTUPLE ROUND ------------------ + // 1st round + movw r5, 0x8808 + movt r5, 0x1000 //load rconst + ldrd r6, r7, [r1, #280] //load rkey + and r8, r11, r12, ror #24 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r8, r12, ror #24 + orr r8, r12, r10 + eor r11, r11, r8 + eor r9, r9, r11 + eor r10, r10, r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9 + and r8, r4, r9, lsr #1 + and r9, r9, r2 + orr r9, r8, r9, lsl #3 //NIBBLE_ROR(r9, 1) + and r8, r4, r11 + and r11, r2, r11, lsr #3 + orr r11, r11, r8, lsl #1 //NIBBLE_ROR(r11, 3) + orr r14, r2, r2, lsl #1 //0x33333333 for NIBBLE_ROR + and r8, r14, r10, lsr #2 + and r10, r10, r14 + orr r10, r8, r10, lsl #2 //NIBBLE_ROR(r10, 2) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r12, r12, r5 //add rconst + // 2nd round + movw r5, 0xa000 + movt r5, 0xc001 //load rconst + ldrd r6, r7, [r1, #288] //load rkey + and r8, r9, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r9, r8 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + mvn r14, r3, lsl #12 //0x0fff0fff for HALF_ROR + and r8, r14, r12, lsr #4 + and r12, r12, r3 + orr r12, r8, r12, lsl #12 //HALF_ROR(r12, 4) + and r8, r3, r11, lsr #12 + and r11, r11, r14 + orr r11, r8, r11, lsl #4 //HALF_ROR(r11, 12) + rev16 r10, r10 //HALF_ROR(r10, 8) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd 
keyword + eor r9, r9, r5 //add rconst + // 3rd round + movw r5, 0x0002 + movt r5, 0x1450 //load rconst + ldrd r6, r7, [r1, #296] //load rkey + and r8, r12, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r12, r8 + orr r8, r12, r10 + eor r11, r11, r8 + eor r9, r9, r11 + eor r10, r10, r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9 + orr r14, r2, r2, lsl #2 //0x55555555 for SWAPMOVE + eor r8, r10, r10, lsr #1 + and r8, r8, r14 + eor r10, r10, r8 + eor r10, r10, r8, lsl #1 //SWAPMOVE(r10, r10, 0x55555555, 1) + eor r8, r9, r9, lsr #1 + and r8, r8, r14, lsr #16 + eor r9, r9, r8 + eor r9, r9, r8, lsl #1 //SWAPMOVE(r9, r9, 0x00005555, 1) + eor r8, r11, r11, lsr #1 + and r8, r8, r14, lsl #16 + eor r11, r11, r8 + eor r11, r11, r8, lsl #1 //SWAPMOVE(r11, r11, 0x55550000, 1) + eor r10, r10, r6 //add 1st keyword + eor r11, r7, r11, ror #16 //add 2nd keyword + eor r12, r12, r5 //add rconst + // 4th round + movw r5, 0x0181 + movt r5, 0x0102 //load rconst + ldrd r6, r7, [r1, #304] //load rkey + and r8, r11, r9, ror #16 //sbox layer + eor r10, r10, r8 + and r8, r10, r12 + eor r9, r8, r9, ror #16 + orr r8, r9, r10 + eor r11, r11, r8 + eor r12, r12, r11 + eor r10, r10, r12 + and r8, r9, r10 + eor r11, r11, r8 + mvn r12, r12 + eor r14, r3, r3, lsl #8 //0x0f0f0f0f for BYTE_ROR + and r8, r14, r10, lsr #4 + and r10, r10, r14 + orr r10, r8, r10, lsl #4 //BYTE_ROR(r10, 4) + orr r14, r14, r14, lsl #2 //0x3f3f3f3f for BYTE_ROR + mvn r8, r14 + and r8, r8, r11, lsl #6 + and r11, r14, r11, lsr #2 + orr r11, r11, r8 //BYTE_ROR(r11, 2) + mvn r8, r14, lsr #6 + and r8, r8, r12, lsr #6 + and r12, r14, r12 + orr r12, r8, r12, lsl #2 //BYTE_ROR(r12, 6) + eor r10, r10, r6 //add 1st keyword + eor r11, r11, r7 //add 2nd keyword + eor r9, r9, r5 //add rconst + // 5th round + movw r5, 0x001a + movt r5, 0x8000 //load rconst + ldrd r6, r7, [r1, #312] //load rkey + and r8, r12, r11 //sbox layer + eor r10, r10, r8 + and r8, r10, r9 + eor r12, r12, r8 + orr r8, r12, r10 + eor r11, r11, 
r8 + eor r9, r9, r11 + eor r10, r10, r9 + and r8, r12, r10 + eor r11, r11, r8 + mvn r9, r9, ror #24 + eor r10, r6, r10, ror #16 //add 1st keyword + eor r11, r7, r11, ror #8 //add 2nd keyword + eor r12, r12, r5 //add rconst + // endianness + rev r9, r9 + rev r10, r10 + rev r11, r11 + rev r12, r12 + stm r0, {r9-r12} + pop {r2-r12,r14} + bx lr -- libgit2 0.26.0