;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; sparkle384_v6m.asm: ARMv6-M implementation of the SPARKLE384 permutation. ;; ;; This file is part of the SPARKLE submission to NIST's LW Crypto Project. ;; ;; Version 1.1.2 (2020-10-30), see <http://www.cryptolux.org/> for updates. ;; ;; Authors: The SPARKLE Group (C. Beierle, A. Biryukov, L. Cardoso dos ;; ;; Santos, J. Groszschaedl, L. Perrin, A. Udovenko, V. Velichkov, Q. Wang). ;; ;; License: GPLv3 (see LICENSE file), other licenses available upon request. ;; ;; Copyright (C) 2019-2020 University of Luxembourg <http://www.uni.lu/>. ;; ;; ------------------------------------------------------------------------- ;; ;; This program is free software: you can redistribute it and/or modify it ;; ;; under the terms of the GNU General Public License as published by the ;; ;; Free Software Foundation, either version 3 of the License, or (at your ;; ;; option) any later version. This program is distributed in the hope that ;; ;; it will be useful, but WITHOUT ANY WARRANTY; without even the implied ;; ;; warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; ;; GNU General Public License for more details. You should have received a ;; ;; copy of the GNU General Public License along with this program. If not, ;; ;; see <http://www.gnu.org/licenses/>. 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

    THUMB
    PRESERVE8

    AREA    sparkle_code, CODE, READONLY, ALIGN=2

    EXPORT  sparkle384_arm [CODE]


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;; REGISTER NAMES AND CONSTANTS ;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; NOTE: several names below are aliases of the same physical register and
;; are therefore never live at the same time:
;;   r1: cptr / imm
;;   r6: cnt  / clw / tmpx
;;   r7: step / crw / tmpy

;; register sptr holds the start address of array 'state'
sptr RN r0
;; register cptr holds the start address of array 'rcon'
cptr RN r1
;; register imm holds an immediate value
imm RN r1
;; register cnt holds the step counter (for loop termination)
cnt RN r6
;; register step holds the number of steps (parameter 'steps')
step RN r7
;; registers xlw and ylw hold x-word and y-word of a left-side branch
xlw RN r2
ylw RN r3
;; registers xrw and yrw hold x-word and y-word of a right-side branch
xrw RN r4
yrw RN r5
;; register clw and crw hold round-constant for left and right branch
clw RN r6
crw RN r7
;; registers tmpx and tmpy hold temporary values
tmpx RN r6
tmpy RN r7
;; registers tw0 to tw5 are high registers (used as temporary storage)
tw0 RN r8
tw1 RN r9
tw2 RN r10
tw3 RN r11
tw4 RN r12
tw5 RN lr


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;; MACROS FOR SPARKLE384 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; PROLOGUE_384: save r4-r7, lr and r8-r12 on the stack (10 words), load the
;; six left-side words of the state and copy 'steps' (r1) to register step.
;; On exit: xlw/ylw = x0/y0, tw2/tw3 = x1/y1, tw4/tw5 = x2/y2, and sptr
;; points to x3 (state + 24 bytes).
    MACRO
    PROLOGUE_384
    ;; push callee-saved registers
    push    {r4-r7,lr}
    mov     r3, r8
    mov     r4, r9
    mov     r5, r10
    mov     r6, r11
    mov     r7, r12
    push    {r3-r7}
    ;; load the left-side branches
    ldm     sptr!, {xlw-crw}
    mov     tw2, xrw
    mov     tw3, yrw
    mov     tw4, clw
    mov     tw5, crw
    ;; initialize 'steps' register
    movs    step, r1
    MEND

;; EPILOGUE_384: write the six left-side words back to the start of the
;; state-array, restore the registers saved by PROLOGUE_384 and return (the
;; saved lr is popped straight into pc). Expects sptr to point to x3.
    MACRO
    EPILOGUE_384
    ;; store the left-side branches
    subs    sptr, #24
    stm     sptr!, {xlw-ylw}
    mov     xlw, tw2
    mov     ylw, tw3
    mov     xrw, tw4
    mov     yrw, tw5
    stm     sptr!, {xlw-yrw}
    ;; pop callee-saved registers
    pop     {r3-r7}
    mov     r8, r3
    mov     r9, r4
    mov     r10, r5
    mov     r11, r6
    mov     r12, r7
    pop     {r4-r7,pc}
    MEND

;; ADD_STEP_CNT_384: y1 ^= cnt and y0 ^= RCON[cnt & 7]. Overwrites step (the
;; caller has pushed cnt/step beforehand) and leaves cptr = address of RCON,
;; which LD_BRANS_0_3 relies on.
    MACRO
    ADD_STEP_CNT_384
    ;; add cnt to y1 (in temp register tw3)
    mov     imm, tw3
    eors    imm, cnt
    mov     tw3, imm
    ;; add round-constant RCON[cnt&7] to y0
    ldr     cptr, =RCON
    movs    step, #7
    ands    step, cnt
    lsls    step, #2
    ldr     step, [cptr, step]
    eors    ylw, step
    MEND

;; ARX_BOX_PAIR: apply the 4-round ARX-box to the left branch (xlw, ylw) with
;; constant clw and, interleaved, to the right branch (xrw, yrw) with constant
;; crw. The y-words are rotated in place, so the rotation amounts written
;; below accumulate: relative to its logical value, y is rotated right by
;; 31, 8, 17, 15, 0, 1, 24 and 16 (mod 32) at the successive add/xor steps;
;; the final rotation by 16 brings it back to 0. Register imm (r1, i.e. also
;; cptr) is overwritten.
    MACRO
    ARX_BOX_PAIR
    ;; y = y >>> 31; x = x + y
    movs    imm, #31
    rors    ylw, imm
    adds    xlw, ylw
    rors    yrw, imm
    adds    xrw, yrw
    ;; y = y >>> 09; y = y ^ x
    movs    imm, #9
    rors    ylw, imm
    eors    ylw, xlw
    rors    yrw, imm
    eors    yrw, xrw
    ;; x = x ^ rcon
    eors    xlw, clw
    eors    xrw, crw
    ;; y = y >>> 09; x = x + y (imm still holds 9)
    rors    ylw, imm
    adds    xlw, ylw
    rors    yrw, imm
    adds    xrw, yrw
    ;; y = y >>> 30; y = y ^ x
    movs    imm, #30
    rors    ylw, imm
    eors    ylw, xlw
    rors    yrw, imm
    eors    yrw, xrw
    ;; x = x ^ rcon
    eors    xlw, clw
    eors    xrw, crw
    ;; y = y >>> 17; x = x + y
    movs    imm, #17
    rors    ylw, imm
    adds    xlw, ylw
    rors    yrw, imm
    adds    xrw, yrw
    ;; y = y >>> 01; y = y ^ x
    movs    imm, #1
    rors    ylw, imm
    eors    ylw, xlw
    rors    yrw, imm
    eors    yrw, xrw
    ;; x = x ^ rcon
    eors    xlw, clw
    eors    xrw, crw
    ;; y = y >>> 23; x = x + y
    movs    imm, #23
    rors    ylw, imm
    adds    xlw, ylw
    rors    yrw, imm
    adds    xrw, yrw
    ;; y = y >>> 24; y = y ^ x
    movs    imm, #24
    rors    ylw, imm
    eors    ylw, xlw
    rors    yrw, imm
    eors    yrw, xrw
    ;; x = x ^ rcon
    eors    xlw, clw
    eors    xrw, crw
    ;; y = y >>> 16
    movs    imm, #16
    rors    ylw, imm
    rors    yrw, imm
    MEND

;; LD_BRANS_0_3: expects cptr to still hold the address of RCON (it is set by
;; ADD_STEP_CNT_384 immediately before).
    MACRO
    LD_BRANS_0_3
    ;; branch 0 (i.e. x0, y0) already in registers xlw, ylw
    ;; load branch 3 (i.e. x3, y3) to registers xrw, yrw
    ldm     sptr!, {xrw-yrw}
    ;; load round-constants clw = RCON[0] and crw = RCON[3]
    ldr     clw, [cptr, #0]
    ldr     crw, [cptr, #12]
    MEND

;; LD_BRANS_1_4: cptr is reloaded because ARX_BOX_PAIR used r1 as imm.
    MACRO
    LD_BRANS_1_4
    ;; branch 1 (i.e. x1, y1) already in registers xlw, ylw
    ;; load branch 4 (i.e. x4, y4) to registers xrw, yrw
    ldm     sptr!, {xrw-yrw}
    ;; load round-constants clw = RCON[1] and crw = RCON[4]
    ldr     cptr, =RCON
    ldr     clw, [cptr, #4]
    ldr     crw, [cptr, #16]
    MEND

;; LD_BRANS_2_5: cptr is reloaded because ARX_BOX_PAIR used r1 as imm.
    MACRO
    LD_BRANS_2_5
    ;; branch 2 (i.e. x2, y2) already in registers xlw, ylw
    ;; load branch 5 (i.e. x5, y5) to registers xrw, yrw
    ldm     sptr!, {xrw-yrw}
    ;; load round-constants clw = RCON[2] and crw = RCON[5]
    ldr     cptr, =RCON
    ldr     clw, [cptr, #8]
    ldr     crw, [cptr, #20]
    MEND

;; ST_BRANS_0_3: the processed left branch 0 is written to the state-array
;; slot from which branch 3 was just loaded; the right branch (XORed with the
;; left one) is parked in tw2/tw3.
    MACRO
    ST_BRANS_0_3
    ;; tw0 = x0, tw1 = y0 (start of the running XOR over the left branches)
    mov     tw0, xlw
    mov     tw1, ylw
    ;; left branch is XORed to right branch
    eors    xrw, xlw
    eors    yrw, ylw
    ;; store left branch in the state-array
    subs    sptr, #8
    stm     sptr!, {xlw-ylw}
    ;; load left branch of next pair of ARX-boxes
    mov     xlw, tw2
    mov     ylw, tw3
    ;; move right branch to temp regs tw2 and tw3
    mov     tw2, xrw
    mov     tw3, yrw
    MEND

;; ST_BRANS_1_4: same as ST_BRANS_0_3 for branches 1 and 4; the running XOR
;; in tw0/tw1 is updated via tmpx/tmpy (which overwrite clw/crw).
    MACRO
    ST_BRANS_1_4
    ;; compute tmpx = tmpx ^ x1, tmpy = tmpy ^ y1
    mov     tmpx, tw0
    mov     tmpy, tw1
    eors    tmpx, xlw
    eors    tmpy, ylw
    mov     tw0, tmpx
    mov     tw1, tmpy
    ;; left branch is XORed to right branch
    eors    xrw, xlw
    eors    yrw, ylw
    ;; store left branch in the state-array
    subs    sptr, #8
    stm     sptr!, {xlw-ylw}
    ;; load left branch of next pair of ARX-boxes
    mov     xlw, tw4
    mov     ylw, tw5
    ;; move right branch to temp regs tw4 and tw5
    mov     tw4, xrw
    mov     tw5, yrw
    MEND

;; ST_BRANS_2_5: same for branches 2 and 5; the final XOR sums stay in
;; tmpx/tmpy and the right branch stays in xrw/yrw for LINEAR_LAYER_384.
    MACRO
    ST_BRANS_2_5
    ;; compute tmpx = tmpx ^ x2, tmpy = tmpy ^ y2
    mov     tmpx, tw0
    mov     tmpy, tw1
    eors    tmpx, xlw
    eors    tmpy, ylw
    ;; left branch is XORed to right branch
    eors    xrw, xlw
    eors    yrw, ylw
    ;; store left branch in the state-array
    subs    sptr, #8
    stm     sptr!, {xlw-ylw}
    ;; state-pointer contains address of x3
    subs    sptr, #24
    MEND

    MACRO
    ARXBOX_LAYER_384
    ;; compute branch 0 (x0, y0) and branch 3 (x3, y3)
    LD_BRANS_0_3
    ARX_BOX_PAIR
    ST_BRANS_0_3
    ;; compute branch 1 (x1, y1) and branch 4 (x4, y4)
    LD_BRANS_1_4
    ARX_BOX_PAIR
    ST_BRANS_1_4
    ;; compute branch 2 (x2, y2) and branch 5 (x5, y5)
    LD_BRANS_2_5
    ARX_BOX_PAIR
    ST_BRANS_2_5
    ;; branch 3 (i.e. x3, y3) is in temp regs tw2, tw3
    ;; branch 4 (i.e. x4, y4) is in temp regs tw4, tw5
    ;; branch 5 (i.e. x5, y5) is in regs xrw, yrw
    ;; (each of them already XORed with left branch 0, 1, 2, respectively)
    MEND

;; LINEAR_LAYER_384: expects tmpx/tmpy = XOR of the three left-side x-/y-words
;; (as left by ST_BRANS_2_5). ELL(v) is computed as (v ^ (v << 16)) >>> 16.
;; On exit the new left-side branches are in xlw/ylw (branch 0), tw2/tw3
;; (branch 1) and tw4/tw5 (branch 2). Register imm (r1) is overwritten.
    MACRO
    LINEAR_LAYER_384
    ;; compute tmpx = ELL(tmpx), tmpy = ELL(tmpy)
    mov     xlw, tmpx
    mov     ylw, tmpy
    lsls    xlw, #16
    lsls    ylw, #16
    eors    tmpx, xlw
    eors    tmpy, ylw
    movs    imm, #16
    rors    tmpx, imm
    rors    tmpy, imm
    ;; compute x4 = x4 ^ tmpy and y4 = y4 ^ tmpx
    ;; branch 4 becomes branch 0 in next iteration
    mov     xlw, tw4
    eors    xlw, tmpy
    mov     ylw, tw5
    eors    ylw, tmpx
    ;; compute x3 = x3 ^ tmpy and y3 = y3 ^ tmpx
    ;; branch 3 becomes branch 2 in next iteration
    mov     imm, tw2
    eors    imm, tmpy
    mov     tw4, imm
    mov     imm, tw3
    eors    imm, tmpx
    mov     tw5, imm
    ;; compute x5 = x5 ^ tmpy and y5 = y5 ^ tmpx
    ;; branch 5 becomes branch 1 in next iteration
    eors    xrw, tmpy
    mov     tw2, xrw
    eors    yrw, tmpx
    mov     tw3, yrw
    MEND


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;; SPARKLE384 PERMUTATION (BRANCH-UNROLLED) ;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Function prototype:
;; -------------------
;; void sparkle384_arm(uint32_t *state, int steps)
;;
;; Parameters:
;; -----------
;; state: pointer to an uint32_t-array containing the 12 state words
;;        (passed in r0)
;; steps: number of steps (passed in r1); if steps <= 0 the state is left
;;        unchanged
;;
;; Return value:
;; -------------
;; None
;;
;; Register/stack usage:
;; ---------------------
;; r4-r12 and lr are saved in the prologue and restored in the epilogue
;; (10 words on the stack); r0-r3 are overwritten. Each loop iteration
;; temporarily pushes two further words (cnt and step).

sparkle384_arm PROC
    PROLOGUE_384            ;; push callee-saved registers and load state
    movs    cnt, #0         ;; initialize step-counter
    ;; The loop below tests its exit condition only at the bottom, so without
    ;; this guard steps == 0 would run until cnt wraps around. The guard is
    ;; written as "bgt + b" because lend_384 is further away than a Thumb
    ;; conditional branch can reach.
    cmp     step, #0        ;; signed test of 'steps'
    bgt     loop_384        ;; steps > 0: enter the loop
    b       lend_384        ;; steps <= 0: store the unchanged state and return
loop_384                    ;; start of loop
    push    {cnt,step}      ;; push step-counter and 'steps' to free registers
    ADD_STEP_CNT_384        ;; macro to add step-counter to state
    ARXBOX_LAYER_384        ;; macro for the ARXBOX layer
    LINEAR_LAYER_384        ;; macro for the linear layer
    pop     {cnt,step}      ;; restore step-counter and 'steps' from stack
    adds    cnt, #1         ;; increment step-counter
    cmp     cnt, step       ;; test whether step-counter equals 'steps'
    beq     lend_384        ;; if yes then branch to end of loop
    b       loop_384        ;; if not then branch to start of loop
lend_384                    ;; end of loop
    EPILOGUE_384            ;; store state and pop callee-saved registers
    ENDP


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;; SPARKLE ROUND CONSTANTS ;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; The eight round constants (8 words, 32 bytes) live in a READWRITE data
;; area, i.e. the .data segment, so the ARX-box code fetches them from RAM.
;; Changing "READWRITE" to "READONLY" in the AREA directive below moves them
;; to the .rodata segment instead: they are then fetched from flash, which
;; saves the 32 bytes of RAM but may slow execution down on devices with a
;; high number of flash wait states.

    AREA    sparkle_rcon, DATA, READWRITE, ALIGN=2

;; round constants RCON[0..3] and RCON[4..7], stored contiguously
RCON    DCD     0xB7E15162, 0xBF715880, 0x38B4DA56, 0x324E7738
        DCD     0xBB1185EB, 0x4F7C7B57, 0xCFBFA1C8, 0xC2B3293D

    END