///////////////////////////////////////////////////////////////////////////////
// sparkle384_v6m.S: ARMv6-M implementation of the SPARKLE384 permutation.   //
// This file is part of the SPARKLE submission to NIST's LW Crypto Project.  //
// Version 1.1.2 (2020-10-30), see <http://www.cryptolux.org/> for updates.  //
// Authors: The SPARKLE Group (C. Beierle, A. Biryukov, L. Cardoso dos       //
// Santos, J. Groszschaedl, L. Perrin, A. Udovenko, V. Velichkov, Q. Wang).  //
// License: GPLv3 (see LICENSE file), other licenses available upon request. //
// Copyright (C) 2019-2020 University of Luxembourg <http://www.uni.lu/>.    //
// ------------------------------------------------------------------------- //
// This program is free software: you can redistribute it and/or modify it   //
// under the terms of the GNU General Public License as published by the     //
// Free Software Foundation, either version 3 of the License, or (at your    //
// option) any later version. This program is distributed in the hope that   //
// it will be useful, but WITHOUT ANY WARRANTY; without even the implied     //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the  //
// GNU General Public License for more details. You should have received a   //
// copy of the GNU General Public License along with this program. If not,   //
// see <http://www.gnu.org/licenses/>.                                       //
///////////////////////////////////////////////////////////////////////////////


.syntax unified
.thumb
.arch armv6-m
.eabi_attribute Tag_ABI_align_preserved, 1


.section .text
.balign 4


.global sparkle384_arm


///////////////////////////////////////////////////////////////////////////////
//////////////////////// REGISTER NAMES AND CONSTANTS /////////////////////////
///////////////////////////////////////////////////////////////////////////////

// NOTE: several aliases deliberately share one physical register:
//   r1 : cptr / imm            (so every use of imm destroys cptr)
//   r6 : cnt  / clw / tmpx
//   r7 : step / crw / tmpy
// The loop in sparkle384_arm therefore keeps cnt and step on the stack while
// the macros below are executing.

// register sptr holds the start address of array 'state'
sptr .req r0
// register cptr holds the start address of array 'rcon'
cptr .req r1
// register imm holds an immediate value
imm .req r1
// register cnt holds the step counter (for loop termination)
cnt .req r6
// register step holds the number of steps (parameter 'steps')
step .req r7
// registers xlw and ylw hold x-word and y-word of a left-side branch
xlw .req r2
ylw .req r3
// registers xrw and yrw hold x-word and y-word of a right-side branch
xrw .req r4
yrw .req r5
// register clw and crw hold round-constant for left and right branch
clw .req r6
crw .req r7
// registers tmpx and tmpy hold temporary values
tmpx .req r6
tmpy .req r7
// registers tw0 to tw5 are high registers (used as temporary storage)
tw0 .req r8
tw1 .req r9
tw2 .req r10
tw3 .req r11
tw4 .req r12
tw5 .req lr


///////////////////////////////////////////////////////////////////////////////
//////////////////////////// MACROS FOR SPARKLE384 ////////////////////////////
///////////////////////////////////////////////////////////////////////////////


// PROLOGUE_384: saves r4-r7, lr and r8-r12 on the stack (10 words in total),
// loads the six left-side state words and copies 'steps' out of r1.
// On exit: xlw/ylw = x0/y0, tw2/tw3 = x1/y1, tw4/tw5 = x2/y2, sptr points to
// x3, step = 'steps'.
.macro PROLOGUE_384
    // push callee-saved registers
    push    {r4-r7,lr}
    mov     r3, r8
    mov     r4, r9
    mov     r5, r10
    mov     r6, r11
    mov     r7, r12
    push    {r3-r7}
    // load the left-side branches (x0, y0, x1, y1, x2, y2 -> r2..r7)
    ldm     sptr!, {xlw-crw}
    mov     tw2, xrw
    mov     tw3, yrw
    mov     tw4, clw
    mov     tw5, crw
    // initialize 'steps' register (r1 is reused as imm/cptr from here on)
    movs    step, r1
.endm


// EPILOGUE_384: writes the six left-side state words (held in xlw/ylw and
// tw2..tw5) back to state[0..5], restores the saved registers and returns.
// Expects sptr to point to x3 (i.e. state + 24 bytes).
.macro EPILOGUE_384
    // store the left-side branches
    subs    sptr, #24
    stm     sptr!, {xlw-ylw}
    mov     xlw, tw2
    mov     ylw, tw3
    mov     xrw, tw4
    mov     yrw, tw5
    stm     sptr!, {xlw-yrw}
    // pop callee-saved registers
    pop     {r3-r7}
    mov     r8, r3
    mov     r9, r4
    mov     r10, r5
    mov     r11, r6
    mov     r12, r7
    pop     {r4-r7,pc}
.endm


// ADD_STEP_CNT_384: XORs the step counter into y1 and RCON[cnt&7] into y0.
// Reads cnt (r6); overwrites step (r7) and leaves cptr (r1) = address of RCON.
.macro ADD_STEP_CNT_384
    // add cnt to y1 (in temp register tw3)
    mov     imm, tw3
    eors    imm, cnt
    mov     tw3, imm
    // add round-constant RCON[cnt&7] to y0
    ldr     cptr, =RCON
    movs    step, #7
    ands    step, cnt
    lsls    step, #2
    ldr     step, [cptr, step]
    eors    ylw, step
.endm


// ARX_BOX_PAIR: applies the four-round ARX-box to the left branch (xlw, ylw,
// constant clw) and to the right branch (xrw, yrw, constant crw) in parallel.
// The y-words are kept in rotated form between operations: each 'rors' only
// applies the difference to the rotation required next, and the last rotation
// by 16 brings the cumulative rotation back to a multiple of 32.
// Overwrites imm (r1), so cptr is no longer valid after this macro.
.macro ARX_BOX_PAIR
    // y = y >>> 31; x = x + y
    movs    imm, #31
    rors    ylw, imm
    adds    xlw, ylw
    rors    yrw, imm
    adds    xrw, yrw
    // y = y >>> 09; y = y ^ x
    movs    imm, #9
    rors    ylw, imm
    eors    ylw, xlw
    rors    yrw, imm
    eors    yrw, xrw
    // x = x ^ rcon
    eors    xlw, clw
    eors    xrw, crw
    // y = y >>> 09; x = x + y
    rors    ylw, imm
    adds    xlw, ylw
    rors    yrw, imm
    adds    xrw, yrw
    // y = y >>> 30; y = y ^ x
    movs    imm, #30
    rors    ylw, imm
    eors    ylw, xlw
    rors    yrw, imm
    eors    yrw, xrw
    // x = x ^ rcon
    eors    xlw, clw
    eors    xrw, crw
    // y = y >>> 17; x = x + y
    movs    imm, #17
    rors    ylw, imm
    adds    xlw, ylw
    rors    yrw, imm
    adds    xrw, yrw
    // y = y >>> 01; y = y ^ x
    movs    imm, #1
    rors    ylw, imm
    eors    ylw, xlw
    rors    yrw, imm
    eors    yrw, xrw
    // x = x ^ rcon
    eors    xlw, clw
    eors    xrw, crw
    // y = y >>> 23; x = x + y
    movs    imm, #23
    rors    ylw, imm
    adds    xlw, ylw
    rors    yrw, imm
    adds    xrw, yrw
    // y = y >>> 24; y = y ^ x
    movs    imm, #24
    rors    ylw, imm
    eors    ylw, xlw
    rors    yrw, imm
    eors    yrw, xrw
    // x = x ^ rcon
    eors    xlw, clw
    eors    xrw, crw
    // y = y >>> 16 (undo the remaining rotation of y)
    movs    imm, #16
    rors    ylw, imm
    rors    yrw, imm
.endm


// LD_BRANS_0_3: relies on cptr still holding the address of RCON, which is
// the case directly after ADD_STEP_CNT_384.
.macro LD_BRANS_0_3
    // branch 0 (i.e. x0, y0) already in registers xlw, ylw
    // load branch 3 (i.e. x3, y3) to registers xrw, yrw
    ldm     sptr!, {xrw-yrw}
    // load round-constants clw = RCON[0] and crw = RCON[3]
    ldr     clw, [cptr, #0]
    ldr     crw, [cptr, #12]
.endm


// LD_BRANS_1_4: reloads cptr because the preceding ARX_BOX_PAIR overwrote r1.
.macro LD_BRANS_1_4
    // branch 1 (i.e. x1, y1) already in registers xlw, ylw
    // load branch 4 (i.e. x4, y4) to registers xrw, yrw
    ldm     sptr!, {xrw-yrw}
    // load round-constants clw = RCON[1] and crw = RCON[4]
    ldr     cptr, =RCON
    ldr     clw, [cptr, #4]
    ldr     crw, [cptr, #16]
.endm


// LD_BRANS_2_5: reloads cptr because the preceding ARX_BOX_PAIR overwrote r1.
.macro LD_BRANS_2_5
    // branch 2 (i.e. x2, y2) already in registers xlw, ylw
    // load branch 5 (i.e. x5, y5) to registers xrw, yrw
    ldm     sptr!, {xrw-yrw}
    // load round-constants clw = RCON[2] and crw = RCON[5]
    ldr     cptr, =RCON
    ldr     clw, [cptr, #8]
    ldr     crw, [cptr, #20]
.endm


// ST_BRANS_0_3: starts the running XOR of the left branches in tw0/tw1 and
// writes branch 0 into the memory slot that branch 3 was just loaded from.
.macro ST_BRANS_0_3
    // tmpx = x0, tmpy = y0 (kept in tw0, tw1)
    mov     tw0, xlw
    mov     tw1, ylw
    // left branch is XORed to right branch
    eors    xrw, xlw
    eors    yrw, ylw
    // store left branch in the state-array (step back over the loaded pair)
    subs    sptr, #8
    stm     sptr!, {xlw-ylw}
    // load left branch of next pair of ARX-boxes
    mov     xlw, tw2
    mov     ylw, tw3
    // move right branch to temp regs tw2 and tw3
    mov     tw2, xrw
    mov     tw3, yrw
.endm


// ST_BRANS_1_4: same as ST_BRANS_0_3 for branches 1 and 4; overwrites r6/r7
// (tmpx/tmpy), i.e. the round constants are gone afterwards.
.macro ST_BRANS_1_4
    // compute tmpx = tmpx ^ x1, tmpy = tmpy ^ y1
    mov     tmpx, tw0
    mov     tmpy, tw1
    eors    tmpx, xlw
    eors    tmpy, ylw
    mov     tw0, tmpx
    mov     tw1, tmpy
    // left branch is XORed to right branch
    eors    xrw, xlw
    eors    yrw, ylw
    // store left branch in the state-array (step back over the loaded pair)
    subs    sptr, #8
    stm     sptr!, {xlw-ylw}
    // load left branch of next pair of ARX-boxes
    mov     xlw, tw4
    mov     ylw, tw5
    // move right branch to temp regs tw4 and tw5
    mov     tw4, xrw
    mov     tw5, yrw
.endm


// ST_BRANS_2_5: finishes the running XOR (result stays in tmpx/tmpy = r6/r7
// for LINEAR_LAYER_384) and rewinds sptr to the slot of x3.
.macro ST_BRANS_2_5
    // compute tmpx = tmpx ^ x2, tmpy = tmpy ^ y2
    mov     tmpx, tw0
    mov     tmpy, tw1
    eors    tmpx, xlw
    eors    tmpy, ylw
    // left branch is XORed to right branch
    eors    xrw, xlw
    eors    yrw, ylw
    // store left branch in the state-array (step back over the loaded pair)
    subs    sptr, #8
    stm     sptr!, {xlw-ylw}
    // state-pointer contains address of x3
    subs    sptr, #24
.endm


.macro ARXBOX_LAYER_384
    // compute branch 0 (x0, y0) and branch 3 (x3, y3)
    LD_BRANS_0_3
    ARX_BOX_PAIR
    ST_BRANS_0_3
    // compute branch 1 (x1, y1) and branch 4 (x4, y4)
    LD_BRANS_1_4
    ARX_BOX_PAIR
    ST_BRANS_1_4
    // compute branch 2 (x2, y2) and branch 5 (x5, y5)
    LD_BRANS_2_5
    ARX_BOX_PAIR
    ST_BRANS_2_5
    // branch 3 (i.e. x3, y3) is in temp regs tw2, tw3
    // branch 4 (i.e. x4, y4) is in temp regs tw4, tw5
    // branch 5 (i.e. x5, y5) is in regs xrw, yrw
.endm


// LINEAR_LAYER_384: expects the XOR of the three left branches in tmpx/tmpy
// and the (already left-XORed) right branches as left by ARXBOX_LAYER_384.
// On exit the new left-side branches are in xlw/ylw, tw2/tw3 and tw4/tw5,
// which is the register layout expected at the start of the next step.
// Overwrites imm (r1).
.macro LINEAR_LAYER_384
    // compute tmpx = ELL(tmpx), tmpy = ELL(tmpy)
    // (xlw, ylw are free here: branch 2 was already stored by ST_BRANS_2_5)
    mov     xlw, tmpx
    mov     ylw, tmpy
    lsls    xlw, #16
    lsls    ylw, #16
    eors    tmpx, xlw
    eors    tmpy, ylw
    movs    imm, #16
    rors    tmpx, imm
    rors    tmpy, imm
    // compute x4 = x4 ^ tmpy and y4 = y4 ^ tmpx
    // branch 4 becomes branch 0 in next iteration
    mov     xlw, tw4
    eors    xlw, tmpy
    mov     ylw, tw5
    eors    ylw, tmpx
    // compute x3 = x3 ^ tmpy and y3 = y3 ^ tmpx
    // branch 3 becomes branch 2 in next iteration
    mov     imm, tw2
    eors    imm, tmpy
    mov     tw4, imm
    mov     imm, tw3
    eors    imm, tmpx
    mov     tw5, imm
    // compute x5 = x5 ^ tmpy and y5 = y5 ^ tmpx
    // branch 5 becomes branch 1 in next iteration
    eors    xrw, tmpy
    mov     tw2, xrw
    eors    yrw, tmpx
    mov     tw3, yrw
.endm


///////////////////////////////////////////////////////////////////////////////
////////////////// SPARKLE384 PERMUTATION (BRANCH-UNROLLED) ///////////////////
///////////////////////////////////////////////////////////////////////////////


// Function prototype:
// -------------------
// void sparkle384_arm(uint32_t *state, int steps)
//
// Parameters:
// -----------
// state: pointer to an uint32_t-array containing the 12 state words
// steps: number of steps; if steps <= 0 the state is left unchanged
//
// Return value:
// -------------
// None
//
// Register/stack usage:
// ---------------------
// r0 = state, r1 = steps on entry. r4-r12 are saved and restored via the
// stack (10 words) and a further 2 words are pushed inside the loop. r0-r3
// are overwritten.


.type sparkle384_arm, %function
.func sparkle384_arm
sparkle384_arm:
    PROLOGUE_384            // push callee-saved registers and load state
    // The loop below tests for termination only after a full step and only
    // for equality, so it must not be entered when steps <= 0 (the counter
    // would have to wrap around before it could match).
    cmp     step, #0        // steps <= 0 ?
    bgt     .Lstart_384     // no: execute the permutation
    b       .Llend_384      // yes: write the state back unchanged
.Lstart_384:
    movs    cnt, #0         // initialize step-counter
.Lloop_384:                 // start of loop
    push    {cnt,step}      // push step-counter and 'steps' to free registers
    ADD_STEP_CNT_384        // macro to add step-counter to state
    ARXBOX_LAYER_384        // macro for the ARXBOX layer
    LINEAR_LAYER_384        // macro for the linear layer
    pop     {cnt,step}      // restore step-counter and 'steps' from stack
    adds    cnt, #1         // increment step-counter
    cmp     cnt, step       // test whether step-counter equals 'steps'
    // beq + b instead of a single bne: presumably because the loop body is
    // out of range for a Thumb conditional branch - TODO confirm
    beq     .Llend_384      // if yes then branch to end of loop
    b       .Lloop_384      // if not then branch to start of loop
.Llend_384:                 // end of loop
    EPILOGUE_384            // store state and pop callee-saved registers
.endfunc


.size sparkle384_arm, .-sparkle384_arm


///////////////////////////////////////////////////////////////////////////////
/////////////////////////// SPARKLE ROUND CONSTANTS ///////////////////////////
///////////////////////////////////////////////////////////////////////////////

// This implementation places the round constants in the .data segment, which
// means they are loaded from RAM during the computation of the ARX-boxes. It
// would also be possible to place them in the .rodata segment (by replacing
// the ".section .data" directive below by ".section .rodata") so that they are
// loaded from flash, which reduces the RAM consumption by 32 bytes, but may
// increase the execution time on devices with a high number of flash wait
// states.


.section .data
.balign 4


.type RCON, %object
.size RCON, 32
RCON:
.word 0xB7E15162, 0xBF715880, 0x38B4DA56, 0x324E7738
.word 0xBB1185EB, 0x4F7C7B57, 0xCFBFA1C8, 0xC2B3293D


.end