;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sparkle512_arm.asm: ARM Asm implementation of the SPARKLE512 permutation. ;;
;; This file is part of the SPARKLE submission to NIST's LW Crypto Project. ;;
;; Version 1.0.1 (2019-06-29), see <http://www.cryptolux.org/> for updates. ;;
;; Authors: The SPARKLE Group (C. Beierle, A. Biryukov, L. Cardoso dos ;;
;; Santos, J. Groszschaedl, L. Perrin, A. Udovenko, V. Velichkov, Q. Wang). ;;
;; License: GPLv3 (see LICENSE file), other licenses available upon request. ;;
;; Copyright (C) 2019 University of Luxembourg <http://www.uni.lu/>. ;;
;; ------------------------------------------------------------------------- ;;
;; This program is free software: you can redistribute it and/or modify it ;;
;; under the terms of the GNU General Public License as published by the ;;
;; Free Software Foundation, either version 3 of the License, or (at your ;;
;; option) any later version. This program is distributed in the hope that ;;
;; it will be useful, but WITHOUT ANY WARRANTY; without even the implied ;;
;; warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;
;; GNU General Public License for more details. You should have received a ;;
;; copy of the GNU General Public License along with this program. If not, ;;
;; see <http://www.gnu.org/licenses/>. ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Everything that follows (round constants and code) is placed in a
;; read-only code area named sparkle_arm.
AREA sparkle_arm, CODE, READONLY
;; NOTE(review): with armasm the ALIGN operand is a byte count, so this asks
;; for a 2-byte boundary; confirm that 2 (rather than 4) is what is intended.
ALIGN 2
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;; REGISTER NAMES AND CONSTANTS ;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Register aliases. Several aliases share one physical register on purpose:
;; the macros further down only ever use one of the sharing aliases at a time
;; (e.g. ns/scnt are pushed on the stack before tmpx/tmpy are written).

;; --- arguments, loop control and table pointer ---
sta     RN      r0      ;; address into the state array (1st argument)
ns      RN      r1      ;; number of steps (2nd argument)
scnt    RN      r2      ;; step counter, compared against ns to end the loop
rca     RN      lr      ;; address into the round-constant array RCON

;; --- round constants, loaded two at a time ---
c0w     RN      r3      ;; 1st word of RCON
c1w     RN      r4      ;; 2nd word of RCON
c2w     RN      r3      ;; 3rd word of RCON
c3w     RN      r4      ;; 4th word of RCON
c4w     RN      r11     ;; 5th word of RCON
c5w     RN      r12     ;; 6th word of RCON
c6w     RN      r11     ;; 7th word of RCON
c7w     RN      r12     ;; 8th word of RCON

;; --- left-side branches: state words 1 to 8 ---
x0w     RN      r5      ;; 1st word of the state
y0w     RN      r6      ;; 2nd word of the state
x1w     RN      r7      ;; 3rd word of the state
y1w     RN      r8      ;; 4th word of the state
x2w     RN      r9      ;; 5th word of the state
y2w     RN      r10     ;; 6th word of the state
x3w     RN      r11     ;; 7th word of the state
y3w     RN      r12     ;; 8th word of the state

;; --- right-side branches: state words 9 to 16 ---
x4w     RN      r3      ;; 9th word of the state
y4w     RN      r4      ;; 10th word of the state
x5w     RN      r5      ;; 11th word of the state
y5w     RN      r6      ;; 12th word of the state
x6w     RN      r7      ;; 13th word of the state
y6w     RN      r8      ;; 14th word of the state
x7w     RN      r9      ;; 15th word of the state
y7w     RN      r10     ;; 16th word of the state

;; --- temporaries of the linear layer ---
tmpx    RN      r1      ;; XOR of the left-side x-words of the state
tmpy    RN      r2      ;; XOR of the left-side y-words of the state
l0w     RN      r11     ;; x-word of a left-side branch re-read from memory
l1w     RN      r12     ;; y-word of a left-side branch re-read from memory
;; Table of the eight 32-bit round constants, stored as consecutive words.
RCON    DCD     0xB7E15162, 0xBF715880, 0x38B4DA56, 0x324E7738
        DCD     0xBB1185EB, 0x4F7C7B57, 0xCFBFA1C8, 0xC2B3293D
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;; MACROS FOR SPARKLE512 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; PROLOGUE_512: function entry sequence. Saves r4-r12 and lr on the stack,
;; loads the eight left-side state words x[0]-y[3] into registers (the
;; write-back leaves sta pointing just past them) and sets rca to the address
;; of the round-constant table RCON.
        MACRO
        PROLOGUE_512
        push    {r4-r12,lr}             ;; popped again by EPILOGUE_512
        ldmia.w sta!, {x0w-y3w}         ;; x[0]-y[3] -> registers, sta += 32
        ldr     rca, =RCON              ;; rca = address of RCON
        MEND
;; EPILOGUE_512: function exit sequence. Writes the registers x0w-y3w to the
;; eight words immediately below sta (sta is decremented by the write-back),
;; then pops r4-r12 and loads the saved return address straight into pc, which
;; returns to the caller.
        MACRO
        EPILOGUE_512
        stmdb.w sta!, {x0w-y3w}         ;; registers -> x[0]-y[3], sta -= 32
        pop     {r4-r12,pc}             ;; restore registers and return
        MEND
;; ADD_STEP_CNT_512: injects the step counter into the state.
;;   y[1] ^= scnt
;;   y[0] ^= word at rca + 4 * (scnt mod 8)
;; rca is expected to point at the start of RCON here. c0w is used as a
;; scratch register and is overwritten.
        MACRO
        ADD_STEP_CNT_512
        eor     y1w, y1w, scnt                  ;; y[1] ^= step counter
        and     c0w, scnt, #7                   ;; table index = scnt mod 8
        ldr.w   c0w, [rca, c0w, lsl #2]         ;; fetch constant at that index
        eor     y0w, y0w, c0w                   ;; y[0] ^= constant
        MEND
;; ARX_BOX: four add / rotate-xor / constant-xor rounds on one branch.
;; Operands, in order: register with the x-word (updated in place), register
;; with the y-word (updated in place), register with the round constant
;; (read only). In the comments below x, y and c denote these three operands.
        MACRO
        ARX_BOX $xw, $yw, $rc
        ;; round 1
        add     $xw, $xw, $yw, ror #31          ;; x += ror(y, 31)
        eor     $yw, $yw, $xw, ror #24          ;; y ^= ror(x, 24)
        eor     $xw, $xw, $rc                   ;; x ^= c
        ;; round 2
        add     $xw, $xw, $yw, ror #17          ;; x += ror(y, 17)
        eor     $yw, $yw, $xw, ror #17          ;; y ^= ror(x, 17)
        eor     $xw, $xw, $rc                   ;; x ^= c
        ;; round 3
        add     $xw, $xw, $yw                   ;; x += y
        eor     $yw, $yw, $xw, ror #31          ;; y ^= ror(x, 31)
        eor     $xw, $xw, $rc                   ;; x ^= c
        ;; round 4
        add     $xw, $xw, $yw, ror #24          ;; x += ror(y, 24)
        eor     $yw, $yw, $xw, ror #16          ;; y ^= ror(x, 16)
        eor     $xw, $xw, $rc                   ;; x ^= c
        MEND
;; QUA_XOR: writes the XOR of the last four operands into the first operand.
;; The destination is written by the very first instruction, so the result is
;; only the plain four-way XOR if the destination register is not also used
;; as the third or fourth source operand.
        MACRO
        QUA_XOR $acc, $in0, $in1, $in2, $in3
        eor     $acc, $in0, $in1                ;; acc  = in0 ^ in1
        eor     $acc, $acc, $in2                ;; acc ^= in2
        eor     $acc, $acc, $in3                ;; acc ^= in3
        MEND
;; ARXBOX_LAYER_512: applies ARX_BOX to all eight branches of the state.
;; On entry the left-side branches x[0]-y[3] are in registers, sta points at
;; x[4] in memory and rca points at the start of RCON. On exit the right-side
;; branches x[4]-y[7] are in registers, the processed left-side branches sit
;; in the eight memory words just below sta, tmpx/tmpy hold the XOR of the
;; left-side x-/y-words, and rca points at the start of RCON again.
;; r1 and r2 (tmpx/tmpy) are overwritten.
        MACRO
        ARXBOX_LAYER_512

        ;; Left-side branches. The constants share r3/r4, so only two of
        ;; them are held at any time and they are fetched in two batches.
        ldmia.w rca!, {c0w-c1w}
        ARX_BOX x0w, y0w, c0w
        ARX_BOX x1w, y1w, c1w
        ldmia.w rca!, {c2w-c3w}
        ARX_BOX x2w, y2w, c2w
        ARX_BOX x3w, y3w, c3w

        ;; Step one of three for tmpx/tmpy: the plain four-way XOR
        ;; tmpx = x[0]^x[1]^x[2]^x[3], tmpy = y[0]^y[1]^y[2]^y[3].
        QUA_XOR tmpx, x0w, x1w, x2w, x3w
        QUA_XOR tmpy, y0w, y1w, y2w, y3w

        ;; Exchange registers and memory one branch at a time: first read a
        ;; right-side branch from sta (no write-back), then store a left-side
        ;; branch to that very address and advance sta by two words.
        ldmia.w sta, {x4w-y4w}
        stmia.w sta!, {x0w-y0w}
        ldmia.w sta, {x5w-y5w}
        stmia.w sta!, {x1w-y1w}
        ldmia.w sta, {x6w-y6w}
        stmia.w sta!, {x2w-y2w}
        ldmia.w sta, {x7w-y7w}
        stmia.w sta!, {x3w-y3w}

        ;; Right-side branches, again with two constants per batch (these
        ;; constants share r11/r12).
        ldmia.w rca!, {c4w-c5w}
        ARX_BOX x4w, y4w, c4w
        ARX_BOX x5w, y5w, c5w
        ldmia.w rca!, {c6w-c7w}
        ARX_BOX x6w, y6w, c6w
        ARX_BOX x7w, y7w, c7w

        ;; The four loads above advanced rca by 8 words; rewind it.
        sub     rca, rca, #32
        MEND
;; LINEAR_LAYER_512: Feistel-style mixing plus branch permutation.
;; On entry the right-side branches x[4]-y[7] are in registers, the left-side
;; branches are in the eight memory words just below sta, and tmpx/tmpy hold
;; the XOR of the left-side x-/y-words. On exit x0w-y3w hold the new left-side
;; branches and sta has moved back by eight words; the memory words are only
;; read, not modified. l0w/l1w (r11/r12) serve as scratch.
        MACRO
        LINEAR_LAYER_512

        ;; Step two of three for tmpx/tmpy: t = t ^ (t << 16).
        eor     tmpy, tmpy, tmpy, lsl #16
        eor     tmpx, tmpx, tmpx, lsl #16

        ;; Feistel, part one: walk backwards through the stored left-side
        ;; branches and XOR each one into a right-side branch
        ;; (branch 3 into 7, 2 into 6, 1 into 5, 0 into 4).
        ldmdb.w sta!, {l0w-l1w}
        eor     x7w, x7w, l0w
        eor     y7w, y7w, l1w
        ldmdb.w sta!, {l0w-l1w}
        eor     x6w, x6w, l0w
        eor     y6w, y6w, l1w
        ldmdb.w sta!, {l0w-l1w}
        eor     x5w, x5w, l0w
        eor     y5w, y5w, l1w
        ldmdb.w sta!, {l0w-l1w}
        eor     x4w, x4w, l0w
        eor     y4w, y4w, l1w

        ;; Feistel, part two, fused with the branch permutation: the 16-bit
        ;; rotation of tmpx and tmpy (step three of three) is folded into the
        ;; XOR, x-words take tmpy and y-words take tmpx, and the results are
        ;; written to the left-side registers rotated by one branch
        ;; (4 -> 3, 7 -> 2, 6 -> 1, 5 -> 0).
        eor     x3w, x4w, tmpy, ror #16
        eor     y3w, y4w, tmpx, ror #16
        eor     x2w, x7w, tmpy, ror #16
        eor     y2w, y7w, tmpx, ror #16
        eor     x1w, x6w, tmpy, ror #16
        eor     y1w, y6w, tmpx, ror #16
        eor     x0w, x5w, tmpy, ror #16
        eor     y0w, y5w, tmpx, ror #16
        MEND
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;; SPARKLE512 PERMUTATION (BRANCH-UNROLLED) ;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Function prototype:
;; -------------------
;; void sparkle512_arm(uint32_t *state, int ns)
;;
;; Parameters:
;; -----------
;; state: pointer to an uint32_t-array containing the 16 state words
;; ns: number of steps; if ns is zero or negative no step is executed and
;;     the state is left unchanged
;;
;; Return value:
;; -------------
;; None
;;
;; Notes:
;; ------
;; r4-r12 and lr are saved on entry and restored on exit; r1-r3 are used as
;; scratch. Inside the loop ns and the step counter are kept on the stack
;; because their registers are reused as tmpx/tmpy by the layer macros.
        EXPORT  sparkle512_arm
sparkle512_arm FUNCTION
        PROLOGUE_512                    ;; push callee-saved registers
        ;; The loop below tests for termination only after a step has been
        ;; executed, so without this guard ns == 0 (or a negative ns) would
        ;; keep stepping until the counter wraps around to ns.
        cmp     ns, #0                  ;; ns is a signed int
        ble     done_512                ;; nothing to do: skip the loop
        mov     scnt, #0                ;; clear step-counter
loop_512
        ADD_STEP_CNT_512                ;; macro to add step-counter to state
        push    {ns-scnt}               ;; push ns and step-counter (we need registers!)
        ARXBOX_LAYER_512                ;; macro for the arxbox layer
        LINEAR_LAYER_512                ;; macro for the linear layer
        pop     {ns-scnt}               ;; restore ns and step-counter from stack
        add     scnt, #1                ;; increment step-counter
        teq     scnt, ns                ;; test whether step-counter equals ns
        bne     loop_512                ;; if not then jump back to start of loop
done_512
        EPILOGUE_512                    ;; write back x[0]-y[3], pop registers, return
        ENDFUNC
END