Commit a3a77713 by Sebastien Riou Committed by Enrico Pozzobon

drygascon add_arm_cortex-m

parent 60a8ce1b
......@@ -4,8 +4,12 @@ Sebastien Riou, May 27th 2020
Implementation optimized for ARM-Cortex-M0 (Size and Speed)
*/
//Either define __DRYGASCON_ARM_SELECTOR_V6M__ yourself, or make drygascon128_arm_selector.h reachable on the include path (it is included below when the macro is not already defined)
#if defined(__DRYGASCON_ARM_SELECTOR_H__)
#ifndef __DRYGASCON_ARM_SELECTOR_V6M__
#include "drygascon128_arm_selector.h"
#endif
#if defined(__DRYGASCON_ARM_SELECTOR_V6M__)
.cpu cortex-m0
.syntax unified
.code 16
......
......@@ -22,7 +22,12 @@ the 'v7m_fpu_x' can be used to prevent this attack.
Note that implementation 'v7m_fpu' is faster (but requires FPU).
*/
#if defined(__DRYGASCON_ARM_SELECTOR_H__)
//Either define __DRYGASCON_ARM_SELECTOR_V7M__ yourself, or make drygascon128_arm_selector.h reachable on the include path (it is included below when the macro is not already defined)
#ifndef __DRYGASCON_ARM_SELECTOR_V7M__
#include "drygascon128_arm_selector.h"
#endif
#if defined(__DRYGASCON_ARM_SELECTOR_V7M__)
.cpu cortex-m3
.syntax unified
.code 16
......@@ -113,145 +118,104 @@ drygascon128_g_v7m_main_loop:
//r0 to r9: c
//r11: constant to add as round constant
//r14: pointer on C
push {r14}
// addition of round constant
//C2L ^= round constant;
eors r4,r4,r11
// substitution layer, lower half
eors r0,r0,r8
eors r8,r8,r6
eors r4,r4,r2
mvns r10,r0
mvns r11,r6
mvns r12,r8
ands r10,r10,r2
ands r11,r11,r8
eors r8,r8,r10
ands r12,r12,r0
mvns r10,r4
ands r10,r10,r6
eors r6,r6,r12
mvns r12,r2
ands r12,r12,r4
eors r4,r4,r11
eors r6,r6,r4
mvns r4,r4
eors r0,r0,r12
eors r2,r2,r10
eors r2,r2,r0
eors r0,r0,r8
// substitution layer, upper half
eors r1,r1,r9
eors r9,r9,r7
eors r5,r5,r3
mvns r10,r1
mvns r11,r7
mvns r12,r9
ands r10,r10,r3
ands r11,r11,r9
eors r9,r9,r10
ands r12,r12,r1
mvns r10,r5
ands r10,r10,r7
eors r7,r7,r12
mvns r12,r3
ands r12,r12,r5
eors r5,r5,r11
eors r7,r7,r5
mvns r5,r5
eors r1,r1,r12
eors r3,r3,r10
eors r3,r3,r1
eors r1,r1,r9
eor r0, r0, r8
eor r1, r1, r9
eor r8, r8, r6
eor r9, r9, r7
eor r4, r4, r2
eor r5, r5, r3
bic r10, r0, r8
bic r11, r8, r6
bic r12, r4, r2
bic r14, r2, r0
eor r4, r4, r11
eor r0, r0, r12
eor r8, r8, r14
bic r14, r6, r4
eor r6, r6, r10
bic r12, r1, r9
bic r10, r5, r3
bic r11, r9, r7
eor r2, r2, r14
eor r1, r1, r10
eor r5, r5, r11
bic r14, r3, r1
bic r10, r7, r5
eor r7, r7, r12
eor r7, r7, r5
eor r9, r9, r14
eor r3, r3, r10
eor r6, r6, r4
eor r2, r2, r0
eor r3, r3, r1
eor r0, r0, r8
eor r1, r1, r9
mvn r4, r4
mvn r5, r5
// linear diffusion layer
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 high part
rors r11,r9,#(20)
eors r9,r11,r9
rors r10,r8,#(4)
eors r9,r10,r9
//c4 low part
rors r11,r11,#((32-20+3)%32)
eors r11,r11,r8
rors r10,r8,#(20)
eors r8,r10,r11
//c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19);
//c0 high part
rors r11,r1,#(14)
eors r1,r11,r1
rors r10,r0,#(10)
eors r1,r10,r1
ldr r12,[r14,#R32_1-C0]
eors r12,r12,r1
str r12,[r14,#R32_1-C0]
//c0 low part
rors r11,r11,#((32-14+9)%32)
eors r11,r11,r0
rors r10,r0,#(14)
eors r0,r10,r11
ldr r12,[r14,#R32_0-C0]
eors r12,r12,r0
str r12,[r14,#R32_0-C0]
//c0 step 1
eor r11, r1, r0, ror #10
eor r10, r0, r1, ror #9
//c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61);
//c1 high part
rors r11,r3,#(19)
eors r3,r11,r3
rors r10,r2,#(31)
eors r3,r10,r3
ldr r12,[r14,#R32_3-C0]
eors r12,r12,r3
str r12,[r14,#R32_3-C0]
//c1 low part
rors r11,r11,#((32-19+30)%32)
eors r11,r11,r2
rors r10,r2,#(19)
eors r2,r10,r11
ldr r12,[r14,#R32_2-C0]
eors r12,r12,r2
str r12,[r14,#R32_2-C0]
//c1 step 1
eor r14, r3, r2, ror #31
eor r12, r2, r3, ror #30
//c0 step 2
eor r1, r11, r1, ror #14
eor r0, r10, r0, ror #14
//c1 step 2
eor r3, r14, r3, ror #19
eor r2, r12, r2, ror #19
//c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1);
//c2 high part
rors r11,r5,#(3)
eors r5,r11,r5
rors r10,r4,#(1)
eors r5,r10,r5
ldr r12,[r14,#R32_0-C0]
eors r12,r12,r5
str r12,[r14,#R32_0-C0]
//c2 low part
rors r11,r11,#((32-3+0)%32)
eors r11,r11,r4
rors r10,r4,#(3)
eors r4,r10,r11
ldr r12,[r14,#R32_3-C0]
eors r12,r12,r4
str r12,[r14,#R32_3-C0]
//c2 step 1
eor r11, r5, r4, ror #1
eor r10, r4, r5, ror #0
//c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17);
//c3 high part
rors r11,r7,#(5)
eors r7,r11,r7
rors r10,r6,#(9)
eors r7,r10,r7
ldr r12,[r14,#R32_2-C0]
eors r12,r12,r7
str r12,[r14,#R32_2-C0]
//c3 low part
rors r11,r11,#((32-5+8)%32)
eors r11,r11,r6
rors r10,r6,#(5)
eors r6,r10,r11
ldr r12,[r14,#R32_1-C0]
eors r12,r12,r6
str r12,[r14,#R32_1-C0]
//c3 step 1
eor r14, r7, r6, ror #9
eor r12, r6, r7, ror #8
//c2 step 2
eor r5, r11, r5, ror #3
eor r4, r10, r4, ror #3
//c3 step 2
eor r7, r14, r7, ror #5
eor r6, r12, r6, ror #5
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 step 1
eor r11, r9, r8, ror #4
eor r10, r8, r9, ror #3
//c4 step 2
eor r9, r11, r9, ror #20
eor r8, r10, r8, ror #20
pop {r14}
// accumulate
adds r12,r14,#R0
LDMIA.W r12, {r10,r11}
eor r10,r10,r0
eor r11,r11,r1
eor r10,r10,r5
eor r11,r11,r6
STMIA.W r12, {r10,r11}
adds r12,r14,#R1
LDMIA.W r12, {r10,r11}
eor r10,r10,r2
eor r11,r11,r3
eor r10,r10,r7
eor r11,r11,r4
STMIA.W r12, {r10,r11}
//state:
//r0 to r9: c
......@@ -390,120 +354,83 @@ drygascon128_f_v7m_mix128_coreround:
//C2L ^= round constant;
eors r4,r4,r11
// substitution layer, lower half
eors r0,r0,r8
eors r8,r8,r6
eors r4,r4,r2
mvns r10,r0
mvns r11,r6
mvns r12,r8
ands r10,r10,r2
ands r11,r11,r8
eors r8,r8,r10
ands r12,r12,r0
mvns r10,r4
ands r10,r10,r6
eors r6,r6,r12
mvns r12,r2
ands r12,r12,r4
eors r4,r4,r11
eors r6,r6,r4
mvns r4,r4
eors r0,r0,r12
eors r2,r2,r10
eors r2,r2,r0
eors r0,r0,r8
// substitution layer, upper half
eors r1,r1,r9
eors r9,r9,r7
eors r5,r5,r3
mvns r10,r1
mvns r11,r7
mvns r12,r9
ands r10,r10,r3
ands r11,r11,r9
eors r9,r9,r10
ands r12,r12,r1
mvns r10,r5
ands r10,r10,r7
eors r7,r7,r12
mvns r12,r3
ands r12,r12,r5
eors r5,r5,r11
eors r7,r7,r5
mvns r5,r5
eors r1,r1,r12
eors r3,r3,r10
eors r3,r3,r1
eors r1,r1,r9
eor r0, r0, r8
eor r1, r1, r9
eor r8, r8, r6
eor r9, r9, r7
eor r4, r4, r2
eor r5, r5, r3
bic r10, r0, r8
bic r11, r8, r6
bic r12, r4, r2
bic r14, r2, r0
eor r4, r4, r11
eor r0, r0, r12
eor r8, r8, r14
bic r14, r6, r4
eor r6, r6, r10
bic r12, r1, r9
bic r10, r5, r3
bic r11, r9, r7
eor r2, r2, r14
eor r1, r1, r10
eor r5, r5, r11
bic r14, r3, r1
bic r10, r7, r5
eor r7, r7, r12
eor r7, r7, r5
eor r9, r9, r14
eor r3, r3, r10
eor r6, r6, r4
eor r2, r2, r0
eor r3, r3, r1
eor r0, r0, r8
eor r1, r1, r9
mvn r4, r4
mvn r5, r5
// linear diffusion layer
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 high part
rors r11,r9,#(20)
eors r9,r11,r9
rors r10,r8,#(4)
eors r9,r10,r9
//c4 low part
rors r11,r11,#((32-20+3)%32)
eors r11,r11,r8
rors r10,r8,#(20)
eors r8,r10,r11
//c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19);
//c0 high part
rors r11,r1,#(14)
eors r1,r11,r1
rors r10,r0,#(10)
eors r1,r10,r1
//c0 low part
rors r11,r11,#((32-14+9)%32)
eors r11,r11,r0
rors r10,r0,#(14)
eors r0,r10,r11
//c0 step 1
eor r11, r1, r0, ror #10
eor r10, r0, r1, ror #9
//c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61);
//c1 high part
rors r11,r3,#(19)
eors r3,r11,r3
rors r10,r2,#(31)
eors r3,r10,r3
//c1 low part
rors r11,r11,#((32-19+30)%32)
eors r11,r11,r2
rors r10,r2,#(19)
eors r2,r10,r11
//c1 step 1
eor r14, r3, r2, ror #31
eor r12, r2, r3, ror #30
//c0 step 2
eor r1, r11, r1, ror #14
eor r0, r10, r0, ror #14
//c1 step 2
eor r3, r14, r3, ror #19
eor r2, r12, r2, ror #19
//c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1);
//c2 high part
rors r11,r5,#(3)
eors r5,r11,r5
rors r10,r4,#(1)
eors r5,r10,r5
//c2 low part
rors r11,r11,#((32-3+0)%32)
eors r11,r11,r4
rors r10,r4,#(3)
eors r4,r10,r11
//c2 step 1
eor r11, r5, r4, ror #1
eor r10, r4, r5, ror #0
//c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17);
//c3 high part
rors r11,r7,#(5)
eors r7,r11,r7
rors r10,r6,#(9)
eors r7,r10,r7
//c3 low part
rors r11,r11,#((32-5+8)%32)
eors r11,r11,r6
rors r10,r6,#(5)
eors r6,r10,r11
//c3 step 1
eor r14, r7, r6, ror #9
eor r12, r6, r7, ror #8
//c2 step 2
eor r5, r11, r5, ror #3
eor r4, r10, r4, ror #3
//c3 step 2
eor r7, r14, r7, ror #5
eor r6, r12, r6, ror #5
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 step 1
eor r11, r9, r8, ror #4
eor r10, r8, r9, ror #3
//c4 step 2
eor r9, r11, r9, ror #20
eor r8, r10, r8, ror #20
//state:
//r0 to r9: c
//r10,r11,r12 destroyed
//r10,r11,r12,r14 destroyed
ldr r10,[sp,#16]
cmp r10,#130
......@@ -561,6 +488,7 @@ drygascon128_g0_v7m:
//r11 = ((0xf - 0) << 4) | 0;
movs r11,#0xf0
push {r14}
//state:
//r0 to r9: c
//r11: constant to add as round constant
......@@ -569,120 +497,84 @@ drygascon128_g0_v7m:
//C2L ^= round constant;
eors r4,r4,r11
// substitution layer, lower half
eors r0,r0,r8
eors r8,r8,r6
eors r4,r4,r2
mvns r10,r0
mvns r11,r6
mvns r12,r8
ands r10,r10,r2
ands r11,r11,r8
eors r8,r8,r10
ands r12,r12,r0
mvns r10,r4
ands r10,r10,r6
eors r6,r6,r12
mvns r12,r2
ands r12,r12,r4
eors r4,r4,r11
eors r6,r6,r4
mvns r4,r4
eors r0,r0,r12
eors r2,r2,r10
eors r2,r2,r0
eors r0,r0,r8
// substitution layer, upper half
eors r1,r1,r9
eors r9,r9,r7
eors r5,r5,r3
mvns r10,r1
mvns r11,r7
mvns r12,r9
ands r10,r10,r3
ands r11,r11,r9
eors r9,r9,r10
ands r12,r12,r1
mvns r10,r5
ands r10,r10,r7
eors r7,r7,r12
mvns r12,r3
ands r12,r12,r5
eors r5,r5,r11
eors r7,r7,r5
mvns r5,r5
eors r1,r1,r12
eors r3,r3,r10
eors r3,r3,r1
eors r1,r1,r9
eor r0, r0, r8
eor r1, r1, r9
eor r8, r8, r6
eor r9, r9, r7
eor r4, r4, r2
eor r5, r5, r3
bic r10, r0, r8
bic r11, r8, r6
bic r12, r4, r2
bic r14, r2, r0
eor r4, r4, r11
eor r0, r0, r12
eor r8, r8, r14
bic r14, r6, r4
eor r6, r6, r10
bic r12, r1, r9
bic r10, r5, r3
bic r11, r9, r7
eor r2, r2, r14
eor r1, r1, r10
eor r5, r5, r11
bic r14, r3, r1
bic r10, r7, r5
eor r7, r7, r12
eor r7, r7, r5
eor r9, r9, r14
eor r3, r3, r10
eor r6, r6, r4
eor r2, r2, r0
eor r3, r3, r1
eor r0, r0, r8
eor r1, r1, r9
mvn r4, r4
mvn r5, r5
// linear diffusion layer
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 high part
rors r11,r9,#(20)
eors r9,r11,r9
rors r10,r8,#(4)
eors r9,r10,r9
//c4 low part
rors r11,r11,#((32-20+3)%32)
eors r11,r11,r8
rors r10,r8,#(20)
eors r8,r10,r11
//c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19);
//c0 high part
rors r11,r1,#(14)
eors r1,r11,r1
rors r10,r0,#(10)
eors r1,r10,r1
//c0 low part
rors r11,r11,#((32-14+9)%32)
eors r11,r11,r0
rors r10,r0,#(14)
eors r0,r10,r11
//c0 step 1
eor r11, r1, r0, ror #10
eor r10, r0, r1, ror #9
//c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61);
//c1 high part
rors r11,r3,#(19)
eors r3,r11,r3
rors r10,r2,#(31)
eors r3,r10,r3
//c1 low part
rors r11,r11,#((32-19+30)%32)
eors r11,r11,r2
rors r10,r2,#(19)
eors r2,r10,r11
//c1 step 1
eor r14, r3, r2, ror #31
eor r12, r2, r3, ror #30
//c0 step 2
eor r1, r11, r1, ror #14
eor r0, r10, r0, ror #14
//c1 step 2
eor r3, r14, r3, ror #19
eor r2, r12, r2, ror #19
//c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1);
//c2 high part
rors r11,r5,#(3)
eors r5,r11,r5
rors r10,r4,#(1)
eors r5,r10,r5
//c2 low part
rors r11,r11,#((32-3+0)%32)
eors r11,r11,r4
rors r10,r4,#(3)
eors r4,r10,r11
//c2 step 1
eor r11, r5, r4, ror #1
eor r10, r4, r5, ror #0
//c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17);
//c3 high part
rors r11,r7,#(5)
eors r7,r11,r7
rors r10,r6,#(9)
eors r7,r10,r7
//c3 low part
rors r11,r11,#((32-5+8)%32)
eors r11,r11,r6
rors r10,r6,#(5)
eors r6,r10,r11
//c3 step 1
eor r14, r7, r6, ror #9
eor r12, r6, r7, ror #8
//c2 step 2
eor r5, r11, r5, ror #3
eor r4, r10, r4, ror #3
//c3 step 2
eor r7, r14, r7, ror #5
eor r6, r12, r6, ror #5
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 step 1
eor r11, r9, r8, ror #4
eor r10, r8, r9, ror #3
//c4 step 2
eor r9, r11, r9, ror #20
eor r8, r10, r8, ror #20
//state:
//r0 to r9: c
//r10,r11,r12 destroyed
//r10,r11,r12,r14 destroyed
pop {r14}
//update C
STMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
......
......@@ -20,8 +20,13 @@ Reference manual) shows data cache lines of 16 bytes.
- In the unlikely case in which none of the conditions can be met,
the 'v7m_fpu_x' implementation can be used to prevent this attack.
*/
#if defined(__DRYGASCON_ARM_SELECTOR_H__)
.cpu cortex-m3
//Either define __DRYGASCON_ARM_SELECTOR_V7M_FPU__ yourself, or make drygascon128_arm_selector.h reachable on the include path (it is included below when the macro is not already defined)
#ifndef __DRYGASCON_ARM_SELECTOR_V7M_FPU__
#include "drygascon128_arm_selector.h"
#endif
#if defined(__DRYGASCON_ARM_SELECTOR_V7M_FPU__)
.cpu cortex-m4
.syntax unified
.code 16
.thumb_func
......@@ -79,10 +84,11 @@ drygascon128_g_v7m_fpu:
// 0 state address
//r=0
VSUB.F32 S10, S10, S10
VSUB.F32 S11, S11, S11
VSUB.F32 S12, S12, S12
VSUB.F32 S13, S13, S13
movs r10,#0
vmov S10,r10
vmov S11,r10
vmov S12,r10
vmov S13,r10
//round=r11=rounds-1; (the subs below writes r11, not r10)
subs r11,r1,#1
......@@ -116,141 +122,100 @@ drygascon128_g_v7m_fpu_main_loop:
//C2L ^= round constant;
eors r4,r4,r11
// substitution layer, lower half
eors r0,r0,r8
eors r8,r8,r6
eors r4,r4,r2
mvns r10,r0
mvns r11,r6
mvns r12,r8
ands r10,r10,r2
ands r11,r11,r8
eors r8,r8,r10
ands r12,r12,r0
mvns r10,r4
ands r10,r10,r6
eors r6,r6,r12
mvns r12,r2
ands r12,r12,r4
eors r4,r4,r11
eors r6,r6,r4
mvns r4,r4
eors r0,r0,r12
eors r2,r2,r10
eors r2,r2,r0
eors r0,r0,r8
// substitution layer, upper half
eors r1,r1,r9
eors r9,r9,r7
eors r5,r5,r3
mvns r10,r1
mvns r11,r7
mvns r12,r9
ands r10,r10,r3
ands r11,r11,r9
eors r9,r9,r10
ands r12,r12,r1
mvns r10,r5
ands r10,r10,r7
eors r7,r7,r12
mvns r12,r3
ands r12,r12,r5
eors r5,r5,r11
eors r7,r7,r5
mvns r5,r5
eors r1,r1,r12
eors r3,r3,r10
eors r3,r3,r1
eors r1,r1,r9
eor r0, r0, r8
eor r1, r1, r9
eor r8, r8, r6
eor r9, r9, r7
eor r4, r4, r2
eor r5, r5, r3
bic r10, r0, r8
bic r11, r8, r6
bic r12, r4, r2
bic r14, r2, r0
eor r4, r4, r11
eor r0, r0, r12
eor r8, r8, r14
bic r14, r6, r4
eor r6, r6, r10
bic r12, r1, r9
bic r10, r5, r3
bic r11, r9, r7
eor r2, r2, r14
eor r1, r1, r10
eor r5, r5, r11
bic r14, r3, r1
bic r10, r7, r5
eor r7, r7, r12
eor r7, r7, r5
eor r9, r9, r14
eor r3, r3, r10
eor r6, r6, r4
eor r2, r2, r0
eor r3, r3, r1
eor r0, r0, r8
eor r1, r1, r9
mvn r4, r4
mvn r5, r5
// linear diffusion layer
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 high part
rors r11,r9,#(20)
eors r9,r11,r9
rors r10,r8,#(4)
eors r9,r10,r9
//c4 low part
rors r11,r11,#((32-20+3)%32)
eors r11,r11,r8
rors r10,r8,#(20)
eors r8,r10,r11
vmov r14,S11
//c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19);
//c0 high part
rors r11,r1,#(14)
eors r1,r11,r1
rors r10,r0,#(10)
eors r1,r10,r1
//r14 is R32_1
eors r14,r14,r1
vmov r12,S10
//c0 low part
rors r11,r11,#((32-14+9)%32)
eors r11,r11,r0
rors r10,r0,#(14)
eors r0,r10,r11
//r12 is R32_0
eors r12,r12,r0
//c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1);
//c2 high part
rors r11,r5,#(3)
eors r5,r11,r5
rors r10,r4,#(1)
eors r5,r10,r5
//r12 is R32_0
eors r12,r12,r5
vmov S10,r12
vmov r12,S13
//c2 low part
rors r11,r11,#((32-3+0)%32)
eors r11,r11,r4
rors r10,r4,#(3)
eors r4,r10,r11
//r12 is R32_3
eors r12,r12,r4
//c0 step 1
eor r11, r1, r0, ror #10
eor r10, r0, r1, ror #9
//c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61);
//c1 high part
rors r11,r3,#(19)
eors r3,r11,r3
rors r10,r2,#(31)
eors r3,r10,r3
//r12 is R32_3
eors r12,r12,r3
vmov S13,r12
vmov r12,S12
//c1 low part
rors r11,r11,#((32-19+30)%32)
eors r11,r11,r2
rors r10,r2,#(19)
eors r2,r10,r11
//r12 is R32_2
eors r12,r12,r2
//c1 step 1
eor r14, r3, r2, ror #31
eor r12, r2, r3, ror #30
//c0 step 2
eor r1, r11, r1, ror #14
eor r0, r10, r0, ror #14
//c1 step 2
eor r3, r14, r3, ror #19
eor r2, r12, r2, ror #19
//c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1);
//c2 step 1
eor r11, r5, r4, ror #1
eor r10, r4, r5, ror #0
//c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17);
//c3 high part
rors r11,r7,#(5)
eors r7,r11,r7
rors r10,r6,#(9)
eors r7,r10,r7
//r12 is R32_2
eors r12,r12,r7
//c3 step 1
eor r14, r7, r6, ror #9
eor r12, r6, r7, ror #8
//c2 step 2
eor r5, r11, r5, ror #3
eor r4, r10, r4, ror #3
//c3 step 2
eor r7, r14, r7, ror #5
eor r6, r12, r6, ror #5
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 step 1
eor r11, r9, r8, ror #4
eor r10, r8, r9, ror #3
//c4 step 2
eor r9, r11, r9, ror #20
eor r8, r10, r8, ror #20
// accumulate
vmov r10,S10
vmov r11,S11
vmov r12,S12
vmov r14,S13
eor r10,r10,r0
eor r11,r11,r1
eor r12,r12,r2
eor r14,r14,r3
eor r10,r10,r5
eor r11,r11,r6
eor r12,r12,r7
eor r14,r14,r4
vmov S10,r10
vmov S11,r11
vmov S12,r12
//c3 low part
rors r11,r11,#((32-5+8)%32)
eors r11,r11,r6
rors r10,r6,#(5)
eors r6,r10,r11
//r14 is R32_1
eors r14,r14,r6
vmov S11,r14
vmov S13,r14
//state:
//r0 to r9: c
//r10,r11,r12 destroyed
//r10,r11,r12,r14 destroyed
ldr r10,[sp,#4]
......@@ -294,10 +259,10 @@ drygascon128_f_v7m_fpu:
push {r0,r3,r10} //make the same stack frame as drygascon128_g_cm7
push {r1,r2}
//r=0
VSUB.F32 S10, S10, S10
VSUB.F32 S11, S11, S11
VSUB.F32 S12, S12, S12
VSUB.F32 S13, S13, S13
vmov S10,r10
vmov S11,r10
vmov S12,r10
vmov S13,r10
//Load C
adds r11,r0,#C0
......@@ -391,120 +356,83 @@ drygascon128_f_v7m_fpu_mix128_coreround:
//C2L ^= round constant;
eors r4,r4,r11
// substitution layer, lower half
eors r0,r0,r8
eors r8,r8,r6
eors r4,r4,r2
mvns r10,r0
mvns r11,r6
mvns r12,r8
ands r10,r10,r2
ands r11,r11,r8
eors r8,r8,r10
ands r12,r12,r0
mvns r10,r4
ands r10,r10,r6
eors r6,r6,r12
mvns r12,r2
ands r12,r12,r4
eors r4,r4,r11
eors r6,r6,r4
mvns r4,r4
eors r0,r0,r12
eors r2,r2,r10
eors r2,r2,r0
eors r0,r0,r8
// substitution layer, upper half
eors r1,r1,r9
eors r9,r9,r7
eors r5,r5,r3
mvns r10,r1
mvns r11,r7
mvns r12,r9
ands r10,r10,r3
ands r11,r11,r9
eors r9,r9,r10
ands r12,r12,r1
mvns r10,r5
ands r10,r10,r7
eors r7,r7,r12
mvns r12,r3
ands r12,r12,r5
eors r5,r5,r11
eors r7,r7,r5
mvns r5,r5
eors r1,r1,r12
eors r3,r3,r10
eors r3,r3,r1
eors r1,r1,r9
eor r0, r0, r8
eor r1, r1, r9
eor r8, r8, r6
eor r9, r9, r7
eor r4, r4, r2
eor r5, r5, r3
bic r10, r0, r8
bic r11, r8, r6
bic r12, r4, r2
bic r14, r2, r0
eor r4, r4, r11
eor r0, r0, r12
eor r8, r8, r14
bic r14, r6, r4
eor r6, r6, r10
bic r12, r1, r9
bic r10, r5, r3
bic r11, r9, r7
eor r2, r2, r14
eor r1, r1, r10
eor r5, r5, r11
bic r14, r3, r1
bic r10, r7, r5
eor r7, r7, r12
eor r7, r7, r5
eor r9, r9, r14
eor r3, r3, r10
eor r6, r6, r4
eor r2, r2, r0
eor r3, r3, r1
eor r0, r0, r8
eor r1, r1, r9
mvn r4, r4
mvn r5, r5
// linear diffusion layer
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 high part
rors r11,r9,#(20)
eors r9,r11,r9
rors r10,r8,#(4)
eors r9,r10,r9
//c4 low part
rors r11,r11,#((32-20+3)%32)
eors r11,r11,r8
rors r10,r8,#(20)
eors r8,r10,r11
//c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19);
//c0 high part
rors r11,r1,#(14)
eors r1,r11,r1
rors r10,r0,#(10)
eors r1,r10,r1
//c0 low part
rors r11,r11,#((32-14+9)%32)
eors r11,r11,r0
rors r10,r0,#(14)
eors r0,r10,r11
//c0 step 1
eor r11, r1, r0, ror #10
eor r10, r0, r1, ror #9
//c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61);
//c1 high part
rors r11,r3,#(19)
eors r3,r11,r3
rors r10,r2,#(31)
eors r3,r10,r3
//c1 low part
rors r11,r11,#((32-19+30)%32)
eors r11,r11,r2
rors r10,r2,#(19)
eors r2,r10,r11
//c1 step 1
eor r14, r3, r2, ror #31
eor r12, r2, r3, ror #30
//c0 step 2
eor r1, r11, r1, ror #14
eor r0, r10, r0, ror #14
//c1 step 2
eor r3, r14, r3, ror #19
eor r2, r12, r2, ror #19
//c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1);
//c2 high part
rors r11,r5,#(3)
eors r5,r11,r5
rors r10,r4,#(1)
eors r5,r10,r5
//c2 low part
rors r11,r11,#((32-3+0)%32)
eors r11,r11,r4
rors r10,r4,#(3)
eors r4,r10,r11
//c2 step 1
eor r11, r5, r4, ror #1
eor r10, r4, r5, ror #0
//c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17);
//c3 high part
rors r11,r7,#(5)
eors r7,r11,r7
rors r10,r6,#(9)
eors r7,r10,r7
//c3 low part
rors r11,r11,#((32-5+8)%32)
eors r11,r11,r6
rors r10,r6,#(5)
eors r6,r10,r11
//c3 step 1
eor r14, r7, r6, ror #9
eor r12, r6, r7, ror #8
//c2 step 2
eor r5, r11, r5, ror #3
eor r4, r10, r4, ror #3
//c3 step 2
eor r7, r14, r7, ror #5
eor r6, r12, r6, ror #5
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 step 1
eor r11, r9, r8, ror #4
eor r10, r8, r9, ror #3
//c4 step 2
eor r9, r11, r9, ror #20
eor r8, r10, r8, ror #20
//state:
//r0 to r9: c
//r10,r11,r12 destroyed
//r10,r11,r12,r14 destroyed
ldr r10,[sp,#16]
cmp r10,#130
......@@ -561,6 +489,7 @@ drygascon128_g0_v7m_fpu:
//r11 = ((0xf - 0) << 4) | 0;
movs r11,#0xf0
push {r14}
//state:
//r0 to r9: c
//r11: constant to add as round constant
......@@ -569,120 +498,84 @@ drygascon128_g0_v7m_fpu:
//C2L ^= round constant;
eors r4,r4,r11
// substitution layer, lower half
eors r0,r0,r8
eors r8,r8,r6
eors r4,r4,r2
mvns r10,r0
mvns r11,r6
mvns r12,r8
ands r10,r10,r2
ands r11,r11,r8
eors r8,r8,r10
ands r12,r12,r0
mvns r10,r4
ands r10,r10,r6
eors r6,r6,r12
mvns r12,r2
ands r12,r12,r4
eors r4,r4,r11
eors r6,r6,r4
mvns r4,r4
eors r0,r0,r12
eors r2,r2,r10
eors r2,r2,r0
eors r0,r0,r8
// substitution layer, upper half
eors r1,r1,r9
eors r9,r9,r7
eors r5,r5,r3
mvns r10,r1
mvns r11,r7
mvns r12,r9
ands r10,r10,r3
ands r11,r11,r9
eors r9,r9,r10
ands r12,r12,r1
mvns r10,r5
ands r10,r10,r7
eors r7,r7,r12
mvns r12,r3
ands r12,r12,r5
eors r5,r5,r11
eors r7,r7,r5
mvns r5,r5
eors r1,r1,r12
eors r3,r3,r10
eors r3,r3,r1
eors r1,r1,r9
eor r0, r0, r8
eor r1, r1, r9
eor r8, r8, r6
eor r9, r9, r7
eor r4, r4, r2
eor r5, r5, r3
bic r10, r0, r8
bic r11, r8, r6
bic r12, r4, r2
bic r14, r2, r0
eor r4, r4, r11
eor r0, r0, r12
eor r8, r8, r14
bic r14, r6, r4
eor r6, r6, r10
bic r12, r1, r9
bic r10, r5, r3
bic r11, r9, r7
eor r2, r2, r14
eor r1, r1, r10
eor r5, r5, r11
bic r14, r3, r1
bic r10, r7, r5
eor r7, r7, r12
eor r7, r7, r5
eor r9, r9, r14
eor r3, r3, r10
eor r6, r6, r4
eor r2, r2, r0
eor r3, r3, r1
eor r0, r0, r8
eor r1, r1, r9
mvn r4, r4
mvn r5, r5
// linear diffusion layer
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 high part
rors r11,r9,#(20)
eors r9,r11,r9
rors r10,r8,#(4)
eors r9,r10,r9
//c4 low part
rors r11,r11,#((32-20+3)%32)
eors r11,r11,r8
rors r10,r8,#(20)
eors r8,r10,r11
//c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19);
//c0 high part
rors r11,r1,#(14)
eors r1,r11,r1
rors r10,r0,#(10)
eors r1,r10,r1
//c0 low part
rors r11,r11,#((32-14+9)%32)
eors r11,r11,r0
rors r10,r0,#(14)
eors r0,r10,r11
//c0 step 1
eor r11, r1, r0, ror #10
eor r10, r0, r1, ror #9
//c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61);
//c1 high part
rors r11,r3,#(19)
eors r3,r11,r3
rors r10,r2,#(31)
eors r3,r10,r3
//c1 low part
rors r11,r11,#((32-19+30)%32)
eors r11,r11,r2
rors r10,r2,#(19)
eors r2,r10,r11
//c1 step 1
eor r14, r3, r2, ror #31
eor r12, r2, r3, ror #30
//c0 step 2
eor r1, r11, r1, ror #14
eor r0, r10, r0, ror #14
//c1 step 2
eor r3, r14, r3, ror #19
eor r2, r12, r2, ror #19
//c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1);
//c2 high part
rors r11,r5,#(3)
eors r5,r11,r5
rors r10,r4,#(1)
eors r5,r10,r5
//c2 low part
rors r11,r11,#((32-3+0)%32)
eors r11,r11,r4
rors r10,r4,#(3)
eors r4,r10,r11
//c2 step 1
eor r11, r5, r4, ror #1
eor r10, r4, r5, ror #0
//c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17);
//c3 high part
rors r11,r7,#(5)
eors r7,r11,r7
rors r10,r6,#(9)
eors r7,r10,r7
//c3 low part
rors r11,r11,#((32-5+8)%32)
eors r11,r11,r6
rors r10,r6,#(5)
eors r6,r10,r11
//c3 step 1
eor r14, r7, r6, ror #9
eor r12, r6, r7, ror #8
//c2 step 2
eor r5, r11, r5, ror #3
eor r4, r10, r4, ror #3
//c3 step 2
eor r7, r14, r7, ror #5
eor r6, r12, r6, ror #5
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 step 1
eor r11, r9, r8, ror #4
eor r10, r8, r9, ror #3
//c4 step 2
eor r9, r11, r9, ror #20
eor r8, r10, r8, ror #20
//state:
//r0 to r9: c
//r10,r11,r12 destroyed
//r10,r11,r12,r14 destroyed
pop {r14}
//update C
STMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
......
......@@ -7,7 +7,12 @@ Include protection against timing attack on X look up operations
Note that implementation 'v7m_fpu' is faster and safe on all Cortex-M7 as of May 2020.
*/
#if defined(__DRYGASCON_ARM_SELECTOR_H__)
//Either define __DRYGASCON_ARM_SELECTOR_V7M_FPU_X__ yourself, or make drygascon128_arm_selector.h reachable on the include path (it is included below when the macro is not already defined)
#ifndef __DRYGASCON_ARM_SELECTOR_V7M_FPU_X__
#include "drygascon128_arm_selector.h"
#endif
#if defined(__DRYGASCON_ARM_SELECTOR_V7M_FPU_X__)
.cpu cortex-m7
.syntax unified
.code 16
......@@ -66,10 +71,11 @@ drygascon128_g_v7m_fpu_x:
// 0 state address
//r=0
VSUB.F32 S10, S10, S10
VSUB.F32 S11, S11, S11
VSUB.F32 S12, S12, S12
VSUB.F32 S13, S13, S13
movs r10,#0
vmov S10,r10
vmov S11,r10
vmov S12,r10
vmov S13,r10
//round=r11=rounds-1; (the subs below writes r11, not r10)
subs r11,r1,#1
......@@ -103,141 +109,100 @@ drygascon128_g_v7m_fpu_x_main_loop:
//C2L ^= round constant;
eors r4,r4,r11
// substitution layer, lower half
eors r0,r0,r8
eors r8,r8,r6
eors r4,r4,r2
mvns r10,r0
mvns r11,r6
mvns r12,r8
ands r10,r10,r2
ands r11,r11,r8
eors r8,r8,r10
ands r12,r12,r0
mvns r10,r4
ands r10,r10,r6
eors r6,r6,r12
mvns r12,r2
ands r12,r12,r4
eors r4,r4,r11
eors r6,r6,r4
mvns r4,r4
eors r0,r0,r12
eors r2,r2,r10
eors r2,r2,r0
eors r0,r0,r8
// substitution layer, upper half
eors r1,r1,r9
eors r9,r9,r7
eors r5,r5,r3
mvns r10,r1
mvns r11,r7
mvns r12,r9
ands r10,r10,r3
ands r11,r11,r9
eors r9,r9,r10
ands r12,r12,r1
mvns r10,r5
ands r10,r10,r7
eors r7,r7,r12
mvns r12,r3
ands r12,r12,r5
eors r5,r5,r11
eors r7,r7,r5
mvns r5,r5
eors r1,r1,r12
eors r3,r3,r10
eors r3,r3,r1
eors r1,r1,r9
eor r0, r0, r8
eor r1, r1, r9
eor r8, r8, r6
eor r9, r9, r7
eor r4, r4, r2
eor r5, r5, r3
bic r10, r0, r8
bic r11, r8, r6
bic r12, r4, r2
bic r14, r2, r0
eor r4, r4, r11
eor r0, r0, r12
eor r8, r8, r14
bic r14, r6, r4
eor r6, r6, r10
bic r12, r1, r9
bic r10, r5, r3
bic r11, r9, r7
eor r2, r2, r14
eor r1, r1, r10
eor r5, r5, r11
bic r14, r3, r1
bic r10, r7, r5
eor r7, r7, r12
eor r7, r7, r5
eor r9, r9, r14
eor r3, r3, r10
eor r6, r6, r4
eor r2, r2, r0
eor r3, r3, r1
eor r0, r0, r8
eor r1, r1, r9
mvn r4, r4
mvn r5, r5
// linear diffusion layer
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 high part
rors r11,r9,#(20)
eors r9,r11,r9
rors r10,r8,#(4)
eors r9,r10,r9
//c4 low part
rors r11,r11,#((32-20+3)%32)
eors r11,r11,r8
rors r10,r8,#(20)
eors r8,r10,r11
vmov r14,S11
//c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19);
//c0 high part
rors r11,r1,#(14)
eors r1,r11,r1
rors r10,r0,#(10)
eors r1,r10,r1
//r14 is R32_1
eors r14,r14,r1
vmov r12,S10
//c0 low part
rors r11,r11,#((32-14+9)%32)
eors r11,r11,r0
rors r10,r0,#(14)
eors r0,r10,r11
//r12 is R32_0
eors r12,r12,r0
//c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1);
//c2 high part
rors r11,r5,#(3)
eors r5,r11,r5
rors r10,r4,#(1)
eors r5,r10,r5
//r12 is R32_0
eors r12,r12,r5
vmov S10,r12
vmov r12,S13
//c2 low part
rors r11,r11,#((32-3+0)%32)
eors r11,r11,r4
rors r10,r4,#(3)
eors r4,r10,r11
//r12 is R32_3
eors r12,r12,r4
//c0 step 1
eor r11, r1, r0, ror #10
eor r10, r0, r1, ror #9
//c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61);
//c1 high part
rors r11,r3,#(19)
eors r3,r11,r3
rors r10,r2,#(31)
eors r3,r10,r3
//r12 is R32_3
eors r12,r12,r3
vmov S13,r12
vmov r12,S12
//c1 low part
rors r11,r11,#((32-19+30)%32)
eors r11,r11,r2
rors r10,r2,#(19)
eors r2,r10,r11
//r12 is R32_2
eors r12,r12,r2
//c1 step 1
eor r14, r3, r2, ror #31
eor r12, r2, r3, ror #30
//c0 step 2
eor r1, r11, r1, ror #14
eor r0, r10, r0, ror #14
//c1 step 2
eor r3, r14, r3, ror #19
eor r2, r12, r2, ror #19
//c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1);
//c2 step 1
eor r11, r5, r4, ror #1
eor r10, r4, r5, ror #0
//c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17);
//c3 high part
rors r11,r7,#(5)
eors r7,r11,r7
rors r10,r6,#(9)
eors r7,r10,r7
//r12 is R32_2
eors r12,r12,r7
//c3 step 1
eor r14, r7, r6, ror #9
eor r12, r6, r7, ror #8
//c2 step 2
eor r5, r11, r5, ror #3
eor r4, r10, r4, ror #3
//c3 step 2
eor r7, r14, r7, ror #5
eor r6, r12, r6, ror #5
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 step 1
eor r11, r9, r8, ror #4
eor r10, r8, r9, ror #3
//c4 step 2
eor r9, r11, r9, ror #20
eor r8, r10, r8, ror #20
// accumulate
vmov r10,S10
vmov r11,S11
vmov r12,S12
vmov r14,S13
eor r10,r10,r0
eor r11,r11,r1
eor r12,r12,r2
eor r14,r14,r3
eor r10,r10,r5
eor r11,r11,r6
eor r12,r12,r7
eor r14,r14,r4
vmov S10,r10
vmov S11,r11
vmov S12,r12
//c3 low part
rors r11,r11,#((32-5+8)%32)
eors r11,r11,r6
rors r10,r6,#(5)
eors r6,r10,r11
//r14 is R32_1
eors r14,r14,r6
vmov S11,r14
vmov S13,r14
//state:
//r0 to r9: c
//r10,r11,r12 destroyed
//r10,r11,r12,r14 destroyed
ldr r10,[sp,#4]
......@@ -281,10 +246,10 @@ drygascon128_f_v7m_fpu_x:
push {r0,r3,r10} //make the same stack frame as drygascon128_g_cm7
push {r1,r2}
//r=0
VSUB.F32 S10, S10, S10
VSUB.F32 S11, S11, S11
VSUB.F32 S12, S12, S12
VSUB.F32 S13, S13, S13
vmov S10,r10
vmov S11,r10
vmov S12,r10
vmov S13,r10
//Load C
adds r11,r0,#C0
......@@ -389,120 +354,83 @@ drygascon128_f_v7m_fpu_x_mix128_coreround:
//C2L ^= round constant;
eors r4,r4,r11
// substitution layer, lower half
eors r0,r0,r8
eors r8,r8,r6
eors r4,r4,r2
mvns r10,r0
mvns r11,r6
mvns r12,r8
ands r10,r10,r2
ands r11,r11,r8
eors r8,r8,r10
ands r12,r12,r0
mvns r10,r4
ands r10,r10,r6
eors r6,r6,r12
mvns r12,r2
ands r12,r12,r4
eors r4,r4,r11
eors r6,r6,r4
mvns r4,r4
eors r0,r0,r12
eors r2,r2,r10
eors r2,r2,r0
eors r0,r0,r8
// substitution layer, upper half
eors r1,r1,r9
eors r9,r9,r7
eors r5,r5,r3
mvns r10,r1
mvns r11,r7
mvns r12,r9
ands r10,r10,r3
ands r11,r11,r9
eors r9,r9,r10
ands r12,r12,r1
mvns r10,r5
ands r10,r10,r7
eors r7,r7,r12
mvns r12,r3
ands r12,r12,r5
eors r5,r5,r11
eors r7,r7,r5
mvns r5,r5
eors r1,r1,r12
eors r3,r3,r10
eors r3,r3,r1
eors r1,r1,r9
eor r0, r0, r8
eor r1, r1, r9
eor r8, r8, r6
eor r9, r9, r7
eor r4, r4, r2
eor r5, r5, r3
bic r10, r0, r8
bic r11, r8, r6
bic r12, r4, r2
bic r14, r2, r0
eor r4, r4, r11
eor r0, r0, r12
eor r8, r8, r14
bic r14, r6, r4
eor r6, r6, r10
bic r12, r1, r9
bic r10, r5, r3
bic r11, r9, r7
eor r2, r2, r14
eor r1, r1, r10
eor r5, r5, r11
bic r14, r3, r1
bic r10, r7, r5
eor r7, r7, r12
eor r7, r7, r5
eor r9, r9, r14
eor r3, r3, r10
eor r6, r6, r4
eor r2, r2, r0
eor r3, r3, r1
eor r0, r0, r8
eor r1, r1, r9
mvn r4, r4
mvn r5, r5
// linear diffusion layer
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 high part
rors r11,r9,#(20)
eors r9,r11,r9
rors r10,r8,#(4)
eors r9,r10,r9
//c4 low part
rors r11,r11,#((32-20+3)%32)
eors r11,r11,r8
rors r10,r8,#(20)
eors r8,r10,r11
//c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19);
//c0 high part
rors r11,r1,#(14)
eors r1,r11,r1
rors r10,r0,#(10)
eors r1,r10,r1
//c0 low part
rors r11,r11,#((32-14+9)%32)
eors r11,r11,r0
rors r10,r0,#(14)
eors r0,r10,r11
//c0 step 1
eor r11, r1, r0, ror #10
eor r10, r0, r1, ror #9
//c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61);
//c1 high part
rors r11,r3,#(19)
eors r3,r11,r3
rors r10,r2,#(31)
eors r3,r10,r3
//c1 low part
rors r11,r11,#((32-19+30)%32)
eors r11,r11,r2
rors r10,r2,#(19)
eors r2,r10,r11
//c1 step 1
eor r14, r3, r2, ror #31
eor r12, r2, r3, ror #30
//c0 step 2
eor r1, r11, r1, ror #14
eor r0, r10, r0, ror #14
//c1 step 2
eor r3, r14, r3, ror #19
eor r2, r12, r2, ror #19
//c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1);
//c2 high part
rors r11,r5,#(3)
eors r5,r11,r5
rors r10,r4,#(1)
eors r5,r10,r5
//c2 low part
rors r11,r11,#((32-3+0)%32)
eors r11,r11,r4
rors r10,r4,#(3)
eors r4,r10,r11
//c2 step 1
eor r11, r5, r4, ror #1
eor r10, r4, r5, ror #0
//c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17);
//c3 high part
rors r11,r7,#(5)
eors r7,r11,r7
rors r10,r6,#(9)
eors r7,r10,r7
//c3 low part
rors r11,r11,#((32-5+8)%32)
eors r11,r11,r6
rors r10,r6,#(5)
eors r6,r10,r11
//c3 step 1
eor r14, r7, r6, ror #9
eor r12, r6, r7, ror #8
//c2 step 2
eor r5, r11, r5, ror #3
eor r4, r10, r4, ror #3
//c3 step 2
eor r7, r14, r7, ror #5
eor r6, r12, r6, ror #5
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 step 1
eor r11, r9, r8, ror #4
eor r10, r8, r9, ror #3
//c4 step 2
eor r9, r11, r9, ror #20
eor r8, r10, r8, ror #20
//state:
//r0 to r9: c
//r10,r11,r12 destroyed
//r10,r11,r12,r14 destroyed
ldr r10,[sp,#16]
cmp r10,#130
......@@ -559,6 +487,7 @@ drygascon128_g0_v7m_fpu_x:
//r11 = ((0xf - 0) << 4) | 0;
movs r11,#0xf0
push {r14}
//state:
//r0 to r9: c
//r11: constant to add as round constant
......@@ -567,120 +496,84 @@ drygascon128_g0_v7m_fpu_x:
//C2L ^= round constant;
eors r4,r4,r11
// substitution layer, lower half
eors r0,r0,r8
eors r8,r8,r6
eors r4,r4,r2
mvns r10,r0
mvns r11,r6
mvns r12,r8
ands r10,r10,r2
ands r11,r11,r8
eors r8,r8,r10
ands r12,r12,r0
mvns r10,r4
ands r10,r10,r6
eors r6,r6,r12
mvns r12,r2
ands r12,r12,r4
eors r4,r4,r11
eors r6,r6,r4
mvns r4,r4
eors r0,r0,r12
eors r2,r2,r10
eors r2,r2,r0
eors r0,r0,r8
// substitution layer, upper half
eors r1,r1,r9
eors r9,r9,r7
eors r5,r5,r3
mvns r10,r1
mvns r11,r7
mvns r12,r9
ands r10,r10,r3
ands r11,r11,r9
eors r9,r9,r10
ands r12,r12,r1
mvns r10,r5
ands r10,r10,r7
eors r7,r7,r12
mvns r12,r3
ands r12,r12,r5
eors r5,r5,r11
eors r7,r7,r5
mvns r5,r5
eors r1,r1,r12
eors r3,r3,r10
eors r3,r3,r1
eors r1,r1,r9
// substitution layer, lower half (even registers) and upper half (odd registers) interleaved
// scratch registers: r10, r11, r12, r14 (bic rd,rn,rm gives rd = rn & ~rm)
// c0 ^= c4, c4 ^= c3, c2 ^= c1 on both halves
eor r0, r0, r8
eor r1, r1, r9
eor r8, r8, r6
eor r9, r9, r7
eor r4, r4, r2
eor r5, r5, r3
// lower half: and-not terms are taken before their inputs get modified
bic r10, r0, r8
bic r11, r8, r6
bic r12, r4, r2
bic r14, r2, r0
eor r4, r4, r11
eor r0, r0, r12
eor r8, r8, r14
// r4 has already been xored with r8 & ~r6, which is 0 wherever r6 is 1,
// so r6 & ~r4 gives the same result as with the previous r4
bic r14, r6, r4
eor r6, r6, r10
// upper half and-not terms (the r2 update below completes the lower half)
bic r12, r1, r9
bic r10, r5, r3
bic r11, r9, r7
eor r2, r2, r14
eor r1, r1, r10
eor r5, r5, r11
// same observation as for r4: r1 and r5 only changed where r3, respectively r7, is 0
bic r14, r3, r1
bic r10, r7, r5
eor r7, r7, r12
eor r7, r7, r5
eor r9, r9, r14
eor r3, r3, r10
// remaining xors between words: r6 ^= r4 (r7 ^= r5 was done above), r2/r3 ^= r0/r1, r0/r1 ^= r8/r9
eor r6, r6, r4
eor r2, r2, r0
eor r3, r3, r1
eor r0, r0, r8
eor r1, r1, r9
// complement both halves of r4/r5
mvn r4, r4
mvn r5, r5
// linear diffusion layer
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 high part
rors r11,r9,#(20)
eors r9,r11,r9
rors r10,r8,#(4)
eors r9,r10,r9
//c4 low part
rors r11,r11,#((32-20+3)%32)
eors r11,r11,r8
rors r10,r8,#(20)
eors r8,r10,r11
//c0 ^= gascon_rotr64_interleaved(c0, 28) ^ gascon_rotr64_interleaved(c0, 19);
//c0 high part
rors r11,r1,#(14)
eors r1,r11,r1
rors r10,r0,#(10)
eors r1,r10,r1
//c0 low part
rors r11,r11,#((32-14+9)%32)
eors r11,r11,r0
rors r10,r0,#(14)
eors r0,r10,r11
//c0 step 1
eor r11, r1, r0, ror #10
eor r10, r0, r1, ror #9
//c1 ^= gascon_rotr64_interleaved(c1, 38) ^ gascon_rotr64_interleaved(c1, 61);
//c1 high part
rors r11,r3,#(19)
eors r3,r11,r3
rors r10,r2,#(31)
eors r3,r10,r3
//c1 low part
rors r11,r11,#((32-19+30)%32)
eors r11,r11,r2
rors r10,r2,#(19)
eors r2,r10,r11
//c1 step 1
eor r14, r3, r2, ror #31
eor r12, r2, r3, ror #30
//c0 step 2
eor r1, r11, r1, ror #14
eor r0, r10, r0, ror #14
//c1 step 2
eor r3, r14, r3, ror #19
eor r2, r12, r2, ror #19
//c2 ^= gascon_rotr64_interleaved(c2, 6) ^ gascon_rotr64_interleaved(c2, 1);
//c2 high part
rors r11,r5,#(3)
eors r5,r11,r5
rors r10,r4,#(1)
eors r5,r10,r5
//c2 low part
rors r11,r11,#((32-3+0)%32)
eors r11,r11,r4
rors r10,r4,#(3)
eors r4,r10,r11
//c2 step 1
eor r11, r5, r4, ror #1
eor r10, r4, r5, ror #0
//c3 ^= gascon_rotr64_interleaved(c3, 10) ^ gascon_rotr64_interleaved(c3, 17);
//c3 high part
rors r11,r7,#(5)
eors r7,r11,r7
rors r10,r6,#(9)
eors r7,r10,r7
//c3 low part
rors r11,r11,#((32-5+8)%32)
eors r11,r11,r6
rors r10,r6,#(5)
eors r6,r10,r11
//c3 step 1
eor r14, r7, r6, ror #9
eor r12, r6, r7, ror #8
//c2 step 2
eor r5, r11, r5, ror #3
eor r4, r10, r4, ror #3
//c3 step 2
eor r7, r14, r7, ror #5
eor r6, r12, r6, ror #5
//c4 ^= gascon_rotr64_interleaved(c4, 40) ^ gascon_rotr64_interleaved(c4, 7);
//c4 step 1
eor r11, r9, r8, ror #4
eor r10, r8, r9, ror #3
//c4 step 2
eor r9, r11, r9, ror #20
eor r8, r10, r8, ror #20
//state:
//r0 to r9: c
//r10,r11,r12 destroyed
//r10,r11,r12,r14 destroyed
pop {r14}
//update C
STMIA.W r14,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
......
......@@ -3,41 +3,73 @@
//Optional file to select the best implementation for each chip
#ifdef STM32H743xx
#define __DRYGASCON_ARM_SELECTOR_V7M__
#define __DRYGASCON_ARM_SELECTOR_FPU__
#define __DRYGASCON_ARM_SELECTOR_V7M_FPU__
#define __DRYGASCON_ARM_SELECTOR_FOUND__
#endif
#ifdef STM32F746xx
#define __DRYGASCON_ARM_SELECTOR_V7M_FPU__
#define __DRYGASCON_ARM_SELECTOR_FOUND__
#endif
#ifdef STM32F411xx
#define __DRYGASCON_ARM_SELECTOR_V7M_FPU__
#define __DRYGASCON_ARM_SELECTOR_FOUND__
#endif
#ifdef STM32L552xx //technically it is V8M but we don't have a specific code for that one
#define __DRYGASCON_ARM_SELECTOR_V7M__
#define __DRYGASCON_ARM_SELECTOR_FPU__
#define __DRYGASCON_ARM_SELECTOR_FOUND__
#endif
#ifdef STM32F103xx
#define __DRYGASCON_ARM_SELECTOR_V7M__
#define __DRYGASCON_ARM_SELECTOR_FOUND__
#endif
#ifdef STM32L011xx
#define __DRYGASCON_ARM_SELECTOR_V6M__
#define __DRYGASCON_ARM_SELECTOR_FOUND__
#endif
#ifdef __SAM3X8E__
#define __DRYGASCON_ARM_SELECTOR_V7M__
#define __DRYGASCON_ARM_SELECTOR_FOUND__
#endif
//TODO: add more chips here
#ifdef __DRYGASCON_ARM_SELECTOR_V7M__
#ifdef __DRYGASCON_ARM_SELECTOR_FPU__
#define DRYGASCON_G_OPT drygascon128_g_v7m_fpu
#define DRYGASCON_F_OPT drygascon128_f_v7m_fpu
#define DRYGASCON_G0_OPT drygascon128_g0_v7m_fpu
#else
#define DRYGASCON_G_OPT drygascon128_g_v7m
#define DRYGASCON_F_OPT drygascon128_f_v7m
#define DRYGASCON_G0_OPT drygascon128_g0_v7m
//fallback: only evaluated when nothing has defined __DRYGASCON_ARM_SELECTOR_FOUND__ yet
#ifndef __DRYGASCON_ARM_SELECTOR_FOUND__
//more generic defines catching whole families
//F4, F7 and H7 families select the V7M_FPU implementation
#if defined(STM32F4xx) || defined(STM32F7xx) || defined(STM32H7xx)
#define __DRYGASCON_ARM_SELECTOR_V7M_FPU__
#define __DRYGASCON_ARM_SELECTOR_FOUND__
#endif
//F1 family selects the plain V7M implementation
#if defined(STM32F1xx)
#define __DRYGASCON_ARM_SELECTOR_V7M__
#define __DRYGASCON_ARM_SELECTOR_FOUND__
#endif
#endif
//map the selected implementation to the names of the optimized G, F and G0 functions
//NOTE(review): the blocks below are independent of each other; if more than one
//selector macro is defined, DRYGASCON_*_OPT gets redefined - presumably exactly
//one selector is expected to be set, confirm
#ifdef __DRYGASCON_ARM_SELECTOR_V7M_FPU__
#define DRYGASCON_G_OPT drygascon128_g_v7m_fpu
#define DRYGASCON_F_OPT drygascon128_f_v7m_fpu
#define DRYGASCON_G0_OPT drygascon128_g0_v7m_fpu
#endif
//no chip or family block visible here defines the V7M_FPU_X selector
#ifdef __DRYGASCON_ARM_SELECTOR_V7M_FPU_X__
#define DRYGASCON_G_OPT drygascon128_g_v7m_fpu_x
#define DRYGASCON_F_OPT drygascon128_f_v7m_fpu_x
#define DRYGASCON_G0_OPT drygascon128_g0_v7m_fpu_x
#endif
#ifdef __DRYGASCON_ARM_SELECTOR_V7M__
#define DRYGASCON_G_OPT drygascon128_g_v7m
#define DRYGASCON_F_OPT drygascon128_f_v7m
#define DRYGASCON_G0_OPT drygascon128_g0_v7m
#endif
#ifdef __DRYGASCON_ARM_SELECTOR_V6M__
#define DRYGASCON_G_OPT drygascon128_g_v6m
#define DRYGASCON_F_OPT drygascon128_f_v6m
......
......@@ -8,7 +8,7 @@ int crypto_aead_encrypt
const unsigned char *npub,
const unsigned char *k)
{
return drygascon128_aead_encrypt
return drygascon128k16_aead_encrypt
(c, clen, m, mlen, ad, adlen, nsec, npub, k);
}
......@@ -20,6 +20,6 @@ int crypto_aead_decrypt
const unsigned char *npub,
const unsigned char *k)
{
return drygascon128_aead_decrypt
return drygascon128k16_aead_decrypt
(m, mlen, nsec, c, clen, ad, adlen, npub, k);
}
......@@ -245,7 +245,7 @@ typedef union
*/
typedef struct
{
gascon128_state_t c; /**< GASCON-128 state for the capacity */
gascon128_state_t c; /**< GASCON-128 state for the capacity */
uint32_t domain; /**< Domain value to mix on next F call */
uint32_t rounds; /**< Number of rounds for next G call */
drysponge128_rate_t r; /**< Buffer for a rate block of data */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment