knot128v1: convert permutation256 inline assembly to GCC extended asm

Both Cortex-M variants previously named C variables directly inside the
assembly text.  The template now refers to them through named operands
(%[name]) that are bound in the operand lists, and the loop label carries
the %= suffix so that every emitted copy of the asm gets a unique label.

Operand and control-flow details:

  * armcortexm_1: enc_loop_%= sits after the state loads and the mask
    setup, exactly where enc_loop used to be.  Placing it before the loads
    would re-read the unmodified state from `in` on every round.
  * armcortexm_1: the 1-bit and 8-bit masks are encoded as immediates
    (#1, #0xff) instead of register inputs, and the `one`/`ff` locals are
    removed.  Together with `in` the asm needs 14 registers.
  * armcortexm_2: `s2` is never referenced by the template, so the local
    and its operand are removed.
  * w0-w7 and the scratch registers are early-clobber ("=&r") because they
    are written while %[in] is still required for the final stores.
  * rounds and rc are read-write operands ("+r").
  * "cc" and "memory" are listed as clobbers: the flag-setting instructions
    change the condition codes and the asm reads and writes memory through
    `in` and `rc`.

diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_1/encrypt.c b/knot/Implementations/crypto_aead/knot128v1/armcortexm_1/encrypt.c
index a7fd7a4..2cb824b 100644
--- a/knot/Implementations/crypto_aead/knot128v1/armcortexm_1/encrypt.c
+++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_1/encrypt.c
@@ -59,87 +59,96 @@ static void permutation256(unsigned char *in, int rounds, unsigned char *rc) {
-    uint32_t one = 0x1;
-    uint32_t ff = 0xff;
     __asm volatile(
-        "ldr w0, [in] \n\t"
-        "ldr w4, [in, #4] \n\t"
-        "ldr w1, [in, #8] \n\t"
-        "ldr w5, [in, #12] \n\t"
-        "ldr w2, [in, #16] \n\t"
-        "ldr w6, [in, #20] \n\t"
-        "ldr w3, [in, #24] \n\t"
-        "ldr w7, [in, #28] \n\t"
-        "mov s0, 0xfff \n\t"
-        "mov s2, 0x1fff \n\t"
-        "lsl s2, s2, #12 \n\t"
-        "eors s2, s2, s0 \n\t"
-        "enc_loop: \n\t"
+        "ldr %[w0], [%[in]] \n\t"
+        "ldr %[w4], [%[in], #4] \n\t"
+        "ldr %[w1], [%[in], #8] \n\t"
+        "ldr %[w5], [%[in], #12] \n\t"
+        "ldr %[w2], [%[in], #16] \n\t"
+        "ldr %[w6], [%[in], #20] \n\t"
+        "ldr %[w3], [%[in], #24] \n\t"
+        "ldr %[w7], [%[in], #28] \n\t"
+        "mov %[s0], 0xfff \n\t"
+        "mov %[s2], 0x1fff \n\t"
+        "lsl %[s2], %[s2], #12 \n\t"
+        "eors %[s2], %[s2], %[s0] \n\t"
+        "enc_loop_%=: \n\t"
         "/*add round const*/ \n\t"
-        "ldrb s0, [rc] \n\t"
-        "eors w0, w0, s0 \n\t"
+        "ldrb %[s0], [%[rc]] \n\t"
+        "eors %[w0], %[w0], %[s0] \n\t"
         "/*sbox first column*/ \n\t"
-        "mvns w0, w0 \n\t"
-        "ands s0, w1, w0 \n\t"
-        "eors s0, w2, s0 \n\t"
-        "orrs w2, w1, w2 \n\t"
-        "eors w0, w3, w0 \n\t"
-        "eors w2, w2, w0 \n\t"
-        "eors s1, w1, w3 \n\t"
-        "eors w3, w3, s0 \n\t"
-        "ands w0, s0, w0 \n\t"
-        "eors w0, s1, w0 \n\t"
-        "ands w1, w2, s1 \n\t"
-        "eors w1, s0, w1 \n\t"
+        "mvns %[w0], %[w0] \n\t"
+        "ands %[s0], %[w1], %[w0] \n\t"
+        "eors %[s0], %[w2], %[s0] \n\t"
+        "orrs %[w2], %[w1], %[w2] \n\t"
+        "eors %[w0], %[w3], %[w0] \n\t"
+        "eors %[w2], %[w2], %[w0] \n\t"
+        "eors %[s1], %[w1], %[w3] \n\t"
+        "eors %[w3], %[w3], %[s0] \n\t"
+        "ands %[w0], %[s0], %[w0] \n\t"
+        "eors %[w0], %[s1], %[w0] \n\t"
+        "ands %[w1], %[w2], %[s1] \n\t"
+        "eors %[w1], %[s0], %[w1] \n\t"
         "/*sbox second column*/ \n\t"
-        "mvns w4, w4 \n\t"
-        "ands s0, w5, w4 \n\t"
-        "eors s0, w6, s0 \n\t"
-        "orrs w6, w5, w6 \n\t"
-        "eors w4, w7, w4 \n\t"
-        "eors w6, w6, w4 \n\t"
-        "eors s1, w5, w7 \n\t"
-        "eors w7, w7, s0 \n\t"
-        "ands w4, s0, w4 \n\t"
-        "eors w4, s1, w4 \n\t"
-        "ands w5, w6, s1 \n\t"
-        "eors w5, s0, w5 \n\t"
+        "mvns %[w4], %[w4] \n\t"
+        "ands %[s0], %[w5], %[w4] \n\t"
+        "eors %[s0], %[w6], %[s0] \n\t"
+        "orrs %[w6], %[w5], %[w6] \n\t"
+        "eors %[w4], %[w7], %[w4] \n\t"
+        "eors %[w6], %[w6], %[w4] \n\t"
+        "eors %[s1], %[w5], %[w7] \n\t"
+        "eors %[w7], %[w7], %[s0] \n\t"
+        "ands %[w4], %[s0], %[w4] \n\t"
+        "eors %[w4], %[s1], %[w4] \n\t"
+        "ands %[w5], %[w6], %[s1] \n\t"
+        "eors %[w5], %[s0], %[w5] \n\t"
         "/*rotate shift left 1 bit*/ \n\t"
-        "ror s0, w1, #31 \n\t"
-        "ands s0, s0, one \n\t"
-        "lsl w1, w1, #1 \n\t"
-        "ror s1, w5, #31 \n\t"
-        "ands s1, s1, one \n\t"
-        "eors w1, w1, s1 \n\t"
-        "lsl w5, w5, #1 \n\t"
-        "eors w5, w5, s0 \n\t"
+        "ror %[s0], %[w1], #31 \n\t"
+        "ands %[s0], %[s0], #1 \n\t"
+        "lsl %[w1], %[w1], #1 \n\t"
+        "ror %[s1], %[w5], #31 \n\t"
+        "ands %[s1], %[s1], #1 \n\t"
+        "eors %[w1], %[w1], %[s1] \n\t"
+        "lsl %[w5], %[w5], #1 \n\t"
+        "eors %[w5], %[w5], %[s0] \n\t"
         "/*rotate shift left 8 bits*/ \n\t"
-        "ror s0, w2, #24 \n\t"
-        "ands s0, s0, ff \n\t"
-        "lsl w2, w2, #8 \n\t"
-        "ror s1, w6, #24 \n\t"
-        "ands s1, s1, ff \n\t"
-        "eors w2, w2, s1 \n\t"
-        "lsl w6, w6, #8 \n\t"
-        "eors w6, w6, s0 \n\t"
+        "ror %[s0], %[w2], #24 \n\t"
+        "ands %[s0], %[s0], #0xff \n\t"
+        "lsl %[w2], %[w2], #8 \n\t"
+        "ror %[s1], %[w6], #24 \n\t"
+        "ands %[s1], %[s1], #0xff \n\t"
+        "eors %[w2], %[w2], %[s1] \n\t"
+        "lsl %[w6], %[w6], #8 \n\t"
+        "eors %[w6], %[w6], %[s0] \n\t"
         "/*rotate shift left 25 bits*/ \n\t"
-        "ror s0, w3, #7 \n\t"
-        "ands s0, s0, s2 \n\t"
-        "lsl w3, w3, #25 \n\t"
-        "ror s1, w7, #7 \n\t"
-        "ands s1, s1, s2 \n\t"
-        "eors w3, w3, s1 \n\t"
-        "lsl w7, w7, #25 \n\t"
-        "eors w7, w7, s0 \n\t"
+        "ror %[s0], %[w3], #7 \n\t"
+        "ands %[s0], %[s0], %[s2] \n\t"
+        "lsl %[w3], %[w3], #25 \n\t"
+        "ror %[s1], %[w7], #7 \n\t"
+        "ands %[s1], %[s1], %[s2] \n\t"
+        "eors %[w3], %[w3], %[s1] \n\t"
+        "lsl %[w7], %[w7], #25 \n\t"
+        "eors %[w7], %[w7], %[s0] \n\t"
         "/*loop control*/ \n\t"
-        "adds rc, rc, #1 \n\t"
-        "subs rounds, rounds, #1 \n\t"
-        "bne enc_loop \n\t"
-        "str w0, [in] \n\t"
-        "str w4, [in, #4] \n\t"
-        "str w1, [in, #8] \n\t"
-        "str w5, [in, #12] \n\t"
-        "str w2, [in, #16] \n\t"
-        "str w6, [in, #20] \n\t"
-        "str w3, [in, #24] \n\t"
-        "str w7, [in, #28] \n\t"
+        "adds %[rc], %[rc], #1 \n\t"
+        "subs %[rounds], %[rounds], #1 \n\t"
+        "bne enc_loop_%= \n\t"
+        "str %[w0], [%[in]] \n\t"
+        "str %[w4], [%[in], #4] \n\t"
+        "str %[w1], [%[in], #8] \n\t"
+        "str %[w5], [%[in], #12] \n\t"
+        "str %[w2], [%[in], #16] \n\t"
+        "str %[w6], [%[in], #20] \n\t"
+        "str %[w3], [%[in], #24] \n\t"
+        "str %[w7], [%[in], #28] \n\t"
+
+        /* State and scratch registers are early-clobber: they are written
+         * while %[in] is still needed for the final stores. */
+        : [rounds] "+r" (rounds), [rc] "+r" (rc),
+          [w0] "=&r" (w0), [w1] "=&r" (w1), [w2] "=&r" (w2), [w3] "=&r" (w3),
+          [w4] "=&r" (w4), [w5] "=&r" (w5), [w6] "=&r" (w6), [w7] "=&r" (w7),
+          [s0] "=&r" (s0), [s1] "=&r" (s1), [s2] "=&r" (s2)
+        : [in] "r" (in)
+        /* The flag-setting instructions change the condition codes; memory
+         * is read through %[in]/%[rc] and written through %[in]. */
+        : "cc", "memory"
     );
 }
 
diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/encrypt.c b/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/encrypt.c
index 4b84924..56d4962 100644
--- a/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/encrypt.c
+++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/encrypt.c
@@ -80,74 +80,82 @@ unsigned char constant6Format[63] = {
 static void permutation256(unsigned int *in, int rounds, unsigned char *rc) {
     uint32_t w0, w1, w2, w3, w4, w5, w6, w7;
-    uint32_t s0, s1, s2;
-    uint32_t one = 0x1;
-    uint32_t i=0;
-    uint32_t ff = 0xff;
+    uint32_t s0, s1;
     __asm volatile(
-        "ldr w0, [in] \n\t"
-        "ldr w4, [in, #4] \n\t"
-        "ldr w1, [in, #8] \n\t"
-        "ldr w5, [in, #12] \n\t"
-        "ldr w2, [in, #16] \n\t"
-        "ldr w6, [in, #20] \n\t"
-        "ldr w3, [in, #24] \n\t"
-        "ldr w7, [in, #28] \n\t"
-        "enc_loop: \n\t"
+        "ldr %[w0], [%[in]] \n\t"
+        "ldr %[w4], [%[in], #4] \n\t"
+        "ldr %[w1], [%[in], #8] \n\t"
+        "ldr %[w5], [%[in], #12] \n\t"
+        "ldr %[w2], [%[in], #16] \n\t"
+        "ldr %[w6], [%[in], #20] \n\t"
+        "ldr %[w3], [%[in], #24] \n\t"
+        "ldr %[w7], [%[in], #28] \n\t"
+        "enc_loop_%=: \n\t"
         "/*add round const s0 s1*/ \n\t"
-        "ldrb s0, [rc] \n\t"
-        "LSR s1, s0, #4 \n\t"
-        "and s0, s0, 0xf \n\t"
-        "eors w4, w4, s0 \n\t"
-        "eors w0, w0, s1 \n\t"
+        "ldrb %[s0], [%[rc]] \n\t"
+        "LSR %[s1], %[s0], #4 \n\t"
+        "and %[s0], %[s0], 0xf \n\t"
+        "eors %[w4], %[w4], %[s0] \n\t"
+        "eors %[w0], %[w0], %[s1] \n\t"
         "/*sbox first column*/ \n\t"
-        "mvns w0, w0 \n\t"
-        "ands s0, w1, w0 \n\t"
-        "eors s0, w2, s0 \n\t"
-        "orrs w2, w1, w2 \n\t"
-        "eors w0, w3, w0 \n\t"
-        "eors w2, w2, w0 \n\t"
-        "eors s1, w1, w3 \n\t"
-        "eors w3, w3, s0 \n\t"
-        "ands w0, s0, w0 \n\t"
-        "eors w0, s1, w0 \n\t"
-        "ands w1, w2, s1 \n\t"
-        "eors w1, s0, w1 \n\t"
+        "mvns %[w0], %[w0] \n\t"
+        "ands %[s0], %[w1], %[w0] \n\t"
+        "eors %[s0], %[w2], %[s0] \n\t"
+        "orrs %[w2], %[w1], %[w2] \n\t"
+        "eors %[w0], %[w3], %[w0] \n\t"
+        "eors %[w2], %[w2], %[w0] \n\t"
+        "eors %[s1], %[w1], %[w3] \n\t"
+        "eors %[w3], %[w3], %[s0] \n\t"
+        "ands %[w0], %[s0], %[w0] \n\t"
+        "eors %[w0], %[s1], %[w0] \n\t"
+        "ands %[w1], %[w2], %[s1] \n\t"
+        "eors %[w1], %[s0], %[w1] \n\t"
         "/*sbox second column*/ \n\t"
-        "mvns w4, w4 \n\t"
-        "ands s0, w5, w4 \n\t"
-        "eors s0, w6, s0 \n\t"
-        "orrs w6, w5, w6 \n\t"
-        "eors w4, w7, w4 \n\t"
-        "eors w6, w6, w4 \n\t"
-        "eors s1, w5, w7 \n\t"
-        "eors w7, w7, s0 \n\t"
-        "ands w4, s0, w4 \n\t"
-        "eors w4, s1, w4 \n\t"
-        "ands w5, w6, s1 \n\t"
-        "eors w5, s0, w5 \n\t"
+        "mvns %[w4], %[w4] \n\t"
+        "ands %[s0], %[w5], %[w4] \n\t"
+        "eors %[s0], %[w6], %[s0] \n\t"
+        "orrs %[w6], %[w5], %[w6] \n\t"
+        "eors %[w4], %[w7], %[w4] \n\t"
+        "eors %[w6], %[w6], %[w4] \n\t"
+        "eors %[s1], %[w5], %[w7] \n\t"
+        "eors %[w7], %[w7], %[s0] \n\t"
+        "ands %[w4], %[s0], %[w4] \n\t"
+        "eors %[w4], %[s1], %[w4] \n\t"
+        "ands %[w5], %[w6], %[s1] \n\t"
+        "eors %[w5], %[s0], %[w5] \n\t"
         "/*rotate shift left 1 bit*/ \n\t"
-        "mov s0, w5 \n\t"
-        "ROR w5, w1, #31 \n\t"
-        "mov w1, s0 \n\t"
+        "mov %[s0], %[w5] \n\t"
+        "ROR %[w5], %[w1], #31 \n\t"
+        "mov %[w1], %[s0] \n\t"
         "/*rotate shift left 8 bits*/ \n\t"
-        "ROR w2, w2, #28 \n\t"
-        "ROR w6, w6, #28 \n\t"
+        "ROR %[w2], %[w2], #28 \n\t"
+        "ROR %[w6], %[w6], #28 \n\t"
         "/*rotate shift left 25 bits*/ \n\t"
-        "mov s0, w3 \n\t"
-        "ROR w3, w7, #20 \n\t"
-        "ROR w7, s0, #19 \n\t"
+        "mov %[s0], %[w3] \n\t"
+        "ROR %[w3], %[w7], #20 \n\t"
+        "ROR %[w7], %[s0], #19 \n\t"
         "/*loop control*/ \n\t"
-        "adds rc, rc, #1 \n\t"
-        "subs rounds, rounds, #1 \n\t"
-        "bne enc_loop \n\t"
-        "str w0, [in] \n\t"
-        "str w4, [in, #4] \n\t"
-        "str w1, [in, #8] \n\t"
-        "str w5, [in, #12] \n\t"
-        "str w2, [in, #16] \n\t"
-        "str w6, [in, #20] \n\t"
-        "str w3, [in, #24] \n\t"
-        "str w7, [in, #28] \n\t"
+        "adds %[rc], %[rc], #1 \n\t"
+        "subs %[rounds], %[rounds], #1 \n\t"
+        "bne enc_loop_%= \n\t"
+        "str %[w0], [%[in]] \n\t"
+        "str %[w4], [%[in], #4] \n\t"
+        "str %[w1], [%[in], #8] \n\t"
+        "str %[w5], [%[in], #12] \n\t"
+        "str %[w2], [%[in], #16] \n\t"
+        "str %[w6], [%[in], #20] \n\t"
+        "str %[w3], [%[in], #24] \n\t"
+        "str %[w7], [%[in], #28] \n\t"
+
+        /* State and scratch registers are early-clobber: they are written
+         * while %[in] is still needed for the final stores. */
+        : [rounds] "+r" (rounds), [rc] "+r" (rc),
+          [w0] "=&r" (w0), [w1] "=&r" (w1), [w2] "=&r" (w2), [w3] "=&r" (w3),
+          [w4] "=&r" (w4), [w5] "=&r" (w5), [w6] "=&r" (w6), [w7] "=&r" (w7),
+          [s0] "=&r" (s0), [s1] "=&r" (s1)
+        : [in] "r" (in)
+        /* The flag-setting instructions change the condition codes; memory
+         * is read through %[in]/%[rc] and written through %[in]. */
+        : "cc", "memory"
     );
 }
 