Commit b6e4586f by Enrico Pozzobon

Fixed knot128v1/armcortexm_1 for GCC

parent e49defae
......@@ -10,14 +10,14 @@
#define KNOT_CIPHER 1
#if defined(KNOT_CIPHER) && (KNOT_CIPHER == 1)
/*
 * Round-constant table. Callers in this file pass it as the `rc` argument of
 * permutation256, which consumes one byte per round starting at index 0.
 * Every entry fits in 6 bits (largest value is 0x3f).
 *
 * NOTE(review): the eight rows appear twice below because this listing shows
 * both the removed and the re-added lines of a whitespace-only diff hunk;
 * the declared size is 63 entries, i.e. one copy of the rows.
 */
unsigned char constant6[63] = {
0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06,
0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29,
0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28,
0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24,
0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37,
0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26,
0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f,
0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20 };
0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06,
0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29,
0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28,
0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24,
0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37,
0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26,
0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f,
0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20 };
/* State
* w4 w0
......@@ -26,225 +26,261 @@ unsigned char constant6[63] = {
* w7 w3
* Sbox
t1 = ~a;
t2 = b & t1;
t3 = c ^ t2;
h = d ^ t3;
t5 = b | c;
t6 = d ^ t1;
g = t5 ^ t6;
t8 = b ^ d;
t9 = t3 & t6;
e = t8 ^ t9;
t11 = g & t8;
f = t3 ^ t11;
t1 = ~a;
t2 = b & t1;
t3 = c ^ t2;
h = d ^ t3;
t5 = b | c;
t6 = d ^ t1;
g = t5 ^ t6;
t8 = b ^ d;
t9 = t3 & t6;
e = t8 ^ t9;
t11 = g & t8;
f = t3 ^ t11;
* Sbox after change
a = ~a;
s0 = b & a;
s0 = c ^ s0;
c = b | c;
a = d ^ a;
c = c ^ a;
s1 = b ^ d;
d = d ^ s0;
a = s0 & a;
a = s1 ^ a;
b = c & s1;
b = s0 ^ b;
a = ~a;
s0 = b & a;
s0 = c ^ s0;
c = b | c;
a = d ^ a;
c = c ^ a;
s1 = b ^ d;
d = d ^ s0;
a = s0 & a;
a = s1 ^ a;
b = c & s1;
b = s0 ^ b;
/*
 * Applies `rounds` rounds of the 256-bit permutation to the 32-byte state at
 * `in`, in place, using GCC extended inline assembly with named operands.
 *
 * in     - pointer to the 32-byte state; read as eight 32-bit words at byte
 *          offsets 0..28 and written back at the same offsets after the loop.
 * rounds - number of loop iterations; decremented until it reaches zero, so
 *          it must be non-zero on entry (the loop tests after decrementing).
 * rc     - pointer to the round constants; one byte is read per round and
 *          the pointer is advanced by one each round.
 *
 * Word layout: w0/w4 come from offsets 0/4, w1/w5 from 8/12, w2/w6 from
 * 16/20 and w3/w7 from 24/28. The rotate steps treat (w5:w1), (w6:w2) and
 * (w7:w3) as 64-bit values with the second word of each pair as the low half.
 *
 * NOTE(review): in this listing the loop label sits before the state loads,
 * so every iteration would reload the state from `in` although nothing is
 * stored until the loop ends. The older hard-coded-register version placed
 * the label after the loads. This listing is a rendered diff, so confirm the
 * label position against the real file.
 *
 * NOTE(review): all outputs use "=r" without the early-clobber modifier,
 * yet `in`, `ff` and `one` are still read after outputs have been written.
 * The GCC documentation calls for "=&r" in that situation; confirm. The end
 * of the asm statement (clobber list, closing parenthesis) is not visible
 * here, so whether "memory" and "cc" are declared cannot be checked.
 */
static void permutation256(unsigned char *in, int rounds, unsigned char *rc) {
uint32_t w0, w1, w2, w3, w4, w5, w6, w7;
/* s0 and s1 are scratch registers; s2 holds the 25-bit mask built below. */
uint32_t s0, s1, s2;
/* Masks for the carried-over bits of the 1-bit and 8-bit rotations. */
uint32_t one = 0x1;
uint32_t ff = 0xff;
__asm volatile(
/* "%=" expands to a number unique to this asm instance, keeping the label unique. */
"enc_loop_%=: \n\t"
/* Load the eight state words. */
"ldr %[w0], [%[in]] \n\t"
"ldr %[w4], [%[in], #4] \n\t"
"ldr %[w1], [%[in], #8] \n\t"
"ldr %[w5], [%[in], #12] \n\t"
"ldr %[w2], [%[in], #16] \n\t"
"ldr %[w6], [%[in], #20] \n\t"
"ldr %[w3], [%[in], #24] \n\t"
"ldr %[w7], [%[in], #28] \n\t"
/* Build s2 = (0x1fff << 12) ^ 0xfff = 0x1ffffff, the mask for the 25-bit rotation. */
"mov %[s0], 0xfff \n\t"
"mov %[s2], 0x1fff \n\t"
"lsl %[s2], %[s2], #12 \n\t"
"eors %[s2], %[s2], %[s0] \n\t"
"/*add round const*/ \n\t"
/* XOR the current round-constant byte into w0. */
"ldrb %[s0], [%[rc]] \n\t"
"eors %[w0], %[w0], %[s0] \n\t"
"/*sbox first column*/ \n\t"
/* Bit-sliced S-box on (a, b, c, d) = (w0, w1, w2, w3), following the
 * "Sbox after change" sequence documented above this function. */
"mvns %[w0], %[w0] \n\t"
"ands %[s0], %[w1], %[w0] \n\t"
"eors %[s0], %[w2], %[s0] \n\t"
"orrs %[w2], %[w1], %[w2] \n\t"
"eors %[w0], %[w3], %[w0] \n\t"
"eors %[w2], %[w2], %[w0] \n\t"
"eors %[s1], %[w1], %[w3] \n\t"
"eors %[w3], %[w3], %[s0] \n\t"
"ands %[w0], %[s0], %[w0] \n\t"
"eors %[w0], %[s1], %[w0] \n\t"
"ands %[w1], %[w2], %[s1] \n\t"
"eors %[w1], %[s0], %[w1] \n\t"
"/*sbox second column*/ \n\t"
/* Same S-box sequence on (a, b, c, d) = (w4, w5, w6, w7). */
"mvns %[w4], %[w4] \n\t"
"ands %[s0], %[w5], %[w4] \n\t"
"eors %[s0], %[w6], %[s0] \n\t"
"orrs %[w6], %[w5], %[w6] \n\t"
"eors %[w4], %[w7], %[w4] \n\t"
"eors %[w6], %[w6], %[w4] \n\t"
"eors %[s1], %[w5], %[w7] \n\t"
"eors %[w7], %[w7], %[s0] \n\t"
"ands %[w4], %[s0], %[w4] \n\t"
"eors %[w4], %[s1], %[w4] \n\t"
"ands %[w5], %[w6], %[s1] \n\t"
"eors %[w5], %[s0], %[w5] \n\t"
"/*rotate shift left 1 bit*/ \n\t"
/* 64-bit rotate left by 1 of (w5:w1): each word is shifted left and the
 * top bit of the other word is moved into its bit 0. */
"ror %[s0], %[w1], #31 \n\t"
"ands %[s0], %[s0], %[one] \n\t"
"lsl %[w1], %[w1], #1 \n\t"
"ror %[s1], %[w5], #31 \n\t"
"ands %[s1], %[s1], %[one] \n\t"
"eors %[w1], %[w1], %[s1] \n\t"
"lsl %[w5], %[w5], #1 \n\t"
"eors %[w5], %[w5], %[s0] \n\t"
"/*rotate shift left 8 bits*/ \n\t"
/* 64-bit rotate left by 8 of (w6:w2): the top byte of each word moves
 * into the low byte of the other. */
"ror %[s0], %[w2], #24 \n\t"
"ands %[s0], %[s0], %[ff] \n\t"
"lsl %[w2], %[w2], #8 \n\t"
"ror %[s1], %[w6], #24 \n\t"
"ands %[s1], %[s1], %[ff] \n\t"
"eors %[w2], %[w2], %[s1] \n\t"
"lsl %[w6], %[w6], #8 \n\t"
"eors %[w6], %[w6], %[s0] \n\t"
"/*rotate shift left 25 bits*/ \n\t"
/* 64-bit rotate left by 25 of (w7:w3): the top 25 bits of each word,
 * isolated with the s2 mask, move into the low 25 bits of the other. */
"ror %[s0], %[w3], #7 \n\t"
"ands %[s0], %[s0], %[s2] \n\t"
"lsl %[w3], %[w3], #25 \n\t"
"ror %[s1], %[w7], #7 \n\t"
"ands %[s1], %[s1], %[s2] \n\t"
"eors %[w3], %[w3], %[s1] \n\t"
"lsl %[w7], %[w7], #25 \n\t"
"eors %[w7], %[w7], %[s0] \n\t"
"/*loop control*/ \n\t"
/* Advance to the next round constant; repeat until rounds reaches zero. */
"adds %[rc], %[rc], #1 \n\t"
"subs %[rounds], %[rounds], #1 \n\t"
"bne enc_loop_%= \n\t"
/* Write the eight state words back to the same offsets they were loaded from. */
"str %[w0], [%[in]] \n\t"
"str %[w4], [%[in], #4] \n\t"
"str %[w1], [%[in], #8] \n\t"
"str %[w5], [%[in], #12] \n\t"
"str %[w2], [%[in], #16] \n\t"
"str %[w6], [%[in], #20] \n\t"
"str %[w3], [%[in], #24] \n\t"
"str %[w7], [%[in], #28] \n\t"
/* Outputs: rounds and rc are modified by the asm; w0..w7 and s0..s2 are
 * declared as outputs so the compiler allocates registers for them. */
: [rounds] "=r" (rounds), [rc] "=r" (rc),
[w0] "=r" (w0), [w1] "=r" (w1), [w2] "=r" (w2), [w3] "=r" (w3),
[w4] "=r" (w4), [w5] "=r" (w5), [w6] "=r" (w6), [w7] "=r" (w7),
[s0] "=r" (s0), [s1] "=r" (s1), [s2] "=r" (s2)
/* Inputs: "[rounds]" and "[rc]" are matching constraints that tie each
 * initial value to the same register as the corresponding output. */
: [in] "r" (in), "[rounds]" (rounds), "[rc]" (rc), [ff] "r" (ff), [one] "r" (one)
/* ----------------------------------- */
/* r0 - point of plain keys */
/* r1 - rounds */
/* r2 - round constants */
/* r3 - tmp 0 */
/* r4 - p0 lower 32 bits */
/* r5 - p1 lower 32 bits */
/* r6 - p2 lower 32 bits */
/* r7 - p3 lower 32 bits */
/* r8 - p0 32 bits */
/* r9 - p1 32 bits */
/* r10 - p2 32 bits */
/* r11 - p3 32 bits */
/* r12 - tmp 1 */
/* r14 - tmp 2 */
asm volatile (
* the first argument is stored in r0, the second in r1, third in r2 and fourth in r3
* if '!' is appended to the address operand, the base register is updated (write-back)
"sub sp, sp, #12 \n\t"
"/* store 0xff */ \n\t"
"mov r3, 0xff \n\t"
"str r3, [sp, #4] \n\t"
"/* store 0x1ffffff */ \n\t"
"mov r12, r3 \n\t"
"lsl r3, r3, #8 \n\t"
"eors r3, r3, r12 \n\t"
"lsl r3, r3, #8 \n\t"
"eors r3, r3, r12 \n\t"
"lsl r3, r3, #1 \n\t"
"mov r14, 0x1 \n\t"
"eors r3, r3, r14 \n\t"
"str r3, [sp] \n\t"
"/* store 0x1 */ \n\t"
"str r14, [sp, #8] \n\t"
"/* load plain text */\n\t"
"ldr r4, [%[in]] \n\t"
"ldr r8, [%[in], #4] \n\t"
"ldr r5, [%[in], #8] \n\t"
"ldr r9, [%[in], #12] \n\t"
"ldr r6, [%[in], #16] \n\t"
"ldr r10, [%[in], #20] \n\t"
"ldr r7, [%[in], #24] \n\t"
"ldr r11, [%[in], #28] \n\t"
"enc_loop: \n\t"
"/*add round const*/ \n\t"
"ldrb r3, [%[rc]] \n\t"
"eors r4, r4, r3 \n\t"
"/*sbox first column*/ \n\t"
"mvns r4, r4 \n\t"
"ands r3, r5, r4 \n\t"
"eors r3, r6, r3 \n\t"
"orrs r6, r5, r6 \n\t"
"eors r4, r7, r4 \n\t"
"eors r6, r6, r4 \n\t"
"eors r12, r5, r7 \n\t"
"eors r7, r7, r3 \n\t"
"ands r4, r3, r4 \n\t"
"eors r4, r12,r4 \n\t"
"ands r5, r6, r12 \n\t"
"eors r5, r3, r5 \n\t"
"/*sbox second column*/ \n\t"
"mvns r8, r8 \n\t"
"ands r3, r9, r8 \n\t"
"eors r3, r10,r3 \n\t"
"orrs r10, r9, r10 \n\t"
"eors r8, r11,r8 \n\t"
"eors r10, r10,r8 \n\t"
"eors r12, r9, r11 \n\t"
"eors r11, r11,r3 \n\t"
"ands r8, r3, r8 \n\t"
"eors r8, r12,r8 \n\t"
"ands r9, r10,r12 \n\t"
"eors r9, r3, r9 \n\t"
"/*rotate shift left 1 bit*/ \n\t"
"ldr r14, [sp, #8] \n\t"
"ror r3, r5, #31 \n\t"
"ands r3, r3, r14 \n\t"
"lsl r5, r5, #1 \n\t"
"ror r12, r9, #31 \n\t"
"ands r12, r12,r14 \n\t"
"eors r5, r5, r12 \n\t"
"lsl r9, r9, #1 \n\t"
"eors r9, r9, r3 \n\t"
"/*rotate shift left 8 bits*/ \n\t"
"ldr r14, [sp, #4] \n\t"
"ror r3, r6, #24 \n\t"
"ands r3, r3, r14 \n\t"
"lsl r6, r6, #8 \n\t"
"ror r12, r10,#24 \n\t"
"ands r12, r12,r14 \n\t"
"eors r6, r6, r12 \n\t"
"lsl r10, r10,#8 \n\t"
"eors r10, r10,r3 \n\t"
"/*rotate shift left 25 bits*/ \n\t"
"ldr r14, [sp] \n\t"
"ror r3, r7, #7 \n\t"
"ands r3, r3, r14 \n\t"
"lsl r7, r7, #25 \n\t"
"ror r12, r11,#7 \n\t"
"ands r12, r12,r14 \n\t"
"eors r7, r7, r12 \n\t"
"lsl r11, r11,#25 \n\t"
"eors r11, r11,r3 \n\t"
"/*loop control*/ \n\t"
"adds %[rc], %[rc], #1 \n\t"
"subs %[ro], %[ro], #1 \n\t"
"bne enc_loop \n\t"
"str r4, [%[in]] \n\t"
"str r8, [%[in], #4] \n\t"
"str r5, [%[in], #8] \n\t"
"str r9, [%[in], #12] \n\t"
"str r6, [%[in], #16] \n\t"
"str r10, [%[in], #20] \n\t"
"str r7, [%[in], #24] \n\t"
"str r11, [%[in], #28] \n\t"
/* ----------------------------- */
"add sp, sp, #12 \n\t"
: /* output variables - including inputs that are changed */
[ro] "=r" (rounds), [rc] "=r" (rc)
: /* input variables */
[in] "r" (in), "[ro]" (rounds), "[rc]" (rc)
: /* clobber registers for temporary values */
"r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
"r11", "r12", "r14"
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k) {
unsigned int u = 0;
unsigned int v = 0;
unsigned int v1 = 0;
unsigned int i;
unsigned int last_index = 0;
unsigned char *A = NULL;
unsigned char *M = NULL;
unsigned char S[32];
unsigned int *A32 = NULL;
unsigned int *M32 = NULL;
unsigned int *S32 = NULL;
unsigned int *C32 = NULL;
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k) {
unsigned int u = 0;
unsigned int v = 0;
unsigned int v1 = 0;
unsigned int i;
unsigned int last_index = 0;
unsigned char *A = NULL;
unsigned char *M = NULL;
unsigned char S[32];
unsigned int *A32 = NULL;
unsigned int *M32 = NULL;
unsigned int *S32 = NULL;
unsigned int *C32 = NULL;
// pad associated data
if (adlen != 0) {
u = (adlen + 8) >> 3;
A = malloc(u << 3);
if (A == NULL) {
return -1;
memset(A, 0, u << 3);
memcpy(A, ad, adlen);
A[adlen] = 0x01;
A32 = (unsigned int *)A;
// pad associated data
if (adlen != 0) {
u = (adlen + 8) >> 3;
A = malloc(u << 3);
if (A == NULL) {
return -1;
memset(A, 0, u << 3);
memcpy(A, ad, adlen);
A[adlen] = 0x01;
A32 = (unsigned int *)A;
// pad plaintext data
if (mlen != 0) {
v = (mlen + 8) >> 3;
M = malloc(v << 3);
if (M == NULL) {
return -1;
memset(M, 0, v << 3);
memcpy(M, m, mlen);
M[mlen] = 0x01;
M32 = (unsigned int *)M;
// pad plaintext data
if (mlen != 0) {
v = (mlen + 8) >> 3;
M = malloc(v << 3);
if (M == NULL) {
return -1;
memset(M, 0, v << 3);
memcpy(M, m, mlen);
M[mlen] = 0x01;
M32 = (unsigned int *)M;
// initialization
memcpy(S, npub, CRYPTO_NPUBBYTES);
permutation256(S, 52, constant6);
S32 = (unsigned int *)S;
// initialization
memcpy(S, npub, CRYPTO_NPUBBYTES);
permutation256(S, 52, constant6);
S32 = (unsigned int *)S;
// processing associated data
if (adlen != 0) {
for (i = 0; i < u; i++) {
S32[0] ^= A32[0];
S32[1] ^= A32[1];
A32 = A32 + 2;
permutation256(S, 28, constant6);
S[31] ^= 0x80;
// processing associated data
if (adlen != 0) {
for (i = 0; i < u; i++) {
S32[0] ^= A32[0];
S32[1] ^= A32[1];
A32 = A32 + 2;
permutation256(S, 28, constant6);
S[31] ^= 0x80;
// Encryption: processing plaintext data
if (mlen != 0) {
C32 = (unsigned int *)c;
for (i = 0; i < v - 1; i++) {
S32[0] ^= M32[0];
S32[1] ^= M32[1];
M32 = M32 + 2;
C32[0] = S32[0];
C32[1] = S32[1];
C32 = C32 + 2;
permutation256(S, 28, constant6);
v1 = mlen % 8;
last_index = (v - 1) << 3;
for (i = 0; i < v1; i++) {
S[i] ^= M[last_index + i];
c[last_index + i] = S[i];
S[i] ^= 0x01;
// Encryption: processing plaintext data
if (mlen != 0) {
C32 = (unsigned int *)c;
for (i = 0; i < v - 1; i++) {
S32[0] ^= M32[0];
S32[1] ^= M32[1];
M32 = M32 + 2;
C32[0] = S32[0];
C32[1] = S32[1];
C32 = C32 + 2;
permutation256(S, 28, constant6);
v1 = mlen % 8;
last_index = (v - 1) << 3;
for (i = 0; i < v1; i++) {
S[i] ^= M[last_index + i];
c[last_index + i] = S[i];
S[i] ^= 0x01;
// finalization
permutation256(S, 32, constant6);
// finalization
permutation256(S, 32, constant6);
// return tag
memcpy(c + mlen, S, CRYPTO_ABYTES);
*clen = mlen + CRYPTO_ABYTES;
if (A != NULL) {
if (M != NULL) {
return 0;
// return tag
memcpy(c + mlen, S, CRYPTO_ABYTES);
*clen = mlen + CRYPTO_ABYTES;
if (A != NULL) {
if (M != NULL) {
return 0;
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
......@@ -253,107 +289,107 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k)
unsigned int u;
unsigned int v = ((clen - CRYPTO_ABYTES) >> 3) + 1;
unsigned int v1;
unsigned int last_index;
unsigned int i;
unsigned char *A = NULL;
unsigned char S[32];
unsigned int *A32 = NULL;
unsigned int *M32 = NULL;
unsigned int *S32 = NULL;
unsigned int *C32 = NULL;
unsigned int u = 0;
unsigned int v = ((clen - CRYPTO_ABYTES) >> 3) + 1;
unsigned int v1;
unsigned int last_index;
unsigned int i;
unsigned char *A = NULL;
unsigned char S[32];
unsigned int *A32 = NULL;
unsigned int *M32 = NULL;
unsigned int *S32 = NULL;
unsigned int *C32 = NULL;
*mlen = 0;
if (clen < CRYPTO_ABYTES) {
return -1;
*mlen = 0;
if (clen < CRYPTO_ABYTES) {
return -1;
// pad associated data
if (adlen != 0) {
u = (adlen + 8) >> 3;
A = malloc(u << 3);
if (A == NULL) {
return -1;
memset(A, 0, u << 3);
memcpy(A, ad, adlen);
A[adlen] = 0x01;
A32 = (unsigned int *)A;
M32 = (unsigned int *)m;
C32 = (unsigned int *)c;
// pad associated data
if (adlen != 0) {
u = (adlen + 8) >> 3;
A = malloc(u << 3);
if (A == NULL) {
return -1;
memset(A, 0, u << 3);
memcpy(A, ad, adlen);
A[adlen] = 0x01;
A32 = (unsigned int *)A;
M32 = (unsigned int *)m;
C32 = (unsigned int *)c;
// initialization
memcpy(S, npub, CRYPTO_NPUBBYTES);
permutation256(S, 52, constant6);
S32 = (unsigned int *)S;
// initialization
memcpy(S, npub, CRYPTO_NPUBBYTES);
permutation256(S, 52, constant6);
S32 = (unsigned int *)S;
// processing associated data
if (adlen != 0) {
for (i = 0; i < u; i++) {
S32[0] ^= A32[0];
S32[1] ^= A32[1];
A32 = A32 + 2;
permutation256(S, 28, constant6);
S[31] ^= 0x80;
// processing associated data
if (adlen != 0) {
for (i = 0; i < u; i++) {
S32[0] ^= A32[0];
S32[1] ^= A32[1];
A32 = A32 + 2;
permutation256(S, 28, constant6);
S[31] ^= 0x80;
// Decryption: processing ciphertext data
if (clen != CRYPTO_ABYTES) {
C32 = (unsigned int *)c;
for (i = 0; i < v - 1; i++) {
M32[0] = S32[0] ^ C32[0];
M32[1] = S32[1] ^ C32[1];
S32[0] = C32[0];
S32[1] = C32[1];
M32 = M32 + 2;
C32 = C32 + 2;
permutation256(S, 28, constant6);
v1 = (clen - CRYPTO_ABYTES) % 8;
last_index = (v - 1) << 3;
for (i = 0; i < v1; i++) {
m[last_index + i] = S[i] ^ c[last_index + i];
S[i] = c[last_index + i];
S[i] ^= 0x01;
// Decryption: processing ciphertext data
if (clen != CRYPTO_ABYTES) {
C32 = (unsigned int *)c;
for (i = 0; i < v - 1; i++) {
M32[0] = S32[0] ^ C32[0];
M32[1] = S32[1] ^ C32[1];
S32[0] = C32[0];
S32[1] = C32[1];
M32 = M32 + 2;
C32 = C32 + 2;
permutation256(S, 28, constant6);
v1 = (clen - CRYPTO_ABYTES) % 8;
last_index = (v - 1) << 3;
for (i = 0; i < v1; i++) {
m[last_index + i] = S[i] ^ c[last_index + i];
S[i] = c[last_index + i];
S[i] ^= 0x01;
// finalization
permutation256(S, 32, constant6);
// finalization
permutation256(S, 32, constant6);
// return -1 if verification fails
for (i = 0; i < CRYPTO_ABYTES; i++) {
if (c[clen - CRYPTO_ABYTES + i] != S[i]) {
memset(m, 0, clen - CRYPTO_ABYTES);
return -1;
*mlen = clen - CRYPTO_ABYTES;
if (A != NULL) {
return 0;
// return -1 if verification fails
for (i = 0; i < CRYPTO_ABYTES; i++) {
if (c[clen - CRYPTO_ABYTES + i] != S[i]) {
memset(m, 0, clen - CRYPTO_ABYTES);
return -1;
*mlen = clen - CRYPTO_ABYTES;
if (A != NULL) {
return 0;
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k) {
return 0;
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k) {
return 0;
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k) {
return 0;
return 0;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment