From 3f9040774adf7b61dc258087a323965b7c69634c Mon Sep 17 00:00:00 2001 From: KNOT team Date: Sat, 30 May 2020 06:57:10 +0000 Subject: [PATCH] knot armcortex --- knot/Implementations/crypto_aead/knot128v1/armcortexm_1/api.h | 9 +++++++++ knot/Implementations/crypto_aead/knot128v1/armcortexm_1/crypto_aead.h | 18 ++++++++++++++++++ knot/Implementations/crypto_aead/knot128v1/armcortexm_1/encrypt.c | 353 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot128v1/armcortexm_2/api.h | 8 ++++++++ knot/Implementations/crypto_aead/knot128v1/armcortexm_2/auxFormat.h | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot128v1/armcortexm_2/crypto_aead.h | 18 ++++++++++++++++++ knot/Implementations/crypto_aead/knot128v1/armcortexm_2/encrypt.c | 300 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot128v2/armcortexm_1/api.h | 9 +++++++++ knot/Implementations/crypto_aead/knot128v2/armcortexm_1/crypto_aead.h | 18 ++++++++++++++++++ knot/Implementations/crypto_aead/knot128v2/armcortexm_1/encrypt.c | 422 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot128v2/armcortexm_2/api.h | 7 +++++++ knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.h | 149 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot128v2/armcortexm_2/crypto_aead.h | 17 +++++++++++++++++ knot/Implementations/crypto_aead/knot128v2/armcortexm_2/encrypt.c | 333 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot192/armcortexm_1/api.h | 9 +++++++++ knot/Implementations/crypto_aead/knot192/armcortexm_1/crypto_aead.h | 18 ++++++++++++++++++ knot/Implementations/crypto_aead/knot192/armcortexm_1/encrypt.c | 400 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot192/armcortexm_2/api.h | 6 ++++++ knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.h | 251 
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot192/armcortexm_2/crypto_aead.h | 18 ++++++++++++++++++ knot/Implementations/crypto_aead/knot192/armcortexm_2/encrypt.c | 287 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot256/armcortexm_1/api.h | 9 +++++++++ knot/Implementations/crypto_aead/knot256/armcortexm_1/crypto_aead.h | 18 ++++++++++++++++++ knot/Implementations/crypto_aead/knot256/armcortexm_1/encrypt.c | 444 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot256/armcortexm_2/api.h | 6 ++++++ knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.h | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot256/armcortexm_2/crypto_aead.h | 17 +++++++++++++++++ knot/Implementations/crypto_aead/knot256/armcortexm_2/encrypt.c | 452 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 28 files changed, 3786 insertions(+) create mode 100644 knot/Implementations/crypto_aead/knot128v1/armcortexm_1/api.h create mode 100644 knot/Implementations/crypto_aead/knot128v1/armcortexm_1/crypto_aead.h create mode 100644 knot/Implementations/crypto_aead/knot128v1/armcortexm_1/encrypt.c create mode 100644 knot/Implementations/crypto_aead/knot128v1/armcortexm_2/api.h create mode 100644 knot/Implementations/crypto_aead/knot128v1/armcortexm_2/auxFormat.h create mode 100644 knot/Implementations/crypto_aead/knot128v1/armcortexm_2/crypto_aead.h create mode 100644 knot/Implementations/crypto_aead/knot128v1/armcortexm_2/encrypt.c create mode 100644 knot/Implementations/crypto_aead/knot128v2/armcortexm_1/api.h create mode 100644 knot/Implementations/crypto_aead/knot128v2/armcortexm_1/crypto_aead.h create mode 100644 knot/Implementations/crypto_aead/knot128v2/armcortexm_1/encrypt.c create mode 100644 knot/Implementations/crypto_aead/knot128v2/armcortexm_2/api.h create mode 100644 knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.h create mode 100644 knot/Implementations/crypto_aead/knot128v2/armcortexm_2/crypto_aead.h create mode 100644 knot/Implementations/crypto_aead/knot128v2/armcortexm_2/encrypt.c create mode 100644 knot/Implementations/crypto_aead/knot192/armcortexm_1/api.h create mode 100644 knot/Implementations/crypto_aead/knot192/armcortexm_1/crypto_aead.h create mode 100644 knot/Implementations/crypto_aead/knot192/armcortexm_1/encrypt.c create mode 100644 
knot/Implementations/crypto_aead/knot192/armcortexm_2/api.h create mode 100644 knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.h create mode 100644 knot/Implementations/crypto_aead/knot192/armcortexm_2/crypto_aead.h create mode 100644 knot/Implementations/crypto_aead/knot192/armcortexm_2/encrypt.c create mode 100644 knot/Implementations/crypto_aead/knot256/armcortexm_1/api.h create mode 100644 knot/Implementations/crypto_aead/knot256/armcortexm_1/crypto_aead.h create mode 100644 knot/Implementations/crypto_aead/knot256/armcortexm_1/encrypt.c create mode 100644 knot/Implementations/crypto_aead/knot256/armcortexm_2/api.h create mode 100644 knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.h create mode 100644 knot/Implementations/crypto_aead/knot256/armcortexm_2/crypto_aead.h create mode 100644 knot/Implementations/crypto_aead/knot256/armcortexm_2/encrypt.c diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_1/api.h b/knot/Implementations/crypto_aead/knot128v1/armcortexm_1/api.h new file mode 100644 index 0000000..9cdadbb --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_1/api.h @@ -0,0 +1,9 @@ +#ifndef KNOT_API_H +#define KNOT_API_H +//k=n=tag=128 b=256 r=64 c=192 +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 +#endif diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_1/crypto_aead.h b/knot/Implementations/crypto_aead/knot128v1/armcortexm_1/crypto_aead.h new file mode 100644 index 0000000..8f53846 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_1/crypto_aead.h @@ -0,0 +1,18 @@ +#ifndef KNOT_CRYPTO_AEAD_H +#define KNOT_CRYPTO_AEAD_H +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + 
const unsigned char *k); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k); +#endif diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_1/encrypt.c b/knot/Implementations/crypto_aead/knot128v1/armcortexm_1/encrypt.c new file mode 100644 index 0000000..a7fd7a4 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_1/encrypt.c @@ -0,0 +1,353 @@ +#include +#include +#include +#include +#include "crypto_aead.h" +#include "api.h" + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) + +#define KNOT_CIPHER 1 +#if defined(KNOT_CIPHER) && (KNOT_CIPHER == 1) +unsigned char constant6[63] = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06, + 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29, + 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28, + 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24, + 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37, + 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26, + 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f, + 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20 }; + +/* State + * w4 w0 + * w5 w1 + * w6 w2 + * w7 w3 + * + * Sbox + t1 = ~a; + t2 = b & t1; + t3 = c ^ t2; + h = d ^ t3; + t5 = b | c; + t6 = d ^ t1; + g = t5 ^ t6; + t8 = b ^ d; + t9 = t3 & t6; + e = t8 ^ t9; + t11 = g & t8; + f = t3 ^ t11; + * + * Sbox after change + a = ~a; + s0 = b & a; + s0 = c ^ s0; + c = b | c; + a = d ^ a; + c = c ^ a; + s1 = b ^ d; + d = d ^ s0; + a = s0 & a; + a = s1 ^ a; + b = c & s1; + b = s0 ^ b; + */ +static void permutation256(unsigned char *in, int rounds, unsigned char *rc) { + uint32_t w0, w1, w2, w3, w4, w5, w6, w7; + uint32_t s0, s1, s2; + uint32_t one = 0x1; + uint32_t ff = 0xff; + __asm volatile( + "ldr w0, [in] \n\t" + "ldr w4, [in, #4] \n\t" + "ldr w1, [in, #8] \n\t" + "ldr w5, [in, #12] \n\t" + "ldr w2, [in, #16] \n\t" + "ldr 
w6, [in, #20] \n\t" + "ldr w3, [in, #24] \n\t" + "ldr w7, [in, #28] \n\t" + "mov s0, 0xfff \n\t" + "mov s2, 0x1fff \n\t" + "lsl s2, s2, #12 \n\t" + "eors s2, s2, s0 \n\t" + "enc_loop: \n\t" + "/*add round const*/ \n\t" + "ldrb s0, [rc] \n\t" + "eors w0, w0, s0 \n\t" + "/*sbox first column*/ \n\t" + "mvns w0, w0 \n\t" + "ands s0, w1, w0 \n\t" + "eors s0, w2, s0 \n\t" + "orrs w2, w1, w2 \n\t" + "eors w0, w3, w0 \n\t" + "eors w2, w2, w0 \n\t" + "eors s1, w1, w3 \n\t" + "eors w3, w3, s0 \n\t" + "ands w0, s0, w0 \n\t" + "eors w0, s1, w0 \n\t" + "ands w1, w2, s1 \n\t" + "eors w1, s0, w1 \n\t" + "/*sbox second column*/ \n\t" + "mvns w4, w4 \n\t" + "ands s0, w5, w4 \n\t" + "eors s0, w6, s0 \n\t" + "orrs w6, w5, w6 \n\t" + "eors w4, w7, w4 \n\t" + "eors w6, w6, w4 \n\t" + "eors s1, w5, w7 \n\t" + "eors w7, w7, s0 \n\t" + "ands w4, s0, w4 \n\t" + "eors w4, s1, w4 \n\t" + "ands w5, w6, s1 \n\t" + "eors w5, s0, w5 \n\t" + "/*rotate shift left 1 bit*/ \n\t" + "ror s0, w1, #31 \n\t" + "ands s0, s0, one \n\t" + "lsl w1, w1, #1 \n\t" + "ror s1, w5, #31 \n\t" + "ands s1, s1, one \n\t" + "eors w1, w1, s1 \n\t" + "lsl w5, w5, #1 \n\t" + "eors w5, w5, s0 \n\t" + "/*rotate shift left 8 bits*/ \n\t" + "ror s0, w2, #24 \n\t" + "ands s0, s0, ff \n\t" + "lsl w2, w2, #8 \n\t" + "ror s1, w6, #24 \n\t" + "ands s1, s1, ff \n\t" + "eors w2, w2, s1 \n\t" + "lsl w6, w6, #8 \n\t" + "eors w6, w6, s0 \n\t" + "/*rotate shift left 25 bits*/ \n\t" + "ror s0, w3, #7 \n\t" + "ands s0, s0, s2 \n\t" + "lsl w3, w3, #25 \n\t" + "ror s1, w7, #7 \n\t" + "ands s1, s1, s2 \n\t" + "eors w3, w3, s1 \n\t" + "lsl w7, w7, #25 \n\t" + "eors w7, w7, s0 \n\t" + "/*loop control*/ \n\t" + "adds rc, rc, #1 \n\t" + "subs rounds, rounds, #1 \n\t" + "bne enc_loop \n\t" + "str w0, [in] \n\t" + "str w4, [in, #4] \n\t" + "str w1, [in, #8] \n\t" + "str w5, [in, #12] \n\t" + "str w2, [in, #16] \n\t" + "str w6, [in, #20] \n\t" + "str w3, [in, #24] \n\t" + "str w7, [in, #28] \n\t" + ); +} + +int crypto_aead_encrypt(unsigned char *c, 
unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + unsigned int u = 0; + unsigned int v = 0; + unsigned int v1 = 0; + unsigned int i; + unsigned int last_index = 0; + unsigned char *A = NULL; + unsigned char *M = NULL; + unsigned char S[32]; + unsigned int *A32 = NULL; + unsigned int *M32 = NULL; + unsigned int *S32 = NULL; + unsigned int *C32 = NULL; + + // pad associated data + if (adlen != 0) { + u = (adlen + 8) >> 3; + A = malloc(u << 3); + if (A == NULL) { + return -1; + } + memset(A, 0, u << 3); + memcpy(A, ad, adlen); + A[adlen] = 0x01; + A32 = (unsigned int *)A; + } + + // pad plaintext data + if (mlen != 0) { + v = (mlen + 8) >> 3; + M = malloc(v << 3); + if (M == NULL) { + free(A); + return -1; + } + memset(M, 0, v << 3); + memcpy(M, m, mlen); + M[mlen] = 0x01; + M32 = (unsigned int *)M; + } + + // initalization + memcpy(S, npub, CRYPTO_NPUBBYTES); + memcpy(S + CRYPTO_NPUBBYTES, k, CRYPTO_KEYBYTES); + permutation256(S, 52, constant6); + S32 = (unsigned int *)S; + + // processiong associated data + if (adlen != 0) { + for (i = 0; i < u; i++) { + S32[0] ^= A32[0]; + S32[1] ^= A32[1]; + A32 = A32 + 2; + permutation256(S, 28, constant6); + } + } + S[31] ^= 0x80; + + // Encryption processiong plaintext data + if (mlen != 0) { + C32 = (unsigned int *)c; + for (i = 0; i < v - 1; i++) { + S32[0] ^= M32[0]; + S32[1] ^= M32[1]; + M32 = M32 + 2; + C32[0] = S32[0]; + C32[1] = S32[1]; + C32 = C32 + 2; + permutation256(S, 28, constant6); + } + v1 = mlen % 8; + last_index = (v - 1) << 3; + for (i = 0; i < v1; i++) { + S[i] ^= M[last_index + i]; + c[last_index + i] = S[i]; + } + S[i] ^= 0x01; + } + + // finalization + permutation256(S, 32, constant6); + + // return tag + memcpy(c + mlen, S, CRYPTO_ABYTES); + *clen = mlen + CRYPTO_ABYTES; + if (A != NULL) { + free(A); + } + if (M != NULL) { + free(M); + } + 
return 0; +} +/* Authenticated decryption: recovers m from c and checks the CRYPTO_ABYTES-byte tag; returns 0 on success, -1 on short input, allocation failure or tag mismatch (m is zeroed on mismatch). */ +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) +{ + unsigned int u; + unsigned int v = ((clen - CRYPTO_ABYTES) >> 3) + 1; + unsigned int v1; + unsigned int last_index; + unsigned int i; + unsigned char *A = NULL; + unsigned char S[32]; + unsigned int *A32 = NULL; + unsigned int *M32 = NULL; + unsigned int *S32 = NULL; + unsigned int *C32 = NULL; + + *mlen = 0; + if (clen < CRYPTO_ABYTES) { + return -1; + } + + // pad associated data + if (adlen != 0) { + u = (adlen + 8) >> 3; + A = malloc(u << 3); + if (A == NULL) { + return -1; + } + memset(A, 0, u << 3); + memcpy(A, ad, adlen); + A[adlen] = 0x01; + A32 = (unsigned int *)A; + } + + M32 = (unsigned int *)m; + C32 = (unsigned int *)c; + + // initialization + memcpy(S, npub, CRYPTO_NPUBBYTES); + memcpy(S + CRYPTO_NPUBBYTES, k, CRYPTO_KEYBYTES); + permutation256(S, 52, constant6); + S32 = (unsigned int *)S; + + // processing associated data + if (adlen != 0) { + for (i = 0; i < u; i++) { + S32[0] ^= A32[0]; + S32[1] ^= A32[1]; + A32 = A32 + 2; + permutation256(S, 28, constant6); + } + } + S[31] ^= 0x80; + + // Decryption: processing ciphertext data + if (clen != CRYPTO_ABYTES) { + C32 = (unsigned int *)c; + for (i = 0; i < v - 1; i++) { + M32[0] = S32[0] ^ C32[0]; + M32[1] = S32[1] ^ C32[1]; + S32[0] = C32[0]; + S32[1] = C32[1]; + M32 = M32 + 2; + C32 = C32 + 2; + permutation256(S, 28, constant6); + } + v1 = (clen - CRYPTO_ABYTES) % 8; + last_index = (v - 1) << 3; + for (i = 0; i < v1; i++) { + m[last_index + i] = S[i] ^ c[last_index + i]; + S[i] = c[last_index + i]; + } + S[i] ^= 0x01; + } + + // finalization + permutation256(S, 32, constant6); + + // A is not read after the associated-data phase; release it here so + // neither return path below leaks it (free(NULL) is a no-op) + free(A); + // return -1 if verification fails + for (i = 0; i < CRYPTO_ABYTES; i++) { + if (c[clen - CRYPTO_ABYTES + i] != S[i]) { + memset(m, 0, clen - CRYPTO_ABYTES); 
+ return -1; + } + } + *mlen = clen - CRYPTO_ABYTES; + return 0; +} +#else +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + return 0; +} +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + return 0; +} +#endif + diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/api.h b/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/api.h new file mode 100644 index 0000000..2c52a6d --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/api.h @@ -0,0 +1,8 @@ +//k=n=tag=128 b=256 r=64 c=192 +#define CRYPTO_KEYBYTES 16 // +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 + + diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/auxFormat.h b/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/auxFormat.h new file mode 100644 index 0000000..a5c1b7e --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/auxFormat.h @@ -0,0 +1,75 @@ +#include +#include +#include +#include +#include"crypto_aead.h" +#include"api.h" +#include +#define U32BIG(x) (x) + + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) + + +#define sbox(a, b, c, d, e, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; e = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} + +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; + +#define packFormat(out,in) {\ +t1 = U32BIG(((u32*)in)[0]); \ +t2 
= U32BIG(((u32*)in)[1]); \ +t3 = (t1 ^ (t1 >> 1)) & 0x22222222, t1 ^= t3 ^ (t3 << 1); \ +t3 = (t1 ^ (t1 >> 2)) & 0x0C0C0C0C, t1 ^= t3 ^ (t3 << 2); \ +t3 = (t1 ^ (t1 >> 4)) & 0x00F000F0, t1 ^= t3 ^ (t3 << 4); \ +t3 = (t1 ^ (t1 >> 8)) & 0x0000FF00, t1 ^= t3 ^ (t3 << 8); \ +t5 = (t2 ^ (t2 >> 1)) & 0x22222222, t2 ^= t5 ^ (t5 << 1); \ +t5 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t5 ^ (t5 << 2); \ +t5 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t5 ^ (t5 << 4); \ +t5 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t5 ^ (t5 << 8); \ +out[0] = (t2 & 0xFFFF0000) | (t1 >> 16); \ +out[1] = (t2 << 16) | (t1 & 0x0000FFFF); \ +} +#define unpackFormat(out, in) {\ + t2 = (in[0] & 0xFFFF0000) | (in[1] >> 16); \ + t1 = (in[1] & 0x0000FFFF) | (in[0] << 16); \ + t3 = (t1 ^ (t1 >> 8)) & 0x0000FF00, t1 ^= t3 ^ (t3 << 8); \ + t3 = (t1 ^ (t1 >> 4)) & 0x00F000F0, t1 ^= t3 ^ (t3 << 4); \ + t3 = (t1 ^ (t1 >> 2)) & 0x0C0C0C0C, t1 ^= t3 ^ (t3 << 2); \ + t3 = (t1 ^ (t1 >> 1)) & 0x22222222, t1 ^= t3 ^ (t3 << 1); \ + t5 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t5 ^ (t5 << 8); \ + t5 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t5 ^ (t5 << 4); \ + t5 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t5 ^ (t5 << 2); \ + t5 = (t2 ^ (t2 >> 1)) & 0x22222222, t2 ^= t5 ^ (t5 << 1); \ + *((u64*)out) = ((u64)t2 << 32 | t1); \ +} +#define getU32Format(out, in) {\ + t1, t2 = U32BIG(((u32*)in)[0]); \ + t1 = (t2 ^ (t2 >> 1)) & 0x22222222, t2 ^= t1 ^ (t1 << 1); \ + t1 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t1 ^ (t1 << 2); \ + t1 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t1 ^ (t1 << 4); \ + t1 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t1 ^ (t1 << 8); \ + *out = t2; \ +} +#define ROUND256( constant6Format,lunNum) {\ + s[0] ^= constant6Format[lunNum]>> 4;\ + s[1] ^= constant6Format[lunNum]& 0x0f;\ + sbox(s[0], s[2], s[4], s[6], s_temp[0], s_temp[2], s_temp[4], s_temp[6]);\ + sbox(s[1], s[3], s[5], s[7], s_temp[1], s_temp[3], s_temp[5], s_temp[7]);\ + s[0] = s_temp[0];\ + s[1] = s_temp[1];\ + s[2] = s_temp[3];\ + s[3] = LOTR32(s_temp[2], 1);\ + s[4] = 
LOTR32(s_temp[4], 4);\ + s[5] = LOTR32(s_temp[5], 4);\ + s[6] = LOTR32(s_temp[7], 12);\ + s[7] = LOTR32(s_temp[6], 13);\ +} +void printfFormat(char name[], u32 * in); +void printU8(char name[], u8 var[], long len, int offset); + diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/crypto_aead.h b/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/crypto_aead.h new file mode 100644 index 0000000..862d176 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/crypto_aead.h @@ -0,0 +1,18 @@ + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k +); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k +); diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/encrypt.c b/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/encrypt.c new file mode 100644 index 0000000..4b84924 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_2/encrypt.c @@ -0,0 +1,300 @@ + +#include"auxFormat.h" + +#define RATE (64 / 8) + +#define PR0_ROUNDS 52 +#define PR_ROUNDS 28 +#define PRF_ROUNDS 32 +unsigned char constant6Format[63] = { + /*constant6_aead_128v1:*/ +0x1, +0x10, +0x2, +0x20, +0x4, +0x41, +0x11, +0x12, +0x22, +0x24, +0x45, +0x50, +0x3, +0x30, +0x6, +0x61, +0x15, +0x53, +0x33, +0x36, +0x67, +0x74, +0x46, +0x60, +0x5, +0x51, +0x13, +0x32, +0x26, +0x65, +0x54, +0x42, +0x21, +0x14, +0x43, +0x31, +0x16, +0x63, +0x35, +0x57, +0x72, +0x27, +0x75, +0x56, +0x62, +0x25, +0x55, +0x52, +0x23, +0x34, +0x47, +0x70, +0x7, +0x71, +0x17, +0x73, +0x37, +0x77, +0x76, +0x66, +0x64, +0x44, +0x40, + +}; + + + + +static 
void permutation256(unsigned int *in, int rounds, unsigned char *rc) { + uint32_t w0, w1, w2, w3, w4, w5, w6, w7; + uint32_t s0, s1, s2; + uint32_t one = 0x1; + uint32_t i=0; + uint32_t ff = 0xff; + __asm volatile( + "ldr w0, [in] \n\t" + "ldr w4, [in, #4] \n\t" + "ldr w1, [in, #8] \n\t" + "ldr w5, [in, #12] \n\t" + "ldr w2, [in, #16] \n\t" + "ldr w6, [in, #20] \n\t" + "ldr w3, [in, #24] \n\t" + "ldr w7, [in, #28] \n\t" + "enc_loop: \n\t" + "/*add round const s0 s1*/ \n\t" + "ldrb s0, [rc] \n\t" + "LSR s1, s0, #4 \n\t" + "and s0, s0, 0xf \n\t" + "eors w4, w4, s0 \n\t" + "eors w0, w0, s1 \n\t" + "/*sbox first column*/ \n\t" + "mvns w0, w0 \n\t" + "ands s0, w1, w0 \n\t" + "eors s0, w2, s0 \n\t" + "orrs w2, w1, w2 \n\t" + "eors w0, w3, w0 \n\t" + "eors w2, w2, w0 \n\t" + "eors s1, w1, w3 \n\t" + "eors w3, w3, s0 \n\t" + "ands w0, s0, w0 \n\t" + "eors w0, s1, w0 \n\t" + "ands w1, w2, s1 \n\t" + "eors w1, s0, w1 \n\t" + "/*sbox second column*/ \n\t" + "mvns w4, w4 \n\t" + "ands s0, w5, w4 \n\t" + "eors s0, w6, s0 \n\t" + "orrs w6, w5, w6 \n\t" + "eors w4, w7, w4 \n\t" + "eors w6, w6, w4 \n\t" + "eors s1, w5, w7 \n\t" + "eors w7, w7, s0 \n\t" + "ands w4, s0, w4 \n\t" + "eors w4, s1, w4 \n\t" + "ands w5, w6, s1 \n\t" + "eors w5, s0, w5 \n\t" + "/*rotate shift left 1 bit*/ \n\t" + "mov s0, w5 \n\t" + "ROR w5, w1, #31 \n\t" + "mov w1, s0 \n\t" + "/*rotate shift left 8 bits*/ \n\t" + "ROR w2, w2, #28 \n\t" + "ROR w6, w6, #28 \n\t" + "/*rotate shift left 25 bits*/ \n\t" + "mov s0, w3 \n\t" + "ROR w3, w7, #20 \n\t" + "ROR w7, s0, #19 \n\t" + "/*loop control*/ \n\t" + "adds rc, rc, #1 \n\t" + "subs rounds, rounds, #1 \n\t" + "bne enc_loop \n\t" + "str w0, [in] \n\t" + "str w4, [in, #4] \n\t" + "str w1, [in, #8] \n\t" + "str w5, [in, #12] \n\t" + "str w2, [in, #16] \n\t" + "str w6, [in, #20] \n\t" + "str w3, [in, #24] \n\t" + "str w7, [in, #28] \n\t" + ); +} + + +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long 
mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + unsigned int i, j; + u32 s[8] = { 0 }; + u32 dataFormat[2] = { 0 }; + u8 tempData[8]; + u32 s_temp[8] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + *clen = mlen + CRYPTO_ABYTES; + //initialization + packFormat(s, npub); + packFormat((s + 2), (npub + 8)); + packFormat((s + 4), k); + packFormat((s + 6), (k + 8)); + permutation256(s,PR0_ROUNDS,constant6Format); + // process associated data + if (adlen) { + while (adlen >= RATE) { + packFormat(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + permutation256(s,PR_ROUNDS,constant6Format); + adlen -= RATE; + ad += RATE; + } + memset(tempData, 0, sizeof(tempData)); +memcpy(tempData, ad, adlen * sizeof(unsigned char)); +tempData[adlen] = 0x01; + packFormat(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + permutation256(s,PR_ROUNDS,constant6Format); + } + s[6] ^= 0x80000000; + if (mlen) { + while (mlen >= RATE) { + packFormat(dataFormat, m); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + unpackFormat(c, s); + permutation256(s,PR_ROUNDS,constant6Format); + mlen -= RATE; + m += RATE; + c += RATE; + } + memset(tempData, 0, sizeof(tempData)); +memcpy(tempData, m, mlen * sizeof(unsigned char)); + +tempData[mlen]= 0x01; + packFormat(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + unpackFormat(tempData, s); + memcpy(c, tempData, mlen * sizeof(unsigned char)); + c +=mlen; + } + // finalization + permutation256(s,PRF_ROUNDS,constant6Format); + // return tag + unpackFormat(tempData, s); + memcpy(c, tempData, sizeof(tempData)); + unpackFormat(tempData,(s + 2)); + memcpy(c+8, tempData, sizeof(tempData)); +// unpackFormat((c), s); +// unpackFormat((c+8),(s + 2)); + return 0; +} + +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long 
clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + // Decrypts c (ciphertext || tag) into m; returns 0 on success, -1 on short input or tag mismatch + u8 i; + u32 s[8] = { 0 }; + u32 dataFormat[4] = { 0 }; + u32 dataFormat_1[2] = { 0 }; + u8 tempU8[32] = { 0 }; + u8 tempData[8]; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + *mlen = 0; + if (clen < CRYPTO_ABYTES) + return -1; + const unsigned long long ptlen = clen - CRYPTO_ABYTES; // plaintext length; clen, m and c are advanced below + packFormat(s, npub); // initialization: load nonce and key into the state + packFormat((s + 2), (npub + 8)); + packFormat((s + 4), k); + packFormat((s + 6), (k + 8)); + permutation256(s,PR0_ROUNDS,constant6Format); + // process associated data + if (adlen) { + while (adlen >= RATE) { + packFormat(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + permutation256(s,PR_ROUNDS,constant6Format); + adlen -= RATE; + ad += RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; + packFormat(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + permutation256(s,PR_ROUNDS,constant6Format); + } + s[6] ^= 0x80000000; + // process c + clen = ptlen; + if (clen) { + while (clen >= RATE) { + packFormat(dataFormat, c); + dataFormat_1[0] = s[0] ^ dataFormat[0]; + dataFormat_1[1] = s[1] ^ dataFormat[1]; + unpackFormat(m, dataFormat_1); + s[0] = dataFormat[0]; + s[1] = dataFormat[1]; + permutation256(s,PR_ROUNDS,constant6Format); + clen -= RATE; + m += RATE; + c += RATE; + } + unpackFormat(tempU8, s); + for (i = 0; i < clen; ++i, ++m, ++c) { + *m = tempU8[i]^ *c; + tempU8[i] = *c; + } + tempU8[i] ^= 0x01; + packFormat(s, tempU8); + } + // finalization + permutation256(s,PRF_ROUNDS,constant6Format); + // verify tag without an early exit on the first differing word + packFormat(dataFormat, c); + packFormat((dataFormat + 2), (c +8)); + if (((dataFormat[0] ^ s[0]) | (dataFormat[1] ^ s[1]) | (dataFormat[2] ^ s[2]) | (dataFormat[3] ^ s[3])) != 0) { + memset(m - ptlen, 0, ptlen); // m was advanced by ptlen bytes; wipe the unverified plaintext + return -1; + } + *mlen = ptlen; + return 0; +} diff --git 
a/knot/Implementations/crypto_aead/knot128v2/armcortexm_1/api.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_1/api.h new file mode 100644 index 0000000..95fe44a --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_1/api.h @@ -0,0 +1,9 @@ +#ifndef KNOT_API_H +#define KNOT_API_H +//k=n=tag=128 b=384 r=192 c=192 +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 +#endif diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_1/crypto_aead.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_1/crypto_aead.h new file mode 100644 index 0000000..8f53846 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_1/crypto_aead.h @@ -0,0 +1,18 @@ +#ifndef KNOT_CRYPTO_AEAD_H +#define KNOT_CRYPTO_AEAD_H +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k); +#endif diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_1/encrypt.c b/knot/Implementations/crypto_aead/knot128v2/armcortexm_1/encrypt.c new file mode 100644 index 0000000..2450c35 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_1/encrypt.c @@ -0,0 +1,422 @@ +#include +#include +#include +#include +#include "crypto_aead.h" +#include "api.h" + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) + +#define KNOT_CIPHER 1 +#if defined(KNOT_CIPHER) && (KNOT_CIPHER == 1) +unsigned char constant7[127] = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03, 0x06, + 0x0c, 0x18, 0x30, 0x61, 
0x42, 0x05, 0x0a, 0x14, 0x28, 0x51, 0x23, 0x47, + 0x0f, 0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b, 0x16, + 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54, 0x29, 0x53, + 0x27, 0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43, 0x07, + 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49, 0x13, 0x26, + 0x4d, 0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37, 0x6f, + 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31, 0x63, 0x46, 0x0d, 0x1a, 0x34, + 0x69, 0x52, 0x25, 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c, 0x39, + 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a, 0x15, 0x2a, 0x55, 0x2b, 0x57, + 0x2f, 0x5f, 0x3f, 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40 }; + +/* State + * w8 w4 w0 + * w9 w5 w1 + * w10 w6 w2 + * w11 w7 w3 + * + * Sbox + t1 = ~a; + t2 = b & t1; + t3 = c ^ t2; + h = d ^ t3; + t5 = b | c; + t6 = d ^ t1; + g = t5 ^ t6; + t8 = b ^ d; + t9 = t3 & t6; + e = t8 ^ t9; + t11 = g & t8; + f = t3 ^ t11; + * + * Sbox after change + a = ~a; + s0 = b & a; + s0 = c ^ s0; + c = b | c; + a = d ^ a; + c = c ^ a; + s1 = b ^ d; + d = d ^ s0; + a = s0 & a; + a = s1 ^ a; + b = c & s1; + b = s0 ^ b; + */ +static void permutation384(unsigned char *in, int rounds, unsigned char *rc) { + uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11; + uint32_t s0, s1, s2; + uint32_t one = 0x1; + uint32_t ff = 0xff; + uint32_t value; + __asm volatile( + "ldr w0, [in] \n\t" + "ldr w4, [in, #4] \n\t" + "ldr w8, [in, #8] \n\t" + "ldr w1, [in, #12] \n\t" + "ldr w5, [in, #16] \n\t" + "ldr w9, [in, #20] \n\t" + "ldr w2, [in, #24] \n\t" + "ldr w6, [in, #28] \n\t" + "ldr w10, [in, #32] \n\t" + "ldr w3, [in, #36] \n\t" + "ldr w7, [in, #40] \n\t" + "ldr w11, [in, #44] \n\t" + "mov s0, 0xfff \n\t" + "mov value, 0x7ff \n\t" + "lsl value, value, #12 \n\t" + "eors value, value, s0 \n\t" + "enc_loop: \n\t" + "/*add round const*/ \n\t" + "ldrb s0, [rc] \n\t" + "eors w0, w0, s0 \n\t" + "/*sbox first column*/ \n\t" + "mvns w0, w0 \n\t" + "ands s0, w1, w0 \n\t" 
+ "eors s0, w2, s0 \n\t" + "orrs w2, w1, w2 \n\t" + "eors w0, w3, w0 \n\t" + "eors w2, w2, w0 \n\t" + "eors s1, w1, w3 \n\t" + "eors w3, w3, s0 \n\t" + "ands w0, s0, w0 \n\t" + "eors w0, s1, w0 \n\t" + "ands w1, w2, s1 \n\t" + "eors w1, s0, w1 \n\t" + "/*sbox second column*/ \n\t" + "mvns w4, w4 \n\t" + "ands s0, w5, w4 \n\t" + "eors s0, w6, s0 \n\t" + "orrs w6, w5, w6 \n\t" + "eors w4, w7, w4 \n\t" + "eors w6, w6, w4 \n\t" + "eors s1, w5, w7 \n\t" + "eors w7, w7, s0 \n\t" + "ands w4, s0, w4 \n\t" + "eors w4, s1, w4 \n\t" + "ands w5, w6, s1 \n\t" + "eors w5, s0, w5 \n\t" + "/*sbox third column*/ \n\t" + "mvns w8, w8 \n\t" + "ands s0, w9, w8 \n\t" + "eors s0, w10, s0 \n\t" + "orrs w10, w9, w10 \n\t" + "eors w8, w11, w8 \n\t" + "eors w10, w10, w8 \n\t" + "eors s1, w9, w11 \n\t" + "eors w11, w11, s0 \n\t" + "ands w8, s0, w8 \n\t" + "eors w8, s1, w8 \n\t" + "ands w9, w10, s1 \n\t" + "eors w9, s0, w9 \n\t" + "/*rotate shift left 1 bit*/ \n\t" + "ror s0, w1, #31 \n\t" + "ands s0, s0, one \n\t" + "lsl w1, w1, #1 \n\t" + "ror s1, w9, #31 \n\t" + "ands s1, s1, one \n\t" + "eors w1, w1, s1 \n\t" + "ror s2, w5, #31 \n\t" + "ands s2, s2, one \n\t" + "lsl w5, w5, #1 \n\t" + "eors w5, w5, s0 \n\t" + "lsl w9, w9, #1 \n\t" + "eors w9, w9, s2 \n\t" + "/*rotate shift left 8 bits*/ \n\t" + "ror s0, w2, #24 \n\t" + "ands s0, s0, ff \n\t" + "lsl w2, w2, #8 \n\t" + "ror s1, w10, #24 \n\t" + "ands s1, s1, ff \n\t" + "eors w2, w2, s1 \n\t" + "ror s2, w6, #24 \n\t" + "ands s2, s2, ff \n\t" + "lsl w6, w6, #8 \n\t" + "eors w6, w6, s0 \n\t" + "lsl w10, w10, #8 \n\t" + "eors w10, w10, s2 \n\t" + "/*rotate shift left 55 bits*/ \n\t" + "ror s0, w11, #9 \n\t" + "ands s0, s0, value \n\t" + "lsl w11, w11, #23 \n\t" + "ror s1, w7, #9 \n\t" + "ands s1, s1, value \n\t" + "eors w11, w11, s1 \n\t" + "ror s2, w3, #9 \n\t" + "ands s2, s2, value \n\t" + "lsl w3, w3, #23 \n\t" + "eors w3, w3, s0 \n\t" + "lsl w7, w7, #23 \n\t" + "eors w7, w7, s2 \n\t" + "mov s0, w3 \n\t" + "mov w3, w11 \n\t" + "mov w11, w7 
\n\t" + "mov w7, s0 \n\t" + "/*loop control*/ \n\t" + "adds rc, rc, #1 \n\t" + "subs rounds, rounds, #1 \n\t" + "bne enc_loop \n\t" + "str w0, [in] \n\t" + "str w4, [in, #4] \n\t" + "str w8, [in, #8] \n\t" + "str w1, [in, #12] \n\t" + "str w5, [in, #16] \n\t" + "str w9, [in, #20] \n\t" + "str w2, [in, #24] \n\t" + "str w6, [in, #28] \n\t" + "str w10, [in, #32] \n\t" + "str w3, [in, #36] \n\t" + "str w7, [in, #40] \n\t" + "str w11, [in, #44] \n\t" + ); +} + +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + unsigned int u = 0; + unsigned int v = 0; + unsigned int v1 = 0; + unsigned int last_index = 0; + unsigned int i; + unsigned char *A = NULL; + unsigned char *M = NULL; + unsigned char S[48]; + unsigned int *A32 = NULL; + unsigned int *M32 = NULL; + unsigned int *S32 = NULL; + unsigned int *C32 = NULL; + + // pad associated data + if (adlen != 0) { + u = adlen / 24 + 1; + A = malloc(u * 24); + if (A == NULL) { + return -1; + } + memset(A, 0, u * 24); + memcpy(A, ad, adlen); + A[adlen] = 0x01; + A32 = (unsigned int *)A; + } + + // pad plaintext data + if (mlen != 0) { + v = mlen / 24 + 1; + M = malloc(v * 24); + if (M == NULL) { + free(A); + return -1; + } + memset(M, 0, v * 24); + memcpy(M, m, mlen); + M[mlen] = 0x01; + M32 = (unsigned int *)M; + } + + // initalization + memcpy(S, npub, CRYPTO_NPUBBYTES); + memcpy(S + CRYPTO_NPUBBYTES, k, CRYPTO_KEYBYTES); + memset(S + CRYPTO_NPUBBYTES + CRYPTO_KEYBYTES, 0, CRYPTO_KEYBYTES); + S[47] ^= 0x80; + permutation384(S, 76, constant7); + S32 = (unsigned int *)S; + + // processiong associated data + if (adlen != 0) { + for (i = 0; i < u; i++) { + S32[0] ^= A32[0]; + S32[1] ^= A32[1]; + S32[2] ^= A32[2]; + S32[3] ^= A32[3]; + S32[4] ^= A32[4]; + S32[5] ^= A32[5]; + A32 = A32 + 6; + permutation384(S, 28, constant7); + } + 
} + S[47] ^= 0x80; + + // Encryption processiong plaintext data + if (mlen != 0) { + C32 = (unsigned int *)c; + for (i = 0; i < v - 1; i++) { + S32[0] ^= M32[0]; + S32[1] ^= M32[1]; + S32[2] ^= M32[2]; + S32[3] ^= M32[3]; + S32[4] ^= M32[4]; + S32[5] ^= M32[5]; + M32 = M32 + 6; + C32[0] = S32[0]; + C32[1] = S32[1]; + C32[2] = S32[2]; + C32[3] = S32[3]; + C32[4] = S32[4]; + C32[5] = S32[5]; + C32 = C32 + 6; + permutation384(S, 28, constant7); + } + v1 = mlen % 24; + last_index = (v - 1) * 24; + for (i = 0; i < v1; i++) { + S[i] ^= M[last_index + i]; + c[last_index + i] = S[i]; + } + S[i] ^= 0x01; + } + + // finalization + permutation384(S, 32, constant7); + + // return tag + memcpy(c + mlen, S, CRYPTO_ABYTES); + *clen = mlen + CRYPTO_ABYTES; + if (A != NULL) { + free(A); + } + if (M != NULL) { + free(M); + } + return 0; +} +/* Decrypts and authenticates c: returns 0 with *mlen = clen - CRYPTO_ABYTES on success; returns -1 with *mlen = 0 when clen is shorter than a tag, when the padded-AD allocation fails, or when the tag does not match (the plaintext written to m is then zeroed). nsec is not used. */ +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) +{ + unsigned int u = 0; + unsigned int v = 0; + unsigned int v1 = 0; + unsigned int last_index = 0; + unsigned int i; + unsigned char tag_diff = 0; + unsigned char *A = NULL; + unsigned char S[48]; + unsigned int *A32 = NULL; + unsigned int *M32 = NULL; + unsigned int *S32 = NULL; + unsigned int *C32 = NULL; + + *mlen = 0; + if (clen < CRYPTO_ABYTES) { + return -1; + } + + // pad associated data + if (adlen != 0) { + u = adlen / 24 + 1; + A = malloc(u * 24); + if (A == NULL) { + return -1; + } + memset(A, 0, u * 24); + memcpy(A, ad, adlen); + A[adlen] = 0x01; + A32 = (unsigned int *)A; + } + + M32 = (unsigned int *)m; + C32 = (unsigned int *)c; /* NOTE(review): word access through m/c assumes the target tolerates unaligned 32-bit loads/stores - confirm */ + + // initialization + memcpy(S, npub, CRYPTO_NPUBBYTES); + memcpy(S + CRYPTO_NPUBBYTES, k, CRYPTO_KEYBYTES); + memset(S + CRYPTO_NPUBBYTES + CRYPTO_KEYBYTES, 0, CRYPTO_KEYBYTES); + S[47] ^= 0x80; + permutation384(S, 76, constant7); + S32 = (unsigned int *)S; + + // processing associated 
data + if (adlen != 0) { + for (i = 0; i < u; i++) { + S32[0] ^= A32[0]; + S32[1] ^= A32[1]; + S32[2] ^= A32[2]; + S32[3] ^= A32[3]; + S32[4] ^= A32[4]; + S32[5] ^= A32[5]; + A32 = A32 + 6; + permutation384(S, 28, constant7); + } + } + free(A); /* padded AD copy is fully absorbed, release it before any return below; free(NULL) is a no-op */ + S[47] ^= 0x80; + + // Decryption: processing ciphertext data + if (clen != CRYPTO_ABYTES) { + C32 = (unsigned int *)c; + v = (clen - CRYPTO_ABYTES) / 24 + 1; + for (i = 0; i < v - 1; i++) { + M32[0] = S32[0] ^ C32[0]; + M32[1] = S32[1] ^ C32[1]; + M32[2] = S32[2] ^ C32[2]; + M32[3] = S32[3] ^ C32[3]; + M32[4] = S32[4] ^ C32[4]; + M32[5] = S32[5] ^ C32[5]; + S32[0] = C32[0]; + S32[1] = C32[1]; + S32[2] = C32[2]; + S32[3] = C32[3]; + S32[4] = C32[4]; + S32[5] = C32[5]; + M32 = M32 + 6; + C32 = C32 + 6; + permutation384(S, 28, constant7); + } + v1 = (clen - CRYPTO_ABYTES) % 24; + last_index = (v - 1) * 24; + for (i = 0; i < v1; i++) { + m[last_index + i] = S[i] ^ c[last_index + i]; + S[i] = c[last_index + i]; + } + S[i] ^= 0x01; + } + + // finalization + permutation384(S, 32, constant7); + + // compare the whole tag without an early exit; on mismatch wipe the plaintext and return -1 + for (i = 0; i < CRYPTO_ABYTES; i++) { + tag_diff |= (unsigned char)(c[clen - CRYPTO_ABYTES + i] ^ S[i]); + } + if (tag_diff != 0) { + memset(m, 0, clen - CRYPTO_ABYTES); + return -1; + } + *mlen = clen - CRYPTO_ABYTES; + return 0; +} +#else +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + return 0; +} +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + return 0; +} +#endif diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/api.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/api.h new file 
mode 100644 index 0000000..d8257f4 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/api.h @@ -0,0 +1,7 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 + + diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.h new file mode 100644 index 0000000..df30da6 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/auxFormat.h @@ -0,0 +1,149 @@ +//#include +#include"crypto_aead.h" +#include"api.h" + +#include +#include +#include +#include +#define U32BIG(x) (x) + +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) + +//////////////////puck begin +//&:5 <<:4 |:4 +#define puckU32ToThree(x){\ +x &= 0x92492492;\ +x = (x | (x << 2)) & 0xc30c30c3;\ +x = (x | (x << 4)) & 0xf00f00f0;\ +x = (x | (x << 8)) & 0xff0000ff;\ +x = (x | (x << 16)) & 0xfff00000;\ +} +#define unpuckU32ToThree(x){\ +x &= 0xfff00000;\ +x = (x | (x >> 16)) & 0xff0000ff;\ +x = (x | (x >> 8)) & 0xf00f00f0;\ +x = (x | (x >> 4)) & 0xc30c30c3;\ +x = (x | (x >> 2)) & 0x92492492;\ +} +//使用 u8 t2_64, t2_65;u32 temp2[3];t2; +#define packU32FormatToThreePacket( out, in) {\ +t2 = U32BIG(((u32*)in)[0]); \ +t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \ +t2 = t2 << 2; \ +temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \ +puckU32ToThree(temp2[0]); \ +puckU32ToThree(temp2[1]); \ +puckU32ToThree(temp2[2]); \ +out[0] = (temp2[0] >> 22); \ +out[1] = (((u32)t2_64) << 10) | (temp2[1] >> 22); \ +out[2] =(((u32)t2_65) << 10) | (temp2[2] >> 22); \ +} +//t9 t1 t2 t1_32 t2_64 t2_65 temp0[3] temp1[3] temp2[3] +#define packU96FormatToThreePacket(out, in) {\ +t9 = U32BIG(((u32*)in)[2]); \ +t1 = U32BIG(((u32*)in)[1]); \ +t2 = U32BIG(((u32*)in)[0]); \ 
+t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \ +t1 = t1 << 1; \ +t2 = t2 << 2; \ +temp0[0] = t9; temp0[1] = t9 << 1; temp0[2] = t9 << 2; \ +puckU32ToThree(temp0[0]); \ +puckU32ToThree(temp0[1]); \ +puckU32ToThree(temp0[2]); \ +temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; \ +puckU32ToThree(temp1[0]); \ +puckU32ToThree(temp1[1]); \ +puckU32ToThree(temp1[2]); \ +temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \ +puckU32ToThree(temp2[0]); \ +puckU32ToThree(temp2[1]); \ +puckU32ToThree(temp2[2]); \ +out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); \ +out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); \ +out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); \ +} + //使用 u8 t2_64, t2_65;u32 temp2[3];t2; +#define unpackU32FormatToThreePacket(out, in) {\ +temp2[0] = (in[0] & 0x000003ff) << 22; \ +t2_64 = ((in[1] & 0x00000400) << 21); \ +temp2[1] = (in[1] & 0x000003ff) << 22; \ +t2_65 = ((in[2] & 0x00000400) << 20); \ +temp2[2] = (in[2] & 0x000003ff) << 22; \ +unpuckU32ToThree(temp2[0]); \ +unpuckU32ToThree(temp2[1]); \ +unpuckU32ToThree(temp2[2]); \ +t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \ +*(u32*)(out) = U32BIG(t2); \ +} +//u32 temp0[3] = { 0 };u32 temp1[3] = { 0 };u32 temp2[3] = { 0 };u32 t1_32, t2_64, t2_65;t9,t1,t2, +#define unpackU96FormatToThreePacket( out, in) {\ +temp0[0] = in[0] & 0xffe00000; \ +temp1[0] = (in[0] & 0x001ffc00) << 11; \ +temp2[0] = (in[0] & 0x000003ff) << 22; \ +temp0[1] = in[1] & 0xffe00000; \ +temp1[1] = (in[1] & 0x001ff800) << 11; \ +t2_64 = ((in[1] & 0x00000400) << 21); \ +temp2[1] = (in[1] & 0x000003ff) << 22; \ +temp0[2] = in[2] & 0xffc00000; \ +t1_32 = ((in[2] & 0x00200000) << 10); \ +temp1[2] = (in[2] & 0x001ff800) << 11; \ +t2_65 = ((in[2] & 0x00000400) << 20); \ +temp2[2] = (in[2] & 0x000003ff) << 22; \ +unpuckU32ToThree(temp0[0]); \ 
+unpuckU32ToThree(temp0[1]); \ +unpuckU32ToThree(temp0[2]); \ +t9 = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; \ +unpuckU32ToThree(temp1[0]); \ +unpuckU32ToThree(temp1[1]); \ +unpuckU32ToThree(temp1[2]); \ +t1 = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); \ +unpuckU32ToThree(temp2[0]); \ +unpuckU32ToThree(temp2[1]); \ +unpuckU32ToThree(temp2[2]); \ +t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \ +*(u32*)(out) = U32BIG(t2); \ +*(u32*)(out + 4) = U32BIG(t1); \ +*(u32*)(out + 8) = U32BIG(t9); \ +} + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define sbox(a, b, c, d, e, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; e = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} + + +#define U96_BIT_LOTR32_1(t0,t1,t2,t3,t4,t5){\ +t3= t1;\ +t4 = t2;\ +t5 = LOTR32(t0, 1); \ +} +#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t2, 2);\ +t4 =LOTR32(t0, 3);\ +t5 = LOTR32(t1, 3); \ +} +//55=3*18+1 +#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t1, 18); \ +t4 = LOTR32(t2, 18);\ +t5 = LOTR32(t0, 19); \ +} +/* +s0 s1 s2 +s3 s4 s5 +s6 s7 s8 +s9 s10 s11 +*/ + +void printU32State(char name[], u32* var, long len); +void printfU96Format(char name[], u32 * s); +//////////////////puck end +void printU8(char name[], u8 var[], int len, int offset); +void printfU96Format(char name[], u32 * s); diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/crypto_aead.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/crypto_aead.h new file mode 100644 index 0000000..cdfdf19 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/crypto_aead.h @@ -0,0 +1,17 @@ +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k +); + 
+int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k +); diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/encrypt.c b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/encrypt.c new file mode 100644 index 0000000..bea9f64 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_2/encrypt.c @@ -0,0 +1,333 @@ + +#include"auxFormat.h" + +#define aead_RATE (192 / 8) +#define PR0_ROUNDS 76 +#define PR_ROUNDS 28 +#define PRF_ROUNDS 32 + +unsigned char constant7Format[127] = { + /*constant7Format[127]:*/ + 0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, + 0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, + 0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, + 0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, + 0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, + 0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, + 0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,0xcf,0x5e,0xd3,0x9f, + 0xdc,0xc6,0x16,0x91,0x8d,0x4c,0x43,0x1a,0xd0,0x87,0x1c,0xc1, + 0x0f,0x59,0xca,0x57,0x9b,0xdd,0xce,0x56,0x93,0x9d,0xcc,0x46, + 0x13,0x98,0xc5,0x0e,0x51,0x8a,0x55,0x8b,0x5d,0xcb,0x5f,0xdb, + 0xdf,0xde,0xd6,0x96,0x94,0x84,0x04, }; +/* State + * w8 w4 w0 + * w9 w5 w1 + * w10 w6 w2 + * w11 w7 w3 + */ + static void permutation384(unsigned int *in, int rounds, unsigned char *rc) { + + uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11; + uint32_t s0, s1, s2; + uint32_t i=0; + __asm volatile( + "ldr w0, [in] \n\t" + "ldr w4, [in, #4] \n\t" + "ldr w8, [in, #8] \n\t" + "ldr w1, [in, #12] \n\t" + "ldr w5, [in, #16] \n\t" + "ldr w9, [in, #20] \n\t" + "ldr w2, [in, #24] \n\t" + "ldr w6, [in, #28] \n\t" + "ldr w10, [in, #32] \n\t" + "ldr w3, [in, #36] \n\t" + "ldr w7, [in, #40] \n\t" + "ldr w11, [in, #44] 
\n\t" + "enc_loop: \n\t" + "/*add round const s0 s1*/ \n\t" + "ldrb s0, [rc] \n\t" + "LSR s1, s0, #6 \n\t" + "and s1, s1, 0x3 \n\t" + "LSR s2, s0, #3 \n\t" + "and s2, s2, 0x7 \n\t" + "and s0, s0, 0x7 \n\t" + "eors w8, w8, s0 \n\t" + "eors w4, w4, s2 \n\t" + "eors w0, w0, s1 \n\t" + "/*sbox first column*/ \n\t" + "mvns w0, w0 \n\t" + "ands s0, w1, w0 \n\t" + "eors s0, w2, s0 \n\t" + "orrs w2, w1, w2 \n\t" + "eors w0, w3, w0 \n\t" + "eors w2, w2, w0 \n\t" + "eors s1, w1, w3 \n\t" + "eors w3, w3, s0 \n\t" + "ands w0, s0, w0 \n\t" + "eors w0, s1, w0 \n\t" + "ands w1, w2, s1 \n\t" + "eors w1, s0, w1 \n\t" + "/*sbox second column*/ \n\t" + "mvns w4, w4 \n\t" + "ands s0, w5, w4 \n\t" + "eors s0, w6, s0 \n\t" + "orrs w6, w5, w6 \n\t" + "eors w4, w7, w4 \n\t" + "eors w6, w6, w4 \n\t" + "eors s1, w5, w7 \n\t" + "eors w7, w7, s0 \n\t" + "ands w4, s0, w4 \n\t" + "eors w4, s1, w4 \n\t" + "ands w5, w6, s1 \n\t" + "eors w5, s0, w5 \n\t" + "/*sbox third column*/ \n\t" + "mvns w8, w8 \n\t" + "ands s0, w9, w8 \n\t" + "eors s0, w10, s0 \n\t" + "orrs w10, w9, w10 \n\t" + "eors w8, w11, w8 \n\t" + "eors w10, w10, w8 \n\t" + "eors s1, w9, w11 \n\t" + "eors w11, w11, s0 \n\t" + "ands w8, s0, w8 \n\t" + "eors w8, s1, w8 \n\t" + "ands w9, w10, s1 \n\t" + "eors w9, s0, w9 \n\t" + "/*rotate shift left 1 bit [w9 w5 w1-> (w1,1) w9 w5] */ \n\t" + "mov s0, w1 \n\t" + "mov w1, w5 \n\t" + "mov w5, w9 \n\t" + "ROR w9, s0, #31 \n\t" + "/*rotate shift left 8 bits [w10 w6 w2-> (w6,3) (w2,3) ( w10,2)]*/ \n\t" + "mov s0, w10 \n\t" + "ROR w10, w6 , #29 \n\t" + "ROR w6, w2 , #29 \n\t" + "ROR w2, s0, #30 \n\t" + "/*rotate shift left 55 bit [w11 w7 w3-> (w3,13) (w11,14) ( w7,14)] */ \n\t" + "mov s0, w3 \n\t" + "ROR w3, w7 , #14 \n\t" + "ROR w7, w11 , #14 \n\t" + "ROR w11, s0, #13 \n\t" + "/*loop control*/ \n\t" + "adds rc, rc, #1 \n\t" + "subs rounds, rounds, #1 \n\t" + "bne enc_loop \n\t" + "str w0, [in] \n\t" + "str w4, [in, #4] \n\t" + "str w8, [in, #8] \n\t" + "str w1, [in, #12] \n\t" + "str w5, [in, 
#16] \n\t" + "str w9, [in, #20] \n\t" + "str w2, [in, #24] \n\t" + "str w6, [in, #28] \n\t" + "str w10, [in, #32] \n\t" + "str w3, [in, #36] \n\t" + "str w7, [in, #40] \n\t" + "str w11, [in, #44] \n\t" + ); +} + +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + u8 i; + u32 s[12] = { 0 }; + u8 tempData[24] = { 0 }; + u32 dataFormat[6] = { 0 }; + u32 s_temp[12] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + u32 t1_32, t2_64, t2_65; + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + + *clen = mlen + CRYPTO_ABYTES; + // initialization + packU96FormatToThreePacket(s, npub); + memcpy(tempData, npub+12, sizeof(unsigned char)*4); + memcpy(tempData+4, k, sizeof(unsigned char) * 16); + packU96FormatToThreePacket((s + 3), tempData); + packU96FormatToThreePacket((s + 6), (tempData+12)); + + s[9] = 0x80000000; + permutation384(s,PR0_ROUNDS,constant7Format); + // process associated data + if (adlen) { + // rlen = adlen; + while (adlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket((dataFormat+3), (ad+12)); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + permutation384(s,PR_ROUNDS,constant7Format); + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket((dataFormat + 3), (tempData + 12)); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + permutation384(s,PR_ROUNDS,constant7Format); + } + s[9] ^= 0x80000000; + if 
(mlen) { + while (mlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, m); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket((dataFormat + 3), (m + 12)); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + unpackU96FormatToThreePacket(c, s); + unpackU96FormatToThreePacket((c+12), (s+3)); + permutation384(s,PR_ROUNDS,constant7Format); + mlen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, m, mlen * sizeof(unsigned char)); + tempData[mlen]= 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket((dataFormat + 3), (tempData + 12)); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + unpackU96FormatToThreePacket(tempData, s); + unpackU96FormatToThreePacket((tempData+12), (s+3)); + memcpy(c, tempData, mlen * sizeof(unsigned char)); + c += mlen; + } + // finalization + permutation384(s,PRF_ROUNDS,constant7Format); + // return tag + unpackU96FormatToThreePacket(c, s); + unpackU96FormatToThreePacket(tempData, (s + 3)); + memcpy(c+12, tempData, sizeof(unsigned char) * 4); + return 0; +} +/* Decrypts and authenticates c: returns 0 with *mlen = clen - CRYPTO_ABYTES on success; returns -1 with *mlen = 0 when clen is shorter than a tag or when the tag does not match (the plaintext already written to m is then zeroed). nsec is not used. */ +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + u8 i, tagDiff = 0; + u32 s[12] = { 0 }; + unsigned char *const plain = m; /* start of the output buffer; m itself is advanced while decrypting */ + u32 dataFormat[12] = { 0 }; + u32 dataFormat_1[12] = { 0 }; + u8 tempData[24] = { 0 }; + u8 tempU8[24] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + u32 t1_32, t2_64, t2_65; + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + *mlen = 0; /* only report a length once the tag has been verified */ + if (clen < CRYPTO_ABYTES) + return -1; + // initialization + packU96FormatToThreePacket(s, npub); + memcpy(tempData, npub + 
12, sizeof(unsigned char) * 4); + memcpy(tempData + 4, k, sizeof(unsigned char) * 16); + packU96FormatToThreePacket((s + 3), tempData); + packU96FormatToThreePacket((s + 6), (tempData + 12)); + + s[9] = 0x80000000; + permutation384(s,PR0_ROUNDS,constant7Format); + // process associated data + if (adlen) { + while (adlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket((dataFormat + 3), (ad + 12)); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + permutation384(s,PR_ROUNDS,constant7Format); + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket((dataFormat + 3), (tempData + 12)); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + permutation384(s,PR_ROUNDS,constant7Format); + } + s[9] ^= 0x80000000; + clen -= CRYPTO_ABYTES; + if (clen) { + while (clen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, c); + dataFormat_1[0] = s[0] ^ dataFormat[0]; + dataFormat_1[1] = s[1] ^ dataFormat[1]; + dataFormat_1[2] = s[2] ^ dataFormat[2]; + packU96FormatToThreePacket((dataFormat+3), (c+12)); + dataFormat_1[3] = s[3] ^ dataFormat[3]; + dataFormat_1[4] = s[4] ^ dataFormat[4]; + dataFormat_1[5] = s[5] ^ dataFormat[5]; + unpackU96FormatToThreePacket(m, dataFormat_1); + unpackU96FormatToThreePacket((m + 12), (dataFormat_1 + 3)); + s[0] = dataFormat[0]; + s[1] = dataFormat[1]; + s[2] = dataFormat[2]; + s[3] = dataFormat[3]; + s[4] = dataFormat[4]; + s[5] = dataFormat[5]; + permutation384(s,PR_ROUNDS,constant7Format); + clen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + unpackU96FormatToThreePacket(tempU8, s); + 
unpackU96FormatToThreePacket((tempU8+12), (s+3)); + for (i = 0; i < clen; ++i, ++m, ++c) + { + *m = tempU8[i] ^ *c; + tempU8[i] = *c; + } + tempU8[i] ^= 0x01; + packU96FormatToThreePacket(s, tempU8); + packU96FormatToThreePacket((s + 3), (tempU8 + 12)); + } + // finalization + permutation384(s,PRF_ROUNDS,constant7Format); + // verify the tag byte by byte without an early exit; c now points at the received tag + unpackU96FormatToThreePacket(tempU8, s); + unpackU96FormatToThreePacket((tempU8+12), (s+3)); + for (i = 0; i < CRYPTO_ABYTES; ++i) + tagDiff |= (u8)(tempU8[i] ^ c[i]); + if (tagDiff != 0) { + memset(plain, 0, (size_t)(m - plain)); /* do not release unauthenticated plaintext */ + return -1; + } + *mlen = (unsigned long long)(m - plain); /* m has advanced by exactly the message length */ + return 0; +} diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_1/api.h b/knot/Implementations/crypto_aead/knot192/armcortexm_1/api.h new file mode 100644 index 0000000..1378c37 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_1/api.h @@ -0,0 +1,9 @@ +#ifndef KNOT_API_H +#define KNOT_API_H +//k=n=tag=192 b=384 r=96 c=288 +#define CRYPTO_KEYBYTES 24 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 24 +#define CRYPTO_ABYTES 24 +#define CRYPTO_NOOVERLAP 1 +#endif diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_1/crypto_aead.h b/knot/Implementations/crypto_aead/knot192/armcortexm_1/crypto_aead.h new file mode 100644 index 0000000..8f53846 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_1/crypto_aead.h @@ -0,0 +1,18 @@ +#ifndef KNOT_CRYPTO_AEAD_H +#define KNOT_CRYPTO_AEAD_H +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long 
adlen, + const unsigned char *npub, + const unsigned char *k); +#endif diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_1/encrypt.c b/knot/Implementations/crypto_aead/knot192/armcortexm_1/encrypt.c new file mode 100644 index 0000000..84be8a7 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_1/encrypt.c @@ -0,0 +1,400 @@ +#include +#include +#include +#include +#include "crypto_aead.h" +#include "api.h" + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) + +#define KNOT_CIPHER 1 +#if defined(KNOT_CIPHER) && (KNOT_CIPHER == 1) +unsigned char constant7[127] = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03, 0x06, + 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a, 0x14, 0x28, 0x51, 0x23, 0x47, + 0x0f, 0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b, 0x16, + 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54, 0x29, 0x53, + 0x27, 0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43, 0x07, + 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49, 0x13, 0x26, + 0x4d, 0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37, 0x6f, + 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31, 0x63, 0x46, 0x0d, 0x1a, 0x34, + 0x69, 0x52, 0x25, 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c, 0x39, + 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a, 0x15, 0x2a, 0x55, 0x2b, 0x57, + 0x2f, 0x5f, 0x3f, 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40 }; + +/* State + * w8 w4 w0 + * w9 w5 w1 + * w10 w6 w2 + * w11 w7 w3 + * + * Sbox + t1 = ~a; + t2 = b & t1; + t3 = c ^ t2; + h = d ^ t3; + t5 = b | c; + t6 = d ^ t1; + g = t5 ^ t6; + t8 = b ^ d; + t9 = t3 & t6; + e = t8 ^ t9; + t11 = g & t8; + f = t3 ^ t11; + * + * Sbox after change + a = ~a; + s0 = b & a; + s0 = c ^ s0; + c = b | c; + a = d ^ a; + c = c ^ a; + s1 = b ^ d; + d = d ^ s0; + a = s0 & a; + a = s1 ^ a; + b = c & s1; + b = s0 ^ b; + */ +static void permutation384(unsigned char *in, int rounds, unsigned char *rc) { + uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11; + uint32_t s0, s1, 
s2; + uint32_t one = 0x1; + uint32_t ff = 0xff; + uint32_t value; + __asm volatile( + "ldr w0, [in] \n\t" + "ldr w4, [in, #4] \n\t" + "ldr w8, [in, #8] \n\t" + "ldr w1, [in, #12] \n\t" + "ldr w5, [in, #16] \n\t" + "ldr w9, [in, #20] \n\t" + "ldr w2, [in, #24] \n\t" + "ldr w6, [in, #28] \n\t" + "ldr w10, [in, #32] \n\t" + "ldr w3, [in, #36] \n\t" + "ldr w7, [in, #40] \n\t" + "ldr w11, [in, #44] \n\t" + "mov s0, 0xfff \n\t" + "mov value, 0x7ff \n\t" + "lsl value, value, #12 \n\t" + "eors value, value, s0 \n\t" + "enc_loop: \n\t" + "/*add round const*/ \n\t" + "ldrb s0, [rc] \n\t" + "eors w0, w0, s0 \n\t" + "/*sbox first column*/ \n\t" + "mvns w0, w0 \n\t" + "ands s0, w1, w0 \n\t" + "eors s0, w2, s0 \n\t" + "orrs w2, w1, w2 \n\t" + "eors w0, w3, w0 \n\t" + "eors w2, w2, w0 \n\t" + "eors s1, w1, w3 \n\t" + "eors w3, w3, s0 \n\t" + "ands w0, s0, w0 \n\t" + "eors w0, s1, w0 \n\t" + "ands w1, w2, s1 \n\t" + "eors w1, s0, w1 \n\t" + "/*sbox second column*/ \n\t" + "mvns w4, w4 \n\t" + "ands s0, w5, w4 \n\t" + "eors s0, w6, s0 \n\t" + "orrs w6, w5, w6 \n\t" + "eors w4, w7, w4 \n\t" + "eors w6, w6, w4 \n\t" + "eors s1, w5, w7 \n\t" + "eors w7, w7, s0 \n\t" + "ands w4, s0, w4 \n\t" + "eors w4, s1, w4 \n\t" + "ands w5, w6, s1 \n\t" + "eors w5, s0, w5 \n\t" + "/*sbox third column*/ \n\t" + "mvns w8, w8 \n\t" + "ands s0, w9, w8 \n\t" + "eors s0, w10, s0 \n\t" + "orrs w10, w9, w10 \n\t" + "eors w8, w11, w8 \n\t" + "eors w10, w10, w8 \n\t" + "eors s1, w9, w11 \n\t" + "eors w11, w11, s0 \n\t" + "ands w8, s0, w8 \n\t" + "eors w8, s1, w8 \n\t" + "ands w9, w10, s1 \n\t" + "eors w9, s0, w9 \n\t" + "/*rotate shift left 1 bit*/ \n\t" + "ror s0, w1, #31 \n\t" + "ands s0, s0, one \n\t" + "lsl w1, w1, #1 \n\t" + "ror s1, w9, #31 \n\t" + "ands s1, s1, one \n\t" + "eors w1, w1, s1 \n\t" + "ror s2, w5, #31 \n\t" + "ands s2, s2, one \n\t" + "lsl w5, w5, #1 \n\t" + "eors w5, w5, s0 \n\t" + "lsl w9, w9, #1 \n\t" + "eors w9, w9, s2 \n\t" + "/*rotate shift left 8 bits*/ \n\t" + "ror s0, w2, #24 
\n\t" + "ands s0, s0, ff \n\t" + "lsl w2, w2, #8 \n\t" + "ror s1, w10, #24 \n\t" + "ands s1, s1, ff \n\t" + "eors w2, w2, s1 \n\t" + "ror s2, w6, #24 \n\t" + "ands s2, s2, ff \n\t" + "lsl w6, w6, #8 \n\t" + "eors w6, w6, s0 \n\t" + "lsl w10, w10, #8 \n\t" + "eors w10, w10, s2 \n\t" + "/*rotate shift left 55 bits*/ \n\t" + "ror s0, w11, #9 \n\t" + "ands s0, s0, value \n\t" + "lsl w11, w11, #23 \n\t" + "ror s1, w7, #9 \n\t" + "ands s1, s1, value \n\t" + "eors w11, w11, s1 \n\t" + "ror s2, w3, #9 \n\t" + "ands s2, s2, value \n\t" + "lsl w3, w3, #23 \n\t" + "eors w3, w3, s0 \n\t" + "lsl w7, w7, #23 \n\t" + "eors w7, w7, s2 \n\t" + "mov s0, w3 \n\t" + "mov w3, w11 \n\t" + "mov w11, w7 \n\t" + "mov w7, s0 \n\t" + "/*loop control*/ \n\t" + "adds rc, rc, #1 \n\t" + "subs rounds, rounds, #1 \n\t" + "bne enc_loop \n\t" + "str w0, [in] \n\t" + "str w4, [in, #4] \n\t" + "str w8, [in, #8] \n\t" + "str w1, [in, #12] \n\t" + "str w5, [in, #16] \n\t" + "str w9, [in, #20] \n\t" + "str w2, [in, #24] \n\t" + "str w6, [in, #28] \n\t" + "str w10, [in, #32] \n\t" + "str w3, [in, #36] \n\t" + "str w7, [in, #40] \n\t" + "str w11, [in, #44] \n\t" + ); +} + +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + unsigned int u = 0; + unsigned int v = 0; + unsigned int v1 = 0; + unsigned int last_index = 0; + unsigned int i; + unsigned char *A = NULL; + unsigned char *M = NULL; + unsigned char S[48]; + unsigned int *A32 = NULL; + unsigned int *M32 = NULL; + unsigned int *S32 = NULL; + unsigned int *C32 = NULL; + + // pad associated data + if (adlen != 0) { + u = adlen / 12 + 1; + A = malloc(u * 12); + if (A == NULL) { + return -1; + } + memset(A, 0, u * 12); + memcpy(A, ad, adlen); + A[adlen] = 0x01; + A32 = (unsigned int *)A; + } + + // pad plaintext data + if (mlen != 0) { + v = mlen / 12 + 
1; + M = malloc(v * 12); + if (M == NULL) { + free(A); + return -1; + } + memset(M, 0, v * 12); + memcpy(M, m, mlen); + M[mlen] = 0x01; + M32 = (unsigned int *)M; + } + + // initalization + memcpy(S, npub, CRYPTO_NPUBBYTES); + memcpy(S + CRYPTO_NPUBBYTES, k, CRYPTO_KEYBYTES); + permutation384(S, 76, constant7); + S32 = (unsigned int *)S; + + // processiong associated data + if (adlen != 0) { + for (i = 0; i < u; i++) { + S32[0] ^= A32[0]; + S32[1] ^= A32[1]; + S32[2] ^= A32[2]; + A32 = A32 + 3; + permutation384(S, 40, constant7); + } + } + S[47] ^= 0x80; + + // Encryption processiong plaintext data + if (mlen != 0) { + C32 = (unsigned int *)c; + for (i = 0; i < v - 1; i++) { + S32[0] ^= M32[0]; + S32[1] ^= M32[1]; + S32[2] ^= M32[2]; + M32 = M32 + 3; + C32[0] = S32[0]; + C32[1] = S32[1]; + C32[2] = S32[2]; + C32 = C32 + 3; + permutation384(S, 40, constant7); + } + v1 = mlen % 12; + last_index = (v - 1) * 12; + for (i = 0; i < v1; i++) { + S[i] ^= M[last_index + i]; + c[last_index + i] = S[i]; + } + S[i] ^= 0x01; + } + + // finalization + permutation384(S, 44, constant7); + + // return tag + memcpy(c + mlen, S, CRYPTO_ABYTES); + *clen = mlen + CRYPTO_ABYTES; + if (A != NULL) { + free(A); + } + if (M != NULL) { + free(M); + } + return 0; +} +/* Decrypts and authenticates c: returns 0 with *mlen = clen - CRYPTO_ABYTES on success; returns -1 with *mlen = 0 when clen is shorter than a tag, when the padded-AD allocation fails, or when the tag does not match (the plaintext written to m is then zeroed). nsec is not used. */ +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) +{ + unsigned int u = 0; + unsigned int v = 0; + unsigned int v1 = 0; + unsigned int last_index = 0; + unsigned int i; + unsigned char tag_diff = 0; + unsigned char *A = NULL; + unsigned char S[48]; + unsigned int *A32 = NULL; + unsigned int *M32 = NULL; + unsigned int *S32 = NULL; + unsigned int *C32 = NULL; + + *mlen = 0; + if (clen < CRYPTO_ABYTES) { + return -1; + } + + // pad associated data + if (adlen != 0) { + u = adlen / 12 + 1; + A = malloc(u * 12); + if (A == NULL) { + return -1; + } + memset(A, 0, u * 
12); + memcpy(A, ad, adlen); + A[adlen] = 0x01; + A32 = (unsigned int *)A; + } + + M32 = (unsigned int *)m; + C32 = (unsigned int *)c; /* NOTE(review): word access through m/c assumes the target tolerates unaligned 32-bit loads/stores - confirm */ + + // initialization + memcpy(S, npub, CRYPTO_NPUBBYTES); + memcpy(S + CRYPTO_NPUBBYTES, k, CRYPTO_KEYBYTES); + permutation384(S, 76, constant7); + S32 = (unsigned int *)S; + + // processing associated data + if (adlen != 0) { + for (i = 0; i < u; i++) { + S32[0] ^= A32[0]; + S32[1] ^= A32[1]; + S32[2] ^= A32[2]; + A32 = A32 + 3; + permutation384(S, 40, constant7); + } + } + free(A); /* padded AD copy is fully absorbed, release it before any return below; free(NULL) is a no-op */ + S[47] ^= 0x80; + + // Decryption: processing ciphertext data + if (clen != CRYPTO_ABYTES) { + C32 = (unsigned int *)c; + v = (clen - CRYPTO_ABYTES) / 12 + 1; + for (i = 0; i < v - 1; i++) { + M32[0] = S32[0] ^ C32[0]; + M32[1] = S32[1] ^ C32[1]; + M32[2] = S32[2] ^ C32[2]; + S32[0] = C32[0]; + S32[1] = C32[1]; + S32[2] = C32[2]; + M32 = M32 + 3; + C32 = C32 + 3; + permutation384(S, 40, constant7); + } + v1 = (clen - CRYPTO_ABYTES) % 12; + last_index = (v - 1) * 12; + for (i = 0; i < v1; i++) { + m[last_index + i] = S[i] ^ c[last_index + i]; + S[i] = c[last_index + i]; + } + S[i] ^= 0x01; + } + + // finalization + permutation384(S, 44, constant7); + + // compare the whole tag without an early exit; on mismatch wipe the plaintext and return -1 + for (i = 0; i < CRYPTO_ABYTES; i++) { + tag_diff |= (unsigned char)(c[clen - CRYPTO_ABYTES + i] ^ S[i]); + } + if (tag_diff != 0) { + memset(m, 0, clen - CRYPTO_ABYTES); + return -1; + } + *mlen = clen - CRYPTO_ABYTES; + return 0; +} +#else +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + return 0; +} +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + return 0; +} +#endif diff --git 
a/knot/Implementations/crypto_aead/knot192/armcortexm_2/api.h b/knot/Implementations/crypto_aead/knot192/armcortexm_2/api.h new file mode 100644 index 0000000..c3cb1d9 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_2/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 24 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 24 +#define CRYPTO_ABYTES 24 +#define CRYPTO_NOOVERLAP 1 + diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.h b/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.h new file mode 100644 index 0000000..d9dd414 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_2/auxFormat.h @@ -0,0 +1,251 @@ +//#include +#include"crypto_aead.h" +#include"api.h" +#include +#include +#include +#include +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) + + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define sbox(a, b, c, d, e, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; e = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} + +//////////////////puck begin +//&:5 <<:4 |:4 +#define puckU32ToThree(x){\ +x &= 0x92492492;\ +x = (x | (x << 2)) & 0xc30c30c3;\ +x = (x | (x << 4)) & 0xf00f00f0;\ +x = (x | (x << 8)) & 0xff0000ff;\ +x = (x | (x << 16)) & 0xfff00000;\ +} +#define unpuckU32ToThree(x){\ +x &= 0xfff00000;\ +x = (x | (x >> 16)) & 0xff0000ff;\ +x = (x | (x >> 8)) & 0xf00f00f0;\ +x = (x | (x >> 4)) & 0xc30c30c3;\ +x = (x | (x >> 2)) & 0x92492492;\ +} +//u32 t1 、u32 t2 、u8 t2_64 , t2_65 ;u32 temp1[3] = { 0 };u32 temp2[3] = { 0 }; +#define packU48FormatToThreePacket( out, in) {\ +t1 = (u32)U16BIG(*(u16*)(in + 4)); \ +t2 = U32BIG(*(u32*)(in)); \ +t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) 
>> 6; \ +t1 = t1 << 1; \ +t2 = t2 << 2; \ +temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; \ +puckU32ToThree(temp1[0]); \ +puckU32ToThree(temp1[1]); \ +puckU32ToThree(temp1[2]); \ +temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \ +puckU32ToThree(temp2[0]); \ +puckU32ToThree(temp2[1]); \ +puckU32ToThree(temp2[2]); \ +out[0] = (temp1[0] >> 11) | (temp2[0] >> 22); \ +out[1] = (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); \ +out[2] = (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); \ +} + + +/* +void packU96FormatToThreePacket(u32 * out, u8 * in) { +u32 t0 = U32BIG(((u32*)in)[2]); +u32 t1 = U32BIG(((u32*)in)[1]); +u32 t2 = U32BIG(((u32*)in)[0]); +u8 t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; +t1 = t1 << 1; +t2 = t2 << 2; +u32 temp0[3] = { 0 }; +temp0[0] = t0; temp0[1] = t0 << 1; temp0[2] = t0 << 2; +puckU32ToThree(temp0[0]); +puckU32ToThree(temp0[1]); +puckU32ToThree(temp0[2]); +u32 temp1[3] = { 0 }; +temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; +puckU32ToThree(temp1[0]); +puckU32ToThree(temp1[1]); +puckU32ToThree(temp1[2]); +u32 temp2[3] = { 0 }; +temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; +puckU32ToThree(temp2[0]); +puckU32ToThree(temp2[1]); +puckU32ToThree(temp2[2]); +out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); +out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); +out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); +} +*/ +//t9 t1 t2 t1_32 t2_64 t2_65 temp0[3] temp1[3] temp2[3] +#define packU96FormatToThreePacket(out, in) {\ +t9 = U32BIG(((u32*)in)[2]); \ +t1 = U32BIG(((u32*)in)[1]); \ +t2 = U32BIG(((u32*)in)[0]); \ +t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \ +t1 = t1 << 1; \ +t2 = t2 << 2; \ +temp0[0] = t9; temp0[1] = t9 << 1; temp0[2] = t9 << 2; \ +puckU32ToThree(temp0[0]); \ +puckU32ToThree(temp0[1]); \ 
+puckU32ToThree(temp0[2]); \ +temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; \ +puckU32ToThree(temp1[0]); \ +puckU32ToThree(temp1[1]); \ +puckU32ToThree(temp1[2]); \ +temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \ +puckU32ToThree(temp2[0]); \ +puckU32ToThree(temp2[1]); \ +puckU32ToThree(temp2[2]); \ +out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); \ +out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); \ +out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); \ +} +/* +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { +u32 temp0[3] = { 0 }; +u32 temp1[3] = { 0 }; +u32 temp2[3] = { 0 }; +u32 t1_32, t2_64, t2_65; +u32 t0, t1, t2; +temp0[0] = in[0] & 0xffe00000; +temp1[0] = (in[0] & 0x001ffc00) << 11; +temp2[0] = (in[0] & 0x000003ff) << 22; +temp0[1] = in[1] & 0xffe00000; +temp1[1] = (in[1] & 0x001ff800) << 11; +t2_64 = ((in[1] & 0x00000400) << 21); +temp2[1] = (in[1] & 0x000003ff) << 22; +temp0[2] = in[2] & 0xffc00000; +t1_32 = ((in[2] & 0x00200000) << 10); +temp1[2] = (in[2] & 0x001ff800) << 11; +t2_65 = ((in[2] & 0x00000400) << 20); +temp2[2] = (in[2] & 0x000003ff) << 22; +unpuckU32ToThree(temp0[0]); +unpuckU32ToThree(temp0[1]); +unpuckU32ToThree(temp0[2]); +t0 = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; +unpuckU32ToThree(temp1[0]); +unpuckU32ToThree(temp1[1]); +unpuckU32ToThree(temp1[2]); +t1 = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); +unpuckU32ToThree(temp2[0]); +unpuckU32ToThree(temp2[1]); +unpuckU32ToThree(temp2[2]); +t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); +*(u32*)(out) = U32BIG(t2); +*(u32*)(out + 4) = U32BIG(t1); +*(u32*)(out + 8) = U32BIG(t0); +} +*/ +//u32 temp0[3] = { 0 };u32 temp1[3] = { 0 };u32 temp2[3] = { 0 };u32 t1_32, t2_64, t2_65;t9,t1,t2, +#define unpackU96FormatToThreePacket( out, in) {\ +temp0[0] = in[0] & 0xffe00000; \ +temp1[0] = (in[0] & 0x001ffc00) << 11; \ +temp2[0] = (in[0] & 
0x000003ff) << 22; \ +temp0[1] = in[1] & 0xffe00000; \ +temp1[1] = (in[1] & 0x001ff800) << 11; \ +t2_64 = ((in[1] & 0x00000400) << 21); \ +temp2[1] = (in[1] & 0x000003ff) << 22; \ +temp0[2] = in[2] & 0xffc00000; \ +t1_32 = ((in[2] & 0x00200000) << 10); \ +temp1[2] = (in[2] & 0x001ff800) << 11; \ +t2_65 = ((in[2] & 0x00000400) << 20); \ +temp2[2] = (in[2] & 0x000003ff) << 22; \ +unpuckU32ToThree(temp0[0]); \ +unpuckU32ToThree(temp0[1]); \ +unpuckU32ToThree(temp0[2]); \ +t9 = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; \ +unpuckU32ToThree(temp1[0]); \ +unpuckU32ToThree(temp1[1]); \ +unpuckU32ToThree(temp1[2]); \ +t1 = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); \ +unpuckU32ToThree(temp2[0]); \ +unpuckU32ToThree(temp2[1]); \ +unpuckU32ToThree(temp2[2]); \ +t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \ +*(u32*)(out) = U32BIG(t2); \ +*(u32*)(out + 4) = U32BIG(t1); \ +*(u32*)(out + 8) = U32BIG(t9); \ +} + +#define U96_BIT_LOTR32_1(t0,t1,t2,t3,t4,t5){\ +t3= t1;\ +t4 = t2;\ +t5 = LOTR32(t0, 1); \ +} +#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t2, 2);\ +t4 =LOTR32(t0, 3);\ +t5 = LOTR32(t1, 3); \ +} +//55=3*18+1 +#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t1, 18); \ +t4 = LOTR32(t2, 18);\ +t5 = LOTR32(t0, 19); \ +} +/* +s0 s1 s2 +s3 s4 s5 +s6 s7 s8 +s9 s10 s11 +*/ +#define ROUND384(lunNum) {\ +s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;\ +s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;\ +s[2] ^= constant7Format[lunNum] & 0x7;\ +sbox(s[0], s[3], s[6], s[9] , s_temp[0], s_temp[3], s_temp[6], s_temp[9]);\ +sbox(s[1], s[4], s[7], s[10], s_temp[1], s_temp[4], s_temp[7], s_temp[10]);\ +sbox(s[2], s[5], s[8], s[11], s_temp[2], s_temp[5], s_temp[8], s_temp[11]);\ +s[0] = s_temp[0], s[1] = s_temp[1], s[2] = s_temp[2];\ +U96_BIT_LOTR32_1(s_temp[3], s_temp [4], s_temp[ 5], s[3], s[4], s[5]);\ +U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\ +U96_BIT_LOTR32_55(s_temp[9], s_temp[10], 
s_temp[11], s[9], s[10], s[11]);\ +} +#define ROUND384Full(lunNum) {\ +printf(" constant7Format[%d]=%08x\n", lunNum, constant7Format[lunNum]);\ +s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;\ +s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;\ +s[2] ^= constant7Format[lunNum] & 0x7;\ + printfU96Format("addition of round constant output",s);\ +sbox(s[0], s[3], s[6], s[9] , s_temp[0], s_temp[3], s_temp[6], s_temp[9]);\ +sbox(s[1], s[4], s[7], s[10], s_temp[1], s_temp[4], s_temp[7], s_temp[10]);\ +sbox(s[2], s[5], s[8], s[11], s_temp[2], s_temp[5], s_temp[8], s_temp[11]);\ + printfU96Format("substitution layer output",s_temp);\ +s[0] = s_temp[0], s[1] = s_temp[1], s[2] = s_temp[2];\ +U96_BIT_LOTR32_1(s_temp[3], s_temp [4], s_temp[ 5], s[3], s[4], s[5]);\ +U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\ +U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\ +printfU96Format("linear diffusion layer output", s);\ +} + +void printBinarySimp(unsigned char * str, u8 *a, int len); +//void packU96FormatToThreePacketFull(unsigned int * out, u8 * in); +//void unpackU96FormatToThreePacketFull(u8 * out, unsigned int * in); +//void packU96FormatToThreePacket(u32 * out, u8 * in); +//void unpackU96FormatToThreePacket(u8 * out, u32 * in); +void printU32State(char name[], unsigned int* var, long len); +void printfU96Format(char name[], unsigned int * s); +//////////////////puck end +void printU8(char name[], u8 var[], int len, int offset); +void printfU96Format(char name[], u32 * s); + +////////////constant begin// +//unsigned char constant7Format[127]; +void puckU8FormatToThreePacket(u8 in, u8 *out); +//void test_puckU8FormatToThreePacket(); +////////////constant end// + + +static void permutation384(unsigned int *in, int rounds, unsigned char *rc); diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_2/crypto_aead.h b/knot/Implementations/crypto_aead/knot192/armcortexm_2/crypto_aead.h new file mode 100644 index 0000000..862d176 
--- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_2/crypto_aead.h @@ -0,0 +1,18 @@ + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k +); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k +); diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_2/encrypt.c b/knot/Implementations/crypto_aead/knot192/armcortexm_2/encrypt.c new file mode 100644 index 0000000..ab4ac31 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_2/encrypt.c @@ -0,0 +1,287 @@ + +#include"auxFormat.h" + +#define aead_RATE (96 / 8) +#define PR0_ROUNDS 76 +#define PR_ROUNDS 40 +#define PRF_ROUNDS 44 +unsigned char constant7Format[127] = { + /*constant7Format[127]:*/ +0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, +0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, +0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, +0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, +0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, +0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, +0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,0xcf,0x5e,0xd3,0x9f, +0xdc,0xc6,0x16,0x91,0x8d,0x4c,0x43,0x1a,0xd0,0x87,0x1c,0xc1, +0x0f,0x59,0xca,0x57,0x9b,0xdd,0xce,0x56,0x93,0x9d,0xcc,0x46, +0x13,0x98,0xc5,0x0e,0x51,0x8a,0x55,0x8b,0x5d,0xcb,0x5f,0xdb, +0xdf,0xde,0xd6,0x96,0x94,0x84,0x04, }; + +/* State + * w8 w4 w0 + * w9 w5 w1 + * w10 w6 w2 + * w11 w7 w3 + */ + static void permutation384(unsigned int *in, int rounds, unsigned char *rc) { + + uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11; + uint32_t s0, 
s1, s2; + uint32_t i=0; + __asm volatile( + "ldr w0, [in] \n\t" + "ldr w4, [in, #4] \n\t" + "ldr w8, [in, #8] \n\t" + "ldr w1, [in, #12] \n\t" + "ldr w5, [in, #16] \n\t" + "ldr w9, [in, #20] \n\t" + "ldr w2, [in, #24] \n\t" + "ldr w6, [in, #28] \n\t" + "ldr w10, [in, #32] \n\t" + "ldr w3, [in, #36] \n\t" + "ldr w7, [in, #40] \n\t" + "ldr w11, [in, #44] \n\t" + "enc_loop: \n\t" + "/*add round const s0 s1*/ \n\t" + "ldrb s0, [rc] \n\t" + "LSR s1, s0, #6 \n\t" + "and s1, s1, 0x3 \n\t" + "LSR s2, s0, #3 \n\t" + "and s2, s2, 0x7 \n\t" + "and s0, s0, 0x7 \n\t" + "eors w8, w8, s0 \n\t" + "eors w4, w4, s2 \n\t" + "eors w0, w0, s1 \n\t" + "/*sbox first column*/ \n\t" + "mvns w0, w0 \n\t" + "ands s0, w1, w0 \n\t" + "eors s0, w2, s0 \n\t" + "orrs w2, w1, w2 \n\t" + "eors w0, w3, w0 \n\t" + "eors w2, w2, w0 \n\t" + "eors s1, w1, w3 \n\t" + "eors w3, w3, s0 \n\t" + "ands w0, s0, w0 \n\t" + "eors w0, s1, w0 \n\t" + "ands w1, w2, s1 \n\t" + "eors w1, s0, w1 \n\t" + "/*sbox second column*/ \n\t" + "mvns w4, w4 \n\t" + "ands s0, w5, w4 \n\t" + "eors s0, w6, s0 \n\t" + "orrs w6, w5, w6 \n\t" + "eors w4, w7, w4 \n\t" + "eors w6, w6, w4 \n\t" + "eors s1, w5, w7 \n\t" + "eors w7, w7, s0 \n\t" + "ands w4, s0, w4 \n\t" + "eors w4, s1, w4 \n\t" + "ands w5, w6, s1 \n\t" + "eors w5, s0, w5 \n\t" + "/*sbox third column*/ \n\t" + "mvns w8, w8 \n\t" + "ands s0, w9, w8 \n\t" + "eors s0, w10, s0 \n\t" + "orrs w10, w9, w10 \n\t" + "eors w8, w11, w8 \n\t" + "eors w10, w10, w8 \n\t" + "eors s1, w9, w11 \n\t" + "eors w11, w11, s0 \n\t" + "ands w8, s0, w8 \n\t" + "eors w8, s1, w8 \n\t" + "ands w9, w10, s1 \n\t" + "eors w9, s0, w9 \n\t" + "/*rotate shift left 1 bit [w9 w5 w1-> (w1,1) w9 w5] */ \n\t" + "mov s0, w1 \n\t" + "mov w1, w5 \n\t" + "mov w5, w9 \n\t" + "ROR w9, s0, #31 \n\t" + "/*rotate shift left 8 bits [w10 w6 w2-> (w6,3) (w2,3) ( w10,2)]*/ \n\t" + "mov s0, w10 \n\t" + "ROR w10, w6 , #29 \n\t" + "ROR w6, w2 , #29 \n\t" + "ROR w2, s0, #30 \n\t" + "/*rotate shift left 55 bit [w11 w7 w3-> 
(w3,13) (w11,14) ( w7,14)] */ \n\t" + "mov s0, w3 \n\t" + "ROR w3, w7 , #14 \n\t" + "ROR w7, w11 , #14 \n\t" + "ROR w11, s0, #13 \n\t" + "/*loop control*/ \n\t" + "adds rc, rc, #1 \n\t" + "subs rounds, rounds, #1 \n\t" + "bne enc_loop \n\t" + "str w0, [in] \n\t" + "str w4, [in, #4] \n\t" + "str w8, [in, #8] \n\t" + "str w1, [in, #12] \n\t" + "str w5, [in, #16] \n\t" + "str w9, [in, #20] \n\t" + "str w2, [in, #24] \n\t" + "str w6, [in, #28] \n\t" + "str w10, [in, #32] \n\t" + "str w3, [in, #36] \n\t" + "str w7, [in, #40] \n\t" + "str w11, [in, #44] \n\t" + ); +} + +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + + u8 i; + u32 s[12] = { 0 }; + u32 dataFormat[3] = { 0 }; + u8 tempData[12] = { 0 }; + u32 s_temp[12] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + u32 t1_32, t2_64, t2_65; + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + *clen = mlen + CRYPTO_ABYTES; + // initialization + packU96FormatToThreePacket(s, npub); + packU96FormatToThreePacket((s + 3), (npub + 12)); + packU96FormatToThreePacket((s + 6), k); + packU96FormatToThreePacket((s + 9), (k + 12)); +permutation384(s,PR0_ROUNDS,constant7Format); + // process associated data + if (adlen) { + while (adlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; +permutation384(s,PR_ROUNDS,constant7Format); + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen); + tempData[adlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + permutation384(s,PR_ROUNDS,constant7Format); + } + s[9] ^= 0x80000000; + if (mlen) { + while (mlen >= aead_RATE) { + 
packU96FormatToThreePacket(dataFormat, m); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + unpackU96FormatToThreePacket(c, s); + permutation384(s,PR_ROUNDS,constant7Format); + mlen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, m, mlen); + tempData[mlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + unpackU96FormatToThreePacket(tempData, s); + memcpy(c, tempData, mlen); + c += mlen; + } + // finalization + permutation384(s,PRF_ROUNDS,constant7Format); + // return tag + unpackU96FormatToThreePacket(c, s); + unpackU96FormatToThreePacket((c + 12), (s + 3)); + return 0; +} + +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + u8 i, j; + u32 s[12] = { 0 }; + u32 dataFormat[6] = { 0 }; + u32 dataFormat_1[3] = { 0 }; + u8 tempData[12] = { 0 }; + u8 tempU8[48] = { 0 }; + u32 s_temp[12] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + u32 t1_32, t2_64, t2_65; + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + *mlen = clen - CRYPTO_ABYTES; + if (clen < CRYPTO_ABYTES) + return -1; + // initialization + packU96FormatToThreePacket(s, npub); + packU96FormatToThreePacket((s + 3), (npub + 12)); + packU96FormatToThreePacket((s + 6), k); + packU96FormatToThreePacket((s + 9), (k + 12)); +permutation384(s,PR0_ROUNDS,constant7Format); + // process associated data + if (adlen) { + while (adlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; +permutation384(s,PR_ROUNDS,constant7Format); + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + + memcpy(tempData, ad, 
adlen); + tempData[adlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + permutation384(s,PR_ROUNDS,constant7Format); + } + s[9] ^= 0x80000000; + clen -= CRYPTO_ABYTES; + if (clen) { + while (clen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, c); + dataFormat_1[0] = s[0] ^ dataFormat[0]; + dataFormat_1[1] = s[1] ^ dataFormat[1]; + dataFormat_1[2] = s[2] ^ dataFormat[2]; + unpackU96FormatToThreePacket(m, dataFormat_1); + s[0] = dataFormat[0]; + s[1] = dataFormat[1]; + s[2] = dataFormat[2]; + permutation384(s,PR_ROUNDS,constant7Format); + clen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + unpackU96FormatToThreePacket(tempU8, s); + for (i = 0; i < clen; ++i, ++m, ++c) + { + *m = tempU8[i] ^ *c; + tempU8[i] = *c; + } + tempU8[i] ^= 0x01; + packU96FormatToThreePacket(s, tempU8); + } + // finalization + permutation384(s,PRF_ROUNDS,constant7Format); + // return tag + packU96FormatToThreePacket(dataFormat, c); + packU96FormatToThreePacket((dataFormat + 3), (c + 12)); + if (dataFormat[0] != s[0] || dataFormat[1] != s[1] || dataFormat[2] != s[2] || dataFormat[3] != s[3] + || dataFormat[4] != s[4] || dataFormat[5] != s[5]) { + return -1; + } + return 0; +} diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_1/api.h b/knot/Implementations/crypto_aead/knot256/armcortexm_1/api.h new file mode 100644 index 0000000..6e53e33 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_1/api.h @@ -0,0 +1,9 @@ +#ifndef KNOT_API_H +#define KNOT_API_H +//k=n=tag=256 b=512 r=128 c=384 +#define CRYPTO_KEYBYTES 32 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 32 +#define CRYPTO_ABYTES 32 +#define CRYPTO_NOOVERLAP 1 +#endif diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_1/crypto_aead.h b/knot/Implementations/crypto_aead/knot256/armcortexm_1/crypto_aead.h new file mode 100644 index 0000000..8f53846 --- /dev/null +++ 
b/knot/Implementations/crypto_aead/knot256/armcortexm_1/crypto_aead.h @@ -0,0 +1,18 @@ +#ifndef KNOT_CRYPTO_AEAD_H +#define KNOT_CRYPTO_AEAD_H +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k); +#endif diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_1/encrypt.c b/knot/Implementations/crypto_aead/knot256/armcortexm_1/encrypt.c new file mode 100644 index 0000000..f9ea5f1 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_1/encrypt.c @@ -0,0 +1,444 @@ +#include +#include +#include +#include +#include "crypto_aead.h" +#include "api.h" + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) + +#define KNOT_CIPHER 1 +#if defined(KNOT_CIPHER) && (KNOT_CIPHER == 1) +unsigned char constant7[127] = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03, 0x06, + 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a, 0x14, 0x28, 0x51, 0x23, 0x47, + 0x0f, 0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b, 0x16, + 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54, 0x29, 0x53, + 0x27, 0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43, 0x07, + 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49, 0x13, 0x26, + 0x4d, 0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37, 0x6f, + 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31, 0x63, 0x46, 0x0d, 0x1a, 0x34, + 0x69, 0x52, 0x25, 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c, 0x39, + 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a, 0x15, 0x2a, 0x55, 0x2b, 0x57, + 0x2f, 0x5f, 0x3f, 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40 }; + +#define 
load64(x1, x0, in) \ + "ldr x0, [in] \n\t" \ + "ldr x1, [in, #4] \n\t" + +#define store64(x1, x0, out) \ + "str x0, [out] \n\t" \ + "str x1, [out, #4] \n\t" + +/* State + * w12 w8 w4 w0 + * w13 w9 w5 w1 + * w14 w10 w6 w2 + * w15 w11 w7 w3 + * + * Sbox + t1 = ~a; + t2 = b & t1; + t3 = c ^ t2; + h = d ^ t3; + t5 = b | c; + t6 = d ^ t1; + g = t5 ^ t6; + t8 = b ^ d; + t9 = t3 & t6; + e = t8 ^ t9; + t11 = g & t8; + f = t3 ^ t11; + * + * Sbox after change + a = ~a; + s0 = b & a; + s0 = c ^ s0; + c = b | c; + a = d ^ a; + c = c ^ a; + s1 = b ^ d; + d = d ^ s0; + a = s0 & a; + a = s1 ^ a; + b = c & s1; + b = s0 ^ b; + */ +static void permutation512(unsigned char *in, int rounds, unsigned char *rc) { + uint32_t w0, w1, w2, w3, w4, w5, w6, w7; + uint32_t w8, w9, w10, w11, w12, w13, w14, w15; + uint32_t s0, s1; + uint32_t one = 0x1; + uint32_t ffff = 0xffff; + uint32_t value; + __asm volatile( + "ldr w0, [in] \n\t" + "ldr w4, [in, #4] \n\t" + "ldr w8, [in, #8] \n\t" + "ldr w12, [in, #12] \n\t" + "ldr w1, [in, #16] \n\t" + "ldr w5, [in, #20] \n\t" + "ldr w9, [in, #24] \n\t" + "ldr w13, [in, #28] \n\t" + "ldr w2, [in, #32] \n\t" + "ldr w6, [in, #36] \n\t" + "ldr w10, [in, #40] \n\t" + "ldr w14, [in, #44] \n\t" + "ldr w3, [in, #48] \n\t" + "ldr w7, [in, #52] \n\t" + "ldr w11, [in, #56] \n\t" + "ldr w15, [in, #60] \n\t" + "mov s0, 0xfff \n\t" + "mov value, 0x1fff \n\t" + "lsl value, value, #12 \n\t" + "eors value, value, s0 \n\t" + "enc_loop: \n\t" + "/*add round const*/ \n\t" + "ldrb s0, [rc] \n\t" + "eors w0, w0, s0 \n\t" + "/*sbox first column*/ \n\t" + "mvns w0, w0 \n\t" + "ands s0, w1, w0 \n\t" + "eors s0, w2, s0 \n\t" + "orrs w2, w1, w2 \n\t" + "eors w0, w3, w0 \n\t" + "eors w2, w2, w0 \n\t" + "eors s1, w1, w3 \n\t" + "eors w3, w3, s0 \n\t" + "ands w0, s0, w0 \n\t" + "eors w0, s1, w0 \n\t" + "ands w1, w2, s1 \n\t" + "eors w1, s0, w1 \n\t" + "/*sbox second column*/ \n\t" + "mvns w4, w4 \n\t" + "ands s0, w5, w4 \n\t" + "eors s0, w6, s0 \n\t" + "orrs w6, w5, w6 \n\t" + "eors 
w4, w7, w4 \n\t" + "eors w6, w6, w4 \n\t" + "eors s1, w5, w7 \n\t" + "eors w7, w7, s0 \n\t" + "ands w4, s0, w4 \n\t" + "eors w4, s1, w4 \n\t" + "ands w5, w6, s1 \n\t" + "eors w5, s0, w5 \n\t" + "/*sbox third column*/ \n\t" + "mvns w8, w8 \n\t" + "ands s0, w9, w8 \n\t" + "eors s0, w10, s0 \n\t" + "orrs w10, w9, w10 \n\t" + "eors w8, w11, w8 \n\t" + "eors w10, w10, w8 \n\t" + "eors s1, w9, w11 \n\t" + "eors w11, w11, s0 \n\t" + "ands w8, s0, w8 \n\t" + "eors w8, s1, w8 \n\t" + "ands w9, w10, s1 \n\t" + "eors w9, s0, w9 \n\t" + "/*sbox forth column*/ \n\t" + "mvns w12, w12 \n\t" + "ands s0, w13, w12 \n\t" + "eors s0, w14, s0 \n\t" + "orrs w14, w13, w14 \n\t" + "eors w12, w15, w12 \n\t" + "eors w14, w14, w12 \n\t" + "eors s1, w13, w15 \n\t" + "eors w15, w15, s0 \n\t" + "ands w12, s0, w12 \n\t" + "eors w12, s1, w12 \n\t" + "ands w13, w14, s1 \n\t" + "eors w13, s0, w13 \n\t" + "/*rotate shift left 1 bit*/ \n\t" + "ror s0, w1, #31 \n\t" + "ands s0, s0, one \n\t" + "lsl w1, w1, #1 \n\t" + "ror s1, w13,#31 \n\t" + "ands s1, s1, one \n\t" + "eors w1, w1, s1 \n\t" + "ror s1, w9, #31 \n\t" + "ands s1, s1, one \n\t" + "lsl w13, w13,#1 \n\t" + "eors w13, w13,s1 \n\t" + "ror s1, w5, #31 \n\t" + "ands s1, s1, one \n\t" + "lsl w9, w9, #1 \n\t" + "eors w9, w9, s1 \n\t" + "lsl w5, w5, #1 \n\t" + "eors w5, w5, s0 \n\t" + "/*rotate shift left 16 bits*/ \n\t" + "ror s0, w2, #16 \n\t" + "ands s0, s0, ffff \n\t" + "lsl w2, w2, #16 \n\t" + "ror s1, w14,#16 \n\t" + "ands s1, s1, ffff \n\t" + "eors w2, w2, s1 \n\t" + "ror s1, w10,#16 \n\t" + "ands s1, s1, ffff \n\t" + "lsl w14, w14,#16 \n\t" + "eors w14, w14,s1 \n\t" + "ror s1, w6, #16 \n\t" + "ands s1, s1, ffff \n\t" + "lsl w10, w10,#16 \n\t" + "eors w10, w10,s1 \n\t" + "lsl w6, w6, #16 \n\t" + "eors w6, w6, s0 \n\t" + "/*rotate shift left 25 bits*/ \n\t" + "ror s0, w3, #7 \n\t" + "ands s0, s0, value \n\t" + "lsl w3, w3, #25 \n\t" + "ror s1, w15,#7 \n\t" + "ands s1, s1, value \n\t" + "eors w3, w3, s1 \n\t" + "ror s1, w11,#7 \n\t" + "ands 
s1, s1, value \n\t" + "lsl w15, w15,#25 \n\t" + "eors w15, w15,s1 \n\t" + "ror s1, w7, #7 \n\t" + "ands s1, s1, value \n\t" + "lsl w11, w11,#25 \n\t" + "eors w11, w11,s1 \n\t" + "lsl w7, w7, #25 \n\t" + "eors w7, w7, s0 \n\t" + "/*loop control*/ \n\t" + "adds rc, rc, #1 \n\t" + "subs rounds, rounds, #1 \n\t" + "bne enc_loop \n\t" + "str w0, [in] \n\t" + "str w4, [in, #4] \n\t" + "str w8, [in, #8] \n\t" + "str w12, [in, #12] \n\t" + "str w1, [in, #16] \n\t" + "str w5, [in, #20] \n\t" + "str w9, [in, #24] \n\t" + "str w13, [in, #28] \n\t" + "str w2, [in, #32] \n\t" + "str w6, [in, #36] \n\t" + "str w10, [in, #40] \n\t" + "str w14, [in, #44] \n\t" + "str w3, [in, #48] \n\t" + "str w7, [in, #52] \n\t" + "str w11, [in, #56] \n\t" + "str w15, [in, #60] \n\t" + ); +} + +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + unsigned int u = 0; + unsigned int v = 0; + unsigned int v1 = 0; + unsigned int last_index = 0; + unsigned int i; + unsigned char *A = NULL; + unsigned char *M = NULL; + unsigned char S[64]; + unsigned int *A32 = NULL; + unsigned int *M32 = NULL; + unsigned int *S32 = NULL; + unsigned int *C32 = NULL; + + // pad associated data + if (adlen != 0) { + u = adlen / 16 + 1; + A = malloc(u * 16); + if (A == NULL) { + return -1; + } + memset(A, 0, u * 16); + memcpy(A, ad, adlen); + A[adlen] = 0x01; + A32 = (unsigned int *)A; + } + + // pad plaintext data + if (mlen != 0) { + v = mlen / 16 + 1; + M = malloc(v * 16); + if (M == NULL) { + free(A); + return -1; + } + memset(M, 0, v * 16); + memcpy(M, m, mlen); + M[mlen] = 0x01; + M32 = (unsigned int *)M; + } + + // initalization + memcpy(S, npub, CRYPTO_NPUBBYTES); + memcpy(S + CRYPTO_NPUBBYTES, k, CRYPTO_KEYBYTES); + permutation512(S, 100, constant7); + S32 = (unsigned int *)S; + + // processiong associated data 
+ if (adlen != 0) { + for (i = 0; i < u; i++) { + S32[0] ^= A32[0]; + S32[1] ^= A32[1]; + S32[2] ^= A32[2]; + S32[3] ^= A32[3]; + A32 = A32 + 4; + permutation512(S, 52, constant7); + } + } + S[63] ^= 0x80; + + // Encryption processiong plaintext data + if (mlen != 0) { + C32 = (unsigned int *)c; + for (i = 0; i < v - 1; i++) { + S32[0] ^= M32[0]; + S32[1] ^= M32[1]; + S32[2] ^= M32[2]; + S32[3] ^= M32[3]; + M32 = M32 + 4; + C32[0] = S32[0]; + C32[1] = S32[1]; + C32[2] = S32[2]; + C32[3] = S32[3]; + C32 = C32 + 4; + permutation512(S, 52, constant7); + } + v1 = mlen % 16; + last_index = (v - 1) * 16; + for (i = 0; i < v1; i++) { + S[i] ^= M[last_index + i]; + c[last_index + i] = S[i]; + } + S[i] ^= 0x01; + } + + // finalization + permutation512(S, 56, constant7); + + // return tag + memcpy(c + mlen, S, CRYPTO_ABYTES); + *clen = mlen + CRYPTO_ABYTES; + if (A != NULL) { + free(A); + } + if (M != NULL) { + free(M); + } + return 0; +} + +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) +{ + unsigned int u = 0; + unsigned int v = 0; + unsigned int v1 = 0; + unsigned int last_index = 0; + unsigned int i; + unsigned char *A = NULL; + unsigned char S[64]; + unsigned int *A32 = NULL; + unsigned int *M32 = NULL; + unsigned int *S32 = NULL; + unsigned int *C32 = NULL; + + *mlen = 0; + if (clen < CRYPTO_ABYTES) { + return -1; + } + + // pad associated data + if (adlen != 0) { + u = adlen / 16 + 1; + A = malloc(u * 16); + if (A == NULL) { + return -1; + } + memset(A, 0, u * 16); + memcpy(A, ad, adlen); + A[adlen] = 0x01; + A32 = (unsigned int *)A; + } + + M32 = (unsigned int *)m; + C32 = (unsigned int *)c; + + // initalization + memcpy(S, npub, CRYPTO_NPUBBYTES); + memcpy(S + CRYPTO_NPUBBYTES, k, CRYPTO_KEYBYTES); + permutation512(S, 100, constant7); + S32 = (unsigned int *)S; + + // 
processiong associated data + if (adlen != 0) { + for (i = 0; i < u; i++) { + S32[0] ^= A32[0]; + S32[1] ^= A32[1]; + S32[2] ^= A32[2]; + S32[3] ^= A32[3]; + A32 = A32 + 4; + permutation512(S, 52, constant7); + } + } + S[63] ^= 0x80; + + // Encryption processiong ciphertext data + if (clen != CRYPTO_ABYTES) { + C32 = (unsigned int *)c; + v = (clen - CRYPTO_ABYTES) / 16 + 1; + for (i = 0; i < v - 1; i++) { + M32[0] = S32[0] ^ C32[0]; + M32[1] = S32[1] ^ C32[1]; + M32[2] = S32[2] ^ C32[2]; + M32[3] = S32[3] ^ C32[3]; + S32[0] = C32[0]; + S32[1] = C32[1]; + S32[2] = C32[2]; + S32[3] = C32[3]; + M32 = M32 + 4; + C32 = C32 + 4; + permutation512(S, 52, constant7); + } + v1 = (clen - CRYPTO_ABYTES) % 16; + last_index = (v - 1) * 16; + for (i = 0; i < v1; i++) { + m[last_index + i] = S[i] ^ c[last_index + i]; + S[i] = c[last_index + i]; + } + S[i] ^= 0x01; + } + + // finalization + permutation512(S, 56, constant7); + + // return -1 if verification fails + for (i = 0; i < CRYPTO_ABYTES; i++) { + if (c[clen - CRYPTO_ABYTES + i] != S[i]) { + memset(m, 0, clen - CRYPTO_ABYTES); + return -1; + } + } + *mlen = clen - CRYPTO_ABYTES; + if (A != NULL) { + free(A); + } + return 0; +} +#else +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + return 0; +} +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + return 0; +} +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_2/api.h b/knot/Implementations/crypto_aead/knot256/armcortexm_2/api.h new file mode 100644 index 0000000..3eb57e5 --- /dev/null +++ 
b/knot/Implementations/crypto_aead/knot256/armcortexm_2/api.h
@@ -0,0 +1,6 @@
+#define CRYPTO_KEYBYTES 32 //256/8=32
+#define CRYPTO_NSECBYTES 0
+#define CRYPTO_NPUBBYTES 32
+#define CRYPTO_ABYTES 32
+#define CRYPTO_NOOVERLAP 1
+
diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.h b/knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.h
new file mode 100644
index 0000000..d7d877f
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot256/armcortexm_2/auxFormat.h
@@ -0,0 +1,115 @@
+//#include
+#include"crypto_aead.h"
+#include"api.h"
+// NOTE(review): the three system #include directives that follow had lost
+// their header names in this copy of the patch. <string.h> and <stdint.h> are
+// restored from visible usage (memcpy/memset in the macros and encrypt.c,
+// uint32_t in encrypt.c). <stdio.h> is a guess based on the print helper
+// prototypes declared below - confirm against the upstream file.
+#include <string.h>
+#include <stdint.h>
+#include <stdio.h>
+#define U32BIG(x) (x)
+#define ARR_SIZE(a) (sizeof(a)/sizeof((a)[0]))
+#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n))))
+
+/*
+ * 4-bit S-box evaluated bitwise on four input words (a, b, c, d), writing the
+ * four output words (e, f, g, h). Uses the caller-declared u32 scratch
+ * variables t1, t2, t3, t5, t6, t8, t9, t11. The outputs must not alias the
+ * inputs: h is written before d is read for the last time.
+ */
+#define sbox(a, b, c, d, e, f, g, h) \
+{ \
+    t1 = ~(a); t2 = (b) & t1; t3 = (c) ^ t2; (h) = (d) ^ t3; t5 = (b) | (c); t6 = (d) ^ t1; (g) = t5 ^ t6; t8 = (b) ^ (d); t9 = t3 & t6; (e) = t8 ^ t9; t11 = (g) & t8; (f) = t3 ^ t11; \
+}
+
+typedef unsigned char u8;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+void printfU128Format(char name[], u32 * in);
+void printU8(char name[], u8 var[], long len, int offset);
+
+
+/*
+ * In-place bit permutation of one 32-bit lvalue: four masked swap steps with
+ * shifts 1, 2, 4 and 8. Uses the caller-declared u32 scratch variable t9.
+ */
+#define puck32(in)\
+{\
+t9 = ((in) ^ ((in) >> 1)) & 0x22222222; (in) ^= t9 ^ (t9 << 1);\
+t9 = ((in) ^ ((in) >> 2)) & 0x0C0C0C0C; (in) ^= t9 ^ (t9 << 2);\
+t9 = ((in) ^ ((in) >> 4)) & 0x00F000F0; (in) ^= t9 ^ (t9 << 4);\
+t9 = ((in) ^ ((in) >> 8)) & 0x0000FF00; (in) ^= t9 ^ (t9 << 8);\
+}
+/*
+ * The same four masked swap steps as puck32, applied in the opposite order
+ * (shifts 8, 4, 2, 1). Uses the caller-declared u32 scratch variable t9.
+ */
+#define unpuck32(t0){\
+    t9 = ((t0) ^ ((t0) >> 8)) & 0x0000FF00, (t0) ^= t9 ^ (t9 << 8); \
+    t9 = ((t0) ^ ((t0) >> 4)) & 0x00F000F0, (t0) ^= t9 ^ (t9 << 4); \
+    t9 = ((t0) ^ ((t0) >> 2)) & 0x0C0C0C0C, (t0) ^= t9 ^ (t9 << 2); \
+    t9 = ((t0) ^ ((t0) >> 1)) & 0x22222222, (t0) ^= t9 ^ (t9 << 1); \
+}
+/*
+ * Convert 16 bytes at `in` into four packed words out[0..3].
+ * Each input word is run through puck32 twice; out[k] then collects byte k of
+ * the four permuted words (word 3 in the top byte, word 0 in the bottom byte).
+ * Requires caller-declared u32 t1, t2, t3, t8 and t9.
+ * The input is read with memcpy so that `in` may be an arbitrarily aligned
+ * byte pointer (e.g. npub + 16 or a ciphertext pointer).
+ */
+#define packU128FormatToFourPacket(out,in) {\
+    memcpy(&t8, (const u8 *)(in), sizeof(u32)); \
+    memcpy(&t1, (const u8 *)(in) + 4, sizeof(u32)); \
+    memcpy(&t2, (const u8 *)(in) + 8, sizeof(u32)); \
+    memcpy(&t3, (const u8 *)(in) + 12, sizeof(u32)); \
+    t8 = U32BIG(t8); t1 = U32BIG(t1); t2 = U32BIG(t2); t3 = U32BIG(t3); \
+    puck32(t8); puck32(t8); \
+    puck32(t1); puck32(t1); \
+    puck32(t2); puck32(t2); \
+    puck32(t3); puck32(t3); \
+    (out)[3] = (t3 & 0xff000000) | ((t2 >> 8) & 0x00ff0000) | ((t1 >> 16) & 0x0000ff00) | (t8 >> 24); \
+    (out)[2] = ((t3 << 8) & 0xff000000) | (t2 & 0x00ff0000) | ((t1 >> 8) & 0x0000ff00) | ((t8 >> 16) & 0x000000ff); \
+    (out)[1] = ((t3 << 16) & 0xff000000) | ((t2 << 8) & 0x00ff0000) | (t1 & 0x0000ff00) | ((t8 >> 8) & 0x000000ff); \
+    (out)[0] = ((t3 << 24) & 0xff000000) | ((t2 << 16) & 0x00ff0000) | ((t1 << 8) & 0x0000ff00) | (t8 & 0x000000ff); \
+}
+/*
+ * Convert four packed words at `in` back into 16 bytes at `out`: regroup the
+ * bytes, then apply unpuck32 twice to each word.
+ * Requires caller-declared u32 dataFormat[4] (overwritten as scratch) and
+ * u32 t1, t2, t3, t8, t9.
+ * The result is stored with memcpy so that `out` may be an arbitrarily aligned
+ * byte pointer (e.g. the caller's message or ciphertext buffer).
+ */
+#define unpackU128FormatToFourPacket( out, in) {\
+memcpy(dataFormat, (in), sizeof(unsigned int) * 4); \
+t3 = (dataFormat[3] & 0xff000000) | ((dataFormat[2] >> 8) & 0x00ff0000) | ((dataFormat[1] >> 16) & 0x0000ff00) | (dataFormat[0] >> 24); \
+t2 = ((dataFormat[3] << 8) & 0xff000000) | (dataFormat[2] & 0x00ff0000) | ((dataFormat[1] >> 8) & 0x0000ff00) | ((dataFormat[0] >> 16) & 0x000000ff); \
+t1 = ((dataFormat[3] << 16) & 0xff000000) | ((dataFormat[2] << 8) & 0x00ff0000) | (dataFormat[1] & 0x0000ff00) | ((dataFormat[0] >> 8) & 0x000000ff); \
+t8 = ((dataFormat[3] << 24) & 0xff000000) | ((dataFormat[2] << 16) & 0x00ff0000) | ((dataFormat[1] << 8) & 0x0000ff00) | (dataFormat[0] & 0x000000ff); \
+unpuck32(t8); unpuck32(t8); \
+unpuck32(t1); unpuck32(t1); \
+unpuck32(t2); unpuck32(t2); \
+unpuck32(t3); unpuck32(t3); \
+t8 = U32BIG(t8); t1 = U32BIG(t1); t2 = U32BIG(t2); t3 = U32BIG(t3); \
+memcpy((u8 *)(out), &t8, sizeof(u32)); \
+memcpy((u8 *)(out) + 4, &t1, sizeof(u32)); \
+memcpy((u8 *)(out) + 8, &t2, sizeof(u32)); \
+memcpy((u8 *)(out) + 12, &t3, sizeof(u32)); \
+}
+/*
+ * 8-byte variant of packU128FormatToFourPacket: two input words, each out[k]
+ * collects byte k of both permuted words in its low 16 bits.
+ * Requires caller-declared u32 t1, t2 and t9.
+ */
+#define packU64FormatToFourPacket( out, in) {\
+memcpy(&t1, (const u8 *)(in), sizeof(u32)); \
+memcpy(&t2, (const u8 *)(in) + 4, sizeof(u32)); \
+t1 = U32BIG(t1); t2 = U32BIG(t2); \
+puck32(t1); \
+puck32(t1); \
+puck32(t2); \
+puck32(t2); \
+(out)[3] = ((t2 >> 16) & 0x0000ff00) | ((t1 >> 24)); \
+(out)[2] = ((t2 >> 8) & 0x0000ff00) | ((t1 >> 16) & 0x000000ff); \
+(out)[1] = (t2 & 0x0000ff00) | ((t1 >> 8) & 0x000000ff); \
+(out)[0] = ((t2 << 8) & 0x0000ff00) | (t1 & 0x000000ff); \
+}
+/* (t4..t7) = (rotl(t3, 1), t0, t1, t2): rotate the last word, shift the others along. */
+#define BIT_LOTR32_1(t0,t1,t2,t3,t4,t5,t6,t7){\
+(t4) = LOTR32((t3), 1);\
+(t5) = (t0);\
+(t6) = (t1); \
+(t7) = (t2); \
+}
+/* (t4..t7) = each of (t0..t3) rotated left by 4, positions unchanged. */
+#define BIT_LOTR32_16(t0,t1,t2,t3,t4,t5,t6,t7){\
+(t4) = LOTR32((t0), 4);\
+(t5) = LOTR32((t1), 4);\
+(t6) = LOTR32((t2), 4); \
+(t7) = LOTR32((t3), 4); \
+}
+/* (t4..t7) = (rotl(t3, 7), rotl(t0, 6), rotl(t1, 6), rotl(t2, 6)). */
+#define BIT_LOTR32_25(t0,t1,t2,t3,t4,t5,t6,t7){\
+(t4) = LOTR32((t3), 7);\
+(t5) = LOTR32((t0), 6);\
+(t6) = LOTR32((t1), 6); \
+(t7) = LOTR32((t2), 6); \
+}
+
+/*
+ * One round on the 16-word state `s`, using round constant arr[lunNum]:
+ * XOR the four 2-bit fields of the constant into s[0..3], apply sbox to the
+ * word groups (s[i], s[i+4], s[i+8], s[i+12]) for i = 0..3 into s_temp, then
+ * copy the first group of four words back unchanged and write the other three
+ * groups back through BIT_LOTR32_1, BIT_LOTR32_16 and BIT_LOTR32_25.
+ * Requires caller-declared u32 *s (or u32 s[16]), u32 s_temp[16] and the sbox
+ * scratch variables t1, t2, t3, t5, t6, t8, t9, t11.
+ */
+#define ROUND512( arr,lunNum) {\
+s[3] ^= ((arr)[(lunNum)] >> 6) & 0x3;\
+s[2] ^= ((arr)[(lunNum)] >> 4) & 0x3;\
+s[1] ^= ((arr)[(lunNum)] >> 2) & 0x3;\
+s[0] ^= (arr)[(lunNum)] & 0x3;\
+sbox(s[0], s[4], s[8], s[12], s_temp[0], s_temp[4], s_temp[8], s_temp[12]);\
+sbox(s[1], s[5], s[9], s[13], s_temp[1], s_temp[5], s_temp[9], s_temp[13]);\
+sbox(s[2], s[6], s[10], s[14], s_temp[2], s_temp[6], s_temp[10], s_temp[14]);\
+sbox(s[3], s[7], s[11], s[15], s_temp[3], s_temp[7], s_temp[11], s_temp[15]);\
+s[0] = s_temp[0], s[1] = s_temp[1], s[2] = s_temp[2], s[3] = s_temp[3];\
+BIT_LOTR32_1(s_temp[4], s_temp[5], s_temp[6], s_temp[7], s[4], s[5], s[6], s[7]);\
+BIT_LOTR32_16(s_temp[8], s_temp[9], s_temp[10], s_temp[11], s[8], s[9], s[10], s[11]);\
+BIT_LOTR32_25(s_temp[12], s_temp[13], s_temp[14], s_temp[15], s[12], s[13], s[14], s[15]);\
+}
+
diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_2/crypto_aead.h b/knot/Implementations/crypto_aead/knot256/armcortexm_2/crypto_aead.h
new file mode 100644
index 0000000..cdfdf19
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot256/armcortexm_2/crypto_aead.h
@@ -0,0 +1,17 @@
+int crypto_aead_encrypt(
+    unsigned char *c, unsigned long long *clen,
+    const unsigned char *m, unsigned long long mlen,
+    const unsigned char *ad, unsigned long long adlen,
+    const unsigned char *nsec,
+    const unsigned char *npub,
+    const unsigned char *k
+);
+
+int crypto_aead_decrypt(
+    unsigned char *m, unsigned long long *mlen,
+    unsigned char *nsec,
+    const unsigned char *c, unsigned long long clen,
+    const unsigned char *ad, unsigned long long adlen,
+    const unsigned char *npub,
+    const unsigned char *k
+);
diff --git 
a/knot/Implementations/crypto_aead/knot256/armcortexm_2/encrypt.c b/knot/Implementations/crypto_aead/knot256/armcortexm_2/encrypt.c
new file mode 100644
index 0000000..17041b6
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot256/armcortexm_2/encrypt.c
@@ -0,0 +1,452 @@
+
+#include"auxFormat.h"
+
+#define aead_RATE (128 / 8)
+#define PR0_ROUNDS 100
+#define PR_ROUNDS 52
+#define PRF_ROUNDS 56
+/* Per-round constants; permutation512 consumes one byte per round, starting at index 0. */
+unsigned char constant7Format_aead[127] = {
+    /*constant7_aead_256*/
+0x1, 0x4, 0x10, 0x40, 0x2, 0x8, 0x21, 0x5,
+0x14, 0x50, 0x42, 0xa, 0x29, 0x24, 0x11, 0x44,
+0x12, 0x48, 0x23, 0xd, 0x35, 0x55, 0x56, 0x5a,
+0x6b, 0x2e, 0x38, 0x60, 0x3, 0xc, 0x31, 0x45,
+0x16, 0x58, 0x63, 0xf, 0x3d, 0x74, 0x53, 0x4e,
+0x3b, 0x6c, 0x32, 0x49, 0x27, 0x1d, 0x75, 0x57,
+0x5e, 0x7b, 0x6e, 0x3a, 0x68, 0x22, 0x9, 0x25,
+0x15, 0x54, 0x52, 0x4a, 0x2b, 0x2c, 0x30, 0x41,
+0x6, 0x18, 0x61, 0x7, 0x1c, 0x71, 0x47, 0x1e,
+0x79, 0x66, 0x1b, 0x6d, 0x36, 0x59, 0x67, 0x1f,
+0x7d, 0x76, 0x5b, 0x6f, 0x3e, 0x78, 0x62, 0xb,
+0x2d, 0x34, 0x51, 0x46, 0x1a, 0x69, 0x26, 0x19,
+0x65, 0x17, 0x5c, 0x73, 0x4f, 0x3f, 0x7c, 0x72,
+0x4b, 0x2f, 0x3c, 0x70, 0x43, 0xe, 0x39, 0x64,
+0x13, 0x4c, 0x33, 0x4d, 0x37, 0x5d, 0x77, 0x5f,
+0x7f, 0x7e, 0x7a, 0x6a, 0x2a, 0x28, 0x20,
+};
+
+
+
+
+/* State
+ * w12 w8 w4 w0
+ * w13 w9 w5 w1
+ * w14 w10 w6 w2
+ * w15 w11 w7 w3
+ *
+ */
+/*
+ * Apply `rounds` rounds to the 16-word state at `in`, taking the round
+ * constant for round r from rc[r].
+ *
+ * The default build runs the ROUND512 macro from auxFormat.h once per round.
+ * ROUND512 performs the same steps as one iteration of the assembly loop kept
+ * below: the 2-bit constant fields are XORed into in[0..3], the same S-box
+ * sequence is applied to (in[i], in[i+4], in[i+8], in[i+12]), and words 4..7,
+ * 8..11 and 12..15 get the same rotate/shuffle (ROR #31/#28/#26/#25 are left
+ * rotations by 1/4/6/7).
+ *
+ * NOTE(review): the assembly variant is a basic asm statement with no operand
+ * or clobber lists; it names the C locals (w0..w15, s0..s3, in, rc, rounds)
+ * directly in the instruction text and defines the global label enc_loop.
+ * As written the assembler receives those names literally, so it looks
+ * unusable until operands are bound - confirm before defining
+ * KNOT256_USE_INLINE_ASM. It is kept verbatim for reference.
+ *
+ * Unlike the assembly loop, which always executes at least one round, the C
+ * loop does nothing when rounds <= 0; every caller in this file passes
+ * 100, 52 or 56.
+ */
+ static void permutation512(unsigned int *in, int rounds, unsigned char *rc) {
+#if defined(KNOT256_USE_INLINE_ASM)
+    uint32_t w0, w1, w2, w3, w4, w5, w6, w7;
+    uint32_t w8, w9, w10, w11, w12, w13, w14, w15;
+    uint32_t s0, s1, s2,s3;
+    uint32_t i=0;
+    __asm volatile(
+    "ldr w0, [in] \n\t"
+    "ldr w4, [in, #4] \n\t"
+    "ldr w8, [in, #8] \n\t"
+    "ldr w12, [in, #12] \n\t"
+    "ldr w1, [in, #16] \n\t"
+    "ldr w5, [in, #20] \n\t"
+    "ldr w9, [in, #24] \n\t"
+    "ldr w13, [in, #28] \n\t"
+    "ldr w2, [in, #32] \n\t"
+    "ldr w6, [in, #36] \n\t"
+    "ldr w10, [in, #40] \n\t"
+    "ldr w14, [in, #44] \n\t"
+    "ldr w3, [in, #48] \n\t"
+    "ldr w7, [in, #52] \n\t"
+    "ldr w11, [in, #56] \n\t"
+    "ldr w15, [in, #60] \n\t"
+    "enc_loop: \n\t"
+    "/*add round const s0 s1 s2 s3*/ \n\t"
+    "ldrb s3, [rc] \n\t"
+
+    "LSR s0, s3, #6 \n\t"
+    "and s0, s0, 0x3 \n\t"
+
+    "LSR s1, s3, #4 \n\t"
+    "and s1, s1, 0x3 \n\t"
+
+    "LSR s2, s3, #2 \n\t"
+    "and s2, s2, 0x3 \n\t"
+
+    "and s3, s3, 0x3 \n\t"
+
+    "eors w12, w12, s0 \n\t"
+    "eors w8, w8, s1 \n\t"
+    "eors w4, w4, s2 \n\t"
+    "eors w0, w0, s3 \n\t"
+    "/*sbox first column*/ \n\t"
+    "mvns w0, w0 \n\t"
+    "ands s0, w1, w0 \n\t"
+    "eors s0, w2, s0 \n\t"
+    "orrs w2, w1, w2 \n\t"
+    "eors w0, w3, w0 \n\t"
+    "eors w2, w2, w0 \n\t"
+    "eors s1, w1, w3 \n\t"
+    "eors w3, w3, s0 \n\t"
+    "ands w0, s0, w0 \n\t"
+    "eors w0, s1, w0 \n\t"
+    "ands w1, w2, s1 \n\t"
+    "eors w1, s0, w1 \n\t"
+    "/*sbox second column*/ \n\t"
+    "mvns w4, w4 \n\t"
+    "ands s0, w5, w4 \n\t"
+    "eors s0, w6, s0 \n\t"
+    "orrs w6, w5, w6 \n\t"
+    "eors w4, w7, w4 \n\t"
+    "eors w6, w6, w4 \n\t"
+    "eors s1, w5, w7 \n\t"
+    "eors w7, w7, s0 \n\t"
+    "ands w4, s0, w4 \n\t"
+    "eors w4, s1, w4 \n\t"
+    "ands w5, w6, s1 \n\t"
+    "eors w5, s0, w5 \n\t"
+    "/*sbox third column*/ \n\t"
+    "mvns w8, w8 \n\t"
+    "ands s0, w9, w8 \n\t"
+    "eors s0, w10, s0 \n\t"
+    "orrs w10, w9, w10 \n\t"
+    "eors w8, w11, w8 \n\t"
+    "eors w10, w10, w8 \n\t"
+    "eors s1, w9, w11 \n\t"
+    "eors w11, w11, s0 \n\t"
+    "ands w8, s0, w8 \n\t"
+    "eors w8, s1, w8 \n\t"
+    "ands w9, w10, s1 \n\t"
+    "eors w9, s0, w9 \n\t"
+    "/*sbox forth column*/ \n\t"
+    "mvns w12, w12 \n\t"
+    "ands s0, w13, w12 \n\t"
+    "eors s0, w14, s0 \n\t"
+    "orrs w14, w13, w14 \n\t"
+    "eors w12, w15, w12 \n\t"
+    "eors w14, w14, w12 \n\t"
+    "eors s1, w13, w15 \n\t"
+    "eors w15, w15, s0 \n\t"
+    "ands w12, s0, w12 \n\t"
+    "eors w12, s1, w12 \n\t"
+    "ands w13, w14, s1 \n\t"
+    "eors w13, s0, w13 \n\t"
+    "/*rotate shift left 1 bit [ w13 w9 w5 w1-> w9 w5 w1 (w13,1)] */ \n\t"
+    "mov s0, w13 \n\t"
+    "mov w13, w9 \n\t"
+    "mov w9, w5 \n\t"
+    "mov w5, w1 \n\t"
+    "ROR w1, s0 , #31 \n\t"
+    "/*rotate shift left 8 bits [w14 w10 w6 w2->(w14,4) (w10,4) (w6,4) ( w2,4)]*/ \n\t"
+    "ROR w14, w14 , #28 \n\t"
+    "ROR w10, w10 , #28 \n\t"
+    "ROR w6, w6 , #28 \n\t"
+    "ROR w2, w2 , #28 \n\t"
+    "/*rotate shift left 25 bit [w15 w11 w7 w3-> (w11,13) (w7,14) ( w3,14) ( w15,14)] */ \n\t"
+    "mov s0, w15 \n\t"
+    "ROR w15, w11 , #26 \n\t"
+    "ROR w11, w7 , #26 \n\t"
+    "ROR w7 , w3 , #26 \n\t"
+    "ROR w3 , s0 , #25 \n\t"
+    "/*loop control*/ \n\t"
+    "adds rc, rc, #1 \n\t"
+    "subs rounds, rounds, #1 \n\t"
+    "bne enc_loop \n\t"
+    "str w0, [in] \n\t"
+    "str w4, [in, #4] \n\t"
+    "str w8, [in, #8] \n\t"
+    "str w12, [in, #12] \n\t"
+    "str w1, [in, #16] \n\t"
+    "str w5, [in, #20] \n\t"
+    "str w9, [in, #24] \n\t"
+    "str w13, [in, #28] \n\t"
+    "str w2, [in, #32] \n\t"
+    "str w6, [in, #36] \n\t"
+    "str w10, [in, #40] \n\t"
+    "str w14, [in, #44] \n\t"
+    "str w3, [in, #48] \n\t"
+    "str w7, [in, #52] \n\t"
+    "str w11, [in, #56] \n\t"
+    "str w15, [in, #60] \n\t"
+    );
+#else
+    /* Names required by the ROUND512 / sbox macros. */
+    u32 *s = in;
+    u32 s_temp[16];
+    u32 t1, t2, t3, t5, t6, t8, t9, t11;
+    int r;
+
+    for (r = 0; r < rounds; r++) {
+        ROUND512(rc, r);
+    }
+#endif
+}
+
+
+/*
+ * Encrypt mlen bytes at m and append a CRYPTO_ABYTES tag.
+ *
+ * The 16-word state is loaded from npub (32 bytes) and k (32 bytes) in packed
+ * form and run through PR0_ROUNDS rounds. Associated data and message are
+ * absorbed aead_RATE (16) bytes at a time into s[0..3], with PR_ROUNDS rounds
+ * after each block; a final short block is zero-padded after a 0x01 byte.
+ * The tag is the first eight state words after PRF_ROUNDS rounds.
+ *
+ * Writes mlen + CRYPTO_ABYTES bytes to c, stores that length in *clen and
+ * returns 0. nsec is not used.
+ */
+int crypto_aead_encrypt(
+    unsigned char *c, unsigned long long *clen,
+    const unsigned char *m, unsigned long long mlen,
+    const unsigned char *ad, unsigned long long adlen,
+    const unsigned char *nsec,
+    const unsigned char *npub,
+    const unsigned char *k
+) {
+    /* scratch names required by the pack/unpack macros */
+    u32 t1, t2, t3, t8, t9;
+    u32 s[16] = { 0 };
+    u32 dataFormat[4] = { 0 };
+    u8 tempData[16] = {0};
+    *clen = mlen + CRYPTO_ABYTES;
+    //initialization
+    packU128FormatToFourPacket(s, npub);
+    packU128FormatToFourPacket((s + 4), (npub + 16));
+    packU128FormatToFourPacket((s + 8), k);
+    packU128FormatToFourPacket((s + 12), (k + 16));
+    permutation512(s,PR0_ROUNDS,constant7Format_aead);
+    // process associated data
+    if (adlen) {
+        while (adlen >= aead_RATE) {
+            packU128FormatToFourPacket(dataFormat, ad);
+            s[0] ^= dataFormat[0];
+            s[1] ^= dataFormat[1];
+            s[2] ^= dataFormat[2];
+            s[3] ^= dataFormat[3];
+            permutation512(s,PR_ROUNDS,constant7Format_aead);
+            adlen -= aead_RATE;
+            ad += aead_RATE;
+        }
+        // last (possibly empty) block: data, then 0x01, then zeros
+        memset(tempData, 0, sizeof(tempData));
+        memcpy(tempData, ad, adlen * sizeof(unsigned char));
+        tempData[adlen] = 0x01;
+        packU128FormatToFourPacket(dataFormat, tempData);
+        s[0] ^= dataFormat[0];
+        s[1] ^= dataFormat[1];
+        s[2] ^= dataFormat[2];
+        s[3] ^= dataFormat[3];
+        permutation512(s,PR_ROUNDS,constant7Format_aead);
+    }
+    s[15] ^= 0x80000000;
+    if (mlen) {
+        while (mlen >= aead_RATE) {
+            packU128FormatToFourPacket(dataFormat, m);
+            s[0] ^= dataFormat[0];
+            s[1] ^= dataFormat[1];
+            s[2] ^= dataFormat[2];
+            s[3] ^= dataFormat[3];
+            unpackU128FormatToFourPacket(c, s);
+            permutation512(s,PR_ROUNDS,constant7Format_aead);
+            mlen -= aead_RATE;
+            m += aead_RATE;
+            c += aead_RATE;
+        }
+        // last (possibly empty) block; only the first mlen output bytes are ciphertext
+        memset(tempData, 0, sizeof(tempData));
+        memcpy(tempData, m, mlen * sizeof(unsigned char));
+        tempData[mlen]= 0x01;
+        packU128FormatToFourPacket(dataFormat, tempData);
+        s[0] ^= dataFormat[0];
+        s[1] ^= dataFormat[1];
+        s[2] ^= dataFormat[2];
+        s[3] ^= dataFormat[3];
+        unpackU128FormatToFourPacket(tempData, s);
+        memcpy(c, tempData, mlen * sizeof(unsigned char));
+        c += mlen;
+    }
+    // finalization
+    permutation512(s,PRF_ROUNDS,constant7Format_aead);
+    // return tag
+    unpackU128FormatToFourPacket(c, s);
+    unpackU128FormatToFourPacket((c+16), (s+4));
+    return 0;
+}
+
+/*
+ * Decrypt clen bytes at c (ciphertext followed by a CRYPTO_ABYTES tag) and
+ * verify the tag.
+ *
+ * Mirrors crypto_aead_encrypt: same initialization and associated-data
+ * absorption; each full ciphertext block yields m = s[0..3] ^ c (in packed
+ * form) and replaces s[0..3] with the ciphertext block; the trailing bytes
+ * are handled through an unpacked copy of s[0..3].
+ *
+ * Returns 0 and sets *mlen = clen - CRYPTO_ABYTES when the tag matches.
+ * Returns -1 with *mlen == 0 when clen is shorter than a tag or the tag does
+ * not match; on a mismatch the plaintext already written to m is zeroed, as
+ * the armcortexm_1 implementation in this patch does.
+ * nsec is not used.
+ */
+int crypto_aead_decrypt(
+    unsigned char *m, unsigned long long *mlen,
+    unsigned char *nsec,
+    const unsigned char *c, unsigned long long clen,
+    const unsigned char *ad, unsigned long long adlen,
+    const unsigned char *npub,
+    const unsigned char *k
+){
+    /* scratch names required by the pack/unpack macros */
+    u32 t1, t2, t3, t8, t9;
+    u8 i;
+    u32 s[16] = { 0 };
+    u32 dataFormat[4] = { 0 };
+    u32 dataFormat_1[4] = { 0 };
+    u32 dataFormat_2[4] = { 0 };
+    u8 tempData[16] = { 0 };
+    u8 tempU8[64] = { 0 };
+    unsigned char *m_start = m;
+    unsigned long long msg_len;
+    u32 tag_diff;
+
+    *mlen = 0;
+    if (clen < CRYPTO_ABYTES)
+        return -1;
+    msg_len = clen - CRYPTO_ABYTES;
+    //initialization
+    packU128FormatToFourPacket(s, npub);
+    packU128FormatToFourPacket((s + 4), (npub + 16));
+    packU128FormatToFourPacket((s + 8), k);
+    packU128FormatToFourPacket((s + 12), (k + 16));
+    permutation512(s,PR0_ROUNDS,constant7Format_aead);
+    // process associated data
+    if (adlen) {
+        while (adlen >= aead_RATE) {
+            packU128FormatToFourPacket(dataFormat, ad);
+            s[0] ^= dataFormat[0];
+            s[1] ^= dataFormat[1];
+            s[2] ^= dataFormat[2];
+            s[3] ^= dataFormat[3];
+            permutation512(s,PR_ROUNDS,constant7Format_aead);
+            adlen -= aead_RATE;
+            ad += aead_RATE;
+        }
+        memset(tempData, 0, sizeof(tempData));
+
+        memcpy(tempData, ad, adlen * sizeof(unsigned char));
+        tempData[adlen] = 0x01;
+        packU128FormatToFourPacket(dataFormat, tempData);
+        s[0] ^= dataFormat[0];
+        s[1] ^= dataFormat[1];
+        s[2] ^= dataFormat[2];
+        s[3] ^= dataFormat[3];
+        permutation512(s,PR_ROUNDS,constant7Format_aead);
+    }
+    s[15] ^= 0x80000000;
+    // From here on clen counts message bytes only. The tag length is
+    // CRYPTO_ABYTES; the original subtracted CRYPTO_KEYBYTES, which only
+    // worked because api.h defines both as 32.
+    clen = msg_len;
+
+    if (clen) {
+        while (clen >= aead_RATE) {
+            packU128FormatToFourPacket(dataFormat_2, c);
+            dataFormat_1[0] = s[0] ^ dataFormat_2[0];
+            dataFormat_1[1] = s[1] ^ dataFormat_2[1];
+            dataFormat_1[2] = s[2] ^ dataFormat_2[2];
+            dataFormat_1[3] = s[3] ^ dataFormat_2[3];
+            unpackU128FormatToFourPacket(m, dataFormat_1);
+            s[0] = dataFormat_2[0];
+            s[1] = dataFormat_2[1];
+            s[2] = dataFormat_2[2];
+            s[3] = dataFormat_2[3];
+            permutation512(s,PR_ROUNDS,constant7Format_aead);
+            clen -= aead_RATE;
+            m += aead_RATE;
+            c += aead_RATE;
+        }
+        // trailing clen (< 16) bytes, handled on an unpacked copy of s[0..3]
+        unpackU128FormatToFourPacket(tempU8, s);
+        for (i = 0; i < clen; ++i, ++m, ++c)
+        {
+            *m = tempU8[i] ^ *c;
+            tempU8[i] = *c;
+        }
+        tempU8[i] ^= 0x01;
+        packU128FormatToFourPacket(s, tempU8);
+    }
+    // finalization
+    permutation512(s,PRF_ROUNDS,constant7Format_aead);
+    // c now points at the received tag; compare it in packed form.
+    packU128FormatToFourPacket(dataFormat, c);
+    packU128FormatToFourPacket(dataFormat_1, (c + 16));
+    // OR all word differences together so the comparison has no
+    // data-dependent early exit.
+    tag_diff = (dataFormat[0] ^ s[0]) | (dataFormat[1] ^ s[1])
+        | (dataFormat[2] ^ s[2]) | (dataFormat[3] ^ s[3])
+        | (dataFormat_1[0] ^ s[4]) | (dataFormat_1[1] ^ s[5])
+        | (dataFormat_1[2] ^ s[6]) | (dataFormat_1[3] ^ s[7]);
+    if (tag_diff != 0) {
+        // do not hand unauthenticated plaintext back to the caller
+        if (msg_len) {
+            memset(m_start, 0, msg_len);
+        }
+        return -1;
+    }
+    *mlen = msg_len;
+    return 0;
+
+}
\ No newline at end of file
-- 
libgit2 0.26.0