From 9cd651e371b47ff54fcf2c6e5b8b2596521ed374 Mon Sep 17 00:00:00 2001 From: Wentao Zhang Date: Mon, 26 Oct 2020 12:49:57 +0000 Subject: [PATCH] knot --- knot/Implementations/crypto_aead/knot128v1/armcortexm_7/api.h | 8 ++++++++ knot/Implementations/crypto_aead/knot128v1/armcortexm_7/auxFormat.c | 327 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot128v1/armcortexm_7/auxFormat.h | 11 +++++++++++ knot/Implementations/crypto_aead/knot128v1/armcortexm_7/encrypt.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot128v1/opt_1/api.h | 6 ++++++ knot/Implementations/crypto_aead/knot128v1/opt_1/encrypt.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot128v2/opt_1/api.h | 5 +++++ knot/Implementations/crypto_aead/knot128v2/opt_1/encrypt.c | 238 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot128v2/opt_2/api.h | 5 +++++ knot/Implementations/crypto_aead/knot128v2/opt_2/encrypt.c | 230 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 
knot/Implementations/crypto_aead/knot128v2/opt_3_ARR/api.h | 5 +++++ knot/Implementations/crypto_aead/knot128v2/opt_3_ARR/encrypt.c | 205 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot192/armcortexm_5_/api.h | 6 ++++++ knot/Implementations/crypto_aead/knot192/armcortexm_5_/auxFormat.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot192/armcortexm_5_/auxFormat.h | 213 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot192/armcortexm_5_/crypto_aead.h | 18 ++++++++++++++++++ knot/Implementations/crypto_aead/knot192/armcortexm_5_/encrypt.c | 177 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot192/opt_1/api.h | 5 +++++ knot/Implementations/crypto_aead/knot192/opt_1/encrypt.c | 259 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot192/opt_2/api.h | 5 +++++ knot/Implementations/crypto_aead/knot192/opt_2/encrypt.c | 253 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 
knot/Implementations/crypto_aead/knot192/opt_ARR/api.h | 5 +++++ knot/Implementations/crypto_aead/knot192/opt_ARR/encrypt.c | 199 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot256/armcortexm_3_/api.h | 7 +++++++ knot/Implementations/crypto_aead/knot256/armcortexm_3_/auxFormat.h | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot256/armcortexm_3_/crypto_aead.h | 17 +++++++++++++++++ knot/Implementations/crypto_aead/knot256/armcortexm_3_/encrypt.c | 200 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot256/opt_1/api.h | 5 +++++ knot/Implementations/crypto_aead/knot256/opt_1/encrypt.c | 193 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_aead/knot256/opt_SSE/api.h | 5 +++++ knot/Implementations/crypto_aead/knot256/opt_SSE/encrypt.c | 205 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_hash/knot256v1/armcortexm_7/api.h | 1 + knot/Implementations/crypto_hash/knot256v1/armcortexm_7/auxFormat.c | 320 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_hash/knot256v1/armcortexm_7/auxFormat.h | 14 ++++++++++++++ knot/Implementations/crypto_hash/knot256v1/armcortexm_7/hash.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_hash/knot256v1/opt_1/api.h | 2 ++ knot/Implementations/crypto_hash/knot256v1/opt_1/hash.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_hash/knot256v2/opt_1/api.h | 2 ++ knot/Implementations/crypto_hash/knot256v2/opt_1/hash.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_hash/knot384/opt_1/api.h | 2 ++ knot/Implementations/crypto_hash/knot384/opt_1/hash.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_hash/knot512/opt_1/api.h | 2 ++ knot/Implementations/crypto_hash/knot512/opt_1/hash.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ knot/Implementations/crypto_hash/knot512/opt_SSE/api.h | 1 + knot/Implementations/crypto_hash/knot512/opt_SSE/hash.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 45 files changed, 4247 insertions(+) create mode 100644 knot/Implementations/crypto_aead/knot128v1/armcortexm_7/api.h create mode 100644 knot/Implementations/crypto_aead/knot128v1/armcortexm_7/auxFormat.c create mode 100644 knot/Implementations/crypto_aead/knot128v1/armcortexm_7/auxFormat.h create mode 100644 
knot/Implementations/crypto_aead/knot128v1/armcortexm_7/encrypt.c create mode 100644 knot/Implementations/crypto_aead/knot128v1/opt_1/api.h create mode 100644 knot/Implementations/crypto_aead/knot128v1/opt_1/encrypt.c create mode 100644 knot/Implementations/crypto_aead/knot128v2/opt_1/api.h create mode 100644 knot/Implementations/crypto_aead/knot128v2/opt_1/encrypt.c create mode 100644 knot/Implementations/crypto_aead/knot128v2/opt_2/api.h create mode 100644 knot/Implementations/crypto_aead/knot128v2/opt_2/encrypt.c create mode 100644 knot/Implementations/crypto_aead/knot128v2/opt_3_ARR/api.h create mode 100644 knot/Implementations/crypto_aead/knot128v2/opt_3_ARR/encrypt.c create mode 100644 knot/Implementations/crypto_aead/knot192/armcortexm_5_/api.h create mode 100644 knot/Implementations/crypto_aead/knot192/armcortexm_5_/auxFormat.c create mode 100644 knot/Implementations/crypto_aead/knot192/armcortexm_5_/auxFormat.h create mode 100644 knot/Implementations/crypto_aead/knot192/armcortexm_5_/crypto_aead.h create mode 100644 knot/Implementations/crypto_aead/knot192/armcortexm_5_/encrypt.c create mode 100644 knot/Implementations/crypto_aead/knot192/opt_1/api.h create mode 100644 knot/Implementations/crypto_aead/knot192/opt_1/encrypt.c create mode 100644 knot/Implementations/crypto_aead/knot192/opt_2/api.h create mode 100644 knot/Implementations/crypto_aead/knot192/opt_2/encrypt.c create mode 100644 knot/Implementations/crypto_aead/knot192/opt_ARR/api.h create mode 100644 knot/Implementations/crypto_aead/knot192/opt_ARR/encrypt.c create mode 100644 knot/Implementations/crypto_aead/knot256/armcortexm_3_/api.h create mode 100644 knot/Implementations/crypto_aead/knot256/armcortexm_3_/auxFormat.h create mode 100644 knot/Implementations/crypto_aead/knot256/armcortexm_3_/crypto_aead.h create mode 100644 knot/Implementations/crypto_aead/knot256/armcortexm_3_/encrypt.c create mode 100644 knot/Implementations/crypto_aead/knot256/opt_1/api.h create mode 100644 
knot/Implementations/crypto_aead/knot256/opt_1/encrypt.c create mode 100644 knot/Implementations/crypto_aead/knot256/opt_SSE/api.h create mode 100644 knot/Implementations/crypto_aead/knot256/opt_SSE/encrypt.c create mode 100644 knot/Implementations/crypto_hash/knot256v1/armcortexm_7/api.h create mode 100644 knot/Implementations/crypto_hash/knot256v1/armcortexm_7/auxFormat.c create mode 100644 knot/Implementations/crypto_hash/knot256v1/armcortexm_7/auxFormat.h create mode 100644 knot/Implementations/crypto_hash/knot256v1/armcortexm_7/hash.c create mode 100644 knot/Implementations/crypto_hash/knot256v1/opt_1/api.h create mode 100644 knot/Implementations/crypto_hash/knot256v1/opt_1/hash.c create mode 100644 knot/Implementations/crypto_hash/knot256v2/opt_1/api.h create mode 100644 knot/Implementations/crypto_hash/knot256v2/opt_1/hash.c create mode 100644 knot/Implementations/crypto_hash/knot384/opt_1/api.h create mode 100644 knot/Implementations/crypto_hash/knot384/opt_1/hash.c create mode 100644 knot/Implementations/crypto_hash/knot512/opt_1/api.h create mode 100644 knot/Implementations/crypto_hash/knot512/opt_1/hash.c create mode 100644 knot/Implementations/crypto_hash/knot512/opt_SSE/api.h create mode 100644 knot/Implementations/crypto_hash/knot512/opt_SSE/hash.c diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_7/api.h b/knot/Implementations/crypto_aead/knot128v1/armcortexm_7/api.h new file mode 100644 index 0000000..732ae75 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_7/api.h @@ -0,0 +1,8 @@ + +#define CRYPTO_KEYBYTES 16 // +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 + + diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_7/auxFormat.c b/knot/Implementations/crypto_aead/knot128v1/armcortexm_7/auxFormat.c new file mode 100644 index 0000000..4a3475f --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_7/auxFormat.c @@ 
-0,0 +1,364 @@
+#include"auxFormat.h"
+
+
+/*
+ * P256 - KNOT 256-bit permutation on the packed state (Thumb-2 inline asm).
+ *
+ * s       eight 32-bit state words as produced by packFormat(); permuted
+ *         in place.
+ * rc      round-constant table.  One byte is read per round: its high
+ *         nibble is XORed into s[0] and its low nibble into s[1].
+ * rounds  loop count, must be >= 1.  The code runs one round, then `rounds`
+ *         iterations of a four-round loop, then three more rounds, i.e.
+ *         4 * rounds + 4 rounds in total, reading that many bytes of rc.
+ *
+ * Rotations are applied lazily through ROR operand shifts; the five trailing
+ * ROR instructions apply the rotations still pending on s[3]..s[7].
+ */
+void P256(unsigned int *s, unsigned char *rc, unsigned char rounds)
+{
+    unsigned int reg1, reg2;
+    /* Full-width loop counter: subs/bne work on the whole 32-bit register,
+     * so the count must not live in an operand declared as 8 bits wide. */
+    unsigned int ro = rounds;
+
+    asm volatile (
+        "/*add round const*/ \n\t"
+        "ldrb %[reg1], [%[rc]] \n\t"
+        "and %[reg2], %[reg1], 0xf \n\t"
+        "eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/
+        "eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/
+        "adds %[rc], %[rc], #1 \n\t"
+        "/*sbox first column 0,2,4,6 sbox1(s[0], s[2], s[4], s[6]); */ \n\t"
+        "mvns %[S_0], %[S_0] \n\t"
+        "ands %[reg1], %[S_2], %[S_0] \n\t"
+        "eors %[reg1], %[S_4], %[reg1] \n\t"
+        "orrs %[S_4], %[S_2], %[S_4] \n\t"
+        "eors %[S_0], %[S_6], %[S_0] \n\t"
+        "eors %[S_4], %[S_4], %[S_0] \n\t"
+        "eors %[reg2], %[S_2], %[S_6] \n\t"
+        "eors %[S_6], %[S_6], %[reg1] \n\t"
+        "ands %[S_0], %[reg1],%[S_0] \n\t"
+        "eors %[S_0], %[reg2],%[S_0] \n\t"
+        "ands %[S_2], %[S_4], %[reg2] \n\t"
+        "eors %[S_2], %[reg1], %[S_2] \n\t"
+        "/*sbox first column 1,3,5,7 sbox1(s[1], s[3], s[5], s[7]) */ \n\t"
+        "mvns %[S_1], %[S_1] \n\t"
+        "ands %[reg1], %[S_3], %[S_1] \n\t"
+        "eors %[reg1], %[S_5], %[reg1] \n\t"
+        "orrs %[S_5], %[S_3], %[S_5] \n\t"
+        "eors %[S_1], %[S_7], %[S_1] \n\t"
+        "eors %[S_5], %[S_5], %[S_1] \n\t"
+        "eors %[reg2], %[S_3], %[S_7] \n\t"
+        "eors %[S_7], %[S_7], %[reg1] \n\t"
+        "ands %[S_1], %[reg1],%[S_1] \n\t"
+        "eors %[S_1], %[reg2],%[S_1] \n\t"
+        "ands %[S_3], %[S_5], %[reg2] \n\t"
+        "eors %[S_3], %[reg1], %[S_3] \n\t"
+        "1: \n\t" /* local numeric label: stays unique if the asm is duplicated */
+        "/*add round const*/ \n\t"
+        "ldrb %[reg1], [%[rc]] \n\t"
+        "and %[reg2], %[reg1], 0xf \n\t"
+        "eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/
+        "eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/
+        "adds %[rc], %[rc], #1 \n\t"
+        "/*sbox first column 0,3,4,7 sbox1(s[0], s[3],ROR(s[4], 28), ROR(s[7], 20)); */ \n\t"
+        "mvns %[S_0], %[S_0] \n\t"
+        "ands %[reg1], %[S_3], %[S_0] \n\t"
+        "eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t"
+        "orrs %[S_4], %[S_3], %[S_4] , ROR #28 \n\t"
+        "eors %[S_0], %[S_0], %[S_7] , ROR #20 \n\t"
+        "eors %[S_4], %[S_4], %[S_0] \n\t"
+        "eors %[reg2], %[S_3], %[S_7] , ROR #20 \n\t"
+        "eors %[S_7], %[reg1],%[S_7] , ROR #20 \n\t"
+        "ands %[S_0], %[reg1],%[S_0] \n\t"
+        "eors %[S_0], %[reg2],%[S_0] \n\t"
+        "ands %[S_3], %[S_4], %[reg2] \n\t"
+        "eors %[S_3], %[reg1], %[S_3] \n\t"
+        "/*sbox first column 1,2,5,6 sbox1(s[1], ROR(s[2], 31), ROR(s[5], 28), ROR(s[6], 19)); */ \n\t"
+        "mvns %[S_1], %[S_1] \n\t"
+        "ands %[reg1], %[S_1], %[S_2] , ROR #31 \n\t"
+        "eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t"
+        "/*orrs %[S_5], %[S_5], ROR #28 %[S_2], ROR #31 31-28=3*/ \n\t"
+        "orrs %[S_5], %[S_5], %[S_2] , ROR #3 \n\t"
+        "eors %[S_1], %[S_1], %[S_6] , ROR #19 \n\t"
+        "eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t"
+        "/*eors %[reg2], %[S_6] , ROR #19, %[S_2] , ROR #31 31-19=12*/ \n\t"
+        "eors %[reg2], %[S_6], %[S_2] , ROR #12 \n\t"
+        "eors %[S_6], %[reg1],%[S_6] , ROR #19 \n\t"
+        "ands %[S_1], %[reg1],%[S_1] \n\t"
+        "eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
+        "ands %[S_2], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
+        "eors %[S_2], %[reg1],%[S_2] \n\t"
+        "/*add round const*/ \n\t"
+        "ldrb %[reg1], [%[rc]] \n\t"
+        "and %[reg2], %[reg1], 0xf \n\t"
+        "eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/
+        "eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/
+        "adds %[rc], %[rc], #1 \n\t"
+        "/*sbox first column 0,2,4,6 sbox1(s[0], s[2], ROR(s[4], 28), ROR(s[6], 20)); */ \n\t"
+        "mvns %[S_0], %[S_0] \n\t"
+        "ands %[reg1], %[S_2], %[S_0] \n\t"
+        "eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t"
+        "orrs %[S_4], %[S_2], %[S_4] , ROR #28 \n\t"
+        "eors %[S_0], %[S_0], %[S_6] , ROR #20 \n\t"
+        "eors %[S_4], %[S_4], %[S_0] \n\t"
+        "eors %[reg2], %[S_2], %[S_6] , ROR #20 \n\t"
+        "eors %[S_6], %[reg1],%[S_6] , ROR #20 \n\t"
+        "ands %[S_0], %[reg1],%[S_0] \n\t"
+        "eors %[S_0], %[reg2],%[S_0] \n\t"
+        "ands %[S_2], %[S_4], %[reg2] \n\t"
+        "eors %[S_2], %[reg1], %[S_2] \n\t"
+        "/*sbox first column 1,3,5,7 sbox1(s[1], ROR(s[3], 31), ROR(s[5], 28), ROR(s[7], 19)); */ \n\t"
+        "mvns %[S_1], %[S_1] \n\t"
+        "ands %[reg1], %[S_1], %[S_3] , ROR #31 \n\t"
+        "eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t"
+        "/*orrs %[S_5], %[S_5], ROR #28 %[S_3], ROR #31 31-28=3*/ \n\t"
+        "orrs %[S_5], %[S_5], %[S_3] , ROR #3 \n\t"
+        "eors %[S_1], %[S_1], %[S_7] , ROR #19 \n\t"
+        "eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t"
+        "/*eors %[reg2], %[S_7] , ROR #19, %[S_3] , ROR #31 31-19=12*/ \n\t"
+        "eors %[reg2], %[S_7], %[S_3] , ROR #12 \n\t"
+        "eors %[S_7], %[reg1],%[S_7] , ROR #19 \n\t"
+        "ands %[S_1], %[reg1],%[S_1] \n\t"
+        "eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
+        "ands %[S_3], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
+        "eors %[S_3], %[reg1],%[S_3] \n\t"
+        "/*add round const*/ \n\t"
+        "ldrb %[reg1], [%[rc]] \n\t"
+        "and %[reg2], %[reg1], 0xf \n\t"
+        "eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/
+        "eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/
+        "adds %[rc], %[rc], #1 \n\t"
+        "/*sbox first column 0,3,4,7 sbox1(s[0], s[3],ROR(s[4], 28), ROR(s[7], 20)); */ \n\t"
+        "mvns %[S_0], %[S_0] \n\t"
+        "ands %[reg1], %[S_3], %[S_0] \n\t"
+        "eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t"
+        "orrs %[S_4], %[S_3], %[S_4] , ROR #28 \n\t"
+        "eors %[S_0], %[S_0], %[S_7] , ROR #20 \n\t"
+        "eors %[S_4], %[S_4], %[S_0] \n\t"
+        "eors %[reg2], %[S_3], %[S_7] , ROR #20 \n\t"
+        "eors %[S_7], %[reg1],%[S_7] , ROR #20 \n\t"
+        "ands %[S_0], %[reg1],%[S_0] \n\t"
+        "eors %[S_0], %[reg2],%[S_0] \n\t"
+        "ands %[S_3], %[S_4], %[reg2] \n\t"
+        "eors %[S_3], %[reg1], %[S_3] \n\t"
+        "/*sbox first column 1,2,5,6 sbox1(s[1], ROR(s[2], 31), ROR(s[5], 28), ROR(s[6], 19)); */ \n\t"
+        "mvns %[S_1], %[S_1] \n\t"
+        "ands %[reg1], %[S_1], %[S_2] , ROR #31 \n\t"
+        "eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t"
+        "/*orrs %[S_5], %[S_5], ROR #28 %[S_2], ROR #31 31-28=3*/ \n\t"
+        "orrs %[S_5], %[S_5], %[S_2] , ROR #3 \n\t"
+        "eors %[S_1], %[S_1], %[S_6] , ROR #19 \n\t"
+        "eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t"
+        "/*eors %[reg2], %[S_6] , ROR #19, %[S_2] , ROR #31 31-19=12*/ \n\t"
+        "eors %[reg2], %[S_6], %[S_2] , ROR #12 \n\t"
+        "eors %[S_6], %[reg1],%[S_6] , ROR #19 \n\t"
+        "ands %[S_1], %[reg1],%[S_1] \n\t"
+        "eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
+        "ands %[S_2], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
+        "eors %[S_2], %[reg1],%[S_2] \n\t"
+        "/*add round const*/ \n\t"
+        "ldrb %[reg1], [%[rc]] \n\t"
+        "and %[reg2], %[reg1], 0xf \n\t"
+        "eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/
+        "eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/
+        "adds %[rc], %[rc], #1 \n\t"
+        "/*sbox first column 0,2,4,6 sbox1(s[0], s[2], ROR(s[4], 28), ROR(s[6], 20)); */ \n\t"
+        "mvns %[S_0], %[S_0] \n\t"
+        "ands %[reg1], %[S_2], %[S_0] \n\t"
+        "eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t"
+        "orrs %[S_4], %[S_2], %[S_4] , ROR #28 \n\t"
+        "eors %[S_0], %[S_0], %[S_6] , ROR #20 \n\t"
+        "eors %[S_4], %[S_4], %[S_0] \n\t"
+        "eors %[reg2], %[S_2], %[S_6] , ROR #20 \n\t"
+        "eors %[S_6], %[reg1],%[S_6] , ROR #20 \n\t"
+        "ands %[S_0], %[reg1],%[S_0] \n\t"
+        "eors %[S_0], %[reg2],%[S_0] \n\t"
+        "ands %[S_2], %[S_4], %[reg2] \n\t"
+        "eors %[S_2], %[reg1], %[S_2] \n\t"
+        "/*sbox first column 1,3,5,7 sbox1(s[1], ROR(s[3], 31), ROR(s[5], 28), ROR(s[7], 19)); */ \n\t"
+        "mvns %[S_1], %[S_1] \n\t"
+        "ands %[reg1], %[S_1], %[S_3] , ROR #31 \n\t"
+        "eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t"
+        "/*orrs %[S_5], %[S_5], ROR #28 %[S_3], ROR #31 31-28=3*/ \n\t"
+        "orrs %[S_5], %[S_5], %[S_3] , ROR #3 \n\t"
+        "eors %[S_1], %[S_1], %[S_7] , ROR #19 \n\t"
+        "eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t"
+        "/*eors %[reg2], %[S_7] , ROR #19, %[S_3] , ROR #31 31-19=12*/ \n\t"
+        "eors %[reg2], %[S_7], %[S_3] , ROR #12 \n\t"
+        "eors %[S_7], %[reg1],%[S_7] , ROR #19 \n\t"
+        "ands %[S_1], %[reg1],%[S_1] \n\t"
+        "eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
+        "ands %[S_3], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
+        "eors %[S_3], %[reg1],%[S_3] \n\t"
+        "/*loop control*/ \n\t"
+        "subs %[ro], %[ro], #1 \n\t"
+        "bne 1b \n\t"
+        "/*add round const*/ \n\t"
+        "ldrb %[reg1], [%[rc]] \n\t"
+        "and %[reg2], %[reg1], 0xf \n\t"
+        "eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/
+        "eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/
+        "adds %[rc], %[rc], #1 \n\t"
+        "/*sbox first column 0,3,4,7 sbox1(s[0], s[3],ROR(s[4], 28), ROR(s[7], 20)); */ \n\t"
+        "mvns %[S_0], %[S_0] \n\t"
+        "ands %[reg1], %[S_3], %[S_0] \n\t"
+        "eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t"
+        "orrs %[S_4], %[S_3], %[S_4] , ROR #28 \n\t"
+        "eors %[S_0], %[S_0], %[S_7] , ROR #20 \n\t"
+        "eors %[S_4], %[S_4], %[S_0] \n\t"
+        "eors %[reg2], %[S_3], %[S_7] , ROR #20 \n\t"
+        "eors %[S_7], %[reg1],%[S_7] , ROR #20 \n\t"
+        "ands %[S_0], %[reg1],%[S_0] \n\t"
+        "eors %[S_0], %[reg2],%[S_0] \n\t"
+        "ands %[S_3], %[S_4], %[reg2] \n\t"
+        "eors %[S_3], %[reg1], %[S_3] \n\t"
+        "/*sbox first column 1,2,5,6 sbox1(s[1], ROR(s[2], 31), ROR(s[5], 28), ROR(s[6], 19)); */ \n\t"
+        "mvns %[S_1], %[S_1] \n\t"
+        "ands %[reg1], %[S_1], %[S_2] , ROR #31 \n\t"
+        "eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t"
+        "/*orrs %[S_5], %[S_5], ROR #28 %[S_2], ROR #31 31-28=3*/ \n\t"
+        "orrs %[S_5], %[S_5], %[S_2] , ROR #3 \n\t"
+        "eors %[S_1], %[S_1], %[S_6] , ROR #19 \n\t"
+        "eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t"
+        "/*eors %[reg2], %[S_6] , ROR #19, %[S_2] , ROR #31 31-19=12*/ \n\t"
+        "eors %[reg2], %[S_6], %[S_2] , ROR #12 \n\t"
+        "eors %[S_6], %[reg1],%[S_6] , ROR #19 \n\t"
+        "ands %[S_1], %[reg1],%[S_1] \n\t"
+        "eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
+        "ands %[S_2], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
+        "eors %[S_2], %[reg1],%[S_2] \n\t"
+        "/*add round const*/ \n\t"
+        "ldrb %[reg1], [%[rc]] \n\t"
+        "and %[reg2], %[reg1], 0xf \n\t"
+        "eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/
+        "eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/
+        "adds %[rc], %[rc], #1 \n\t"
+        "/*sbox first column 0,2,4,6 sbox1(s[0], s[2], ROR(s[4], 28), ROR(s[6], 20)); */ \n\t"
+        "mvns %[S_0], %[S_0] \n\t"
+        "ands %[reg1], %[S_2], %[S_0] \n\t"
+        "eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t"
+        "orrs %[S_4], %[S_2], %[S_4] , ROR #28 \n\t"
+        "eors %[S_0], %[S_0], %[S_6] , ROR #20 \n\t"
+        "eors %[S_4], %[S_4], %[S_0] \n\t"
+        "eors %[reg2], %[S_2], %[S_6] , ROR #20 \n\t"
+        "eors %[S_6], %[reg1],%[S_6] , ROR #20 \n\t"
+        "ands %[S_0], %[reg1],%[S_0] \n\t"
+        "eors %[S_0], %[reg2],%[S_0] \n\t"
+        "ands %[S_2], %[S_4], %[reg2] \n\t"
+        "eors %[S_2], %[reg1], %[S_2] \n\t"
+        "/*sbox first column 1,3,5,7 sbox1(s[1], ROR(s[3], 31), ROR(s[5], 28), ROR(s[7], 19)); */ \n\t"
+        "mvns %[S_1], %[S_1] \n\t"
+        "ands %[reg1], %[S_1], %[S_3] , ROR #31 \n\t"
+        "eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t"
+        "/*orrs %[S_5], %[S_5], ROR #28 %[S_3], ROR #31 31-28=3*/ \n\t"
+        "orrs %[S_5], %[S_5], %[S_3] , ROR #3 \n\t"
+        "eors %[S_1], %[S_1], %[S_7] , ROR #19 \n\t"
+        "eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t"
+        "/*eors %[reg2], %[S_7] , ROR #19, %[S_3] , ROR #31 31-19=12*/ \n\t"
+        "eors %[reg2], %[S_7], %[S_3] , ROR #12 \n\t"
+        "eors %[S_7], %[reg1],%[S_7] , ROR #19 \n\t"
+        "ands %[S_1], %[reg1],%[S_1] \n\t"
+        "eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
+        "ands %[S_3], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
+        "eors %[S_3], %[reg1],%[S_3] \n\t"
+        "/*add round const*/ \n\t"
+        "ldrb %[reg1], [%[rc]] \n\t"
+        "and %[reg2], %[reg1], 0xf \n\t"
+        "eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/
+        "eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/
+        "/*sbox first column 0,3,4,7 sbox1(s[0], s[3],ROR(s[4], 28), ROR(s[7], 20)); */ \n\t"
+        "mvns %[S_0], %[S_0] \n\t"
+        "ands %[reg1], %[S_3], %[S_0] \n\t"
+        "eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t"
+        "orrs %[S_4], %[S_3], %[S_4] , ROR #28 \n\t"
+        "eors %[S_0], %[S_0], %[S_7] , ROR #20 \n\t"
+        "eors %[S_4], %[S_4], %[S_0] \n\t"
+        "eors %[reg2], %[S_3], %[S_7] , ROR #20 \n\t"
+        "eors %[S_7], %[reg1],%[S_7] , ROR #20 \n\t"
+        "ands %[S_0], %[reg1],%[S_0] \n\t"
+        "eors %[S_0], %[reg2],%[S_0] \n\t"
+        "ands %[S_3], %[S_4], %[reg2] \n\t"
+        "eors %[S_3], %[reg1], %[S_3] \n\t"
+        "/*sbox first column 1,2,5,6 sbox1(s[1], ROR(s[2], 31), ROR(s[5], 28), ROR(s[6], 19)); */ \n\t"
+        "mvns %[S_1], %[S_1] \n\t"
+        "ands %[reg1], %[S_1], %[S_2] , ROR #31 \n\t"
+        "eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t"
+        "/*orrs %[S_5], %[S_5], ROR #28 %[S_2], ROR #31 31-28=3*/ \n\t"
+        "orrs %[S_5], %[S_5], %[S_2] , ROR #3 \n\t"
+        "eors %[S_1], %[S_1], %[S_6] , ROR #19 \n\t"
+        "eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t"
+        "/*eors %[reg2], %[S_6] , ROR #19, %[S_2] , ROR #31 31-19=12*/ \n\t"
+        "eors %[reg2], %[S_6], %[S_2] , ROR #12 \n\t"
+        "eors %[S_6], %[reg1],%[S_6] , ROR #19 \n\t"
+        "ands %[S_1], %[reg1],%[S_1] \n\t"
+        "eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
+        "ands %[S_2], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t"
+        "eors %[S_2], %[reg1],%[S_2] \n\t"
+        "ROR %[S_3], #31 \n\t"
+        "ROR %[S_4], #28 \n\t"
+        "ROR %[S_5], #28 \n\t"
+        "ROR %[S_6], #20 \n\t"
+        "ROR %[S_7], #19 \n\t"
+        : /* output variables - including inputs that are changed */
+          [ro] "+r" (ro), [reg1] "=&r" (reg1), [reg2] "=&r" (reg2), [rc] "+r" (rc),
+          [S_0] "+r" (s[0]), [S_2] "+r" (s[2]), [S_4] "+r" (s[4]), [S_6] "+r" (s[6]),
+          [S_1] "+r" (s[1]), [S_3] "+r" (s[3]), [S_5] "+r" (s[5]), [S_7] "+r" (s[7])
+        : /* input variables */
+        : /* clobbers: the flag-setting instructions change the condition
+           * codes, and ldrb reads the rc table through a pointer operand */
+          "cc", "memory"
+    );
+}
+
+/*
+ * packFormat - read 8 bytes from in as two native-order 32-bit words and
+ * rearrange their bits into the two-word layout used for the P256() state.
+ *
+ * Per the original comments each word is split into its "odd" and "even"
+ * halves; out[0] receives t1.odd | t0.odd and out[1] t1.even | t0.even.
+ */
+void packFormat(u32 * out, const u8 * in) {
+    u32 t0, t1;
+    u32 r0, r1;
+
+    /* memcpy instead of a (u32 *) cast: `in` is a byte pointer with no
+     * alignment guarantee, and the cast also discarded its const. */
+    memcpy(&t0, in, sizeof t0);
+    memcpy(&t1, in + 4, sizeof t1);
+    t0 = U32BIG(t0);
+    t1 = U32BIG(t1);
+    r0 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= r0 ^ (r0 << 1);
+    r0 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= r0 ^ (r0 << 2);
+    r0 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= r0 ^ (r0 << 4);
+    r0 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= r0 ^ (r0 << 8); //t0 odd even
+    r1 = (t1 ^ (t1 >> 1)) & 0x22222222, t1 ^= r1 ^ (r1 << 1);
+    r1 = (t1 ^ (t1 >> 2)) & 0x0C0C0C0C, t1 ^= r1 ^ (r1 << 2);
+    r1 = (t1 ^ (t1 >> 4)) & 0x00F000F0, t1 ^= r1 ^ (r1 << 4);
+    r1 = (t1 ^ (t1 >> 8)) & 0x0000FF00, t1 ^= r1 ^ (r1 << 8); //t1 odd even
+    out[0] = (t1 & 0xFFFF0000) | (t0 >> 16); // t1.odd|t0.odd
+    out[1] = (t1 << 16) | (t0 & 0x0000FFFF); // t1.even|t0.even
+}
+
+/*
+ * unpackFormat - inverse of packFormat(): rebuild the two 32-bit words from
+ * in[0] and in[1] and store them as 8 bytes at out.
+ */
+void unpackFormat(u8 * out, u32 * in) {
+    u32 t[2] = { 0 };
+    u32 r0, r1;
+
+    t[1] = (in[0] & 0xFFFF0000) | (in[1] >> 16);
+    t[0] = (in[1] & 0x0000FFFF) | (in[0] << 16);
+    r0 = (t[0] ^ (t[0] >> 8)) & 0x0000FF00, t[0] ^= r0 ^ (r0 << 8);
+    r0 = (t[0] ^ (t[0] >> 4)) & 0x00F000F0, t[0] ^= r0 ^ (r0 << 4);
+    r0 = (t[0] ^ (t[0] >> 2)) & 0x0C0C0C0C, t[0] ^= r0 ^ (r0 << 2);
+    r0 = (t[0] ^ (t[0] >> 1)) & 0x22222222, t[0] ^= r0 ^ (r0 << 1);
+    r1 = (t[1] ^ (t[1] >> 8)) & 0x0000FF00, t[1] ^= r1 ^ (r1 << 8);
+    r1 = (t[1] ^ (t[1] >> 4)) & 0x00F000F0, t[1] ^= r1 ^ (r1 << 4);
+    r1 = (t[1] ^ (t[1] >> 2)) & 0x0C0C0C0C, t[1] ^= r1 ^ (r1 << 2);
+    r1 = (t[1] ^ (t[1] >> 1)) & 0x22222222, t[1] ^= r1 ^ (r1 << 1);
+    memcpy(out, t, sizeof t);
+}
+
diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_7/auxFormat.h b/knot/Implementations/crypto_aead/knot128v1/armcortexm_7/auxFormat.h
new file mode 100644
index 0000000..4f715f8
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_7/auxFormat.h
@@ -0,0 +1,16 @@
+#ifndef AUXFORMAT_H
+#define AUXFORMAT_H
+
+#include"api.h"
+#include <string.h> /* memcpy, memset, memcmp used by auxFormat.c and encrypt.c */
+#define U32BIG(x) (x) /* identity: words are used in native byte order */
+
+typedef unsigned char u8;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+void P256(unsigned int *s, unsigned char *rc, unsigned char rounds);
+void packFormat(u32 * out, const u8 * in);
+void unpackFormat(u8 * out, u32 * in);
+
+#endif /* AUXFORMAT_H */
diff --git 
a/knot/Implementations/crypto_aead/knot128v1/armcortexm_7/encrypt.c b/knot/Implementations/crypto_aead/knot128v1/armcortexm_7/encrypt.c
new file mode 100644
index 0000000..41ef437
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_7/encrypt.c
@@ -0,0 +1,187 @@
+#include"auxFormat.h"
+
+#define RATE 8
+/*
+ * Loop counts handed to P256(), which executes 4 * n + 4 rounds for a
+ * count of n: 12 -> 52, 6 -> 28 and 7 -> 32 rounds.
+ */
+#define PR0_ROUNDS 12
+#define PR_ROUNDS 6
+#define PRF_ROUNDS 7
+
+/* Round-constant table passed to P256(): 52 entries, one byte per round. */
+unsigned char constant6Format[52] = { 0x01, 0x10, 0x02, 0x20, 0x04, 0x41, 0x11,
+        0x12, 0x22, 0x24, 0x45, 0x50, 0x03, 0x30, 0x06, 0x61, 0x15, 0x53, 0x33,
+        0x36, 0x67, 0x74, 0x46, 0x60, 0x05, 0x51, 0x13, 0x32, 0x26, 0x65, 0x54,
+        0x42, 0x21, 0x14, 0x43, 0x31, 0x16, 0x63, 0x35, 0x57, 0x72, 0x27, 0x75,
+        0x56, 0x62, 0x25, 0x55, 0x52, 0x23, 0x34, 0x47, 0x70, };
+
+/*
+ * Return nonzero when the n-byte buffers differ.  Every byte is examined
+ * (no early exit), unlike memcmp(), so the run time does not depend on the
+ * position of the first mismatching tag byte.
+ */
+static int tag_mismatch(const u8 *a, const u8 *b, unsigned int n) {
+    u8 diff = 0;
+    unsigned int i;
+
+    for (i = 0; i < n; i++) {
+        diff |= (u8) (a[i] ^ b[i]);
+    }
+    return diff != 0;
+}
+
+/*
+ * Common start of encryption and decryption: pack nonce and key into the
+ * state, run the initial permutation, absorb the associated data (the last
+ * partial block is padded with a 0x01 byte) and flip the top bit of s[6].
+ */
+static void knot_start(u32 s[8], const unsigned char *npub,
+        const unsigned char *k, const unsigned char *ad,
+        unsigned long long adlen) {
+    u32 dataFormat[2] = { 0 };
+    u8 tempData[RATE];
+
+    packFormat(s, npub);
+    packFormat((s + 2), (npub + 8));
+    packFormat((s + 4), k);
+    packFormat((s + 6), (k + 8));
+    P256(s, constant6Format, PR0_ROUNDS);
+    // process associated data
+    if (adlen) {
+        while (adlen >= RATE) {
+            packFormat(dataFormat, ad);
+            s[0] ^= dataFormat[0];
+            s[1] ^= dataFormat[1];
+            P256(s, constant6Format, PR_ROUNDS);
+            adlen -= RATE;
+            ad += RATE;
+        }
+        memset(tempData, 0, sizeof(tempData));
+        memcpy(tempData, ad, adlen);
+        tempData[adlen] = 0x01;
+        packFormat(dataFormat, tempData);
+        s[0] ^= dataFormat[0];
+        s[1] ^= dataFormat[1];
+        P256(s, constant6Format, PR_ROUNDS);
+    }
+    s[6] ^= 0x80000000;
+}
+
+/*
+ * crypto_aead_encrypt - encrypt mlen bytes of m and append the tag.
+ *
+ * Writes mlen ciphertext bytes followed by CRYPTO_ABYTES tag bytes to c and
+ * stores mlen + CRYPTO_ABYTES in *clen.  nsec is unused.  Always returns 0.
+ */
+int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
+        const unsigned char *m, unsigned long long mlen,
+        const unsigned char *ad, unsigned long long adlen,
+        const unsigned char *nsec, const unsigned char *npub,
+        const unsigned char *k) {
+    u32 s[8] = { 0 };
+    u32 dataFormat[2] = { 0 };
+    u8 tempData[16];
+
+    (void) nsec;
+    *clen = mlen + CRYPTO_ABYTES;
+    knot_start(s, npub, k, ad, adlen);
+    // encryption
+    if (mlen) {
+        while (mlen >= RATE) {
+            packFormat(dataFormat, m);
+            s[0] ^= dataFormat[0];
+            s[1] ^= dataFormat[1];
+            unpackFormat(c, s);
+            P256(s, constant6Format, PR_ROUNDS);
+            mlen -= RATE;
+            m += RATE;
+            c += RATE;
+        }
+        memset(tempData, 0, sizeof(tempData));
+        memcpy(tempData, m, mlen);
+        tempData[mlen] = 0x01;
+        packFormat(dataFormat, tempData);
+        s[0] ^= dataFormat[0];
+        s[1] ^= dataFormat[1];
+        unpackFormat(tempData, s);
+        memcpy(c, tempData, mlen);
+        c += mlen;
+    }
+    // finalization
+    P256(s, constant6Format, PRF_ROUNDS);
+    unpackFormat(tempData, s);
+    unpackFormat((tempData + 8), (s + 2));
+    memcpy(c, tempData, CRYPTO_ABYTES);
+    return 0;
+}
+
+/*
+ * crypto_aead_decrypt - verify the tag and recover the plaintext.
+ *
+ * c holds clen bytes: the ciphertext followed by CRYPTO_ABYTES tag bytes.
+ * Returns 0 with the plaintext in m and its length in *mlen, or -1 with
+ * *mlen set to 0 when clen is too short or the tag does not match; in the
+ * latter case the plaintext already written to m is zeroed.  nsec is unused.
+ */
+int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
+        unsigned char *nsec, const unsigned char *c, unsigned long long clen,
+        const unsigned char *ad, unsigned long long adlen,
+        const unsigned char *npub, const unsigned char *k) {
+    u32 s[8] = { 0 };
+    u32 dataFormat[2] = { 0 };
+    u32 dataFormat_1[2] = { 0 };
+    u8 tempU8[RATE];
+    u8 tempData[16];
+    unsigned char *m_start = m; /* m is advanced below; keep the buffer start */
+    unsigned int i;
+
+    (void) nsec;
+    /* check the length before subtracting so *mlen never wraps around */
+    if (clen < CRYPTO_ABYTES) {
+        *mlen = 0;
+        return -1;
+    }
+    clen -= CRYPTO_ABYTES; /* the trailing bytes are the tag, not the key */
+    *mlen = clen;
+    knot_start(s, npub, k, ad, adlen);
+    // process c
+    if (clen) {
+        while (clen >= RATE) {
+            packFormat(dataFormat, c);
+            dataFormat_1[0] = s[0] ^ dataFormat[0];
+            dataFormat_1[1] = s[1] ^ dataFormat[1];
+            unpackFormat(m, dataFormat_1);
+            s[0] = dataFormat[0];
+            s[1] = dataFormat[1];
+            P256(s, constant6Format, PR_ROUNDS);
+            clen -= RATE;
+            m += RATE;
+            c += RATE;
+        }
+        unpackFormat(tempU8, s);
+        memset(tempData, 0, sizeof(tempData));
+        memcpy(tempData, c, clen);
+        tempData[clen] = 0x01;
+        /* byte-wise XOR replaces the former (u32 *) casts of byte arrays */
+        for (i = 0; i < RATE; i++) {
+            tempU8[i] ^= tempData[i];
+        }
+        memcpy(m, tempU8, clen);
+        /* new rate bytes: ciphertext, then the padded old state bytes */
+        memcpy(tempU8, tempData, clen);
+        c += clen;
+        packFormat(s, tempU8);
+    }
+    // finalization
+    P256(s, constant6Format, PRF_ROUNDS);
+    unpackFormat(tempData, s);
+    unpackFormat((tempData + 8), (s + 2));
+    if (tag_mismatch(tempData, c, CRYPTO_ABYTES)) {
+        /* wipe from the start of the buffer, not from the advanced m */
+        memset(m_start, 0, *mlen);
+        *mlen = 0;
+        return -1;
+    }
+    return 0;
+}
diff --git a/knot/Implementations/crypto_aead/knot128v1/opt_1/api.h b/knot/Implementations/crypto_aead/knot128v1/opt_1/api.h
new file mode 100644
index 0000000..236f0bb
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v1/opt_1/api.h
@@ -0,0 +1,6 @@
+#define CRYPTO_KEYBYTES 16
+#define CRYPTO_NSECBYTES 0
+#define CRYPTO_NPUBBYTES 16
+#define CRYPTO_ABYTES 16
+#define CRYPTO_NOOVERLAP 1
+
diff --git a/knot/Implementations/crypto_aead/knot128v1/opt_1/encrypt.c b/knot/Implementations/crypto_aead/knot128v1/opt_1/encrypt.c
new file mode 100644
index 0000000..345d719
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v1/opt_1/encrypt.c
@@ -0,0 +1,202 @@
+/* NOTE(review): both system header names were missing from this copy of the
+ * patch.  <string.h> is required for memcpy/memset; <stdio.h> is a guess
+ * for the first one - confirm against the upstream file. */
+#include <stdio.h>
+#include "api.h"
+
+#include <string.h>
+typedef unsigned long long u64;
+typedef unsigned char u8;
+typedef long long i64;
+
+#define RATE 8
+
+#define PR0_ROUNDS 52
+#define PR_ROUNDS 28
+#define PRF_ROUNDS 32
+
+#define LOTR64(x,n) (((x)<<(n))|((x)>>(64-(n))))
+#define U64BIG(x) (x)
+static const u8 constant6[52] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03,
+        0x06, 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29, 0x13, 0x27, 0x0f,
+        0x1e, 0x3d, 0x3a, 0x34, 0x28, 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32,
+        0x24, 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37, 0x2e, 0x1d, 0x3b,
+        0x36, 0x2c, 0x19, 0x33, 0x26, 0x0d, 0x1a, 0x35, 0x2a };
+
+/* Read 8 bytes as one 64-bit word.  memcpy replaces the former (u64 *)
+ * casts: the caller's byte buffers carry no alignment guarantee. */
+static u64 load64(const u8 *p) {
+    u64 v;
+
+    memcpy(&v, p, sizeof v);
+    return U64BIG(v);
+}
+
+/*
+ * Apply the first `rounds` rounds of the permutation to s in place: XOR the
+ * round constant into s[0], run the bit-sliced S-box over the four words,
+ * then rotate s[1], s[2], s[3] left by 1, 8 and 25 bits.  Function form of
+ * the former sbox()/ROUND256() macros, which relied on caller locals.
+ */
+static void knot256_permute(u64 s[4], unsigned int rounds) {
+    unsigned int i;
+
+    for (i = 0; i < rounds; i++) {
+        u64 t1, t3, t6, t8, x5, x6, x7;
+
+        s[0] ^= constant6[i];
+        t1 = ~s[0];
+        t3 = s[2] ^ (s[1] & t1);
+        x7 = s[3] ^ t3;
+        t6 = s[3] ^ t1;
+        x6 = (s[1] | s[2]) ^ t6;
+        t8 = s[1] ^ s[3];
+        s[0] = t8 ^ (t3 & t6);
+        x5 = t3 ^ (x6 & t8);
+        s[1] = LOTR64(x5, 1);
+        s[2] = LOTR64(x6, 8);
+        s[3] = LOTR64(x7, 25);
+    }
+}
+
+/*
+ * Return nonzero when the n-byte buffers differ.  Every byte is examined
+ * (no early exit), unlike memcmp(), so the run time does not depend on the
+ * position of the first mismatching tag byte.
+ */
+static int tag_mismatch(const u8 *a, const u8 *b, unsigned int n) {
+    u8 diff = 0;
+    unsigned int i;
+
+    for (i = 0; i < n; i++) {
+        diff |= (u8) (a[i] ^ b[i]);
+    }
+    return diff != 0;
+}
+
+/*
+ * Common start of encryption and decryption: load nonce and key into the
+ * state, run the initial permutation, absorb the associated data (the last
+ * partial block is padded with a 0x01 byte) and flip the top bit of s[3].
+ */
+static void knot_start(u64 s[4], const unsigned char *npub,
+        const unsigned char *k, const unsigned char *ad,
+        unsigned long long adlen) {
+    u8 tempData[RATE];
+
+    memcpy(s, npub, CRYPTO_NPUBBYTES);
+    memcpy(s + 2, k, CRYPTO_KEYBYTES);
+    knot256_permute(s, PR0_ROUNDS);
+    // process associated data
+    if (adlen) {
+        while (adlen >= RATE) {
+            s[0] ^= load64(ad);
+            knot256_permute(s, PR_ROUNDS);
+            adlen -= RATE;
+            ad += RATE;
+        }
+        memset(tempData, 0, RATE);
+        memcpy(tempData, ad, adlen);
+        tempData[adlen] = 0x01;
+        s[0] ^= load64(tempData);
+        knot256_permute(s, PR_ROUNDS);
+    }
+    s[3] ^= 0x8000000000000000ULL;
+}
+
+/*
+ * crypto_aead_encrypt - encrypt mlen bytes of m and append the tag.
+ *
+ * Writes mlen ciphertext bytes followed by CRYPTO_ABYTES tag bytes to c and
+ * stores mlen + CRYPTO_ABYTES in *clen.  nsec is unused.  Always returns 0.
+ */
+int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
+        const unsigned char *m, unsigned long long mlen,
+        const unsigned char *ad, unsigned long long adlen,
+        const unsigned char *nsec, const unsigned char *npub,
+        const unsigned char *k) {
+    u64 s[4] = { 0 };
+    u8 tempData[RATE];
+
+    (void) nsec;
+    *clen = mlen + CRYPTO_ABYTES;
+    knot_start(s, npub, k, ad, adlen);
+    // process plaintext
+    if (mlen) {
+        while (mlen >= RATE) {
+            s[0] ^= load64(m);
+            memcpy(c, s, RATE);
+            knot256_permute(s, PR_ROUNDS);
+            mlen -= RATE;
+            m += RATE;
+            c += RATE;
+        }
+        memset(tempData, 0, RATE);
+        memcpy(tempData, m, mlen);
+        tempData[mlen] = 0x01;
+        s[0] ^= load64(tempData);
+        memcpy(c, s, mlen);
+        c += mlen;
+    }
+    // finalization
+    knot256_permute(s, PRF_ROUNDS);
+    // return tag
+    memcpy(c, s, CRYPTO_ABYTES);
+    return 0;
+}
+
+/*
+ * crypto_aead_decrypt - verify the tag and recover the plaintext.
+ *
+ * c holds clen bytes: the ciphertext followed by CRYPTO_ABYTES tag bytes.
+ * Returns 0 with the plaintext in m and its length in *mlen, or -1 with
+ * *mlen set to 0 when clen is too short or the tag does not match; in the
+ * latter case the plaintext already written to m is zeroed.  nsec is unused.
+ */
+int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
+        unsigned char *nsec, const unsigned char *c, unsigned long long clen,
+        const unsigned char *ad, unsigned long long adlen,
+        const unsigned char *npub, const unsigned char *k) {
+    u64 s[4] = { 0 };
+    u64 block;
+    u8 tempData[RATE];
+    unsigned char *m_start = m; /* m is advanced below; keep the buffer start */
+
+    (void) nsec;
+    if (clen < CRYPTO_ABYTES) { /* the tag length, not the key length, bounds clen */
+        *mlen = 0;
+        return -1;
+    }
+    clen -= CRYPTO_ABYTES;
+    *mlen = clen;
+    knot_start(s, npub, k, ad, adlen);
+    // process ciphertext
+    if (clen) {
+        while (clen >= RATE) {
+            block = load64(c);
+            block ^= s[0];
+            memcpy(m, &block, RATE);
+            memcpy(s, c, RATE);
+            knot256_permute(s, PR_ROUNDS);
+            clen -= RATE;
+            m += RATE;
+            c += RATE;
+        }
+        memset(tempData, 0, RATE);
+        memcpy(tempData, c, clen);
+        tempData[clen] = 0x01;
+        s[0] ^= load64(tempData);
+        memcpy(m, s, clen);
+        memcpy(s, c, clen);
+        c += clen;
+    }
+    // finalization
+    knot256_permute(s, PRF_ROUNDS);
+    if (tag_mismatch((const u8 *) s, c, CRYPTO_ABYTES)) {
+        /* wipe from the start of the buffer, not from the advanced m */
+        memset(m_start, 0, *mlen);
+        *mlen = 0;
+        return -1;
+    }
+    return 0;
+}
diff --git a/knot/Implementations/crypto_aead/knot128v2/opt_1/api.h b/knot/Implementations/crypto_aead/knot128v2/opt_1/api.h
new file mode 100644
index 0000000..ca69f5f
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v2/opt_1/api.h
@@ -0,0 +1,5 @@
+#define CRYPTO_KEYBYTES 16
+#define CRYPTO_NSECBYTES 0
+#define CRYPTO_NPUBBYTES 16
+#define 
CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/knot/Implementations/crypto_aead/knot128v2/opt_1/encrypt.c b/knot/Implementations/crypto_aead/knot128v2/opt_1/encrypt.c new file mode 100644 index 0000000..1809c4f --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/opt_1/encrypt.c @@ -0,0 +1,238 @@ +#include"api.h" +typedef unsigned char u8; +typedef unsigned long long u64; +typedef unsigned int u32; +#define PR0_ROUNDS 76 +#define PR_ROUNDS 28 +#define PRF_ROUNDS 32 +#define RATE 24 +#define sbox(a, b, c, d, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} +#define ROTR961(a,b,n) (((a)<<(n))|((b)>>(64-n))) +#define ROTR962(a,b,n) (((b)<<(n))|((a)>>(32-n))) +#define ROTR96MORE321(a,b,n) ((b<<(n-32))>>32) +#define ROTR96MORE322(a,b,n) (b<<n)|((u64)a<<(n-32))|(b>>(96-n)) +#define U32BIG(x) (x) +#define U64BIG(x) (x) +u8 constant7[76] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03, 0x06, 0x0c, + 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a, 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, + 0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b, 0x16, 0x2c, + 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, + 0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43, 0x07, 0x0e, + 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, + 0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b }; +#define ROUND384(i){\ +x00 ^= constant7[i];\ +sbox(x00, x10, x20, x30, x50, x60, x70);\ +sbox(x01, x11, x21, x31, x51, x61, x71);\ +x11 = ROTR961(x51, x50, 1);\ +x10 = ROTR962(x51, x50, 1);\ +x21 = ROTR961(x61, x60, 8);\ +x20 = ROTR962(x61, x60, 8);\ +x31 = ROTR96MORE321(x71, x70, 55);\ +x30 = ROTR96MORE322(x71, x70, 55);\ +} +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + 
const unsigned char *k) { + u64 i; + u64 t1, t2, t3, t5, t6, t8, t9, t11; + u64 x30 = 0, x20 = 0, x10 = 0, x00 = 0; + u32 x31 = 0, x21 = 0, x11 = 0, x01 = 0; + u8 tempData[24] = { 0 }; + u8 tempData1[24] = { 0 }; + u64 x50, x60, x70; + u32 x51, x61, x71; + *clen = mlen + CRYPTO_KEYBYTES; + // initialization + x00 = U64BIG(*(u64*)(npub)); + x01 = U32BIG(*(u32*)(npub + 8)); + x10 = ((u64)U32BIG(*(u32*)(k)) << 32) + | ((u64)U32BIG(*(u32*)(npub + 12))); + x11 = U32BIG(*(u32*)(k + 4)); + x20 = U64BIG(*(u64*)(k + 8)); + x30 = 0; + x31 = 0x80000000; + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND384(i); + } + // process associated data + if (adlen) { + while (adlen >= RATE) { + x00 ^= U64BIG(*(u64*)(ad)); + x01 ^= U32BIG(*(u32*)(ad + 8)); + x10 ^= ((u64)U32BIG(*(u32*)(ad + 16)) << 32) | ((u64)U32BIG(*(u32*)(ad + 12))); + x11 ^= U32BIG(*(u32*)(ad + 20)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + adlen -= RATE; + ad += RATE; + } + memset(tempData, 0, RATE); + memcpy(tempData, ad, adlen); + tempData[adlen] = 0x01; + x00 ^= U64BIG(*(u64*)(tempData)); + x01 ^= U32BIG(*(u32*)(tempData + 8)); + x10 ^= ((u64)U32BIG(*(u32*)(tempData + 16)) << 32) | ((u64)U32BIG(*(u32*)(tempData + 12))); + x11 ^= U32BIG(*(u32*)(tempData + 20)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + } + x31 ^= 0x80000000; + // process plaintext + if (mlen) { + while (mlen >= RATE) { + x00 ^= U64BIG(*(u64*)(m)); + x01 ^= U32BIG(*(u32*)(m + 8)); + x10 ^= ((u64)U32BIG(*(u32*)(m + 16)) << 32) | ((u64)U32BIG(*(u32*)(m + 12))); + x11 ^= U32BIG(*(u32*)(m + 20)); + *(u64*)c = U64BIG(x00); + *(u32*)(c + 8) = U32BIG(x01); + *(u32*)(c + 12) = U32BIG(x10); + *(u32*)(c + 16) = U32BIG(x10 >> 32); + *(u32*)(c + 20) = U32BIG(x11); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + mlen -= RATE; + m += RATE; + c += RATE; + } + memset(tempData, 0, RATE); + memcpy(tempData, m, mlen); + tempData[mlen] = 0x01; + x00 ^= U64BIG(*(u64*)(tempData)); + x01 ^= U32BIG(*(u32*)(tempData + 8)); + x10 ^= 
((u64)U32BIG(*(u32*)(tempData + 16)) << 32) | ((u64)U32BIG(*(u32*)(tempData + 12))); + x11 ^= U32BIG(*(u32*)(tempData + 20)); + *(u64*)tempData1 = U64BIG(x00); + *(u32*)(tempData1 + 8) = U32BIG(x01); + *(u32*)(tempData1 + 12) = U32BIG(x10); + *(u32*)(tempData1 + 16) = U32BIG(x10 >> 32); + *(u32*)(tempData1 + 20) = U32BIG(x11); + memcpy(c, tempData1, mlen * sizeof(unsigned char)); + c += mlen; + } + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + ROUND384(i); + } + // return tag + *(u64*)tempData = U64BIG(x00); + *(u32*)(tempData + 8) = U32BIG(x01); + *(u32*)(tempData + 12) = U32BIG(x10); + memcpy(c, tempData, CRYPTO_ABYTES); + return 0; +} +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + *mlen = clen - CRYPTO_ABYTES; + if (clen < CRYPTO_ABYTES) + return -1; + u64 i; + u64 t1, t2, t3, t5, t6, t8, t9, t11; + u64 x30 = 0, x20 = 0, x10 = 0, x00 = 0; + u32 x31 = 0, x21 = 0, x11 = 0, x01 = 0; + u8 tempData[24] = { 0 }; + u8 tempData1[24] = { 0 }; + u64 x50, x60, x70; + u32 x51, x61, x71; + // initialization + x00 = U64BIG(*(u64*)(npub)); + x01 = U32BIG(*(u32*)(npub + 8)); + x10 = ((u64)U32BIG(*(u32*)(k)) << 32) + | ((u64)U32BIG(*(u32*)(npub + 12))); + x11 = U32BIG(*(u32*)(k + 4)); + x20 = U64BIG(*(u64*)(k + 8)); + x30 = 0; + x31 = 0x80000000; + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND384(i); + } + // process associated data + if (adlen) { + while (adlen >= RATE) { + x00 ^= U64BIG(*(u64*)(ad)); + x01 ^= U32BIG(*(u32*)(ad + 8)); + x10 ^= ((u64)U32BIG(*(u32*)(ad + 16)) << 32) | ((u64)U32BIG(*(u32*)(ad + 12))); + x11 ^= U32BIG(*(u32*)(ad + 20)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + adlen -= RATE; + ad += RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; + x00 ^= 
U64BIG(*(u64*)(tempData)); + x01 ^= U32BIG(*(u32*)(tempData + 8)); + x10 ^= ((u64)U32BIG(*(u32*)(tempData + 16)) << 32) | ((u64)U32BIG(*(u32*)(tempData + 12))); + x11 ^= U32BIG(*(u32*)(tempData + 20)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + } + x31 ^= 0x80000000; + // process plaintext + clen -= CRYPTO_KEYBYTES; + if (clen) { + while (clen >= RATE) { + *(u64*)(m) = U64BIG(x00) ^ (*(u64*)(c)); + *(u32*)(m + 8) = U32BIG(x01) ^ (*(u32*)(c + 8)); + *(u32*)(m + 12) = U32BIG(x10) ^ (*(u32*)(c + 12)); + *(u32*)(m + 16) = U32BIG(x10 >> 32) ^ (*(u32*)(c + 16)); + *(u32*)(m + 20) = U32BIG(x11) ^ (*(u32*)(c + 20)); + x00 = U64BIG(*(u64*)(c)); + x01 = U32BIG(*(u32*)(c + 8)); + x10 = U64BIG(*(u64*)(c + 12)); + x11 = U32BIG(*(u32*)(c + 20)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + clen -= RATE; + m += RATE; + c += RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, c, clen * sizeof(unsigned char)); + tempData[clen] = 0x01; + *(u64*)(tempData1) = U64BIG(x00) ^ (*(u64*)(tempData)); + *(u32*)(tempData1 + 8) = U32BIG(x01) ^ (*(u32*)(tempData + 8)); + //*(u64*)(tempData1 + 12) = U64BIG(x10) ^ (*(u64*)(tempData + 12)); + *(u32*)(tempData1 + 12) = U32BIG(x10) ^ (*(u32*)(tempData + 12)); + *(u32*)(tempData1 + 16) = U32BIG(x10 >> 32) ^ (*(u32*)(tempData + 16)); + *(u32*)(tempData1 + 20) = U32BIG(x11) ^ (*(u32*)(tempData + 20)); + memcpy(m, tempData1, clen * sizeof(unsigned char)); + memcpy(tempData1, c, clen * sizeof(unsigned char)); + x00 = U64BIG(*(u64*)(tempData1)); + x01 = U32BIG(*(u32*)(tempData1 + 8)); + x10 = ((u64)U32BIG(*(u32*)(tempData1 + 16)) << 32) | ((u64)U32BIG(*(u32*)(tempData1 + 12))); + x11 = U32BIG(*(u32*)(tempData1 + 20)); + c += clen; + } + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + ROUND384(i); + } + // return -1 if verification fails + *(u64*)(tempData1) = U64BIG(x00); + *(u32*)(tempData1 + 8) = U32BIG(x01); + *(u32*)(tempData1 + 12) = U32BIG(x10); + if (memcmp((void*)tempData1, (void*)c, 
CRYPTO_ABYTES)) { + memset(m, 0, sizeof(unsigned char) * (*mlen)); + *mlen = 0; + return -1; + } + return 0; +} + + diff --git a/knot/Implementations/crypto_aead/knot128v2/opt_2/api.h b/knot/Implementations/crypto_aead/knot128v2/opt_2/api.h new file mode 100644 index 0000000..ca69f5f --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/opt_2/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/knot/Implementations/crypto_aead/knot128v2/opt_2/encrypt.c b/knot/Implementations/crypto_aead/knot128v2/opt_2/encrypt.c new file mode 100644 index 0000000..b62ac40 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/opt_2/encrypt.c @@ -0,0 +1,230 @@ +#include"api.h" +#include <string.h> +typedef unsigned char u8; +typedef unsigned long long u64; +typedef unsigned int u32; +#define PR0_ROUNDS 76 +#define PR_ROUNDS 28 +#define PRF_ROUNDS 32 +#define RATE 24 +#define sbox(a, b, c, d, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} +#define ROTR961(a,b,n) (((a)<<(n))|((b)>>(64-n))) +#define ROTR962(a,b,n) (((b)<<(n))|((a)>>(32-n))) +#define ROTR96MORE321(a,b,n) ((b<<(n-32))>>32) +#define ROTR96MORE322(a,b,n) (b<<n)|((u64)a<<(n-32))|(b>>(96-n)) +#define U32BIG(x) (x) +#define U64BIG(x) (x) +u8 constant7[76] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03, 0x06, 0x0c, + 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a, 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, + 0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b, 0x16, 0x2c, + 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, + 0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43, 0x07, 0x0e, + 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, + 0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b }; +#define ROUND384(i){\ +x00 ^= constant7[i];\ +sbox(x00, x10, x20, x30, x50, x60, 
x70);\ +sbox(x01, x11, x21, x31, x51, x61, x71);\ +x11 = ROTR961(x51, x50, 1);\ +x10 = ROTR962(x51, x50, 1);\ +x21 = ROTR961(x61, x60, 8);\ +x20 = ROTR962(x61, x60, 8);\ +x31 = ROTR96MORE321(x71, x70, 55);\ +x30 = ROTR96MORE322(x71, x70, 55);\ +} +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + u64 i; + u64 t1, t2, t3, t5, t6, t8, t9, t11; + u64 x30 = 0, x20 = 0, x10 = 0, x00 = 0; + u32 x31 = 0, x21 = 0, x11 = 0, x01 = 0; + u8 tempData[24] = { 0 }; + u8 tempData1[24] = { 0 }; + u64 x50, x60, x70; + u32 x51, x61, x71; + *clen = mlen + CRYPTO_KEYBYTES; + // initialization + x00 = U64BIG(*(u64*)(npub)); + x01 = U32BIG(*(u32*)(npub + 8)); + x10 = ((u64)U32BIG(*(u32*)(k)) << 32) + | ((u64)U32BIG(*(u32*)(npub + 12))); + x11 = U32BIG(*(u32*)(k + 4)); + x20 = U64BIG(*(u64*)(k + 8)); + x30 = 0; + x31 = 0x80000000; + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND384(i); + } + // process associated data + if (adlen) { + while (adlen >= RATE) { + x00 ^= U64BIG(*(u64*)(ad)); + x01 ^= U32BIG(*(u32*)(ad + 8)); + x10 ^= U64BIG(*(u64*)(ad + 12)); + x11 ^= U32BIG(*(u32*)(ad + 20)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + adlen -= RATE; + ad += RATE; + } + memset(tempData, 0, RATE); + memcpy(tempData, ad, adlen); + tempData[adlen] = 0x01; + x00 ^= U64BIG(*(u64*)(tempData)); + x01 ^= U32BIG(*(u32*)(tempData + 8)); + x10 ^= U64BIG(*(u64*)(tempData + 12)); + x11 ^= U32BIG(*(u32*)(tempData + 20)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + } + x31 ^= 0x80000000; + // process plaintext + if (mlen) { + while (mlen >= RATE) { + x00 ^= U64BIG(*(u64*)(m)); + x01 ^= U32BIG(*(u32*)(m + 8)); + x10 ^= U64BIG(*(u64*)(m + 12)); + x11 ^= U32BIG(*(u32*)(m + 20)); + *(u64*)c = U64BIG(x00); + *(u32*)(c + 8) = U32BIG(x01); + *(u64*)(c + 12) = U64BIG(x10); + *(u32*)(c + 
20) = U32BIG(x11); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + mlen -= RATE; + m += RATE; + c += RATE; + } + memset(tempData, 0, RATE); + memcpy(tempData, m, mlen); + tempData[mlen] = 0x01; + x00 ^= U64BIG(*(u64*)(tempData)); + x01 ^= U32BIG(*(u32*)(tempData + 8)); + x10 ^= U64BIG(*(u64*)(tempData + 12)); + x11 ^= U32BIG(*(u32*)(tempData + 20)); + *(u64*)tempData1 = U64BIG(x00); + *(u32*)(tempData1 + 8) = U32BIG(x01); + *(u64*)(tempData1 + 12) = U64BIG(x10); + *(u32*)(tempData1 + 20) = U32BIG(x11); + memcpy(c, tempData1, mlen * sizeof(unsigned char)); + c += mlen; + } + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + ROUND384(i); + } + // return tag + *(u64*)c = U64BIG(x00); + *(u32*)(c + 8) = U32BIG(x01); + *(u32*)(c + 12) = U32BIG(x10); + return 0; +} +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + *mlen = clen - CRYPTO_ABYTES; + if (clen < CRYPTO_ABYTES) + return -1; + u64 i; + u64 t1, t2, t3, t5, t6, t8, t9, t11; + u64 x30 = 0, x20 = 0, x10 = 0, x00 = 0; + u32 x31 = 0, x21 = 0, x11 = 0, x01 = 0; + u8 tempData[24] = { 0 }; + u8 tempData1[24] = { 0 }; + u64 x50, x60, x70; + u32 x51, x61, x71; + // initialization + x00 = U64BIG(*(u64*)(npub)); + x01 = U32BIG(*(u32*)(npub + 8)); + x10 = ((u64)U32BIG(*(u32*)(k)) << 32) + | ((u64)U32BIG(*(u32*)(npub + 12))); + x11 = U32BIG(*(u32*)(k + 4)); + x20 = U64BIG(*(u64*)(k + 8)); + x30 = 0; + x31 = 0x80000000; + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND384(i); + } + // process associated data + if (adlen) { + while (adlen >= RATE) { + x00 ^= U64BIG(*(u64*)(ad)); + x01 ^= U32BIG(*(u32*)(ad + 8)); + x10 ^= U64BIG(*(u64*)(ad + 12)); + x11 ^= U32BIG(*(u32*)(ad + 20)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + adlen -= RATE; + ad += RATE; + } + memset(tempData, 0, sizeof(tempData)); + 
memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; + x00 ^= U64BIG(*(u64*)(tempData)); + x01 ^= U32BIG(*(u32*)(tempData + 8)); + x10 ^= U64BIG(*(u64*)(tempData + 12)); + x11 ^= U32BIG(*(u32*)(tempData + 20)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + } + x31 ^= 0x80000000; + // process plaintext + clen -= CRYPTO_KEYBYTES; + if (clen) { + while (clen >= RATE) { + *(u64*)(m) = U64BIG(x00) ^ (*(u64*)(c)); + *(u32*)(m + 8) = U32BIG(x01) ^ (*(u32*)(c + 8)); + *(u64*)(m + 12) = U64BIG(x10) ^ (*(u64*)(c + 12)); + *(u32*)(m + 20) = U32BIG(x11) ^ (*(u32*)(c + 20)); + x00 = U64BIG(*(u64*)(c)); + x01 = U32BIG(*(u32*)(c + 8)); + x10 = U64BIG(*(u64*)(c + 12)); + x11 = U32BIG(*(u32*)(c + 20)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + clen -= RATE; + m += RATE; + c += RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, c, clen * sizeof(unsigned char)); + tempData[clen] = 0x01; + *(u64*)(tempData1) = U64BIG(x00) ^ (*(u64*)(tempData)); + *(u32*)(tempData1 + 8) = U32BIG(x01) ^ (*(u32*)(tempData + 8)); + *(u64*)(tempData1 + 12) = U64BIG(x10) ^ (*(u64*)(tempData + 12)); + *(u32*)(tempData1 + 20) = U32BIG(x11) ^ (*(u32*)(tempData + 20)); + memcpy(m, tempData1, clen * sizeof(unsigned char)); + memcpy(tempData1, c, clen * sizeof(unsigned char)); + x00 = U64BIG(*(u64*)(tempData1)); + x01 = U32BIG(*(u32*)(tempData1 + 8)); + x10 = U64BIG(*(u64*)(tempData1 + 12)); + x11 = U32BIG(*(u32*)(tempData1 + 20)); + c += clen; + } + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + ROUND384(i); + } + // return -1 if verification fails + if ((*(u64*)(c) != U64BIG(x00)) || (*(u32*)(c + 8) != U32BIG(x01)) + || (*(u32*)(c + 12) != (u32)U64BIG(x10))) { + memset(m, 0, sizeof(unsigned char) * (*mlen)); + *mlen = 0; + return -1; + } + return 0; +} + diff --git a/knot/Implementations/crypto_aead/knot128v2/opt_3_ARR/api.h b/knot/Implementations/crypto_aead/knot128v2/opt_3_ARR/api.h new file mode 100644 index 0000000..ca69f5f --- 
/dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/opt_3_ARR/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/knot/Implementations/crypto_aead/knot128v2/opt_3_ARR/encrypt.c b/knot/Implementations/crypto_aead/knot128v2/opt_3_ARR/encrypt.c new file mode 100644 index 0000000..9cb8b3b --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/opt_3_ARR/encrypt.c @@ -0,0 +1,205 @@ +#include"api.h" +typedef unsigned char u8; +typedef unsigned long long u64; +typedef unsigned int u32; + +#define PR0_ROUNDS 76 +#define PR_ROUNDS 28 +#define PRF_ROUNDS 32 +#define RATE 24 +#define ROTR64(x,n) (((x)>>(n))|((x)<<(64-(n)))) +#define LOTR64(x,n) (((x)<<(n))|((x)>>(64-(n)))) +#define ROTR32(x,n) (((x)>>(n))|((x)<<(32-(n)))) +#define sbox64(a, b, c, d, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t4 = b | c; t5 = d ^ t1; g = t4 ^ t5; t6 = b ^ d; t7 = t3 & t5; a = t6 ^ t7; t8 = g & t6; f = t3 ^ t8; \ +} +#define sbox32(a, b, c, d, f, g, h) \ +{ \ + t_1 = ~a; t_2 = b & t_1;t_3 = c ^ t_2; h = d ^ t_3; t_4 = b | c; t_5 = d ^ t_1; g = t_4 ^ t_5; t_6 = b ^ d; t_7 = t_3 & t_5; a = t_6 ^ t_7; t_8 = g & t_6; f = t_3 ^ t_8; \ +} + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define ROTR961(a,b,n) (((a)<<(n))|((b)>>(64-n))) +#define ROTR962(a,b,n) (((b)<<(n))|((a)>>(32-n))) + +#define ROTR96MORE321(a,b,n) ((b<<(n-32))>>32) +#define ROTR96MORE322(a,b,n) (b<<n)|((u64)a<<(n-32))|(b>>(96-n)) + +#define U32BIG(x) (x) +#define U64BIG(x) (x) +u8 constant7[76] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03, 0x06, + 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a, 0x14, 0x28, 0x51, 0x23, 0x47, + 0x0f, 0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b, 0x16, + 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54, 0x29, 0x53, + 0x27, 0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43, 0x07, + 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 
0x49, 0x13, 0x26, + 0x4d, 0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b }; + +#define ROUND384(i){\ + s[0] ^= constant7[i]; \ + sbox64(U64BIG(*(u64*)(s)), U64BIG(*(u64*)(s+3)), U64BIG(*(u64*)(s+6)), U64BIG(*(u64*)(s+9)), x50, x60, x70); \ + sbox32(s[2], s[5], s[8], s[11], x51, x61, x71); \ + s[5] = ROTR961(x51, x50, 1); \ + U64BIG(*(u64*)(s + 3)) = ROTR962(x51, x50, 1); \ + s[8] = ROTR961(x61, x60, 8); \ + U64BIG(*(u64*)(s + 6)) = ROTR962(x61, x60, 8); \ + s[11] = ROTR96MORE321(x71, x70, 55); \ + U64BIG(*(u64*)(s + 9)) = ROTR96MORE322(x71, x70, 55); \ +} + +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + *clen = mlen + CRYPTO_ABYTES; + u32 s[12] = { 0 }, i; + u64 t1, t2, t3, t5, t6, t8, t4, t7; + u32 t_1, t_2, t_3, t_5, t_6, t_8, t_4, t_7; + u64 x50, x60, x70; + u32 x51, x61, x71; + u8 tempData[24] = { 0 }; + memcpy(s, npub, CRYPTO_NPUBBYTES); + memcpy(s + CRYPTO_NPUBBYTES / 4, k, CRYPTO_KEYBYTES); + s[11] = 0x80000000; + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND384(i); + } + // process associated data + if (adlen) { + while (adlen >= RATE) { + U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(ad)); + U64BIG(*(u64*)(s + 2)) ^= U64BIG(*(u64*)(ad + 8)); + U64BIG(*(u64*)(s + 4)) ^= U64BIG(*(u64*)(ad + 16)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + adlen -= RATE; + ad += RATE; + } + memset(tempData, 0, RATE); + memcpy(tempData, ad, adlen ); + tempData[adlen] = 0x01; + U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(tempData)); + U64BIG(*(u64*)(s + 2)) ^= U64BIG(*(u64*)(tempData + 8)); + U64BIG(*(u64*)(s + 4)) ^= U64BIG(*(u64*)(tempData + 16)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + } + s[11] ^= 0x80000000; + // process plaintext + if (mlen) { + while (mlen >= RATE) { + U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(m)); + U64BIG(*(u64*)(s + 2)) ^= U64BIG(*(u64*)(m + 8)); + 
U64BIG(*(u64*)(s + 4)) ^= U64BIG(*(u64*)(m + 16)); + memcpy(c, s, RATE ); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + mlen -= RATE; + m += RATE; + c += RATE; + } + memset(tempData, 0, RATE); + memcpy(tempData, m, mlen ); + tempData[mlen] = 0x01; + U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(tempData)); + U64BIG(*(u64*)(s + 2)) ^= U64BIG(*(u64*)(tempData + 8)); + U64BIG(*(u64*)(s + 4)) ^= U64BIG(*(u64*)(tempData + 16)); + memcpy(c, s, mlen ); + c += mlen; + } + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + ROUND384(i); + } + memcpy(c, s, CRYPTO_ABYTES); + return 0; +} + +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + *mlen = clen - CRYPTO_ABYTES; + if (clen < CRYPTO_ABYTES) + return -1; + u32 s[12] = { 0 }, i; + u64 t1, t2, t3, t5, t6, t8, t4, t7; + u32 t_1, t_2, t_3, t_5, t_6, t_8, t_4, t_7; + u64 x50, x60, x70; + u32 x51, x61, x71; + u8 tempData[24] = { 0 }; + memcpy(s, npub, CRYPTO_NPUBBYTES); + memcpy(s + CRYPTO_NPUBBYTES / 4, k, CRYPTO_KEYBYTES); + s[11] = 0x80000000; + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND384(i); + } + // process associated data + if (adlen) { + while (adlen >= RATE) { + U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(ad)); + U64BIG(*(u64*)(s + 2)) ^= U64BIG(*(u64*)(ad + 8)); + U64BIG(*(u64*)(s + 4)) ^= U64BIG(*(u64*)(ad + 16)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + adlen -= RATE; + ad += RATE; + } + memset(tempData, 0, RATE); + memcpy(tempData, ad, adlen ); + tempData[adlen] = 0x01; + U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(tempData)); + U64BIG(*(u64*)(s + 2)) ^= U64BIG(*(u64*)(tempData + 8)); + U64BIG(*(u64*)(s + 4)) ^= U64BIG(*(u64*)(tempData + 16)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + } + s[11] ^= 0x80000000; + ///////// + clen -= CRYPTO_ABYTES; + if (clen) { + while (clen >= RATE) { + 
U64BIG(*(u64*)(m)) = + U64BIG(*(u64*)(s)) ^ U64BIG(*(u64*)(c)); + U64BIG(*(u64*)(m + 8)) = U64BIG( + *(u64*)(s + 2)) ^ U64BIG(*(u64*)(c + 8)); + U64BIG(*(u64*)(m + 16)) = U64BIG( + *(u64*)(s + 4)) ^ U64BIG(*(u64*)(c + 16)); + memcpy(s, c, RATE ); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + clen -= RATE; + m += RATE; + c += RATE; + } + memset(tempData, 0, RATE); + memcpy(tempData, c, clen ); + tempData[clen] = 0x01; + U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(tempData)); + U64BIG(*(u64*)(s + 2)) ^= U64BIG(*(u64*)(tempData + 8)); + U64BIG(*(u64*)(s + 4)) ^= U64BIG(*(u64*)(tempData + 16)); + memcpy(m, s, clen ); + memcpy(s, c, clen ); + c += clen; + } + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + ROUND384(i); + } + if (memcmp((void*)s, (void*)c, CRYPTO_ABYTES)) { + memset(m, 0, (*mlen)); + *mlen = 0; + return -1; + } + return 0; +} + + diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_5_/api.h b/knot/Implementations/crypto_aead/knot192/armcortexm_5_/api.h new file mode 100644 index 0000000..c3cb1d9 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_5_/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 24 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 24 +#define CRYPTO_ABYTES 24 +#define CRYPTO_NOOVERLAP 1 + diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_5_/auxFormat.c b/knot/Implementations/crypto_aead/knot192/armcortexm_5_/auxFormat.c new file mode 100644 index 0000000..60d96ad --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_5_/auxFormat.c @@ -0,0 +1,98 @@ + +#include"auxFormat.h" +void ROUND384_Three(unsigned int *s, unsigned char *c,int lunnum) {\ + unsigned int t,t1,t2; + u32 rci; + rci=c[0]; + ROUND384_1(rci); + t = 1; + while (lunnum--) { + rci=c[t]; + ROUND384_2(rci); + t++; + rci=c[t]; + ROUND384_3(rci); + t++; + rci=c[t]; + ROUND384_4(rci); + t++; + } +} + + + + +void packU96FormatToThreePacket(u32 * out, u8 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 
}; + u32 temp2[3] = { 0 }; + temp0[0] = U32BIG(((u32*)in)[0]); + temp0[1] = U32BIG(((u32*)in)[0]) >> 1; + temp0[2] = U32BIG(((u32*)in)[0]) >> 2; + puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + temp1[0] = U32BIG(((u32*)in)[1]); + temp1[1] = U32BIG(((u32*)in)[1]) >>1; + temp1[2] = U32BIG(((u32*)in)[1]) >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + temp2[0] = U32BIG(((u32*)in)[2]); + temp2[1] = U32BIG(((u32*)in)[2]) >> 1; + temp2[2] = U32BIG(((u32*)in)[2]) >> 2; + puckU32ToThree_1(temp2[0]); + puckU32ToThree_1(temp2[1]); + puckU32ToThree_1(temp2[2]); + out[0] = (temp2[1]<<21) |(temp1[0]<<10) |temp0[2]; + out[1] = (temp2[0] << 21) | (temp1[2] << 11) | temp0[1]; + out[2] = (temp2[2] << 22) | (temp1[1] << 11) | temp0[0]; +} +void unpackU96FormatToThreePacket(u8 * out, u32 * in) { + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + u32 t[3] = { 0 }; + temp0[0] = in[2] & 0x7ff; + temp0[1] = in[1] & 0x7ff; + temp0[2] = in[0] & 0x3ff; + temp1[0] = (in[0]>>10) & 0x7ff; + temp1[1] = (in[2] >>11 ) & 0x7ff; + temp1[2] = (in[1] >> 11) & 0x3ff; + temp2[0] = in[1] >> 21; + temp2[1] = in[0] >> 21; + temp2[2] = in[2] >> 22; + unpuckU32ToThree_1(temp0[0]); + unpuckU32ToThree_1(temp0[1]); + unpuckU32ToThree_1(temp0[2]); + t[0] = temp0[0] | temp0[1] << 1 | temp0[2] << 2; + unpuckU32ToThree_1(temp1[0]); + unpuckU32ToThree_1(temp1[1]); + unpuckU32ToThree_1(temp1[2]); + t[1] = temp1[0] | temp1[1] << 1 | temp1[2] << 2; + unpuckU32ToThree_1(temp2[0]); + unpuckU32ToThree_1(temp2[1]); + unpuckU32ToThree_1(temp2[2]); + t[2] = temp2[0] | temp2[1] << 1 | temp2[2] << 2; + memcpy(out, t, 12 * sizeof(unsigned char)); +} +void packU48FormatToThreePacket(u32 * out, u8 * in) { + u32 t1 = (u32)U16BIG(*(u16*)(in + 4)); + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + temp0[0] = U32BIG(((u32*)in)[0]); + temp0[1] = U32BIG(((u32*)in)[0]) >> 1; + temp0[2] = U32BIG(((u32*)in)[0]) >> 2; + 
puckU32ToThree_1(temp0[0]); + puckU32ToThree_1(temp0[1]); + puckU32ToThree_1(temp0[2]); + temp1[0] = t1; + temp1[1] = t1 >> 1; + temp1[2] = t1 >> 2; + puckU32ToThree_1(temp1[0]); + puckU32ToThree_1(temp1[1]); + puckU32ToThree_1(temp1[2]); + out[0] = (temp1[0] << 10) | temp0[2]; + out[1] = (temp1[2] << 11) | temp0[1]; + out[2] = (temp1[1] << 11) | temp0[0]; +} diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_5_/auxFormat.h b/knot/Implementations/crypto_aead/knot192/armcortexm_5_/auxFormat.h new file mode 100644 index 0000000..bc59690 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_5_/auxFormat.h @@ -0,0 +1,213 @@ + +#include"crypto_aead.h" +#include"api.h" +#include +#include +#include +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; +#define puckU32ToThree_1(x){\ +x &= 0x49249249;\ +x = (x | (x >> 2)) & 0xc30c30c3;\ +x = (x | (x >>4)) & 0x0f00f00f;\ +x = (x | (x >> 8)) & 0xff0000ff;\ +x = (x | (x >> 16)) & 0xfff;\ +} +#define unpuckU32ToThree_1(x){\ +x &= 0xfff;\ +x = (x | (x << 16)) & 0xff0000ff;\ +x = (x | (x << 8)) & 0x0f00f00f;\ +x = (x | (x << 4)) & 0xc30c30c3;\ +x = (x | (x << 2)) & 0x49249249;\ +} +void ROUND384_Three(unsigned int *s, unsigned char *c,int lunnum); +#define ARC(rci) \ + do { \ + __asm__ __volatile__ ( \ + "/*add round const s0 s1 s2 */ \n\t"\ + "ands %[t1], %[rci], #0xc0\n\t" \ + "eors %[S_0], %[S_0], %[t1], LSR #6 \n\t" /*s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;*/\ + "ands %[t1], %[rci], #0x38\n\t" \ + "eors %[S_1], %[S_1], %[t1], LSR #3 \n\t" /*s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;*/\ + "ands %[t1], %[rci], #0x7\n\t" \ + "eors %[S_3], %[S_3], %[t1] \n\t" /*s[2] ^= constant7Format[lunNum] & 0x7;*/\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [rci] "+r" (rci), \ + [S_0] "+r" (s[0]), [S_1] "+r" (s[1]), [S_3] "+r" (s[2])\ + : : );\ +}while 
(0) +#define SBOX(S1,S2,S3,S4) \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[S_4], %[t1] \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] \n\t"\ + "eors %[S_0], %[S_6], %[S_0] \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] \n\t"\ + "eors %[S_6], %[S_6], %[t1] \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], %[t2] , %[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : : );\ +}while (0) +#define SBOX1(S1,S2,S3,S4) \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "ROR %[S_4] , #30 \n\t"\ + "ROR %[S_6] , #14 \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[S_4], %[t1] \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] \n\t"\ + "eors %[S_0], %[S_6], %[S_0] \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] \n\t"\ + "eors %[S_6], %[S_6], %[t1] \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], %[t2] , %[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : : );\ +}while (0) +#define SBOX2(S1,S2,S3,S4) \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ +"ROR %[S_4] , #29 \n\t"\ +"ROR %[S_6] , #14 \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[S_4], %[t1] \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] \n\t"\ + "eors %[S_0], %[S_6], %[S_0] \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] \n\t"\ + "eors %[S_6], %[S_6], %[t1] \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], 
%[t2] , %[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : : "cc" /* the s-suffixed instructions above update the condition flags */);\ +}while (0) +#define SBOX3(S1,S2,S3,S4) \ + do { \ + __asm__ __volatile__ ( \ + "/*sbox column*/ \n\t"\ + "ROR %[S_2] , #31 \n\t"\ + "ROR %[S_4] , #29 \n\t"\ + "ROR %[S_6] , #13 \n\t"\ + "mvns %[S_0], %[S_0] \n\t"\ + "ands %[t1], %[S_2], %[S_0] \n\t"\ + "eors %[t1], %[S_4], %[t1] \n\t"\ + "orrs %[S_4], %[S_2], %[S_4] \n\t"\ + "eors %[S_0], %[S_6], %[S_0] \n\t"\ + "eors %[S_4], %[S_4], %[S_0] \n\t"\ + "eors %[t2], %[S_2], %[S_6] \n\t"\ + "eors %[S_6], %[S_6], %[t1] \n\t"\ + "ands %[S_0], %[t1] , %[S_0] \n\t"\ + "eors %[S_0], %[t2] , %[S_0] \n\t"\ + "ands %[S_2], %[S_4], %[t2] \n\t"\ + "eors %[S_2], %[t1] , %[S_2] \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1), [t2] "=r" (t2),\ + [S_0] "+r" (S1), [S_2] "+r" (S2), [S_4] "+r" (S3), [S_6] "+r" (S4) \ + : : "cc" /* the s-suffixed instructions above update the condition flags */);\ +}while (0) +#define ROUND384_1(rci) {\ + ARC(rci);\ +SBOX(s[0], s[3], s[6], s[9] );\ +SBOX(s[1], s[4], s[7], s[10]);\ +SBOX(s[2], s[5], s[8], s[11]);\ +} +#define ROUND384_2(rci) {\ + ARC(rci);\ +SBOX1(s[0], s[4], s[8], s[10] );\ +SBOX2(s[1], s[5], s[6], s[11]);\ +SBOX3(s[2], s[3], s[7], s[9]);\ +} +#define ROUND384_3(rci) {\ + ARC(rci);\ +SBOX1(s[0], s[5], s[7], s[11]);\ +SBOX2(s[1], s[3], s[8], s[9]);\ +SBOX3(s[2], s[4], s[6], s[10]);\ +} +#define ROUND384_4(rci) {\ + ARC(rci);\ +SBOX1(s[0], s[3], s[6], s[9]);\ +SBOX2(s[1], s[4], s[7], s[10]);\ +SBOX3(s[2], s[5], s[8], s[11]);\ +} +#define P384_1( s, round, lunNum) {\ + u32 t1;\ + ROUND384_Three(s,round,lunNum);\ + __asm__ __volatile__ ( \ + "/*rotate shift left 1 bit [w9 w5 w1-> (w1,1) w9 w5] */ \n\t"\ + "mov %[t1], %[S_3] \n\t"\ + "mov %[S_3], %[S_4] \n\t"\ + "mov %[S_4], %[S_5] \n\t"\ + "ROR %[S_5], %[t1] , #31 \n\t"\ + "/*rotate shift left 8
bits [w10 w6 w2-> (w6,3) (w2,3) ( w10,2)]*/ \n\t"\ + "mov %[t1], %[S_8] \n\t"\ + "ROR %[S_8], %[S_7] , #29 \n\t"\ + "ROR %[S_7], %[S_6] , #29 \n\t"\ + "ROR %[S_6], %[t1] , #30 \n\t"\ + "/*rotate shift left 55 bit [w11 w7 w3-> (w3,13) (w11,14) ( w7,14)] */ \n\t"\ + "mov %[t1], %[S_9] \n\t"\ + "ROR %[S_9], %[S_10] , #14 \n\t"\ + "ROR %[S_10], %[S_11] , #14 \n\t"\ + "ROR %[S_11], %[t1] , #13 \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1),\ + [S_3] "+r" (s[3]), [S_6] "+r" (s[6]), [S_9] "+r" (s[9]) ,\ + [S_4] "+r" (s[4]), [S_7] "+r" (s[7]), [S_10] "+r" (s[10]),\ + [S_5] "+r" (s[5]), [S_8] "+r" (s[8]), [S_11] "+r" (s[11])\ + : : );\ +} +#define P384_2( s, round, lunNum) {\ + u32 t1,rci;\ + ROUND384_Three(s,round,lunNum);\ + rci=round[lunNum*3+1];\ + ROUND384_2(rci);\ + __asm__ __volatile__ ( \ + "/*rotate shift left 1 bit [w9 w5 w1-> (w1,1) w9 w5] */ \n\t"\ + "mov %[t1], %[S_4] \n\t"\ + "mov %[S_4], %[S_3] \n\t"\ + "mov %[S_3], %[S_5] \n\t"\ + "ROR %[S_5], %[t1] , #31 \n\t"\ + "/*rotate shift left 8 bits [w10 w6 w2-> (w6,3) (w2,3) ( w10,2)]*/ \n\t"\ + "mov %[t1], %[S_8] \n\t"\ + "ROR %[S_8], %[S_6] , #29 \n\t"\ + "ROR %[S_6], %[S_7] , #30 \n\t"\ + "ROR %[S_7], %[t1] , #29 \n\t"\ + "/*rotate shift left 55 bit [w11 w7 w3-> (w3,13) (w11,14) ( w7,14)] */ \n\t"\ + "mov %[t1], %[S_10] \n\t"\ + "ROR %[S_10], %[S_9] , #14 \n\t"\ + "ROR %[S_9], %[S_11] , #14 \n\t"\ + "ROR %[S_11], %[t1] , #13 \n\t"\ + : /* output variables - including inputs that are changed */\ + [t1] "=r" (t1),\ + [S_3] "+r" (s[3]), [S_6] "+r" (s[6]), [S_9] "+r" (s[9]) ,\ + [S_4] "+r" (s[4]), [S_7] "+r" (s[7]), [S_10] "+r" (s[10]),\ + [S_5] "+r" (s[5]), [S_8] "+r" (s[8]), [S_11] "+r" (s[11])\ + : : );\ +} + diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_5_/crypto_aead.h b/knot/Implementations/crypto_aead/knot192/armcortexm_5_/crypto_aead.h new file mode 100644 index 0000000..862d176 --- /dev/null +++ 
b/knot/Implementations/crypto_aead/knot192/armcortexm_5_/crypto_aead.h @@ -0,0 +1,18 @@ + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k +); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k +); diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_5_/encrypt.c b/knot/Implementations/crypto_aead/knot192/armcortexm_5_/encrypt.c new file mode 100644 index 0000000..c29a321 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_5_/encrypt.c @@ -0,0 +1,177 @@ + +#include"auxFormat.h" + +#define aead_RATE 12 + +#define PR0_ROUNDS 25 +#define PR_ROUNDS 13 +#define PRF_ROUNDS 14 +/* +#define PR0_ROUNDS 76 /3=25+1 +#define PR_ROUNDS 40 /3=13+1 +#define PRF_ROUNDS 44 /3=14+2 + * */ +unsigned char constant7Format[76] = { +0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, +0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, +0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, +0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, +0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, +0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, +0xc7,0x1e,0xd1,0x8f}; +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + u32 s[12] = { 0 }; + u32 dataFormat[3] = { 0 }; + u8 tempData[24] = { 0 }; + u32 t2; + *clen = mlen + CRYPTO_ABYTES; + // initialization + packU96FormatToThreePacket(s, npub); + 
packU96FormatToThreePacket((s + 3), (npub + 12)); + packU96FormatToThreePacket((s + 6), k); + packU96FormatToThreePacket((s + 9), (k + 12)); + + P384_1(s, constant7Format,PR0_ROUNDS); + // process associated data + if (adlen) { + while (adlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + P384_1(s, constant7Format,PR_ROUNDS); + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, aead_RATE); + memcpy(tempData, ad, adlen); + tempData[adlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + P384_1(s, constant7Format,PR_ROUNDS); + } + s[9] ^= 0x80000000; + if (mlen) { + while (mlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, m); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + unpackU96FormatToThreePacket(c, s); + P384_1(s, constant7Format,PR_ROUNDS); + mlen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + memset(tempData, 0, aead_RATE); + memcpy(tempData, m, mlen); + tempData[mlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + unpackU96FormatToThreePacket(tempData, s); + memcpy(c, tempData, mlen); + c += mlen; + } + // finalization + P384_2(s, constant7Format,PRF_ROUNDS); + // return tag + unpackU96FormatToThreePacket(tempData, s); + unpackU96FormatToThreePacket((tempData + 12), (s + 3)); + memcpy(c, tempData, CRYPTO_ABYTES ); + return 0; +} + +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + u32 s[12] = { 0 }; + u32 dataFormat[6] = { 0 }; + u32 dataFormat_1[3] = { 0 }; + u8 tempData[12] = { 0 }; + u8 tempU8[48] = { 0 }; + u32 t2; + *mlen = clen 
- CRYPTO_ABYTES; + if (clen < CRYPTO_ABYTES) + return -1; + // initialization + packU96FormatToThreePacket(s, npub); + packU96FormatToThreePacket((s + 3), (npub + 12)); + packU96FormatToThreePacket((s + 6), k); + packU96FormatToThreePacket((s + 9), (k + 12)); + + P384_1(s, constant7Format,PR0_ROUNDS); + // process associated data + if (adlen) { + while (adlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + + P384_1(s, constant7Format,PR_ROUNDS); + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, aead_RATE); + + memcpy(tempData, ad, adlen); + tempData[adlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + + P384_1(s, constant7Format,PR_ROUNDS); + } + s[9] ^= 0x80000000; + clen -= CRYPTO_ABYTES; + if (clen) { + while (clen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, c); + dataFormat_1[0] = s[0] ^ dataFormat[0]; + dataFormat_1[1] = s[1] ^ dataFormat[1]; + dataFormat_1[2] = s[2] ^ dataFormat[2]; + unpackU96FormatToThreePacket(m, dataFormat_1); + s[0] = dataFormat[0]; + s[1] = dataFormat[1]; + s[2] = dataFormat[2]; + + P384_1(s, constant7Format,PR_ROUNDS); + clen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + unpackU96FormatToThreePacket(tempU8, s); + memset(tempData, 0, aead_RATE); + memcpy(tempData, c, clen ); + tempData[clen] = 0x01; + U32BIG(((u32*)tempU8)[0]) ^= U32BIG(((u32* )tempData)[0]); + U32BIG(((u32*)tempU8)[1]) ^= U32BIG(((u32* )tempData)[1]); + U32BIG(((u32*)tempU8)[2]) ^= U32BIG(((u32* )tempData)[2]); + memcpy(m, tempU8, clen ); + memcpy(tempU8, tempData, clen ); + packU96FormatToThreePacket(s, tempU8); + c+=clen; + } + // finalization + + P384_2(s, constant7Format,PRF_ROUNDS); + // return tag + unpackU96FormatToThreePacket(tempU8, s); + unpackU96FormatToThreePacket(tempU8 + 12, s + 3); + if (memcmp((void*)tempU8, (void*)(c), CRYPTO_ABYTES)) 
{ + memset(m, 0, (*mlen)); + *mlen = 0; + return -1; + } + return 0; +} diff --git a/knot/Implementations/crypto_aead/knot192/opt_1/api.h b/knot/Implementations/crypto_aead/knot192/opt_1/api.h new file mode 100644 index 0000000..332aa0a --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/opt_1/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 24 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 24 +#define CRYPTO_ABYTES 24 +#define CRYPTO_NOOVERLAP 1 diff --git a/knot/Implementations/crypto_aead/knot192/opt_1/encrypt.c b/knot/Implementations/crypto_aead/knot192/opt_1/encrypt.c new file mode 100644 index 0000000..eda0261 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/opt_1/encrypt.c @@ -0,0 +1,259 @@ +#include"api.h" +#include <string.h> /* memcpy, memset, memcmp are used below */ +typedef unsigned char u8; +typedef unsigned long long u64; +typedef unsigned int u32; + +#define RATE (96 / 8) +#define PR0_ROUNDS 76 +#define PR_ROUNDS 40 +#define PRF_ROUNDS 44 +#define sbox32(a, b, c, d, f, g, h) \ +{ \ +t_1 = ~a; t_2 = b & t_1; t_3 = c ^ t_2; h = d ^ t_3; t_5 = b | c; t_6 = d ^ t_1; g = t_5 ^ t_6; t_8 = b ^ d; t_9 = t_3 & t_6; a = t_8 ^ t_9; t_11 = g & t_8; f = t_3 ^ t_11; \ +} + +#define sbox64(a, b, c, d, f, g, h) \ +{ \ +t1 = ~a; t2 = b & t1; t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} + + +#define ROTR64(x,n) (((x)>>(n))|((x)<<(64-(n)))) +#define ROTR32(x,n) (((x)>>(n))|((x)<<(32-(n)))) +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define ROTR961(a,b,n) (((a)<<(n))|((b)>>(64-n))) +#define ROTR962(a,b,n) (((b)<<(n))|((a)>>(32-n))) + +#define ROTR96MORE321(a,b,n) ((b<<(n-32))>>32) +#define ROTR96MORE322(a,b,n) (((b)<<(n))|((u64)(a)<<((n)-32))|((b)>>(96-(n)))) /* low 64 bits of the 96-bit word a:b rotated left by n, 32 < n < 64; reconstructed, the middle of this expression had been lost */ + +#define EXT_BYTE32(x,n) ((u8)((u32)(x)>>(8*(n)))) +#define INS_BYTE32(x,n) ((u32)(x)<<(8*(n))) +#define U32BIG(x) (x) +#define EXT_BYTE64(x,n) ((u8)((u64)(x)>>(8*(n)))) +#define INS_BYTE64(x,n) ((u64)(x)<<(8*(n))) +#define U64BIG(x) (x) + +u8 constant7[127] = { 0x01, 0x02, 0x04, 0x08,
0x10, 0x20, 0x41, 0x03, 0x06, + 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a, 0x14, 0x28, 0x51, 0x23, 0x47, + 0x0f, 0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b, 0x16, + 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54, 0x29, 0x53, + 0x27, 0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43, 0x07, + 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49, 0x13, 0x26, + 0x4d, 0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37, 0x6f, + 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31, 0x63, 0x46, 0x0d, 0x1a, 0x34, + 0x69, 0x52, 0x25, 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c, 0x39, + 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a, 0x15, 0x2a, 0x55, 0x2b, 0x57, + 0x2f, 0x5f, 0x3f, 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40 }; + +#define ROUND384(i) {\ +x00 ^= constant7[i]; \ +sbox64(x00, x10, x20, x30, x50, x60, x70); \ +sbox32(x01, x11, x21, x31, x51, x61, x71); \ +x11 = ROTR961(x51, x50, 1); \ +x10 = ROTR962(x51, x50, 1); \ +x21 = ROTR961(x61, x60, 8); \ +x20 = ROTR962(x61, x60, 8); \ +x31 = ROTR96MORE321(x71, x70, 55); \ +x30 = ROTR96MORE322(x71, x70, 55); \ +} +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + *clen = mlen + CRYPTO_KEYBYTES; + u64 i; //RATE=96/8=12 + u64 t1, t2, t3, t5, t6, t8, t9, t11; + u32 t_1, t_2, t_3, t_5, t_6, t_8, t_9, t_11; + u64 x30 = 0, x20 = 0, x10 = 0, x00 = 0; + u32 x31 = 0, x21 = 0, x11 = 0, x01 = 0; + + u8 tempData[12] = { 0 }; + u8 tempData1[24] = { 0 }; + u8 tempData2[12] = { 0 }; + u64 x50, x60, x70; + u32 x51, x61, x71; + // initialization + + x00 = U64BIG(*(u64*)(npub)); + x01 = U32BIG(*(u32*)(npub + 8)); + x10 = U64BIG(*(u64*)(npub + 12)); + x11 = U32BIG(*(u32*)(npub + 20)); + x20 = U64BIG(*(u64*)(k)); + x21 = U32BIG(*(u32*)(k + 8)); + + memcpy(&x30, k + 12, 8); + memcpy(&x31, k + 20, 4); + 
//x30 = U64BIG(*(u64*)(k + 12)); + //x31 = U32BIG(*(u32*)(k + 20)); + + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND384(i); + } + + // process associated data + if (adlen) { + while (adlen >= RATE) { + x00 ^= ((u64)U32BIG(*(u32*)(ad + 4)) << 32) | ((u64)U32BIG(*(u32*)(ad))); + //x00 ^= U64BIG(*(u64*)(ad)); + x01 ^= U32BIG(*(u32*)(ad + 8)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + adlen -= RATE; + ad += RATE; + } + + memset(tempData, 0, RATE); + memcpy(tempData, ad, adlen); + tempData[adlen] = 0x01; + x00 ^= U64BIG(*(u64*)(tempData)); + x01 ^= U32BIG(*(u32*)(tempData + 8)); + //dataXOR32(&x01, ad + 8, 4); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + } + x31 ^= 0x80000000; + // process plaintext + if (mlen) { + while (mlen >= RATE) { + x00 ^= ((u64)U32BIG(*(u32*)(m + 4)) << 32) | ((u64)U32BIG(*(u32*)(m))); + //x00 ^= U64BIG(*(u64*)(m)); + x01 ^= U32BIG(*(u32*)(m + 8)); + *(u64*)c = U64BIG(x00); + *(u32*)(c + 8) = U32BIG(x01); + + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + + mlen -= RATE; + m += RATE; + c += RATE; + } + memset(tempData, 0, RATE); + memcpy(tempData, m, mlen); + tempData[mlen] = 0x01; + x00 ^= U64BIG(*(u64*)(tempData)); + x01 ^= U32BIG(*(u32*)(tempData + 8)); + *(u64*)tempData1 = U64BIG(x00); + *(u32*)(tempData1 + 8) = U32BIG(x01); + memcpy(c, tempData1, mlen * sizeof(unsigned char)); + c += mlen; + } + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + ROUND384(i); + } + // return tag + *(u64*)tempData1 = U64BIG(x00); + *(u32*)(tempData1 + 8) = U32BIG(x01); + *(u32*)(tempData1 + 12) = U32BIG(x10); + *(u32*)(tempData1 + 16) = U32BIG(x10 >> 32); + *(u32*)(tempData1 + 20) = U32BIG(x11); + + memcpy(c, tempData1, CRYPTO_KEYBYTES); + return 0; +} +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + if (clen < 
CRYPTO_KEYBYTES) + return -1; + *mlen = clen - CRYPTO_KEYBYTES; + + u8 tempData[12] = { 0 }; + u8 tempData1[24] = { 0 }; + u64 i; //RATE=96/8=12 + u64 t1, t2, t3, t5, t6, t8, t9, t11; + u32 t_1, t_2, t_3, t_5, t_6, t_8, t_9, t_11; + u64 x30 = 0, x20 = 0, x10 = 0, x00 = 0; + u32 x31 = 0, x21 = 0, x11 = 0, x01 = 0; + + u64 x50, x60, x70; + u32 x51, x61, x71; + + // initialization + + x00 = U64BIG(*(u64*)(npub)); + x01 = U32BIG(*(u32*)(npub + 8)); + x10 = U64BIG(*(u64*)(npub + 12)); + x11 = U32BIG(*(u32*)(npub + 20)); + x20 = U64BIG(*(u64*)(k)); + x21 = U32BIG(*(u32*)(k + 8)); + x30 = U64BIG(*(u64*)(k + 12)); + x31 = U32BIG(*(u32*)(k + 20)); + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND384(i); + } + // process associated data + if (adlen) { + while (adlen >= RATE) { + x00 ^= ((u64)U32BIG(*(u32*)(ad + 4)) << 32) | ((u64)U32BIG(*(u32*)(ad))); + //x00 ^= U64BIG(*(u64*)(ad)); + x01 ^= U32BIG(*(u32*)(ad + 8)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + adlen -= RATE; + ad += RATE; + } + memset(tempData, 0, RATE); + memcpy(tempData, ad, adlen); + tempData[adlen] = 0x01; + x00 ^= U64BIG(*(u64*)(tempData)); + x01 ^= U32BIG(*(u32*)(tempData + 8)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + } + x31 ^= 0x80000000; + // process c + clen -= CRYPTO_ABYTES; + if (clen > 0) { + while (clen >= RATE) { + *(u32*)(m) = U32BIG(x00) ^ (*(u32*)(c)); + *(u32*)(m + 4) = U32BIG(x00>>32) ^ (*(u32*)(c + 4)); + *(u32*)(m + 8) = U32BIG(x01) ^ (*(u32*)(c + 8)); + x00 = ((u64)U32BIG(*(u32*)(c + 4)) << 32) | ((u64)U32BIG(*(u32*)(c))); + x01 = U32BIG(*(u32*)(c + 8)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + clen -= RATE; + m += RATE; + c += RATE; + } + memset(tempData, 0, RATE); + memcpy(tempData, c, clen * sizeof(unsigned char)); + tempData[clen] = 0x01; + *(u64*)(tempData1) = U64BIG(x00) ^ (*(u64*)(tempData)); + *(u32*)(tempData1 + 8) = U32BIG(x01) ^ (*(u32*)(tempData + 8)); + memcpy(m, tempData1, clen * sizeof(unsigned char)); + memcpy(tempData1, c, clen * 
sizeof(unsigned char)); + x00 = U64BIG(*(u64*)(tempData1)); + x01 = U32BIG(*(u32*)(tempData1 + 8)); + c += clen; + } + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + ROUND384(i); + } + *(u64*)tempData1 = U64BIG(x00); + *(u32*)(tempData1 + 8) = U32BIG(x01); + *(u32*)(tempData1 + 12) = U32BIG(x10); + *(u32*)(tempData1 + 16) = U32BIG(x10 >> 32); + *(u32*)(tempData1 + 20) = U32BIG(x11); + // return -1 if verification fails + if (memcmp((void*)tempData1, (void*)c, CRYPTO_ABYTES)) { + memset(m, 0, sizeof(unsigned char) * (*mlen)); + *mlen = 0; + return -1; + } + return 0; +} + diff --git a/knot/Implementations/crypto_aead/knot192/opt_2/api.h b/knot/Implementations/crypto_aead/knot192/opt_2/api.h new file mode 100644 index 0000000..332aa0a --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/opt_2/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 24 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 24 +#define CRYPTO_ABYTES 24 +#define CRYPTO_NOOVERLAP 1 diff --git a/knot/Implementations/crypto_aead/knot192/opt_2/encrypt.c b/knot/Implementations/crypto_aead/knot192/opt_2/encrypt.c new file mode 100644 index 0000000..8fb1ea8 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/opt_2/encrypt.c @@ -0,0 +1,253 @@ +#include"api.h" + +typedef unsigned char u8; +typedef unsigned long long u64; +typedef unsigned int u32; + +#define RATE (96 / 8) +#define PR0_ROUNDS 76 +#define PR_ROUNDS 40 +#define PRF_ROUNDS 44 +#define sbox32(a, b, c, d, f, g, h) \ +{ \ + t_1 = ~a; t_2 = b & t_1;t_3 = c ^ t_2; h = d ^ t_3; t_5 = b | c; t_6 = d ^ t_1; g = t_5 ^ t_6; t_8 = b ^ d; t_9 = t_3 & t_6; a = t_8 ^ t_9; t_11 = g & t_8; f = t_3 ^ t_11; \ +} + +#define sbox64(a, b, c, d, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} + + +#define ROTR64(x,n) (((x)>>(n))|((x)<<(64-(n)))) +#define ROTR32(x,n) (((x)>>(n))|((x)<<(32-(n)))) +#define ARR_SIZE(a) 
(sizeof((a))/sizeof((a[0]))) +#define ROTR961(a,b,n) (((a)<<(n))|((b)>>(64-n))) +#define ROTR962(a,b,n) (((b)<<(n))|((a)>>(32-n))) + +#define ROTR96MORE321(a,b,n) ((b<<(n-32))>>32) +#define ROTR96MORE322(a,b,n) (((b)<<(n))|((u64)(a)<<((n)-32))|((b)>>(96-(n)))) /* low 64 bits of the 96-bit word a:b rotated left by n, 32 < n < 64; reconstructed, the middle of this expression had been lost */ + +#define EXT_BYTE32(x,n) ((u8)((u32)(x)>>(8*(n)))) +#define INS_BYTE32(x,n) ((u32)(x)<<(8*(n))) +#define U32BIG(x) (x) +#define EXT_BYTE64(x,n) ((u8)((u64)(x)>>(8*(n)))) +#define INS_BYTE64(x,n) ((u64)(x)<<(8*(n))) +#define U64BIG(x) (x) + +u8 constant7[127] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03, 0x06, + 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a, 0x14, 0x28, 0x51, 0x23, 0x47, + 0x0f, 0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b, 0x16, + 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54, 0x29, 0x53, + 0x27, 0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43, 0x07, + 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49, 0x13, 0x26, + 0x4d, 0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37, 0x6f, + 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31, 0x63, 0x46, 0x0d, 0x1a, 0x34, + 0x69, 0x52, 0x25, 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c, 0x39, + 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a, 0x15, 0x2a, 0x55, 0x2b, 0x57, + 0x2f, 0x5f, 0x3f, 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40 }; + +#define ROUND384(i) {\ +x00 ^= constant7[i];\ +sbox64(x00, x10, x20, x30, x50, x60, x70);\ +sbox32(x01, x11, x21, x31, x51, x61, x71);\ +x11 = ROTR961(x51, x50, 1);\ +x10 = ROTR962(x51, x50, 1);\ +x21 = ROTR961(x61, x60, 8);\ +x20 = ROTR962(x61, x60, 8);\ +x31 = ROTR96MORE321(x71, x70, 55);\ +x30 = ROTR96MORE322(x71, x70, 55);\ +} +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + *clen = mlen + CRYPTO_KEYBYTES; + u64 i; //RATE=96/8=12 + u64 t1, t2, t3, t5, t6, t8, t9, t11; + u32 t_1, t_2, t_3, t_5, t_6, t_8,
t_9, t_11; + u64 x30 = 0, x20 = 0, x10 = 0, x00 = 0; + u32 x31 = 0, x21 = 0, x11 = 0, x01 = 0; + + u8 tempData[12] = { 0 }; + u8 tempData1[12] = { 0 }; + u8 tempData2[12] = { 0 }; + u64 x40, x50, x60, x70; + u32 x41, x51, x61, x71; + // initialization + + x00 = U64BIG(*(u64*)(npub)); + x01 = U32BIG(*(u32*)(npub + 8)); + x10 = U64BIG(*(u64*)(npub + 12)); + x11 = U32BIG(*(u32*)(npub + 20)); + x20 = U64BIG(*(u64*)(k)); + x21 = U32BIG(*(u32*)(k + 8)); + + memcpy(&x30, k + 12, 8); + memcpy(&x31, k + 20, 4); + //x30 = U64BIG(*(u64*)(k + 12)); + //x31 = U32BIG(*(u32*)(k + 20)); + + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND384(i); + } + + // process associated data + if (adlen) { + while (adlen >= RATE) { + x00 ^= U64BIG(*(u64*)(ad)); + x01 ^= U32BIG(*(u32*)(ad + 8)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + adlen -= RATE; + ad += RATE; + } + + memset(tempData, 0, RATE); + memcpy(tempData, ad, adlen); + tempData[adlen] = 0x01; + x00 ^= U64BIG(*(u64*)(tempData)); + x01 ^= U32BIG(*(u32*)(tempData + 8)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + } + x31 ^= 0x80000000; + // process plaintext + if (mlen) { + while (mlen >= RATE) { + x00 ^= U64BIG(*(u64*)(m)); + x01 ^= U32BIG(*(u32*)(m + 8)); + *(u64*)c = U64BIG(x00); + *(u32*)(c + 8) = U32BIG(x01); + + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + + mlen -= RATE; + m += RATE; + c += RATE; + } + memset(tempData, 0, RATE); + memcpy(tempData, m, mlen); + tempData[mlen] = 0x01; + x00 ^= U64BIG(*(u64*)(tempData)); + x01 ^= U32BIG(*(u32*)(tempData + 8)); + *(u64*)tempData1 = U64BIG(x00); + *(u32*)(tempData1 + 8) = U32BIG(x01); + memcpy(c, tempData1, mlen * sizeof(unsigned char)); + c += mlen; + } + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + ROUND384(i); + } + // return tag + *(u64*)c = U64BIG(x00); + *(u32*)(c + 8) = U32BIG(x01); + *(u64*)(c + 12) = U64BIG(x10); + *(u32*)(c + 20) = U32BIG(x11); + return 0; +} +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + 
unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + + *mlen = clen - CRYPTO_KEYBYTES; + if (clen < CRYPTO_KEYBYTES) + return -1; + + u8 tempData[12] = { 0 }; + u8 tempData1[12] = { 0 }; + u64 i; //RATE=96/8=12 + u64 t1, t2, t3, t5, t6, t8, t9, t11; + u32 t_1, t_2, t_3, t_5, t_6, t_8, t_9, t_11; + u64 x30 = 0, x20 = 0, x10 = 0, x00 = 0; + u32 x31 = 0, x21 = 0, x11 = 0, x01 = 0; + + u64 x40, x50, x60, x70; + u32 x41, x51, x61, x71; + + // initialization + + x00 = U64BIG(*(u64*)(npub)); + x01 = U32BIG(*(u32*)(npub + 8)); + x10 = U64BIG(*(u64*)(npub + 12)); + x11 = U32BIG(*(u32*)(npub + 20)); + x20 = U64BIG(*(u64*)(k)); + x21 = U32BIG(*(u32*)(k + 8)); + x30 = U64BIG(*(u64*)(k + 12)); + x31 = U32BIG(*(u32*)(k + 20)); + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND384(i); + } + + // process associated data + + if (adlen) { + while (adlen >= RATE) { + x00 ^= U64BIG(*(u64*)(ad)); + x01 ^= U32BIG(*(u32*)(ad + 8)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + adlen -= RATE; + ad += RATE; + } + memset(tempData, 0, RATE); + memcpy(tempData, ad, adlen); + tempData[adlen] = 0x01; + x00 ^= U64BIG(*(u64*)(tempData)); + x01 ^= U32BIG(*(u32*)(tempData + 8)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + } + x31 ^= 0x80000000; + // process c + clen -= CRYPTO_KEYBYTES; + if (clen > 0) { + while (clen >= RATE) { + *(u64*)(m) = U64BIG(x00) ^ (*(u64*)(c)); + *(u32*)(m + 8) = U32BIG(x01) ^ (*(u32*)(c + 8)); + + x00 = U64BIG(*(u64*)(c)); + x01 = U32BIG(*(u32*)(c + 8)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + clen -= RATE; + m += RATE; + c += RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, c, clen * sizeof(unsigned char)); + tempData[clen] = 0x01; + *(u64*)(tempData1) = U64BIG(x00) ^ (*(u64*)(tempData)); + *(u32*)(tempData1 + 8) = U32BIG(x01) ^ (*(u32*)(tempData + 8)); + memcpy(m, tempData1, clen * 
sizeof(unsigned char)); + memcpy(tempData1, c, clen * sizeof(unsigned char)); + x00 = U64BIG(*(u64*)(tempData1)); + x01 = U32BIG(*(u32*)(tempData1 + 8)); + c += clen; + } + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + ROUND384(i); + } + /* c is const input: the received tag is compared as-is below and must not be overwritten */ + // return -1 if verification fails + if (*(u32*)(c + 8) != U32BIG(x01) || *(u64*)(c) != U64BIG(x00) + || *(u32*)(c + 20) != U32BIG(x11) + || *(u64*)(c + 12) != U64BIG(x10)){ + memset(m, 0, sizeof(unsigned char) * (*mlen)); + *mlen = 0; + return -1; + } + return 0; +} + diff --git a/knot/Implementations/crypto_aead/knot192/opt_ARR/api.h b/knot/Implementations/crypto_aead/knot192/opt_ARR/api.h new file mode 100644 index 0000000..46e1c15 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/opt_ARR/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 24 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 24 +#define CRYPTO_ABYTES 24 +#define CRYPTO_NOOVERLAP 1 diff --git a/knot/Implementations/crypto_aead/knot192/opt_ARR/encrypt.c b/knot/Implementations/crypto_aead/knot192/opt_ARR/encrypt.c new file mode 100644 index 0000000..dd18c4a --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/opt_ARR/encrypt.c @@ -0,0 +1,199 @@ +#include"api.h" +#include <string.h> /* memcpy, memset, memcmp; the header name had been lost */ +#define PR0_ROUNDS 76 +#define PR_ROUNDS 40 +#define PRF_ROUNDS 44 + +typedef unsigned char u8; +typedef unsigned long long u64; +typedef unsigned int u32; + +#define RATE 12 +#define sbox(a, b, c, d, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t4 = b | c; t5 = d ^ t1; g = t4 ^ t5; t6 = b ^ d; t7 = t3 & t5; a = t6 ^ t7; t8 = g & t6; f = t3 ^ t8; \ +} + +#define ROTR961(a,b,n) (((a)<<(n))|((b)>>(64-n))) +#define ROTR962(a,b,n) (((b)<<(n))|((a)>>(32-n))) + +#define ROTR96MORE321(a,b,n) ((b<<(n-32))>>32) +#define ROTR96MORE322(a,b,n) (((b)<<(n))|((u64)(a)<<((n)-32))|((b)>>(96-(n)))) /* low 64 bits of the 96-bit word a:b rotated left by n, 32 < n < 64; reconstructed, the middle of this expression had been lost */ + +#define U32BIG(x) (x) +#define U64BIG(x) (x) + +u8 constant7[76] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03, 0x06, + 0x0c, 0x18, 0x30, 0x61,
0x42, 0x05, 0x0a, 0x14, 0x28, 0x51, 0x23, 0x47, + 0x0f, 0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b, 0x16, + 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54, 0x29, 0x53, + 0x27, 0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43, 0x07, + 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49, 0x13, 0x26, + 0x4d, 0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b}; +#define ROUND384(i){\ + s[0] ^= constant7[i]; \ + sbox(U64BIG(((u64*)s)[0]), U64BIG(((u64*)(s+3))[0]), U64BIG(((u64*)(s+6))[0]), U64BIG(((u64*)(s+9))[0]), x50, x60, x70); \ + sbox(s[2], s[5], s[8], s[11], x51, x61, x71); \ + s[5] = ROTR961(x51, x50, 1); \ + U64BIG(((u64*)(s+3))[0]) = ROTR962(x51, x50, 1); \ + s[8] = ROTR961(x61, x60, 8); \ + U64BIG(((u64*)(s+6))[0]) = ROTR962(x61, x60, 8); \ + s[11] = ROTR96MORE321(x71, x70, 55); \ + U64BIG(((u64*)(s+9))[0]) = ROTR96MORE322(x71, x70, 55); \ +} + +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + *clen = mlen + CRYPTO_ABYTES; + u32 s[12] = { 0 }, i; + u64 t1, t2, t3, t5, t6, t8, t4, t7; + u64 x50, x60, x70; + u32 x51, x61, x71; + u8 tempData[24] = { 0 }; + // initialization + memcpy(s, npub, sizeof(unsigned char) * CRYPTO_NPUBBYTES); + memcpy(s + CRYPTO_NPUBBYTES / 4, k, sizeof(unsigned char) * CRYPTO_KEYBYTES); + + + + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND384(i); + } + // process associated data + if (adlen) { + while (adlen >= RATE) { + U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(ad)); + s[2] ^= U32BIG(*(u32*)(ad + 8)); /* 4-byte load: the rate block is 12 bytes, an 8-byte load here would read past ad + RATE */ + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + adlen -= RATE; + ad += RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; + U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(tempData)); + s[2] ^= U32BIG(*(u32*)(tempData + 8)); + for (i = 0;
i < PR_ROUNDS; i++) { + ROUND384(i); + } + } + s[11] ^= 0x80000000; + // process plaintext + if (mlen) { + while (mlen >= RATE) { + U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(m)); + s[2] ^= U32BIG(*(u32*)(m + 8)); + memcpy(c, s, RATE * sizeof(unsigned char)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + mlen -= RATE; + m += RATE; + c += RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, m, mlen * sizeof(unsigned char)); + tempData[mlen] = 0x01; + U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(tempData)); + s[2] ^= U32BIG(*(u32*)(tempData + 8)); + memcpy(c, s, mlen * sizeof(unsigned char)); + c += mlen; + } + + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + ROUND384(i); + } + // return tag + + memcpy(c, s, sizeof(unsigned char) * CRYPTO_ABYTES); + return 0; +} +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + + *mlen = clen - CRYPTO_ABYTES; + if (clen < CRYPTO_KEYBYTES) + return -1; + + u32 s[12] = { 0 }, i; + u64 t1, t2, t3, t5, t6, t8, t4, t7; + u64 x50, x60, x70; + u32 x51, x61, x71; + u8 tempData[24] = { 0 }; + // initialization + memcpy(s, npub, sizeof(unsigned char) * CRYPTO_NPUBBYTES); + memcpy(s + CRYPTO_NPUBBYTES / 4, k, sizeof(unsigned char) * CRYPTO_KEYBYTES); + + + + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND384(i); + } + // process associated data + if (adlen) { + while (adlen >= RATE) { + U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(ad)); + s[2] ^= U32BIG(*(u32*)(ad + 8)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + adlen -= RATE; + ad += RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; + U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(tempData)); + s[2] ^= U32BIG(*(u32*)(tempData + 8)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + } + s[11] ^= 
0x80000000; + // process c + + ///////// + clen -= CRYPTO_ABYTES; + if (clen) { + while (clen >= RATE) { + U64BIG(*(u64*)(m)) = U64BIG(*(u64*)(s)) ^ U64BIG(*(u64*)(c)); + *(u32*)(m + 8) = s[2] ^ (*(u32*)(c + 8)); + memcpy(s, c, RATE * sizeof(unsigned char)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + clen -= RATE; + m += RATE; + c += RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, c, clen * sizeof(unsigned char)); + tempData[clen] = 0x01; + U64BIG(*(u64*)(s)) ^= U64BIG(*(u64*)(tempData)); + s[2] ^= U32BIG(*(u32*)(tempData + 8)); + memcpy(m, s, clen * sizeof(unsigned char)); + memcpy(s, c, clen * sizeof(unsigned char)); + // memcpy(m, tempData1, clen * sizeof(unsigned char)); + c += clen; + } + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + ROUND384(i); + } + if (memcmp((void*)s, (void*)c, CRYPTO_ABYTES)) { + memset(m, 0, sizeof(unsigned char) * (*mlen)); + *mlen = 0; + return -1; + } + + return 0; +} + diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_3_/api.h b/knot/Implementations/crypto_aead/knot256/armcortexm_3_/api.h new file mode 100644 index 0000000..396f722 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_3_/api.h @@ -0,0 +1,7 @@ +#define CRYPTO_KEYBYTES 32 //256/8=32 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 32 +#define CRYPTO_ABYTES 32 +#define CRYPTO_NOOVERLAP 1 + + diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_3_/auxFormat.h b/knot/Implementations/crypto_aead/knot256/armcortexm_3_/auxFormat.h new file mode 100644 index 0000000..47462ee --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_3_/auxFormat.h @@ -0,0 +1,93 @@ +#include"api.h" +#include +#define U32BIG(x) (x) + + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) + + +#define sbox(a, b, c, d, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & 
t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} + + +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; +void printU8(char name[], u8 var[], long len, int offset); + +// t9 +#define puck32(in)\ +{\ +t9 = (in ^ (in >> 1)) & 0x22222222; in ^= t9 ^ (t9 << 1);\ +t9 = (in ^ (in >> 2)) & 0x0C0C0C0C; in ^= t9 ^ (t9 << 2);\ +t9 = (in ^ (in >> 4)) & 0x00F000F0; in ^= t9 ^ (t9 << 4);\ +t9 = (in ^ (in >> 8)) & 0x0000FF00; in ^= t9 ^ (t9 << 8);\ +} +// t9 +#define unpuck32(t0){\ + t9 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= t9 ^ (t9 << 8); \ + t9 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= t9 ^ (t9 << 4); \ + t9 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= t9 ^ (t9 << 2); \ + t9 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= t9 ^ (t9 << 1); \ +} +//u32 t1, t2, t3,t8, +#define packU128FormatToFourPacket(out,in) {\ + t8 = U32BIG(((u32*)in)[0]); \ + t1 = U32BIG(((u32*)in)[1]); \ + t2 = U32BIG(((u32*)in)[2]); \ + t3 = U32BIG(((u32*)in)[3]); \ + puck32(t8); puck32(t8); \ + puck32(t1); puck32(t1); \ + puck32(t2); puck32(t2); \ + puck32(t3); puck32(t3); \ + out[3] = (t3 & 0xff000000) | ((t2 >> 8) & 0x00ff0000) | ((t1 >> 16) & 0x0000ff00) | (t8 >> 24); \ + out[2] = ((t3 << 8) & 0xff000000) | (t2 & 0x00ff0000) | ((t1 >> 8) & 0x0000ff00) | ((t8 >> 16) & 0x000000ff); \ + out[1] = ((t3 << 16) & 0xff000000) | ((t2 << 8) & 0x00ff0000) | (t1 & 0x0000ff00) | ((t8 >> 8) & 0x000000ff); \ + out[0] = ((t3 << 24) & 0xff000000) | ((t2 << 16) & 0x00ff0000) | ((t1 << 8) & 0x0000ff00) | (t8 & 0x000000ff); \ +} +//u32 u32 t1, t2, t3,t8, +#define unpackU128FormatToFourPacket( out, in) {\ +t[3] = (in[3] & 0xff000000 )| ((in[2] >> 8) & 0x00ff0000) | ((in[1] >> 16) & 0x0000ff00) | (in[0] >> 24); \ +t[2] = ((in[3] << 8) & 0xff000000) | (in[2] & 0x00ff0000) | ((in[1] >> 8) & 0x0000ff00) | ((in[0] >> 16) & 0x000000ff); \ +t[1] = ((in[3] << 16) & 0xff000000) | ((in[2] << 8) & 0x00ff0000) | (in[1] & 0x0000ff00) | ((in[0] >> 8) & 0x000000ff); \ +t[0] = ((in[3] << 24) & 0xff000000) | ((in[2] << 
16) & 0x00ff0000) | ((in[1] << 8) & 0x0000ff00) | (in[0] & 0x000000ff); \ +unpuck32(t[0]); \ +unpuck32(t[0]); \ +unpuck32(t[1]); \ +unpuck32(t[1]); \ +unpuck32(t[2]); \ +unpuck32(t[2]); \ +unpuck32(t[3]);\ +unpuck32(t[3]); \ +memcpy(out, t, 16 * sizeof(unsigned char));\ +} +#define BIT_LOTR32_16(t0,t1,t2,t3,t4,t5,t6,t7){\ +t4= LOTR32(t0, 4);\ +t5 = LOTR32(t1, 4);\ +t6 = LOTR32(t2, 4); \ +t7 = LOTR32(t3, 4); \ +} +#define BIT_LOTR32_25(t0,t1,t2,t3,t4,t5,t6,t7){\ +t4= LOTR32(t3, 7);\ +t5 = LOTR32(t0, 6);\ +t6 = LOTR32(t1, 6); \ +t7 = LOTR32(t2, 6); \ +} + +#define ROUND512( lunNum) {\ +s[3] ^= (constant7Format_aead[lunNum] >> 6) & 0x3;\ +s[2] ^= (constant7Format_aead[lunNum] >> 4) & 0x3;\ +s[1] ^= (constant7Format_aead[lunNum] >> 2) & 0x3;\ +s[0] ^= constant7Format_aead[lunNum] & 0x3;\ +sbox(s[3], s[7], s[11], s[15], s_temp[7], s_temp[11], s_temp[15]);\ +sbox(s[2], s[6], s[10], s[14], s[7] , s_temp[10], s_temp[14]);\ +sbox(s[1], s[5], s[9], s[13], s[6] , s_temp[9], s_temp[13]);\ +sbox(s[0], s[4], s[8], s[12], s[5] , s_temp[8], s_temp[12]);\ +s[4]= LOTR32(s_temp[7], 1);\ +BIT_LOTR32_16(s_temp[8], s_temp[9], s_temp[10], s_temp[11], s[8], s[9], s[10], s[11]);\ +BIT_LOTR32_25(s_temp[12], s_temp[13], s_temp[14], s_temp[15], s[12], s[13], s[14], s[15]);\ +} + diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_3_/crypto_aead.h b/knot/Implementations/crypto_aead/knot256/armcortexm_3_/crypto_aead.h new file mode 100644 index 0000000..cdfdf19 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_3_/crypto_aead.h @@ -0,0 +1,17 @@ +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k +); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const 
unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k +); diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_3_/encrypt.c b/knot/Implementations/crypto_aead/knot256/armcortexm_3_/encrypt.c new file mode 100644 index 0000000..fe6e355 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_3_/encrypt.c @@ -0,0 +1,200 @@ +#include"auxFormat.h" + +#define aead_RATE 16 +#define PR0_ROUNDS 100 +#define PR_ROUNDS 52 +#define PRF_ROUNDS 56 +/* + #define PR0_ROUNDS 100 + #define PR_ROUNDS 52 + #define PRF_ROUNDS 56 + * */ +unsigned char constant7Format_aead[100] = { 0x01, 0x04, 0x10, 0x40, 0x02, 0x08, + 0x21, 0x05, 0x14, 0x50, 0x42, 0x0a, 0x29, 0x24, 0x11, 0x44, 0x12, 0x48, + 0x23, 0x0d, 0x35, 0x55, 0x56, 0x5a, 0x6b, 0x2e, 0x38, 0x60, 0x03, 0x0c, + 0x31, 0x45, 0x16, 0x58, 0x63, 0x0f, 0x3d, 0x74, 0x53, 0x4e, 0x3b, 0x6c, + 0x32, 0x49, 0x27, 0x1d, 0x75, 0x57, 0x5e, 0x7b, 0x6e, 0x3a, 0x68, 0x22, + 0x09, 0x25, 0x15, 0x54, 0x52, 0x4a, 0x2b, 0x2c, 0x30, 0x41, 0x06, 0x18, + 0x61, 0x07, 0x1c, 0x71, 0x47, 0x1e, 0x79, 0x66, 0x1b, 0x6d, 0x36, 0x59, + 0x67, 0x1f, 0x7d, 0x76, 0x5b, 0x6f, 0x3e, 0x78, 0x62, 0x0b, 0x2d, 0x34, + 0x51, 0x46, 0x1a, 0x69, 0x26, 0x19, 0x65, 0x17, 0x5c, 0x73, }; + +#define Processing_Data(data) \ +do { \ + packU128FormatToFourPacket(dataFormat, data); \ + s[0] ^= dataFormat[0]; \ + s[1] ^= dataFormat[1]; \ + s[2] ^= dataFormat[2]; \ + s[3] ^= dataFormat[3]; \ +} while (0) + +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + u32 i; + u32 s_temp[16] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + u32 s[16] = { 0 }; + u32 t[4] = { 0 }; + u32 dataFormat[4] = { 0 }; + u8 tempData[16] = { 0 }; + u8 tempU8[32] = { 0 }; + *clen = mlen + CRYPTO_ABYTES; + //initialization + 
packU128FormatToFourPacket(s, npub);
+    packU128FormatToFourPacket((s + 4), (npub + 16));
+    packU128FormatToFourPacket((s + 8), k);
+    packU128FormatToFourPacket((s + 12), (k + 16));
+    for (i = 0; i < PR0_ROUNDS; i++) {
+        ROUND512(i);
+    }
+    // process associated data
+    //PAD(adlen, ad);
+    if (adlen) {
+        while (adlen >= aead_RATE) {
+            Processing_Data(ad);
+            for (i = 0; i < PR_ROUNDS; i++) {
+                ROUND512(i);
+            }
+            adlen -= aead_RATE;
+            ad += aead_RATE;
+        }
+        memset(tempData, 0, aead_RATE);
+        memcpy(tempData, ad, adlen );
+        tempData[adlen] = 0x01;
+        Processing_Data(tempData);
+        for (i = 0; i < PR_ROUNDS; i++) {
+            ROUND512(i);
+        }
+    }
+    s[15] ^= 0x80000000;
+    // process p data
+    if (mlen) {
+        while (mlen >= aead_RATE) {
+            Processing_Data(m);
+            unpackU128FormatToFourPacket(c, s);
+            for (i = 0; i < PR_ROUNDS; i++) {
+                ROUND512(i);
+            }
+            mlen -= aead_RATE;
+            m += aead_RATE;
+            c += aead_RATE;
+        }
+        memset(tempData, 0, aead_RATE);
+        memcpy(tempData, m, mlen );
+        tempData[mlen] = 0x01;
+        Processing_Data(tempData);
+        unpackU128FormatToFourPacket(tempData, s);
+        memcpy(c, tempData, mlen );
+        c += mlen;
+    }
+    // finalization
+    for (i = 0; i < PRF_ROUNDS; i++) {
+        ROUND512(i);
+    }
+    unpackU128FormatToFourPacket(tempU8, s);
+    unpackU128FormatToFourPacket((tempU8 + 16), (s + 4));
+    memcpy(c, tempU8, CRYPTO_ABYTES );
+    return 0;
+}
+
+/*
+ * Decrypt and verify a ciphertext (bit-sliced 32-bit implementation).
+ *
+ * c/clen  : ciphertext, i.e. the encrypted payload followed by a
+ *           CRYPTO_ABYTES-byte tag.
+ * ad/adlen: associated data that is authenticated but not encrypted.
+ * npub, k : nonce and key; 32 bytes of each are packed into the state.
+ * nsec    : not referenced by this function.
+ * m/mlen  : receives the plaintext and its length (clen - CRYPTO_ABYTES).
+ *
+ * Returns 0 when the recomputed tag matches.  Returns -1 if clen is shorter
+ * than the tag (nothing is written), or if the tag differs (the plaintext
+ * written to m is zeroed and *mlen is set to 0).
+ *
+ * The state s is kept in the packed layout produced by
+ * packU128FormatToFourPacket; s_temp, t and t1..t11 are scratch variables
+ * used by the ROUND512 and pack/unpack macros.
+ */
+int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
+        unsigned char *nsec, const unsigned char *c, unsigned long long clen,
+        const unsigned char *ad, unsigned long long adlen,
+        const unsigned char *npub, const unsigned char *k) {
+    u32 s_temp[16] = { 0 };
+    u32 t1, t2, t3, t5, t6, t8, t9, t11;
+    u8 i;
+    u32 s[16] = { 0 };
+    u32 dataFormat_1[4] = { 0 };
+    u32 dataFormat[4] = { 0 };
+    u8 tempData[16] = { 0 };
+    u8 tempU8[64] = { 0 };
+
+    u32 t[4] = { 0 };
+    if (clen < CRYPTO_ABYTES)
+        return -1;
+    *mlen = clen - CRYPTO_ABYTES;
+    // initialization: pack nonce and key into the state
+    packU128FormatToFourPacket(s, npub);
+    packU128FormatToFourPacket((s + 4), (npub + 16));
+    packU128FormatToFourPacket((s + 8), k);
+    packU128FormatToFourPacket((s + 12), (k + 16));
+    for (i = 0; i < PR0_ROUNDS; i++) {
+        ROUND512(i);
+    }
+    // process associated data
+    if (adlen) {
+        while (adlen >= aead_RATE) {
+            Processing_Data(ad);
+            for (i = 0; i < PR_ROUNDS; i++) {
+                ROUND512(i);
+            }
+            adlen -= aead_RATE;
+            ad += aead_RATE;
+        }
+        // final (possibly empty) AD block, padded with a single 0x01 byte
+        memset(tempData, 0, aead_RATE);
+        memcpy(tempData, ad, adlen );
+        tempData[adlen] = 0x01;
+        Processing_Data(tempData);
+        for (i = 0; i < PR_ROUNDS; i++) {
+            ROUND512(i);
+        }
+    }
+    s[15] ^= 0x80000000;
+    // process c data: strip the tag length (CRYPTO_ABYTES, not the key length)
+    clen = clen - CRYPTO_ABYTES;
+    if (clen) {
+        while (clen >= aead_RATE) {
+            packU128FormatToFourPacket(dataFormat, c);
+            dataFormat_1[0] = s[0] ^ dataFormat[0];
+            dataFormat_1[1] = s[1] ^ dataFormat[1];
+            dataFormat_1[2] = s[2] ^ dataFormat[2];
+            dataFormat_1[3] = s[3] ^ dataFormat[3];
+            unpackU128FormatToFourPacket(m, dataFormat_1);
+            // the ciphertext block replaces the rate part of the state
+            s[0] = dataFormat[0];
+            s[1] = dataFormat[1];
+            s[2] = dataFormat[2];
+            s[3] = dataFormat[3];
+            for (i = 0; i < PR_ROUNDS; i++) {
+                ROUND512(i);
+            }
+            clen -= aead_RATE;
+            m += aead_RATE;
+            c += aead_RATE;
+        }
+        // final partial block, handled on the unpacked byte representation
+        unpackU128FormatToFourPacket(tempU8, s);
+        memset(tempData, 0, aead_RATE);
+        memcpy(tempData, c, clen );
+        tempData[clen] = 0x01;
+        // Byte-wise XOR: same result as the former u32-cast XOR (U32BIG is
+        // the identity) without assuming the u8 buffers are 4-byte aligned.
+        for (i = 0; i < aead_RATE; i++) {
+            tempU8[i] ^= tempData[i];
+        }
+        memcpy(m, tempU8, clen );
+        memcpy(tempU8, tempData, clen );
+        c += clen;
+        // The padding bit is already in tempU8[clen] from the XOR above.
+        // The former "tempU8[i] ^= 0x01" indexed with the stale round
+        // counter (52 or 100): it never touched bytes 0..15 and, with
+        // i == 100, wrote past the end of tempU8[64].
+        packU128FormatToFourPacket(s, tempU8);
+    }
+    // finalization
+    for (i = 0; i < PRF_ROUNDS; i++) {
+        ROUND512(i);
+    }
+    unpackU128FormatToFourPacket(tempU8, s);
+    unpackU128FormatToFourPacket((tempU8 + 16), (s + 4));
+    // NOTE(review): memcmp is not guaranteed to run in constant time; consider
+    // a constant-time comparison for the tag check.
+    if (memcmp((void*) tempU8, (void*) c, CRYPTO_ABYTES)) {
+        memset(m, 0, (*mlen));
+        *mlen = 0;
+        return -1;
+    }
+    return 0;
+}
diff --git
a/knot/Implementations/crypto_aead/knot256/opt_1/api.h b/knot/Implementations/crypto_aead/knot256/opt_1/api.h new file mode 100644 index 0000000..04c656a --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/opt_1/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 32 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 32 +#define CRYPTO_ABYTES 32 +#define CRYPTO_NOOVERLAP 1 diff --git a/knot/Implementations/crypto_aead/knot256/opt_1/encrypt.c b/knot/Implementations/crypto_aead/knot256/opt_1/encrypt.c new file mode 100644 index 0000000..86bce41 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/opt_1/encrypt.c @@ -0,0 +1,193 @@ +#include +#include "api.h" + +typedef unsigned char u8; +typedef unsigned long long u64; +typedef long long i64; + +#include +#include +#include +static const u8 constant7[100] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, + 0x03, 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a, 0x14, 0x28, 0x51, + 0x23, 0x47, 0x0f, 0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, + 0x0b, 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54, + 0x29, 0x53, 0x27, 0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, + 0x43, 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49, + 0x13, 0x26, 0x4d, 0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, + 0x37, 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31, 0x63, 0x46, 0x0d, + 0x1a, 0x34, 0x69, 0x52, 0x25, 0x4b, 0x17, 0x2e, 0x5d }; +#define sbox(a, b, c, d, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} + +#define LOTR1281(a,b,n) (((a)<<(n))|((b)>>(64-n))) +#define LOTR1282(a,b,n) (((b)<<(n))|((a)>>(64-n))) + +#define U64BIG(x) (x) + +#define RATE 16 +#define PR0_ROUNDS 100 +#define PR_ROUNDS 52 +#define PRF_ROUNDS 56 + +#define ROUND512(i) {\ + s[0]^=constant7[i];\ + sbox(s[0], s[2], s[4], s[6], b10, b20, b30);\ + sbox(s[1], s[3], s[5], s[7], 
b11, b21, b31);\
+    s[2]=LOTR1281(b10,b11,1);\
+    s[4]=LOTR1281(b20,b21,16);\
+    s[6]=LOTR1281(b30,b31,25);\
+    s[3]=LOTR1282(b10,b11,1);\
+    s[5]=LOTR1282(b20,b21,16);\
+    s[7]=LOTR1282(b30,b31,25);\
+}
+
+/*
+ * Encrypt m (mlen bytes) and authenticate it together with ad (adlen bytes).
+ *
+ * Writes mlen ciphertext bytes followed by a CRYPTO_ABYTES-byte tag to c and
+ * stores the total in *clen.  npub is the nonce, k the key; nsec is not
+ * referenced.  Always returns 0.
+ *
+ * The 512-bit state s[0..7] is loaded as nonce || key; data is absorbed
+ * RATE (16) bytes at a time into s[0], s[1].
+ */
+int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
+        const unsigned char *m, unsigned long long mlen,
+        const unsigned char *ad, unsigned long long adlen,
+        const unsigned char *nsec, const unsigned char *npub,
+        const unsigned char *k) {
+
+    *clen = mlen + CRYPTO_ABYTES;
+    u64 b11, b21, b31, b10, b20, b30;
+    u64 t1, t2, t3, t5, t6, t8, t9, t11;
+    u64 s[8] = { 0 };
+    u64 i;
+    u8 tempData[32] = { 0 };
+    // initialization
+    memcpy(s, npub, CRYPTO_NPUBBYTES);
+    memcpy(s + 4, k, CRYPTO_KEYBYTES);
+    for (i = 0; i < PR0_ROUNDS; i++) {
+        ROUND512(i);
+    }
+    // process associated data
+    if (adlen) {
+        while (adlen >= RATE) {
+            s[0] ^= U64BIG(((u64*)ad)[0]);
+            s[1] ^= U64BIG(((u64*)ad)[1]);
+            for (i = 0; i < PR_ROUNDS; i++) {
+                ROUND512(i);
+            }
+            adlen -= RATE;
+            ad += RATE;
+        }
+
+        memset(tempData, 0, RATE);
+        memcpy(tempData, ad, adlen);
+        tempData[adlen] = 0x01;
+        s[0] ^= U64BIG(((u64*)tempData)[0]);
+        s[1] ^= U64BIG(((u64*)tempData)[1]);
+        for (i = 0; i < PR_ROUNDS; i++) {
+            ROUND512(i);
+        }
+    }
+    s[7] ^= 0x8000000000000000;
+    // process plaintext
+    if (mlen) {
+        while (mlen >= RATE) {
+            s[0] ^= U64BIG(((u64*)m)[0]);
+            s[1] ^= U64BIG(((u64*)m)[1]);
+            memcpy(c, s, RATE);
+            for (i = 0; i < PR_ROUNDS; i++) {
+                ROUND512(i);
+            }
+            mlen -= RATE;
+            m += RATE;
+            c += RATE;
+        }
+        memset(tempData, 0, RATE);
+        memcpy(tempData, m, mlen);
+        tempData[mlen] = 0x01;
+        s[0] ^= U64BIG(((u64*)tempData)[0]);
+        s[1] ^= U64BIG(((u64*)tempData)[1]);
+        memcpy(c, s, mlen);
+        c += mlen;
+    }
+    // finalization
+    for (i = 0; i < PRF_ROUNDS; i++) {
+        ROUND512(i);
+    }
+    // return tag
+    memcpy(c, s, CRYPTO_ABYTES);
+    return 0;
+}
+
+/*
+ * Decrypt and verify a ciphertext.
+ *
+ * c/clen  : ciphertext, i.e. the encrypted payload followed by a
+ *           CRYPTO_ABYTES-byte tag.
+ * ad/adlen: associated data that is authenticated but not encrypted.
+ * npub, k : nonce and key; nsec is not referenced.
+ * m/mlen  : receives the plaintext and its length (clen - CRYPTO_ABYTES).
+ *
+ * Returns 0 when the recomputed tag matches.  Returns -1 with *mlen = 0 when
+ * clen is shorter than the tag, or when the tag differs (the plaintext
+ * written to m is then zeroed).
+ */
+int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
+        unsigned char *nsec, const unsigned char *c, unsigned long long clen,
+        const unsigned char *ad, unsigned long long adlen,
+        const unsigned char *npub, const unsigned char *k) {
+
+    // Validate first: computing clen - CRYPTO_ABYTES on a short input would
+    // wrap and leave a huge bogus length in *mlen.
+    if (clen < CRYPTO_ABYTES) {
+        *mlen = 0;
+        return -1;
+    }
+    *mlen = clen - CRYPTO_ABYTES;
+    u64 b11, b21, b31, b10, b20, b30;
+    u64 t1, t2, t3, t5, t6, t8, t9, t11;
+    u64 s[8] = { 0 };
+    u64 i;
+    u8 tempData[32] = { 0 };
+    // initialization: state = nonce || key
+    memcpy(s, npub, CRYPTO_NPUBBYTES);
+    memcpy(s + 4, k, CRYPTO_KEYBYTES);
+    for (i = 0; i < PR0_ROUNDS; i++) {
+        ROUND512(i);
+    }
+    // process associated data
+    if (adlen) {
+        while (adlen >= RATE) {
+            s[0] ^= U64BIG(((u64*)ad)[0]);
+            s[1] ^= U64BIG(((u64*)ad)[1]);
+            for (i = 0; i < PR_ROUNDS; i++) {
+                ROUND512(i);
+            }
+            adlen -= RATE;
+            ad += RATE;
+        }
+
+        // final (possibly empty) AD block, padded with a single 0x01 byte
+        memset(tempData, 0, RATE);
+        memcpy(tempData, ad, adlen);
+        tempData[adlen] = 0x01;
+        s[0] ^= U64BIG(((u64*)tempData)[0]);
+        s[1] ^= U64BIG(((u64*)tempData)[1]);
+        for (i = 0; i < PR_ROUNDS; i++) {
+            ROUND512(i);
+        }
+    }
+    s[7] ^= 0x8000000000000000;
+    // process ciphertext (payload only; the tag is compared at the end)
+    clen -= CRYPTO_ABYTES;
+    if (clen) {
+        while (clen >= RATE) {
+            // plaintext = state ^ ciphertext, then the ciphertext block
+            // replaces the first RATE bytes of the state
+            U64BIG(((u64*)m)[0]) = s[0] ^ U64BIG(((u64*)c)[0]);
+            U64BIG(((u64*)m)[1]) = s[1] ^ U64BIG(((u64*)c)[1]);
+            memcpy(s, c, RATE);
+            for (i = 0; i < PR_ROUNDS; i++) {
+                ROUND512(i);
+            }
+            clen -= RATE;
+            m += RATE;
+            c += RATE;
+        }
+        // final partial block
+        memset(tempData, 0, RATE);
+        memcpy(tempData, c, clen);
+        tempData[clen] = 0x01;
+        s[0] ^= U64BIG(((u64*)tempData)[0]);
+        s[1] ^= U64BIG(((u64*)tempData)[1]);
+        memcpy(m, s, clen);
+        memcpy(s, c, clen);
+        c += clen;
+    }
+    // finalization
+    for (i = 0; i < PRF_ROUNDS; i++) {
+        ROUND512(i);
+    }
+    // NOTE(review): memcmp is not guaranteed to run in constant time; consider
+    // a constant-time comparison for the tag check.
+    if (memcmp((void*)s, (void*)c, CRYPTO_ABYTES)) {
+        memset(m, 0, (*mlen));
+        *mlen = 0;
+        return -1;
+    }
+    return 0;
+}
+
+
diff --git a/knot/Implementations/crypto_aead/knot256/opt_SSE/api.h b/knot/Implementations/crypto_aead/knot256/opt_SSE/api.h
new file mode 100644
index 0000000..c31160d
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot256/opt_SSE/api.h
@@ -0,0 +1,5 @@
+#define CRYPTO_KEYBYTES 32 //256/8=32
+#define CRYPTO_NSECBYTES 0
+#define CRYPTO_NPUBBYTES
32 +#define CRYPTO_ABYTES 32 +#define CRYPTO_NOOVERLAP 1 diff --git a/knot/Implementations/crypto_aead/knot256/opt_SSE/encrypt.c b/knot/Implementations/crypto_aead/knot256/opt_SSE/encrypt.c new file mode 100644 index 0000000..2201c77 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/opt_SSE/encrypt.c @@ -0,0 +1,205 @@ +#include +#include "api.h" +#include +#include +#include //sse2 header file(include sse header file) +#define U64BIG(x) (x) +#define U32BIG(x) (x) + +#define PR0_ROUNDS 100 +#define PR_ROUNDS 52 +#define PRF_ROUNDS 56 + + +typedef unsigned char u8; +typedef unsigned long long u64; +typedef long long i64; + +#define forward_sbox_SSE(a, b, c, d, f, g, h) \ +{ \ +tmm1 =_mm_xor_si128( a , all1 ); \ +tmm2 =_mm_and_si128( b, tmm1 ); \ +tmm3 =_mm_xor_si128( c , tmm2 ); \ +h=_mm_xor_si128( d , tmm3 ); \ +tmm5 =_mm_or_si128 ( b, c); \ +tmm6=_mm_xor_si128( d, tmm1 ); \ +g=_mm_xor_si128( tmm5, tmm6 ); \ +tmm8=_mm_xor_si128( b, d ); \ +tmm9=_mm_and_si128( tmm3, tmm6 ); \ +a=_mm_xor_si128( tmm8, tmm9 ); \ +tmm11=_mm_and_si128( g, tmm8 ); \ +f=_mm_xor_si128( tmm3, tmm11 ); \ +} + +u8 constant7[100] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, + 0x03, 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a, 0x14, 0x28, 0x51, + 0x23, 0x47, 0x0f, 0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, + 0x0b, 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54, + 0x29, 0x53, 0x27, 0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, + 0x43, 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49, + 0x13, 0x26, 0x4d, 0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, + 0x37, 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31, 0x63, 0x46, 0x0d, + 0x1a, 0x34, 0x69, 0x52, 0x25, 0x4b, 0x17, 0x2e, 0x5d }; +#define sbox(a, b, c, d, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} + +#define LOTR1281(a,b,n) 
(((a)<<(n))|((b)>>(64-n))) +#define LOTR1282(a,b,n) (((b)<<(n))|((a)>>(64-n))) + +#define U64BIG(x) (x) + +#define RATE 16 + +#define PR0_ROUNDS 100 +#define PR_ROUNDS 52 +#define PRF_ROUNDS 56 + +#define ROUND512(i) {\ +state[0] = _mm_xor_si128(state[0], _mm_set_epi64x(0, (u64)constant7[i])); \ +forward_sbox_SSE(state[0], state[1], state[2], state[3], out1, out2, out3); \ +state[1] = _mm_or_si128(_mm_slli_epi64(out1, 1), _mm_srli_epi64(_mm_shuffle_epi32(out1, _MM_SHUFFLE(1, 0, 3, 2)), 63)); \ +state[2] = _mm_or_si128(_mm_slli_epi64(out2, 16), _mm_srli_epi64(_mm_shuffle_epi32(out2, _MM_SHUFFLE(1, 0, 3, 2)), 48)); \ +state[3] = _mm_or_si128(_mm_slli_epi64(out3, 25), _mm_srli_epi64(_mm_shuffle_epi32(out3, _MM_SHUFFLE(1, 0, 3, 2)), 39)); \ +} +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + *clen = mlen + CRYPTO_ABYTES; + __m128i all1 = _mm_set1_epi32(0xffffffff); + //__m128i tmm0, tmm1, tmm2, tmm3, tmm4, tmm5, tmm6, tmm7, tmm8, tmm9, tmm10, tmm11, out1, out2, out3; + __m128i tmm1, tmm2, tmm3, tmm5, tmm6, tmm8, tmm9, tmm11, out1, out2, out3; + __m128i state[4]; + + u64 i; + u8 tempData[16] = { 0 }; + // initialization + state[0] = _mm_loadu_si128((__m128i*)(npub)); + state[1] = _mm_loadu_si128((__m128i*)(npub + 16)); + state[2] = _mm_loadu_si128((__m128i*)(k)); + state[3] = _mm_loadu_si128((__m128i*)(k + 16)); + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND512(i); + } + // process associated data + if (adlen) { + while (adlen >= RATE) { + state[0] = _mm_xor_si128(state[0], _mm_loadu_si128((__m128i*)(ad))); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND512(i); + } + adlen -= RATE; + ad += RATE; + } + memset(tempData, 0, RATE); + memcpy(tempData, ad, adlen); + tempData[adlen] = 0x01; + state[0] = _mm_xor_si128(state[0], _mm_loadu_si128((__m128i*)(tempData))); + for (i = 
0; i < PR_ROUNDS; i++) {
+            ROUND512(i);
+        }
+    }
+    state[3] = _mm_xor_si128(state[3], _mm_set_epi64x((u64)0x8000000000000000, 0));
+    // process plaintext
+    if (mlen) {
+        while (mlen >= RATE) {
+            state[0] = _mm_xor_si128(state[0], _mm_loadu_si128((__m128i*)(m)));
+            memcpy(c, state, RATE);
+            for (i = 0; i < PR_ROUNDS; i++) {
+                ROUND512(i);
+            }
+            mlen -= RATE;
+            m += RATE;
+            c += RATE;
+        }
+        memset(tempData, 0, RATE);
+        memcpy(tempData, m, mlen);
+        tempData[mlen] = 0x01;
+        state[0] = _mm_xor_si128(state[0], _mm_loadu_si128((__m128i*)(tempData)));
+        memcpy(c, state, mlen);
+        c += mlen;
+    }
+    // finalization
+    for (i = 0; i < PRF_ROUNDS; i++) {
+        ROUND512(i);
+    }
+    // return tag
+    memcpy(c, state, sizeof(unsigned char) * CRYPTO_ABYTES);
+    return 0;
+}
+/*
+ * Decrypt and verify a ciphertext (SSE2 implementation).
+ *
+ * c/clen  : ciphertext, i.e. the encrypted payload followed by a
+ *           CRYPTO_ABYTES-byte tag.
+ * ad/adlen: associated data that is authenticated but not encrypted.
+ * npub, k : nonce and key, 32 bytes of each are loaded; nsec is not
+ *           referenced.
+ * m/mlen  : receives the plaintext and its length (clen - CRYPTO_ABYTES).
+ *
+ * Returns 0 when the recomputed tag matches.  Returns -1 with *mlen = 0 when
+ * clen is shorter than the tag, or when the tag differs (the plaintext
+ * written to m is then zeroed).
+ *
+ * state[0..3] holds the four 128-bit rows; all1 and the tmm and out
+ * temporaries are used by the ROUND512 / forward_sbox_SSE macros.
+ */
+int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
+        unsigned char *nsec, const unsigned char *c, unsigned long long clen,
+        const unsigned char *ad, unsigned long long adlen,
+        const unsigned char *npub, const unsigned char *k) {
+
+    // Validate first: computing clen - CRYPTO_ABYTES on a short input would
+    // wrap and leave a huge bogus length in *mlen.
+    if (clen < CRYPTO_ABYTES) {
+        *mlen = 0;
+        return -1;
+    }
+    *mlen = clen - CRYPTO_ABYTES;
+    __m128i all1 = _mm_set1_epi32(0xffffffff);
+    __m128i tmm1, tmm2, tmm3, tmm5, tmm6, tmm8, tmm9, tmm11, out1, out2, out3;
+    __m128i state[4];
+
+    u64 i;
+    u8 tempData[16] = { 0 };
+    // initialization: rows 0-1 = nonce, rows 2-3 = key
+    state[0] = _mm_loadu_si128((__m128i*)(npub));
+    state[1] = _mm_loadu_si128((__m128i*)(npub + 16));
+    state[2] = _mm_loadu_si128((__m128i*)(k));
+    state[3] = _mm_loadu_si128((__m128i*)(k + 16));
+    for (i = 0; i < PR0_ROUNDS; i++) {
+        ROUND512(i);
+    }
+    // process associated data
+    if (adlen) {
+        while (adlen >= RATE) {
+            state[0] = _mm_xor_si128(state[0], _mm_loadu_si128((__m128i*)(ad)));
+            for (i = 0; i < PR_ROUNDS; i++) {
+                ROUND512(i);
+            }
+            adlen -= RATE;
+            ad += RATE;
+        }
+        // final (possibly empty) AD block, padded with a single 0x01 byte
+        memset(tempData, 0, RATE);
+        memcpy(tempData, ad, adlen);
+        tempData[adlen] = 0x01;
+        state[0] = _mm_xor_si128(state[0], _mm_loadu_si128((__m128i*)(tempData)));
+        for (i = 0; i < PR_ROUNDS; i++) {
+            ROUND512(i);
+        }
+    }
+    state[3] = _mm_xor_si128(state[3], _mm_set_epi64x((u64)0x8000000000000000, 0));
+    // process ciphertext (payload only; the tag is compared at the end)
+    clen -= CRYPTO_ABYTES;
+    if (clen) {
+        while (clen >= RATE) {
+            // plaintext = row 0 ^ ciphertext, then the ciphertext block
+            // replaces row 0
+            state[0] = _mm_xor_si128(state[0], _mm_loadu_si128((__m128i*)(c)));
+            memcpy(m, state, RATE);
+            memcpy(state, c, RATE);
+            for (i = 0; i < PR_ROUNDS; i++) {
+                ROUND512(i);
+            }
+            clen -= RATE;
+            m += RATE;
+            c += RATE;
+        }
+        // final partial block
+        memset(tempData, 0, RATE);
+        memcpy(tempData, c, clen);
+        tempData[clen] = 0x01;
+        state[0] = _mm_xor_si128(state[0], _mm_loadu_si128((__m128i*)(tempData)));
+        memcpy(m, state, clen);
+        memcpy(state, c, clen);
+        c += clen;
+    }
+    // finalization
+    for (i = 0; i < PRF_ROUNDS; i++) {
+        ROUND512(i);
+    }
+    // NOTE(review): memcmp is not guaranteed to run in constant time; consider
+    // a constant-time comparison for the tag check.
+    if (memcmp((void*)state, (void*)c, CRYPTO_ABYTES)) {
+        memset(m, 0, sizeof(unsigned char) * (*mlen));
+        *mlen = 0;
+        return -1;
+    }
+    return 0;
+}
diff --git a/knot/Implementations/crypto_hash/knot256v1/armcortexm_7/api.h b/knot/Implementations/crypto_hash/knot256v1/armcortexm_7/api.h
new file mode 100644
index 0000000..ba6bd20
--- /dev/null
+++ b/knot/Implementations/crypto_hash/knot256v1/armcortexm_7/api.h
@@ -0,0 +1 @@
+#define CRYPTO_BYTES 32
diff --git a/knot/Implementations/crypto_hash/knot256v1/armcortexm_7/auxFormat.c b/knot/Implementations/crypto_hash/knot256v1/armcortexm_7/auxFormat.c
new file mode 100644
index 0000000..c2d5b85
--- /dev/null
+++ b/knot/Implementations/crypto_hash/knot256v1/armcortexm_7/auxFormat.c
@@ -0,0 +1,320 @@
+#include"auxFormat.h"
+
+void P256(unsigned int *s, unsigned char *rc, unsigned char rounds)
+{
+ unsigned int reg1, reg2;
+ asm volatile (
+ "/*add round const*/ \n\t"
+ "ldrb %[reg1], [%[rc]] \n\t"
+ "and %[reg2], %[reg1], 0xf \n\t"
+ "eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/\
+ "eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/\
+ "adds %[rc], %[rc], #1 \n\t"
+ "/*sbox first column 0,2,4,6 sbox1(s[0], s[2], s[4], s[6]); */ \n\t"
+ "mvns %[S_0], %[S_0] \n\t"
+ "ands %[reg1], %[S_2], %[S_0] \n\t"
+ "eors %[reg1], %[S_4], %[reg1] \n\t"
+ "orrs %[S_4], %[S_2], %[S_4] \n\t"
+ "eors %[S_0], %[S_6], %[S_0] \n\t"
+ "eors %[S_4], %[S_4], %[S_0] \n\t"
+ "eors %[reg2], %[S_2], %[S_6] \n\t"
+ "eors %[S_6], %[S_6], %[reg1] \n\t"
+ "ands %[S_0], %[reg1],%[S_0] \n\t"
+ "eors %[S_0], %[reg2],%[S_0] \n\t"
+ "ands %[S_2], %[S_4], %[reg2] \n\t"
+ "eors %[S_2], %[reg1], %[S_2] \n\t"
+ "/*sbox first column 1,3,5,7 sbox1(s[1], s[3], s[5], s[7]) */ \n\t"
+ "mvns %[S_1], %[S_1] \n\t"
+ "ands %[reg1], %[S_3], %[S_1] \n\t"
+ "eors
%[reg1], %[S_4], %[reg1] \n\t" + "orrs %[S_4], %[S_2], %[S_4] \n\t" + "eors %[S_0], %[S_6], %[S_0] \n\t" + "eors %[S_4], %[S_4], %[S_0] \n\t" + "eors %[reg2], %[S_2], %[S_6] \n\t" + "eors %[S_6], %[S_6], %[reg1] \n\t" + "ands %[S_0], %[reg1],%[S_0] \n\t" + "eors %[S_0], %[reg2],%[S_0] \n\t" + "ands %[S_2], %[S_4], %[reg2] \n\t" + "eors %[S_2], %[reg1], %[S_2] \n\t" + "/*sbox first column 1,3,5,7 sbox1(s[1], s[3], s[5], s[7]) */ \n\t" + "mvns %[S_1], %[S_1] \n\t" + "ands %[reg1], %[S_3], %[S_1] \n\t" + "eors %[reg1], %[S_5], %[reg1] \n\t" + "orrs %[S_5], %[S_3], %[S_5] \n\t" + "eors %[S_1], %[S_7], %[S_1] \n\t" + "eors %[S_5], %[S_5], %[S_1] \n\t" + "eors %[reg2], %[S_3], %[S_7] \n\t" + "eors %[S_7], %[S_7], %[reg1] \n\t" + "ands %[S_1], %[reg1],%[S_1] \n\t" + "eors %[S_1], %[reg2],%[S_1] \n\t" + "ands %[S_3], %[S_5], %[reg2] \n\t" + "eors %[S_3], %[reg1], %[S_3] \n\t" + "enc_loop2: \n\t" + "/*add round const*/ \n\t" + "ldrb %[reg1], [%[rc]] \n\t" + "and %[reg2], %[reg1], 0xf \n\t" + "eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/ + "eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/ + "adds %[rc], %[rc], #1 \n\t" + "/*sbox first column 0,3,4,7 sbox1(s[0], s[3],ROR(s[4], 28), ROR(s[7], 20)); */ \n\t" + "mvns %[S_0], %[S_0] \n\t" + "ands %[reg1], %[S_3], %[S_0] \n\t" + "eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t" + "orrs %[S_4], %[S_3], %[S_4] , ROR #28 \n\t" + "eors %[S_0], %[S_0], %[S_7] , ROR #20 \n\t" + "eors %[S_4], %[S_4], %[S_0] \n\t" + "eors %[reg2], %[S_3], %[S_7] , ROR #20 \n\t" + "eors %[S_7], %[reg1],%[S_7] , ROR #20 \n\t" + "ands %[S_0], %[reg1],%[S_0] \n\t" + "eors %[S_0], %[reg2],%[S_0] \n\t" + "ands %[S_3], %[S_4], %[reg2] \n\t" + "eors %[S_3], %[reg1], %[S_3] \n\t" + "/*sbox first column 1,2,5,6 sbox1(s[1], ROR(s[2], 31), ROR(s[5], 28), ROR(s[6], 19)); */ \n\t" + "mvns %[S_1], %[S_1] \n\t" + "ands %[reg1], %[S_1], %[S_2] , ROR #31 \n\t" + "eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t" 
+"/*orrs %[S_5], %[S_5], ROR #28 %[S_2], ROR #31 31-28=3*/ \n\t" + "orrs %[S_5], %[S_5], %[S_2] , ROR #3 \n\t" + "eors %[S_1], %[S_1], %[S_6] , ROR #19 \n\t" + "eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t" +"/*eors %[reg2], %[S_6] , ROR #19, %[S_2] , ROR #31 31-19=12*/ \n\t" + "eors %[reg2], %[S_6], %[S_2] , ROR #12 \n\t" + "eors %[S_6], %[reg1],%[S_6] , ROR #19 \n\t" + "ands %[S_1], %[reg1],%[S_1] \n\t" + "eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t" + "ands %[S_2], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t" + "eors %[S_2], %[reg1],%[S_2] \n\t" + "/*add round const*/ \n\t" + "ldrb %[reg1], [%[rc]] \n\t" + "and %[reg2], %[reg1], 0xf \n\t" + "eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/\ + "eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/\ + "adds %[rc], %[rc], #1 \n\t" + "/*sbox first column 0,2,4,6 sbox1(s[0], s[2], ROR(s[4], 28), ROR(s[6], 20)); */ \n\t" + "mvns %[S_0], %[S_0] \n\t" + "ands %[reg1], %[S_2], %[S_0] \n\t" + "eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t" + "orrs %[S_4], %[S_2], %[S_4] , ROR #28 \n\t" + "eors %[S_0], %[S_0], %[S_6] , ROR #20 \n\t" + "eors %[S_4], %[S_4], %[S_0] \n\t" + "eors %[reg2], %[S_2], %[S_6] , ROR #20 \n\t" + "eors %[S_6], %[reg1],%[S_6] , ROR #20 \n\t" + "ands %[S_0], %[reg1],%[S_0] \n\t" + "eors %[S_0], %[reg2],%[S_0] \n\t" + "ands %[S_2], %[S_4], %[reg2] \n\t" + "eors %[S_2], %[reg1], %[S_2] \n\t" + "/*sbox first column 1,3,5,7 sbox1(s[1], ROR(s[3], 31), ROR(s[5], 28), ROR(s[7], 19)); */ \n\t" + "mvns %[S_1], %[S_1] \n\t" + "ands %[reg1], %[S_1], %[S_3] , ROR #31 \n\t" + "eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t" +"/*orrs %[S_5], %[S_5], ROR #28 %[S_3], ROR #31 31-28=3*/ \n\t" + "orrs %[S_5], %[S_5], %[S_3] , ROR #3 \n\t" + "eors %[S_1], %[S_1], %[S_7] , ROR #19 \n\t" + "eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t" +"/*eors %[reg2], %[S_7] , ROR #19, %[S_3] , ROR #31 31-19=12*/ \n\t" + "eors %[reg2], %[S_7], %[S_3] 
, ROR #12 \n\t" + "eors %[S_7], %[reg1],%[S_7] , ROR #19 \n\t" + "ands %[S_1], %[reg1],%[S_1] \n\t" + "eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t" + "ands %[S_3], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t" + "eors %[S_3], %[reg1],%[S_3] \n\t" + + "/*add round const*/ \n\t" + "ldrb %[reg1], [%[rc]] \n\t" + "and %[reg2], %[reg1], 0xf \n\t" + "eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/ + "eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/ + "adds %[rc], %[rc], #1 \n\t" + "/*sbox first column 0,3,4,7 sbox1(s[0], s[3],ROR(s[4], 28), ROR(s[7], 20)); */ \n\t" + "mvns %[S_0], %[S_0] \n\t" + "ands %[reg1], %[S_3], %[S_0] \n\t" + "eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t" + "orrs %[S_4], %[S_3], %[S_4] , ROR #28 \n\t" + "eors %[S_0], %[S_0], %[S_7] , ROR #20 \n\t" + "eors %[S_4], %[S_4], %[S_0] \n\t" + "eors %[reg2], %[S_3], %[S_7] , ROR #20 \n\t" + "eors %[S_7], %[reg1],%[S_7] , ROR #20 \n\t" + "ands %[S_0], %[reg1],%[S_0] \n\t" + "eors %[S_0], %[reg2],%[S_0] \n\t" + "ands %[S_3], %[S_4], %[reg2] \n\t" + "eors %[S_3], %[reg1], %[S_3] \n\t" + "/*sbox first column 1,2,5,6 sbox1(s[1], ROR(s[2], 31), ROR(s[5], 28), ROR(s[6], 19)); */ \n\t" + "mvns %[S_1], %[S_1] \n\t" + "ands %[reg1], %[S_1], %[S_2] , ROR #31 \n\t" + "eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t" +"/*orrs %[S_5], %[S_5], ROR #28 %[S_2], ROR #31 31-28=3*/ \n\t" + "orrs %[S_5], %[S_5], %[S_2] , ROR #3 \n\t" + "eors %[S_1], %[S_1], %[S_6] , ROR #19 \n\t" + "eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t" +"/*eors %[reg2], %[S_6] , ROR #19, %[S_2] , ROR #31 31-19=12*/ \n\t" + "eors %[reg2], %[S_6], %[S_2] , ROR #12 \n\t" + "eors %[S_6], %[reg1],%[S_6] , ROR #19 \n\t" + "ands %[S_1], %[reg1],%[S_1] \n\t" + "eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t" + "ands %[S_2], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t" + "eors %[S_2], %[reg1],%[S_2] \n\t" + "/*add round const*/ \n\t" + "ldrb %[reg1], [%[rc]] \n\t" 
+ "and %[reg2], %[reg1], 0xf \n\t" + "eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/\ + "eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/\ + "adds %[rc], %[rc], #1 \n\t" + "/*sbox first column 0,2,4,6 sbox1(s[0], s[2], ROR(s[4], 28), ROR(s[6], 20)); */ \n\t" + "mvns %[S_0], %[S_0] \n\t" + "ands %[reg1], %[S_2], %[S_0] \n\t" + "eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t" + "orrs %[S_4], %[S_2], %[S_4] , ROR #28 \n\t" + "eors %[S_0], %[S_0], %[S_6] , ROR #20 \n\t" + "eors %[S_4], %[S_4], %[S_0] \n\t" + "eors %[reg2], %[S_2], %[S_6] , ROR #20 \n\t" + "eors %[S_6], %[reg1],%[S_6] , ROR #20 \n\t" + "ands %[S_0], %[reg1],%[S_0] \n\t" + "eors %[S_0], %[reg2],%[S_0] \n\t" + "ands %[S_2], %[S_4], %[reg2] \n\t" + "eors %[S_2], %[reg1], %[S_2] \n\t" + "/*sbox first column 1,3,5,7 sbox1(s[1], ROR(s[3], 31), ROR(s[5], 28), ROR(s[7], 19)); */ \n\t" + "mvns %[S_1], %[S_1] \n\t" + "ands %[reg1], %[S_1], %[S_3] , ROR #31 \n\t" + "eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t" +"/*orrs %[S_5], %[S_5], ROR #28 %[S_3], ROR #31 31-28=3*/ \n\t" + "orrs %[S_5], %[S_5], %[S_3] , ROR #3 \n\t" + "eors %[S_1], %[S_1], %[S_7] , ROR #19 \n\t" + "eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t" +"/*eors %[reg2], %[S_7] , ROR #19, %[S_3] , ROR #31 31-19=12*/ \n\t" + "eors %[reg2], %[S_7], %[S_3] , ROR #12 \n\t" + "eors %[S_7], %[reg1],%[S_7] , ROR #19 \n\t" + "ands %[S_1], %[reg1],%[S_1] \n\t" + "eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t" + "ands %[S_3], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t" + "eors %[S_3], %[reg1],%[S_3] \n\t" + "/*loop control*/ \n\t" + "subs %[ro], %[ro], #1 \n\t" + "bne enc_loop2 \n\t" + + "/*add round const*/ \n\t" + "ldrb %[reg1], [%[rc]] \n\t" + "and %[reg2], %[reg1], 0xf \n\t" + "eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/ + "eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/ + "adds %[rc], %[rc], #1 \n\t" + 
"/*sbox first column 0,3,4,7 sbox1(s[0], s[3],ROR(s[4], 28), ROR(s[7], 20)); */ \n\t" + "mvns %[S_0], %[S_0] \n\t" + "ands %[reg1], %[S_3], %[S_0] \n\t" + "eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t" + "orrs %[S_4], %[S_3], %[S_4] , ROR #28 \n\t" + "eors %[S_0], %[S_0], %[S_7] , ROR #20 \n\t" + "eors %[S_4], %[S_4], %[S_0] \n\t" + "eors %[reg2], %[S_3], %[S_7] , ROR #20 \n\t" + "eors %[S_7], %[reg1],%[S_7] , ROR #20 \n\t" + "ands %[S_0], %[reg1],%[S_0] \n\t" + "eors %[S_0], %[reg2],%[S_0] \n\t" + "ands %[S_3], %[S_4], %[reg2] \n\t" + "eors %[S_3], %[reg1], %[S_3] \n\t" + "/*sbox first column 1,2,5,6 sbox1(s[1], ROR(s[2], 31), ROR(s[5], 28), ROR(s[6], 19)); */ \n\t" + "mvns %[S_1], %[S_1] \n\t" + "ands %[reg1], %[S_1], %[S_2] , ROR #31 \n\t" + "eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t" +"/*orrs %[S_5], %[S_5], ROR #28 %[S_2], ROR #31 31-28=3*/ \n\t" + "orrs %[S_5], %[S_5], %[S_2] , ROR #3 \n\t" + "eors %[S_1], %[S_1], %[S_6] , ROR #19 \n\t" + "eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t" +"/*eors %[reg2], %[S_6] , ROR #19, %[S_2] , ROR #31 31-19=12*/ \n\t" + "eors %[reg2], %[S_6], %[S_2] , ROR #12 \n\t" + "eors %[S_6], %[reg1],%[S_6] , ROR #19 \n\t" + "ands %[S_1], %[reg1],%[S_1] \n\t" + "eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t" + "ands %[S_2], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t" + "eors %[S_2], %[reg1],%[S_2] \n\t" + "/*add round const*/ \n\t" + "ldrb %[reg1], [%[rc]] \n\t" + "and %[reg2], %[reg1], 0xf \n\t" + "eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/\ + "eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/\ + "adds %[rc], %[rc], #1 \n\t" + "/*sbox first column 0,2,4,6 sbox1(s[0], s[2], ROR(s[4], 28), ROR(s[6], 20)); */ \n\t" + "mvns %[S_0], %[S_0] \n\t" + "ands %[reg1], %[S_2], %[S_0] \n\t" + "eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t" + "orrs %[S_4], %[S_2], %[S_4] , ROR #28 \n\t" + "eors %[S_0], %[S_0], %[S_6] , ROR #20 \n\t" + "eors %[S_4], %[S_4], 
%[S_0] \n\t" + "eors %[reg2], %[S_2], %[S_6] , ROR #20 \n\t" + "eors %[S_6], %[reg1],%[S_6] , ROR #20 \n\t" + "ands %[S_0], %[reg1],%[S_0] \n\t" + "eors %[S_0], %[reg2],%[S_0] \n\t" + "ands %[S_2], %[S_4], %[reg2] \n\t" + "eors %[S_2], %[reg1], %[S_2] \n\t" + "/*sbox first column 1,3,5,7 sbox1(s[1], ROR(s[3], 31), ROR(s[5], 28), ROR(s[7], 19)); */ \n\t" + "mvns %[S_1], %[S_1] \n\t" + "ands %[reg1], %[S_1], %[S_3] , ROR #31 \n\t" + "eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t" +"/*orrs %[S_5], %[S_5], ROR #28 %[S_3], ROR #31 31-28=3*/ \n\t" + "orrs %[S_5], %[S_5], %[S_3] , ROR #3 \n\t" + "eors %[S_1], %[S_1], %[S_7] , ROR #19 \n\t" + "eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t" +"/*eors %[reg2], %[S_7] , ROR #19, %[S_3] , ROR #31 31-19=12*/ \n\t" + "eors %[reg2], %[S_7], %[S_3] , ROR #12 \n\t" + "eors %[S_7], %[reg1],%[S_7] , ROR #19 \n\t" + "ands %[S_1], %[reg1],%[S_1] \n\t" + "eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t" + "ands %[S_3], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t" + "eors %[S_3], %[reg1],%[S_3] \n\t" + "/*add round const*/ \n\t" + "ldrb %[reg1], [%[rc]] \n\t" + "and %[reg2], %[reg1], 0xf \n\t" + "eors %[S_0], %[S_0], %[reg1],LSR #4 \n\t" /*s[0] ^= constant6Format[lunNum]>>4;*/\ + "eors %[S_1], %[S_1], %[reg2] \n\t" /*s[1] ^= constant6Format[lunNum] & 0x0f;*/\ + "/*sbox first column 0,3,4,7 sbox1(s[0], s[3],ROR(s[4], 28), ROR(s[7], 20)); */ \n\t" + "mvns %[S_0], %[S_0] \n\t" + "ands %[reg1], %[S_3], %[S_0] \n\t" + "eors %[reg1], %[reg1],%[S_4] , ROR #28 \n\t" + "orrs %[S_4], %[S_3], %[S_4] , ROR #28 \n\t" + "eors %[S_0], %[S_0], %[S_7] , ROR #20 \n\t" + "eors %[S_4], %[S_4], %[S_0] \n\t" + "eors %[reg2], %[S_3], %[S_7] , ROR #20 \n\t" + "eors %[S_7], %[reg1],%[S_7] , ROR #20 \n\t" + "ands %[S_0], %[reg1],%[S_0] \n\t" + "eors %[S_0], %[reg2],%[S_0] \n\t" + "ands %[S_3], %[S_4], %[reg2] \n\t" + "eors %[S_3], %[reg1], %[S_3] \n\t" + "/*sbox first column 1,2,5,6 sbox1(s[1], ROR(s[2], 31), ROR(s[5], 28), ROR(s[6], 19)); */ 
\n\t" + "mvns %[S_1], %[S_1] \n\t" + "ands %[reg1], %[S_1], %[S_2] , ROR #31 \n\t" + "eors %[reg1], %[reg1], %[S_5] , ROR #28 \n\t" +"/*orrs %[S_5], %[S_5], ROR #28 %[S_2], ROR #31 31-28=3*/ \n\t" + "orrs %[S_5], %[S_5], %[S_2] , ROR #3 \n\t" + "eors %[S_1], %[S_1], %[S_6] , ROR #19 \n\t" + "eors %[S_5], %[S_1], %[S_5] , ROR #28 /* 31-28=3*/ \n\t" +"/*eors %[reg2], %[S_6] , ROR #19, %[S_2] , ROR #31 31-19=12*/ \n\t" + "eors %[reg2], %[S_6], %[S_2] , ROR #12 \n\t" + "eors %[S_6], %[reg1],%[S_6] , ROR #19 \n\t" + "ands %[S_1], %[reg1],%[S_1] \n\t" + "eors %[S_1], %[S_1], %[reg2] , ROR #19 /* 31-19=12*/ \n\t" + "ands %[S_2], %[S_5], %[reg2] , ROR #19 /* 31-19=12*/ \n\t" + "eors %[S_2], %[reg1],%[S_2] \n\t" + "ROR %[S_3], #31 \n\t" + "ROR %[S_4], #28 \n\t" + "ROR %[S_5], #28 \n\t" + "ROR %[S_6], #20 \n\t" + "ROR %[S_7], #19 \n\t" + : /* output variables - including inputs that are changed */ + [ro] "+r" (rounds),[reg1] "=r" (reg1), [reg2] "=r" (reg2), [rc] "+r" (rc), + [S_0] "+r" (s[0]), [S_2] "+r" (s[2]), [S_4] "+r" (s[4]), [S_6] "+r" (s[6]) , + [S_1] "+r" (s[1]), [S_3] "+r" (s[3]), [S_5] "+r" (s[5]), [S_7] "+r" (s[7]) + : /* input variables */ + : /* clobber registers for temporary values */ + ); +} + +void unpackFormat(u8 * out, u32 * in) { + u32 t[2] = { 0 }; + t[1] = (in[0] & 0xFFFF0000) | (in[1] >> 16); + t[0] = (in[1] & 0x0000FFFF) | (in[0] << 16); + u32 r0, r1; + r0 = (t[0] ^ (t[0] >> 8)) & 0x0000FF00, t[0] ^= r0 ^ (r0 << 8); + r0 = (t[0] ^ (t[0] >> 4)) & 0x00F000F0, t[0] ^= r0 ^ (r0 << 4); + r0 = (t[0] ^ (t[0] >> 2)) & 0x0C0C0C0C, t[0] ^= r0 ^ (r0 << 2); + r0 = (t[0] ^ (t[0] >> 1)) & 0x22222222, t[0] ^= r0 ^ (r0 << 1); + r1 = (t[1] ^ (t[1] >> 8)) & 0x0000FF00, t[1] ^= r1 ^ (r1 << 8); + r1 = (t[1] ^ (t[1] >> 4)) & 0x00F000F0, t[1] ^= r1 ^ (r1 << 4); + r1 = (t[1] ^ (t[1] >> 2)) & 0x0C0C0C0C, t[1] ^= r1 ^ (r1 << 2); + r1 = (t[1] ^ (t[1] >> 1)) & 0x22222222, t[1] ^= r1 ^ (r1 << 1); + memcpy(out, t, 8 * sizeof(unsigned char)); +} + + +void getU32Format(u32 *out, 
const u8* in) { + u32 r0, lo = U32BIG(((u32* )in)[0]); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + *out = lo; +} diff --git a/knot/Implementations/crypto_hash/knot256v1/armcortexm_7/auxFormat.h b/knot/Implementations/crypto_hash/knot256v1/armcortexm_7/auxFormat.h new file mode 100644 index 0000000..fb02b7c --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v1/armcortexm_7/auxFormat.h @@ -0,0 +1,14 @@ +#include"api.h" +#define U32BIG(x) (x) +#include + + +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; + +void getU32Format(u32 *out, const u8* in); +void unpackFormat(u8 * out, u32 * in) ; +void P256(unsigned int *s, unsigned char *rc, unsigned char rounds); + + diff --git a/knot/Implementations/crypto_hash/knot256v1/armcortexm_7/hash.c b/knot/Implementations/crypto_hash/knot256v1/armcortexm_7/hash.c new file mode 100644 index 0000000..e96ab48 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v1/armcortexm_7/hash.c @@ -0,0 +1,115 @@ +#include"auxFormat.h" + + +//#define hash_RATE (32 / 8) +#define hash_RATE 4 + +#define PRH_ROUNDS 16 +//#define PRH_ROUNDS 33 +//#define PRH_ROUNDS 68 +unsigned char constant7Format[68] = { +/*constant7_hash_256v1:*/ + 0x1, + 0x10, + 0x2, + 0x20, + 0x4, + 0x40, + 0x9, + 0x11, + 0x12, + 0x22, + 0x24, + 0x44, + 0x49, + 0x18, + 0x3, + 0x30, + 0x6, + 0x60, + 0xd, + 0x51, + 0x1b, + 0x33, + 0x36, + 0x66, + 0x6d, + 0x5c, + 0x4a, + 0x28, + 0x5, + 0x50, + 0xb, + 0x31, + 0x16, + 0x62, + 0x2d, + 0x55, + 0x5b, + 0x3a, + 0x27, + 0x74, + 0x4f, + 0x78, + 0xe, + 0x61, + 0x1d, + 0x53, + 0x3b, + 0x37, + 0x76, + 0x6f, + 0x7c, + 0x4e, + 0x68, + 0xc, + 0x41, + 0x19, + 0x13, + 0x32, + 0x26, + 0x64, + 0x4d, + 0x58, + 0xa, + 0x21, + 0x14, + 0x42, + 0x29, + 0x15, +}; +int crypto_hash(unsigned char *out, const 
unsigned char *in, + unsigned long long inlen) { + u32 dataFormat[2] = { 0 }; + // initialization + u32 s[8] = { 0 }; + u8 tempData[32]; + //absorb + //RATE=4 + while (inlen >= hash_RATE) { + getU32Format(dataFormat, in); + s[0] ^= dataFormat[0] >>16; + s[1] ^= dataFormat[0] &0xffff; + P256(s, constant7Format, PRH_ROUNDS); + inlen -= hash_RATE; + in += hash_RATE; + } + memset(tempData, 0, hash_RATE); + memcpy(tempData, in, inlen * sizeof(unsigned char)); + tempData[inlen] = 0x01; + getU32Format(dataFormat, tempData); + s[0] ^= dataFormat[0] >> 16; + s[1] ^= dataFormat[0] & 0xffff; + P256(s, constant7Format, PRH_ROUNDS); + //sequeez + unpackFormat(out, s); + unpackFormat((out + 8), (s + 2)); + P256(s, constant7Format, PRH_ROUNDS); + out += CRYPTO_BYTES / 2; + unpackFormat(out, s); + unpackFormat((out + 8), (s + 2)); + return 0; +} + + + diff --git a/knot/Implementations/crypto_hash/knot256v1/opt_1/api.h b/knot/Implementations/crypto_hash/knot256v1/opt_1/api.h new file mode 100644 index 0000000..7715d8e --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v1/opt_1/api.h @@ -0,0 +1,2 @@ +#define CRYPTO_BYTES 32 + diff --git a/knot/Implementations/crypto_hash/knot256v1/opt_1/hash.c b/knot/Implementations/crypto_hash/knot256v1/opt_1/hash.c new file mode 100644 index 0000000..f700e9d --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v1/opt_1/hash.c @@ -0,0 +1,77 @@ +#include"api.h" +#include +typedef unsigned char u8; +typedef unsigned long long u64; +typedef unsigned int u32; + +#define PRH_ROUNDS 68 +#define RATE 4 +#define sbox(a, b, c, d, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} + +#define LOTR64(x,n) (((x)<<(n))|((x)>>(64-(n)))) +#define ROUND256(i) {\ + x0^=constant7[i];\ + sbox(x0, x1, x2, x3, x5, x6, x7);\ + x1=LOTR64(x5,1);\ + x2=LOTR64(x6,8);\ + x3=LOTR64(x7,25);\ +} +#define U32BIG(x) (x) +#define U64BIG(x) (x) + 
+u8 constant7[127] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03, 0x06, + 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a, 0x14, 0x28, 0x51, 0x23, 0x47, + 0x0f, 0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b, 0x16, + 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54, 0x29, 0x53, + 0x27, 0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43, 0x07, + 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49, 0x13, 0x26, + 0x4d, 0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37, 0x6f, + 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31, 0x63, 0x46, 0x0d, 0x1a, 0x34, + 0x69, 0x52, 0x25, 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c, 0x39, + 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a, 0x15, 0x2a, 0x55, 0x2b, 0x57, + 0x2f, 0x5f, 0x3f, 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40 }; +int crypto_hash(unsigned char *out, const unsigned char *in, + unsigned long long inlen) { + + u64 t1, t2, t3, t5, t6, t8, t9, t11; + u64 x3 = 0, x2 = 0, x1 = 0, x0 = 0, x7, x6, x5; + u64 i; + + u8 tempData[32]; + // initialization + //absorb + //RATE=4 + while (inlen >= RATE) { + x0 ^= (u64)U32BIG(((u32*)in)[0]); + for (i = 0; i < PRH_ROUNDS; i++) { + + ROUND256(i); + } + inlen -= RATE; + in += RATE; + } + memset(tempData, 0, RATE); + memcpy(tempData, in, inlen * sizeof(unsigned char)); + tempData[inlen] = 0x01; + x0 ^= (u64)U32BIG(((u32*)tempData)[0]); + for (i = 0; i < PRH_ROUNDS; i++) { + ROUND256(i); + } + //sequeez + ((u64*)out)[0] = U64BIG(x0); + ((u64*)out)[1] = U64BIG(x1); + out += CRYPTO_BYTES / 2; + for (i = 0; i < PRH_ROUNDS; i++) { + ROUND256(i); + } + ((u64*)out)[0] = U64BIG(x0); + ((u64*)out)[1] = U64BIG(x1); + return 0; +} + + + + diff --git a/knot/Implementations/crypto_hash/knot256v2/opt_1/api.h b/knot/Implementations/crypto_hash/knot256v2/opt_1/api.h new file mode 100644 index 0000000..7715d8e --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/opt_1/api.h @@ -0,0 +1,2 @@ +#define CRYPTO_BYTES 32 + diff --git 
a/knot/Implementations/crypto_hash/knot256v2/opt_1/hash.c b/knot/Implementations/crypto_hash/knot256v2/opt_1/hash.c new file mode 100644 index 0000000..976e4f1 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/opt_1/hash.c @@ -0,0 +1,82 @@ +#include "api.h" +#define PRH_ROUNDS 80 +typedef unsigned char u8; +typedef unsigned long long u64; +typedef long long i64; +typedef unsigned int u32; +#define RATE 16 +#define sbox(a, b, c, d, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} +#define ROTR96MORE321(a,b,n) ((b<<(n-32))>>32) +#define ROTR96MORE322(a,b,n) (b<>(96-n)) +#define ROTR961(a,b,n) (((a)<<(n))|((b)>>(64-n))) +#define ROTR962(a,b,n) (((b)<<(n))|((a)>>(32-n))) +#define U32BIG(x) (x) +#define U64BIG(x) (x) +u8 constant7[80] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03, 0x06, + 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a, 0x14, 0x28, 0x51, 0x23, 0x47, + 0x0f, 0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b, 0x16, + 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54, 0x29, 0x53, + 0x27, 0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43, 0x07, + 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49, 0x13, 0x26, + 0x4d, 0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37}; +#define ROUND384(i) {\ +x00 ^= constant7[i];\ +sbox(x00, x10, x20, x30, x50, x60, x70);\ +sbox(x01, x11, x21, x31, x51, x61, x71);\ +x11 = ROTR961(x51, x50, 1);\ +x10 = ROTR962(x51, x50, 1);\ +x21 = ROTR961(x61, x60, 8);\ +x20 = ROTR962(x61, x60, 8);\ +x31 = ROTR96MORE321(x71, x70, 55);\ +x30 = ROTR96MORE322(x71, x70, 55);\ +} +int crypto_hash(unsigned char *out, const unsigned char *in, + unsigned long long inlen) { + u64 i; + u64 t1, t2, t3, t5, t6, t8, t9, t11; + u64 x30 = 0, x20 = 0, x10 = 0, x00 = 0; + u32 x31 = 0x80000000, x21 = 0, x11 = 0, x01 = 0; + u64 x50, x60, x70; + u32 x51, x61, x71; + u8 tempData1[16] 
= { 0 }; + // initialization + //absorb + while (inlen >= RATE) { + x00 ^= U64BIG(*(u64*)in); + x01 ^= U32BIG(*(u32*)(in + 8)); + x10 ^= U32BIG(*(u32*)(in + 12)); + for (i = 0; i < PRH_ROUNDS; i++) { + ROUND384(i); + } + inlen -= RATE; + in += RATE; + } + + memset(tempData1, 0, RATE); + memcpy(tempData1, in, inlen ); + tempData1[inlen] = 0x01; + x00 ^= U64BIG(*(u64*)tempData1); + x01 ^= U32BIG(*(u32*)(tempData1 + 8)); + x10 ^= U32BIG(*(u32*)(tempData1 + 12)); + for (i = 0; i < PRH_ROUNDS; i++) { + ROUND384(i); + } + //sequeez + *(u64*)(out) = U64BIG(x00); + *(u32*)(out + 8) = U32BIG(x01); + *(u32*)(out + 12) = U64BIG(x10); + out += CRYPTO_BYTES / 2; + for (i = 0; i < PRH_ROUNDS; i++) { + ROUND384(i); + } + *(u64*)(out) = U64BIG(x00); + *(u32*)(out + 8) = U32BIG(x01); + *(u32*)(out + 12) = U64BIG(x10); + return 0; +} + + + diff --git a/knot/Implementations/crypto_hash/knot384/opt_1/api.h b/knot/Implementations/crypto_hash/knot384/opt_1/api.h new file mode 100644 index 0000000..dd41001 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/opt_1/api.h @@ -0,0 +1,2 @@ +#define CRYPTO_BYTES 48 + diff --git a/knot/Implementations/crypto_hash/knot384/opt_1/hash.c b/knot/Implementations/crypto_hash/knot384/opt_1/hash.c new file mode 100644 index 0000000..d3f04ea --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/opt_1/hash.c @@ -0,0 +1,87 @@ +#include "api.h" +#include + +typedef unsigned char u8; +typedef unsigned long long u64; +typedef unsigned int u32; + +#define RATE 6 +#define PRH_ROUNDS 104 +#define sbox(a, b, c, d, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} +#define ROTR961(a,b,n) (((a)<<(n))|((b)>>(64-n))) +#define ROTR962(a,b,n) (((b)<<(n))|((a)>>(32-n))) + +#define ROTR96MORE321(a,b,n) ((b<<(n-32))>>32) +#define ROTR96MORE322(a,b,n) (b<>(96-n)) + +#define U32BIG(x) (x) +#define U64BIG(x) (x) +u8 constant7[104] = { 
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03, 0x06, + 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a, 0x14, 0x28, 0x51, 0x23, 0x47, + 0x0f, 0x1e, 0x3c, 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b, 0x16, + 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a, 0x75, 0x6a, 0x54, 0x29, 0x53, + 0x27, 0x4f, 0x1f, 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43, 0x07, + 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09, 0x12, 0x24, 0x49, 0x13, 0x26, + 0x4d, 0x1b, 0x36, 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37, 0x6f, + 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31, 0x63, 0x46, 0x0d, 0x1a, 0x34, + 0x69, 0x52, 0x25, 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c }; +#define ROUND384(i) {\ +x00 ^= constant7[i];\ +sbox(x00, x10, x20, x30, x50, x60, x70);\ +sbox(x01, x11, x21, x31, x51, x61, x71);\ +x11 = ROTR961(x51, x50, 1);\ +x10 = ROTR962(x51, x50, 1);\ +x21 = ROTR961(x61, x60, 8);\ +x20 = ROTR962(x61, x60, 8);\ +x31 = ROTR96MORE321(x71, x70, 55);\ +x30 = ROTR96MORE322(x71, x70, 55);\ +} +int crypto_hash(unsigned char *out, const unsigned char *in, + unsigned long long inlen) { + u64 i; + u64 t1, t2, t3, t5, t6, t8, t9, t11; + u64 x30 = 0, x20 = 0, x10 = 0, x00 = 0; + u32 x31 = 0, x21 = 0, x11 = 0, x01 = 0; + u64 x50, x60, x70; + u32 x51, x61, x71; + u8 tempData1[24] = { 0 }; + // initialization + //absorb + while (inlen >= RATE) { + //x00 ^= U64BIG(*(u64*)(in)) & (0x0000FFFFFFFFFFFFULL); + memcpy(&tempData1, in, RATE); + x00 ^= U64BIG(((u64*)tempData1)[0]); + for (i = 0; i < PRH_ROUNDS; i++) { + ROUND384(i); + } + inlen -= RATE; + in += RATE; + } + memset(tempData1, 0, RATE); + memcpy(tempData1, in, inlen * sizeof(unsigned char)); + tempData1[inlen] = 0x01; + x00 ^= U64BIG(((u64*)tempData1)[0]); + for (i = 0; i < PRH_ROUNDS; i++) { + ROUND384(i); + } + //sequeez + *(u64*)(out) = U64BIG(x00); + *(u32*)(out + 8) = U32BIG(x01); + *(u64*)tempData1 = U64BIG(x10); + *(u32*)(tempData1 + 8) = U32BIG(x11); + memcpy(out+12, tempData1, CRYPTO_BYTES /4); + out += CRYPTO_BYTES / 2; + for (i = 0; i < 
PRH_ROUNDS; i++) { + ROUND384(i); + } + *(u64*)(out) = U64BIG(x00); + *(u32*)(out + 8) = U32BIG(x01); + *(u64*)tempData1 = U64BIG(x10); + *(u32*)(tempData1 + 8) = U32BIG(x11); + memcpy(out + 12, tempData1, CRYPTO_BYTES / 4); + return 0; +} + diff --git a/knot/Implementations/crypto_hash/knot512/opt_1/api.h b/knot/Implementations/crypto_hash/knot512/opt_1/api.h new file mode 100644 index 0000000..99fe592 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/opt_1/api.h @@ -0,0 +1,2 @@ +#define CRYPTO_BYTES 64 + diff --git a/knot/Implementations/crypto_hash/knot512/opt_1/hash.c b/knot/Implementations/crypto_hash/knot512/opt_1/hash.c new file mode 100644 index 0000000..91b3aa8 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/opt_1/hash.c @@ -0,0 +1,98 @@ +#include "api.h" +typedef unsigned char u8; +typedef unsigned long long u64; + +#define PRH_ROUNDS 140 + +#define RATE 8 +#define ROTR(x,n) (((x)>>(n))|((x)<<(64-(n)))) + +#define U64BIG(x) (x) + +static const u8 constant8[255] = { 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, + 0x8e, 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25, 0x4b, 0x97, 0x2e, + 0x5c, 0xb8, 0x70, 0xe0, 0xc0, 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, + 0xc9, 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37, 0x6e, 0xdc, 0xb9, + 0x72, 0xe4, 0xc8, 0x90, 0x20, 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, + 0xad, 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac, 0x59, 0xb2, 0x65, + 0xcb, 0x96, 0x2c, 0x58, 0xb0, 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, + 0xfb, 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a, 0xf5, 0xeb, 0xd7, + 0xae, 0x5d, 0xba, 0x74, 0xe8, 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, + 0x86, 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7, 0x8f, 0x1e, 0x3c, + 0x79, 0xf3, 0xe7, 0xce, 0x9c, 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, + 0xc5, 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4, 0x48, 0x91, 0x22, + 0x45, 0x8a, 0x14, 0x29, 0x52, 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, + 0xa7, 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76, 
0xec, 0xd9, 0xb3, + 0x67, 0xcf, 0x9e, 0x3d, 0x7b, 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, + 0xf4, 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd, 0x9a, 0x35, 0x6a, + 0xd4, 0xa8, 0x51, 0xa3, 0x46, 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, + 0x0e, 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab, 0x57, 0xaf, 0x5f, + 0xbe, 0x7c, 0xf9, 0xf2, 0xe5, 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, + 0x09, 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe, 0xfc, 0xf8, 0xf0, + 0xe1, 0xc2, 0x85, 0x0b, 0x17, 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, + 0x8d, 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80 }; +#define sbox(a, b, c, d, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define LOTR1281(a,b,n) (((a)<<(n))|((b)>>(64-n))) +#define LOTR1282(a,b,n) (((b)<<(n))|((a)>>(64-n))) + +#define ROUND512(i) {\ + x00^=constant8[i];\ + sbox(x00, x10, x20, x30, b10, b20, b30);\ + sbox(x01, x11, x21, x31, b11, b21, b31);\ + x10=LOTR1281(b10,b11,1);\ + x20=LOTR1281(b20,b21,16);\ + x30=LOTR1281(b30,b31,25);\ + x11=LOTR1282(b10,b11,1);\ + x21=LOTR1282(b20,b21,16);\ + x31=LOTR1282(b30,b31,25);\ +} +int crypto_hash(unsigned char *out, const unsigned char *in, + unsigned long long inlen) { + + u64 b01, b11, b21, b31, b00, b10, b20, b30; + u64 t1, t2, t3, t5, t6, t8, t9, t11; + u64 x30 = 0, x20 = 0, x10 = 0, x00 = 0, x31 = 0, x21 = 0, x11 = 0, x01 = 0; + u64 i; + u8 tempData[32]; + + // initialization + //absorb + while (inlen >= RATE) { + x00 ^= U64BIG(((u64*)in)[0]); + for (i = 0; i < PRH_ROUNDS; i++) { + ROUND512(i); + } + inlen -= RATE; + in += RATE; + } + memset(tempData, 0, RATE); + memcpy(tempData, in, inlen * sizeof(unsigned char)); + tempData[inlen] = 0x01; + x00 ^= U64BIG(((u64*)tempData)[0]); + + for (i = 0; i < PRH_ROUNDS; i++) { + ROUND512(i); + } + //sequeez + + ((u64*)out)[0] = U64BIG(x00); + ((u64*)out)[1] = 
U64BIG(x01); + ((u64*)out)[2] = U64BIG(x10); + ((u64*)out)[3] = U64BIG(x11); + out += CRYPTO_BYTES / 2; + for (i = 0; i < PRH_ROUNDS; i++) { + ROUND512(i); + } + ((u64*)out)[0] = U64BIG(x00); + ((u64*)out)[1] = U64BIG(x01); + ((u64*)out)[2] = U64BIG(x10); + ((u64*)out)[3] = U64BIG(x11); + return 0; +} + + diff --git a/knot/Implementations/crypto_hash/knot512/opt_SSE/api.h b/knot/Implementations/crypto_hash/knot512/opt_SSE/api.h new file mode 100644 index 0000000..de9380d --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/opt_SSE/api.h @@ -0,0 +1 @@ +#define CRYPTO_BYTES 64 diff --git a/knot/Implementations/crypto_hash/knot512/opt_SSE/hash.c b/knot/Implementations/crypto_hash/knot512/opt_SSE/hash.c new file mode 100644 index 0000000..6d33b6b --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/opt_SSE/hash.c @@ -0,0 +1,109 @@ +#include "api.h" +typedef unsigned char u8; +typedef unsigned long long u64; + +#define PRH_ROUNDS 140 +#define RATE 8 +#define U64BIG(x) (x) +#include +#include +#include + +#include +#include +#include //sse2 header file(include sse header file) +#include //SSE3(include emmintrin.h) +#include //SSSE3(include pmmintrin.h) +#include //SSE4.1(include tmmintrin.h) +#include //SSE4.2(include smmintrin.h) +#include +#include +#include +#define U64BIG(x) (x) +#define U32BIG(x) (x) +static const u8 constant8[255] = { 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, + 0x8e, 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25, 0x4b, 0x97, 0x2e, + 0x5c, 0xb8, 0x70, 0xe0, 0xc0, 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, + 0xc9, 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37, 0x6e, 0xdc, 0xb9, + 0x72, 0xe4, 0xc8, 0x90, 0x20, 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, + 0xad, 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac, 0x59, 0xb2, 0x65, + 0xcb, 0x96, 0x2c, 0x58, 0xb0, 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, + 0xfb, 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a, 0xf5, 0xeb, 0xd7, + 0xae, 0x5d, 0xba, 0x74, 0xe8, 0xd1, 0xa2, 0x44, 0x88, 0x10, 
0x21, 0x43, + 0x86, 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7, 0x8f, 0x1e, 0x3c, + 0x79, 0xf3, 0xe7, 0xce, 0x9c, 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, + 0xc5, 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4, 0x48, 0x91, 0x22, + 0x45, 0x8a, 0x14, 0x29, 0x52, 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, + 0xa7, 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76, 0xec, 0xd9, 0xb3, + 0x67, 0xcf, 0x9e, 0x3d, 0x7b, 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, + 0xf4, 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd, 0x9a, 0x35, 0x6a, + 0xd4, 0xa8, 0x51, 0xa3, 0x46, 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, + 0x0e, 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab, 0x57, 0xaf, 0x5f, + 0xbe, 0x7c, 0xf9, 0xf2, 0xe5, 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, + 0x09, 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe, 0xfc, 0xf8, 0xf0, + 0xe1, 0xc2, 0x85, 0x0b, 0x17, 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, + 0x8d, 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80 }; +#define forward_sbox_SSE(a, b, c, d, f, g, h) \ +{ \ +tmm1 =_mm_xor_si128( a , all1 ); \ +tmm2 =_mm_and_si128( b, tmm1 ); \ +tmm3 =_mm_xor_si128( c , tmm2 ); \ +h=_mm_xor_si128( d , tmm3 ); \ +tmm5 =_mm_or_si128 ( b, c); \ +tmm6=_mm_xor_si128( d, tmm1 ); \ +g=_mm_xor_si128( tmm5, tmm6 ); \ +tmm8=_mm_xor_si128( b, d ); \ +tmm9=_mm_and_si128( tmm3, tmm6 ); \ +a=_mm_xor_si128( tmm8, tmm9 ); \ +tmm11=_mm_and_si128( g, tmm8 ); \ +f=_mm_xor_si128( tmm3, tmm11 ); \ +} + +#define ROUND512(i) {\ +state[0] = _mm_xor_si128(state[0], _mm_set_epi64x(0,(u64)constant8[i])); \ +forward_sbox_SSE(state[0], state[1], state[2], state[3], out1, out2, out3);\ +state[1] = _mm_or_si128(_mm_slli_epi64(out1, 1), _mm_srli_epi64(_mm_shuffle_epi32(out1, _MM_SHUFFLE(1, 0, 3, 2)), 63));\ +state[2] = _mm_or_si128(_mm_slli_epi64(out2, 16), _mm_srli_epi64(_mm_shuffle_epi32(out2, _MM_SHUFFLE(1, 0, 3, 2)), 48));\ +state[3] = _mm_or_si128(_mm_slli_epi64(out3, 25), _mm_srli_epi64(_mm_shuffle_epi32(out3, _MM_SHUFFLE(1, 0, 3, 2)), 39));\ +} +int crypto_hash(unsigned 
char *out, const unsigned char *in, + unsigned long long inlen) { + __m128i all1 = _mm_set1_epi32(0xffffffff); + __m128i tmm0, tmm1, tmm2, tmm3, tmm5, tmm6, tmm8, tmm9, tmm11, out1, out2, out3; + __m128i state[4] = { 0 }; + u8 i=0; + u8 tempData[8] = { 0 }; + // initialization + //absorb + //rlen = inlen; + //RATE=8 + while (inlen >= RATE) { + tmm0 = _mm_loadl_epi64((__m128i*)in); + state[0] = _mm_xor_si128(state[0], tmm0); + for (i = 0; i < PRH_ROUNDS; i++) { + ROUND512(i); + } + inlen -= RATE; + in += RATE; + } + + memset(tempData, 0, 8); + memcpy(tempData, in, inlen * sizeof(unsigned char)); + tempData[inlen] = 0x01; + state[0] = _mm_xor_si128(state[0], _mm_loadl_epi64((__m128i*)(tempData))); + + for (i = 0; i < PRH_ROUNDS; i++) { + ROUND512(i); + } + //sequeez + + memcpy(out, state, CRYPTO_BYTES / 2 * sizeof(unsigned char)); + for (i = 0; i < PRH_ROUNDS; i++) { + ROUND512(i); + } + + memcpy(out+ CRYPTO_BYTES / 2, state, CRYPTO_BYTES / 2 * sizeof(unsigned char)); + return 0; +} + + -- libgit2 0.26.0