Working towards a F7 template for benchmarking masked implementations

30e02cca · Sebastian Renner · dbbd736b · 30e02cca · 30e02cca · 30e02cca
Commit 30e02cca authored Aug 11, 2022 by Sebastian Renner
60 changed files
--- a/src-ascon/ascon/.clang-format
+++ b/src-ascon/ascon/.clang-format
+---
+Language:        Cpp
+# BasedOnStyle:  Google
+AccessModifierOffset: -1
+AlignAfterOpenBracket: Align
+AlignConsecutiveMacros: false
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlines: Left
+AlignOperands:   true
+AlignTrailingComments: true
+AllowAllArgumentsOnNextLine: true
+AllowAllConstructorInitializersOnNextLine: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: Never
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: All
+AllowShortLambdasOnASingleLine: All
+AllowShortIfStatementsOnASingleLine: WithoutElse
+AllowShortLoopsOnASingleLine: true
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: true
+AlwaysBreakTemplateDeclarations: Yes
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+  AfterCaseLabel:  false
+  AfterClass:      false
+  AfterControlStatement: false
+  AfterEnum:       false
+  AfterFunction:   false
+  AfterNamespace:  false
+  AfterObjCDeclaration: false
+  AfterStruct:     false
+  AfterUnion:      false
+  AfterExternBlock: false
+  BeforeCatch:     false
+  BeforeElse:      false
+  IndentBraces:    false
+  SplitEmptyFunction: true
+  SplitEmptyRecord: true
+  SplitEmptyNamespace: true
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Attach
+BreakBeforeInheritanceComma: false
+BreakInheritanceList: BeforeColon
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+BreakConstructorInitializers: BeforeColon
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: true
+ColumnLimit:     80
+CommentPragmas:  '^ IWYU pragma:'
+CompactNamespaces: false
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DeriveLineEnding: true
+DerivePointerAlignment: true
+DisableFormat:   false
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: true
+ForEachMacros:
+  - foreach
+  - Q_FOREACH
+  - BOOST_FOREACH
+IncludeBlocks:   Regroup
+IncludeCategories:
+  - Regex:           '^<ext/.*\.h>'
+    Priority:        2
+    SortPriority:    0
+  - Regex:           '^<.*\.h>'
+    Priority:        1
+    SortPriority:    0
+  - Regex:           '^<.*'
+    Priority:        2
+    SortPriority:    0
+  - Regex:           '.*'
+    Priority:        3
+    SortPriority:    0
+IncludeIsMainRegex: '([-_](test|unittest))?$'
+IncludeIsMainSourceRegex: ''
+IndentCaseLabels: true
+IndentGotoLabels: true
+IndentPPDirectives: None
+IndentWidth:     2
+IndentWrappedFunctionNames: false
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd:   ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBinPackProtocolList: Never
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakAssignment: 2
+PenaltyBreakBeforeFirstCallParameter: 1
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyBreakTemplateDeclaration: 10
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 200
+PointerAlignment: Left
+RawStringFormats:
+  - Language:        Cpp
+    Delimiters:
+      - cc
+      - CC
+      - cpp
+      - Cpp
+      - CPP
+      - 'c++'
+      - 'C++'
+    CanonicalDelimiter: ''
+    BasedOnStyle:    google
+  - Language:        TextProto
+    Delimiters:
+      - pb
+      - PB
+      - proto
+      - PROTO
+    EnclosingFunctions:
+      - EqualsProto
+      - EquivToProto
+      - PARSE_PARTIAL_TEXT_PROTO
+      - PARSE_TEST_PROTO
+      - PARSE_TEXT_PROTO
+      - ParseTextOrDie
+      - ParseTextProtoOrDie
+    CanonicalDelimiter: ''
+    BasedOnStyle:    google
+ReflowComments:  true
+SortIncludes:    true
+SortUsingDeclarations: true
+SpaceAfterCStyleCast: false
+SpaceAfterLogicalNot: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCpp11BracedList: false
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceBeforeParens: ControlStatements
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceInEmptyBlock: false
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 2
+SpacesInAngles:  false
+SpacesInConditionalStatement: false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+SpaceBeforeSquareBrackets: false
+Standard:        Auto
+StatementMacros:
+  - Q_UNUSED
+  - QT_REQUIRE_VERSION
+TabWidth:        8
+UseCRLF:         false
+UseTab:          Never
+...
--- a/src-ascon/ascon/Documents/documentation.pdf
+++ b/src-ascon/ascon/Documents/documentation.pdf
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/LWC_AEAD_KAT_128_128.txt
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/LWC_AEAD_KAT_128_128.txt
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/aead.c
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/aead.c
+#include "api.h"
+#include "ascon.h"
+#include "permutations.h"
+#include "printstate.h"
+/*
+ * Initialise the state `s` for an AEAD operation from the shared nonce `n`
+ * and the shared key `k`.
+ *
+ * x[5] and x[0] are filled with MZERO, the two IV words are XORed into share 0
+ * of x[0], the nonce is loaded into x[3]/x[4] and the key into x[1]/x[2].
+ * After ASCON_PA_ROUNDS rounds of P the key words are XORed into x[3]/x[4].
+ */
+void ascon_initaead(state_t* s, const mask_npub_uint32_t* n,
+                    const mask_key_uint32_t* k) {
+  /* N0/N1 receive the loaded nonce words but are not read again below */
+  word_t N0, N1;
+  /* K1/K2 keep the loaded key words for the second key xor */
+  word_t K1, K2;
+  /* randomize the initial state */
+  s->x[5] = MZERO(NUM_SHARES_KEY);
+  s->x[0] = MZERO(NUM_SHARES_KEY);
+  /* set the initial value (the two 32-bit halves of ASCON_128_IV) */
+  s->x[0].s[0].w[0] ^= 0x08220000;
+  s->x[0].s[0].w[1] ^= 0x80210000;
+  /* set the nonce; the second word starts two elements after the first */
+  s->x[3] = N0 = MLOAD((uint32_t*)n, NUM_SHARES_NPUB);
+  s->x[4] = N1 = MLOAD((uint32_t*)(n + 2), NUM_SHARES_NPUB);
+  /* first key xor; the second word starts two elements after the first */
+  s->x[1] = K1 = MLOAD((uint32_t*)k, NUM_SHARES_KEY);
+  s->x[2] = K2 = MLOAD((uint32_t*)(k + 2), NUM_SHARES_KEY);
+  printstate("init 1st key xor", s, NUM_SHARES_KEY);
+  /* compute the permutation */
+  P(s, ASCON_PA_ROUNDS, NUM_SHARES_KEY);
+  /* second key xor */
+  s->x[3] = MXOR(s->x[3], K1, NUM_SHARES_KEY);
+  s->x[4] = MXOR(s->x[4], K2, NUM_SHARES_KEY);
+  printstate("init 2nd key xor", s, NUM_SHARES_KEY);
+}
+/*
+ * Absorb `adlen` bytes of shared associated data `ad` into the state `s` and
+ * apply the domain separation bit.
+ *
+ * When adlen is 0 nothing is absorbed and no permutation is run; the domain
+ * separation at the end is applied in every case.
+ */
+void ascon_adata(state_t* s, const mask_ad_uint32_t* ad, uint64_t adlen) {
+  const int nr = ASCON_PB_ROUNDS;
+  if (adlen) {
+    /* full associated data blocks */
+    while (adlen >= ASCON_AEAD_RATE) {
+      word_t as = MLOAD((uint32_t*)ad, NUM_SHARES_AD);
+      s->x[0] = MXOR(s->x[0], as, NUM_SHARES_AD);
+      printstate("absorb adata", s, NUM_SHARES_AD);
+      P(s, nr, NUM_SHARES_AD);
+      adlen -= ASCON_AEAD_RATE;
+      /* one rate block occupies two elements of the share buffer */
+      ad += 2;
+    }
+    /* final associated data block */
+    /* padding bit in share 0: moved right by 4 positions per remaining byte */
+    s->x[0].s[0].w[1] ^= 0x80000000 >> (adlen * 4);
+    if (adlen) {
+      /* NOTE(review): a whole word is loaded for the partial block; this
+       * presumably relies on the share buffer being zero-padded to a full
+       * block -- confirm against the share generation code */
+      word_t as = MLOAD((uint32_t*)ad, NUM_SHARES_AD);
+      s->x[0] = MXOR(s->x[0], as, NUM_SHARES_AD);
+    }
+    printstate("pad adata", s, NUM_SHARES_AD);
+    P(s, nr, NUM_SHARES_AD);
+  }
+  /* domain separation */
+  s->x[4].s[0].w[0] ^= 1;
+  printstate("domain separation", s, NUM_SHARES_AD);
+}
+/*
+ * Encrypt `mlen` bytes of shared plaintext `m` into shared ciphertext `c`.
+ *
+ * Each full block is XORed into x[0], x[0] is stored as ciphertext and P is
+ * run with ASCON_PB_ROUNDS rounds. The last (possibly empty) block is padded
+ * and, if non-empty, processed the same way but without a following
+ * permutation.
+ */
+void ascon_encrypt(state_t* s, mask_c_uint32_t* c, const mask_m_uint32_t* m,
+                   uint64_t mlen) {
+  const int nr = ASCON_PB_ROUNDS;
+  /* full plaintext blocks */
+  while (mlen >= ASCON_AEAD_RATE) {
+    word_t ms = MLOAD((uint32_t*)m, NUM_SHARES_M);
+    s->x[0] = MXOR(s->x[0], ms, NUM_SHARES_M);
+    MSTORE((uint32_t*)c, s->x[0], NUM_SHARES_C);
+    printstate("absorb plaintext", s, NUM_SHARES_M);
+    P(s, nr, NUM_SHARES_M);
+    mlen -= ASCON_AEAD_RATE;
+    /* one rate block occupies two elements of each share buffer */
+    m += 2;
+    c += 2;
+  }
+  /* final plaintext block */
+  /* padding bit in share 0: moved right by 4 positions per remaining byte */
+  s->x[0].s[0].w[1] ^= 0x80000000 >> (mlen * 4);
+  if (mlen) {
+    /* NOTE(review): a whole word is loaded and stored for the partial block;
+     * presumably both buffers are sized and zero-padded to full blocks --
+     * confirm against the share generation code */
+    word_t ms = MLOAD((uint32_t*)m, NUM_SHARES_M);
+    s->x[0] = MXOR(s->x[0], ms, NUM_SHARES_M);
+    MSTORE((uint32_t*)c, s->x[0], NUM_SHARES_C);
+  }
+  printstate("pad plaintext", s, NUM_SHARES_M);
+}
+/*
+ * Decrypt `clen` bytes of shared ciphertext `c` into shared plaintext `m`.
+ *
+ * For each full block the plaintext x[0] ^ ciphertext is stored, x[0] is
+ * replaced by the ciphertext block and P is run with ASCON_PB_ROUNDS rounds.
+ * The last (possibly empty) block is padded and handled without a following
+ * permutation.
+ */
+void ascon_decrypt(state_t* s, mask_m_uint32_t* m, const mask_c_uint32_t* c,
+                   uint64_t clen) {
+  const int nr = ASCON_PB_ROUNDS;
+  /* full ciphertext blocks */
+  while (clen >= ASCON_AEAD_RATE) {
+    word_t cx = MLOAD((uint32_t*)c, NUM_SHARES_C);
+    s->x[0] = MXOR(s->x[0], cx, NUM_SHARES_C);
+    MSTORE((uint32_t*)m, s->x[0], NUM_SHARES_M);
+    /* the ciphertext block becomes the new rate word of the state */
+    s->x[0] = cx;
+    printstate("insert ciphertext", s, NUM_SHARES_M);
+    P(s, nr, NUM_SHARES_M);
+    clen -= ASCON_AEAD_RATE;
+    /* one rate block occupies two elements of each share buffer */
+    c += 2;
+    m += 2;
+  }
+  /* final ciphertext block */
+  /* padding bit in share 0: moved right by 4 positions per remaining byte */
+  s->x[0].s[0].w[1] ^= 0x80000000 >> (clen * 4);
+  if (clen) {
+    word_t cx = MLOAD((uint32_t*)c, NUM_SHARES_C);
+    s->x[0] = MXOR(s->x[0], cx, NUM_SHARES_C);
+    MSTORE((uint32_t*)m, s->x[0], NUM_SHARES_M);
+    /* NOTE(review): MMASK/MXORAND are defined elsewhere; presumably they
+     * rebuild x[0] from the first clen ciphertext bytes and the remaining
+     * state bytes -- confirm in word.h */
+    word_t mask = MMASK(clen, NUM_SHARES_C);
+    s->x[0] = MXORAND(cx, s->x[0], mask, NUM_SHARES_C);
+  }
+  printstate("pad ciphertext", s, NUM_SHARES_M);
+}
+/*
+ * Finalisation: XOR the shared key `k` into x[1]/x[2], run P with
+ * ASCON_PA_ROUNDS rounds, then XOR the key into x[3]/x[4]. Afterwards
+ * x[3]/x[4] are what ascon_settag stores and ascon_xortag compares.
+ */
+void ascon_final(state_t* s, const mask_key_uint32_t* k) {
+  word_t K1, K2;
+  /* the second key word starts two elements after the first */
+  K1 = MLOAD((uint32_t*)k, NUM_SHARES_KEY);
+  K2 = MLOAD((uint32_t*)(k + 2), NUM_SHARES_KEY);
+  /* first key xor (first 64-bit word) */
+  s->x[1] = MXOR(s->x[1], K1, NUM_SHARES_KEY);
+  /* first key xor (second 64-bit word) */
+  s->x[2] = MXOR(s->x[2], K2, NUM_SHARES_KEY);
+  printstate("final 1st key xor", s, NUM_SHARES_KEY);
+  /* compute the permutation */
+  P(s, ASCON_PA_ROUNDS, NUM_SHARES_KEY);
+  /* second key xor (first 64-bit word) */
+  s->x[3] = MXOR(s->x[3], K1, NUM_SHARES_KEY);
+  /* second key xor (second 64-bit word) */
+  s->x[4] = MXOR(s->x[4], K2, NUM_SHARES_KEY);
+  printstate("final 2nd key xor", s, NUM_SHARES_KEY);
+}
+/*
+ * Write the tag to the shared buffer `t`: x[3] goes to the first word and
+ * x[4] to the word starting two elements later.
+ */
+void ascon_settag(state_t* s, mask_c_uint32_t* t) {
+  uint32_t* const tag_first = (uint32_t*)t;
+  uint32_t* const tag_second = (uint32_t*)(t + 2);
+  MSTORE(tag_first, s->x[3], NUM_SHARES_C);
+  MSTORE(tag_second, s->x[4], NUM_SHARES_C);
+}
+/* expected value of x3,x4 for P(0) */
+/*
+ * One table per supported ASCON_PB_ROUNDS value; ascon_xortag XORs c[0..1]
+ * into x3 and c[2..3] into x4. Note that the final #else is taken for every
+ * value other than 1..5, not only for 6.
+ */
+#if ASCON_PB_ROUNDS == 1
+static const uint32_t c[4] = {0x4b000009, 0x1c800003, 0x00000000, 0x00000000};
+#elif ASCON_PB_ROUNDS == 2
+static const uint32_t c[4] = {0x5d2d1034, 0x76fa81d1, 0x0cc1c9ef, 0xdb30a503};
+#elif ASCON_PB_ROUNDS == 3
+static const uint32_t c[4] = {0xbcaa1d46, 0xf1d0bde9, 0x32c4e651, 0x7b797cd9};
+#elif ASCON_PB_ROUNDS == 4
+static const uint32_t c[4] = {0xf7820616, 0xeffead2d, 0x94846901, 0xd4895cf5};
+#elif ASCON_PB_ROUNDS == 5
+static const uint32_t c[4] = {0x9e5ce5e3, 0xd40e9b87, 0x0bfc74af, 0xf8e408a9};
+#else /* ASCON_PB_ROUNDS == 6 */
+static const uint32_t c[4] = {0x11874f08, 0x7520afef, 0xa4dd41b4, 0x4bd6f9a4};
+#endif
+/*
+ * Prepare the tag comparison without unmasking the computed tag.
+ *
+ * x0..x2 are reset, the received shared tag `t` is XORed into x3/x4, P is run
+ * with ASCON_PB_ROUNDS rounds and the table `c` (expected x3,x4 of P(0)) is
+ * XORed into share 0 of x3/x4. ascon_iszero then tests x3/x4 for zero.
+ */
+void ascon_xortag(state_t* s, const mask_c_uint32_t* t) {
+  /* set x0, x1, x2 to zero */
+  s->x[0] = MREUSE(s->x[0], 0, NUM_SHARES_KEY);
+  s->x[1] = MREUSE(s->x[1], 0, NUM_SHARES_KEY);
+  s->x[2] = MREUSE(s->x[2], 0, NUM_SHARES_KEY);
+  /* xor tag to x3, x4 */
+  word_t t0 = MLOAD((uint32_t*)t, NUM_SHARES_C);
+  s->x[3] = MXOR(s->x[3], t0, NUM_SHARES_C);
+  word_t t1 = MLOAD((uint32_t*)(t + 2), NUM_SHARES_C);
+  s->x[4] = MXOR(s->x[4], t1, NUM_SHARES_C);
+  /* compute P(0) if tags are equal */
+  P(s, ASCON_PB_ROUNDS, NUM_SHARES_KEY);
+  /* xor expected result to x3, x4 */
+  s->x[3].s[0].w[0] ^= c[0];
+  s->x[3].s[0].w[1] ^= c[1];
+  s->x[4].s[0].w[0] ^= c[2];
+  s->x[4].s[0].w[1] ^= c[3];
+}
+/*
+ * Return 0 when all 128 bits of x[3], x[4] are zero and -1 otherwise.
+ *
+ * Side effect: for NUM_SHARES_KEY >= 2 the further shares of x[3] and x[4]
+ * are rotated (ROR32 by ROT(i)) and XORed into share 0, so `s` is modified.
+ *
+ * The four 32-bit words are combined with OR. Combining them with XOR would
+ * let non-zero words cancel each other (e.g. w[0] == w[1]) and report a
+ * non-zero value as zero, i.e. accept a wrong tag.
+ */
+int ascon_iszero(state_t* s) {
+#if NUM_SHARES_KEY >= 2
+  s->x[3].s[0].w[0] ^= ROR32(s->x[3].s[1].w[0], ROT(1));
+  s->x[3].s[0].w[1] ^= ROR32(s->x[3].s[1].w[1], ROT(1));
+  s->x[4].s[0].w[0] ^= ROR32(s->x[4].s[1].w[0], ROT(1));
+  s->x[4].s[0].w[1] ^= ROR32(s->x[4].s[1].w[1], ROT(1));
+#endif
+#if NUM_SHARES_KEY >= 3
+  s->x[3].s[0].w[0] ^= ROR32(s->x[3].s[2].w[0], ROT(2));
+  s->x[3].s[0].w[1] ^= ROR32(s->x[3].s[2].w[1], ROT(2));
+  s->x[4].s[0].w[0] ^= ROR32(s->x[4].s[2].w[0], ROT(2));
+  s->x[4].s[0].w[1] ^= ROR32(s->x[4].s[2].w[1], ROT(2));
+#endif
+#if NUM_SHARES_KEY >= 4
+  s->x[3].s[0].w[0] ^= ROR32(s->x[3].s[3].w[0], ROT(3));
+  s->x[3].s[0].w[1] ^= ROR32(s->x[3].s[3].w[1], ROT(3));
+  s->x[4].s[0].w[0] ^= ROR32(s->x[4].s[3].w[0], ROT(3));
+  s->x[4].s[0].w[1] ^= ROR32(s->x[4].s[3].w[1], ROT(3));
+#endif
+  uint32_t result;
+  /* any set bit in any of the four words must survive the fold */
+  result = s->x[3].s[0].w[0] | s->x[3].s[0].w[1];
+  result |= s->x[4].s[0].w[0] | s->x[4].s[0].w[1];
+  /* fold all four bytes into the low byte */
+  result |= result >> 16;
+  result |= result >> 8;
+  /* branch-free map 0 -> 0 and 1..255 -> -1 using unsigned arithmetic only
+   * (adding 0xff carries into bit 8 exactly when the low byte is non-zero) */
+  return -(int)(((result & 0xff) + 0xff) >> 8);
+}
+/*
+ * Compare the tag held in the state with the shared tag `t`.
+ * Folds `t` into the state with ascon_xortag and returns what ascon_iszero
+ * reports for the result.
+ */
+int ascon_verify(state_t* s, const mask_c_uint32_t* t) {
+  int verdict;
+  ascon_xortag(s, t);
+  verdict = ascon_iszero(s);
+  return verdict;
+}
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/api.h
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/api.h
+/* implementation version string */
+#define CRYPTO_VERSION "1.2.6"
+/* key size in bytes */
+#define CRYPTO_KEYBYTES 16
+/* secret nonce size in bytes (the nsec arguments are ignored by the wrappers) */
+#define CRYPTO_NSECBYTES 0
+/* public nonce size in bytes */
+#define CRYPTO_NPUBBYTES 16
+/* number of bytes the ciphertext is longer than the plaintext (the tag) */
+#define CRYPTO_ABYTES 16
+/* NOTE(review): presumably the benchmarking-API flag stating that input and
+ * output buffers must not overlap -- confirm */
+#define CRYPTO_NOOVERLAP 1
+/* bytes processed per block in the associated data and message phases */
+#define ASCON_AEAD_RATE 8
+/* NOTE(review): not used in the sources shown with this header; presumably
+ * the rotation distance between shares -- confirm in shares.h */
+#ifndef ASCON_ROR_SHARES
+#define ASCON_ROR_SHARES 5
+#endif
+/* number of shares per data type; each defaults to 1 and can be overridden
+ * from the build command line */
+#ifndef NUM_SHARES_M
+#define NUM_SHARES_M 1
+#endif
+#ifndef NUM_SHARES_C
+#define NUM_SHARES_C 1
+#endif
+#ifndef NUM_SHARES_AD
+#define NUM_SHARES_AD 1
+#endif
+#ifndef NUM_SHARES_NPUB
+#define NUM_SHARES_NPUB 1
+#endif
+#ifndef NUM_SHARES_KEY
+#define NUM_SHARES_KEY 1
+#endif
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/architectures
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/architectures
+arm
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/ascon.h
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/ascon.h
+/* State type and phase functions of the shared (masked) Ascon AEAD. */
+#ifndef ASCON_H_
+#define ASCON_H_
+#include <stdint.h>
+#include "shares.h"
+#include "word.h"
+/* Ascon state: six words; aead.c uses x[0]..x[4] as the cipher state and
+ * additionally fills x[5] during initialisation */
+typedef struct {
+  word_t x[6];
+} state_t;
+/* load IV, nonce and key, run the initial permutation, xor the key again */
+void ascon_initaead(state_t* s, const mask_npub_uint32_t* n,
+                    const mask_key_uint32_t* k);
+/* absorb adlen bytes of associated data and apply domain separation */
+void ascon_adata(state_t* s, const mask_ad_uint32_t* ad, uint64_t adlen);
+/* encrypt mlen bytes from m to c */
+void ascon_encrypt(state_t* s, mask_c_uint32_t* c, const mask_m_uint32_t* m,
+                   uint64_t mlen);
+/* decrypt clen bytes from c to m */
+void ascon_decrypt(state_t* s, mask_m_uint32_t* m, const mask_c_uint32_t* c,
+                   uint64_t clen);
+/* key xor, final permutation, key xor */
+void ascon_final(state_t* s, const mask_key_uint32_t* k);
+/* store the tag (x[3], x[4]) to t */
+void ascon_settag(state_t* s, mask_c_uint32_t* t);
+/* compare the tag in the state with t; returns 0 on match, -1 otherwise */
+int ascon_verify(state_t* s, const mask_c_uint32_t* t);
+/* NOTE(review): the following three functions are only declared here; the
+ * accompanying aead.c does not define them -- confirm they exist elsewhere */
+void ascon_level_adata(state_t* s);
+void ascon_level_encdec(state_t* s);
+void ascon_level_final(state_t* s);
+#endif /* ASCON_H_ */
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/asm.h
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/asm.h
+/* Inline-assembly helper macros (ARM instruction syntax). */
+#ifndef ASM_H_
+#define ASM_H_
+/* compilers without __GNUC__ get __asm__ mapped to asm */
+#ifndef __GNUC__
+#define __asm__ asm
+#endif
+/* rd = word at [ptr + offset] */
+#define LDR(rd, ptr, offset) \
+  __asm__ volatile("ldr %0, [%1, %2]\n\t" : "=r"(rd) : "r"(ptr), "ri"(offset))
+/* word at [ptr + offset] = rd; declares a "memory" clobber (LDR does not) */
+#define STR(rd, ptr, offset)                                                \
+  __asm__ volatile("str %0, [%1, %2]\n\t" ::"r"(rd), "r"(ptr), "ri"(offset) \
+                   : "memory")
+/* move the value 0 into a scratch register; the result is not used.
+ * NOTE(review): the local `i` is passed through an "i" (immediate)
+ * constraint, which presumably requires the compiler to fold it to a
+ * constant -- confirm this builds without optimisation */
+#define CLEAR()                                            \
+  do {                                                     \
+    uint32_t r, i = 0;                                     \
+    __asm__ volatile("mov %0, %1\n\t" : "=r"(r) : "i"(i)); \
+  } while (0)
+/* rd = imm */
+#define MOVI(rd, imm) __asm__ volatile("mov %0, %1\n\t" : "=r"(rd) : "i"(imm))
+/* rd = rn rotated right by imm */
+#define RORI(rd, rn, imm) \
+  __asm__ volatile("ror %0, %1, #%c2\n\t" : "=r"(rd) : "r"(rn), "i"(imm))
+/* rd = rn ^ (rm rotated right by imm) */
+#define EOR_ROR(rd, rn, rm, imm)                  \
+  __asm__ volatile("eor %0, %1, %2, ror #%c3\n\t" \
+                   : "=r"(rd)                     \
+                   : "r"(rn), "r"(rm), "i"(imm))
+/* The three macros below rotate `be` by ROT(imm) (ROT is defined elsewhere),
+ * not by imm itself, and use `tmp` as a scratch register. */
+/* ce ^= ae & (be rotated right by ROT(imm)) */
+#define EOR_AND_ROR(ce, ae, be, imm, tmp)                       \
+  __asm__ volatile(                                             \
+      "and %[tmp_], %[ae_], %[be_], ror %[i1_]\n\t"             \
+      "eor %[ce_], %[tmp_], %[ce_]\n\t"                         \
+      : [ ce_ ] "+r"(ce), [ tmp_ ] "=r"(tmp)                    \
+      : [ ae_ ] "r"(ae), [ be_ ] "r"(be), [ i1_ ] "i"(ROT(imm)) \
+      :)
+/* ce ^= ae & ~(be rotated right by ROT(imm)) */
+#define EOR_BIC_ROR(ce, ae, be, imm, tmp)                       \
+  __asm__ volatile(                                             \
+      "bic %[tmp_], %[ae_], %[be_], ror %[i1_]\n\t"             \
+      "eor %[ce_], %[tmp_], %[ce_]\n\t"                         \
+      : [ ce_ ] "+r"(ce), [ tmp_ ] "=r"(tmp)                    \
+      : [ ae_ ] "r"(ae), [ be_ ] "r"(be), [ i1_ ] "i"(ROT(imm)) \
+      :)
+/* ce ^= ae | (be rotated right by ROT(imm)) */
+#define EOR_ORR_ROR(ce, ae, be, imm, tmp)                       \
+  __asm__ volatile(                                             \
+      "orr %[tmp_], %[ae_], %[be_], ror %[i1_]\n\t"             \
+      "eor %[ce_], %[tmp_], %[ce_]\n\t"                         \
+      : [ ce_ ] "+r"(ce), [ tmp_ ] "=r"(tmp)                    \
+      : [ ae_ ] "r"(ae), [ be_ ] "r"(be), [ i1_ ] "i"(ROT(imm)) \
+      :)
+#endif  // ASM_H_
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/config.h
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/config.h
+/* Build-time configuration; every value can be overridden before inclusion. */
+#ifndef CONFIG_H_
+#define CONFIG_H_
+#include "api.h"
+/* extern bit interleaving */
+#ifndef ASCON_EXTERN_BI
+#define ASCON_EXTERN_BI 0
+#endif
+/* number of PA rounds */
+#ifndef ASCON_PA_ROUNDS
+#define ASCON_PA_ROUNDS 12
+#endif
+/* number of PB rounds */
+#ifndef ASCON_PB_ROUNDS
+#define ASCON_PB_ROUNDS 6
+#endif
+/* the checks below force all five share counts to be equal */
+#if NUM_SHARES_M != NUM_SHARES_KEY
+#error "NUM_SHARES_M != NUM_SHARES_KEY currently not supported"
+#endif
+#if NUM_SHARES_C != NUM_SHARES_M
+#error "NUM_SHARES_C != NUM_SHARES_M currently not supported"
+#endif
+#if NUM_SHARES_AD != NUM_SHARES_M
+#error "NUM_SHARES_AD != NUM_SHARES_M currently not supported"
+#endif
+#if NUM_SHARES_NPUB != NUM_SHARES_KEY
+#error "NUM_SHARES_NPUB != NUM_SHARES_KEY currently not supported"
+#endif
+#endif /* CONFIG_H_ */
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/constants.c
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/constants.c
+#include "constants.h"
+/*
+ * Round constant table: 24 bytes, i.e. one byte pair per round for 12 rounds.
+ * The RC(i) macro in constants.h reads entries 2*i + 1 and 2*i.
+ */
+const uint8_t constants[] = {0xc, 0xc, 0x9, 0xc, 0xc, 0x9, 0x9, 0x9,
+                             0x6, 0xc, 0x3, 0xc, 0x6, 0x9, 0x3, 0x9,
+                             0xc, 0x6, 0x9, 0x6, 0xc, 0x3, 0x9, 0x3};
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/constants.h
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/constants.h
+/* Parameters, initial values and round constants of the Ascon family. */
+#ifndef CONSTANTS_H_
+#define CONSTANTS_H_
+#include <stdint.h>
+/* key sizes in bytes */
+#define ASCON_128_KEYBYTES 16
+#define ASCON_128A_KEYBYTES 16
+#define ASCON_80PQ_KEYBYTES 20
+/* rates in bytes */
+#define ASCON_128_RATE 8
+#define ASCON_128A_RATE 16
+#define ASCON_HASH_RATE 8
+#define ASCON_PRF_IN_RATE 32
+#define ASCON_PRF_OUT_RATE 16
+/* number of permutation rounds (PA / PB) per variant */
+#define ASCON_128_PA_ROUNDS 12
+#define ASCON_128_PB_ROUNDS 6
+#define ASCON_128A_PA_ROUNDS 12
+#define ASCON_128A_PB_ROUNDS 8
+#define ASCON_HASH_PA_ROUNDS 12
+#define ASCON_HASH_PB_ROUNDS 12
+#define ASCON_HASHA_PA_ROUNDS 12
+#define ASCON_HASHA_PB_ROUNDS 8
+#define ASCON_PRF_PA_ROUNDS 12
+#define ASCON_PRF_PB_ROUNDS 12
+/* initial values per variant; aead.c XORs the two 32-bit halves of
+ * ASCON_128_IV into x[0] */
+#define ASCON_128_IV 0x8021000008220000ull
+#define ASCON_128A_IV 0x8822000000200000ull
+#define ASCON_80PQ_IV 0xc021000008220000ull
+#define ASCON_HASH_IV 0x0020000008200010ull
+#define ASCON_HASHA_IV 0x0020000008220010ull
+#define ASCON_XOF_IV 0x0020000008200000ull
+#define ASCON_XOFA_IV 0x0020000008220000ull
+#define ASCON_PRF_IV 0x88a0000000200000ull
+#define ASCON_MAC_IV 0x88a0000800200000ull
+#define ASCON_PRFS_IV 0x8028000000a00000ull
+/* five-word initial states of the hash and XOF variants */
+#define ASCON_HASH_IV0 0xf9afb5c6a540dbc7ull
+#define ASCON_HASH_IV1 0xbd2493011445a340ull
+#define ASCON_HASH_IV2 0xcb9ba8b5604d4fc8ull
+#define ASCON_HASH_IV3 0x12a4eede94514c98ull
+#define ASCON_HASH_IV4 0x4bca84c06339f398ull
+#define ASCON_HASHA_IV0 0x0108e46d1b16eb02ull
+#define ASCON_HASHA_IV1 0x5b9b8efdd29083f3ull
+#define ASCON_HASHA_IV2 0x7ad665622891ae4aull
+#define ASCON_HASHA_IV3 0x9dc27156ee3bfc7full
+#define ASCON_HASHA_IV4 0xc61d5fa916801633ull
+#define ASCON_XOF_IV0 0xc75782817e351ae6ull
+#define ASCON_XOF_IV1 0x70045f441d238220ull
+#define ASCON_XOF_IV2 0x5dd5ab52a13e3f04ull
+#define ASCON_XOF_IV3 0x3e378142c30c1db2ull
+#define ASCON_XOF_IV4 0x3735189db624d656ull
+#define ASCON_XOFA_IV0 0x0846d7a5a4b87d44ull
+#define ASCON_XOFA_IV1 0xaa6f1005b3a2dbf4ull
+#define ASCON_XOFA_IV2 0xdc451146f713e811ull
+#define ASCON_XOFA_IV3 0x468cb2532839e30dull
+#define ASCON_XOFA_IV4 0xeb2d429709e96977ull
+/* round constants as 64-bit values, one 4-bit value in each 32-bit half */
+#define RC0 0x0000000c0000000cull
+#define RC1 0x0000000c00000009ull
+#define RC2 0x000000090000000cull
+#define RC3 0x0000000900000009ull
+#define RC4 0x0000000c00000006ull
+#define RC5 0x0000000c00000003ull
+#define RC6 0x0000000900000006ull
+#define RC7 0x0000000900000003ull
+#define RC8 0x000000060000000cull
+#define RC9 0x0000000600000009ull
+#define RCa 0x000000030000000cull
+#define RCb 0x0000000300000009ull
+/*
+ * RC(i) expands to TWO comma-separated bytes from the constants[] table: the
+ * upper-half value of round constant i followed by the lower-half value.
+ * The argument is parenthesized so that expressions such as RC(r + 1) index
+ * the intended entry.
+ */
+#define RC(i) constants[2 * (i) + 1], constants[2 * (i) + 0]
+/* round index range: a permutation of n rounds runs from START(n) up to END
+ * in steps of INC */
+#define START(n) (12 - (n))
+#define INC 1
+#define END 12
+/* table defined in constants.c */
+extern const uint8_t constants[];
+#endif /* CONSTANTS_H_ */
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/crypto_aead.c
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/crypto_aead.c
+#include "crypto_aead.h"
+#include <stdlib.h>
+#include "api.h"
+#include "crypto_aead_shared.h"
+#ifdef SS_VER
+#include "hal.h"
+#else
+#define trigger_high()
+#define trigger_low()
+#endif
+/*
+ * Unmasked AEAD encryption entry point.
+ *
+ * Splits the plain inputs into shares, calls the shared (masked) encryption
+ * and recombines the shared ciphertext into `c`. `*clen` is set by the shared
+ * call. `nsec` is ignored.
+ *
+ * Returns 0 on success and -1 when a share buffer cannot be allocated; in
+ * that case neither `c` nor `*clen` is written.
+ */
+int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
+                        const unsigned char* m, unsigned long long mlen,
+                        const unsigned char* a, unsigned long long alen,
+                        const unsigned char* nsec, const unsigned char* npub,
+                        const unsigned char* k) {
+  int result = -1;
+  mask_key_uint32_t* ks = NULL;
+  mask_npub_uint32_t* ns = NULL;
+  mask_ad_uint32_t* as = NULL;
+  mask_m_uint32_t* ms = NULL;
+  mask_c_uint32_t* cs = NULL;
+  /* byte sizes of the share buffers */
+  const size_t ks_size = (size_t)(sizeof(*ks) * NUM_WORDS(CRYPTO_KEYBYTES));
+  const size_t ns_size = (size_t)(sizeof(*ns) * NUM_WORDS(CRYPTO_NPUBBYTES));
+  const size_t as_size = (size_t)(sizeof(*as) * NUM_WORDS(alen));
+  const size_t ms_size = (size_t)(sizeof(*ms) * NUM_WORDS(mlen));
+  const size_t cs_size =
+      (size_t)(sizeof(*cs) * NUM_WORDS(mlen + CRYPTO_ABYTES));
+  (void)nsec;
+  /* dynamic allocation of input/output shares */
+  ks = malloc(ks_size);
+  ns = malloc(ns_size);
+  as = malloc(as_size);
+  ms = malloc(ms_size);
+  cs = malloc(cs_size);
+  /* malloc(0) may legally return NULL, so NULL only counts as a failure for
+   * a non-zero request (e.g. empty associated data must keep working) */
+  if ((ks_size && !ks) || (ns_size && !ns) || (as_size && !as) ||
+      (ms_size && !ms) || (cs_size && !cs)) {
+    goto cleanup;
+  }
+  /* mask plain input data */
+  generate_shares_encrypt(m, ms, mlen, a, as, alen, npub, ns, k, ks);
+  /* call shared interface of ascon encrypt */
+  /* trigger_high(); -- measurement trigger, left disabled as before */
+  crypto_aead_encrypt_shared(cs, clen, ms, mlen, as, alen, ns, ks);
+  /* trigger_low(); */
+  /* unmask shared output data */
+  combine_shares_encrypt(cs, c, *clen);
+  result = 0;
+cleanup:
+  /* free shares (free(NULL) is a no-op) */
+  free(ks);
+  free(ns);
+  free(as);
+  free(ms);
+  free(cs);
+  return result;
+}
+/*
+ * Unmasked AEAD decryption entry point.
+ *
+ * Splits the plain inputs into shares, calls the shared (masked) decryption
+ * and recombines the shared plaintext into `m`. `*mlen` is set by the shared
+ * call. `nsec` is ignored.
+ *
+ * Returns the result of the shared decryption (tag verification), or -1 when
+ * `clen` is shorter than a tag or a share buffer cannot be allocated; in
+ * those two cases neither `m` nor `*mlen` is written.
+ *
+ * NOTE(review): the plaintext is written to `m` even when the shared call
+ * reports a verification failure; callers must discard `m` on a non-zero
+ * return value.
+ */
+int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
+                        unsigned char* nsec, const unsigned char* c,
+                        unsigned long long clen, const unsigned char* a,
+                        unsigned long long alen, const unsigned char* npub,
+                        const unsigned char* k) {
+  int result = -1;
+  mask_key_uint32_t* ks = NULL;
+  mask_npub_uint32_t* ns = NULL;
+  mask_ad_uint32_t* as = NULL;
+  mask_m_uint32_t* ms = NULL;
+  mask_c_uint32_t* cs = NULL;
+  size_t ks_size, ns_size, as_size, ms_size, cs_size;
+  (void)nsec;
+  /* the ciphertext must at least contain the tag */
+  if (clen < CRYPTO_ABYTES) return -1;
+  /* byte sizes of the share buffers */
+  ks_size = (size_t)(sizeof(*ks) * NUM_WORDS(CRYPTO_KEYBYTES));
+  ns_size = (size_t)(sizeof(*ns) * NUM_WORDS(CRYPTO_NPUBBYTES));
+  as_size = (size_t)(sizeof(*as) * NUM_WORDS(alen));
+  ms_size = (size_t)(sizeof(*ms) * NUM_WORDS(clen - CRYPTO_ABYTES));
+  cs_size = (size_t)(sizeof(*cs) * NUM_WORDS(clen));
+  /* dynamic allocation of input/output shares */
+  ks = malloc(ks_size);
+  ns = malloc(ns_size);
+  as = malloc(as_size);
+  ms = malloc(ms_size);
+  cs = malloc(cs_size);
+  /* malloc(0) may legally return NULL, so NULL only counts as a failure for
+   * a non-zero request (e.g. empty associated data or empty message) */
+  if ((ks_size && !ks) || (ns_size && !ns) || (as_size && !as) ||
+      (ms_size && !ms) || (cs_size && !cs)) {
+    goto cleanup;
+  }
+  /* mask plain input data */
+  generate_shares_decrypt(c, cs, clen, a, as, alen, npub, ns, k, ks);
+  /* call shared interface of ascon decrypt */
+  /* trigger_high(); -- measurement trigger, left disabled as before */
+  result = crypto_aead_decrypt_shared(ms, mlen, cs, clen, as, alen, ns, ks);
+  /* trigger_low(); */
+  /* unmask shared output data */
+  combine_shares_decrypt(ms, m, *mlen);
+cleanup:
+  /* free shares (free(NULL) is a no-op) */
+  free(ks);
+  free(ns);
+  free(as);
+  free(ms);
+  free(cs);
+  return result;
+}
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/crypto_aead.h
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/crypto_aead.h
+/*
+ * Encrypt `mlen` bytes of `m` with associated data `ad` (`adlen` bytes),
+ * public nonce `npub` and key `k`; writes the ciphertext to `c` and its
+ * length to `*clen`. `nsec` is unused.
+ */
+int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
+                        const unsigned char *m, unsigned long long mlen,
+                        const unsigned char *ad, unsigned long long adlen,
+                        const unsigned char *nsec, const unsigned char *npub,
+                        const unsigned char *k);
+/*
+ * Decrypt `clen` bytes of `c` with associated data `ad` (`adlen` bytes),
+ * public nonce `npub` and key `k`; writes the plaintext to `m` and its length
+ * to `*mlen`. Returns -1 if `clen` is shorter than CRYPTO_ABYTES, otherwise
+ * the verification result of the shared decryption. `nsec` is unused.
+ */
+int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
+                        unsigned char *nsec, const unsigned char *c,
+                        unsigned long long clen, const unsigned char *ad,
+                        unsigned long long adlen, const unsigned char *npub,
+                        const unsigned char *k);
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/crypto_aead_shared.c
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/crypto_aead_shared.c
+#include "crypto_aead_shared.h"
+#include <stdlib.h>
+#include "api.h"
+#include "ascon.h"
+#include "shares.h"
+/*
+ * Shared (masked) AEAD encryption.
+ *
+ * Runs initialisation, associated data, encryption and finalisation on a
+ * local state and stores the tag behind the ciphertext words in `cs`.
+ * Sets `*clen` to mlen + CRYPTO_ABYTES and always returns 0.
+ */
+int crypto_aead_encrypt_shared(mask_c_uint32_t* cs, unsigned long long* clen,
+                               const mask_m_uint32_t* ms,
+                               unsigned long long mlen,
+                               const mask_ad_uint32_t* ads,
+                               unsigned long long adlen,
+                               const mask_npub_uint32_t* npubs,
+                               const mask_key_uint32_t* ks) {
+  state_t state;
+  /* the tag follows the NUM_WORDS(mlen) ciphertext elements */
+  mask_c_uint32_t* const tag = cs + NUM_WORDS(mlen);
+
+  *clen = mlen + CRYPTO_ABYTES;
+
+  ascon_initaead(&state, npubs, ks);
+  ascon_adata(&state, ads, adlen);
+  ascon_encrypt(&state, cs, ms, mlen);
+  ascon_final(&state, ks);
+  ascon_settag(&state, tag);
+
+  return 0;
+}
+/*
+ * Shared (masked) AEAD decryption.
+ *
+ * Runs initialisation, associated data, decryption and finalisation on a
+ * local state and verifies the tag stored behind the ciphertext words in
+ * `cs`. Sets `*mlen` to clen - CRYPTO_ABYTES.
+ *
+ * Returns the result of ascon_verify, or -1 with `*mlen` set to 0 when `clen`
+ * is too short to contain a tag.
+ */
+int crypto_aead_decrypt_shared(mask_m_uint32_t* ms, unsigned long long* mlen,
+                               const mask_c_uint32_t* cs,
+                               unsigned long long clen,
+                               const mask_ad_uint32_t* ads,
+                               unsigned long long adlen,
+                               const mask_npub_uint32_t* npubs,
+                               const mask_key_uint32_t* ks) {
+  state_t s;
+  /* clen - CRYPTO_ABYTES would wrap around for inputs shorter than a tag and
+   * make the phases below read far beyond the buffers */
+  if (clen < CRYPTO_ABYTES) {
+    *mlen = 0;
+    return -1;
+  }
+  *mlen = clen - CRYPTO_ABYTES;
+  ascon_initaead(&s, npubs, ks);
+  ascon_adata(&s, ads, adlen);
+  ascon_decrypt(&s, ms, cs, *mlen);
+  ascon_final(&s, ks);
+  /* the tag follows the NUM_WORDS(*mlen) ciphertext elements */
+  return ascon_verify(&s, cs + NUM_WORDS(*mlen));
+}
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/crypto_aead_shared.h
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/crypto_aead_shared.h
+#include "shares.h"
+/*
+ * Shared (masked) encryption: all data pointers refer to share buffers.
+ * Writes ciphertext and tag to `cs` and mlen + CRYPTO_ABYTES to `*clen`.
+ */
+int crypto_aead_encrypt_shared(mask_c_uint32_t* cs, unsigned long long* clen,
+                               const mask_m_uint32_t* ms,
+                               unsigned long long mlen,
+                               const mask_ad_uint32_t* ads,
+                               unsigned long long adlen,
+                               const mask_npub_uint32_t* npubs,
+                               const mask_key_uint32_t* ks);
+/*
+ * Shared (masked) decryption: all data pointers refer to share buffers.
+ * Writes the plaintext to `ms` and clen - CRYPTO_ABYTES to `*mlen`; returns
+ * the tag verification result.
+ */
+int crypto_aead_decrypt_shared(mask_m_uint32_t* ms, unsigned long long* mlen,
+                               const mask_c_uint32_t* cs,
+                               unsigned long long clen,
+                               const mask_ad_uint32_t* ads,
+                               unsigned long long adlen,
+                               const mask_npub_uint32_t* npubs,
+                               const mask_key_uint32_t* ks);
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/endian.h
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/endian.h
+/*
+ * Conversion between host byte order and big endian.
+ * U64BIG/U32BIG/U16BIG are the identity on big endian hosts and a byte swap
+ * on little endian hosts. The arguments are evaluated several times, so they
+ * must not have side effects.
+ */
+#ifndef ENDIAN_H_
+#define ENDIAN_H_
+#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+/* macros for big endian machines */
+#ifdef PRAGMA_ENDIAN
+#pragma message("Using macros for big endian machines")
+#endif
+#define U64BIG(x) (x)
+#define U32BIG(x) (x)
+#define U16BIG(x) (x)
+#elif defined(_MSC_VER) || \
+    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+/* macros for little endian machines */
+#ifdef PRAGMA_ENDIAN
+#pragma message("Using macros for little endian machines")
+#endif
+#define U64BIG(x)                          \
+  (((0x00000000000000FFULL & (x)) << 56) | \
+   ((0x000000000000FF00ULL & (x)) << 40) | \
+   ((0x0000000000FF0000ULL & (x)) << 24) | \
+   ((0x00000000FF000000ULL & (x)) << 8) |  \
+   ((0x000000FF00000000ULL & (x)) >> 8) |  \
+   ((0x0000FF0000000000ULL & (x)) >> 24) | \
+   ((0x00FF000000000000ULL & (x)) >> 40) | \
+   ((0xFF00000000000000ULL & (x)) >> 56))
+/* the masks are unsigned so that the left shifts are never carried out on a
+ * signed int (shifting a set bit into the sign bit is undefined) */
+#define U32BIG(x)                                             \
+  (((0x000000FFU & (x)) << 24) | ((0x0000FF00U & (x)) << 8) | \
+   ((0x00FF0000U & (x)) >> 8) | ((0xFF000000U & (x)) >> 24))
+#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8))
+#else
+#error "Ascon byte order macros not defined in endian.h"
+#endif
+#endif /* ENDIAN_H_ */
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/forceinline.h
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/forceinline.h
+/* Portable definition of the `forceinline` function specifier. */
+#ifndef FORCEINLINE_H_
+#define FORCEINLINE_H_
+/* define forceinline macro */
+#ifdef _MSC_VER
+#define forceinline __forceinline
+#elif defined(__GNUC__)
+#define forceinline inline __attribute__((__always_inline__))
+/* clang identifies itself with the lower-case macro __clang__; this branch
+ * is only reached by a clang that does not also define __GNUC__ */
+#elif defined(__clang__)
+#if __has_attribute(__always_inline__)
+#define forceinline inline __attribute__((__always_inline__))
+#else
+#define forceinline inline
+#endif
+#else
+/* unknown compiler: plain inline */
+#define forceinline inline
+#endif
+#endif /* FORCEINLINE_H_ */
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/goal-constbranch
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/goal-constbranch
+Branches reviewed 2020-11-13 by Martin Schläffer.
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/goal-constindex
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/goal-constindex
+Addresses reviewed 2020-11-13 by Martin Schläffer.
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/goal_emsca
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/goal_emsca
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/goal_powersca_1st
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/goal_powersca_1st
+using 3 rotated shares
+using 2 rotated shares (on certain devices)
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/goal_powersca_2nd
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/goal_powersca_2nd
+using 3 rotated shares (on certain devices)
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/implementors
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/implementors
+Christoph Dobraunig
+Martin Schläffer
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/interleave.c
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/interleave.c
+#include "interleave.h"
+/* masks of the 1-, 2- and 4-bit swap steps used by (de)interleave16 */
+const uint32_t B[3] = {0x22222222, 0x0c0c0c0c, 0x00f000f0};
+/* out-of-line wrapper: convert a 64-bit word to bit-interleaved form */
+uint64_t TOBI(uint64_t in) {
+  const uint64_t separated = deinterleave32(in);
+  return separated;
+}
+/* out-of-line wrapper: convert a 64-bit word back from bit-interleaved form */
+uint64_t FROMBI(uint64_t in) {
+  const uint64_t merged = interleave32(in);
+  return merged;
+}
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/interleave.h
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/interleave.h
+#ifndef INTERLEAVE_H_
+#define INTERLEAVE_H_
+#include <stdint.h>
+#include "config.h"
+#include "forceinline.h"
+#define BD(e, o, lo, hi)                              \
+  do {                                                \
+    uint64_t tmp = TOBI((uint64_t)(hi) << 32 | (lo)); \
+    e = (uint32_t)tmp;                                \
+    o = tmp >> 32;                                    \
+  } while (0) /* BD(e, o, lo, hi): pass the 64-bit value hi:lo through TOBI()
+               * and assign the low 32 bits of the result to e and the high 32
+               * bits to o. e and o must be assignable lvalues. The expansion
+               * declares a block-local `tmp`, so an argument that mentions an
+               * identifier named tmp would be captured by it. */
+#define BI(lo, hi, e, o)                              \
+  do {                                                \
+    uint64_t tmp = FROMBI((uint64_t)(o) << 32 | (e)); \
+    lo = (uint32_t)tmp;                               \
+    hi = tmp >> 32;                                   \
+  } while (0) /* BI(lo, hi, e, o): counterpart of BD(). Pass the 64-bit value
+               * o:e through FROMBI() and assign the low 32 bits of the result
+               * to lo and the high 32 bits to hi. lo and hi must be assignable
+               * lvalues; the same block-local `tmp` caveat as for BD applies. */
+uint64_t TOBI(uint64_t in);
+uint64_t FROMBI(uint64_t in);
+extern const uint32_t B[3];
+/* Unshuffle the bits of a 32-bit word: the even-indexed bits of x end up in
+ * the low 16 bits of the result and the odd-indexed bits in the high 16 bits.
+ * Implemented as four delta swaps with shift distances 1, 2, 4 and 8. */
+forceinline uint32_t deinterleave16(uint32_t x) {
+  uint32_t delta;
+  /* distance 1 */
+  delta = (x ^ (x >> 1)) & B[0];
+  x ^= delta ^ (delta << 1);
+  /* distance 2 */
+  delta = (x ^ (x >> 2)) & B[1];
+  x ^= delta ^ (delta << 2);
+  /* distance 4 */
+  delta = (x ^ (x >> 4)) & B[2];
+  x ^= delta ^ (delta << 4);
+  /* distance 8 */
+  delta = (x ^ (x >> 8)) & 0xff00;
+  x ^= delta ^ (delta << 8);
+  return x;
+}
+/* Inverse of deinterleave16(): applies the same four delta swaps in the
+ * opposite order (distances 8, 4, 2, 1), shuffling the low and high 16-bit
+ * halves of x back into alternating bit positions. */
+forceinline uint32_t interleave16(uint32_t x) {
+  uint32_t delta;
+  /* distance 8 */
+  delta = (x ^ (x >> 8)) & 0xff00;
+  x ^= delta ^ (delta << 8);
+  /* distance 4 */
+  delta = (x ^ (x >> 4)) & B[2];
+  x ^= delta ^ (delta << 4);
+  /* distance 2 */
+  delta = (x ^ (x >> 2)) & B[1];
+  x ^= delta ^ (delta << 2);
+  /* distance 1 */
+  delta = (x ^ (x >> 1)) & B[0];
+  x ^= delta ^ (delta << 1);
+  return x;
+}
+/* Convert a 64-bit word to bit-interleaved form: the result carries the
+ * even-indexed bits of `in` in its low 32 bits and the odd-indexed bits in
+ * its high 32 bits.
+ * Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002. */
+forceinline uint64_t deinterleave32(uint64_t in) {
+  /* unshuffle each 32-bit half on its own: even bits low, odd bits high */
+  const uint32_t lo_split = deinterleave16((uint32_t)in);
+  const uint32_t hi_split = deinterleave16((uint32_t)(in >> 32));
+  /* regroup the four 16-bit pieces into one even and one odd word */
+  const uint32_t even = (hi_split << 16) | (lo_split & 0x0000FFFF);
+  const uint32_t odd = (hi_split & 0xFFFF0000) | (lo_split >> 16);
+  return ((uint64_t)odd << 32) | even;
+}
+/* Convert a bit-interleaved value (even bits in the low 32 bits, odd bits in
+ * the high 32 bits) back to an ordinary 64-bit word; inverse of
+ * deinterleave32().
+ * Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002. */
+forceinline uint64_t interleave32(uint64_t in) {
+  const uint32_t even = (uint32_t)in;
+  const uint32_t odd = (uint32_t)(in >> 32);
+  /* pair up the even/odd 16-bit pieces that belong to the same output half */
+  const uint32_t lo_packed = (odd << 16) | (even & 0x0000FFFF);
+  const uint32_t hi_packed = (odd & 0xFFFF0000) | (even >> 16);
+  const uint32_t lo = interleave16(lo_packed);
+  const uint32_t hi = interleave16(hi_packed);
+  return ((uint64_t)hi << 32) | lo;
+}
+#endif /* INTERLEAVE_H_ */
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/permutations.c
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/permutations.c
+#include "permutations.h"
+#include "api.h"
+#include "round.h"
+#if NUM_SHARES_KEY == 1 || NUM_SHARES_AD == 1 || NUM_SHARES_M == 1 /* each of
+ * P1..P4 is compiled only when the key, the associated data or the message is
+ * configured with that share count; NUM_SHARES_NPUB and NUM_SHARES_C are not
+ * part of these conditions */
+void P1(state_t* s, int nr) { PROUNDS(s, nr, 1); } /* nr rounds, 1 share */
+#endif
+#if NUM_SHARES_KEY == 2 || NUM_SHARES_AD == 2 || NUM_SHARES_M == 2
+void P2(state_t* s, int nr) { PROUNDS(s, nr, 2); } /* nr rounds, 2 shares */
+#endif
+#if NUM_SHARES_KEY == 3 || NUM_SHARES_AD == 3 || NUM_SHARES_M == 3
+void P3(state_t* s, int nr) { PROUNDS(s, nr, 3); } /* nr rounds, 3 shares */
+#endif
+#if NUM_SHARES_KEY == 4 || NUM_SHARES_AD == 4 || NUM_SHARES_M == 4
+void P4(state_t* s, int nr) { PROUNDS(s, nr, 4); } /* nr rounds, 4 shares */
+#endif
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/permutations.h
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/permutations.h
+#ifndef PERMUTATIONS_H_
+#define PERMUTATIONS_H_
+#include "ascon.h"
+void P1(state_t* s, int nr);
+void P2(state_t* s, int nr);
+void P3(state_t* s, int nr);
+void P4(state_t* s, int nr);
+/* Run nr permutation rounds on s with the variant for ns shares (1..4); any
+ * other ns does nothing. Not every Pn is necessarily defined: permutations.c
+ * builds each one only for share counts that are configured, so callers are
+ * presumably expected to pass a compile-time constant ns that lets the
+ * compiler drop the unused calls. */
+forceinline void P(state_t* s, int nr, int ns) {
+  switch (ns) {
+    case 1:
+      P1(s, nr);
+      break;
+    case 2:
+      P2(s, nr);
+      break;
+    case 3:
+      P3(s, nr);
+      break;
+    case 4:
+      P4(s, nr);
+      break;
+    default:
+      break;
+  }
+}
+#endif /* PERMUTATIONS_H_ */
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/printstate.c
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/printstate.c
+#ifdef ASCON_PRINT_STATE
+#include "printstate.h"
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include "ascon.h"
+#include "shares.h"
+#include "word.h"
+/* Print one masked state word as "<text>=<16 hex digits>".
+ * The ns shares of x are recombined first: share d is rotated right by ROT(d)
+ * and XORed into the even (w[0]) and odd (w[1]) accumulators, which BI() then
+ * converts back to an ordinary 64-bit value. With ASCON_PRINTBI32 defined the
+ * bit-interleaved halves are appended as " (<odd>_<even>)".
+ * Debug helper: it deliberately unmasks the value. */
+void printword(const char* text, const word_t x, int ns) {
+  uint32_t lo, hi, e = 0, o = 0;
+  for (int d = 0; d < ns; ++d) {
+    e ^= ROR32(x.s[d].w[0], ROT(d));
+    o ^= ROR32(x.s[d].w[1], ROT(d));
+  }
+  BI(lo, hi, e, o);
+  printf("%s=%016" PRIx64, text, (uint64_t)hi << 32 | lo);
+#ifdef ASCON_PRINTBI32
+  /* uint32_t is not guaranteed to be unsigned int (it may be unsigned long on
+   * some toolchains), so use the matching <inttypes.h> conversion macro
+   * instead of a bare %x */
+  printf(" (%08" PRIx32 "_%08" PRIx32 ")", o, e);
+#endif
+}
+/* Print a labelled one-line dump of the state: "<text>:", padding with spaces
+ * up to 17 label characters, then the words x0..x4 via printword(), then a
+ * newline. */
+void printstate(const char* text, const state_t* s, int ns) {
+  static const char* const label[5] = {" x0", " x1", " x2", " x3", " x4"};
+  int col = strlen(text);
+  printf("%s:", text);
+  while (col < 17) {
+    printf(" ");
+    ++col;
+  }
+  for (int k = 0; k < 5; ++k) {
+    printword(label[k], s->x[k], ns);
+  }
+  printf("\n");
+}
+#endif
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/printstate.h
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/printstate.h
+#ifndef PRINTSTATE_H_
+#define PRINTSTATE_H_
+#ifdef ASCON_PRINT_STATE /* debug build: real printers (see printstate.c) */
+#include "ascon.h"
+#include "word.h"
+void printword(const char* text, const word_t x, int ns);
+void printstate(const char* text, const state_t* si, int ns);
+#else /* otherwise both names expand to an empty statement, so their arguments
+       * are never evaluated */
+#define printword(text, w, ns) \
+  do {                         \
+  } while (0)
+#define printstate(text, s, ns) \
+  do {                          \
+  } while (0)
+#endif
+#endif /* PRINTSTATE_H_ */
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/round.h
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/round.h
+#ifndef ROUND_H_
+#define ROUND_H_
+#include "ascon.h"
+#include "constants.h"
+#include "printstate.h"
+/* First affine layer of the S-box for a single share: within share d and
+ * 32-bit half i it performs x2 ^= x1, x0 ^= x4 and x4 ^= x3, in that order
+ * (x0 therefore receives x4 before x4 itself is updated). The state is taken
+ * and returned by value.
+ */ forceinline state_t AFFINE1(state_t s, int i, int d) {
+  s.x[2].s[d].w[i] ^= s.x[1].s[d].w[i];
+  s.x[0].s[d].w[i] ^= s.x[4].s[d].w[i];
+  s.x[4].s[d].w[i] ^= s.x[3].s[d].w[i];
+  return s;
+}
+/* Second affine layer of the S-box for a single share (share d, 32-bit half
+ * i). The order matters: x1 takes x0 before x0 is updated, and x3 takes the
+ * already updated x2. x[5] is the extra word that SBOX() uses as the target
+ * of its first Toffoli gate.
+ */ forceinline state_t AFFINE2(state_t s, int i, int d) {
+  s.x[2].s[d].w[i] ^= s.x[5].s[d].w[i];
+  s.x[1].s[d].w[i] ^= s.x[0].s[d].w[i];
+  s.x[0].s[d].w[i] ^= s.x[4].s[d].w[i];
+  s.x[3].s[d].w[i] ^= s.x[2].s[d].w[i];
+  return s;
+}
+/* Masked S-box layer on 32-bit half i of every state word, for ns shares.
+ * The two affine layers act on each share independently; the Toffoli gates in
+ * between go through MXORBIC(), which works across the shares. x[5] serves as
+ * an additional word: it is the target of the first gate and is folded into
+ * x[2] by AFFINE2(). Later gates read words already updated by earlier ones
+ * (x[4], x[1]), so the statement order must be kept.
+ */ forceinline state_t SBOX(state_t s, int i, int ns) {
+  /* affine layer 1 */
+  if (ns >= 1) s = AFFINE1(s, i, 0);
+  if (ns >= 2) s = AFFINE1(s, i, 1);
+  if (ns >= 3) s = AFFINE1(s, i, 2);
+  if (ns >= 4) s = AFFINE1(s, i, 3);
+  /* Toffoli gates */
+  s.x[5] = MXORBIC(s.x[5], s.x[4], s.x[3], i, ns);
+  s.x[4] = MXORBIC(s.x[4], s.x[1], s.x[0], i, ns);
+  s.x[1] = MXORBIC(s.x[1], s.x[3], s.x[2], i, ns);
+  s.x[3] = MXORBIC(s.x[3], s.x[0], s.x[4], i, ns);
+  s.x[0] = MXORBIC(s.x[0], s.x[2], s.x[1], i, ns);
+  /* affine layer 2 */
+  if (ns >= 1) s = AFFINE2(s, i, 0);
+  s.x[2].s[0].w[i] = ~s.x[2].s[0].w[i]; /* complement x2 via share 0 only */
+  if (ns >= 2) s = AFFINE2(s, i, 1);
+  if (ns >= 3) s = AFFINE2(s, i, 2);
+  if (ns >= 4) s = AFFINE2(s, i, 3);
+  return s;
+}
+/* Linear diffusion layer for one share (index d) of x[0]..x[4]; x[5] is not
+ * touched. Each word is processed as its two 32-bit halves w[0]/w[1]: a first
+ * stage stores half ^ rotated half in t, a second stage XORs rotated t halves
+ * back into s. Only the share-d entries of t.x[0..4] are written and read;
+ * the rest of t stays uninitialized.
+ * NOTE(review): the rotation amounts are presumably the 64-bit rotations of
+ * the Ascon specification translated to the bit-interleaved representation;
+ * confirm against the specification.
+ */ forceinline state_t LINEAR(state_t s, int d) {
+  state_t t;
+  t.x[0].s[d].w[0] = s.x[0].s[d].w[0] ^ ROR32(s.x[0].s[d].w[1], 4);
+  t.x[0].s[d].w[1] = s.x[0].s[d].w[1] ^ ROR32(s.x[0].s[d].w[0], 5);
+  t.x[1].s[d].w[0] = s.x[1].s[d].w[0] ^ ROR32(s.x[1].s[d].w[0], 11);
+  t.x[1].s[d].w[1] = s.x[1].s[d].w[1] ^ ROR32(s.x[1].s[d].w[1], 11);
+  t.x[2].s[d].w[0] = s.x[2].s[d].w[0] ^ ROR32(s.x[2].s[d].w[1], 2);
+  t.x[2].s[d].w[1] = s.x[2].s[d].w[1] ^ ROR32(s.x[2].s[d].w[0], 3);
+  t.x[3].s[d].w[0] = s.x[3].s[d].w[0] ^ ROR32(s.x[3].s[d].w[1], 3);
+  t.x[3].s[d].w[1] = s.x[3].s[d].w[1] ^ ROR32(s.x[3].s[d].w[0], 4);
+  t.x[4].s[d].w[0] = s.x[4].s[d].w[0] ^ ROR32(s.x[4].s[d].w[0], 17);
+  t.x[4].s[d].w[1] = s.x[4].s[d].w[1] ^ ROR32(s.x[4].s[d].w[1], 17);
+  s.x[0].s[d].w[0] ^= ROR32(t.x[0].s[d].w[1], 9);
+  s.x[0].s[d].w[1] ^= ROR32(t.x[0].s[d].w[0], 10);
+  s.x[1].s[d].w[0] ^= ROR32(t.x[1].s[d].w[1], 19);
+  s.x[1].s[d].w[1] ^= ROR32(t.x[1].s[d].w[0], 20);
+  s.x[2].s[d].w[0] ^= ROR32(t.x[2].s[d].w[1], 0);
+  s.x[2].s[d].w[1] ^= ROR32(t.x[2].s[d].w[0], 1);
+  s.x[3].s[d].w[0] ^= ROR32(t.x[3].s[d].w[0], 5);
+  s.x[3].s[d].w[1] ^= ROR32(t.x[3].s[d].w[1], 5);
+  s.x[4].s[d].w[0] ^= ROR32(t.x[4].s[d].w[1], 3);
+  s.x[4].s[d].w[1] ^= ROR32(t.x[4].s[d].w[0], 4);
+  return s;
+}
+/* One permutation round on *p for ns shares. Works on a local copy of the
+ * state and writes it back at the end.
+ *   C_e / C_o  round constant parts XORed into half w[0] / w[1] of share 0
+ *              of x[2], each right before the S-box pass on that half
+ *   ns         number of shares (1..4)
+ * After the S-box, x[5] is rebuilt by MREUSE() with value 0, then LINEAR()
+ * runs once per share, highest share index first.
+ */ forceinline void ROUND_(state_t* p, uint8_t C_o, uint8_t C_e, int ns) {
+  state_t s = *p;
+  /* constant and sbox layer*/
+  s.x[2].s[0].w[0] ^= C_e;
+  s = SBOX(s, 0, ns);
+  s.x[2].s[0].w[1] ^= C_o;
+  s = SBOX(s, 1, ns);
+  /* reuse rotated randomness */
+  s.x[5] = MREUSE(s.x[5], 0, ns);
+  /* linear layer*/
+  if (ns >= 4) s = LINEAR(s, 3);
+  if (ns >= 3) s = LINEAR(s, 2);
+  if (ns >= 2) s = LINEAR(s, 1);
+  if (ns >= 1) s = LINEAR(s, 0);
+  *p = s;
+  printstate(" round output", &s, ns); /* no-op unless ASCON_PRINT_STATE */
+}
+/* Convenience wrapper around ROUND_() taking the round constant as one 64-bit
+ * value: the high 32 bits are passed as C_o and the low 32 bits as C_e. Both
+ * ROUND_() parameters are uint8_t, so only the low 8 bits of each part
+ * survive the conversion.
+ */ forceinline void ROUND(state_t* p, uint64_t C, int ns) {
+  ROUND_(p, C >> 32, C, ns);
+}
+/* Apply the permutation rounds selected by nr to s with ns shares. START, RC,
+ * INC and END are not defined in this file (presumably constants.h); RC(i)
+ * has to expand to the two constant arguments (C_o, C_e) of ROUND_(). The
+ * do/while form always executes at least one round.
+ */ forceinline void PROUNDS(state_t* s, int nr, int ns) {
+  int i = START(nr);
+  do {
+    ROUND_(s, RC(i), ns);
+    i += INC;
+  } while (i != END);
+}
+#endif /* ROUND_H_ */
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/shares.c
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/shares.c
+#include "shares.h"
+#include <stdlib.h>
+#include <string.h>
+#include "endian.h"
+#include "forceinline.h"
+#include "interleave.h"
+/* Rotate the 32-bit value x right by n bit positions.
+ * The count is reduced modulo 32 before shifting, so n == 0, n >= 32 and
+ * negative n (a left rotation by -n) are all well defined; shifting a 32-bit
+ * value by an out-of-range count would otherwise be undefined behaviour.
+ * Results for 0 <= n <= 31 are unchanged. */
+forceinline uint32_t ROR32(uint32_t x, int n) {
+  const unsigned r = (unsigned)n & 31;
+  return x >> r | x << (-r & 31);
+}
+/* Rotate the 64-bit value x right by n bit positions.
+ * The count is reduced modulo 64 before shifting, so n == 0, n >= 64 and
+ * negative n (a left rotation by -n) are all well defined instead of being
+ * an out-of-range shift. Results for 0 <= n <= 63 are unchanged. */
+forceinline uint64_t ROR64(uint64_t x, int n) {
+  const unsigned r = (unsigned)n & 63;
+  return x >> r | x << (-r & 63);
+}
+void generate_shares(uint32_t* s, int num_shares, const uint8_t* data,
+                     uint64_t len);
+void combine_shares(uint8_t* data, uint64_t len, const uint32_t* s,
+                    int num_shares);
+/* Mask all encryption inputs. Each plain byte buffer (k, npub, ad, m) is split
+ * by generate_shares() into the share array given next to it (ks, npubs, ads,
+ * ms), using the share count configured for that kind of data. Key and nonce
+ * lengths are the fixed CRYPTO_KEYBYTES / CRYPTO_NPUBBYTES. Each output array
+ * must provide NUM_WORDS(length) entries. The calls draw from the random
+ * source in the order key, nonce, associated data, message.
+ */ void generate_shares_encrypt(const unsigned char* m, mask_m_uint32_t* ms,
+                             const unsigned long long mlen,
+                             const unsigned char* ad, mask_ad_uint32_t* ads,
+                             const unsigned long long adlen,
+                             const unsigned char* npub,
+                             mask_npub_uint32_t* npubs, const unsigned char* k,
+                             mask_key_uint32_t* ks) {
+  generate_shares((uint32_t*)ks, NUM_SHARES_KEY, k, CRYPTO_KEYBYTES);
+  generate_shares((uint32_t*)npubs, NUM_SHARES_NPUB, npub, CRYPTO_NPUBBYTES);
+  generate_shares((uint32_t*)ads, NUM_SHARES_AD, ad, adlen);
+  generate_shares((uint32_t*)ms, NUM_SHARES_M, m, mlen);
+}
+/* Mask all decryption inputs. c holds clen bytes: the ciphertext body followed
+ * by a CRYPTO_ABYTES tag. Body and tag are shared separately; the tag shares
+ * start NUM_WORDS(mlen) entries into cs, i.e. on the first entry after the
+ * (padded) body, so cs must provide NUM_WORDS(mlen) + NUM_WORDS(CRYPTO_ABYTES)
+ * entries.
+ * NOTE(review): clen < CRYPTO_ABYTES makes mlen wrap around (unsigned
+ * subtraction); presumably the caller rejects such input beforehand, confirm.
+ */ void generate_shares_decrypt(const unsigned char* c, mask_c_uint32_t* cs,
+                             const unsigned long long clen,
+                             const unsigned char* ad, mask_ad_uint32_t* ads,
+                             const unsigned long long adlen,
+                             const unsigned char* npub,
+                             mask_npub_uint32_t* npubs, const unsigned char* k,
+                             mask_key_uint32_t* ks) {
+  unsigned long long mlen = clen - CRYPTO_ABYTES;
+  mask_c_uint32_t* ts = cs + NUM_WORDS(mlen); /* tag shares follow the body */
+  generate_shares((uint32_t*)ks, NUM_SHARES_KEY, k, CRYPTO_KEYBYTES);
+  generate_shares((uint32_t*)npubs, NUM_SHARES_NPUB, npub, CRYPTO_NPUBBYTES);
+  generate_shares((uint32_t*)ads, NUM_SHARES_AD, ad, adlen);
+  generate_shares((uint32_t*)cs, NUM_SHARES_C, c, mlen);
+  generate_shares((uint32_t*)ts, NUM_SHARES_C, c + mlen, CRYPTO_ABYTES);
+}
+/* Unmask the encryption output. cs holds the shared ciphertext body
+ * (clen - CRYPTO_ABYTES bytes) followed, NUM_WORDS(mlen) entries later, by
+ * the shared tag; both are recombined into the clen-byte buffer c.
+ * clen is expected to be at least CRYPTO_ABYTES (the subtraction is
+ * unsigned). */
+void combine_shares_encrypt(const mask_c_uint32_t* cs, unsigned char* c,
+                            unsigned long long clen) {
+  unsigned long long mlen = clen - CRYPTO_ABYTES;
+  const mask_c_uint32_t* ts = cs + NUM_WORDS(mlen);
+  /* cast to a const-qualified pointer: the shares are only read, and the
+   * previous (uint32_t*) cast silently discarded the qualifier */
+  combine_shares(c, mlen, (const uint32_t*)cs, NUM_SHARES_C);
+  combine_shares(c + mlen, CRYPTO_ABYTES, (const uint32_t*)ts, NUM_SHARES_C);
+}
+/* Unmask the decryption output: recombine the NUM_SHARES_M-share message in
+ * ms into the mlen-byte buffer m. */
+void combine_shares_decrypt(const mask_m_uint32_t* ms, unsigned char* m,
+                            unsigned long long mlen) {
+  /* keep the const qualifier in the cast; the shares are only read */
+  combine_shares(m, mlen, (const uint32_t*)ms, NUM_SHARES_M);
+}
+/* Split len bytes of data into num_shares rotated XOR shares.
+ *   s     output; 32-bit word j of share d is s[j * num_shares + d]. Must hold
+ *         NUM_WORDS(len) * num_shares entries (len rounded up to 8 bytes).
+ *   data  input bytes; a trailing partial block is zero-padded.
+ * Shares 1..num_shares-1 are fresh random words; share 0 is the data XORed
+ * with the rotated random shares, so that XORing all shares, each rotated by
+ * its own amount, gives the data back (see combine_shares()).
+ * With ASCON_EXTERN_BI the data is converted with TOBI() here and every
+ * 32-bit half is rotated by ROT(d); otherwise the 64-bit pair is rotated by
+ * ROT(2 * d).
+ * NOTE(review): ROT() reduces modulo 32, so ROT(2 * d) equals 2 * ROT(d) only
+ * while 2 * d * ASCON_ROR_SHARES, taken modulo 64, stays below 32. Confirm
+ * that the configured ASCON_ROR_SHARES and share counts stay in that range,
+ * otherwise the two variants describe different sharings.
+ */ void generate_shares(uint32_t* s, int num_shares, const uint8_t* data,
+                     uint64_t len) {
+  uint32_t rnd0, rnd1;
+  uint64_t rnd, i;
+  /* generate random shares */
+  for (i = 0; i < NUM_WORDS(len); i += 2) { /* one 64-bit word per pass */
+    s[(i + 0) * num_shares + 0] = 0;
+    s[(i + 1) * num_shares + 0] = 0;
+    for (int d = 1; d < num_shares; ++d) {
+      RND(rnd0);
+      RND(rnd1);
+      s[(i + 0) * num_shares + d] = rnd0;
+      s[(i + 1) * num_shares + d] = rnd1;
+#if ASCON_EXTERN_BI
+      s[(i + 0) * num_shares + 0] ^= ROR32(rnd0, ROT(d));
+      s[(i + 1) * num_shares + 0] ^= ROR32(rnd1, ROT(d));
+#else
+      rnd = ROR64((uint64_t)rnd1 << 32 | rnd0, ROT(2 * d));
+      s[(i + 0) * num_shares + 0] ^= (uint32_t)rnd;
+      s[(i + 1) * num_shares + 0] ^= (uint32_t)(rnd >> 32);
+#endif
+    }
+  }
+  /* mask complete words */
+  for (i = 0; i < len / 8; ++i) {
+    uint64_t x;
+    memcpy(&x, data + i * 8, 8);
+    x = U64BIG(x); /* endian.h; presumably big-endian load, confirm */
+#if ASCON_EXTERN_BI
+    x = TOBI(x);
+#endif
+    s[(2 * i + 0) * num_shares + 0] ^= (uint32_t)x;
+    s[(2 * i + 1) * num_shares + 0] ^= (uint32_t)(x >> 32);
+  }
+  /* mask remaining bytes */
+  if ((len / 8 * 8) != len) {
+    uint64_t x = 0;
+    for (i = (len / 8) * 8; i < len; ++i) {
+      x ^= (uint64_t)data[i] << ((i % 8) * 8); /* byte k of the block at bits 8k */
+    }
+    x = U64BIG(x);
+#if ASCON_EXTERN_BI
+    x = TOBI(x);
+#endif
+    s[(2 * (len / 8) + 0) * num_shares + 0] ^= (uint32_t)x;
+    s[(2 * (len / 8) + 1) * num_shares + 0] ^= (uint32_t)(x >> 32);
+  }
+}
+/* Inverse of generate_shares(): recombine num_shares rotated XOR shares from
+ * s into len plain bytes at data. Uses the same layout (32-bit word j of
+ * share d at s[j * num_shares + d]) and the same rotation amounts. For a
+ * trailing partial block only the remaining len % 8 bytes are written.
+ * Despite their names, rnd0 / rnd1 hold the two 32-bit halves of the share
+ * being processed, not fresh randomness.
+ */ void combine_shares(uint8_t* data, uint64_t len, const uint32_t* s,
+                    int num_shares) {
+  uint32_t rnd0, rnd1;
+  uint64_t i;
+  /* unmask complete words */
+  for (i = 0; i < len / 8; ++i) {
+    uint64_t x = 0;
+    for (int d = 0; d < num_shares; ++d) {
+      rnd0 = s[(2 * i + 0) * num_shares + d];
+      rnd1 = s[(2 * i + 1) * num_shares + d];
+#if ASCON_EXTERN_BI
+      x ^= (uint64_t)ROR32(rnd0, ROT(d));
+      x ^= (uint64_t)ROR32(rnd1, ROT(d)) << 32;
+#else
+      x ^= ROR64((uint64_t)rnd1 << 32 | rnd0, ROT(2 * d));
+#endif
+    }
+#if ASCON_EXTERN_BI
+    x = FROMBI(x);
+#endif
+    x = U64BIG(x);
+    memcpy(data + i * 8, &x, 8);
+  }
+  /* unmask remaining bytes */
+  if ((len / 8 * 8) != len) {
+    uint64_t x = 0;
+    for (int d = 0; d < num_shares; ++d) {
+      rnd0 = s[(2 * (len / 8) + 0) * num_shares + d];
+      rnd1 = s[(2 * (len / 8) + 1) * num_shares + d];
+#if ASCON_EXTERN_BI
+      x ^= (uint64_t)ROR32(rnd0, ROT(d));
+      x ^= (uint64_t)ROR32(rnd1, ROT(d)) << 32;
+#else
+      x ^= ROR64((uint64_t)rnd1 << 32 | rnd0, ROT(2 * d));
+#endif
+    }
+#if ASCON_EXTERN_BI
+    x = FROMBI(x);
+#endif
+    x = U64BIG(x);
+    for (i = (len / 8) * 8; i < len; ++i) {
+      data[i] = x >> ((i % 8) * 8); /* byte k of the block from bits 8k */
+    }
+  }
+}
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/shares.h
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/shares.h
+#ifndef SHARES_H_
+#define SHARES_H_
+#include <stdint.h>
+#include "api.h"
+#include "config.h"
+#include "randombytes.h"
+#define NUM_WORDS(len) ((((len) + 7) / 8) * 2) /* 32-bit words needed for len bytes, rounded up to whole 8-byte blocks */
+#define ROT(i) (((i) * (ASCON_ROR_SHARES)) & 31) /* rotation amount of share i: i * ASCON_ROR_SHARES reduced modulo 32 */
+#define RND(rnd) randombytes((unsigned char*)&rnd, 4) /* fill the 4-byte lvalue rnd from randombytes(); rnd is used unparenthesized */
+typedef struct {
+  uint32_t shares[NUM_SHARES_M];
+} mask_m_uint32_t; /* one 32-bit message word: its NUM_SHARES_M shares */
+typedef struct {
+  uint32_t shares[NUM_SHARES_C];
+} mask_c_uint32_t; /* one 32-bit ciphertext/tag word: its NUM_SHARES_C shares */
+typedef struct {
+  uint32_t shares[NUM_SHARES_AD];
+} mask_ad_uint32_t; /* one 32-bit associated-data word: its NUM_SHARES_AD shares */
+typedef struct {
+  uint32_t shares[NUM_SHARES_NPUB];
+} mask_npub_uint32_t; /* one 32-bit nonce word: its NUM_SHARES_NPUB shares */
+typedef struct {
+  uint32_t shares[NUM_SHARES_KEY];
+} mask_key_uint32_t; /* one 32-bit key word: its NUM_SHARES_KEY shares */
+void generate_shares_encrypt(const unsigned char* m, mask_m_uint32_t* ms,
+                             const unsigned long long mlen,
+                             const unsigned char* ad, mask_ad_uint32_t* ads,
+                             const unsigned long long adlen,
+                             const unsigned char* npub,
+                             mask_npub_uint32_t* npubs, const unsigned char* k,
+                             mask_key_uint32_t* ks);
+void generate_shares_decrypt(const unsigned char* c, mask_c_uint32_t* cs,
+                             const unsigned long long clen,
+                             const unsigned char* ad, mask_ad_uint32_t* ads,
+                             const unsigned long long adlen,
+                             const unsigned char* npub,
+                             mask_npub_uint32_t* npubs, const unsigned char* k,
+                             mask_key_uint32_t* ks);
+void combine_shares_encrypt(const mask_c_uint32_t* cs, unsigned char* c,
+                            unsigned long long clen);
+void combine_shares_decrypt(const mask_m_uint32_t* ms, unsigned char* m,
+                            unsigned long long mlen);
+#endif /* SHARES_H_ */
--- a/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/word.h
+++ b/src-ascon/ascon/Implementations/crypto_aead/ascon128v12/protected_bi32_armv6/word.h
+#ifndef WORD_H_
+#define WORD_H_
+#include <stdint.h>
+#include <string.h>
+#include "asm.h"
+#include "config.h"
+#include "endian.h"
+#include "forceinline.h"
+#include "interleave.h"
+#include "shares.h"
+typedef struct {
+  uint32_t w[2];
+} share_t; /* one share of a 64-bit word as two 32-bit halves; LOADSHARE() puts the even half (e) in w[0] and the odd half (o) in w[1] */
+typedef struct {
+  share_t s[NUM_SHARES_KEY];
+} word_t; /* a masked 64-bit word; the array is sized by NUM_SHARES_KEY, so the ns passed to the helpers below must not exceed it */
+/* Rotate the 32-bit value x right by n bit positions.
+ * The count is reduced modulo 32 before shifting, so n == 0, n >= 32 and
+ * negative n (a left rotation by -n) are all well defined; shifting a 32-bit
+ * value by an out-of-range count would otherwise be undefined behaviour.
+ * Results for 0 <= n <= 31 are unchanged. */
+forceinline uint32_t ROR32(uint32_t x, int n) {
+  const unsigned r = (unsigned)n & 31;
+  return x >> r | x << (-r & 31);
+}
+/* Rotate the low and the high 32-bit half of x right by n, each on its own,
+ * and return the two rotated halves packed back into one 64-bit value. */
+forceinline uint64_t ROR32x2(uint64_t x, int n) {
+  const uint32_t lo_rot = ROR32((uint32_t)x, n);
+  const uint32_t hi_rot = ROR32((uint32_t)(x >> 32), n);
+  return ((uint64_t)hi_rot << 32) | lo_rot;
+}
+/* Rotate the 64-bit value x right by n bit positions.
+ * The count is reduced modulo 64 before shifting, so n == 0, n >= 64 and
+ * negative n (a left rotation by -n) are all well defined instead of being
+ * an out-of-range shift. Results for 0 <= n <= 63 are unchanged. */
+forceinline uint64_t ROR64(uint64_t x, int n) {
+  const unsigned r = (unsigned)n & 63;
+  return x >> r | x << (-r & 63);
+}
+/* Share-wise XOR of two masked words: for every share index below ns, both
+ * 32-bit halves of b are XORed into the matching share of a. Shares are never
+ * mixed with each other. Returns the updated a; shares at index ns and above
+ * are returned as passed in.
+ */ forceinline word_t MXOR(word_t a, word_t b, int ns) {
+  if (ns >= 1) a.s[0].w[0] ^= b.s[0].w[0];
+  if (ns >= 1) a.s[0].w[1] ^= b.s[0].w[1];
+  if (ns >= 2) a.s[1].w[0] ^= b.s[1].w[0];
+  if (ns >= 2) a.s[1].w[1] ^= b.s[1].w[1];
+  if (ns >= 3) a.s[2].w[0] ^= b.s[2].w[0];
+  if (ns >= 3) a.s[2].w[1] ^= b.s[2].w[1];
+  if (ns >= 4) a.s[3].w[0] ^= b.s[3].w[0];
+  if (ns >= 4) a.s[3].w[1] ^= b.s[3].w[1];
+  return a;
+}
+/* Masked XOR/bit-clear gadget on 32-bit half i: updates the shares of c from
+ * the shares of a and b and returns c. Used as the Toffoli gate of SBOX().
+ * The work is done by the EOR_BIC_ROR / EOR_AND_ROR / EOR_ORR_ROR / CLEAR
+ * macros, which are not defined in this file (presumably asm.h). Going by
+ * their names they XOR into their first operand the BIC / AND / ORR of the
+ * next two, with the fourth argument giving the rotation between the two
+ * share indices involved; TODO confirm against asm.h. tmp is the scratch
+ * variable handed to every macro.
+ * ns is matched exactly (==), so any ns outside 1..4 returns c unchanged.
+ * The ns == 3 branch uses a different operand pattern from ns == 2 / 4, and
+ * only the ns == 2 branch calls CLEAR(). The instruction order is part of the
+ * masking scheme; do not reorder.
+ */ forceinline word_t MXORBIC(word_t c, word_t a, word_t b, int i, int ns) {
+  uint32_t tmp;
+  if (ns == 1) {
+    EOR_BIC_ROR(c.s[0].w[i], a.s[0].w[i], b.s[0].w[i], 0, tmp);
+  }
+  if (ns == 2) {
+    EOR_BIC_ROR(c.s[0].w[i], a.s[0].w[i], b.s[0].w[i], 0, tmp);
+    EOR_BIC_ROR(c.s[1].w[i], a.s[1].w[i], b.s[0].w[i], 0 - 1, tmp);
+    CLEAR();
+    EOR_AND_ROR(c.s[1].w[i], a.s[1].w[i], b.s[1].w[i], 0, tmp);
+    EOR_AND_ROR(c.s[0].w[i], a.s[0].w[i], b.s[1].w[i], 1 - 0, tmp);
+    CLEAR();
+  }
+  if (ns == 3) {
+    EOR_AND_ROR(c.s[0].w[i], b.s[0].w[i], a.s[1].w[i], 1 - 0, tmp);
+    EOR_BIC_ROR(c.s[0].w[i], a.s[0].w[i], b.s[0].w[i], 0, tmp);
+    EOR_AND_ROR(c.s[0].w[i], b.s[0].w[i], a.s[2].w[i], 2 - 0, tmp);
+    EOR_AND_ROR(c.s[1].w[i], b.s[1].w[i], a.s[2].w[i], 2 - 1, tmp);
+    EOR_BIC_ROR(c.s[1].w[i], a.s[1].w[i], b.s[1].w[i], 0, tmp);
+    EOR_AND_ROR(c.s[1].w[i], b.s[1].w[i], a.s[0].w[i], 0 - 1, tmp);
+    EOR_BIC_ROR(c.s[2].w[i], b.s[2].w[i], a.s[0].w[i], 0 - 2, tmp);
+    EOR_ORR_ROR(c.s[2].w[i], a.s[2].w[i], b.s[2].w[i], 0, tmp);
+    EOR_AND_ROR(c.s[2].w[i], b.s[2].w[i], a.s[1].w[i], 1 - 2, tmp);
+  }
+  if (ns == 4) {
+    EOR_BIC_ROR(c.s[0].w[i], a.s[0].w[i], b.s[0].w[i], 0, tmp);
+    EOR_BIC_ROR(c.s[1].w[i], a.s[1].w[i], b.s[0].w[i], 0 - 1, tmp);
+    EOR_BIC_ROR(c.s[2].w[i], a.s[2].w[i], b.s[0].w[i], 0 - 2, tmp);
+    EOR_BIC_ROR(c.s[3].w[i], a.s[3].w[i], b.s[0].w[i], 0 - 3, tmp);
+    EOR_AND_ROR(c.s[1].w[i], a.s[1].w[i], b.s[1].w[i], 0, tmp);
+    EOR_AND_ROR(c.s[2].w[i], a.s[2].w[i], b.s[1].w[i], 1 - 2, tmp);
+    EOR_AND_ROR(c.s[3].w[i], a.s[3].w[i], b.s[1].w[i], 1 - 3, tmp);
+    EOR_AND_ROR(c.s[0].w[i], a.s[0].w[i], b.s[1].w[i], 1 - 0, tmp);
+    EOR_AND_ROR(c.s[2].w[i], a.s[2].w[i], b.s[2].w[i], 0, tmp);
+    EOR_AND_ROR(c.s[3].w[i], a.s[3].w[i], b.s[2].w[i], 2 - 3, tmp);
+    EOR_AND_ROR(c.s[0].w[i], a.s[0].w[i], b.s[2].w[i], 2 - 0, tmp);
+    EOR_AND_ROR(c.s[1].w[i], a.s[1].w[i], b.s[2].w[i], 2 - 1, tmp);
+    EOR_AND_ROR(c.s[3].w[i], a.s[3].w[i], b.s[3].w[i], 0, tmp);
+    EOR_AND_ROR(c.s[0].w[i], a.s[0].w[i], b.s[3].w[i], 3 - 0, tmp);
+    EOR_AND_ROR(c.s[1].w[i], a.s[1].w[i], b.s[3].w[i], 3 - 1, tmp);
+    EOR_AND_ROR(c.s[2].w[i], a.s[2].w[i], b.s[3].w[i], 3 - 2, tmp);
+  }
+  return c;
+}
+/* Masked XOR/AND on both 32-bit halves, built on MXORBIC(): share 0 of b is
+ * complemented first (which complements the shared value of b), then the
+ * bit-clear gadget runs on half 0 and half 1. Presumably this turns the
+ * gadget's AND-NOT into a plain AND of a and b; confirm against asm.h.
+ * b is a by-value copy, so the caller's word is not modified.
+ */ forceinline word_t MXORAND(word_t c, word_t a, word_t b, int ns) {
+  b.s[0].w[0] = ~b.s[0].w[0];
+  b.s[0].w[1] = ~b.s[0].w[1];
+  c = MXORBIC(c, a, b, 0, ns);
+  c = MXORBIC(c, a, b, 1, ns);
+  return c;
+}
+/* Return a word whose shares 1..ns-1 are filled with fresh random 32-bit
+ * halves via RND(). Share 0 (and any share at index ns or above) is NOT
+ * written and is returned uninitialized, so the caller has to set it before
+ * reading it.
+ */ forceinline word_t MRND(int ns) {
+  word_t w;
+  if (ns >= 2) RND(w.s[1].w[0]);
+  if (ns >= 2) RND(w.s[1].w[1]);
+  if (ns >= 3) RND(w.s[2].w[0]);
+  if (ns >= 3) RND(w.s[2].w[1]);
+  if (ns >= 4) RND(w.s[3].w[0]);
+  if (ns >= 4) RND(w.s[3].w[1]);
+  return w;
+}
+/* Rotate both halves of the higher shares by fixed amounts: share 1 by 7,
+ * share 2 by 13 and share 3 by 29 bits to the right (only those below ns).
+ * Share 0 is left as is. MREUSE() uses this to derive new mask values from
+ * the ones already present.
+ */ forceinline word_t MMIX(word_t w, int ns) {
+  if (ns >= 2) w.s[1].w[0] = ROR32(w.s[1].w[0], 7);
+  if (ns >= 2) w.s[1].w[1] = ROR32(w.s[1].w[1], 7);
+  if (ns >= 3) w.s[2].w[0] = ROR32(w.s[2].w[0], 13);
+  if (ns >= 3) w.s[2].w[1] = ROR32(w.s[2].w[1], 13);
+  if (ns >= 4) w.s[3].w[0] = ROR32(w.s[3].w[0], 29);
+  if (ns >= 4) w.s[3].w[1] = ROR32(w.s[3].w[1], 29);
+  return w;
+}
+/* Reduce a sharing from nsi shares to nso shares: every share k with
+ * nso <= k < nsi (k = 1..3) is rotated right by ROT(k) and XORed into share
+ * 0, for both halves. The folded shares themselves are left in place (not
+ * cleared). With nso == 1 share 0 ends up holding the unmasked value.
+ */ forceinline word_t MREDUCE(word_t w, int nsi, int nso) {
+  if (nsi >= 2 && nso < 2) w.s[0].w[0] ^= ROR32(w.s[1].w[0], ROT(1));
+  if (nsi >= 2 && nso < 2) w.s[0].w[1] ^= ROR32(w.s[1].w[1], ROT(1));
+  if (nsi >= 3 && nso < 3) w.s[0].w[0] ^= ROR32(w.s[2].w[0], ROT(2));
+  if (nsi >= 3 && nso < 3) w.s[0].w[1] ^= ROR32(w.s[2].w[1], ROT(2));
+  if (nsi >= 4 && nso < 4) w.s[0].w[0] ^= ROR32(w.s[3].w[0], ROT(3));
+  if (nsi >= 4 && nso < 4) w.s[0].w[1] ^= ROR32(w.s[3].w[1], ROT(3));
+  return w;
+}
+/* Expand a sharing from nsi to nso shares. Implemented as MREDUCE() with the
+ * two counts swapped: shares nsi..nso-1, which must already hold the mask
+ * values to use, are rotated and XORed into share 0, so that the nso shares
+ * together encode what the first nsi shares encoded before.
+ */ forceinline word_t MEXPAND(word_t w, int nsi, int nso) {
+  return MREDUCE(w, nso, nsi);
+}
+/* Build an ns-share encoding of the public 64-bit value val by reusing the
+ * mask material already present in shares 1..ns-1 of w instead of drawing new
+ * randomness: share 0 is set to val (low 32 bits in w[0], high 32 bits in
+ * w[1]), the higher shares are rotated by MMIX(), and MEXPAND() folds them
+ * into share 0.
+ */ forceinline word_t MREUSE(word_t w, uint64_t val, int ns) {
+  w.s[0].w[0] = (uint32_t)val;
+  w.s[0].w[1] = val >> 32;
+  w = MMIX(w, ns);
+  w = MEXPAND(w, 1, ns);
+  return w;
+}
+/* Return a fresh ns-share word that presumably encodes zero. For ns == 1 both
+ * halves of share 0 are set to 0. For ns >= 2 each higher share is drawn with
+ * RND() and rotated by a fixed amount (7 / 13 / 29), and share 0 is built
+ * from the higher shares rotated by ROT(k). MOVI, RORI, EOR_ROR and CLEAR are
+ * not defined in this file (presumably asm.h); read here as "dst = constant",
+ * "dst = src rotated right by n" and "dst = a ^ (b rotated right by n)";
+ * TODO confirm. Shares at index ns and above stay uninitialized, and for
+ * ns < 1 nothing is written at all.
+ */ forceinline word_t MZERO(int ns) {
+  word_t w;
+  if (ns == 1) {
+    MOVI(w.s[0].w[0], 0);
+    MOVI(w.s[0].w[1], 0);
+  }
+  if (ns >= 2) {
+    RND(w.s[1].w[0]);
+    RND(w.s[1].w[1]);
+    RORI(w.s[1].w[0], w.s[1].w[0], 7);
+    RORI(w.s[1].w[1], w.s[1].w[1], 7);
+    RORI(w.s[0].w[0], w.s[1].w[0], ROT(1)); /* share 0 starts as rotated share 1 */
+    RORI(w.s[0].w[1], w.s[1].w[1], ROT(1));
+    if (ns == 2) CLEAR();
+  }
+  if (ns >= 3) {
+    RND(w.s[2].w[0]);
+    RND(w.s[2].w[1]);
+    RORI(w.s[2].w[0], w.s[2].w[0], 13);
+    RORI(w.s[2].w[1], w.s[2].w[1], 13);
+    EOR_ROR(w.s[0].w[0], w.s[0].w[0], w.s[2].w[0], ROT(2));
+    EOR_ROR(w.s[0].w[1], w.s[0].w[1], w.s[2].w[1], ROT(2));
+  }
+  if (ns >= 4) {
+    RND(w.s[3].w[0]);
+    RND(w.s[3].w[1]);
+    RORI(w.s[3].w[0], w.s[3].w[0], 29);
+    RORI(w.s[3].w[1], w.s[3].w[1], 29);
+    EOR_ROR(w.s[0].w[0], w.s[0].w[0], w.s[3].w[0], ROT(3));
+    EOR_ROR(w.s[0].w[1], w.s[0].w[1], w.s[3].w[1], ROT(3));
+  }
+  return w;
+}
+/* Return a fresh MZERO() sharing with the constant 0xffffffff >> (n * 4)
+ * XORed into both halves of share 0, i.e. a masked word whose top 4 * n bits
+ * of each half are clear and whose remaining bits are set. n must stay in
+ * 0..7 so that the shift count is below 32.
+ * NOTE(review): n is presumably a byte count (in bit-interleaved form each
+ * byte contributes 4 bits to each half); confirm with the callers.
+ */ forceinline word_t MMASK(int n, int ns) {
+  uint32_t mask = 0xffffffff >> (n * 4);
+  word_t m = MZERO(ns);
+  m.s[0].w[0] ^= mask;
+  m.s[0].w[1] ^= mask;
+  return m;
+}
+/* Re-randomize the shares of w: a fresh MZERO() word is XORed share by share
+ * into w with MXOR(), which changes the individual shares while leaving the
+ * encoded value as it was (given that MZERO() encodes zero).
+ */ forceinline word_t MREFRESH(word_t w, int ns) {
+  word_t r = MZERO(ns);
+  return MXOR(w, r, ns);
+}
+/* Masked test over the two 64-bit words a and b: their bits are combined on
+ * shares with MXORBIC() (words, then even/odd halves, then 16/8/4/2/1-bit
+ * chunks via rotations); only the final accumulator is unmasked, by folding
+ * the rotated higher shares into share 0, and its bitwise complement is
+ * returned as int.
+ * NOTE(review): the name and inline comments suggest the result tells
+ * whether (a, b) is non-zero, but the exact polarity depends on the macros
+ * behind MXORBIC() (asm.h, not shown); confirm before relying on it.
+ */ forceinline int MNOTZERO(word_t a, word_t b, int ns) {
+  word_t c = MZERO(ns);
+  /* note: OR(a,b) = ~BIC(~a,b) */
+  a.s[0].w[0] = ~a.s[0].w[0];
+  a.s[0].w[1] = ~a.s[0].w[1];
+  /* OR first and second 64-bit word */
+  c = MXORBIC(c, a, b, 0, ns);
+  c = MXORBIC(c, a, b, 1, ns);
+  /* OR even and odd words */
+  if (ns >= 1) b.s[0].w[0] = c.s[0].w[1];
+  if (ns >= 2) b.s[1].w[0] = c.s[1].w[1];
+  if (ns >= 3) b.s[2].w[0] = c.s[2].w[1];
+  if (ns >= 4) b.s[3].w[0] = c.s[3].w[1];
+  a = MXORBIC(a, b, c, 0, ns);
+  /* loop to OR 16/8/4/2/1 bit chunks */
+  for (int i = 16; i > 0; i >>= 1) {
+    if (ns >= 1) b.s[0].w[0] = ROR32(a.s[0].w[0], i);
+    if (ns >= 2) b.s[1].w[0] = ROR32(a.s[1].w[0], i);
+    if (ns >= 3) b.s[2].w[0] = ROR32(a.s[2].w[0], i);
+    if (ns >= 4) b.s[3].w[0] = ROR32(a.s[3].w[0], i);
+    c = MXORBIC(c, a, b, 0, ns);
+    if (ns >= 1) a.s[0].w[0] = c.s[0].w[0];
+    if (ns >= 2) a.s[1].w[0] = c.s[1].w[0];
+    if (ns >= 3) a.s[2].w[0] = c.s[2].w[0];
+    if (ns >= 4) a.s[3].w[0] = c.s[3].w[0];
+  }
+  /* unmask result */
+  if (ns >= 2) a.s[0].w[0] ^= ROR32(a.s[1].w[0], ROT(1));
+  if (ns >= 3) a.s[0].w[0] ^= ROR32(a.s[2].w[0], ROT(2));
+  if (ns >= 4) a.s[0].w[0] ^= ROR32(a.s[3].w[0], ROT(3));
+  return ~a.s[0].w[0];
+}
+/* Load one share of a 64-bit word from the share array. data points at this
+ * share's entry for the first 32-bit word; the entry for the second 32-bit
+ * word is read at offset 4 * ns, which is presumably a byte offset (ns
+ * uint32_t entries further on, matching the layout written by
+ * generate_shares()). LDR and CLEAR are not defined in this file (presumably
+ * asm.h). Unless ASCON_EXTERN_BI is set, the pair is converted to
+ * bit-interleaved form here with BD(); otherwise it is taken as is.
+ */ forceinline share_t LOADSHARE(uint32_t* data, int ns) {
+  share_t s;
+  uint32_t lo, hi;
+  LDR(lo, data, 0);
+  LDR(hi, data, 4 * ns);
+#if !ASCON_EXTERN_BI
+  BD(s.w[0], s.w[1], lo, hi);
+  if (ns == 2) CLEAR();
+#else
+  s.w[0] = lo;
+  s.w[1] = hi;
+#endif
+  return s;
+}
+/* Store one share of a 64-bit word into the share array; mirror image of
+ * LOADSHARE(). Unless ASCON_EXTERN_BI is set, the bit-interleaved halves are
+ * converted back with BI() first. The two 32-bit results go to offsets 0 and
+ * 4 * ns from data (presumably byte offsets; STR and CLEAR are not defined in
+ * this file, presumably asm.h).
+ */ forceinline void STORESHARE(uint32_t* data, share_t s, int ns) {
+  uint32_t lo, hi;
+#if !ASCON_EXTERN_BI
+  BI(lo, hi, s.w[0], s.w[1]);
+  if (ns == 2) CLEAR();
+#else
+  lo = s.w[0];
+  hi = s.w[1];
+#endif
+  STR(lo, data, 0);
+  STR(hi, data, 4 * ns);
+}
+/* Load a complete masked 64-bit word: share d (d < ns) is read with
+ * LOADSHARE() starting at data[d]. Shares at index ns and above stay zero
+ * because w is zero-initialized.
+ */ forceinline word_t MLOAD(uint32_t* data, int ns) {
+  word_t w = {0};
+  if (ns >= 1) w.s[0] = LOADSHARE(&(data[0]), ns);
+  if (ns >= 2) w.s[1] = LOADSHARE(&(data[1]), ns);
+  if (ns >= 3) w.s[2] = LOADSHARE(&(data[2]), ns);
+  if (ns >= 4) w.s[3] = LOADSHARE(&(data[3]), ns);
+  return w;
+}
+/* Store a complete masked 64-bit word: share d (d < ns) is written with
+ * STORESHARE() starting at data[d]. Shares at index ns and above are not
+ * written.
+ */ forceinline void MSTORE(uint32_t* data, word_t w, int ns) {
+  if (ns >= 1) STORESHARE(&(data[0]), w.s[0], ns);
+  if (ns >= 2) STORESHARE(&(data[1]), w.s[1], ns);
+  if (ns >= 3) STORESHARE(&(data[2]), w.s[2], ns);
+  if (ns >= 4) STORESHARE(&(data[3]), w.s[3], ns);
+}
+#endif /* WORD_H_ */
--- a/src-ascon/ascon/LICENSE
+++ b/src-ascon/ascon/LICENSE
+CC0 1.0 Universal
+Statement of Purpose
+The laws of most jurisdictions throughout the world automatically confer
+exclusive Copyright and Related Rights (defined below) upon the creator and
+subsequent owner(s) (each and all, an "owner") of an original work of
+authorship and/or a database (each, a "Work").
+Certain owners wish to permanently relinquish those rights to a Work for the
+purpose of contributing to a commons of creative, cultural and scientific
+works ("Commons") that the public can reliably and without fear of later
+claims of infringement build upon, modify, incorporate in other works, reuse
+and redistribute as freely as possible in any form whatsoever and for any
+purposes, including without limitation commercial purposes. These owners may
+contribute to the Commons to promote the ideal of a free culture and the
+further production of creative, cultural and scientific works, or to gain
+reputation or greater distribution for their Work in part through the use and
+efforts of others.
+For these and/or other purposes and motivations, and without any expectation
+of additional consideration or compensation, the person associating CC0 with a
+Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
+and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
+and publicly distribute the Work under its terms, with knowledge of his or her
+Copyright and Related Rights in the Work and the meaning and intended legal
+effect of CC0 on those rights.
+1. Copyright and Related Rights. A Work made available under CC0 may be
+protected by copyright and related or neighboring rights ("Copyright and
+Related Rights"). Copyright and Related Rights include, but are not limited
+to, the following:
+  i. the right to reproduce, adapt, distribute, perform, display, communicate,
+  and translate a Work;
+  ii. moral rights retained by the original author(s) and/or performer(s);
+  iii. publicity and privacy rights pertaining to a person's image or likeness
+  depicted in a Work;
+  iv. rights protecting against unfair competition in regards to a Work,
+  subject to the limitations in paragraph 4(a), below;
+  v. rights protecting the extraction, dissemination, use and reuse of data in
+  a Work;
+  vi. database rights (such as those arising under Directive 96/9/EC of the
+  European Parliament and of the Council of 11 March 1996 on the legal
+  protection of databases, and under any national implementation thereof,
+  including any amended or successor version of such directive); and
+  vii. other similar, equivalent or corresponding rights throughout the world
+  based on applicable law or treaty, and any national implementations thereof.
+2. Waiver. To the greatest extent permitted by, but not in contravention of,
+applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
+unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
+and Related Rights and associated claims and causes of action, whether now
+known or unknown (including existing as well as future claims and causes of
+action), in the Work (i) in all territories worldwide, (ii) for the maximum
+duration provided by applicable law or treaty (including future time
+extensions), (iii) in any current or future medium and for any number of
+copies, and (iv) for any purpose whatsoever, including without limitation
+commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
+the Waiver for the benefit of each member of the public at large and to the
+detriment of Affirmer's heirs and successors, fully intending that such Waiver
+shall not be subject to revocation, rescission, cancellation, termination, or
+any other legal or equitable action to disrupt the quiet enjoyment of the Work
+by the public as contemplated by Affirmer's express Statement of Purpose.
+3. Public License Fallback. Should any part of the Waiver for any reason be
+judged legally invalid or ineffective under applicable law, then the Waiver
+shall be preserved to the maximum extent permitted taking into account
+Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
+is so judged Affirmer hereby grants to each affected person a royalty-free,
+non transferable, non sublicensable, non exclusive, irrevocable and
+unconditional license to exercise Affirmer's Copyright and Related Rights in
+the Work (i) in all territories worldwide, (ii) for the maximum duration
+provided by applicable law or treaty (including future time extensions), (iii)
+in any current or future medium and for any number of copies, and (iv) for any
+purpose whatsoever, including without limitation commercial, advertising or
+promotional purposes (the "License"). The License shall be deemed effective as
+of the date CC0 was applied by Affirmer to the Work. Should any part of the
+License for any reason be judged legally invalid or ineffective under
+applicable law, such partial invalidity or ineffectiveness shall not
+invalidate the remainder of the License, and in such case Affirmer hereby
+affirms that he or she will not (i) exercise any of his or her remaining
+Copyright and Related Rights in the Work or (ii) assert any associated claims
+and causes of action with respect to the Work, in either case contrary to
+Affirmer's express Statement of Purpose.
+4. Limitations and Disclaimers.
+  a. No trademark or patent rights held by Affirmer are waived, abandoned,
+  surrendered, licensed or otherwise affected by this document.
+  b. Affirmer offers the Work as-is and makes no representations or warranties
+  of any kind concerning the Work, express, implied, statutory or otherwise,
+  including without limitation warranties of title, merchantability, fitness
+  for a particular purpose, non infringement, or the absence of latent or
+  other defects, accuracy, or the present or absence of errors, whether or not
+  discoverable, all to the greatest extent permissible under applicable law.
+  c. Affirmer disclaims responsibility for clearing rights of other persons
+  that may apply to the Work or any use thereof, including without limitation
+  any person's Copyright and Related Rights in the Work. Further, Affirmer
+  disclaims responsibility for obtaining any necessary consents, permissions
+  or other rights required for any use of the Work.
+  d. Affirmer understands and acknowledges that Creative Commons is not a
+  party to this document and has no duty or obligation with respect to this
+  CC0 or use of the Work.
+For more information, please see
+<http://creativecommons.org/publicdomain/zero/1.0/>
\ No newline at end of file
--- a/src-ascon/ascon/README.md
+++ b/src-ascon/ascon/README.md
+# Masked Ascon Software Implementations
+This repository contains high-level masked (shared) [Ascon](https://ascon.iaik.tugraz.at/)
+software implementations, mostly written in C. These implementations can be used
+as a starting point to generate device specific C/ASM implementations.
+Masked C implementations require a minimum amount of ASM instructions.
+Otherwise, the compiler may heavily optimize the code and even combine
+shares. Obviously, the output generated is very sensitive to compiler and
+environment changes and any generated output needs to be security evaluated.
+A preliminary evaluation of these implementations has been performed on some
+[ChipWhisperer](https://www.newae.com/chipwhisperer) devices. The results can
+be reproduced by performing the following steps:
+- Make sure this repository is checked out in the `hardware/victims/firmware` folder of your chipwhisperer installation.
+- Make sure the `jupyter/*.ipynb` scripts are located in the `jupyter` folder of your chipwhisperer installation.
+- Run the shared simpleserial interface jupyter script `jupyter/ascon_sca_sss.ipynb`.
+The masked software interface follows the
+[Call for Protected Software Implementations](https://cryptography.gmu.edu/athena/LWC/Call_for_Protected_Software_Implementations.pdf)
+of the [Cryptographic Engineering Research Group](https://cryptography.gmu.edu/)
+for finalists in the
+[NIST Lightweight Cryptography Competition](https://csrc.nist.gov/projects/lightweight-cryptography).
+The number of shares is defined by the parameters `NUM_SHARES_KEY`,
+`NUM_SHARES_NPUB`, `NUM_SHARES_AD`, `NUM_SHARES_M` and `NUM_SHARES_C` in the
+`api.h` file.
+Additionally, most masked Ascon implementations assume that the shares are
+(32/64-bit) rotated against each other using the parameter `ASCON_ROR_SHARES`
+defined in the `api.h` file. The Ascon specific masking and rotation functions are
+defined in the Python functions `generate_shares` and `combine_shares` as well
+as in the C functions `generate_shares_encrypt`, `generate_shares_decrypt`,
+`combine_shares_encrypt` and `combine_shares_decrypt`.
+Note that an `ASCON_ROR_SHARES` value of `x` corresponds to a right rotation of
+each internal 32-bit share `i` by `x*i mod 32` bits. For 32-bit interleaved
+implementations of Ascon, this corresponds to a right rotation of
+each 64-bit share `i` by `2*x*i mod 32` bits at the interface level.
+# Protection Methods
+- Name of the applied countermeasures:
+  * Masking with (almost) no fresh randomness
+  * Rotation of shares against each other
+  * Mode-level security (mask init/final, plain ad/pt/ct)
+- Tag comparison:
+  * XOR masked tag to state (x3,x4)
+  * Set remaining state to masked zero
+  * Compute masked PB permutation
+  * Plain comparison of result with known output of PB(0)
+- Available implementations:
+  * `protected_bi32_armv6` supporting 2, 3, 4 rotated shares (equal number of
+    shares for key, nonce, adata, plaintext and ciphertext)
+  * `protected_bi32_armv6_leveled` supporting 2, 3, 4 rotated shares for key
+    and 1 share for nonce, adata, plaintext and ciphertext
+- Primary references for masking Ascon:
+  * Joan Daemen, Christoph Dobraunig, Maria Eichlseder, Hannes Groß, Florian Mendel,
+    Robert Primas: "Protecting against Statistical Ineffective Fault Attacks".
+    CHES 2020. https://doi.org/10.13154/tches.v2020.i3.508-543
+  * Aein Rezaei Shahmirzadi, Amir Moradi: "Second-Order SCA Security with almost no
+    Fresh Randomness". CHES 2021. https://doi.org/10.46586/tches.v2021.i3.708-755
+  * Hannes Groß, Stefan Mangard: "Reconciling d+1 Masking in Hardware and Software".
+    CHES 2017. https://eprint.iacr.org/2017/103
+- Primary references for mode-level security of Ascon:
+  * Alexandre Adomnicai, Jacques J. A. Fournier, Laurent Masson: "Masking the
+    Lightweight Authenticated Ciphers ACORN and Ascon in Software". Cryptology
+    ePrint Archive, Report 2018/708. https://eprint.iacr.org/2018/708
+  * Davide Bellizia, Olivier Bronchain, Gaëtan Cassiers, Vincent Grosso, Chun
+    Guo, Charles Momin, Olivier Pereira, Thomas Peters, François-Xavier
+    Standaert: "Mode-Level vs. Implementation-Level Physical Security in
+    Symmetric Cryptography - A Practical Guide Through the Leakage-Resistance
+    Jungle". CRYPTO 2020. https://eprint.iacr.org/2020/211
+# Experimental Setup
+- Measurement platform and device-under-evaluation:
+  * ChipWhisperer, CW308 with STM32F303 UFO target
+  * ChipWhisperer, CW308 with STM32F415 UFO target
+  * ChipWhisperer, CW308 with STM32F405 UFO target
+- STM32F303, STM32F415:
+  * Oscilloscope: ChipWhisperer Lite Scope
+  * Measurement: see ChipWhisperer specification
+  * Sampling rate: clkgen x4
+- STM32F405:
+  * Oscilloscope: Picoscope 6404d
+  * Measurement: CW501 differential probe
+  * Sampling rate: 1GS
+The experimental setup and evaluations for STM32F303 and STM32F415 are
+given in the jupyter scripts in this repository.
+# Attack/Leakage Assessment Characteristics
+- Data inputs and performed operations:
+  * encrypt/decrypt using plain CW simpleserial interface defined in
+    `jupyter/ascon_sca.ipynb`
+  * encrypt/decrypt using shared CW simpleserial interface defined in
+    `jupyter/ascon_sca_sss.ipynb`
+  * STM32F303 and STM32F415: `ASCON_PA_ROUNDS` and `ASCON_PB_ROUNDS` reduced to
+    2 rounds to mostly fit within 24400 samples
+- Source of random and pseudorandom inputs:
+  * STM32F415: randombytes.c using STM32F415 hardware RNG
+  * STM32F303 and STM32F415: custom randombytes.c function using stdlib.h
+    rand() and srand()
+  * Python random.getrandbits function for shared interface
+- Trigger location relative to the execution start time of the algorithm:
+  * Prior and after the call to `crypto_aead_encrypt_shared` and
+    `crypto_aead_decrypt_shared`
+- Time required to collect data for a given attack/leakage assessment:
+  * 30 iterations/second using a target baud rate of 230400
+  * 8 iterations/second using a target baud rate of 38400
+- Total time of the attack/assessment:
+  * About 9 hours per 1 million traces
+- Total size of all traces: not stored
+# Attack Specific Data
+- Number of traces used: up to 8M depending on device and implementation
+- Attack point:
+  * trigger prior and after `crypto_aead_encrypt_shared`
+  * trigger prior and after `crypto_aead_decrypt_shared` (with final `ascon_iszero`)
+  * key, nonce and data are assumed to be randomly masked in each en/decryption
+- Attack/leakage assessment type: Test Vector Leakage Assessment with
+  * fixed key, fixed nonce, fixed 4-byte adata, fixed 4-byte plaintext (ciphertext) vs.
+  * fixed key, random nonce, random 4-byte adata, random 4-byte plaintext (ciphertext)
+- Note that using mode-level countermeasures, parts of the computations are
+  computed in plain. This is the case for the final `ascon_iszero` function
+  or large parts of the `protected_bi32_armv6_leveled` implementation. Plain
+  computations need to be excluded from the t-test evaluation by setting the
+  trigger locations accordingly.
+# Documentation of Results
+Attack script using shared simpleserial interface: `jupyter/ascon_sca_sss.ipynb`
+Note that for the ChipWhisperer Lite Scope only the first 24400 samples have
+been recorded. To cover larger parts of the implementation, the number of rounds
+has been reduced to 2 rounds for PA and PB. This results in about 25000 samples
+for decrypt and slightly less than 25000 samples for encrypt using 2 shares and
+clkgen x4.
+## 3 rotated shares
+- Decryption (2 PA/PB rounds) of `protected_bi32_armv6` on STM32F303 using
+  3 rotated shares and 8M traces:  
+  ![8M](ttest/protected_bi32_armv6/3shares_ror5/CW308_STM32F303_8000000.png)
+## 2 rotated shares with device specific fixes
+Contrary to 3 shares, masking software implementations using only 2 shares is a
+much more difficult challenge, since the 2 shares might easily collide in
+hardware. Although rotating the shares reduces the number of possible situations
+where these 2 shares may collide, device specific fixes are usually still needed
+at some places. 
+The device specific fix for the STM32F405 and STM32F415 targets is to add a
+`MOV <rd>, #0` instruction between locations where shares are unrotated (e.g.
+during bit interleaving or in non-linear functions). Similar fixes might exist
+for other devices.
+- Encryption (12/6 PA/PB rounds) of `protected_bi32_armv6` on STM32F405 using
+  2 rotated shares, device specific fixes, external bit interleaving
+  (can be computed offline, does not depend on key) and ~4.2M traces:  
+  ![~4.2M](ttest/protected_bi32_armv6/2shares_ror5_extbi/CW308_STM32F405_4194368.png)
+- Decryption (2 PA/PB rounds) of `protected_bi32_armv6` on STM32F415 using
+  2 rotated shares with device specific fixes and 4M and 5M traces:  
+  ![4M](ttest/protected_bi32_armv6/2shares_ror5_mov0/CW308_STM32F415_4000000.png)  
+  ![5M](ttest/protected_bi32_armv6/2shares_ror5_mov0/CW308_STM32F415_5000000.png)
+## 2 rotated shares without device specific fixes
+Without device specific fixes, peaks in the t-test are shown after a low number
+of traces (<10k). In the following we show such example graphs.
+- Decryption (2 PA/PB rounds) of `protected_bi32_armv6` on STM32F415 using
+  2 rotated shares, without device specific fixes and 100k traces:  
+  ![100k](ttest/protected_bi32_armv6/2shares_ror5/CW308_STM32F415_100000.png)
+- Decryption (2 PA/PB rounds) of `protected_bi32_armv6` on STM32F303 using
+  2 rotated shares, without device specific fixes and 10k traces:  
+  ![10k](ttest/protected_bi32_armv6/2shares_ror5/CW308_STM32F303_10000.png)
+# Authors
+Florian Dietrich, Christoph Dobraunig, Florian Mendel, Robert Primas, Martin Schläffer
+[//]: # (pandoc --number-sections --from markdown README.md -o Documents/documentation.pdf)
--- a/src-gift/gift/Documents/coversheet.pdf
+++ b/src-gift/gift/Documents/coversheet.pdf
--- a/src-gift/gift/Documents/documentation.pdf
+++ b/src-gift/gift/Documents/documentation.pdf
--- a/src-gift/gift/Implementations/crypto_aead/giftcofbv1/LWC_AEAD_KAT_128_128.txt
+++ b/src-gift/gift/Implementations/crypto_aead/giftcofbv1/LWC_AEAD_KAT_128_128.txt
--- a/src-gift/gift/Implementations/crypto_aead/giftcofbv1/designers
+++ b/src-gift/gift/Implementations/crypto_aead/giftcofbv1/designers
+Subhadeep Banik
+Avik Chakraborti
+Tetsu Iwata
+Kazuhiko Minematsu
+Mridul Nandi
+Thomas Peyrin
+Yu Sasaki
+Siang Meng Sim
+Yosuke Todo
\ No newline at end of file
--- a/src-gift/gift/Implementations/crypto_aead/giftcofbv1/protected_giftcofb/aead.c
+++ b/src-gift/gift/Implementations/crypto_aead/giftcofbv1/protected_giftcofb/aead.c
+/**
+ * GIFT-COFB ARMv7-M implementation (w/ 1st-order masking countermeasure)
+ * following the API defined in the Call for Protected Software Implementations
+ * of Finalists in the NIST Lightweight Cryptography Standardization Process
+ * by George Mason University: https://cryptography.gmu.edu/athena/LWC/Call_for
+ * _Protected_Software_Implementations.pdf
+ * 
+ * @author      Alexandre Adomnicai
+ *              alex.adomnicai@gmail.com
+ * 
+ * @date        March 2022
+ */
+#include <string.h>
+#include <stdint.h>
+#include "cofb.h"
+#include "giftb128.h"
+#include "randombytes.h"
+#include "crypto_aead_shared.h"
+/**
+ * COFB mode related internal functions.
+ */
+/**
+ * Copy one block (4 words) from s to d, applying the COFB padding.
+ *
+ *  - no_of_bytes == 0: d becomes the byte 0x80 followed by zeros.
+ *  - 0 < no_of_bytes < GIFT128_BLOCK_SIZE: the words holding data are copied,
+ *    the bytes at and after offset no_of_bytes in the last copied word are
+ *    cleared, a 0x80 byte is placed at offset no_of_bytes, and the remaining
+ *    words are zeroed.
+ *  - otherwise: the 4 words are copied unchanged.
+ *
+ * Byte offsets assume the words are stored little-endian in memory.
+ *
+ * NOTE: in the partial case the whole word s[no_of_bytes/4] is read before
+ * being masked, so the source must be readable up to the next word boundary
+ * (a full extra word when no_of_bytes is a multiple of 4).
+ */
+static inline void padding(uint32_t* d, const uint32_t* s, const uint32_t no_of_bytes){
+    int i; // NOTE(review): signed index compared against an unsigned bound below
+    if (no_of_bytes == 0) {
+        d[0] = 0x00000080; // little-endian
+        d[1] = 0x00000000;
+        d[2] = 0x00000000;
+        d[3] = 0x00000000;
+    }
+    else if (no_of_bytes < GIFT128_BLOCK_SIZE) {
+        // copy every word that contains at least one data byte, plus the word
+        // that will receive the 0x80 marker
+        for (i = 0; i < no_of_bytes/4+1; i++)
+            d[i] = s[i];
+        // i-1 is now the index of the last copied word: keep only its first
+        // (no_of_bytes % 4) bytes, then insert the 0x80 marker right after them
+        d[i-1] &= ~(0xffffffffL << (no_of_bytes % 4)*8);
+        d[i-1] |= 0x00000080L << (no_of_bytes % 4)*8;
+        // zero the words after the marker
+        for (; i < 4; i++)
+            d[i] = 0x00000000;
+    }
+    else {
+        // full block: no padding
+        d[0] = s[0];
+        d[1] = s[1];
+        d[2] = s[2];
+        d[3] = s[3];
+    }
+}
+/**
+ * Byte-wise XOR of two buffers: d[k] = s1[k] ^ s2[k] for k in [0, no_of_bytes).
+ * Bytes are handled in ascending order, so d may point to the same buffer as
+ * s1 or s2.
+ */
+static void xor_block(uint8_t* d, const uint8_t* s1, const uint8_t* s2, unsigned no_of_bytes) {
+    const uint8_t* const s1_end = s1 + no_of_bytes;
+    while (s1 != s1_end)
+        *d++ = *s1++ ^ *s2++;
+}
+/**
+ * Doubles, in place, the 64-bit value held in x[0..1].
+ *
+ * The 8 bytes are treated as one big-endian number whose most significant
+ * byte is the lowest byte of x[0]: each byte is shifted left by one bit, the
+ * top bit of each byte is carried into the bottom bit of the preceding byte,
+ * and the bit shifted out of the whole value is folded back by XORing 27
+ * (0x1b) into the last byte.
+ */
+static inline void double_half_block(uint32_t* x) {
+    uint32_t tmp0;
+    tmp0 = (x)[0]; // keep the original top bit for the final reduction
+    // shift the bytes of x[0]; '>> 15' moves bit 7 of byte k to bit 0 of byte k-1
+    (x)[0] = (((x)[0] & 0x7f7f7f7f) << 1) | (((x)[0] & 0x80808080) >> 15);
+    // carry from x[1] into x[0]: only bit 7 of x[1] survives '<< 17' (-> bit 24)
+    (x)[0] |= ((x)[1] & 0x80808080) << 17;
+    (x)[1] = (((x)[1] & 0x7f7f7f7f) << 1) | (((x)[1] & 0x80808080) >> 15);
+    // conditional reduction: XOR 27 into the last byte if the top bit was set
+    (x)[1] ^= (((tmp0 >> 7) & 1) * 27) << 24;
+}
+/**
+ * Triples, in place, the 64-bit value held in x[0..1]: performs the same
+ * doubling steps as double_half_block and then XORs the original value back
+ * in (3*x = 2*x ^ x).
+ */
+static inline void triple_half_block(uint32_t* x) {
+    uint32_t tmp0, tmp1;
+    // save the original words: needed for the reduction and the final XOR
+    tmp0 = (x)[0];
+    tmp1 = (x)[1];
+    // doubling, identical to double_half_block
+    (x)[0] = (((x)[0] & 0x7f7f7f7f) << 1) | (((x)[0] & 0x80808080) >> 15);
+    (x)[0] |= ((x)[1] & 0x80808080) << 17;
+    (x)[1] = (((x)[1] & 0x7f7f7f7f) << 1) | (((x)[1] & 0x80808080) >> 15);
+    (x)[1] ^= (((tmp0 >> 7) & 1) * 27) << 24;
+    // add the original value
+    (x)[0] ^= tmp0;
+    (x)[1] ^= tmp1;
+}
+/**
+ * In-place transformation of a 4-word block: the new first half (x[0], x[1])
+ * is the old second half (x[2], x[3]); the new second half is the old first
+ * half rotated left by one bit, the 8 bytes being treated as one big-endian
+ * number whose most significant byte is the lowest byte of the first word.
+ */
+static inline void g(uint32_t *x) {
+    uint32_t tmp0, tmp1;
+    // save the old first half before it is overwritten
+    tmp0 = (x)[0];
+    tmp1 = (x)[1];
+    (x)[0] = (x)[2];
+    (x)[1] = (x)[3];
+    // byte-wise shift left by one with carries between neighbouring bytes
+    (x)[2] = ((tmp0 & 0x7f7f7f7f) << 1) | ((tmp0 & 0x80808080) >> 15);
+    // carry from the top bit of tmp1's first byte into the last byte of x[2]
+    (x)[2] |= ((tmp1 & 0x80808080) << 17);
+    (x)[3] = ((tmp1 & 0x7f7f7f7f) << 1) | ((tmp1 & 0x80808080) >> 15);
+    // wrap-around: the top bit of tmp0's first byte re-enters at the very end
+    (x)[3] |= ((tmp0 & 0x80808080) << 17);
+}
+/**
+ * Applies g() to y in place, writes the padded n-byte input m into d, then
+ * XORs the updated y into d over 16 bytes.
+ *
+ * NOTE(review): d, y and m are reinterpreted as uint32_t arrays, so they are
+ * assumed to be suitably aligned for word access -- confirm for all callers.
+ * n is a uint8_t, so larger lengths passed by callers are truncated; the
+ * callers in this file pass at most one block.
+ */
+static inline void rho1(uint8_t* d, uint8_t* y, const uint8_t* m, uint8_t n)
+{
+    g((uint32_t *)y);
+    padding((uint32_t *)d, (uint32_t *)m, n);
+    xor_block(d, d, y, 16);
+}
+/**
+ * Encryption direction: writes c = y ^ m over n bytes, then calls rho1 to
+ * update y and to compute x from the plaintext m.
+ * The XOR must come first because rho1 modifies y.
+ */
+static inline void rho(uint8_t* y, const uint8_t* m, uint8_t* x, uint8_t* c, unsigned long long n)
+{
+    xor_block(c, y, m, n);
+    rho1(x, y, m, n);
+}
+/**
+ * Decryption direction: writes m = y ^ c over n bytes, then calls rho1 to
+ * update y and to compute x from the recovered plaintext m (not from c).
+ * The XOR must come first because rho1 modifies y.
+ */
+static inline void rho_prime(uint8_t* y, const uint8_t*c, uint8_t* x, uint8_t* m, unsigned long long n)
+{
+    xor_block(m, y, c, n);
+    rho1(x, y, m, n);
+}
+/****************************************************************************
+* Constant-time implementation of the GIFT-COFB authenticated cipher based on
+* fixsliced GIFTb-128. Encryption/decryption is handled by the same function,
+* depending on the 'mode' parameter (1/0).
+*
+* out     - output buffer: ciphertext followed by the TAG_SIZE-byte tag when
+*           encrypting, plaintext only when decrypting
+* key     - first key share; key_m - second key share (both are handed to
+*           gift128_keyschedule)
+* nonce   - block that is encrypted first to initialise the state
+* ad      - associated data, ad_len bytes
+* in      - in_len bytes of plaintext (encrypt) or ciphertext (decrypt); when
+*           decrypting, the tag is read from the TAG_SIZE bytes at in+in_len
+* mode    - COFB_ENCRYPT or COFB_DECRYPT
+*
+* Returns 0 when encrypting. When decrypting, returns the OR of the byte-wise
+* XOR between the received and the recomputed tag: 0 if they match, a
+* non-zero value (not necessarily -1) otherwise. The plaintext has already
+* been written to 'out' by the time the tag is compared.
+****************************************************************************/
+int giftcofb_crypt(
+    uint8_t* out,
+    const uint8_t* key,
+    const uint8_t* key_m,
+    const uint8_t* nonce,
+    const uint8_t* ad, unsigned long long ad_len,
+    const uint8_t* in, unsigned long long in_len,
+    const int mode)
+{
+    int i, ret;
+    uint32_t offset[GIFT128_BLOCK_SIZE/8];
+    uint8_t x[GIFT128_BLOCK_SIZE], y[GIFT128_BLOCK_SIZE], tag[TAG_SIZE];
+    masked_rkey m_rkey;
+    // save the tag for verification in case out = in
+    if (mode == COFB_DECRYPT)
+        memcpy(tag, in+in_len, TAG_SIZE);
+    // NOTE(review): only m_rkey.rkey is passed; presumably the key schedule
+    // also fills the adjacent rkey_mask member -- confirm in giftb128.S
+    gift128_keyschedule(key, m_rkey.rkey, key_m);
+    giftb128_encrypt_block(y, m_rkey.rkey, nonce);
+    // the offset starts as the first two words of the encrypted nonce
+    offset[0] = ((uint32_t*)y)[0];
+    offset[1] = ((uint32_t*)y)[1];
+    // associated data: every block except the last one (strict '>')
+    while(ad_len > GIFT128_BLOCK_SIZE){
+        rho1(x, y, ad, GIFT128_BLOCK_SIZE);
+        double_half_block(offset);
+        XOR_TOP_BAR_BLOCK((uint32_t *)x, offset);
+        giftb128_encrypt_block(y, m_rkey.rkey, x);
+        ad += GIFT128_BLOCK_SIZE;
+        ad_len -= GIFT128_BLOCK_SIZE;
+    }
+    // last associated-data block: the offset is tripled once, once more if
+    // the block is partial or empty, and twice more if there is no message
+    triple_half_block(offset);
+    if((ad_len % GIFT128_BLOCK_SIZE != 0) || (ad_len == 0))
+        triple_half_block(offset);
+    if (in_len == 0) {
+        triple_half_block(offset);
+        triple_half_block(offset);
+    }
+    rho1(x, y, ad, ad_len);
+    XOR_TOP_BAR_BLOCK((uint32_t *)x, offset);
+    giftb128_encrypt_block(y, m_rkey.rkey, x);
+    // message/ciphertext: every block except the last one (strict '>')
+    while (in_len > GIFT128_BLOCK_SIZE){
+        double_half_block(offset);
+        if (mode == COFB_ENCRYPT)
+            rho(y, in, x, out, GIFT128_BLOCK_SIZE);
+        else
+            rho_prime(y, in, x, out, GIFT128_BLOCK_SIZE);
+        XOR_TOP_BAR_BLOCK((uint32_t *)x, offset);
+        giftb128_encrypt_block(y, m_rkey.rkey, x);
+        in += GIFT128_BLOCK_SIZE;
+        out += GIFT128_BLOCK_SIZE;
+        in_len -= GIFT128_BLOCK_SIZE;
+    }
+    // last message/ciphertext block (skipped entirely for an empty message)
+    if (in_len != 0) {
+        triple_half_block(offset);
+        if(in_len % GIFT128_BLOCK_SIZE != 0)
+            triple_half_block(offset);
+        if (mode == COFB_ENCRYPT) {
+            rho(y, in, x, out, in_len);
+            out += in_len; // the tag is appended right after the ciphertext
+        }
+        else {
+            rho_prime(y, in, x, out, in_len);
+            in += in_len; // no further effect: 'in' is not read again
+        }
+        XOR_TOP_BAR_BLOCK((uint32_t *)x, offset);
+        giftb128_encrypt_block(y, m_rkey.rkey, x);
+    }
+    if (mode == COFB_ENCRYPT) { // encryption mode
+        memcpy(out, y, TAG_SIZE);
+        return 0;
+    }
+    // decrypting
+    // accumulate all byte differences without an early exit
+    ret = 0;
+    for(i = 0; i < TAG_SIZE; i++)
+        ret |= tag[i] ^ y[i];
+    return ret;
+}
+/**
+ * Wrapper for compliance with the API defined in the call for protected
+ * implementations from GMU.
+ * 
+ * Converts an array of KEY_SIZE/4 mask_key_uint32_t elements into two byte
+ * arrays of KEY_SIZE bytes each (NUM_SHARES = 2).
+ * The first and second output arrays contain the first and second shares in a
+ * byte-wise representation, respectively. Each 32-bit share word is written
+ * least significant byte first.
+ * 
+ * Useful to pass the two 16-byte key shares as inputs to the GIFT-COFB
+ * functions.
+ */
+static void shares_to_bytearr_2(
+    uint8_t bytearr_0[],
+    uint8_t bytearr_1[],
+    const mask_key_uint32_t *ks)
+{
+    int i;
+    // pack the first shares into bytearr_0
+    for(i = 0; i < KEY_SIZE/4; i++) {
+        bytearr_0[i*4 + 0] = (uint8_t)((ks[i].shares[0] >> 0)  & 0xff);
+        bytearr_0[i*4 + 1] = (uint8_t)((ks[i].shares[0] >> 8)  & 0xff);
+        bytearr_0[i*4 + 2] = (uint8_t)((ks[i].shares[0] >> 16) & 0xff);
+        bytearr_0[i*4 + 3] = (uint8_t)((ks[i].shares[0] >> 24) & 0xff);
+    }
+    // pack the second shares into bytearr_1
+    // use a distinct loop to avoid potential HD-based leakages
+    for(i = 0; i < KEY_SIZE/4; i++) {
+        bytearr_1[i*4 + 0] = (uint8_t)((ks[i].shares[1] >> 0)  & 0xff);
+        bytearr_1[i*4 + 1] = (uint8_t)((ks[i].shares[1] >> 8)  & 0xff);
+        bytearr_1[i*4 + 2] = (uint8_t)((ks[i].shares[1] >> 16) & 0xff);
+        bytearr_1[i*4 + 3] = (uint8_t)((ks[i].shares[1] >> 24) & 0xff);
+    }
+}
+/**
+ * Same as 'shares_to_bytearr_2' but with no masking => only one output buffer.
+ *
+ * Writes exactly mlen bytes to bytearr, taken from shares[0] of each element
+ * of ms, least significant byte first. When mlen is not a multiple of 4, the
+ * remaining mlen % 4 bytes come from the low bytes of ms[mlen/4].
+ */
+static void shares_to_bytearr(
+    uint8_t bytearr[],
+    const mask_m_uint32_t *ms, unsigned long long mlen)
+{
+    unsigned long long i, r;
+    r = mlen % 4; // number of bytes in the trailing partial word
+    // complete words
+    for(i = 0; i < mlen/4; i++) {
+        bytearr[i*4 + 0] = (uint8_t)((ms[i].shares[0] >> 0)  & 0xff);
+        bytearr[i*4 + 1] = (uint8_t)((ms[i].shares[0] >> 8)  & 0xff);
+        bytearr[i*4 + 2] = (uint8_t)((ms[i].shares[0] >> 16) & 0xff);
+        bytearr[i*4 + 3] = (uint8_t)((ms[i].shares[0] >> 24) & 0xff);
+    }
+    // trailing partial word, if any
+    for(i = 0; i < r; i++)
+        bytearr[mlen - r + i] = (uint8_t)((ms[mlen/4].shares[0] >> 8*i)  & 0xff);
+}
+/**
+ * Split the encryption key into two shares and pack the other inputs according
+ * to the call for protected software implementations from GMU.
+ *
+ * Message, associated data and nonce are not masked: every 32-bit word is
+ * assembled from 4 input bytes, least significant byte first, and stored in
+ * shares[0]. When a length is not a multiple of 4, the trailing bytes are
+ * packed into one extra word whose unused high bytes are zero.
+ *
+ * The key is split as shares[1] = fresh random word and
+ * shares[0] = shares[1] ^ key word (key bytes also taken least significant
+ * byte first, matching the serialisation used by shares_to_bytearr_2).
+ *
+ * @param m      plaintext, mlen bytes
+ * @param ms     output, at least mlen/4 words (+1 if mlen % 4 != 0)
+ * @param ad     associated data, adlen bytes
+ * @param ads    output, at least adlen/4 words (+1 if adlen % 4 != 0)
+ * @param npub   nonce, GIFT128_BLOCK_SIZE bytes
+ * @param npubs  output, GIFT128_BLOCK_SIZE/4 words
+ * @param k      key, KEY_SIZE bytes
+ * @param ks     output, KEY_SIZE/4 elements with 2 shares each
+ */
+void generate_shares_encrypt(
+    const unsigned char *m, mask_m_uint32_t *ms, const unsigned long long mlen,
+    const unsigned char *ad, mask_ad_uint32_t *ads , const unsigned long long adlen,
+    const unsigned char *npub, mask_npub_uint32_t *npubs,
+    const unsigned char *k, mask_key_uint32_t *ks)
+{
+    unsigned long long i, r;
+    uint32_t kw;
+    // msg is not split into shares, simple copy
+    // (bytes are widened to uint32_t *before* shifting: shifting the promoted
+    // signed int left by 24 is undefined for bytes >= 0x80)
+    r = mlen % 4;
+    for(i = 0; i < mlen/4; i++) {
+        ms[i].shares[0]  = (uint32_t)m[i*4 + 0] << 0;
+        ms[i].shares[0] |= (uint32_t)m[i*4 + 1] << 8;
+        ms[i].shares[0] |= (uint32_t)m[i*4 + 2] << 16;
+        ms[i].shares[0] |= (uint32_t)m[i*4 + 3] << 24;
+    }
+    // last incomplete word: clear it first so the unused bytes are 0-padded
+    if (r) {
+        ms[mlen/4].shares[0]  = 0x00000000;
+        for(i = 0; i < r; i++)
+            ms[mlen/4].shares[0] |= (uint32_t)m[mlen - r + i] << 8*i;
+    }
+    // ad is not split into shares, simple copy
+    r = adlen % 4;
+    for(i = 0; i < adlen/4; i++) {
+        ads[i].shares[0]  = (uint32_t)ad[i*4 + 0] << 0;
+        ads[i].shares[0] |= (uint32_t)ad[i*4 + 1] << 8;
+        ads[i].shares[0] |= (uint32_t)ad[i*4 + 2] << 16;
+        ads[i].shares[0] |= (uint32_t)ad[i*4 + 3] << 24;
+    }
+    // last incomplete word: clear it first so the unused bytes are 0-padded
+    if (r) {
+        ads[adlen/4].shares[0]  = 0x00000000;
+        for(i = 0; i < r; i++)
+            ads[adlen/4].shares[0] |= (uint32_t)ad[adlen - r + i] << 8*i;
+    }
+    // npub is not split into shares, simple copy
+    for(i = 0; i < GIFT128_BLOCK_SIZE/4; i++) {
+        npubs[i].shares[0]  = (uint32_t)npub[i*4 + 0] << 0;
+        npubs[i].shares[0] |= (uint32_t)npub[i*4 + 1] << 8;
+        npubs[i].shares[0] |= (uint32_t)npub[i*4 + 2] << 16;
+        npubs[i].shares[0] |= (uint32_t)npub[i*4 + 3] << 24;
+    }
+    // encryption key is split into 2 shares (1st-order masking)
+    for(i = 0; i < KEY_SIZE/4; i++)
+        randombytes((uint8_t *)(&(ks[i].shares[1])), 4);
+    // the key is read byte by byte instead of through a (uint32_t *) cast:
+    // no alignment requirement on k, no aliasing violation, const preserved
+    for(i = 0; i < KEY_SIZE/4; i++) {
+        kw  = (uint32_t)k[i*4 + 0] << 0;
+        kw |= (uint32_t)k[i*4 + 1] << 8;
+        kw |= (uint32_t)k[i*4 + 2] << 16;
+        kw |= (uint32_t)k[i*4 + 3] << 24;
+        ks[i].shares[0] = ks[i].shares[1] ^ kw;
+    }
+}
+/**
+ * Split the encryption key into two shares and pack the other inputs according
+ * to the call for protected software implementations from GMU.
+ *
+ * Ciphertext (including the tag, i.e. all clen bytes), associated data and
+ * nonce are not masked: every 32-bit word is assembled from 4 input bytes,
+ * least significant byte first, and stored in shares[0]. When a length is not
+ * a multiple of 4, the trailing bytes are packed into one extra word whose
+ * unused high bytes are zero.
+ *
+ * The key is split as shares[1] = fresh random word and
+ * shares[0] = shares[1] ^ key word (key bytes also taken least significant
+ * byte first, matching the serialisation used by shares_to_bytearr_2).
+ *
+ * @param c      ciphertext and tag, clen bytes
+ * @param cs     output, at least clen/4 words (+1 if clen % 4 != 0)
+ * @param ad     associated data, adlen bytes
+ * @param ads    output, at least adlen/4 words (+1 if adlen % 4 != 0)
+ * @param npub   nonce, GIFT128_BLOCK_SIZE bytes
+ * @param npubs  output, GIFT128_BLOCK_SIZE/4 words
+ * @param k      key, KEY_SIZE bytes
+ * @param ks     output, KEY_SIZE/4 elements with 2 shares each
+ */
+void generate_shares_decrypt(
+    const unsigned char *c, mask_c_uint32_t *cs, const unsigned long long clen,
+    const unsigned char *ad, mask_ad_uint32_t *ads , const unsigned long long adlen,
+    const unsigned char *npub, mask_npub_uint32_t *npubs,
+    const unsigned char *k, mask_key_uint32_t *ks)
+{
+    unsigned long long i, r;
+    uint32_t kw;
+    // ciphertext is not split into shares, simple copy
+    // (bytes are widened to uint32_t *before* shifting: shifting the promoted
+    // signed int left by 24 is undefined for bytes >= 0x80)
+    r = clen % 4;
+    for(i = 0; i < clen/4; i++) {
+        cs[i].shares[0]  = (uint32_t)c[i*4 + 0] << 0;
+        cs[i].shares[0] |= (uint32_t)c[i*4 + 1] << 8;
+        cs[i].shares[0] |= (uint32_t)c[i*4 + 2] << 16;
+        cs[i].shares[0] |= (uint32_t)c[i*4 + 3] << 24;
+    }
+    // last incomplete word: clear it first so the unused bytes are 0-padded
+    if (r) {
+        cs[clen/4].shares[0]  = 0x00000000;
+        for(i = 0; i < r; i++)
+            cs[clen/4].shares[0] |= (uint32_t)c[clen - r + i] << 8*i;
+    }
+    // ad is not split into shares, simple copy
+    r = adlen % 4;
+    for(i = 0; i < adlen/4; i++) {
+        ads[i].shares[0]  = (uint32_t)ad[i*4 + 0] << 0;
+        ads[i].shares[0] |= (uint32_t)ad[i*4 + 1] << 8;
+        ads[i].shares[0] |= (uint32_t)ad[i*4 + 2] << 16;
+        ads[i].shares[0] |= (uint32_t)ad[i*4 + 3] << 24;
+    }
+    // last incomplete word: clear it first so the unused bytes are 0-padded
+    if (r) {
+        ads[adlen/4].shares[0]  = 0x00000000;
+        for(i = 0; i < r; i++)
+            ads[adlen/4].shares[0] |= (uint32_t)ad[adlen - r + i] << 8*i;
+    }
+    // npub is not split into shares, simple copy
+    for(i = 0; i < GIFT128_BLOCK_SIZE/4; i++) {
+        npubs[i].shares[0]  = (uint32_t)npub[i*4 + 0] << 0;
+        npubs[i].shares[0] |= (uint32_t)npub[i*4 + 1] << 8;
+        npubs[i].shares[0] |= (uint32_t)npub[i*4 + 2] << 16;
+        npubs[i].shares[0] |= (uint32_t)npub[i*4 + 3] << 24;
+    }
+    // encryption key is split into 2 shares (1st-order masking)
+    for(i = 0; i < KEY_SIZE/4; i++)
+        randombytes((uint8_t *)(&(ks[i].shares[1])), 4);
+    // the key is read byte by byte instead of through a (uint32_t *) cast:
+    // no alignment requirement on k, no aliasing violation, const preserved
+    for(i = 0; i < KEY_SIZE/4; i++) {
+        kw  = (uint32_t)k[i*4 + 0] << 0;
+        kw |= (uint32_t)k[i*4 + 1] << 8;
+        kw |= (uint32_t)k[i*4 + 2] << 16;
+        kw |= (uint32_t)k[i*4 + 3] << 24;
+        ks[i].shares[0] = ks[i].shares[1] ^ kw;
+    }
+}
+/**
+ * Combine the shares into the output ciphertext buffer.
+ *
+ * Writes clen bytes to c via shares_to_bytearr. The ciphertext array is
+ * reinterpreted as a message array, which relies on both types having the
+ * same layout (NUM_SHARES_C == NUM_SHARES_M == 1 in api.h); only shares[0]
+ * is read.
+ */
+void combine_shares_encrypt(
+    const mask_c_uint32_t *cs, unsigned char *c, unsigned long long clen) {
+    shares_to_bytearr(c, (mask_m_uint32_t *)cs, clen);
+}
+/**
+ * Combine the shares into the output plaintext buffer.
+ *
+ * Writes mlen bytes to m via shares_to_bytearr; only shares[0] of each
+ * element is read.
+ */
+void combine_shares_decrypt(
+    const mask_m_uint32_t *ms, unsigned char *m, unsigned long long mlen) {
+    shares_to_bytearr(m, ms, mlen);
+}
+/*
+ * Main encryption/authentication function.
+ *
+ * Unpacks the two key shares into byte arrays, sets *clen to mlen + TAG_SIZE
+ * and runs giftcofb_crypt in COFB_ENCRYPT mode. The share arrays for message,
+ * associated data, nonce and ciphertext are handed over as plain byte
+ * buffers; cs must therefore provide room for mlen + TAG_SIZE bytes.
+ *
+ * Returns the value of giftcofb_crypt (0 in encryption mode).
+ *
+ * NOTE(review): the unpacked key shares stay on the stack in key/key_m and
+ * are not wiped before returning.
+ */
+int crypto_aead_encrypt_shared(
+    mask_c_uint32_t* cs, unsigned long long *clen,
+    const mask_m_uint32_t *ms, unsigned long long mlen,
+    const mask_ad_uint32_t *ads, unsigned long long adlen,
+    const mask_npub_uint32_t *npubs,
+    const mask_key_uint32_t *ks)
+{
+    uint8_t key[KEY_SIZE];
+    uint8_t key_m[KEY_SIZE];
+    shares_to_bytearr_2(key, key_m, ks);
+    *clen = mlen + TAG_SIZE;
+    return giftcofb_crypt(
+        (uint8_t *)(cs),
+        key, key_m,
+        (uint8_t *)(npubs),
+        (uint8_t *)(ads), adlen,
+        (uint8_t *)(ms), mlen,
+        COFB_ENCRYPT);
+}
+/*
+ * Main decryption/tag verification function.
+ *
+ * Unpacks the two key shares into byte arrays, rejects inputs shorter than
+ * the tag, sets *mlen to clen - TAG_SIZE and runs giftcofb_crypt in
+ * COFB_DECRYPT mode. The share arrays are handed over as plain byte buffers;
+ * the tag is expected in the last TAG_SIZE bytes of cs.
+ *
+ * Returns -1 if clen < TAG_SIZE (*mlen is then left untouched), otherwise the
+ * value of giftcofb_crypt: 0 if the tag is valid, non-zero otherwise.
+ *
+ * NOTE(review): the unpacked key shares stay on the stack in key/key_m and
+ * are not wiped before returning.
+ */
+int crypto_aead_decrypt_shared(
+    mask_m_uint32_t* ms, unsigned long long *mlen,
+    const mask_c_uint32_t *cs, unsigned long long clen,
+    const mask_ad_uint32_t *ads, unsigned long long adlen,
+    const mask_npub_uint32_t *npubs,
+    const mask_key_uint32_t *ks)
+{
+    uint8_t key[KEY_SIZE];
+    uint8_t key_m[KEY_SIZE];
+    shares_to_bytearr_2(key, key_m, ks);
+    if (clen < TAG_SIZE)
+        return -1;
+    *mlen = clen - TAG_SIZE;
+    return giftcofb_crypt(
+        (uint8_t *)ms,
+        key, key_m,
+        (uint8_t *)npubs,
+        (uint8_t *)ads, adlen,
+        (uint8_t *)cs, *mlen,
+        COFB_DECRYPT);
+}
--- a/src-gift/gift/Implementations/crypto_aead/giftcofbv1/protected_giftcofb/api.h
+++ b/src-gift/gift/Implementations/crypto_aead/giftcofbv1/protected_giftcofb/api.h
+/* Size parameters of the AEAD scheme. NOTE(review): presumably all in bytes,
+   following the usual crypto_aead api.h convention -- confirm. */
+#define CRYPTO_KEYBYTES     16
+#define CRYPTO_NSECBYTES    0
+#define CRYPTO_NPUBBYTES    16
+#define CRYPTO_ABYTES       16
+#define CRYPTO_NOOVERLAP    1
+#define CRYPTO_BYTES        32
+/* Number of shares per 32-bit word for each kind of data; these set the
+   length of the shares[] array in the mask_*_uint32_t types. */
+#define NUM_SHARES_M 		1
+#define NUM_SHARES_C 		1
+#define NUM_SHARES_AD 		1
+#define NUM_SHARES_NPUB 	1 
+#define NUM_SHARES_KEY 		2 // 1st-order masking => 2 shares
\ No newline at end of file
--- a/src-gift/gift/Implementations/crypto_aead/giftcofbv1/protected_giftcofb/architectures
+++ b/src-gift/gift/Implementations/crypto_aead/giftcofbv1/protected_giftcofb/architectures
+arm
\ No newline at end of file
--- a/src-gift/gift/Implementations/crypto_aead/giftcofbv1/protected_giftcofb/cofb.h
+++ b/src-gift/gift/Implementations/crypto_aead/giftcofbv1/protected_giftcofb/cofb.h
+/* Constants, round-key container and XOR helper macros for GIFT-COFB. */
+#ifndef GIFT_COFB_H_
+#define GIFT_COFB_H_
+#include <stdint.h>
+#define TAG_SIZE        16
+#define KEY_SIZE        16
+/* values accepted by the 'mode' parameter of giftcofb_crypt */
+#define COFB_ENCRYPT    1
+#define COFB_DECRYPT    0
+/* Two arrays of 80 words each. NOTE(review): presumably the round keys and
+   their masks as produced by the key schedule -- confirm in giftb128. */
+typedef struct masked_rkey {
+    uint32_t rkey[80];
+    uint32_t rkey_mask[80];
+} masked_rkey;
+/* x = y ^ z over 4 words. Uses a GNU statement expression ({ ... }), so a
+   GCC-compatible compiler is required; the arguments are evaluated several
+   times and must be free of side effects. */
+#define XOR_BLOCK(x, y, z) ({       \
+    (x)[0] = (y)[0] ^ (z)[0];       \
+    (x)[1] = (y)[1] ^ (z)[1];       \
+    (x)[2] = (y)[2] ^ (z)[2];       \
+    (x)[3] = (y)[3] ^ (z)[3];       \
+})
+/* x ^= y over the first 2 words only. Same statement-expression and
+   multiple-evaluation caveats as XOR_BLOCK. */
+#define XOR_TOP_BAR_BLOCK(x, y) ({  \
+    (x)[0] ^= (y)[0];               \
+    (x)[1] ^= (y)[1];               \
+})
+#endif // GIFT_COFB_H_
\ No newline at end of file
--- a/src-gift/gift/Implementations/crypto_aead/giftcofbv1/protected_giftcofb/crypto_aead.c
+++ b/src-gift/gift/Implementations/crypto_aead/giftcofbv1/protected_giftcofb/crypto_aead.c
+#include "crypto_aead.h"
+#include <stdlib.h>
+#include <stdint.h>
+#include "api.h"
+#include "crypto_aead_shared.h"
+#ifdef SS_VER
+#include "hal.h"
+#else
+#define trigger_high()
+#define trigger_low()
+#endif
+/**
+ * Plain-interface wrapper around the shared (masked) GIFT-COFB encryption:
+ * packs the inputs into share arrays, calls crypto_aead_encrypt_shared and
+ * unpacks the result.
+ *
+ * @param c     output: ciphertext followed by the tag (mlen + CRYPTO_ABYTES bytes)
+ * @param clen  output: number of bytes written to c
+ * @param m     plaintext, mlen bytes
+ * @param a     associated data, alen bytes
+ * @param nsec  unused
+ * @param npub  nonce, CRYPTO_NPUBBYTES bytes
+ * @param k     key, CRYPTO_KEYBYTES bytes
+ * @return the result of crypto_aead_encrypt_shared (0 on success), or -1 if a
+ *         share buffer could not be allocated (c and *clen are then untouched)
+ */
+int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
+                        const unsigned char* m, unsigned long long mlen,
+                        const unsigned char* a, unsigned long long alen,
+                        const unsigned char* nsec, const unsigned char* npub,
+                        const unsigned char* k) {
+  int result = -1;
+  (void)nsec;
+  /* dynamic allocation of input/output shares.
+     Sizes are element counts multiplied by the element size (the share types
+     are wider than one byte). The ciphertext buffer is sized from
+     mlen + CRYPTO_ABYTES because *clen is not set yet. Data buffers get
+     len/4 + 2 words: one for a trailing partial word and one word of slack
+     for helpers that access a whole word past the last partial one. */
+  mask_key_uint32_t* ks = calloc(CRYPTO_KEYBYTES / sizeof(uint32_t), sizeof *ks);
+  mask_npub_uint32_t* ns = calloc(CRYPTO_NPUBBYTES / sizeof(uint32_t), sizeof *ns);
+  mask_ad_uint32_t* as = calloc((size_t)(alen / sizeof(uint32_t)) + 2, sizeof *as);
+  mask_m_uint32_t* ms = calloc((size_t)(mlen / sizeof(uint32_t)) + 2, sizeof *ms);
+  mask_c_uint32_t* cs = calloc(
+      (size_t)((mlen + CRYPTO_ABYTES) / sizeof(uint32_t)) + 2, sizeof *cs);
+  if (ks && ns && as && ms && cs) {
+    /* mask plain input data */
+    generate_shares_encrypt(m, ms, mlen, a, as, alen, npub, ns, k, ks);
+    /* call shared interface of GIFT-COFB encrypt */
+    //trigger_high();
+    result = crypto_aead_encrypt_shared(cs, clen, ms, mlen, as, alen, ns, ks);
+    //trigger_low();
+    /* unmask shared output data */
+    combine_shares_encrypt(cs, c, *clen);
+  }
+  /* free shares (free(NULL) is a no-op) */
+  free(ks);
+  free(ns);
+  free(as);
+  free(ms);
+  free(cs);
+  return result;
+}
+/**
+ * Plain-interface wrapper around the shared (masked) GIFT-COFB decryption:
+ * packs the inputs into share arrays, calls crypto_aead_decrypt_shared and,
+ * if the tag is valid, unpacks the plaintext.
+ *
+ * @param m     output: plaintext (clen - CRYPTO_ABYTES bytes), written only
+ *              when the tag verifies
+ * @param mlen  output: plaintext length
+ * @param nsec  unused
+ * @param c     ciphertext followed by the tag, clen bytes
+ * @param a     associated data, alen bytes
+ * @param npub  nonce, CRYPTO_NPUBBYTES bytes
+ * @param k     key, CRYPTO_KEYBYTES bytes
+ * @return 0 if the tag is valid; -1 if clen is shorter than the tag, if a
+ *         share buffer could not be allocated, or if verification fails
+ */
+int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
+                        unsigned char* nsec, const unsigned char* c,
+                        unsigned long long clen, const unsigned char* a,
+                        unsigned long long alen, const unsigned char* npub,
+                        const unsigned char* k) {
+  int result = -1;
+  (void)nsec;
+  if (clen < CRYPTO_ABYTES) return -1;
+  /* dynamic allocation of input/output shares.
+     Sizes are element counts multiplied by the element size (the share types
+     are wider than one byte). The plaintext buffer is sized from
+     clen - CRYPTO_ABYTES because *mlen is not set yet. Data buffers get
+     len/4 + 2 words: one for a trailing partial word and one word of slack
+     for helpers that access a whole word past the last partial one. */
+  mask_key_uint32_t* ks = calloc(CRYPTO_KEYBYTES / sizeof(uint32_t), sizeof *ks);
+  mask_npub_uint32_t* ns = calloc(CRYPTO_NPUBBYTES / sizeof(uint32_t), sizeof *ns);
+  mask_ad_uint32_t* as = calloc((size_t)(alen / sizeof(uint32_t)) + 2, sizeof *as);
+  mask_m_uint32_t* ms = calloc(
+      (size_t)((clen - CRYPTO_ABYTES) / sizeof(uint32_t)) + 2, sizeof *ms);
+  mask_c_uint32_t* cs = calloc((size_t)(clen / sizeof(uint32_t)) + 2, sizeof *cs);
+  if (ks && ns && as && ms && cs) {
+    /* mask plain input data */
+    generate_shares_decrypt(c, cs, clen, a, as, alen, npub, ns, k, ks);
+    /* call shared interface of GIFT-COFB decrypt */
+    //trigger_high();
+    result = crypto_aead_decrypt_shared(ms, mlen, cs, clen, as, alen, ns, ks);
+    //trigger_low();
+    if (result == 0) {
+      /* unmask shared output data; never release unauthenticated plaintext */
+      combine_shares_decrypt(ms, m, *mlen);
+    } else {
+      /* the shared layer reports a mismatch as an arbitrary non-zero value */
+      result = -1;
+    }
+  }
+  /* free shares (free(NULL) is a no-op) */
+  free(ks);
+  free(ns);
+  free(as);
+  free(ms);
+  free(cs);
+  return result;
+}
--- a/src-gift/gift/Implementations/crypto_aead/giftcofbv1/protected_giftcofb/crypto_aead_shared.h
+++ b/src-gift/gift/Implementations/crypto_aead/giftcofbv1/protected_giftcofb/crypto_aead_shared.h
+/**
+ * API defined by the Cryptographic Engineering Research Group (CERG) from
+ * George Mason University (GMU) in their call for protected software
+ * implementations of NIST LWC finalists.
+ */ 
+#include "api.h"
+#include <stdint.h>
+/* One 32-bit word of plaintext, held as NUM_SHARES_M shares. */
+typedef struct {
+    uint32_t shares[NUM_SHARES_M];
+} mask_m_uint32_t;
+/* One 32-bit word of ciphertext, held as NUM_SHARES_C shares. */
+typedef struct {
+    uint32_t shares[NUM_SHARES_C];
+} mask_c_uint32_t;
+/* One 32-bit word of associated data, held as NUM_SHARES_AD shares. */
+typedef struct {
+    uint32_t shares[NUM_SHARES_AD];
+} mask_ad_uint32_t;
+/* One 32-bit word of the nonce, held as NUM_SHARES_NPUB shares. */
+typedef struct {
+    uint32_t shares[NUM_SHARES_NPUB];
+} mask_npub_uint32_t;
+/* One 32-bit word of the key, held as NUM_SHARES_KEY shares. */
+typedef struct {
+    uint32_t shares[NUM_SHARES_KEY];
+} mask_key_uint32_t;
+/* Encrypts and authenticates shared inputs; *clen receives the output length. */
+int crypto_aead_encrypt_shared(
+    mask_c_uint32_t* cs, unsigned long long *clen,
+    const mask_m_uint32_t *ms, unsigned long long mlen,
+    const mask_ad_uint32_t *ads, unsigned long long adlen,
+    const mask_npub_uint32_t *npubs,
+    const mask_key_uint32_t *ks
+);
+/* Decrypts and verifies shared inputs; *mlen receives the plaintext length. */
+int crypto_aead_decrypt_shared(
+    mask_m_uint32_t* ms, unsigned long long *mlen,
+    const mask_c_uint32_t *cs, unsigned long long clen,
+    const mask_ad_uint32_t *ads, unsigned long long adlen,
+    const mask_npub_uint32_t *npubs,
+    const mask_key_uint32_t *ks
+);
+/* Packs plain byte inputs into share arrays before encryption. */
+void generate_shares_encrypt(
+    const unsigned char *m, mask_m_uint32_t *ms, const unsigned long long mlen,
+    const unsigned char *ad, mask_ad_uint32_t *ads, const unsigned long long adlen,
+    const unsigned char *npub, mask_npub_uint32_t *npubs,
+    const unsigned char *k, mask_key_uint32_t *ks
+);
+/* Packs plain byte inputs into share arrays before decryption. */
+void generate_shares_decrypt(
+    //const unsigned char *c, mask_m_uint32_t *cs, const unsigned long long clen,
+    const unsigned char *c, mask_c_uint32_t *cs, const unsigned long long clen,
+    const unsigned char *ad, mask_ad_uint32_t *ads, const unsigned long long adlen,
+    const unsigned char *npub, mask_npub_uint32_t *npubs,
+    const unsigned char *k, mask_key_uint32_t *ks
+);
+/* Converts the shared ciphertext back into clen plain bytes. */
+void combine_shares_encrypt(
+    const mask_c_uint32_t *cs, unsigned char *c, unsigned long long clen
+);
+/* Converts the shared plaintext back into mlen plain bytes. */
+void combine_shares_decrypt(
+    const mask_m_uint32_t *ms, unsigned char *m, unsigned long long mlen
+);
--- a/src-gift/gift/Implementations/crypto_aead/giftcofbv1/protected_giftcofb/giftb128.S
+++ b/src-gift/gift/Implementations/crypto_aead/giftcofbv1/protected_giftcofb/giftb128.S
+/****************************************************************************
+* 1st order masked ARM assembly implementation of the GIFT-128 block cipher.
+* See 'Fixslicing: A New GIFT Representation' paper available at
+* https://eprint.iacr.org/2020/412 for more details.
+*
+* @author   Alexandre Adomnicai, Nanyang Technological University,
+*           alexandre.adomnicai@ntu.edu.sg
+*
+* @date     July 2021
+****************************************************************************/
+.syntax unified
+.thumb
+.type rconst,%object
+rconst:                                     // 40 round-constant words, read sequentially through r0 by the 'ark' macro
+.word 0x10000008, 0x80018000, 0x54000002, 0x01010181
+.word 0x8000001f, 0x10888880, 0x6001e000, 0x51500002
+.word 0x03030180, 0x8000002f, 0x10088880, 0x60016000
+.word 0x41500002, 0x03030080, 0x80000027, 0x10008880
+.word 0x4001e000, 0x11500002, 0x03020180, 0x8000002b
+.word 0x10080880, 0x60014000, 0x01400002, 0x02020080
+.word 0x80000021, 0x10000080, 0x0001c000, 0x51000002
+.word 0x03010180, 0x8000002e, 0x10088800, 0x60012000
+.word 0x40500002, 0x01030080, 0x80000006, 0x10008808
+.word 0xc001a000, 0x14500002, 0x01020181, 0x8000001a
+/******************************************************************************
+* Macro to compute the SWAPMOVE technique: the bits of in1 selected by m are
+* exchanged with the bits of in0 selected by (m << n).
+*   tmp  = (in1 ^ (in0 >> n)) & m
+*   out1 = in1 ^ tmp
+*   out0 = in0 ^ (tmp << n)
+*   - out0,out1     output registers
+*   - in0,in1       input registers
+*   - m             mask (register or immediate)
+*   - n             shift value
+*   - tmp           temporary register (clobbered)
+* out1 is written before in0 is read for the last time, so out1 may only alias
+* in0 when in0 and in1 are the same register (the in-place, single-register
+* form used by the callers visible in this file).
+******************************************************************************/
+.macro swpmv    out0, out1, in0, in1, m, n, tmp
+    eor     \tmp, \in1, \in0, lsr \n
+    and     \tmp, \m
+    eor     \out1, \in1, \tmp
+    eor     \out0, \in0, \tmp, lsl \n
+.endm
+/******************************************************************************
+* Macro to compute a nibble-wise rotation to the right. With other masks the
+* same pattern is reused by the linear layers for half-word and byte-wise
+* rotations.
+*   out = ((in >> n0) & m0) | ((in & m1) << n1)
+*   - out           output register (may be the same register as in)
+*   - in            input register
+*   - m0,m1         masks
+*   - n0,n1         shift value
+*   - tmp           temporary register (clobbered; written first, so it must
+*                   differ from in and m1)
+******************************************************************************/
+.macro nibror   out, in, m0, m1, n0, n1, tmp
+    and     \tmp, \m0, \in, lsr \n0
+    and     \out, \in, \m1
+    orr     \out, \tmp, \out, lsl \n1
+.endm
+/******************************************************************************
+* 1st-order secure AND between two masked values. Technique from the paper
+* 'Optimal First-Order Boolean Masking for Embedded IoT Devices' available at
+* https://orbilu.uni.lu/bitstream/10993/37740/1/Optimal_Masking.pdf.
+*   z1 = (x1 | ~y2) ^ (x1 & y1)
+*   z2 = (x2 | ~y2) ^ (x2 & y1)
+* so that z1 ^ z2 = (x1 ^ x2) & (y1 ^ y2).
+*   - z1,z2         output shares
+*   - x1,x2         1st input shares
+*   - y1,y2         2nd input shares
+*   - tmp           temporary register (clobbered)
+* tmp and z1 are written before all inputs have been read: tmp must not alias
+* any input, and z1 must not alias x2, y1 or y2.
+******************************************************************************/
+.macro secand   z1, z2, x1, x2, y1, y2, tmp
+    orn     \tmp, \x1, \y2
+    and     \z1, \x1, \y1
+    eor     \z1, \tmp, \z1
+    orn     \tmp, \x2, \y2
+    and     \z2, \x2, \y1
+    eor     \z2, \z2, \tmp
+.endm
+/******************************************************************************
+* 1st-order secure OR between two masked values. Technique from the paper
+* 'Optimal First-Order Boolean Masking for Embedded IoT Devices' available at
+* https://orbilu.uni.lu/bitstream/10993/37740/1/Optimal_Masking.pdf.
+*   z1 = (x1 | y2) ^ (x1 & y1)
+*   z2 = (x2 | y1) ^ (x2 & y2)
+* so that z1 ^ z2 = (x1 ^ x2) | (y1 ^ y2).
+*   - z1,z2         output shares
+*   - x1,x2         1st input shares
+*   - y1,y2         2nd input shares
+*   - tmp           temporary register (clobbered)
+* tmp and z1 are written before all inputs have been read: tmp must not alias
+* any input, and z1 must not alias x2, y1 or y2.
+******************************************************************************/
+.macro secor    z1, z2, x1, x2, y1, y2, tmp
+    orr     \tmp, \x1, \y2
+    and     \z1, \x1, \y1
+    eor     \z1, \tmp, \z1
+    and     \tmp, \x2, \y2
+    orr     \z2, \x2, \y1
+    eor     \z2, \z2, \tmp
+.endm
+/******************************************************************************
+* 1st-order secure XOR between two masked values. XOR is linear, so each
+* output share is simply the XOR of the matching input shares:
+*   z1 = x1 ^ y1,  z2 = x2 ^ y2
+*   - z1,z2         output shares
+*   - x1,x2         1st input shares
+*   - y1,y2         2nd input shares
+******************************************************************************/
+.macro secxor   z1, z2, x1, x2, y1, y2
+    eor     \z1, \x1, \y1
+    eor     \z2, \x2, \y2
+.endm
+/******************************************************************************
+* 1st-order masked S-box. Registers r10,r3 always refer to state[1] while
+* r11,r4 always refer to state[2] (first share, second share).
+*   - in0           1st input register, first share (i.e. state[0])
+*   - in0_m         second share of the 1st input register
+*   - in3           4th input register, first share (i.e. state[3])
+*   - in3_m         second share of the 4th input register
+* Uses r8,r7 as the two shares of the intermediate AND/OR results and r6 as
+* scratch; all three are clobbered.
+******************************************************************************/
+.macro sbox     in0, in0_m, in3, in3_m
+    secand  r8, r7, \in0, \in0_m, r11, r4, r6
+    secxor  r10, r3, r10, r3, r8, r7
+    secand  r8, r7, r10, r3, \in3, \in3_m, r6
+    secxor  \in0, \in0_m, \in0, \in0_m, r8, r7
+    secor   r8, r7, \in0, \in0_m, r10, r3, r6
+    secxor  r11, r4, r11, r4, r8, r7
+    secxor  \in3, \in3_m, \in3, \in3_m, r11, r4
+    secxor  r10, r3, r10, r3, \in3, \in3_m
+    secand  r8, r7, \in0, \in0_m, r10, r3, r6
+    secxor  r11, r4, r11, r4, r8, r7
+    mvn     \in3, \in3                      // complementing one share complements the shared value
+.endm
+/******************************************************************************
+* 1st-order masked linear layer for rounds i s.t. i % 5 = 0.
+* Every rotation is applied identically to both shares of a state word:
+* (r12,r5), (r11,r4) and (r10,r3).
+* Expects r14 = 0x11111111 on entry; clobbers r6 and r8.
+******************************************************************************/
+.macro llayer0
+    mvn     r6, r14, lsl #3                 // r6<- 0x77777777 for nibror
+    nibror  r12, r12, r6, r14, 1, 3, r8     // nibror(r12,1)
+    nibror  r5, r5, r6, r14, 1, 3, r8       // mask correction
+    nibror  r11, r11, r14, r6, 3, 1, r8     // nibror(r11,3)
+    nibror  r4, r4, r14, r6, 3, 1, r8       // mask correction
+    orr     r6, r14, r14, lsl #1            // r6 <- 0x33333333 for nibror
+    nibror  r10, r10, r6, r6, 2, 2, r8      // nibror(r10, 2)
+    nibror  r3, r3, r6, r6, 2, 2, r8        // mask correction
+.endm
+/******************************************************************************
+* 1st-order masked linear layer for rounds i s.t. i % 5 = 1.
+* Every rotation is applied identically to both shares of a state word:
+* (r9,r2), (r11,r4) and (r10,r3).
+* Clobbers r6, r7 and r8.
+******************************************************************************/
+.macro llayer1
+    movw    r6, #0x000f
+    movt    r6, #0x000f                     // r6 <- 0x000f000f for halfror
+    mvn     r7, r6, lsl #12                 // r7 <- 0x0fff0fff for halfror
+    nibror  r9, r9, r7, r6,  4,  12, r8     // halfror(r9,4)
+    nibror  r2, r2, r7, r6,  4,  12, r8     // mask correction
+    nibror  r11, r11, r6, r7,  12,  4, r8   // halfror(r11,12)
+    nibror  r4, r4, r6, r7,  12,  4, r8     // mask correction
+    rev16   r10, r10                        // halfror(r10,8)
+    rev16   r3, r3                          // mask correction
+.endm
+/******************************************************************************
+* 1st-order masked linear layer for rounds i s.t. i % 5 = 2.
+* Every SWAPMOVE is applied identically to both shares of a state word:
+* (r10,r3), (r12,r5) and (r11,r4). The last four SWAPMOVEs are written out
+* by hand so that the mask can be derived from r6 with a shifted operand.
+* Clobbers r6 and r8.
+******************************************************************************/
+.macro llayer2
+    movw    r6, #0x5555
+    movt    r6, #0x5555                     // r6 <- 0x55555555 for swpmv
+    swpmv   r10, r10, r10, r10, r6, #1, r8  // swpmv(r10, r10, 0x55..55, 1)
+    swpmv   r3, r3, r3, r3, r6, #1, r8      // mask correction
+    eor     r8, r12, r12, lsr #1
+    and     r8, r8, r6, lsr #16
+    eor     r12, r12, r8
+    eor     r12, r12, r8, lsl #1            // swpmv(r12, r12, 0x55550000, 1)
+    eor     r8, r5, r5, lsr #1
+    and     r8, r8, r6, lsr #16
+    eor     r5, r5, r8
+    eor     r5, r5, r8, lsl #1              // mask correction
+    eor     r8, r11, r11, lsr #1
+    and     r8, r8, r6, lsl #16
+    eor     r11, r11, r8
+    eor     r11, r11, r8, lsl #1            // swpmv(r11, r11, 0x00005555, 1)
+    eor     r8, r4, r4, lsr #1
+    and     r8, r8, r6, lsl #16
+    eor     r4, r4, r8
+    eor     r4, r4, r8, lsl #1              // mask correction
+.endm
+/******************************************************************************
+* 1st-order masked linear layer for rounds i s.t. i % 5 = 3.
+* Every byte-wise rotation is applied identically to both shares of a state
+* word: (r10,r3), (r11,r4) and (r9,r2).
+* Clobbers r6, r7 and r8.
+******************************************************************************/
+.macro llayer3
+    movw    r6, #0x0f0f
+    movt    r6, #0x0f0f                     // r6 <- 0x0f0f0f0f for byteror
+    nibror  r10, r10, r6, r6, #4, #4, r8    // byteror(r10,4)
+    nibror  r3, r3, r6, r6, #4, #4, r8      // mask correction
+    orr     r6, r6, r6, lsl #2              // r6 <- 0x3f3f3f3f for byteror
+    mvn     r8, r6                          // r8 <- 0xc0c0c0c0 for byteror
+    and     r7, r8, r11, lsl #6
+    and     r11, r6, r11, lsr #2
+    orr     r11, r11, r7                    // byteror(r11,2)
+    and     r7, r8, r4, lsl #6
+    and     r4, r6, r4, lsr #2
+    orr     r4, r4, r7                      // mask correction
+    mvn     r8, r6, lsr #6                  // r8 <- 0xff030303, i.e. 0x03030303 once applied to a value shifted right by 6
+    nibror  r9, r9, r8, r6, #6, #2, r7      // byteror(r9, 6)
+    nibror  r2, r2, r8, r6, #6, #2, r7      // mask correction
+.endm
+/******************************************************************************
+* 1st-order masked add round key.
+*   - in0           first-share register that receives the round constant
+*   - ror_idx0      rotation applied to state[1] (r10,r3) before the addition
+*   - ror_idx1      rotation applied to state[2] (r11,r4) before the addition
+* Reads two round-key words from [r1] (r1 advances by 8) and their masks from
+* offsets 312/316 of the updated r1, i.e. 320 bytes after the matching key
+* word. Reads one round constant from [r0] (r0 advances by 4).
+* The round constant is public, so it is added to the first share only.
+* Clobbers r6, r7 and r14.
+******************************************************************************/
+.macro ark  in0, ror_idx0, ror_idx1
+    ldr.w   r6, [r1], #4                    // load 1st rkey word
+    ldr.w   r7, [r1], #4                    // load 2nd rkey word
+    eor     r10, r6, r10, ror \ror_idx0     // add 1st rkey word
+    eor     r11, r7, r11, ror \ror_idx1     // add 2nd rkey word
+    ldr.w   r6, [r1, #312]                  // load 1st rkey_mask
+    ldr.w   r7, [r1, #316]                  // load 2nd rkey_mask
+    ldr.w   r14, [r0], #4                   // load rconst
+    eor     r3, r6, r3, ror \ror_idx0       // mask correction
+    eor     r4, r7, r4, ror \ror_idx1       // mask correction
+    eor     \in0, \in0, r14                 // add rconst
+.endm
+/******************************************************************************
+* 1st-order masked quintuple round: five S-box / linear layer / add-round-key
+* steps on the masked state. First shares live in r9-r12, second shares in
+* r2-r5.
+* r14 is reused as a constant register, so lr is spilled to [sp]: the caller
+* must have reserved one word at the top of the stack before the call.
+* Consumes five round constants from [r0] and ten round-key words from [r1]
+* (both pointers are advanced). Clobbers r6-r8.
+******************************************************************************/
+quintuple_round_masked:
+    str.w   r14, [sp]                       // save lr in the word reserved by the caller
+    movw    r14, #0x1111
+    movt    r14, #0x1111                       // r14<- 0x11111111
+    sbox    r9, r2, r12, r5                 // 1st round
+    llayer0
+    ark     r9, #0, #0
+    sbox    r12, r5, r9, r2                 // 2nd round
+    llayer1
+    ark     r12, #0, #0
+    sbox    r9, r2, r12, r5                 // 3rd round
+    llayer2
+    ark     r9, #0, #16
+    ror     r5, #16                         // same rotation on the second share
+    ror     r12, #16                        // rotate the first share by 16
+    sbox    r12, r5, r9, r2                 // 4th round
+    llayer3 
+    ark     r12, #0, #0
+    sbox    r9, r2, r12, r5                 // 5th round
+    ark     r9, #16, #8
+    ldr.w   r14, [sp]                       // restore lr
+    eor     r9, r9, r12, ror #24
+    eor     r12, r9, r12, ror #24
+    eor     r9, r9, r12                     // swap r9 with r12 >>> 24
+    eor     r2, r2, r5, ror #24
+    eor     r5, r2, r5, ror #24
+    eor     r2, r2, r5                      // swap r2 with r5 >>> 24
+    bx      lr
+/*****************************************************************************
+* 1st order masked implementation of the GIFTb-128 block cipher. This function
+* simply encrypts a 128-bit block, without any operation mode.
+*   - r0    address of the 16-byte output block
+*   - r1    address of the round keys (followed by their masks, see 'ark')
+*   - r2    address of the 16-byte input block
+* The state is kept as two shares: r9-r12 (first) and r2-r5 (second). The
+* cipher runs 8 quintuple rounds (40 rounds) and the shares are recombined
+* before the result is stored.
+* NOTE(review): the second share starts at zero, so the state is only
+* randomized once masked round-key words are mixed in by 'ark' - confirm this
+* matches the intended threat model.
+*****************************************************************************/
+@ void giftb128_encrypt_block(u8 *out, const u32* rkey, const u8 *block)
+.global giftb128_encrypt_block
+.type   giftb128_encrypt_block,%function
+giftb128_encrypt_block:
+    push    {r0-r12,r14}
+    // load plaintext blocks
+    ldr.w   r9, [r2]
+    ldr.w   r10, [r2, #4]
+    ldr.w   r11, [r2, #8]
+    ldr.w   r12, [r2, #12]   
+    rev     r9, r9
+    rev     r10, r10
+    rev     r11, r11
+    rev     r12, r12
+    movw    r2, #0x0000             // second share initialized to zero
+    movw    r3, #0x0000             // second share initialized to zero
+    movw    r4, #0x0000             // second share initialized to zero
+    movw    r5, #0x0000             // second share initialized to zero
+    // ------------------ GIFTb-CORE ROUTINE ------------------
+    adr     r0, rconst              // put 'rconst' address in r0
+    sub.w   sp, #4                  // allocate space on stack to store 'lr'
+    bl      quintuple_round_masked
+    bl      quintuple_round_masked
+    bl      quintuple_round_masked
+    bl      quintuple_round_masked
+    bl      quintuple_round_masked
+    bl      quintuple_round_masked
+    bl      quintuple_round_masked
+    bl      quintuple_round_masked
+    add.w   sp, #4
+    ldr.w   r0, [sp]                // restore 'ctext' address
+    // ------------------ UNMASKING ------------------
+    mov     r6, r9
+    mov     r9, #0                  // clear r9 before unmasking to avoid HD leakages
+    mov     r7, r10
+    mov     r10, #0                 // clear r10 before unmasking to avoid HD leakages
+    mov     r8, r11
+    mov     r11, #0                 // clear r11 before unmasking to avoid HD leakages
+    mov     r14, r12
+    mov     r12, #0                 // clear r12 before unmasking to avoid HD leakages
+    eor     r9, r6, r2              // unmask the internal state
+    eor     r10, r7, r3             // unmask the internal state
+    eor     r11, r8, r4             // unmask the internal state
+    eor     r12, r14, r5            // unmask the internal state
+    rev     r9, r9
+    rev     r10, r10
+    rev     r11, r11
+    rev     r12, r12
+    str.w   r9, [r0]
+    str.w   r10, [r0, #4]
+    str.w   r11, [r0, #8]
+    str.w   r12, [r0, #12]
+    pop     {r0-r12,r14}           // restore context
+    bx      lr
+/******************************************************************************
+* Macro to compute the GIFT-128 key update (in its classical representation).
+* Two 16-bit rotations are computed on the 32-bit word 'v' given as input.
+*   v <- ((v >> 12) & r10) | ((v & r9) << 4) | ((v >> 2) & r12)
+*        | ((v & 0x00030000) << 14)
+* then u and the updated v are stored at [r1] (r1 advances by 8).
+*   - u     1st round key word as defined in the specification (U <- W2||W3)
+*   - v     2nd round key word as defined in the specification (V <- W6||W7)
+* Expects r10 = 0x0000000f, r9 = 0x00000fff and r12 = 0x3fff0000 (loaded by
+* gift128_keyschedule). Clobbers r2 and r3.
+******************************************************************************/
+.macro k_upd  u, v
+    and     r2, r10, \v, lsr #12
+    and     r3, \v, r9
+    orr     r2, r2, r3, lsl #4
+    and     r3, r12, \v, lsr #2
+    orr     r2, r2, r3
+    and     \v, \v, #0x00030000
+    orr     \v, r2, \v, lsl #14
+    str.w   \u, [r1], #4
+    str.w   \v, [r1], #4
+.endm
+/******************************************************************************
+* Macro to rearrange round key words from their classical to fixsliced
+* representations. Four in-place SWAPMOVEs are applied to each word.
+*   - rk0   1st round key word
+*   - rk1   2nd round key word
+*   - idx0  shift amount of the 1st SWAPMOVE (mask taken from r3)
+*   - idx1  shift amount of the 2nd SWAPMOVE (mask taken from r10)
+*   - tmp   temporary register for SWAPMOVE (clobbered)
+* The 3rd SWAPMOVE uses the mask in r11 with a shift of 12 and the 4th uses
+* the immediate mask 0xff with a shift of 24. r3, r10 and r11 must be loaded
+* by the caller.
+******************************************************************************/
+.macro rearr_rk rk0, rk1, idx0, idx1, tmp
+    swpmv   \rk1, \rk1, \rk1, \rk1, r3, \idx0, \tmp
+    swpmv   \rk0, \rk0, \rk0, \rk0, r3, \idx0, \tmp
+    swpmv   \rk1, \rk1, \rk1, \rk1, r10, \idx1, \tmp
+    swpmv   \rk0, \rk0, \rk0, \rk0, r10, \idx1, \tmp
+    swpmv   \rk1, \rk1, \rk1, \rk1, r11, #12, \tmp
+    swpmv   \rk0, \rk0, \rk0, \rk0, r11, #12, \tmp
+    swpmv   \rk1, \rk1, \rk1, \rk1, #0xff, #24, \tmp
+    swpmv   \rk0, \rk0, \rk0, \rk0, #0xff, #24, \tmp
+.endm
+/******************************************************************************
+* Macro to compute a triple key update on a round key word for a given round
+* number s.t. round number % 5 = 0.
+*   - rk0   the rkey word to be updated thrice
+*   - rk1   the other rkey word for the given word (only rearranged by two
+*           SWAPMOVEs)
+*   - idx0  index to store the 1st rkey word (= round number * 8)
+*   - idx1  index to store the 2nd rkey word (= round number * 8 + 4)
+* Note that rk1 is stored at idx0 and rk0 at idx1, both relative to r1.
+* Uses the masks in r8, r9, r10, r11 and r12, which must be loaded by the
+* caller (gift128_keyschedule loads 0x55551100, 0x55554444, 0x00003333,
+* 0xcccccccc and 0x33333333 respectively). Clobbers r2.
+******************************************************************************/
+.macro tpl_upd_0 rk0, rk1, idx0, idx1
+    and     r2, r12, \rk0, ror #24
+    and     \rk0, \rk0, r11
+    orr     \rk0, r2, \rk0, ror #16
+    swpmv   \rk0, \rk0, \rk0, \rk0, r8, #1, r2
+    swpmv   \rk1, \rk1, \rk1, \rk1, r10, #16, r2
+    swpmv   \rk1, \rk1, \rk1, \rk1, r9, #1, r2
+    str.w   \rk1, [r1, \idx0]
+    str.w   \rk0, [r1, \idx1]
+.endm
+/******************************************************************************
+* Macro to compute a triple key update on a round key word for a given round
+* number s.t. round number % 5 = 1.
+*   - rk    the rkey word to be updated thrice (updated in place)
+*   - idx   index to store the rkey word (offset from r1)
+* Uses the masks in r7, r8, r9 and r10, which must be loaded by the caller
+* (gift128_keyschedule loads 0x001f001f, 0x00070007, 0x03000300 and
+* 0x003f003f respectively). Clobbers r2 and r3.
+******************************************************************************/
+.macro tpl_upd_1 rk, idx
+    and     r2, r9, \rk, lsr #6
+    and     r3, \rk, r10, lsl #8
+    orr     r2, r2, r3, lsl #2
+    and     r3, r8, \rk, lsr #5
+    orr     r2, r2, r3
+    and     \rk, \rk, r7
+    orr     \rk, r2, \rk, lsl #3
+    str.w   \rk, [r1, \idx]
+.endm
+/******************************************************************************
+* Macro to compute a double key update on a round key word for a given round
+* number s.t. round number % 5 = 1.
+*   - rk    the rkey word to be updated twice (updated in place)
+*   - idx   index to store the rkey word (offset from r1)
+* Uses the masks in r10, r11 and r12, which must be loaded by the caller
+* (gift128_keyschedule loads 0x003f003f, 0x00030003 and 0x0f000f00
+* respectively). Clobbers r2 and r3.
+******************************************************************************/
+.macro dbl_upd_1 rk, idx
+    and     r2, r12, \rk, lsr #4
+    and     r3, \rk, r12
+    orr     r2, r2, r3, lsl #4
+    and     r3, r11, \rk, lsr #6
+    orr     r2, r2, r3
+    and     \rk, \rk, r10
+    orr     \rk, r2, \rk, lsl #2
+    str.w   \rk, [r1, \idx]
+.endm
+/******************************************************************************
+* Macro to compute a triple key update on a round key word for a given round
+* number s.t. round number % 5 = 2.
+*   rk <- (ror(rk, 20) & r11) | (ror(rk, 24) & r12)
+*   - rk    the rkey word to be updated thrice (updated in place)
+*   - idx   index to store the rkey word (offset from r1)
+* Uses the masks in r11 and r12, which must be loaded by the caller
+* (gift128_keyschedule loads 0xaaaaaaaa and 0x55555555 respectively).
+* Clobbers r2.
+******************************************************************************/
+.macro tpl_upd_2 rk, idx
+    and     r2, r12, \rk, ror #24
+    and     \rk, r11, \rk, ror #20
+    orr     \rk, \rk, r2
+    str.w   \rk, [r1, \idx]
+.endm
+/******************************************************************************
+* Macro to compute a double key update on a round key word for a given round
+* number s.t. round number % 5 = 2.
+*   rk <- (ror(rk, 16) & r12) | (ror(rk, 24) & r11)
+*   - rk    the rkey word to be updated twice (updated in place)
+*   - idx   index to store the rkey word (offset from r1)
+* Uses the masks in r11 and r12, which must be loaded by the caller
+* (gift128_keyschedule loads 0xaaaaaaaa and 0x55555555 respectively).
+* Clobbers r2.
+******************************************************************************/
+.macro dbl_upd_2 rk, idx
+    and     r2, r11, \rk, ror #24
+    and     \rk, r12, \rk, ror #16
+    orr     \rk, \rk, r2
+    str.w   \rk, [r1, \idx]
+.endm
+/******************************************************************************
+* Macro to compute a triple key update on a round key word for a given round
+* number s.t. round number % 5 = 3.
+*   - rk    the rkey word to be updated thrice (updated in place)
+*   - idx   index to store the rkey word (offset from r1)
+* Uses the masks in r7, r10, r11 and r12 (some of them through shifted
+* operands), which must be loaded by the caller. Clobbers r2 and r3.
+******************************************************************************/
+.macro tpl_upd_3 rk, idx
+    and     r2, r10, \rk, lsr #18
+    and     r3, \rk, r7, lsr #4
+    orr     r2, r2, r3, lsl #3
+    and     r3, r11, \rk, lsr #14
+    orr     r2, r2, r3
+    and     r3, \rk, r12, lsr #11
+    orr     r2, r2, r3, lsl #15
+    and     r3, r12, \rk, lsr #1
+    orr     r2, r2, r3
+    and     \rk, \rk, r7, lsr #16
+    orr     \rk, r2, \rk, lsl #19
+    str.w   \rk, [r1, \idx]
+.endm
+/******************************************************************************
+* Macro to compute a double key update on a round key word for a given round
+* number s.t. round number % 5 = 3.
+*   - rk    the rkey word to be updated twice (updated in place)
+*   - idx   index to store the rkey word (offset from r1)
+* Uses the masks in r7, r8 and r9, which must be loaded by the caller.
+* Clobbers r2 and r3.
+******************************************************************************/
+.macro dbl_upd_3 rk, idx
+    and     r2, r9, \rk, lsr #2
+    and     r3, r9, \rk
+    orr     r2, r2, r3, lsl #2
+    and     r3, r8, \rk, lsr #1
+    orr     r2, r2, r3
+    and     \rk, \rk, r7
+    orr     \rk, r2, \rk, lsl #3
+    str.w   \rk, [r1, \idx]
+.endm
+/******************************************************************************
+* Macro to compute a triple key update on a round key word for a given round
+* number s.t. round number % 5 = 4.
+*   - rk    the rkey word to be updated thrice (updated in place)
+*   - idx   index to store the rkey word (offset from r1)
+* Uses the masks in r7 and r12, which must be loaded by the caller, plus the
+* immediate masks 0x003f0000 and 0x000f. Clobbers r2 and r3.
+******************************************************************************/
+.macro tpl_upd_4 rk, idx
+    and     r2, r7, \rk, lsr #6
+    and     r3, \rk, #0x003f0000
+    orr     r2, r2, r3, lsl #10
+    and     r3, r12, \rk, lsr #4
+    orr     r2, r2, r3
+    and     \rk, \rk, #0x000f
+    orr     \rk, r2, \rk, lsl #12
+    str.w   \rk, [r1, \idx]
+.endm
+/******************************************************************************
+* Macro to compute a double key update on a round key word for a given round
+* number s.t. round number % 5 = 4.
+*   - rk    the rkey word to be updated twice (updated in place)
+*   - idx   index to store the rkey word (offset from r1)
+* Uses the masks in r8 and r10, which must be loaded by the caller, plus the
+* immediate mask 0x000f0000. Clobbers r2 and r3.
+******************************************************************************/
+.macro dbl_upd_4 rk, idx
+    and     r2, r10, \rk, lsr #4
+    and     r3, \rk, #0x000f0000
+    orr     r2, r2, r3, lsl #12
+    and     r3, r8, \rk, lsr #8
+    orr     r2, r2, r3
+    and     \rk, \rk, r8
+    orr     \rk, r2, \rk, lsl #8      //KEY_DOUBLE_UPDATE_4(r5)
+    str.w   \rk, [r1, \idx]
+.endm
+/******************************************************************************
+* Subroutine to update the rkeys according to the classical representation.
+* Runs four classical key updates on the key words held in r4-r7 and stores
+* the resulting eight round-key words at [r1] (r1 advances by 32 bytes).
+* Relies on the masks expected by k_upd in r9, r10 and r12; clobbers r2, r3.
+******************************************************************************/
+.align 2
+classical_key_update:
+    k_upd   r5, r7                  // 1st classical key update
+    k_upd   r4, r6                  // 2nd classical key update
+    k_upd   r7, r5                  // 3rd classical key update
+    k_upd   r6, r4                  // 4th classical key update
+    bx      lr
+/******************************************************************************
+* Subroutine to rearrange round key words from classical to fixsliced
+* representation for round i s.t. i mod 5 = 0.
+* Rewrites the two words at [r1] in place, then advances r1 by 40 bytes
+* (5 rounds of 8 bytes). Expects the SWAPMOVE masks in r3, r10 and r11;
+* clobbers r4, r6 and r12.
+******************************************************************************/
+.align 2
+rearrange_rkey_0:
+    ldr.w       r6, [r1]                // load 1st rkey word (classical rep)
+    ldr.w       r4, [r1, #4]            // load 2nd rkey word (classical rep)
+    rearr_rk    r4, r6, #9, #18, r12    // rearrange rkey words for round 1
+    str.w       r4, [r1, #4]            // store 2nd rkey word (fixsliced rep)
+    str.w       r6, [r1], #40           // store 1st rkey word (fixsliced rep)
+    bx          lr
+/******************************************************************************
+* Subroutine to rearrange round key words from classical to fixsliced
+* representation for round i s.t. i mod 5 = 1 or 3.
+* Rewrites the two words at [r1] in place, then advances r1 by 40 bytes
+* (5 rounds of 8 bytes). Expects the SWAPMOVE masks in r3, r10 and r11;
+* clobbers r5, r7 and r8.
+******************************************************************************/
+.align 2
+rearrange_rkey_1:
+    ldr.w       r5, [r1]                // load 3rd rkey word (classical rep)
+    ldr.w       r7, [r1, #4]            // load 4th rkey word (classical rep)
+    rearr_rk    r5, r7, #3, #6, r8      // rearrange rkey words for round 2
+    str.w       r7, [r1, #4]            // store 4th rkey word (fixsliced rep)
+    str.w       r5, [r1], #40           // store 3rd rkey word (fixsliced rep)
+    bx          lr
+/******************************************************************************
+* Subroutine to rearrange round key words from classical to fixsliced
+* representation for round i s.t. i mod 5 = 2.
+* Rewrites the two words at [r1] in place, then advances r1 by 40 bytes
+* (5 rounds of 8 bytes). Expects the SWAPMOVE masks in r3, r10 and r11;
+* clobbers r5, r7 and r8.
+******************************************************************************/
+.align 2
+rearrange_rkey_2:
+    ldr.w       r5, [r1]                // load 5th rkey word (classical rep)
+    ldr.w       r7, [r1, #4]            // load 6th rkey word (classical rep)
+    rearr_rk    r5, r7, #15, #18, r8    // rearrange rkey words for round 3
+    str.w       r7, [r1, #4]            // store 6th rkey word (fixsliced rep)
+    str.w       r5, [r1], #40           // store 5th rkey word (fixsliced rep)
+    bx          lr
+/******************************************************************************
+* Subroutine to update round key words according to fixslicing for round i
+* s.t. i mod 5 = 0.
+* Reads the two rkey words of round i at [r1] and writes the derived words
+* for rounds i+10, i+20 and i+30 at offsets 80, 160 and 240 from r1 (8 bytes
+* per round). r1 is left unchanged. Relies on the masks expected by
+* tpl_upd_0; clobbers r2, r4 and r5.
+******************************************************************************/
+.align 2
+key_update_0:
+    ldr.w       r4, [r1]                // load 1st rkey word for round i
+    ldr.w       r5, [r1, #4]            // load 2nd rkey word for round i
+    tpl_upd_0   r4, r5, #80, #84        // compute rkey words for round i+10
+    tpl_upd_0   r5, r4, #160, #164      // compute rkey words for round i+20
+    tpl_upd_0   r4, r5, #240, #244      // compute rkey words for round i+30
+    bx          lr
+/******************************************************************************
+* Subroutine to update round key words according to fixslicing for round i
+* s.t. i mod 5 = 1.
+* Reads the two rkey words at offsets 8 and 12 from r1 and writes the derived
+* words for rounds i+10, i+20 and i+30 (80, 160 and 240 bytes further).
+* r1 is left unchanged. Relies on the masks expected by tpl_upd_1 and
+* dbl_upd_1; clobbers r2, r3, r4 and r5.
+******************************************************************************/
+.align 2
+key_update_1:
+    ldr.w       r4, [r1, #8]            // load 1st rkey word for round i
+    ldr.w       r5, [r1, #12]           // load 2nd rkey word for round i
+    tpl_upd_1   r4, #92                 // compute 2nd rkey word for round i+10
+    dbl_upd_1   r5, #88                 // compute 1st rkey word for round i+10
+    tpl_upd_1   r5, #172                // compute 2nd rkey word for round i+20
+    dbl_upd_1   r4, #168                // compute 1st rkey word for round i+20
+    tpl_upd_1   r4, #252                // compute 2nd rkey word for round i+30
+    dbl_upd_1   r5, #248                // compute 1st rkey word for round i+30
+    bx          lr
+/******************************************************************************
+* Subroutine to update round key words according to fixslicing for round i
+* s.t. i mod 5 = 2.
+* Reads the two rkey words at offsets 16 and 20 from r1 and writes the
+* derived words for rounds i+10, i+20 and i+30 (80, 160 and 240 bytes
+* further). r1 is left unchanged. Relies on the masks expected by tpl_upd_2
+* and dbl_upd_2; clobbers r2, r4 and r5.
+******************************************************************************/
+.align 2
+key_update_2:
+    ldr.w       r4, [r1, #16]           // load 1st rkey word for round i
+    ldr.w       r5, [r1, #20]           // load 2nd rkey word for round i
+    tpl_upd_2   r4, #100                // compute 2nd rkey word for round i+10
+    dbl_upd_2   r5, #96                 // compute 1st rkey word for round i+10
+    tpl_upd_2   r5, #180                // compute 2nd rkey word for round i+20
+    dbl_upd_2   r4, #176                // compute 1st rkey word for round i+20
+    tpl_upd_2   r4, #260                // compute 2nd rkey word for round i+30
+    dbl_upd_2   r5, #256                // compute 1st rkey word for round i+30
+    bx          lr
+/******************************************************************************
+* Subroutine to update round key words according to fixslicing for round i
+* s.t. i mod 5 = 3.
+* Reads the two rkey words at offsets 24 and 28 from r1 and writes the
+* derived words for rounds i+10, i+20 and i+30 (80, 160 and 240 bytes
+* further). r1 is left unchanged. Relies on the masks expected by tpl_upd_3
+* and dbl_upd_3; clobbers r2, r3, r4 and r5.
+******************************************************************************/
+.align 2
+key_update_3:
+    ldr.w       r4, [r1, #24]           // load 1st rkey word for round i
+    ldr.w       r5, [r1, #28]           // load 2nd rkey word for round i
+    tpl_upd_3   r4, #108                // compute 2nd rkey word for round i+10
+    dbl_upd_3   r5, #104                // compute 1st rkey word for round i+10
+    tpl_upd_3   r5, #188                // compute 2nd rkey word for round i+20
+    dbl_upd_3   r4, #184                // compute 1st rkey word for round i+20
+    tpl_upd_3   r4, #268                // compute 2nd rkey word for round i+30
+    dbl_upd_3   r5, #264                // compute 1st rkey word for round i+30
+    bx          lr
+/******************************************************************************
+* Subroutine to update round key words according to fixslicing for round i
+* s.t. i mod 5 = 4.
+* Reads the two rkey words at offsets 32 and 36 from r1 and writes the
+* derived words for rounds i+10, i+20 and i+30 (80, 160 and 240 bytes
+* further). r1 is left unchanged. Relies on the masks expected by tpl_upd_4
+* and dbl_upd_4; clobbers r2, r3, r4 and r5.
+******************************************************************************/
+.align 2
+key_update_4:
+    ldr.w       r4, [r1, #32]           // load 1st rkey word for round i
+    ldr.w       r5, [r1, #36]           // load 2nd rkey word for round i
+    tpl_upd_4   r4, #116                // compute 2nd rkey word for round i+10
+    dbl_upd_4   r5, #112                // compute 1st rkey word for round i+10
+    tpl_upd_4   r5, #196                // compute 2nd rkey word for round i+20
+    dbl_upd_4   r4, #192                // compute 1st rkey word for round i+20
+    tpl_upd_4   r4, #276                // compute 2nd rkey word for round i+30
+    dbl_upd_4   r5, #272                // compute 1st rkey word for round i+30
+    bx          lr
+/*****************************************************************************
+* 1st order masked implementation of the GIFT-128 key schedule according to
+* the fixsliced representation.
+*****************************************************************************/
+@ void gift128_keyschedule(const u8* key, u32* rkey, u32* masks) {
+.global gift128_keyschedule
+.type   gift128_keyschedule,%function
+gift128_keyschedule:
+    push    {r0-r12, r14}
+    ldm     r0, {r4-r7}             //load key words
+    ldm     r2, {r2-r3,r8,r14}      //load random words
+    mov     r0, #2                  //r0 <- 2
+    rev     r4, r4                  //endianness (could be skipped with another representation)
+    rev     r5, r5                  //endianness (could be skipped with another representation)
+    rev     r6, r6                  //endianness (could be skipped with another representation)
+    rev     r7, r7                  //endianness (could be skipped with another representation)
+    rev     r2, r2                  //endianness (could be skipped with another representation)
+    rev     r3, r3                  //endianness (could be skipped with another representation)
+    rev     r8, r8                  //endianness (could be skipped with another representation)
+    rev     r14, r14                //endianness (could be skipped with another representation)
+    strd    r7, r5, [r1], #8        // store the first rkeys
+    strd    r14, r3, [r1, #312]     // store the corresponding masks
+    strd    r6, r4, [r1], #8        // store the first rkeys
+    strd    r8, r2, [r1, #312]      // store the corresponding masks
+loop:
+    // keyschedule using classical representation for the first 20 rounds
+    movw    r12, #0x3fff
+    lsl     r12, r12, #16           // r12<- 0x3fff0000
+    movw    r10, #0x000f            // r10<- 0x0000000f
+    movw    r9, #0x0fff             // r9 <- 0x00000fff
+    bl      classical_key_update    // keyschedule using classical representation
+    bl      classical_key_update    // keyschedule using classical representation
+    sub.w   r1, r1, #80
+    movw    r3, #0x0055
+    movt    r3, #0x0055             // r3 <- 0x00550055
+    movw    r10, #0x3333            // r10<- 0x00003333
+    movw    r11, #0x000f
+    movt    r11, #0x000f            // r11<- 0x000f000f
+    bl      rearrange_rkey_0        // fixslice the rkeys
+    bl      rearrange_rkey_0        // fixslice the rkeys
+    sub.w   r1, r1, #72
+    movw    r3, #0x1111
+    movt    r3, #0x1111             // r3 <- 0x11111111
+    movw    r10, #0x0303
+    movt    r10, #0x0303            // r10<- 0x03030303
+    bl      rearrange_rkey_1        // fixslice the rkeys
+    bl      rearrange_rkey_1        // fixslice the rkeys
+    sub.w   r1, r1, #72
+    movw    r3, #0xaaaa             // r3 <- 0x0000aaaa
+    movw    r10, #0x3333            // r10<- 0x00003333
+    movw    r11, #0xf0f0            // r11<- 0x0000f0f0
+    bl      rearrange_rkey_2        // fixslice the rkeys
+    bl      rearrange_rkey_2        // fixslice the rkeys
+    sub.w   r1, r1, #72
+    movw    r3, #0x0a0a
+    movt    r3, #0x0a0a             // r3 <- 0x0a0a0a0a
+    movw    r10, #0x00cc
+    movt    r10, #0x00cc            // r10<- 0x00cc00cc
+    bl      rearrange_rkey_1        // fixslice the rkeys
+    bl      rearrange_rkey_1        // fixslice the rkeys
+    sub.w   r1, r1, #104
+    movw    r10, #0x3333            // r10<- 0x00003333
+    eor     r12, r10, r10, lsl #16  // r12<- 0w33333333 
+    mvn     r11, r12                // r11<- 0xcccccccc
+    movw    r9, #0x4444
+    movt    r9, #0x5555             // r9 <- 0x55554444
+    movw    r8, #0x1100
+    movt    r8, #0x5555             // r8 <- 0x55551100
+    bl      key_update_0            // keyschedule according to fixslicing
+    add.w   r1, r1, #40
+    bl      key_update_0            // keyschedule according to fixslicing
+    sub.w   r1, r1, #40
+    movw    r12, #0x0f00
+    movt    r12, #0x0f00            // r12<- 0x0f000f00
+    movw    r11, #0x0003
+    movt    r11, #0x0003            // r11<- 0x00030003
+    movw    r10, #0x003f
+    movt    r10, #0x003f            // r10<- 0x003f003f
+    lsl     r9, r11, #8             // r9 <- 0x03000300
+    and     r8, r10, r10, lsr #3    // r8 <- 0x00070007
+    orr     r7, r8, r8, lsl #2      // r7 <- 0x001f001f
+    bl      key_update_1            // keyschedule according to fixslicing
+    add.w   r1, r1, #40
+    bl      key_update_1            // keyschedule according to fixslicing
+    sub.w   r1, r1, #40
+    movw    r12, #0x5555
+    movt    r12, #0x5555            // r12<- 0x55555555
+    mvn     r11, r12                // r11<- 0xaaaaaaaa
+    bl      key_update_2            // keyschedule according to fixslicing
+    add.w   r1, r1, #40
+    bl      key_update_2            // keyschedule according to fixslicing
+    sub.w   r1, r1, #40
+    orr     r12, r8, r8, lsl #8     // r12<- 0x07070707
+    movw    r11, #0xc0c0            // r11<- 0x0000c0c0
+    movw    r10, #0x3030            // r10<- 0x00003030
+    and     r9, r12, r12, lsr #1    // r9 <- 0x03030303
+    lsl     r8, r12, #4             // r8 <- 0x70707070
+    eor     r7, r8, r9, lsl #5      // r7 <- 0x10101010
+    movw    r6, #0xf0f0             // r6 <- 0x0000f0f0
+    bl      key_update_3            // keyschedule according to fixslicing
+    add.w   r1, r1, #40
+    bl      key_update_3            // keyschedule according to fixslicing
+    sub.w   r1, r1, #40
+    movw    r12, #0x0fff
+    lsl     r10, r12, #16
+    movw    r8, #0x00ff             // r8 <- 0x000000ff
+    movw    r7, #0x03ff             // r7 <- 0x000003ff
+    lsl     r7, r7, #16
+    bl      key_update_4            // keyschedule according to fixslicing
+    add.w   r1, r1, #40
+    bl      key_update_4            // keyschedule according to fixslicing
+    add.w   r1, r1, #280            //r1 now points to the masks
+    ldrd    r7, r5, [r1], #8
+    ldrd    r6, r4, [r1], #8
+    subs    r0, r0, #1              //r0 <- r0-1 
+    bne     loop                    //go to 'loop' if r0=0
+    pop     {r0-r12,r14}
+    bx      lr
--- a/src-gift/gift/Implementations/crypto_aead/giftcofbv1/protected_giftcofb/giftb128.h
+++ b/src-gift/gift/Implementations/crypto_aead/giftcofbv1/protected_giftcofb/giftb128.h
+// Public interface of the masked GIFT-128 primitives: two size constants and
+// the prototypes of the key schedule and the single-block encryption routine.
+// Both routines are only declared here; their definitions live elsewhere.
+#ifndef GIFT128_H_
+#define GIFT128_H_
+#include <stdint.h>
+// Size constants; both are 16 (presumably counted in bytes -- TODO confirm).
+#define KEY_SIZE    		16
+#define GIFT128_BLOCK_SIZE  16
+// Key schedule: reads `key` and `key_m` (both const) and writes `rkey`.
+// NOTE(review): `key_m` presumably carries the mask share for `key`, and
+// `rkey` must be large enough for the round keys plus their masks; the
+// required size is not visible from this header -- confirm against the
+// implementation.
+extern void gift128_keyschedule(const uint8_t* key, uint32_t* rkey, const uint8_t* key_m);
+// Block encryption: reads `rkey` and `in_block` (both const) and writes
+// `out_block`.
+// NOTE(review): block buffers are presumably GIFT128_BLOCK_SIZE long -- confirm.
+extern void giftb128_encrypt_block(uint8_t* out_block, const uint32_t* rkey, const uint8_t* in_block);
+#endif  // GIFT128_H_
\ No newline at end of file
--- a/src-gift/gift/Implementations/crypto_aead/giftcofbv1/protected_giftcofb/goal_emsca
+++ b/src-gift/gift/Implementations/crypto_aead/giftcofbv1/protected_giftcofb/goal_emsca
--- a/src-gift/gift/Implementations/crypto_aead/giftcofbv1/protected_giftcofb/goal_powersca_1st
+++ b/src-gift/gift/Implementations/crypto_aead/giftcofbv1/protected_giftcofb/goal_powersca_1st
--- a/src-gift/gift/Implementations/crypto_aead/giftcofbv1/protected_giftcofb/implementers
+++ b/src-gift/gift/Implementations/crypto_aead/giftcofbv1/protected_giftcofb/implementers
+Alexandre Adomnicai
\ No newline at end of file
--- a/src-gift/gift/LICENSE
+++ b/src-gift/gift/LICENSE
+CC0 1.0 Universal
+Statement of Purpose
+The laws of most jurisdictions throughout the world automatically confer
+exclusive Copyright and Related Rights (defined below) upon the creator and
+subsequent owner(s) (each and all, an "owner") of an original work of
+authorship and/or a database (each, a "Work").
+Certain owners wish to permanently relinquish those rights to a Work for the
+purpose of contributing to a commons of creative, cultural and scientific
+works ("Commons") that the public can reliably and without fear of later
+claims of infringement build upon, modify, incorporate in other works, reuse
+and redistribute as freely as possible in any form whatsoever and for any
+purposes, including without limitation commercial purposes. These owners may
+contribute to the Commons to promote the ideal of a free culture and the
+further production of creative, cultural and scientific works, or to gain
+reputation or greater distribution for their Work in part through the use and
+efforts of others.
+For these and/or other purposes and motivations, and without any expectation
+of additional consideration or compensation, the person associating CC0 with a
+Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
+and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
+and publicly distribute the Work under its terms, with knowledge of his or her
+Copyright and Related Rights in the Work and the meaning and intended legal
+effect of CC0 on those rights.
+1. Copyright and Related Rights. A Work made available under CC0 may be
+protected by copyright and related or neighboring rights ("Copyright and
+Related Rights"). Copyright and Related Rights include, but are not limited
+to, the following:
+  i. the right to reproduce, adapt, distribute, perform, display, communicate,
+  and translate a Work;
+  ii. moral rights retained by the original author(s) and/or performer(s);
+  iii. publicity and privacy rights pertaining to a person's image or likeness
+  depicted in a Work;
+  iv. rights protecting against unfair competition in regards to a Work,
+  subject to the limitations in paragraph 4(a), below;
+  v. rights protecting the extraction, dissemination, use and reuse of data in
+  a Work;
+  vi. database rights (such as those arising under Directive 96/9/EC of the
+  European Parliament and of the Council of 11 March 1996 on the legal
+  protection of databases, and under any national implementation thereof,
+  including any amended or successor version of such directive); and
+  vii. other similar, equivalent or corresponding rights throughout the world
+  based on applicable law or treaty, and any national implementations thereof.
+2. Waiver. To the greatest extent permitted by, but not in contravention of,
+applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
+unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
+and Related Rights and associated claims and causes of action, whether now
+known or unknown (including existing as well as future claims and causes of
+action), in the Work (i) in all territories worldwide, (ii) for the maximum
+duration provided by applicable law or treaty (including future time
+extensions), (iii) in any current or future medium and for any number of
+copies, and (iv) for any purpose whatsoever, including without limitation
+commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
+the Waiver for the benefit of each member of the public at large and to the
+detriment of Affirmer's heirs and successors, fully intending that such Waiver
+shall not be subject to revocation, rescission, cancellation, termination, or
+any other legal or equitable action to disrupt the quiet enjoyment of the Work
+by the public as contemplated by Affirmer's express Statement of Purpose.
+3. Public License Fallback. Should any part of the Waiver for any reason be
+judged legally invalid or ineffective under applicable law, then the Waiver
+shall be preserved to the maximum extent permitted taking into account
+Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
+is so judged Affirmer hereby grants to each affected person a royalty-free,
+non transferable, non sublicensable, non exclusive, irrevocable and
+unconditional license to exercise Affirmer's Copyright and Related Rights in
+the Work (i) in all territories worldwide, (ii) for the maximum duration
+provided by applicable law or treaty (including future time extensions), (iii)
+in any current or future medium and for any number of copies, and (iv) for any
+purpose whatsoever, including without limitation commercial, advertising or
+promotional purposes (the "License"). The License shall be deemed effective as
+of the date CC0 was applied by Affirmer to the Work. Should any part of the
+License for any reason be judged legally invalid or ineffective under
+applicable law, such partial invalidity or ineffectiveness shall not
+invalidate the remainder of the License, and in such case Affirmer hereby
+affirms that he or she will not (i) exercise any of his or her remaining
+Copyright and Related Rights in the Work or (ii) assert any associated claims
+and causes of action with respect to the Work, in either case contrary to
+Affirmer's express Statement of Purpose.
+4. Limitations and Disclaimers.
+  a. No trademark or patent rights held by Affirmer are waived, abandoned,
+  surrendered, licensed or otherwise affected by this document.
+  b. Affirmer offers the Work as-is and makes no representations or warranties
+  of any kind concerning the Work, express, implied, statutory or otherwise,
+  including without limitation warranties of title, merchantability, fitness
+  for a particular purpose, non infringement, or the absence of latent or
+  other defects, accuracy, or the present or absence of errors, whether or not
+  discoverable, all to the greatest extent permissible under applicable law.
+  c. Affirmer disclaims responsibility for clearing rights of other persons
+  that may apply to the Work or any use thereof, including without limitation
+  any person's Copyright and Related Rights in the Work. Further, Affirmer
+  disclaims responsibility for obtaining any necessary consents, permissions
+  or other rights required for any use of the Work.
+  d. Affirmer understands and acknowledges that Creative Commons is not a
+  party to this document and has no duty or obligation with respect to this
+  CC0 or use of the Work.
+For more information, please see
+<http://creativecommons.org/publicdomain/zero/1.0/>
--- a/src-gift/gift/README.md
+++ b/src-gift/gift/README.md
+# GIFT-COFB software implementation protected against 1st-order side-channel attacks
+This repository contains software implementations of [GIFT-COFB](https://www.isical.ac.in/~lightweight/COFB/), one of the finalists of the [NIST LWC competition](https://csrc.nist.gov/projects/lightweight-cryptography).
+These implementations were written to answer the [call for protected software implementations](https://cryptography.gmu.edu/athena/LWC/Call_for_Protected_Software_Implementations.pdf) issued by the [Cryptographic Engineering Research Group](https://cryptography.gmu.edu/) from George Mason University, and therefore follow the proposed API.
+Note that the implementations require an external `randombytes` function with the following prototype:
+`void randombytes(unsigned char *,unsigned long long);`
+in order to generate the shares used as masks.
+More details about the implementations and countermeasures are given in `Documents/documentation.pdf`.
--- a/templates/f7/Inc/main.h
+++ b/templates/f7/Inc/main.h
@@ -39,6 +39,8 @@ extern "C" {
 #include "stm32f7xx_ll_usart.h"
 #include "stm32f7xx.h"
 #include "stm32f7xx_ll_gpio.h"
+#include "stm32f7xx_ll_rng.h"
+#include "stm32f7xx_hal_rng.h"
 #if defined(USE_FULL_ASSERT)
 #include "stm32_assert.h"

--- a/templates/f7/Inc/randombytes.h
+++ b/templates/f7/Inc/randombytes.h
+/* Interface to the random number source used by the F7 template. */
+#ifndef RANDOMBYTES_H_
+#define RANDOMBYTES_H_
+#include <stdint.h>
+/* Set up the random number generator. Takes no arguments and returns nothing;
+ * main() calls it once before test_setup(). */
+void init_rng(void);
+/* Write `len` random bytes to the buffer starting at `x`. */
+void randombytes(unsigned char* x, unsigned long long len);
+/* Return one 32-bit random value. */
+uint32_t rand32(void);
+#endif /* RANDOMBYTES_H_ */
--- a/templates/f7/Inc/stm32f7xx_hal_conf.h
+++ b/templates/f7/Inc/stm32f7xx_hal_conf.h
@@ -56,7 +56,7 @@
 /* #define HAL_LPTIM_MODULE_ENABLED   */
 /* #define HAL_LTDC_MODULE_ENABLED   */
 /* #define HAL_QSPI_MODULE_ENABLED   */
-/* #define HAL_RNG_MODULE_ENABLED   */
+#define HAL_RNG_MODULE_ENABLED   
 /* #define HAL_RTC_MODULE_ENABLED   */
 /* #define HAL_SAI_MODULE_ENABLED   */
 /* #define HAL_SD_MODULE_ENABLED   */
@@ -149,7 +149,7 @@
 #define  USE_RTOS                     0U
 #define  PREFETCH_ENABLE              0U
 #define  ART_ACCLERATOR_ENABLE        0U /* To enable instruction cache and prefetch */
+#define  USE_HAL_RNG_REGISTER_CALLBACKS         0U /* RNG register callback disabled      */
 /* ########################## Assert Selection ############################## */
 /**
  * @brief Uncomment the line below to expanse the "assert_param" macro in the 

--- a/templates/f7/Makefile
+++ b/templates/f7/Makefile
@@ -44,9 +44,13 @@ Drivers/STM32F7xx_HAL_Driver/Src/stm32f7xx_ll_dma.c \
 Drivers/STM32F7xx_HAL_Driver/Src/stm32f7xx_ll_rcc.c \
 Drivers/STM32F7xx_HAL_Driver/Src/stm32f7xx_ll_utils.c \
 Drivers/STM32F7xx_HAL_Driver/Src/stm32f7xx_ll_exti.c \
+Drivers/STM32F7xx_HAL_Driver/Src/stm32f7xx_ll_rng.c \
+Drivers/STM32F7xx_HAL_Driver/Src/stm32f7xx_hal_rng.c \
+Drivers/STM32F7xx_HAL_Driver/Src/stm32f7xx_hal.c \
 Src/system_stm32f7xx.c \
 Src/test.c \
 Src/uartp.c \
+Src/randombytes.c \
 $(SRC_FILES)
 # ASM sources

--- a/templates/f7/Src/main.c
+++ b/templates/f7/Src/main.c
@@ -24,6 +24,7 @@
 /* Private includes ----------------------------------------------------------*/
 /* USER CODE BEGIN Includes */
 #include "test.h"
+#include "randombytes.h"
 /* USER CODE END Includes */
@@ -43,7 +44,6 @@
 /* USER CODE END PM */
 /* Private variables ---------------------------------------------------------*/
 /* USER CODE BEGIN PV */
 /* USER CODE END PV */
@@ -53,6 +53,7 @@ void SystemClock_Config(void);
 static void MX_GPIO_Init(void);
 static void MX_USART2_UART_Init(void);
 static void MX_USART3_UART_Init(void);
 /* USER CODE BEGIN PFP */
 /* USER CODE END PFP */
@@ -81,7 +82,6 @@ int main(void)
  /* MCU Configuration--------------------------------------------------------*/
  /* Reset of all peripherals, Initializes the Flash interface and the Systick. */
  LL_APB1_GRP1_EnableClock(LL_APB1_GRP1_PERIPH_PWR);
  LL_APB2_GRP1_EnableClock(LL_APB2_GRP1_PERIPH_SYSCFG);
@@ -106,6 +106,7 @@ int main(void)
  MX_USART2_UART_Init();
  MX_USART3_UART_Init();
  /* USER CODE BEGIN 2 */
+  init_rng();
  test_setup();
  /* USER CODE END 2 */
@@ -167,6 +168,8 @@ void SystemClock_Config(void)
  LL_RCC_SetUSARTClockSource(LL_RCC_USART3_CLKSOURCE_PCLK1);
 }
 /**
  * @brief USART2 Initialization Function
  * @param None

--- a/templates/f7/Src/randombytes.c
+++ b/templates/f7/Src/randombytes.c
+#include "randombytes.h"
+#include "main.h"
+#include "stm32f7xx.h"
+#include <stdint.h>
+/* HAL handle for the RNG peripheral; set up by init_rng() and passed to the
+ * HAL RNG calls made in this file. */
+RNG_HandleTypeDef hrng;
+/**
+ * Bind the handle to the RNG peripheral instance and initialise it through
+ * the HAL. Error_Handler() is called if HAL_RNG_Init() reports anything
+ * other than HAL_OK.
+ */
+void init_rng(void) {
+  hrng.Instance = RNG;
+  if (HAL_RNG_Init(&hrng) == HAL_OK) {
+    return;
+  }
+  Error_Handler();
+}
+/**
+ * Draw one 32-bit word from the hardware RNG through the HAL.
+ *
+ * @return the generated word. If the HAL reports a failure, Error_Handler()
+ *         is called first; should that handler return, the value is 0.
+ */
+uint32_t rand32(void) {
+  uint32_t word = 0;
+  if (HAL_RNG_GenerateRandomNumber(&hrng, &word) != HAL_OK) {
+    /* A failed draw leaves `word` at its 0 placeholder. Callers use the
+     * result as masking randomness, so a silent constant must not be passed
+     * off as random: report it the same way init_rng() reports failures. */
+    Error_Handler();
+  }
+  return word;
+}
+/**
+ * Fill a buffer with bytes drawn from rand32().
+ *
+ * @param x    destination buffer with room for at least `len` bytes; no
+ *             particular alignment is required.
+ * @param len  number of bytes to write; 0 writes nothing and draws nothing.
+ *
+ * One 32-bit word is drawn for every group of up to four output bytes and is
+ * emitted least-significant byte first, which is the byte order a
+ * little-endian word store produces. Unused high bytes of the last word are
+ * discarded.
+ */
+void randombytes(unsigned char* x, unsigned long long len) {
+  while (len > 0) {
+    uint32_t word = rand32();
+    /* Store byte by byte: `x` is a byte pointer with no alignment guarantee,
+     * so writing through a uint32_t* cast would be undefined behaviour. */
+    for (unsigned int i = 0; i < 4 && len > 0; i++) {
+      *x++ = (unsigned char)word;
+      word >>= 8;
+      len--;
+    }
+  }
+}
--- a/templates/f7/Src/stm32f7xx_hal_msp.c
+++ b/templates/f7/Src/stm32f7xx_hal_msp.c
+/* USER CODE BEGIN Header */
+/**
+  ******************************************************************************
+  * File Name          : stm32f7xx_hal_msp.c
+  * Description        : This file provides code for the MSP Initialization
+  *                      and de-Initialization codes.
+  ******************************************************************************
+  * @attention
+  *
+  * <h2><center>&copy; Copyright (c) 2022 STMicroelectronics.
+  * All rights reserved.</center></h2>
+  *
+  * This software component is licensed by ST under BSD 3-Clause license,
+  * the "License"; You may not use this file except in compliance with the
+  * License. You may obtain a copy of the License at:
+  *                        opensource.org/licenses/BSD-3-Clause
+  *
+  ******************************************************************************
+  */
+/* USER CODE END Header */
+/* Includes ------------------------------------------------------------------*/
+#include "main.h"
+/* USER CODE BEGIN Includes */
+/* USER CODE END Includes */
+/* Private typedef -----------------------------------------------------------*/
+/* USER CODE BEGIN TD */
+/* USER CODE END TD */
+/* Private define ------------------------------------------------------------*/
+/* USER CODE BEGIN Define */
+/* USER CODE END Define */
+/* Private macro -------------------------------------------------------------*/
+/* USER CODE BEGIN Macro */
+/* USER CODE END Macro */
+/* Private variables ---------------------------------------------------------*/
+/* USER CODE BEGIN PV */
+/* USER CODE END PV */
+/* Private function prototypes -----------------------------------------------*/
+/* USER CODE BEGIN PFP */
+/* USER CODE END PFP */
+/* External functions --------------------------------------------------------*/
+/* USER CODE BEGIN ExternalFunctions */
+/* USER CODE END ExternalFunctions */
+/* USER CODE BEGIN 0 */
+/* USER CODE END 0 */
+/**
+  * @brief Initializes the Global MSP.
+  * Enables the PWR and SYSCFG peripheral clocks through the RCC macros; no
+  * interrupt configuration is performed here.
+  * @retval None
+  */
+void HAL_MspInit(void)
+{
+  /* USER CODE BEGIN MspInit 0 */
+  /* USER CODE END MspInit 0 */
+  __HAL_RCC_PWR_CLK_ENABLE();
+  __HAL_RCC_SYSCFG_CLK_ENABLE();
+  /* System interrupt init*/
+  /* USER CODE BEGIN MspInit 1 */
+  /* USER CODE END MspInit 1 */
+}
+/**
+* @brief RNG MSP Initialization
+* Enables the RNG peripheral clock when the handle refers to the RNG
+* instance; does nothing for any other instance.
+* NOTE(review): presumably invoked by the HAL from HAL_RNG_Init() -- confirm
+* against the HAL driver.
+* @param hrng: RNG handle pointer
+* @retval None
+*/
+void HAL_RNG_MspInit(RNG_HandleTypeDef* hrng)
+{
+  if(hrng->Instance==RNG)
+  {
+  /* USER CODE BEGIN RNG_MspInit 0 */
+  /* USER CODE END RNG_MspInit 0 */
+    /* Peripheral clock enable */
+    __HAL_RCC_RNG_CLK_ENABLE();
+  /* USER CODE BEGIN RNG_MspInit 1 */
+  /* USER CODE END RNG_MspInit 1 */
+  }
+}
+/**
+* @brief RNG MSP De-Initialization
+* Disables the RNG peripheral clock when the handle refers to the RNG
+* instance; does nothing for any other instance.
+* NOTE(review): presumably invoked by the HAL from HAL_RNG_DeInit() -- confirm
+* against the HAL driver.
+* @param hrng: RNG handle pointer
+* @retval None
+*/
+void HAL_RNG_MspDeInit(RNG_HandleTypeDef* hrng)
+{
+  if(hrng->Instance==RNG)
+  {
+  /* USER CODE BEGIN RNG_MspDeInit 0 */
+  /* USER CODE END RNG_MspDeInit 0 */
+    /* Peripheral clock disable */
+    __HAL_RCC_RNG_CLK_DISABLE();
+  /* USER CODE BEGIN RNG_MspDeInit 1 */
+  /* USER CODE END RNG_MspDeInit 1 */
+  }
+}
+/* USER CODE BEGIN 1 */
+/* USER CODE END 1 */
+/************************ (C) COPYRIGHT STMicroelectronics *****END OF FILE****/
--- a/templates/f7/compile
+++ b/templates/f7/compile
@@ -3,8 +3,7 @@
 mv -n *.dat *.inc *.h Inc/
 [ -f ./build ] && rm ./build
 [ -f src/encrypt.c ] && sed -i src/encrypt.c -e "s/\(\s\)init(/\1_init(/g"
-mkdir -p /tmp/f7/Drivers
+cp -r /home/sr/PhD/research/lwc/compare/templates/f7/Drivers Drivers/
-ln -s /tmp/f7/Drivers Drivers
 make
 EXIT_CODE=$?
@@ -13,9 +12,6 @@ mv build/f7.bin .
 mv build/f7.hex . 
 mv build/f7.elf .
-rm -rf ./Drivers
-rm -rf ./build/
 exit $EXIT_CODE