#ifndef ROUND_H_ #define ROUND_H_ #include "ascon.h" #include "printstate.h" #include "random.h" __forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { *K0 = TOSHARES(0); *K1 = TOSHARES(0); *K2 = TOSHARES(0); } __forceinline void PINIT(state_t* s) { randinit(); s->x0 = TOSHARES(0); s->x1 = TOSHARES(0); s->x2 = TOSHARES(0); s->x3 = TOSHARES(0); s->x4 = TOSHARES(0); s->rx = TOSHARES(0); } #define TOFFOLI(a0, a1, b0, b1, c0, c1) \ do { \ (a0) ^= (~(b0)) & (c1); \ (a0) ^= (~(b0)) & (c0); \ (a1) ^= (b1) & (c1); \ (a1) ^= (b1) & (c0); \ } while (0) __forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { state_t t; /* refresh randomness */ /* s->rx = TOSHARES(0); */ /* addition of round constant */ s->x2.s0.e ^= C_e; /* substitution layer */ s->x0.s0.e ^= s->x4.s0.e; s->x4.s0.e ^= s->x3.s0.e; s->x2.s0.e ^= s->x1.s0.e; s->x0.s1.e ^= s->x4.s1.e; s->x4.s1.e ^= s->x3.s1.e; s->x2.s1.e ^= s->x1.s1.e; /* start of shared keccak s-box from https://eprint.iacr.org/2019/536 */ s->rx.s0.e = s->rx.s1.e; TOFFOLI(s->rx.s0.e, s->rx.s1.e, s->x4.s0.e, s->x4.s1.e, s->x0.s0.e, s->x0.s1.e); TOFFOLI(s->x0.s0.e, s->x0.s1.e, s->x1.s0.e, s->x1.s1.e, s->x2.s0.e, s->x2.s1.e); TOFFOLI(s->x2.s0.e, s->x2.s1.e, s->x3.s0.e, s->x3.s1.e, s->x4.s0.e, s->x4.s1.e); TOFFOLI(s->x4.s0.e, s->x4.s1.e, s->x0.s0.e, s->x0.s1.e, s->x1.s0.e, s->x1.s1.e); TOFFOLI(s->x1.s0.e, s->x1.s1.e, s->x2.s0.e, s->x2.s1.e, s->x3.s0.e, s->x3.s1.e); s->x3.s1.e ^= s->rx.s1.e; s->x3.s0.e ^= s->rx.s0.e; /* end of shared keccak s-box */ s->x1.s0.e ^= s->x0.s0.e; s->x0.s0.e ^= s->x4.s0.e; s->x3.s0.e ^= s->x2.s0.e; s->x2.s0.e = ~s->x2.s0.e; s->x1.s1.e ^= s->x0.s1.e; s->x0.s1.e ^= s->x4.s1.e; s->x3.s1.e ^= s->x2.s1.e; /* addition of round constant */ s->x2.s0.o ^= C_o; /* substitution layer */ s->x0.s0.o ^= s->x4.s0.o; s->x4.s0.o ^= s->x3.s0.o; s->x2.s0.o ^= s->x1.s0.o; s->x0.s1.o ^= s->x4.s1.o; s->x4.s1.o ^= s->x3.s1.o; s->x2.s1.o ^= s->x1.s1.o; /* start of shared keccak s-box from https://eprint.iacr.org/2019/536 */ s->rx.s0.o = s->rx.s1.o; TOFFOLI(s->rx.s0.o, s->rx.s1.o, s->x4.s0.o, s->x4.s1.o, s->x0.s0.o, s->x0.s1.o); TOFFOLI(s->x0.s0.o, s->x0.s1.o, s->x1.s0.o, s->x1.s1.o, s->x2.s0.o, s->x2.s1.o); TOFFOLI(s->x2.s0.o, s->x2.s1.o, s->x3.s0.o, s->x3.s1.o, s->x4.s0.o, s->x4.s1.o); TOFFOLI(s->x4.s0.o, s->x4.s1.o, s->x0.s0.o, s->x0.s1.o, s->x1.s0.o, s->x1.s1.o); TOFFOLI(s->x1.s0.o, s->x1.s1.o, s->x2.s0.o, s->x2.s1.o, s->x3.s0.o, s->x3.s1.o); s->x3.s1.o ^= s->rx.s1.o; s->x3.s0.o ^= s->rx.s0.o; /* end of shared keccak s-box */ s->x1.s0.o ^= s->x0.s0.o; s->x0.s0.o ^= s->x4.s0.o; s->x3.s0.o ^= s->x2.s0.o; s->x2.s0.o = ~s->x2.s0.o; s->x1.s1.o ^= s->x0.s1.o; s->x0.s1.o ^= s->x4.s1.o; s->x3.s1.o ^= s->x2.s1.o; /* linear diffusion layer */ t.x0.s1.e = s->x0.s1.e ^ ROR32(s->x0.s1.o, 4); t.x0.s1.o = s->x0.s1.o ^ ROR32(s->x0.s1.e, 5); t.x1.s1.e = s->x1.s1.e ^ ROR32(s->x1.s1.e, 11); t.x1.s1.o = s->x1.s1.o ^ ROR32(s->x1.s1.o, 11); t.x2.s1.e = s->x2.s1.e ^ ROR32(s->x2.s1.o, 2); t.x2.s1.o = s->x2.s1.o ^ ROR32(s->x2.s1.e, 3); t.x3.s1.e = s->x3.s1.e ^ ROR32(s->x3.s1.o, 3); t.x3.s1.o = s->x3.s1.o ^ ROR32(s->x3.s1.e, 4); t.x4.s1.e = s->x4.s1.e ^ ROR32(s->x4.s1.e, 17); t.x4.s1.o = s->x4.s1.o ^ ROR32(s->x4.s1.o, 17); s->x0.s1.e ^= ROR32(t.x0.s1.o, 9); s->x0.s1.o ^= ROR32(t.x0.s1.e, 10); s->x1.s1.e ^= ROR32(t.x1.s1.o, 19); s->x1.s1.o ^= ROR32(t.x1.s1.e, 20); s->x2.s1.e ^= t.x2.s1.o; s->x2.s1.o ^= ROR32(t.x2.s1.e, 1); s->x3.s1.e ^= ROR32(t.x3.s1.e, 5); s->x3.s1.o ^= ROR32(t.x3.s1.o, 5); s->x4.s1.e ^= ROR32(t.x4.s1.o, 3); s->x4.s1.o ^= ROR32(t.x4.s1.e, 4); t.x0.s0.e = s->x0.s0.e ^ ROR32(s->x0.s0.o, 4); t.x0.s0.o = s->x0.s0.o ^ ROR32(s->x0.s0.e, 5); t.x1.s0.e = s->x1.s0.e ^ ROR32(s->x1.s0.e, 11); t.x1.s0.o = s->x1.s0.o ^ ROR32(s->x1.s0.o, 11); t.x2.s0.e = s->x2.s0.e ^ ROR32(s->x2.s0.o, 2); t.x2.s0.o = s->x2.s0.o ^ ROR32(s->x2.s0.e, 3); t.x3.s0.e = s->x3.s0.e ^ ROR32(s->x3.s0.o, 3); t.x3.s0.o = s->x3.s0.o ^ ROR32(s->x3.s0.e, 4); t.x4.s0.e = s->x4.s0.e ^ ROR32(s->x4.s0.e, 17); t.x4.s0.o = s->x4.s0.o ^ ROR32(s->x4.s0.o, 17); s->x0.s0.e ^= ROR32(t.x0.s0.o, 9); s->x0.s0.o ^= ROR32(t.x0.s0.e, 10); s->x1.s0.e ^= ROR32(t.x1.s0.o, 19); s->x1.s0.o ^= ROR32(t.x1.s0.e, 20); s->x2.s0.e ^= t.x2.s0.o; s->x2.s0.o ^= ROR32(t.x2.s0.e, 1); s->x3.s0.e ^= ROR32(t.x3.s0.e, 5); s->x3.s0.o ^= ROR32(t.x3.s0.o, 5); s->x4.s0.e ^= ROR32(t.x4.s0.o, 3); s->x4.s0.o ^= ROR32(t.x4.s0.e, 4); printstate(" round output", s); } #endif /* ROUND_H_ */