/* asconp.h */
#ifndef ASCONP_H
#define ASCONP_H

#include <inttypes.h>
#include "forceinline.h"

/*
 * One 64-bit lane, viewable as a single 64-bit word, as two 32-bit words,
 * or as eight individual bytes. All members overlay the same storage, so
 * which element of `w` / `b` holds which bits of `x` depends on the byte
 * order of the target.
 */
typedef union
{
    uint64_t x; /* the whole lane as one 64-bit value */
    uint32_t w[2]; /* the lane as two 32-bit words */
    uint8_t b[8]; /* the lane as eight bytes */
} lane_t;

/*
 * Permutation state: five 64-bit lanes. The members are alternative views
 * of the same storage, so l[i], x[i], w[i][..] and b[i][..] all refer to
 * lane i.
 */
typedef union
{
    lane_t l[5]; /* lanes as lane_t unions */
    uint64_t x[5]; /* lanes as 64-bit values */
    uint32_t w[5][2]; /* lanes as pairs of 32-bit words */
    uint8_t b[5][8]; /* lanes as rows of eight bytes */
} state_t;

/* ---------------------------------------------------------------- */

/*
 * Shorthands that call PROUNDS on a variable named `s`, which must be in
 * scope at the point of use, with a fixed round count of 12, 1, 6 and 12
 * respectively. Each expands to a bare call expression without a trailing
 * semicolon.
 * NOTE(review): the H/B/E/K suffixes look like the s_H, s_B, s_E, s_K
 * round-count parameters of ISAP -- confirm against the calling code.
 */
#define P_sH PROUNDS(s, 12)
#define P_sB PROUNDS(s, 1)
#define P_sE PROUNDS(s, 6)
#define P_sK PROUNDS(s, 12)

/* ---------------------------------------------------------------- */

/*
 * Both conversion directions expand to the same operation, U64BIG, which
 * takes a lane_t and returns it with the byte order of its 64-bit value
 * reversed. Applying either macro twice therefore restores the input.
 * NOTE(review): presumably used when moving lanes between byte buffers and
 * the state -- confirm against the callers.
 */
#define U64TOWORD(x) U64BIG(x)
#define WORDTOU64(x) U64BIG(x)

/* ---------------------------------------------------------------- */

/*
 * Identity macros: both expand to their (parenthesised) argument unchanged.
 * NOTE(review): the names suggest "to/from bit-interleaved"; presumably
 * kept as no-ops so shared calling code compiles against this
 * implementation -- confirm.
 */
#define TOBI(x) (x)
#define FROMBI(x) (x)

/* ---------------------------------------------------------------- */

/*
 * Reverse the byte order of the 64-bit value stored in a lane.
 *
 * The least significant byte of x.x becomes the most significant one and
 * vice versa. The swap is applied unconditionally; no byte-order detection
 * is done here.
 *
 * x:       lane to convert (passed by value, the caller's copy is untouched)
 * returns: a lane whose .x holds the byte-reversed value
 */
lane_t U64BIG(lane_t x)
{
    uint64_t remaining = x.x;
    uint64_t reversed = 0;

    /* Peel bytes off the low end of the input and push them onto the low
     * end of the result; after eight steps the order is reversed. */
    for (int i = 0; i < 8; i++)
    {
        reversed = (reversed << 8) | (remaining & 0xFFu);
        remaining >>= 8;
    }

    x.x = reversed;
    return x;
}

/* ---------------------------------------------------------------- */

/*
 * XMUL(i, x): helper for ROR. Relies on the locals `a`, `b`, `tmp` and
 * `byte_rol` of the enclosing function.
 *
 * Multiplies source byte a.b[i] by 2^x (x in 0..7), giving a 16-bit product,
 * and XORs the low half into destination byte (byte_rol + i) mod 8 and the
 * high half into the next destination byte, wrapping around at 8.
 */
#define XMUL(i, x)                                   \
    do                                               \
    {                                                \
        tmp = (uint16_t)a.b[i] * (1 << (x));         \
        b.b[(byte_rol + (i)) & 0x7] ^= (uint8_t)tmp; \
        b.b[(byte_rol + (i) + 1) & 0x7] ^= tmp >> 8; \
    } while (0)

/*
 * Rotate x by n bit positions, working one byte at a time.
 *
 * The rotation is carried out as a left rotation by (64 - n): a whole-byte
 * move of (64 - n) / 8 positions plus a sub-byte shift of (64 - n) % 8 bits
 * done by XMUL. Byte index 0 of the lane is treated as the least significant
 * byte, so the result equals a right rotation by n when uint64_t is stored
 * little-endian.
 *
 * x:       value to rotate
 * n:       rotation amount; 0 <= n <= 64 keeps (64 - n) non-negative. The
 *          callers in this file pass constants between 1 and 39.
 * returns: the rotated value
 */
uint64_t ROR(uint64_t x, int n)
{
    const int left = 64 - n; /* equivalent left-rotation amount */
    int bit_rol = left & 0x7;
    int byte_rol = left >> 3;
    uint16_t tmp;
    lane_t a;
    lane_t b;

    a.x = x;
    b.x = 0ull; /* XMUL accumulates into b with XOR, so start from zero */

    /* One step per source byte, written out explicitly. */
    XMUL(0, bit_rol);
    XMUL(1, bit_rol);
    XMUL(2, bit_rol);
    XMUL(3, bit_rol);
    XMUL(4, bit_rol);
    XMUL(5, bit_rol);
    XMUL(6, bit_rol);
    XMUL(7, bit_rol);

    return b.x;
}

/* ---------------------------------------------------------------- */

/* Bitwise complement of a single byte. */
forceinline uint8_t NOT8(uint8_t a)
{
    return (uint8_t)~a;
}

/* Bitwise exclusive-or of two bytes. */
forceinline uint8_t XOR8(uint8_t a, uint8_t b)
{
    return (uint8_t)(a ^ b);
}

/* Bitwise and of two bytes. */
forceinline uint8_t AND8(uint8_t a, uint8_t b)
{
    return (uint8_t)(a & b);
}

/* Bitwise inclusive-or of two bytes. */
forceinline uint8_t OR8(uint8_t a, uint8_t b)
{
    return (uint8_t)(a | b);
}

/* ---------------------------------------------------------------- */

/*
 * Linear diffusion step of one round.
 *
 * Every output lane has the form in ^ ROR(in, r0) ^ ROR(in, r1). It is
 * evaluated as in ^ ROR(in ^ ROR(in, r1 - r0), r0), which needs two
 * rotations of the same shape instead of two independent ones.
 *
 * Input and output lanes do not share an index: lane 0 is derived from
 * lane 2, lane 2 from lane 4, lane 4 from lane 1, lane 1 from lane 3, and
 * lane 3 from xtemp. The assignments must stay in this order so that each
 * lane is read before it is overwritten.
 *
 * s:     state, updated in place
 * xtemp: the fifth input lane, supplied separately from the state
 */
forceinline void LINEAR_LAYER(state_t *s, uint64_t xtemp)
{
    uint64_t *const lanes = s->x;
    uint64_t in;

    in = lanes[2]; /* rotations 19 and 28 */
    lanes[0] = in ^ ROR(in ^ ROR(in, 28 - 19), 19);

    in = lanes[4]; /* rotations 1 and 6 */
    lanes[2] = in ^ ROR(in ^ ROR(in, 6 - 1), 1);

    in = lanes[1]; /* rotations 7 and 41 */
    lanes[4] = in ^ ROR(in ^ ROR(in, 41 - 7), 7);

    in = lanes[3]; /* rotations 39 and 61 */
    lanes[1] = in ^ ROR(in ^ ROR(in, 61 - 39), 39);

    /* rotations 10 and 17 */
    lanes[3] = xtemp ^ ROR(xtemp ^ ROR(xtemp, 17 - 10), 10);
}

/* ---------------------------------------------------------------- */

/*
 * Substitution step for one byte column of the state.
 *
 * Works on byte `pos` of each of the five lanes using only bitwise
 * operations, so the eight bit positions of that byte are processed in
 * parallel. The formulas follow the round description of Ascon given in the
 * Bachelor's thesis "Optimizing Ascon on RISC-V" by Lars Jellema,
 * see https://github.com/Lucus16/ascon-riscv/
 *
 * Results are written back to byte `pos` of lanes 4, 3, 2, 1 and 0 of the
 * state, and one further value goes to byte `pos` of *xtemp. LINEAR_LAYER
 * consumes lanes 1..4 together with xtemp; the value left in lane 0 is only
 * an intermediate.
 *
 * s:     state, byte column `pos` is updated in place
 * xtemp: receives one output byte at index `pos`
 * pos:   byte index within each lane, 0..7
 */
forceinline void NONLINEAR_LAYER(state_t *s, lane_t *xtemp, uint8_t pos)
{
    /* Snapshot the column so every formula reads the original inputs. */
    const uint8_t x0 = s->b[0][pos];
    const uint8_t x1 = s->b[1][pos];
    const uint8_t x2 = s->b[2][pos];
    const uint8_t x3 = s->b[3][pos];
    const uint8_t x4 = s->b[4][pos];

    /* Shared sub-terms. */
    const uint8_t x1_x2 = XOR8(x1, x2);
    const uint8_t x0_x4 = XOR8(x0, x4);
    const uint8_t x3_x4 = XOR8(x3, x4);

    const uint8_t y4 = XOR8(OR8(x3, NOT8(x4)), x1_x2);
    const uint8_t y3 = XOR8(OR8(XOR8(x3, x1), x1_x2), x0_x4);
    const uint8_t y2 = XOR8(OR8(XOR8(x2, x0_x4), x1), x3_x4);
    const uint8_t y1 = XOR8(AND8(x1, NOT8(x0_x4)), x3_x4);
    const uint8_t y0 = OR8(x0, x3_x4);

    /* Store in the same order as the in-place formulation; the write
     * through xtemp comes last. */
    s->b[4][pos] = y4;
    s->b[3][pos] = y3;
    s->b[2][pos] = y2;
    s->b[1][pos] = y1;
    s->b[0][pos] = y0;
    xtemp->b[pos] = XOR8(y0, x1_x2);
}

/* ---------------------------------------------------------------- */

/*
 * One round of the permutation: constant addition, substitution layer,
 * linear layer.
 *
 * s: state, updated in place
 * C: round constant, XORed into byte 0 of lane 2
 */
forceinline void ROUND(state_t *s, uint8_t C)
{
    lane_t fifth; /* every byte is written by the substitution loop below */
    uint8_t column = 0;

    /* Constant addition. */
    s->b[2][0] = XOR8(s->b[2][0], C);

    /* Substitution layer, one byte column at a time. */
    while (column < 8)
    {
        NONLINEAR_LAYER(s, &fifth, column);
        column++;
    }

    /* Linear layer; takes the lane produced outside the state as well. */
    LINEAR_LAYER(s, fifth.x);
}

/* ---------------------------------------------------------------- */

/*
 * Round-constant schedule.
 *
 * The loop counter doubles as the round constant. Each step adds INC
 * (-0x0f), which lowers the high nibble by one and raises the low nibble by
 * one: 0xf0, 0xe1, 0xd2, ..., 0x5a, 0x4b. The sequence stops when the
 * counter reaches END (0x3c), which is never used as a constant.
 * START(n) selects the entry point that leaves exactly n constants before
 * END: START(12) == 0xf0, START(6) == 0x96, START(1) == 0x4b, and
 * START(0) == END.
 */
#define RC(i) (i)
#define START(n) (((3 + (n)) << 4) | (12 - (n)))
#define INC (-0x0f)
#define END 0x3c

/*
 * Apply `nr` rounds of the permutation to the state, using the last `nr`
 * constants of the 12-entry schedule.
 *
 * s:  state, updated in place
 * nr: number of rounds, 0..12. With nr == 0 the state is left unchanged.
 *     Values above 12 have no matching round constants and are clamped to
 *     12, so an out-of-range request still gets the full-strength
 *     permutation and the loop always terminates.
 */
void PROUNDS(state_t *s, uint8_t nr)
{
    enum { MAX_ROUNDS = 12 };

    if (nr > MAX_ROUNDS)
        nr = MAX_ROUNDS;

    /* Test before the first round: START(0) already equals END, and a
     * post-tested loop would step past END and never come back to it. */
    int i = START(nr);
    while (i != END)
    {
        ROUND(s, (uint8_t)RC(i));
        i += INC;
    }
}

/* ---------------------------------------------------------------- */

#endif // ASCONP_H