asconp.h 9.8 KB
Newer Older
Robert Primas committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231
#ifndef ASCONP_H_
#define ASCONP_H_

typedef unsigned char u8;
typedef unsigned long u32;
typedef unsigned long long u64;

typedef struct
{
    u32 e;
    u32 o;
} u32_2;

// Round constants, bit-interleaved
u32 rc_o[12] = {0xc, 0xc, 0x9, 0x9, 0xc, 0xc, 0x9, 0x9, 0x6, 0x6, 0x3, 0x3};
u32 rc_e[12] = {0xc, 0x9, 0xc, 0x9, 0x6, 0x3, 0x6, 0x3, 0xc, 0x9, 0xc, 0x9};

/* ---------------------------------------------------------------- */

u64 U64BIG(u64 x)
{
    return ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) |
            (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) |
            (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) |
            (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56));
}

/* ---------------------------------------------------------------- */

// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
void to_bit_interleaving(u32_2 *out, u64 in)
{
    u32 hi = (in) >> 32;
    u32 lo = (u32)(in);
    u32 r0, r1;
    r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
    r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
    r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
    r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
    r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
    r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
    r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
    r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
    (*out).e = (lo & 0x0000FFFF) | (hi << 16);
    (*out).o = (lo >> 16) | (hi & 0xFFFF0000);
}

/* ---------------------------------------------------------------- */

// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
void from_bit_interleaving(u64 *out, u32_2 in)
{
    u32 lo = ((in).e & 0x0000FFFF) | ((in).o << 16);
    u32 hi = ((in).e >> 16) | ((in).o & 0xFFFF0000);
    u32 r0, r1;
    r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
    r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
    r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
    r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
    r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
    r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
    r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
    r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
    *out = (u64)hi << 32 | lo;
}

/* ---------------------------------------------------------------- */

#define ROUND()                                            \
    do                                                     \
    {                                                      \
        __asm__ __volatile__(                              \
            "eor %[x0_e], %[x0_e], %[x4_e]\n\t"            \
            "eor %[x0_o], %[x0_o], %[x4_o]\n\t"            \
            "eor %[x4_e], %[x4_e], %[x3_e]\n\t"            \
            "eor %[x4_o], %[x4_o], %[x3_o]\n\t"            \
            "eor %[x2_e], %[x2_e], %[x1_e]\n\t"            \
            "eor %[x2_o], %[x2_o], %[x1_o]\n\t"            \
            "bic %[reg0], %[x0_e], %[x4_e]\n\t"            \
            "bic %[reg1], %[x4_e], %[x3_e]\n\t"            \
            "bic %[reg2], %[x2_e], %[x1_e]\n\t"            \
            "bic %[reg3], %[x1_e], %[x0_e]\n\t"            \
            "eor %[x2_e], %[x2_e], %[reg1]\n\t"            \
            "eor %[x0_e], %[x0_e], %[reg2]\n\t"            \
            "eor %[x4_e], %[x4_e], %[reg3]\n\t"            \
            "bic %[reg3], %[x3_e], %[x2_e]\n\t"            \
            "eor %[x3_e], %[x3_e], %[reg0]\n\t"            \
            "bic %[reg2], %[x0_o], %[x4_o]\n\t"            \
            "bic %[reg0], %[x2_o], %[x1_o]\n\t"            \
            "bic %[reg1], %[x4_o], %[x3_o]\n\t"            \
            "eor %[x1_e], %[x1_e], %[reg3]\n\t"            \
            "eor %[x0_o], %[x0_o], %[reg0]\n\t"            \
            "eor %[x2_o], %[x2_o], %[reg1]\n\t"            \
            "bic %[reg3], %[x1_o], %[x0_o]\n\t"            \
            "bic %[reg0], %[x3_o], %[x2_o]\n\t"            \
            "eor %[x3_o], %[x3_o], %[reg2]\n\t"            \
            "eor %[x3_o], %[x3_o], %[x2_o]\n\t"            \
            "eor %[x4_o], %[x4_o], %[reg3]\n\t"            \
            "eor %[x1_o], %[x1_o], %[reg0]\n\t"            \
            "eor %[x3_e], %[x3_e], %[x2_e]\n\t"            \
            "eor %[x1_e], %[x1_e], %[x0_e]\n\t"            \
            "eor %[x1_o], %[x1_o], %[x0_o]\n\t"            \
            "eor %[x0_e], %[x0_e], %[x4_e]\n\t"            \
            "eor %[x0_o], %[x0_o], %[x4_o]\n\t"            \
            "mvn %[x2_e], %[x2_e]\n\t"                     \
            "mvn %[x2_o], %[x2_o]\n\t"                     \
            "eor %[reg0], %[x0_e], %[x0_o], ror #4\n\t"    \
            "eor %[reg1], %[x0_o], %[x0_e], ror #5\n\t"    \
            "eor %[reg2], %[x1_e], %[x1_e], ror #11\n\t"   \
            "eor %[reg3], %[x1_o], %[x1_o], ror #11\n\t"   \
            "eor %[x0_e], %[x0_e], %[reg1], ror #9\n\t"    \
            "eor %[x0_o], %[x0_o], %[reg0], ror #10\n\t"   \
            "eor %[x1_e], %[x1_e], %[reg3], ror #19\n\t"   \
            "eor %[x1_o], %[x1_o], %[reg2], ror #20\n\t"   \
            "eor %[reg0], %[x2_e], %[x2_o], ror #2\n\t"    \
            "eor %[reg1], %[x2_o], %[x2_e], ror #3\n\t"    \
            "eor %[reg2], %[x3_e], %[x3_o], ror #3\n\t"    \
            "eor %[reg3], %[x3_o], %[x3_e], ror #4\n\t"    \
            "eor %[x2_e], %[x2_e], %[reg1]\n\t"            \
            "eor %[x2_o], %[x2_o], %[reg0], ror #1\n\t"    \
            "eor %[x3_e], %[x3_e], %[reg2], ror #5\n\t"    \
            "eor %[x3_o], %[x3_o], %[reg3], ror #5\n\t"    \
            "eor %[reg0], %[x4_e], %[x4_e], ror #17\n\t"   \
            "eor %[reg1], %[x4_o], %[x4_o], ror #17\n\t"   \
            "eor %[x4_e], %[x4_e], %[reg1], ror #3\n\t"    \
            "eor %[x4_o], %[x4_o], %[reg0], ror #4\n\t"    \
            : [ x0_e ] "+r"(x0.e), [ x1_e ] "+r"(x1.e),    \
              [ x2_e ] "+r"(x2.e), [ x3_e ] "+r"(x3.e),    \
              [ x4_e ] "+r"(x4.e), [ x0_o ] "+r"(x0.o),    \
              [ x1_o ] "+r"(x1.o), [ x2_o ] "+r"(x2.o),    \
              [ x3_o ] "+r"(x3.o), [ x4_o ] "+r"(x4.o),    \
              [ reg0 ] "=r"(reg0), [ reg1 ] "=r"(reg1),    \
              [ reg2 ] "=r"(reg2), [ reg3 ] "=r"(reg3)::); \
    } while (0)

/* ---------------------------------------------------------------- */

#define P_LOOP(rounds)                         \
    do                                         \
    {                                          \
        u32 reg0, reg1, reg2, reg3;            \
        for (u32 r = 12 - rounds; r < 12; r++) \
        {                                      \
            x2.e ^= rc_e[r];                   \
            x2.o ^= rc_o[r];                   \
            ROUND();                           \
        }                                      \
    } while (0)

/* ---------------------------------------------------------------- */

#define P_12()                      \
    do                              \
    {                               \
        u32 reg0, reg1, reg2, reg3; \
        x2.e ^= 0xc;                \
        x2.o ^= 0xc;                \
        ROUND();                    \
        x2.e ^= 0x9;                \
        x2.o ^= 0xc;                \
        ROUND();                    \
        x2.e ^= 0xc;                \
        x2.o ^= 0x9;                \
        ROUND();                    \
        x2.e ^= 0x9;                \
        x2.o ^= 0x9;                \
        ROUND();                    \
        x2.e ^= 0x6;                \
        x2.o ^= 0xc;                \
        ROUND();                    \
        x2.e ^= 0x3;                \
        x2.o ^= 0xc;                \
        ROUND();                    \
        x2.e ^= 0x6;                \
        x2.o ^= 0x9;                \
        ROUND();                    \
        x2.e ^= 0x3;                \
        x2.o ^= 0x9;                \
        ROUND();                    \
        x2.e ^= 0xc;                \
        x2.o ^= 0x6;                \
        ROUND();                    \
        x2.e ^= 0x9;                \
        x2.o ^= 0x6;                \
        ROUND();                    \
        x2.e ^= 0xc;                \
        x2.o ^= 0x3;                \
        ROUND();                    \
        x2.e ^= 0x9;                \
        x2.o ^= 0x3;                \
        ROUND();                    \
    } while (0)

/* ---------------------------------------------------------------- */

#define P_6()                       \
    do                              \
    {                               \
        u32 reg0, reg1, reg2, reg3; \
        x2.e ^= 0x6;                \
        x2.o ^= 0x9;                \
        ROUND();                    \
        x2.e ^= 0x3;                \
        x2.o ^= 0x9;                \
        ROUND();                    \
        x2.e ^= 0xc;                \
        x2.o ^= 0x6;                \
        ROUND();                    \
        x2.e ^= 0x9;                \
        x2.o ^= 0x6;                \
        ROUND();                    \
        x2.e ^= 0xc;                \
        x2.o ^= 0x3;                \
        ROUND();                    \
        x2.e ^= 0x9;                \
        x2.o ^= 0x3;                \
        ROUND();                    \
    } while (0)

/* ---------------------------------------------------------------- */

#define P_1()                       \
    do                              \
    {                               \
        u32 reg0, reg1, reg2, reg3; \
        x2.e ^= 0x9;                \
        x2.o ^= 0x3;                \
        ROUND();                    \
    } while (0)

#endif // ASCONP_H_