/* permutations_asm.S -- bit-interleaved Ascon permutation for Xtensa */
#include <xtensa/coreasm.h>

.section .data
.align 2
.global ascon_round_constants
.type ascon_round_constants,@object
ascon_round_constants:
    /* each pair holds the even-bit and odd-bit halves of one of the
       twelve Ascon round constants 0xf0 .. 0x4b (see sketch below) */
    .byte 0xc, 0xc
    .byte 0x9, 0xc
    .byte 0xc, 0x9
    .byte 0x9, 0x9
    .byte 0x6, 0xc
    .byte 0x3, 0xc
    .byte 0x6, 0x9
    .byte 0x3, 0x9
    .byte 0xc, 0x6
    .byte 0x9, 0x6
    .byte 0xc, 0x3
    .byte 0x9, 0x3
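
/* The table above stores the bit-interleaved halves of the twelve Ascon
   round constants 0xf0 .. 0x4b: even-indexed bits in byte 0, odd-indexed
   bits in byte 1. A minimal C sketch that regenerates it; the helper name
   interleave_rc is ours, for illustration only:

       #include <stdint.h>
       #include <stdio.h>

       static void interleave_rc(uint8_t rc, uint8_t *even, uint8_t *odd) {
           *even = *odd = 0;
           for (int k = 0; k < 4; k++) {
               *even |= ((rc >> (2 * k)) & 1) << k;     // bit 2k -> even bit k
               *odd  |= ((rc >> (2 * k + 1)) & 1) << k; // bit 2k+1 -> odd bit k
           }
       }

       int main(void) {
           static const uint8_t rc[12] = { 0xf0, 0xe1, 0xd2, 0xc3, 0xb4, 0xa5,
                                           0x96, 0x87, 0x78, 0x69, 0x5a, 0x4b };
           for (int i = 0; i < 12; i++) {
               uint8_t e, o;
               interleave_rc(rc[i], &e, &o);
               printf(".byte 0x%x, 0x%x\n", e, o); // matches the table above
           }
           return 0;
       }
*/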

/* register map: even/odd 32-bit halves of the five 64-bit state words */
#define x0e a4
#define x0o a5
#define x1e a6
#define x1o a7
#define x2e a8
#define x2o a9
#define x3e a10
#define x3o a11
#define x4e a12
#define x4o a13
/* scratch: t0e/t0o (linear layer) alias tmp/neg (s-box layer) */
#define t0e a14
#define t0o a15
#define tmp a14
#define neg a15
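
/* Calling convention, as inferred from the code below: a2 holds a pointer
   to the 40-byte bit-interleaved state, a3 holds the starting round index,
   and rounds a3 .. 11 are executed. A hedged C-side sketch; the type and
   wrapper names are ours, not part of this file:

       #include <stdint.h>

       typedef struct { uint32_t w[10]; } ascon_state_t; // even/odd halves

       extern void P(ascon_state_t *s, int start_round);

       static inline void P12(ascon_state_t *s) { P(s, 0); } // 12 rounds
       static inline void P8(ascon_state_t *s)  { P(s, 4); } //  8 rounds
       static inline void P6(ascon_state_t *s)  { P(s, 6); } //  6 rounds
*/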

.section .text
.align 4
.global P
.type P,@function
P:
    abi_entry 4
    /* load the bit-interleaved state: even/odd 32-bit halves of x0..x4 */
    l32i x0e, a2, 0
    l32i x0o, a2, 4
    l32i x1e, a2, 8
    l32i x1o, a2, 12
    l32i x2e, a2, 16
    l32i x2o, a2, 20
    l32i x3e, a2, 24
    l32i x3o, a2, 28
    l32i x4e, a2, 32
    l32i x4o, a2, 36
    /* spill the state pointer; a2 becomes the round-constant pointer,
       starting at index a3 (addx2: a2 = &ascon_round_constants[2*a3]) */
    s32i a2, a1, 0
    movi a2, ascon_round_constants
    addx2 a2, a3, a2

.Lround_start:
    /* xor the round constant into x2 (byte 0 = even half, byte 1 = odd) */
    l8ui t0e, a2, 0
    xor x2e, x2e, t0e
    l8ui t0o, a2, 1
    xor x2o, x2o, t0o

    /* s-box layer: chi-like nonlinear step on each 32-bit slice;
       ~x is built with movi -1 + xor (no and-with-complement on Xtensa) */
    /* even half */
    xor x0e, x0e, x4e
    xor x4e, x4e, x3e
    xor x2e, x2e, x1e
    movi neg, -1
    xor neg, neg, x4e
    and tmp, neg, x0e
    movi neg, -1
    xor neg, neg, x1e
    and neg, neg, x2e
    xor x0e, x0e, neg
    movi neg, -1
    xor neg, neg, x3e
    and neg, neg, x4e
    xor x2e, x2e, neg
    movi neg, -1
    xor neg, neg, x0e
    and neg, neg, x1e
    xor x4e, x4e, neg
    movi neg, -1
    xor neg, neg, x2e
    and neg, neg, x3e
    xor x1e, x1e, neg
    xor x3e, x3e, tmp
    xor x1e, x1e, x0e
    xor x3e, x3e, x2e
    xor x0e, x0e, x4e
    /* odd half */
    xor x0o, x0o, x4o
    xor x4o, x4o, x3o
    xor x2o, x2o, x1o
    movi neg, -1
    xor neg, neg, x4o
    and tmp, neg, x0o
    movi neg, -1
    xor neg, neg, x1o
    and neg, neg, x2o
    xor x0o, x0o, neg
    movi neg, -1
    xor neg, neg, x3o
    and neg, neg, x4o
    xor x2o, x2o, neg
    movi neg, -1
    xor neg, neg, x0o
    and neg, neg, x1o
    xor x4o, x4o, neg
    movi neg, -1
    xor neg, neg, x2o
    and neg, neg, x3o
    xor x1o, x1o, neg
    xor x3o, x3o, tmp
    xor x1o, x1o, x0o
    xor x3o, x3o, x2o
    xor x0o, x0o, x4o
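
    /* Reference C for one 32-bit slice of the s-box above; the instruction
       order is preserved, including the reuse of already-updated x0 and x2
       (a sketch, names ours):

           void sbox_slice(uint32_t x[5]) {
               x[0] ^= x[4]; x[4] ^= x[3]; x[2] ^= x[1];
               uint32_t t = ~x[4] & x[0]; // saved for x3
               x[0] ^= ~x[1] & x[2];
               x[2] ^= ~x[3] & x[4];
               x[4] ^= ~x[0] & x[1];      // uses the updated x[0]
               x[1] ^= ~x[2] & x[3];      // uses the updated x[2]
               x[3] ^= t;
               x[1] ^= x[0]; x[3] ^= x[2]; x[0] ^= x[4];
               // x[2] = ~x[2] is deferred to the end of the linear layer
           }
    */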

    /* linear diffusion layer: xi ^= ror64(xi, r1) ^ ror64(xi, r2),
       computed on the interleaved halves via 32-bit rotates (ssai + src) */
    /* x0 ^= ror64(x0, 19) ^ ror64(x0, 28) */
    ssai 4
    src t0e, x0o, x0o
    xor t0e, t0e, x0e
    ssai 5
    src t0o, x0e, x0e
    xor t0o, t0o, x0o
    ssai 9
    src t0o, t0o, t0o
    xor x0e, x0e, t0o
    ssai 10
    src t0e, t0e, t0e
    xor x0o, x0o, t0e
    /* x1 ^= ror64(x1, 61) ^ ror64(x1, 39) */
    ssai 11
    src t0e, x1e, x1e
    xor t0e, t0e, x1e
    src t0o, x1o, x1o
    xor t0o, t0o, x1o
    ssai 19
    src t0o, t0o, t0o
    xor x1e, x1e, t0o
    ssai 20
    src t0e, t0e, t0e
    xor x1o, x1o, t0e
    /* x2 ^= ror64(x2, 1) ^ ror64(x2, 6) */
    ssai 2
    src t0e, x2o, x2o
    xor t0e, t0e, x2e
    ssai 3
    src t0o, x2e, x2e
    xor t0o, t0o, x2o
    xor x2e, x2e, t0o
    ssai 1
    src t0e, t0e, t0e
    xor x2o, x2o, t0e
    /* x3 ^= ror64(x3, 10) ^ ror64(x3, 17) */
    ssai 3
    src t0e, x3o, x3o
    xor t0e, t0e, x3e
    ssai 4
    src t0o, x3e, x3e
    xor t0o, t0o, x3o
    ssai 5
    src t0e, t0e, t0e
    xor x3e, x3e, t0e
    src t0o, t0o, t0o
    xor x3o, x3o, t0o
    /* x4 ^= ror64(x4, 7) ^ ror64(x4, 41) */
    ssai 17
    src t0e, x4e, x4e
    xor t0e, t0e, x4e
    src t0o, x4o, x4o
    xor t0o, t0o, x4o
    ssai 3
    src t0o, t0o, t0o
    xor x4e, x4e, t0o
    ssai 4
    src t0e, t0e, t0e
    xor x4o, x4o, t0e
    /* deferred s-box step: x2 = ~x2 */
    movi neg, -1
    xor x2e, x2e, neg
    xor x2o, x2o, neg
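
    /* Each ssai/src pair above is a 32-bit rotate right; 64-bit rotates on
       the interleaved representation follow the usual identity (a sketch,
       names ours):

           uint32_t ror32(uint32_t x, int n) {
               return n ? (x >> n) | (x << (32 - n)) : x;
           }
           // ror64 by an even amount 2*s rotates each half in place:
           //   e' = ror32(e, s);   o' = ror32(o, s);
           // ror64 by an odd amount 2*s + 1 also swaps the halves:
           //   e' = ror32(o, s);   o' = ror32(e, s + 1);
    */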

    /* next round: advance the constant pointer and round index up to 12 */
    addi a2, a2, 2
    addi a3, a3, 1
    bltui a3, 12, .Lround_start

.Lend:
    /* reload the state pointer and write back the permuted state */
    l32i a2, a1, 0
    s32i x0e, a2, 0
    s32i x0o, a2, 4
    s32i x1e, a2, 8
    s32i x1o, a2, 12
    s32i x2e, a2, 16
    s32i x2o, a2, 20
    s32i x3e, a2, 24
    s32i x3o, a2, 28
    s32i x4e, a2, 32
    s32i x4o, a2, 36
    abi_return
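
/* Callers must supply the state in bit-interleaved form: even-indexed bits
   of each 64-bit word in one 32-bit half, odd-indexed bits in the other.
   A hedged C sketch of the conversion for one word (function name ours):

       static void to_interleaved(uint64_t in, uint32_t *even, uint32_t *odd) {
           uint32_t e = 0, o = 0;
           for (int k = 0; k < 32; k++) {
               e |= (uint32_t)((in >> (2 * k)) & 1) << k;
               o |= (uint32_t)((in >> (2 * k + 1)) & 1) << k;
           }
           *even = e;
           *odd = o;
       }
*/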