/* permutations_asm.S */
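/*
 * Xtensa assembly implementation of a 320-bit permutation; the round
 * constants ((15 - r) << 4) | r and the rotation pairs (19,28), (61,39),
 * (1,6), (10,17), (7,41) are those of the Ascon permutation. The state is
 * five 64-bit words x0..x4, each held in a pair of 32-bit registers.
 *
 * Assumed interface (not stated in this file): a2 carries a pointer to the
 * 40-byte state, a3 the starting round index; rounds run until the index
 * reaches 12, so a start index of 12 - n applies the last n rounds. A
 * hypothetical C-side declaration would look like
 *     extern void P(uint32_t state[10], uint32_t start_round);
 * with the high 32 bits of each word stored before the low 32 bits.
 */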
#include <xtensa/coreasm.h>

/* scratch register; doubles as the state pointer (a2) outside the rounds */
#define tmp0 a2
/* current round index (passed in as the second argument) */
#define rnd a3
/* 320-bit state: five 64-bit words x0..x4, split into 32-bit high/low halves */
#define x0h a4
#define x0l a5
#define x1h a6
#define x1l a7
#define x2h a8
#define x2l a9
#define x3h a10
#define x3l a11
#define x4h a12
#define x4l a13
/* t0h/t0l (linear layer) share a14/a15 with tmp1/tmp2 (s-box layer);
   the two uses are never live at the same time */
#define t0h a14
#define t0l a15
#define tmp1 a14
#define tmp2 a15

.section .text
.align 4
.global P
.type P,@function
P:
    abi_entry 4
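    /* load the 320-bit state from the pointer in a2
       (the high half of each 64-bit word sits at the lower offset) */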
    l32i x0h, a2, 0
    l32i x0l, a2, 4
    l32i x1h, a2, 8
    l32i x1l, a2, 12
    l32i x2h, a2, 16
    l32i x2l, a2, 20
    l32i x3h, a2, 24
    l32i x3l, a2, 28
    l32i x4h, a2, 32
    l32i x4l, a2, 36
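    /* spill the state pointer to the stack slot reserved by abi_entry;
       a2 is reused as tmp0 inside the round loop */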
    s32i a2, a1, 0

.Lround_start:
    /* round constant: xor ((15 - rnd) << 4) | rnd into the low half of x2,
       i.e. 0xf0, 0xe1, ..., 0x4b for round indices 0..11 */
    movi tmp0, 15
    sub tmp0, tmp0, rnd
    slli tmp0, tmp0, 4
    or tmp0, tmp0, rnd
    xor x2l, x2l, tmp0

    /* s-box layer: the 5-bit s-box is evaluated bit-sliced across the five
       state words; since it is purely bitwise, the high and low 32-bit
       halves are processed independently with the same instruction sequence */
    movi tmp0, -1 /* all-ones mask: xor with tmp0 acts as bitwise NOT */
    /* high halves */
    xor x0h, x0h, x4h
    xor x4h, x4h, x3h
    xor x2h, x2h, x1h
    xor tmp1, tmp0, x4h
    and tmp2, tmp1, x0h
    xor tmp1, tmp0, x1h
    and tmp1, tmp1, x2h
    xor x0h, x0h, tmp1
    xor tmp1, tmp0, x3h
    and tmp1, tmp1, x4h
    xor x2h, x2h, tmp1
    xor tmp1, tmp0, x0h
    and tmp1, tmp1, x1h
    xor x4h, x4h, tmp1
    xor tmp1, tmp0, x2h
    and tmp1, tmp1, x3h
    xor x1h, x1h, tmp1
    xor x3h, x3h, tmp2
    xor x1h, x1h, x0h
    xor x3h, x3h, x2h
    xor x0h, x0h, x4h
    xor x2h, x2h, tmp0 /* x2h = ~x2h */
    /* low halves (same sequence) */
    xor x0l, x0l, x4l
    xor x4l, x4l, x3l
    xor x2l, x2l, x1l
    xor tmp1, tmp0, x4l
    and tmp2, tmp1, x0l
    xor tmp1, tmp0, x1l
    and tmp1, tmp1, x2l
    xor x0l, x0l, tmp1
    xor tmp1, tmp0, x3l
    and tmp1, tmp1, x4l
    xor x2l, x2l, tmp1
    xor tmp1, tmp0, x0l
    and tmp1, tmp1, x1l
    xor x4l, x4l, tmp1
    xor tmp1, tmp0, x2l
    and tmp1, tmp1, x3l
    xor x1l, x1l, tmp1
    xor x3l, x3l, tmp2
    xor x1l, x1l, x0l
    xor x3l, x3l, x2l
    xor x0l, x0l, x4l
    xor x2l, x2l, tmp0 /* x2l = ~x2l */

    /* linear layer: x_i ^= ror64(x_i, r1) ^ ror64(x_i, r2).
       ssai sets the shift amount, src funnel-shifts a register pair;
       rotations by 32 or more swap the two halves and rotate by r - 32
       (marked "inverted" below) */
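    /* x0 ^= ror64(x0, 19) ^ ror64(x0, 28) */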
    ssai 19
    src t0l, x0h, x0l
    src t0h, x0l, x0h
    xor t0l, t0l, x0l
    xor t0h, t0h, x0h
    ssai 28
    src tmp0, x0h, x0l
    src x0h, x0l, x0h
    xor x0l, tmp0, t0l
    xor x0h, x0h, t0h
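    /* x1 ^= ror64(x1, 61) ^ ror64(x1, 39) */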
    ssai 29 /* inverted 61 */
    src t0l, x1l, x1h
    src t0h, x1h, x1l
    xor t0l, t0l, x1l
    xor t0h, t0h, x1h
    ssai 7 /* inverted 39 */
    src tmp0, x1l, x1h
    src x1h, x1h, x1l
    xor x1l, tmp0, t0l
    xor x1h, x1h, t0h
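    /* x2 ^= ror64(x2, 1) ^ ror64(x2, 6) */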
    ssai 1
    src t0l, x2h, x2l
    src t0h, x2l, x2h
    xor t0l, t0l, x2l
    xor t0h, t0h, x2h
    ssai 6
    src tmp0, x2h, x2l
    src x2h, x2l, x2h
    xor x2l, tmp0, t0l
    xor x2h, x2h, t0h
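    /* x3 ^= ror64(x3, 10) ^ ror64(x3, 17) */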
    ssai 10
    src t0l, x3h, x3l
    src t0h, x3l, x3h
    xor t0l, t0l, x3l
    xor t0h, t0h, x3h
    ssai 17
    src tmp0, x3h, x3l
    src x3h, x3l, x3h
    xor x3l, tmp0, t0l
    xor x3h, x3h, t0h
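    /* x4 ^= ror64(x4, 7) ^ ror64(x4, 41) */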
    ssai 7
    src t0l, x4h, x4l
    src t0h, x4l, x4h
    xor t0l, t0l, x4l
    xor t0h, t0h, x4h
    ssai 9 /* inverted 41 */
    src tmp0, x4l, x4h
    src x4h, x4h, x4l
    xor x4l, tmp0, t0l
    xor x4h, x4h, t0h

    /* next round: repeat until the round index reaches 12 */
    addi rnd, rnd, 1
    bltui rnd, 12, .Lround_start

.Lend:
    /* restore the state pointer and write the updated state back */
    l32i a2, a1, 0
    s32i x0h, a2, 0
    s32i x0l, a2, 4
    s32i x1h, a2, 8
    s32i x1l, a2, 12
    s32i x2h, a2, 16
    s32i x2l, a2, 20
    s32i x3h, a2, 24
    s32i x3l, a2, 28
    s32i x4h, a2, 32
    s32i x4l, a2, 36
    abi_return