#include <xtensa/coreasm.h> /* abi_entry / abi_return macros */

#define tmp0 a2
#define rnd  a3
#define x0h  a4
#define x0l  a5
#define x1h  a6
#define x1l  a7
#define x2h  a8
#define x2l  a9
#define x3h  a10
#define x3l  a11
#define x4h  a12
#define x4l  a13
#define t0h  a14
#define t0l  a15
#define tmp1 a14
#define tmp2 a15

.section .text
.align 4
.global P
.type P,@function

P:
    abi_entry 4
    /* load the five 64-bit lanes as high/low 32-bit word pairs */
    l32i x0h, a2, 0
    l32i x0l, a2, 4
    l32i x1h, a2, 8
    l32i x1l, a2, 12
    l32i x2h, a2, 16
    l32i x2l, a2, 20
    l32i x3h, a2, 24
    l32i x3l, a2, 28
    l32i x4h, a2, 32
    l32i x4l, a2, 36
    s32i a2, a1, 0           /* spill the state pointer; a2 is reused as tmp0 */

.Lround_start:
    /* round constant: rc = ((15 - rnd) << 4) | rnd, xored into the low word of x2 */
    movi tmp0, 15
    sub  tmp0, tmp0, rnd
    slli tmp0, tmp0, 4
    or   tmp0, tmp0, rnd
    xor  x2l, x2l, tmp0

    /* s-box layer */
    movi tmp0, -1
    /* high */
    xor x0h, x0h, x4h
    xor x4h, x4h, x3h
    xor x2h, x2h, x1h
    xor tmp1, tmp0, x4h
    and tmp2, tmp1, x0h      /* tmp2 = ~x4 & x0, kept for x3 */
    xor tmp1, tmp0, x1h
    and tmp1, tmp1, x2h
    xor x0h, x0h, tmp1       /* x0 ^= ~x1 & x2 */
    xor tmp1, tmp0, x3h
    and tmp1, tmp1, x4h
    xor x2h, x2h, tmp1       /* x2 ^= ~x3 & x4 */
    xor tmp1, tmp0, x0h
    and tmp1, tmp1, x1h
    xor x4h, x4h, tmp1       /* x4 ^= ~x0 & x1; updated x0 is safe since (~x1 & x2) & x1 = 0 */
    xor tmp1, tmp0, x2h
    and tmp1, tmp1, x3h
    xor x1h, x1h, tmp1       /* x1 ^= ~x2 & x3; updated x2 is safe since (~x3 & x4) & x3 = 0 */
    xor x3h, x3h, tmp2       /* x3 ^= ~x4 & x0 */
    xor x1h, x1h, x0h
    xor x3h, x3h, x2h
    xor x0h, x0h, x4h
    xor x2h, x2h, tmp0       /* x2 = ~x2 */
    /* low */
    xor x0l, x0l, x4l
    xor x4l, x4l, x3l
    xor x2l, x2l, x1l
    xor tmp1, tmp0, x4l
    and tmp2, tmp1, x0l
    xor tmp1, tmp0, x1l
    and tmp1, tmp1, x2l
    xor x0l, x0l, tmp1
    xor tmp1, tmp0, x3l
    and tmp1, tmp1, x4l
    xor x2l, x2l, tmp1
    xor tmp1, tmp0, x0l
    and tmp1, tmp1, x1l
    xor x4l, x4l, tmp1
    xor tmp1, tmp0, x2l
    and tmp1, tmp1, x3l
    xor x1l, x1l, tmp1
    xor x3l, x3l, tmp2
    xor x1l, x1l, x0l
    xor x3l, x3l, x2l
    xor x0l, x0l, x4l
    xor x2l, x2l, tmp0

    /* linear layer: xi ^= ror64(xi, r1) ^ ror64(xi, r2) via SAR + funnel shift (src);
       rotations >= 32 swap the word halves and use the amount minus 32 ("inverted") */
    /* x0: rotations 19 and 28 */
    ssai 19
    src  t0l, x0h, x0l
    src  t0h, x0l, x0h
    xor  t0l, t0l, x0l
    xor  t0h, t0h, x0h
    ssai 28
    src  tmp0, x0h, x0l
    src  x0h, x0l, x0h
    xor  x0l, tmp0, t0l
    xor  x0h, x0h, t0h
    /* x1: rotations 61 and 39 */
    ssai 29 /* inverted 61 */
    src  t0l, x1l, x1h
    src  t0h, x1h, x1l
    xor  t0l, t0l, x1l
    xor  t0h, t0h, x1h
    ssai 7 /* inverted 39 */
    src  tmp0, x1l, x1h
    src  x1h, x1h, x1l
    xor  x1l, tmp0, t0l
    xor  x1h, x1h, t0h
    /* x2: rotations 1 and 6 */
    ssai 1
    src  t0l, x2h, x2l
    src  t0h, x2l, x2h
    xor  t0l, t0l, x2l
    xor  t0h, t0h, x2h
    ssai 6
    src  tmp0, x2h, x2l
    src  x2h, x2l, x2h
    xor  x2l, tmp0, t0l
    xor  x2h, x2h, t0h
    /* x3: rotations 10 and 17 */
    ssai 10
    src  t0l, x3h, x3l
    src  t0h, x3l, x3h
    xor  t0l, t0l, x3l
    xor  t0h, t0h, x3h
    ssai 17
    src  tmp0, x3h, x3l
    src  x3h, x3l, x3h
    xor  x3l, tmp0, t0l
    xor  x3h, x3h, t0h
    /* x4: rotations 7 and 41 */
    ssai 7
    src  t0l, x4h, x4l
    src  t0h, x4l, x4h
    xor  t0l, t0l, x4l
    xor  t0h, t0h, x4h
    ssai 9 /* inverted 41 */
    src  tmp0, x4l, x4h
    src  x4h, x4h, x4l
    xor  x4l, tmp0, t0l
    xor  x4h, x4h, t0h

    /* loop */
    addi  rnd, rnd, 1
    bltui rnd, 12, .Lround_start

.Lend:
    l32i a2, a1, 0           /* restore the state pointer */
    s32i x0h, a2, 0
    s32i x0l, a2, 4
    s32i x1h, a2, 8
    s32i x1l, a2, 12
    s32i x2h, a2, 16
    s32i x2l, a2, 20
    s32i x3h, a2, 24
    s32i x3l, a2, 28
    s32i x4h, a2, 32
    s32i x4l, a2, 36
    abi_return
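
/*
 * For reference, a minimal C sketch of the round the code above computes on 32-bit
 * word pairs. This is documentation only, not part of the build: it describes the
 * 64-bit lane math, not the in-memory word order, and the names round_sketch and
 * ror64 are illustrative. Since rnd counts up to 12 and indexes the round constant,
 * a start value of 12 - n runs the last n rounds.
 *
 *   #include <stdint.h>
 *
 *   static inline uint64_t ror64(uint64_t x, int n) {
 *     return (x >> n) | (x << (64 - n));
 *   }
 *
 *   static void round_sketch(uint64_t x[5], uint64_t r) {
 *     uint64_t t0, t1, t2, t3, t4;
 *     x[2] ^= ((15 - r) << 4) | r;                          // round constant
 *     x[0] ^= x[4]; x[4] ^= x[3]; x[2] ^= x[1];             // s-box layer
 *     t0 = ~x[0] & x[1]; t1 = ~x[1] & x[2]; t2 = ~x[2] & x[3];
 *     t3 = ~x[3] & x[4]; t4 = ~x[4] & x[0];
 *     x[0] ^= t1; x[1] ^= t2; x[2] ^= t3; x[3] ^= t4; x[4] ^= t0;
 *     x[1] ^= x[0]; x[0] ^= x[4]; x[3] ^= x[2]; x[2] = ~x[2];
 *     x[0] ^= ror64(x[0], 19) ^ ror64(x[0], 28);            // linear layer
 *     x[1] ^= ror64(x[1], 61) ^ ror64(x[1], 39);
 *     x[2] ^= ror64(x[2],  1) ^ ror64(x[2],  6);
 *     x[3] ^= ror64(x[3], 10) ^ ror64(x[3], 17);
 *     x[4] ^= ror64(x[4],  7) ^ ror64(x[4], 41);
 *   }
 */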