Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
lwc
/
candidates
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
60a8ce1b
authored
Jun 03, 2020
by
Alexandre Adomnicai
Committed by
Enrico Pozzobon
Jun 03, 2020
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
skinny
parent
a6544c4f
Show whitespace changes
Inline
Side-by-side
Showing
21 changed files
with
252 additions
and
240 deletions
+252
-240
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_1/encrypt.c
+1
-3
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_1/skinny128.c
+6
-24
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_1/skinnyaead.h
+1
-3
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_1/tk_schedule.c
+87
-22
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_1/tk_schedule.h
+3
-9
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/encrypt.c
+12
-9
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/skinny128.c
+9
-28
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/skinny128.h
+1
-0
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/skinnyaead.h
+1
-3
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/tk_schedule.c
+1
-5
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/tk_schedule.h
+4
-2
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_1/encrypt.c
+2
-12
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_1/skinny128.c
+5
-32
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_1/skinnyaead.h
+1
-3
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_1/tk_schedule.c
+87
-22
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_1/tk_schedule.h
+4
-9
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_2/encrypt.c
+12
-9
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_2/skinny128.c
+10
-36
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_2/skinnyaead.h
+1
-3
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_2/tk_schedule.c
+0
-4
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_2/tk_schedule.h
+4
-2
No files found.
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_1/encrypt.c
View file @
60a8ce1b
...
...
@@ -8,12 +8,10 @@
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
*
* @date
May
2020
* @date
June
2020
******************************************************************************/
#include "skinny128.h"
#include "skinnyaead.h"
#include <string.h>
#include <stdio.h>
/******************************************************************************
* x ^= y where x, y are 128-bit blocks (16 bytes array).
...
...
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_1/skinny128.c
View file @
60a8ce1b
...
...
@@ -16,12 +16,9 @@
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
*
* @date
May
2020
* @date
June
2020
******************************************************************************/
#include <stdio.h>
#include <string.h>
#include "skinny128.h"
#include "tk_schedule.h"
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 0
...
...
@@ -153,16 +150,8 @@ void skinny128_384_plus_encrypt(u8* ctext, const u8* ptext, const u32* rtk1,
u32
tmp
;
// used in SWAPMOVE macro
u32
state
[
4
];
// 128-bit state
packing
(
state
,
ptext
);
// from byte to bitsliced representation
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
16
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
32
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
48
);
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
64
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
80
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
96
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
112
);
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
128
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
144
);
for
(
int
i
=
0
;
i
<
10
;
i
++
)
QUADRUPLE_ROUND
(
state
,
rtk1
+
(
i
%
4
)
*
16
,
rtk2_3
+
i
*
16
);
unpacking
(
ctext
,
state
);
// from bitsliced to byte representation
}
...
...
@@ -176,15 +165,7 @@ void skinny128_384_plus_decrypt(u8* ctext, const u8* ptext, const u32* rtk1,
u32
tmp
;
// used in SWAPMOVE macro
u32
state
[
4
];
// 128-bit state
packing
(
state
,
ptext
);
// from byte to bitsliced representation
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
144
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
128
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
112
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
96
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
80
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
64
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
48
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
32
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
16
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
);
for
(
int
i
=
9
;
i
>=
0
;
i
--
)
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
(
i
%
4
)
*
16
,
rtk2_3
+
i
*
16
);
unpacking
(
ctext
,
state
);
// from bitsliced to byte representation
}
\ No newline at end of file
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_1/skinnyaead.h
View file @
60a8ce1b
...
...
@@ -3,9 +3,7 @@
#include "skinny128.h"
typedef
unsigned
char
u8
;
typedef
unsigned
int
u32
;
typedef
unsigned
long
long
u64
;
typedef
uint64_t
u64
;
#define TAGBYTES 16
#define KEYBYTES 16
...
...
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_1/tk_schedule.c
View file @
60a8ce1b
...
...
@@ -4,16 +4,11 @@
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
*
* @date
May
2020
* @date
June
2020
******************************************************************************/
#include <stdio.h>
#include <string.h> //for memcmp
#include "tk_schedule.h"
#include <string.h>
#include "skinny128.h"
typedef
unsigned
char
u8
;
typedef
unsigned
int
u32
;
/******************************************************************************
* The round constants according to the new representation.
******************************************************************************/
...
...
@@ -260,6 +255,70 @@ void precompute_lfsr_tk3(u32* tk, const u8* key, const int rounds) {
}
/******************************************************************************
* Fills 'tk' with LFSR2(TK2) ^ LFSR3(TK3) for the round tweakeys.
*
* Produces the same result as calling, one after the other:
*   - precompute_lfsr_tk2(tk, t2, SKINNY128_384_ROUNDS);
*   - precompute_lfsr_tk3(tk, t3, SKINNY128_384_ROUNDS);
* The merged version touches 'tk' less often, which can save cycles on some
* platforms (a gain of ~1k cycles per call is reported on ARMv7).
*
* tk     : output, receives tk[0..3] and then, for every i = 0, 8, 16, ...
*          below 'rounds', the words tk[4*i+4..7], tk[4*i+12..15],
*          tk[4*i+20..23] and tk[4*i+28..31]. Other words are left untouched.
* t2, t3 : TK2 and TK3 as byte arrays, handed to packing().
* rounds : number of rounds, consumed 8 at a time.
*
* If code size matters more than speed, the unrolled loop can be swapped for:
*   for(int i = 0 ; i < rounds; i+=2) {
*       lfsr2_bs(tk2);
*       lfsr3_bs(tk3);
*       tk[i*4+4] = tk2[0] ^ tk3[0];
*       tk[i*4+5] = tk2[1] ^ tk3[1];
*       tk[i*4+6] = tk2[2] ^ tk3[2];
*       tk[i*4+7] = tk2[3] ^ tk3[3];
*   }
* which costs some extra cycles (~260 on ARM Cortex-M).
******************************************************************************/
void precompute_lfsr_tk2_3(u32* tk, const u8* t2, const u8* t3, const int rounds) {
    const u32 hi = 0xaaaaaaaa;  // odd-numbered bits (1, 3, 5, ...) of a word
    u32 tk2[4], tk3[4];
    u32 w;                      // scratch word for the in-place updates
    int i;

    packing(tk2, t2);
    packing(tk3, t3);

    // First round tweakey: TK2 ^ TK3 before any LFSR update.
    for (i = 0; i < 4; i++)
        tk[i] = tk2[i] ^ tk3[i];

    // Each pass updates every word of tk2/tk3 exactly once and emits four
    // round tweakeys. Instead of moving words around, the index pattern of
    // the XORs below rotates from one group to the next.
    for (i = 0; i < rounds; i += 8) {
        u32* out = tk + 4 * i;

        // update tk2[0] and tk3[3]
        w = tk2[0] ^ (tk2[2] & hi);
        tk2[0] = ((w & hi) >> 1) | ((w << 1) & hi);
        w = tk3[3] ^ ((tk3[1] & hi) >> 1);
        tk3[3] = ((w & hi) >> 1) | ((w << 1) & hi);
        out[4] = tk2[1] ^ tk3[3];
        out[5] = tk2[2] ^ tk3[0];
        out[6] = tk2[3] ^ tk3[1];
        out[7] = tk2[0] ^ tk3[2];

        // update tk2[1] and tk3[2]
        w = tk2[1] ^ (tk2[3] & hi);
        tk2[1] = ((w & hi) >> 1) | ((w << 1) & hi);
        w = tk3[2] ^ ((tk3[0] & hi) >> 1);
        tk3[2] = ((w & hi) >> 1) | ((w << 1) & hi);
        out[12] = tk2[2] ^ tk3[2];
        out[13] = tk2[3] ^ tk3[3];
        out[14] = tk2[0] ^ tk3[0];
        out[15] = tk2[1] ^ tk3[1];

        // update tk2[2] and tk3[1]
        w = tk2[2] ^ (tk2[0] & hi);
        tk2[2] = ((w & hi) >> 1) | ((w << 1) & hi);
        w = tk3[1] ^ ((tk3[3] & hi) >> 1);
        tk3[1] = ((w & hi) >> 1) | ((w << 1) & hi);
        out[20] = tk2[3] ^ tk3[1];
        out[21] = tk2[0] ^ tk3[2];
        out[22] = tk2[1] ^ tk3[3];
        out[23] = tk2[2] ^ tk3[0];

        // update tk2[3] and tk3[0]
        w = tk2[3] ^ (tk2[1] & hi);
        tk2[3] = ((w & hi) >> 1) | ((w << 1) & hi);
        w = tk3[0] ^ ((tk3[2] & hi) >> 1);
        tk3[0] = ((w & hi) >> 1) | ((w << 1) & hi);
        out[28] = tk2[0] ^ tk3[0];
        out[29] = tk2[1] ^ tk3[1];
        out[30] = tk2[2] ^ tk3[2];
        out[31] = tk2[3] ^ tk3[3];
    }
}
/******************************************************************************
* XOR TK with TK1 before applying the permutations.
* The key is then rearranged to match the barrel shiftrows representation.
******************************************************************************/
...
...
@@ -267,19 +326,20 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
u32
test
;
u32
tk1
[
4
],
tmp
[
4
];
packing
(
tk1
,
key
);
memcpy
(
tmp
,
tk
,
16
);
tmp
[
0
]
^=
tk1
[
0
];
tmp
[
1
]
^=
tk1
[
1
];
tmp
[
2
]
^=
tk1
[
2
];
tmp
[
3
]
^=
tk1
[
3
];
tmp
[
0
]
=
tk
[
0
]
^
tk1
[
0
];
tmp
[
1
]
=
tk
[
1
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
2
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
3
]
^
tk1
[
3
];
for
(
int
i
=
0
;
i
<
rounds
;
i
+=
8
)
{
test
=
(
i
%
16
<
8
)
?
1
:
0
;
//to apply the right power of P
tk
[
i
*
4
]
=
tmp
[
2
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
1
]
=
tmp
[
3
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
2
]
=
tmp
[
0
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
3
]
=
tmp
[
1
]
&
0xf0f0f0f0
;
memcpy
(
tmp
,
tk
+
i
*
4
+
4
,
16
);
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
0
]
=
tk
[
i
*
4
+
4
]
^
tk1
[
0
];
tmp
[
1
]
=
tk
[
i
*
4
+
5
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
6
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
7
]
^
tk1
[
3
];
if
(
test
)
permute_tk_2
(
tmp
);
// applies P^2
else
...
...
@@ -296,8 +356,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
10
]
|=
ROR
(
tmp
[
0
],
12
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
11
]
=
ROR
(
tmp
[
1
],
28
)
&
0x03030303
;
tk
[
i
*
4
+
11
]
|=
ROR
(
tmp
[
1
],
12
)
&
0x0c0c0c0c
;
memcpy
(
tmp
,
tk
+
i
*
4
+
12
,
16
);
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
0
]
=
tk
[
i
*
4
+
12
]
^
tk1
[
0
];
tmp
[
1
]
=
tk
[
i
*
4
+
13
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
14
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
15
]
^
tk1
[
3
];
if
(
test
)
permute_tk_4
(
tmp
);
// applies P^4
else
...
...
@@ -310,8 +372,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
17
]
=
ROR
(
tmp
[
3
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
18
]
=
ROR
(
tmp
[
0
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
19
]
=
ROR
(
tmp
[
1
],
16
)
&
0xf0f0f0f0
;
memcpy
(
tmp
,
tk
+
i
*
4
+
20
,
16
);
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
0
]
=
tk
[
i
*
4
+
20
]
^
tk1
[
0
];
tmp
[
1
]
=
tk
[
i
*
4
+
21
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
22
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
23
]
^
tk1
[
3
];
if
(
test
)
permute_tk_6
(
tmp
);
// applies P^6
else
...
...
@@ -328,8 +392,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
26
]
|=
ROR
(
tmp
[
0
],
28
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
27
]
=
ROR
(
tmp
[
1
],
12
)
&
0x03030303
;
tk
[
i
*
4
+
27
]
|=
ROR
(
tmp
[
1
],
28
)
&
0x0c0c0c0c
;
memcpy
(
tmp
,
tk
+
i
*
4
+
28
,
16
);
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
0
]
=
tk
[
i
*
4
+
28
]
^
tk1
[
0
];
tmp
[
1
]
=
tk
[
i
*
4
+
29
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
30
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
31
]
^
tk1
[
3
];
if
(
test
)
permute_tk_8
(
tmp
);
// applies P^8
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
...
...
@@ -350,8 +416,7 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
******************************************************************************/
void
precompute_rtk2_3
(
u32
*
rtk
,
const
u8
*
tk2
,
const
u8
*
tk3
)
{
memset
(
rtk
,
0x00
,
16
*
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk2
(
rtk
,
tk2
,
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk3
(
rtk
,
tk3
,
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk2_3
(
rtk
,
tk2
,
tk3
,
SKINNY128_384_ROUNDS
);
permute_tk
(
rtk
,
(
u8
*
)(
rtk
+
8
),
SKINNY128_384_ROUNDS
);
// rtk+8..rtk+11 are still all-zero here, so this acts as a null TK1
for
(
int
i
=
0
;
i
<
SKINNY128_384_ROUNDS
;
i
++
)
{
// add rconsts
for
(
int
j
=
0
;
j
<
4
;
j
++
)
...
...
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_1/tk_schedule.h
View file @
60a8ce1b
#ifndef TK_SCHEDULE_H_
#define TK_SCHEDULE_H_
typedef
unsigned
char
u8
;
typedef
unsigned
int
u32
;
#include <stdint.h>
typedef
uint8_t
u8
;
typedef
uint32_t
u32
;
void
packing
(
u32
*
out
,
const
u8
*
in
);
void
unpacking
(
u8
*
out
,
u32
*
in
);
...
...
@@ -11,13 +12,6 @@ void precompute_rtk1(u32* rtk1, const u8* tk1);
#define ROR(x,y) (((x) >> (y)) | ((x) << (32 - (y))))
#define XOR_BLOCKS(x,y) ({ \
(x)[0] ^= (y)[0]; \
(x)[1] ^= (y)[1]; \
(x)[2] ^= (y)[2]; \
(x)[3] ^= (y)[3]; \
})
#define SWAPMOVE(a, b, mask, n) ({ \
tmp = (b ^ (a >> n)) & mask; \
b ^= tmp; \
...
...
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/encrypt.c
View file @
60a8ce1b
...
...
@@ -8,12 +8,10 @@
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
*
* @date
May
2020
* @date
June
2020
******************************************************************************/
#include "skinny128.h"
#include "skinnyaead.h"
#include <string.h>
#include <stdio.h>
/******************************************************************************
* x ^= y where x, y are 128-bit blocks (16 bytes array).
...
...
@@ -33,13 +31,13 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
u8
feedback
;
u8
tmp
[
2
*
BLOCKBYTES
];
memset
(
tmp
,
0x00
,
2
*
BLOCKBYTES
);
memset
(
auth
,
0x00
,
BLOCKBYTES
);
SET_DOMAIN
(
tmp
,
0x02
);
SET_DOMAIN
(
tmp
+
BLOCKBYTES
,
0x02
);
memset
(
auth
,
0x00
,
BLOCKBYTES
);
while
(
adlen
>=
2
*
BLOCKBYTES
)
{
LE_STR_64
(
tmp
,
lfsr
);
UPDATE_LFSR
(
lfsr
);
LE_STR_64
(
tmp
+
BLOCKBYTES
,
lfsr
);
SET_DOMAIN
(
tmp
+
BLOCKBYTES
,
0x02
);
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tmp
+
BLOCKBYTES
);
skinny128_384_plus_encrypt
(
tmp
,
tmp
+
BLOCKBYTES
,
ad
,
ad
+
BLOCKBYTES
,
*
tk
);
xor_block
(
auth
,
tmp
);
...
...
@@ -47,6 +45,9 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
adlen
-=
2
*
BLOCKBYTES
;
ad
+=
2
*
BLOCKBYTES
;
UPDATE_LFSR
(
lfsr
);
memset
(
tmp
,
0x00
,
2
*
BLOCKBYTES
);
// to save 32 bytes of RAM
SET_DOMAIN
(
tmp
,
0x02
);
SET_DOMAIN
(
tmp
+
BLOCKBYTES
,
0x02
);
}
if
(
adlen
>
BLOCKBYTES
)
{
// pad and process 2 blocks in parallel
LE_STR_64
(
tmp
,
lfsr
);
...
...
@@ -65,11 +66,12 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
LE_STR_64
(
tmp
,
lfsr
);
if
(
mlen
==
0
)
{
// if tag has *NOT* been calculated yet
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tag
);
// compute the tag
skinny128_384_plus_encrypt
(
auth
,
c
,
ad
,
c
,
*
tk
);
skinny128_384_plus_encrypt
(
tmp
,
c
,
ad
,
c
,
*
tk
);
}
else
{
// if tag has already been calculated
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tmp
);
// process last ad block
skinny128_384_plus_encrypt
(
auth
,
auth
,
ad
,
ad
,
*
tk
);
skinny128_384_plus_encrypt
(
tmp
,
tmp
,
ad
,
ad
,
*
tk
);
}
xor_block
(
auth
,
tmp
);
}
else
if
(
adlen
>
0
)
{
LE_STR_64
(
tmp
,
lfsr
);
SET_DOMAIN
(
tmp
,
0x03
);
// domain for padding ad
...
...
@@ -78,11 +80,12 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
tmp
[
BLOCKBYTES
+
adlen
]
^=
0x80
;
// padding
if
(
mlen
==
0
)
{
// if tag has *NOT* been calculated yet
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tag
);
// compute the tag
skinny128_384_plus_encrypt
(
auth
,
c
,
tmp
+
BLOCKBYTES
,
c
,
*
tk
);
skinny128_384_plus_encrypt
(
tmp
,
c
,
tmp
+
BLOCKBYTES
,
c
,
*
tk
);
}
else
{
// if tag has already been calculated
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tmp
);
// process last ad block
skinny128_384_plus_encrypt
(
auth
,
auth
,
tmp
+
BLOCKBYTES
,
tmp
+
BLOCKBYTES
,
*
tk
);
skinny128_384_plus_encrypt
(
tmp
,
tmp
,
tmp
+
BLOCKBYTES
,
tmp
+
BLOCKBYTES
,
*
tk
);
}
xor_block
(
auth
,
tmp
);
}
}
...
...
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/skinny128.c
View file @
60a8ce1b
...
...
@@ -16,12 +16,9 @@
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
*
* @date
May
2020
* @date
June
2020
******************************************************************************/
#include <stdio.h>
#include <string.h>
#include "skinny128.h"
#include "tk_schedule.h"
/****************************************************************************
* The MixColumns operation for rounds i such that (i % 4) == 0.
...
...
@@ -84,7 +81,7 @@ void mixcolumns_3(u32* state) {
}
/****************************************************************************
* The inverse MixColumns o
ep
ration for rounds i such that (i % 4) == 0
* The inverse MixColumns o
pe
ration for rounds i such that (i % 4) == 0
****************************************************************************/
void
inv_mixcolumns_0
(
u32
*
state
)
{
u32
tmp
;
...
...
@@ -99,7 +96,7 @@ void inv_mixcolumns_0(u32* state) {
}
/****************************************************************************
* The inverse MixColumns o
ep
ration for rounds i such that (i % 4) == 1
* The inverse MixColumns o
pe
ration for rounds i such that (i % 4) == 1
****************************************************************************/
void
inv_mixcolumns_1
(
u32
*
state
)
{
u32
tmp
;
...
...
@@ -114,7 +111,7 @@ void inv_mixcolumns_1(u32* state) {
}
/****************************************************************************
* The inverse MixColumns o
ep
ration for rounds i such that (i % 4) == 2
* The inverse MixColumns o
pe
ration for rounds i such that (i % 4) == 2
****************************************************************************/
void
inv_mixcolumns_2
(
u32
*
state
)
{
u32
tmp
;
...
...
@@ -129,7 +126,7 @@ void inv_mixcolumns_2(u32* state) {
}
/****************************************************************************
* The inverse MixColumns o
ep
ration for rounds i such that (i % 4) == 3
* The inverse MixColumns o
pe
ration for rounds i such that (i % 4) == 3
****************************************************************************/
void
inv_mixcolumns_3
(
u32
*
state
)
{
u32
tmp
;
...
...
@@ -166,16 +163,8 @@ void skinny128_384_plus_encrypt(u8* ctext, u8* ctext_bis, const u8* ptext,
const
u8
*
ptext_bis
,
const
tweakey
tk
)
{
u32
state
[
8
];
packing
(
state
,
ptext
,
ptext_bis
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
32
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
64
,
tk
.
rtk2_3
+
64
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
96
,
tk
.
rtk2_3
+
96
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
+
128
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
160
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
64
,
tk
.
rtk2_3
+
192
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
96
,
tk
.
rtk2_3
+
224
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
+
256
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
288
);
for
(
int
i
=
0
;
i
<
10
;
i
++
)
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
(
i
%
4
)
*
32
,
tk
.
rtk2_3
+
i
*
32
);
unpacking
(
ctext
,
ctext_bis
,
state
);
}
...
...
@@ -188,15 +177,7 @@ void skinny128_384_plus_decrypt(u8* ptext, u8* ptext_bis, const u8* ctext,
const
u8
*
ctext_bis
,
const
tweakey
tk
)
{
u32
state
[
8
];
packing
(
state
,
ctext
,
ctext_bis
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
288
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
+
256
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
96
,
tk
.
rtk2_3
+
224
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
64
,
tk
.
rtk2_3
+
192
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
160
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
+
128
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
96
,
tk
.
rtk2_3
+
96
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
64
,
tk
.
rtk2_3
+
64
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
32
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
);
for
(
int
i
=
9
;
i
>=
0
;
i
--
)
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
(
i
%
4
)
*
32
,
tk
.
rtk2_3
+
i
*
32
);
unpacking
(
ptext
,
ptext_bis
,
state
);
}
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/skinny128.h
View file @
60a8ce1b
#ifndef SKINNY128_H_
#define SKINNY128_H_
#include "tk_schedule.h"
void
skinny128_384_plus_encrypt
(
u8
*
ctext
,
u8
*
ctext_bis
,
const
u8
*
ptext
,
...
...
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/skinnyaead.h
View file @
60a8ce1b
...
...
@@ -3,9 +3,7 @@
#include "skinny128.h"
typedef
unsigned
char
u8
;
typedef
unsigned
int
u32
;
typedef
unsigned
long
long
u64
;
typedef
uint64_t
u64
;
#define TAGBYTES 16
#define KEYBYTES 16
...
...
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/tk_schedule.c
View file @
60a8ce1b
...
...
@@ -7,15 +7,11 @@
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
*
* @date
May
2020
* @date
June
2020
*******************************************************************************/
#include <stdio.h>
#include <string.h>
#include "tk_schedule.h"
typedef
unsigned
char
u8
;
typedef
unsigned
int
u32
;
/****************************************************************************
* The round constants according to the fixsliced representation.
****************************************************************************/
...
...
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/tk_schedule.h
View file @
60a8ce1b
#ifndef TK_SCHEDULE_BS_H_
#define TK_SCHEDULE_BS_H_
typedef
unsigned
char
u8
;
typedef
unsigned
int
u32
;
#include <stdint.h>
typedef
uint8_t
u8
;
typedef
uint32_t
u32
;
typedef
struct
{
u32
rtk1
[
8
*
16
];
...
...
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_1/encrypt.c
View file @
60a8ce1b
/******************************************************************************
* Constant-time implementation of SKINNY-AEAD-M1 (v1.1).
*
* Two blocks are treated in parallel with SKINNY-128-384 whenever possible.
* Constant-time implementation of SKINNY-AEAD-M1(v1).
*
* For more details, see the paper at: https://
*
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
*
* @date
May
2020
* @date
June
2020
******************************************************************************/
#include "skinny128.h"
#include "skinnyaead.h"
#include <string.h>
#include <stdio.h>
/******************************************************************************
* x ^= y where x, y are 128-bit blocks (16 bytes array).
...
...
@@ -75,12 +71,6 @@ int crypto_aead_encrypt (unsigned char *c, unsigned long long *clen,
}
LE_STR_64
(
tmp
,
lfsr
);
// lfsr for tag computation
precompute_rtk1
(
rtk1
,
tmp
);
for
(
int
i
=
0
;
i
<
16
;
i
++
)
{
printf
(
"%08x %08x %08x %08x
\n
"
,
rtk1
[
i
*
4
],
rtk1
[
i
*
4
+
1
],
rtk1
[
i
*
4
+
2
],
rtk1
[
i
*
4
+
3
]);
}
for
(
int
i
=
0
;
i
<
56
;
i
++
)
{
printf
(
"%08x %08x %08x %08x
\n
"
,
rtk2_3
[
i
*
4
],
rtk2_3
[
i
*
4
+
1
],
rtk2_3
[
i
*
4
+
2
],
rtk2_3
[
i
*
4
+
3
]);
}
skinny128_384_encrypt
(
c
,
c
,
rtk1
,
rtk2_3
);
// compute the tag
// ----------------- Process the plaintext -----------------
...
...
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_1/skinny128.c
View file @
60a8ce1b
...
...
@@ -16,12 +16,9 @@
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
*
* @date
May
2020
* @date
June
2020
******************************************************************************/
#include <stdio.h>
#include <string.h>
#include "skinny128.h"
#include "tk_schedule.h"
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 0
...
...
@@ -153,20 +150,8 @@ void skinny128_384_encrypt(u8* ctext, const u8* ptext, const u32* rtk1,
u32
tmp
;
// used in SWAPMOVE macro
u32
state
[
4
];
// 128-bit state
packing
(
state
,
ptext
);
// from byte to bitsliced representation
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
16
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
32
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
48
);
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
64
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
80
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
96
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
112
);
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
128
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
144
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
160
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
176
);
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
192
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
208
);
for
(
int
i
=
0
;
i
<
14
;
i
++
)
QUADRUPLE_ROUND
(
state
,
rtk1
+
(
i
%
4
)
*
16
,
rtk2_3
+
i
*
16
);
unpacking
(
ctext
,
state
);
// from bitsliced to byte representation
}
...
...
@@ -180,19 +165,7 @@ void skinny128_384_decrypt(u8* ctext, const u8* ptext, const u32* rtk1,
u32
tmp
;
// used in SWAPMOVE macro
u32
state
[
4
];
// 128-bit state
packing
(
state
,
ptext
);
// from byte to bitsliced representation
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
208
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
192
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
176
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
160
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
144
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
128
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
112
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
96
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
80
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
64
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
48
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
32
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
16
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
);
for
(
int
i
=
13
;
i
>=
0
;
i
--
)
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
(
i
%
4
)
*
16
,
rtk2_3
+
i
*
16
);
unpacking
(
ctext
,
state
);
// from bitsliced to byte representation
}
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_1/skinnyaead.h
View file @
60a8ce1b
...
...
@@ -3,9 +3,7 @@
#include "skinny128.h"
typedef
unsigned
char
u8
;
typedef
unsigned
int
u32
;
typedef
unsigned
long
long
u64
;
typedef
uint64_t
u64
;
#define TAGBYTES 16
#define KEYBYTES 16
...
...
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_1/tk_schedule.c
View file @
60a8ce1b
...
...
@@ -4,16 +4,11 @@
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
*
* @date
May
2020
* @date
June
2020
******************************************************************************/
#include <stdio.h>
#include <string.h> //for memcmp
#include "tk_schedule.h"
#include <string.h>
#include "skinny128.h"
typedef
unsigned
char
u8
;
typedef
unsigned
int
u32
;
/******************************************************************************
* The round constants according to the new representation.
******************************************************************************/
...
...
@@ -271,6 +266,70 @@ void precompute_lfsr_tk3(u32* tk, const u8* key, const int rounds) {
}
/******************************************************************************
* Fills 'tk' with LFSR2(TK2) ^ LFSR3(TK3) for the round tweakeys.
*
* Produces the same result as calling, one after the other:
*   - precompute_lfsr_tk2(tk, t2, SKINNY128_384_ROUNDS);
*   - precompute_lfsr_tk3(tk, t3, SKINNY128_384_ROUNDS);
* The merged version touches 'tk' less often, which can save cycles on some
* platforms (a gain of ~1k cycles per call is reported on ARMv7).
*
* tk     : output, receives tk[0..3] and then, for every i = 0, 8, 16, ...
*          below 'rounds', the words tk[4*i+4..7], tk[4*i+12..15],
*          tk[4*i+20..23] and tk[4*i+28..31]. Other words are left untouched.
* t2, t3 : TK2 and TK3 as byte arrays, handed to packing().
* rounds : number of rounds, consumed 8 at a time.
*
* If code size matters more than speed, the unrolled loop can be swapped for:
*   for(int i = 0 ; i < rounds; i+=2) {
*       lfsr2_bs(tk2);
*       lfsr3_bs(tk3);
*       tk[i*4+4] = tk2[0] ^ tk3[0];
*       tk[i*4+5] = tk2[1] ^ tk3[1];
*       tk[i*4+6] = tk2[2] ^ tk3[2];
*       tk[i*4+7] = tk2[3] ^ tk3[3];
*   }
* which costs some extra cycles (~260 on ARM Cortex-M).
******************************************************************************/
void precompute_lfsr_tk2_3(u32* tk, const u8* t2, const u8* t3, const int rounds) {
    const u32 hi = 0xaaaaaaaa;  // odd-numbered bits (1, 3, 5, ...) of a word
    u32 tk2[4], tk3[4];
    u32 w;                      // scratch word for the in-place updates
    int i;

    packing(tk2, t2);
    packing(tk3, t3);

    // First round tweakey: TK2 ^ TK3 before any LFSR update.
    for (i = 0; i < 4; i++)
        tk[i] = tk2[i] ^ tk3[i];

    // Each pass updates every word of tk2/tk3 exactly once and emits four
    // round tweakeys. Instead of moving words around, the index pattern of
    // the XORs below rotates from one group to the next.
    for (i = 0; i < rounds; i += 8) {
        u32* out = tk + 4 * i;

        // update tk2[0] and tk3[3]
        w = tk2[0] ^ (tk2[2] & hi);
        tk2[0] = ((w & hi) >> 1) | ((w << 1) & hi);
        w = tk3[3] ^ ((tk3[1] & hi) >> 1);
        tk3[3] = ((w & hi) >> 1) | ((w << 1) & hi);
        out[4] = tk2[1] ^ tk3[3];
        out[5] = tk2[2] ^ tk3[0];
        out[6] = tk2[3] ^ tk3[1];
        out[7] = tk2[0] ^ tk3[2];

        // update tk2[1] and tk3[2]
        w = tk2[1] ^ (tk2[3] & hi);
        tk2[1] = ((w & hi) >> 1) | ((w << 1) & hi);
        w = tk3[2] ^ ((tk3[0] & hi) >> 1);
        tk3[2] = ((w & hi) >> 1) | ((w << 1) & hi);
        out[12] = tk2[2] ^ tk3[2];
        out[13] = tk2[3] ^ tk3[3];
        out[14] = tk2[0] ^ tk3[0];
        out[15] = tk2[1] ^ tk3[1];

        // update tk2[2] and tk3[1]
        w = tk2[2] ^ (tk2[0] & hi);
        tk2[2] = ((w & hi) >> 1) | ((w << 1) & hi);
        w = tk3[1] ^ ((tk3[3] & hi) >> 1);
        tk3[1] = ((w & hi) >> 1) | ((w << 1) & hi);
        out[20] = tk2[3] ^ tk3[1];
        out[21] = tk2[0] ^ tk3[2];
        out[22] = tk2[1] ^ tk3[3];
        out[23] = tk2[2] ^ tk3[0];

        // update tk2[3] and tk3[0]
        w = tk2[3] ^ (tk2[1] & hi);
        tk2[3] = ((w & hi) >> 1) | ((w << 1) & hi);
        w = tk3[0] ^ ((tk3[2] & hi) >> 1);
        tk3[0] = ((w & hi) >> 1) | ((w << 1) & hi);
        out[28] = tk2[0] ^ tk3[0];
        out[29] = tk2[1] ^ tk3[1];
        out[30] = tk2[2] ^ tk3[2];
        out[31] = tk2[3] ^ tk3[3];
    }
}
/******************************************************************************
* XOR TK with TK1 before applying the permutations.
* The key is then rearranged to match the barrel shiftrows representation.
******************************************************************************/
...
...
@@ -278,19 +337,20 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
u32
test
;
u32
tk1
[
4
],
tmp
[
4
];
packing
(
tk1
,
key
);
memcpy
(
tmp
,
tk
,
16
);
tmp
[
0
]
^=
tk1
[
0
];
tmp
[
1
]
^=
tk1
[
1
];
tmp
[
2
]
^=
tk1
[
2
];
tmp
[
3
]
^=
tk1
[
3
];
tmp
[
0
]
=
tk
[
0
]
^
tk1
[
0
];
tmp
[
1
]
=
tk
[
1
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
2
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
3
]
^
tk1
[
3
];
for
(
int
i
=
0
;
i
<
rounds
;
i
+=
8
)
{
test
=
(
i
%
16
<
8
)
?
1
:
0
;
//to apply the right power of P
tk
[
i
*
4
]
=
tmp
[
2
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
1
]
=
tmp
[
3
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
2
]
=
tmp
[
0
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
3
]
=
tmp
[
1
]
&
0xf0f0f0f0
;
memcpy
(
tmp
,
tk
+
i
*
4
+
4
,
16
);
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
0
]
=
tk
[
i
*
4
+
4
]
^
tk1
[
0
];
tmp
[
1
]
=
tk
[
i
*
4
+
5
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
6
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
7
]
^
tk1
[
3
];
if
(
test
)
permute_tk_2
(
tmp
);
// applies P^2
else
...
...
@@ -307,8 +367,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
10
]
|=
ROR
(
tmp
[
0
],
12
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
11
]
=
ROR
(
tmp
[
1
],
28
)
&
0x03030303
;
tk
[
i
*
4
+
11
]
|=
ROR
(
tmp
[
1
],
12
)
&
0x0c0c0c0c
;
memcpy
(
tmp
,
tk
+
i
*
4
+
12
,
16
);
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
0
]
=
tk
[
i
*
4
+
12
]
^
tk1
[
0
];
tmp
[
1
]
=
tk
[
i
*
4
+
13
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
14
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
15
]
^
tk1
[
3
];
if
(
test
)
permute_tk_4
(
tmp
);
// applies P^4
else
...
...
@@ -321,8 +383,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
17
]
=
ROR
(
tmp
[
3
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
18
]
=
ROR
(
tmp
[
0
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
19
]
=
ROR
(
tmp
[
1
],
16
)
&
0xf0f0f0f0
;
memcpy
(
tmp
,
tk
+
i
*
4
+
20
,
16
);
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
0
]
=
tk
[
i
*
4
+
20
]
^
tk1
[
0
];
tmp
[
1
]
=
tk
[
i
*
4
+
21
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
22
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
23
]
^
tk1
[
3
];
if
(
test
)
permute_tk_6
(
tmp
);
// applies P^6
else
...
...
@@ -339,8 +403,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
26
]
|=
ROR
(
tmp
[
0
],
28
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
27
]
=
ROR
(
tmp
[
1
],
12
)
&
0x03030303
;
tk
[
i
*
4
+
27
]
|=
ROR
(
tmp
[
1
],
28
)
&
0x0c0c0c0c
;
memcpy
(
tmp
,
tk
+
i
*
4
+
28
,
16
);
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
0
]
=
tk
[
i
*
4
+
28
]
^
tk1
[
0
];
tmp
[
1
]
=
tk
[
i
*
4
+
29
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
30
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
31
]
^
tk1
[
3
];
if
(
test
)
permute_tk_8
(
tmp
);
// applies P^8
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
...
...
@@ -361,8 +427,7 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
******************************************************************************/
void
precompute_rtk2_3
(
u32
*
rtk
,
const
u8
*
tk2
,
const
u8
*
tk3
)
{
memset
(
rtk
,
0x00
,
16
*
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk2
(
rtk
,
tk2
,
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk3
(
rtk
,
tk3
,
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk2_3
(
rtk
,
tk2
,
tk3
,
SKINNY128_384_ROUNDS
);
permute_tk
(
rtk
,
(
u8
*
)(
rtk
+
8
),
SKINNY128_384_ROUNDS
);
// rtk+8 is NULL
for
(
int
i
=
0
;
i
<
SKINNY128_384_ROUNDS
;
i
++
)
{
// add rconsts
for
(
int
j
=
0
;
j
<
4
;
j
++
)
...
...
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_1/tk_schedule.h
View file @
60a8ce1b
#ifndef TK_SCHEDULE_H_
#define TK_SCHEDULE_H_
typedef
unsigned
char
u8
;
typedef
unsigned
int
u32
;
#include <stdint.h>
typedef
uint8_t
u8
;
typedef
uint32_t
u32
;
void
packing
(
u32
*
out
,
const
u8
*
in
);
void
unpacking
(
u8
*
out
,
u32
*
in
);
...
...
@@ -11,13 +13,6 @@ void precompute_rtk1(u32* rtk1, const u8* tk1);
#define ROR(x,y) (((x) >> (y)) | ((x) << (32 - (y))))
#define XOR_BLOCKS(x,y) ({ \
(x)[0] ^= (y)[0]; \
(x)[1] ^= (y)[1]; \
(x)[2] ^= (y)[2]; \
(x)[3] ^= (y)[3]; \
})
#define SWAPMOVE(a, b, mask, n) ({ \
tmp = (b ^ (a >> n)) & mask; \
b ^= tmp; \
...
...
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_2/encrypt.c
View file @
60a8ce1b
...
...
@@ -8,12 +8,10 @@
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
*
* @date
May
2020
* @date
June
2020
******************************************************************************/
#include "skinny128.h"
#include "skinnyaead.h"
#include <string.h>
#include <stdio.h>
/******************************************************************************
* x ^= y where x, y are 128-bit blocks (16 bytes array).
...
...
@@ -33,13 +31,13 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
u8
feedback
;
u8
tmp
[
2
*
BLOCKBYTES
];
memset
(
tmp
,
0x00
,
2
*
BLOCKBYTES
);
memset
(
auth
,
0x00
,
BLOCKBYTES
);
SET_DOMAIN
(
tmp
,
0x02
);
SET_DOMAIN
(
tmp
+
BLOCKBYTES
,
0x02
);
memset
(
auth
,
0x00
,
BLOCKBYTES
);
while
(
adlen
>=
2
*
BLOCKBYTES
)
{
LE_STR_64
(
tmp
,
lfsr
);
UPDATE_LFSR
(
lfsr
);
LE_STR_64
(
tmp
+
BLOCKBYTES
,
lfsr
);
SET_DOMAIN
(
tmp
+
BLOCKBYTES
,
0x02
);
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tmp
+
BLOCKBYTES
);
skinny128_384_encrypt
(
tmp
,
tmp
+
BLOCKBYTES
,
ad
,
ad
+
BLOCKBYTES
,
*
tk
);
xor_block
(
auth
,
tmp
);
...
...
@@ -47,6 +45,9 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
adlen
-=
2
*
BLOCKBYTES
;
ad
+=
2
*
BLOCKBYTES
;
UPDATE_LFSR
(
lfsr
);
memset
(
tmp
,
0x00
,
2
*
BLOCKBYTES
);
// to save 32 bytes of RAM
SET_DOMAIN
(
tmp
,
0x02
);
SET_DOMAIN
(
tmp
+
BLOCKBYTES
,
0x02
);
}
if
(
adlen
>
BLOCKBYTES
)
{
// pad and process 2 blocs in //
LE_STR_64
(
tmp
,
lfsr
);
...
...
@@ -65,11 +66,12 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
LE_STR_64
(
tmp
,
lfsr
);
if
(
mlen
==
0
)
{
// if tag has *NOT* been calculated yet
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tag
);
// compute the tag
skinny128_384_encrypt
(
auth
,
c
,
ad
,
c
,
*
tk
);
skinny128_384_encrypt
(
tmp
,
c
,
ad
,
c
,
*
tk
);
}
else
{
// if tag has been calculated yet
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tmp
);
// process last ad block
skinny128_384_encrypt
(
auth
,
auth
,
ad
,
ad
,
*
tk
);
skinny128_384_encrypt
(
tmp
,
tmp
,
ad
,
ad
,
*
tk
);
}
xor_block
(
auth
,
tmp
);
}
else
if
(
adlen
>
0
)
{
LE_STR_64
(
tmp
,
lfsr
);
SET_DOMAIN
(
tmp
,
0x03
);
// domain for padding ad
...
...
@@ -78,11 +80,12 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
tmp
[
BLOCKBYTES
+
adlen
]
^=
0x80
;
// padding
if
(
mlen
==
0
)
{
// if tag has *NOT* been calculated yet
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tag
);
// compute the tag
skinny128_384_encrypt
(
auth
,
c
,
tmp
+
BLOCKBYTES
,
c
,
*
tk
);
skinny128_384_encrypt
(
tmp
,
c
,
tmp
+
BLOCKBYTES
,
c
,
*
tk
);
}
else
{
// if tag has been calculated yet
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tmp
);
// process last ad block
skinny128_384_encrypt
(
auth
,
auth
,
tmp
+
BLOCKBYTES
,
tmp
+
BLOCKBYTES
,
*
tk
);
skinny128_384_encrypt
(
tmp
,
tmp
,
tmp
+
BLOCKBYTES
,
tmp
+
BLOCKBYTES
,
*
tk
);
}
xor_block
(
auth
,
tmp
);
}
}
...
...
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_2/skinny128.c
View file @
60a8ce1b
...
...
@@ -16,12 +16,9 @@
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
*
* @date
May
2020
* @date
June
2020
******************************************************************************/
#include <stdio.h>
#include <string.h>
#include "skinny128.h"
#include "tk_schedule.h"
/****************************************************************************
* The MixColumns operation for rounds i such that (i % 4) == 0.
...
...
@@ -84,7 +81,7 @@ void mixcolumns_3(u32* state) {
}
/****************************************************************************
* The inverse MixColumns o
ep
ration for rounds i such that (i % 4) == 0
* The inverse MixColumns o
pe
ration for rounds i such that (i % 4) == 0
****************************************************************************/
void
inv_mixcolumns_0
(
u32
*
state
)
{
u32
tmp
;
...
...
@@ -99,7 +96,7 @@ void inv_mixcolumns_0(u32* state) {
}
/****************************************************************************
* The inverse MixColumns o
ep
ration for rounds i such that (i % 4) == 1
* The inverse MixColumns o
pe
ration for rounds i such that (i % 4) == 1
****************************************************************************/
void
inv_mixcolumns_1
(
u32
*
state
)
{
u32
tmp
;
...
...
@@ -114,7 +111,7 @@ void inv_mixcolumns_1(u32* state) {
}
/****************************************************************************
* The inverse MixColumns o
ep
ration for rounds i such that (i % 4) == 2
* The inverse MixColumns o
pe
ration for rounds i such that (i % 4) == 2
****************************************************************************/
void
inv_mixcolumns_2
(
u32
*
state
)
{
u32
tmp
;
...
...
@@ -129,7 +126,7 @@ void inv_mixcolumns_2(u32* state) {
}
/****************************************************************************
* The inverse MixColumns o
ep
ration for rounds i such that (i % 4) == 3
* The inverse MixColumns o
pe
ration for rounds i such that (i % 4) == 3
****************************************************************************/
void
inv_mixcolumns_3
(
u32
*
state
)
{
u32
tmp
;
...
...
@@ -166,20 +163,8 @@ void skinny128_384_encrypt(u8* ctext, u8* ctext_bis, const u8* ptext,
const
u8
*
ptext_bis
,
const
tweakey
tk
)
{
u32
state
[
8
];
packing
(
state
,
ptext
,
ptext_bis
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
32
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
64
,
tk
.
rtk2_3
+
64
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
96
,
tk
.
rtk2_3
+
96
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
+
128
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
160
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
64
,
tk
.
rtk2_3
+
192
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
96
,
tk
.
rtk2_3
+
224
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
+
256
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
288
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
64
,
tk
.
rtk2_3
+
320
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
96
,
tk
.
rtk2_3
+
352
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
+
384
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
416
);
for
(
int
i
=
0
;
i
<
14
;
i
++
)
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
(
i
%
4
)
*
32
,
tk
.
rtk2_3
+
i
*
32
);
unpacking
(
ctext
,
ctext_bis
,
state
);
}
...
...
@@ -192,19 +177,7 @@ void skinny128_384_decrypt(u8* ptext, u8* ptext_bis, const u8* ctext,
const
u8
*
ctext_bis
,
const
tweakey
tk
)
{
u32
state
[
8
];
packing
(
state
,
ctext
,
ctext_bis
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
416
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
+
384
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
96
,
tk
.
rtk2_3
+
352
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
64
,
tk
.
rtk2_3
+
320
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
288
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
+
256
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
96
,
tk
.
rtk2_3
+
224
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
64
,
tk
.
rtk2_3
+
192
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
160
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
+
128
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
96
,
tk
.
rtk2_3
+
96
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
64
,
tk
.
rtk2_3
+
64
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
32
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
);
for
(
int
i
=
13
;
i
>=
0
;
i
--
)
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
(
i
%
4
)
*
32
,
tk
.
rtk2_3
+
i
*
32
);
unpacking
(
ptext
,
ptext_bis
,
state
);
}
\ No newline at end of file
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_2/skinnyaead.h
View file @
60a8ce1b
...
...
@@ -3,9 +3,7 @@
#include "skinny128.h"
typedef
unsigned
char
u8
;
typedef
unsigned
int
u32
;
typedef
unsigned
long
long
u64
;
typedef
uint64_t
u64
;
#define TAGBYTES 16
#define KEYBYTES 16
...
...
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_2/tk_schedule.c
View file @
60a8ce1b
...
...
@@ -9,13 +9,9 @@
*
* @date May 2020
*******************************************************************************/
#include <stdio.h>
#include <string.h>
#include "tk_schedule.h"
typedef
unsigned
char
u8
;
typedef
unsigned
int
u32
;
/****************************************************************************
* The round constants according to the fixsliced representation.
****************************************************************************/
...
...
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_2/tk_schedule.h
View file @
60a8ce1b
#ifndef TK_SCHEDULE_BS_H_
#define TK_SCHEDULE_BS_H_
typedef
unsigned
char
u8
;
typedef
unsigned
int
u32
;
#include <stdint.h>
typedef
uint8_t
u8
;
typedef
uint32_t
u32
;
typedef
struct
{
u32
rtk1
[
8
*
16
];
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment