Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
lwc
/
candidates
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
cad26506
authored
4 years ago
by
Enrico Pozzobon
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'email-submissions'
parents
121de979
a3a77713
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
36 changed files
with
666 additions
and
382 deletions
+666
-382
drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v6m.S
+5
-1
drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v7m.S
+0
-0
drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v7m_fpu.S
+0
-0
drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v7m_fpu_x.S
+0
-0
drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm_selector.h
+44
-12
drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/encrypt.c
+2
-2
drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/internal-drysponge.h
+1
-1
romulus/Implementations/crypto_aead/romulusm1+/opt32/skinny128.c
+3
-10
romulus/Implementations/crypto_aead/romulusm1+/opt32/tk_schedule.c
+85
-15
romulus/Implementations/crypto_aead/romulusm1/opt32/skinny128.c
+3
-14
romulus/Implementations/crypto_aead/romulusm1/opt32/tk_schedule.c
+86
-17
romulus/Implementations/crypto_aead/romulusn1+/opt32/skinny128.c
+3
-10
romulus/Implementations/crypto_aead/romulusn1+/opt32/tk_schedule.c
+85
-15
romulus/Implementations/crypto_aead/romulusn1/opt32/skinny128.c
+3
-14
romulus/Implementations/crypto_aead/romulusn1/opt32/tk_schedule.c
+86
-17
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_1/encrypt.c
+1
-3
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_1/skinny128.c
+6
-24
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_1/skinnyaead.h
+1
-3
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_1/tk_schedule.c
+87
-22
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_1/tk_schedule.h
+4
-10
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/encrypt.c
+13
-11
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/skinny128.c
+10
-30
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/skinny128.h
+1
-0
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/skinnyaead.h
+1
-3
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/tk_schedule.c
+1
-5
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/tk_schedule.h
+4
-2
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_1/encrypt.c
+3
-14
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_1/skinny128.c
+6
-34
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_1/skinnyaead.h
+1
-3
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_1/tk_schedule.c
+88
-24
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_1/tk_schedule.h
+5
-10
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_2/encrypt.c
+13
-11
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_2/skinny128.c
+10
-36
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_2/skinnyaead.h
+1
-3
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_2/tk_schedule.c
+0
-4
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_2/tk_schedule.h
+4
-2
No files found.
drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v6m.S
View file @
cad26506
...
@@ -4,8 +4,12 @@ Sebastien Riou, May 27th 2020
...
@@ -4,8 +4,12 @@ Sebastien Riou, May 27th 2020
Implementation optimized for ARM-Cortex-M0 (Size and Speed)
Implementation optimized for ARM-Cortex-M0 (Size and Speed)
*/
*/
//define __DRYGASCON_ARM_SELECTOR_V6M__ or add drygascon128_arm_selector.h to includes
#if defined(__DRYGASCON_ARM_SELECTOR_H__)
#ifndef __DRYGASCON_ARM_SELECTOR_V6M__
#include "drygascon128_arm_selector.h"
#endif
#if defined(__DRYGASCON_ARM_SELECTOR_V6M__)
.cpu cortex-m0
.cpu cortex-m0
.syntax unified
.syntax unified
.code 16
.code 16
...
...
This diff is collapsed.
Click to expand it.
drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v7m.S
View file @
cad26506
This diff is collapsed.
Click to expand it.
drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v7m_fpu.S
View file @
cad26506
This diff is collapsed.
Click to expand it.
drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm-v7m_fpu_x.S
View file @
cad26506
This diff is collapsed.
Click to expand it.
drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/drygascon128_arm_selector.h
View file @
cad26506
...
@@ -3,41 +3,73 @@
...
@@ -3,41 +3,73 @@
//Optional file to select the best implementation for each chip
//Optional file to select the best implementation for each chip
#ifdef STM32H743xx
#ifdef STM32H743xx
#define __DRYGASCON_ARM_SELECTOR_V7M__
#define __DRYGASCON_ARM_SELECTOR_V7M_
FPU_
_
#define __DRYGASCON_ARM_SELECTOR_F
PU
__
#define __DRYGASCON_ARM_SELECTOR_F
OUND
__
#endif
#endif
#ifdef STM32F746xx
#ifdef STM32F746xx
#define __DRYGASCON_ARM_SELECTOR_V7M_FPU__
#define __DRYGASCON_ARM_SELECTOR_FOUND__
#endif
#ifdef STM32F411xx
#define __DRYGASCON_ARM_SELECTOR_V7M_FPU__
#define __DRYGASCON_ARM_SELECTOR_FOUND__
#endif
#ifdef STM32L552xx //technically it is V8M but we don't have a specific code for that one
#define __DRYGASCON_ARM_SELECTOR_V7M__
#define __DRYGASCON_ARM_SELECTOR_V7M__
#define __DRYGASCON_ARM_SELECTOR_F
PU
__
#define __DRYGASCON_ARM_SELECTOR_F
OUND
__
#endif
#endif
#ifdef STM32F103xx
#ifdef STM32F103xx
#define __DRYGASCON_ARM_SELECTOR_V7M__
#define __DRYGASCON_ARM_SELECTOR_V7M__
#define __DRYGASCON_ARM_SELECTOR_FOUND__
#endif
#endif
#ifdef STM32L011xx
#ifdef STM32L011xx
#define __DRYGASCON_ARM_SELECTOR_V6M__
#define __DRYGASCON_ARM_SELECTOR_V6M__
#define __DRYGASCON_ARM_SELECTOR_FOUND__
#endif
#endif
#ifdef __SAM3X8E__
#ifdef __SAM3X8E__
#define __DRYGASCON_ARM_SELECTOR_V7M__
#define __DRYGASCON_ARM_SELECTOR_V7M__
#define __DRYGASCON_ARM_SELECTOR_FOUND__
#endif
#endif
//TODO: add more chips here
//TODO: add more chips here
#ifdef __DRYGASCON_ARM_SELECTOR_V7M__
#ifndef __DRYGASCON_ARM_SELECTOR_FOUND__
#ifdef __DRYGASCON_ARM_SELECTOR_FPU__
//more generic defines catching whole families
#define DRYGASCON_G_OPT drygascon128_g_v7m_fpu
#if defined(STM32F4xx) || defined(STM32F7xx) || defined(STM32H7xx)
#define DRYGASCON_F_OPT drygascon128_f_v7m_fpu
#define __DRYGASCON_ARM_SELECTOR_V7M_FPU__
#define DRYGASCON_G0_OPT drygascon128_g0_v7m_fpu
#define __DRYGASCON_ARM_SELECTOR_FOUND__
#else
#endif
#define DRYGASCON_G_OPT drygascon128_g_v7m
#define DRYGASCON_F_OPT drygascon128_f_v7m
#if defined(STM32F1xx)
#define DRYGASCON_G0_OPT drygascon128_g0_v7m
#define __DRYGASCON_ARM_SELECTOR_V7M__
#define __DRYGASCON_ARM_SELECTOR_FOUND__
#endif
#endif
#endif
#endif
#ifdef __DRYGASCON_ARM_SELECTOR_V7M_FPU__
#define DRYGASCON_G_OPT drygascon128_g_v7m_fpu
#define DRYGASCON_F_OPT drygascon128_f_v7m_fpu
#define DRYGASCON_G0_OPT drygascon128_g0_v7m_fpu
#endif
#ifdef __DRYGASCON_ARM_SELECTOR_V7M_FPU_X__
#define DRYGASCON_G_OPT drygascon128_g_v7m_fpu_x
#define DRYGASCON_F_OPT drygascon128_f_v7m_fpu_x
#define DRYGASCON_G0_OPT drygascon128_g0_v7m_fpu_x
#endif
#ifdef __DRYGASCON_ARM_SELECTOR_V7M__
#define DRYGASCON_G_OPT drygascon128_g_v7m
#define DRYGASCON_F_OPT drygascon128_f_v7m
#define DRYGASCON_G0_OPT drygascon128_g0_v7m
#endif
#ifdef __DRYGASCON_ARM_SELECTOR_V6M__
#ifdef __DRYGASCON_ARM_SELECTOR_V6M__
#define DRYGASCON_G_OPT drygascon128_g_v6m
#define DRYGASCON_G_OPT drygascon128_g_v6m
#define DRYGASCON_F_OPT drygascon128_f_v6m
#define DRYGASCON_F_OPT drygascon128_f_v6m
...
...
This diff is collapsed.
Click to expand it.
drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/encrypt.c
View file @
cad26506
...
@@ -8,7 +8,7 @@ int crypto_aead_encrypt
...
@@ -8,7 +8,7 @@ int crypto_aead_encrypt
const
unsigned
char
*
npub
,
const
unsigned
char
*
npub
,
const
unsigned
char
*
k
)
const
unsigned
char
*
k
)
{
{
return
drygascon128_aead_encrypt
return
drygascon128
k16
_aead_encrypt
(
c
,
clen
,
m
,
mlen
,
ad
,
adlen
,
nsec
,
npub
,
k
);
(
c
,
clen
,
m
,
mlen
,
ad
,
adlen
,
nsec
,
npub
,
k
);
}
}
...
@@ -20,6 +20,6 @@ int crypto_aead_decrypt
...
@@ -20,6 +20,6 @@ int crypto_aead_decrypt
const
unsigned
char
*
npub
,
const
unsigned
char
*
npub
,
const
unsigned
char
*
k
)
const
unsigned
char
*
k
)
{
{
return
drygascon128_aead_decrypt
return
drygascon128
k16
_aead_decrypt
(
m
,
mlen
,
nsec
,
c
,
clen
,
ad
,
adlen
,
npub
,
k
);
(
m
,
mlen
,
nsec
,
c
,
clen
,
ad
,
adlen
,
npub
,
k
);
}
}
This diff is collapsed.
Click to expand it.
drygascon/Implementations/crypto_aead/drygascon128/add_arm_cortex-m/internal-drysponge.h
View file @
cad26506
...
@@ -245,7 +245,7 @@ typedef union
...
@@ -245,7 +245,7 @@ typedef union
*/
*/
typedef
struct
typedef
struct
{
{
gascon128_state_t
c
;
/**< GASCON-128 state for the capacity */
gascon128_state_t
c
;
/**< GASCON-128 state for the capacity */
uint32_t
domain
;
/**< Domain value to mix on next F call */
uint32_t
domain
;
/**< Domain value to mix on next F call */
uint32_t
rounds
;
/**< Number of rounds for next G call */
uint32_t
rounds
;
/**< Number of rounds for next G call */
drysponge128_rate_t
r
;
/**< Buffer for a rate block of data */
drysponge128_rate_t
r
;
/**< Buffer for a rate block of data */
...
...
This diff is collapsed.
Click to expand it.
romulus/Implementations/crypto_aead/romulusm1+/opt32/skinny128.c
View file @
cad26506
...
@@ -93,15 +93,7 @@ void skinny128_384_plus(u8* ctext, const u8* ptext, const u32* rtk1,
...
@@ -93,15 +93,7 @@ void skinny128_384_plus(u8* ctext, const u8* ptext, const u32* rtk1,
u32
tmp
;
// used in SWAPMOVE macro
u32
tmp
;
// used in SWAPMOVE macro
u32
state
[
4
];
// 128-bit state
u32
state
[
4
];
// 128-bit state
packing
(
state
,
ptext
);
// from byte to bitsliced representation
packing
(
state
,
ptext
);
// from byte to bitsliced representation
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
);
for
(
int
i
=
0
;
i
<
10
;
i
++
)
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
16
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
(
i
%
4
)
*
16
,
rtk2_3
+
i
*
16
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
32
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
48
);
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
64
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
80
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
96
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
112
);
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
128
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
144
);
unpacking
(
ctext
,
state
);
// from bitsliced to byte representation
unpacking
(
ctext
,
state
);
// from bitsliced to byte representation
}
}
\ No newline at end of file
This diff is collapsed.
Click to expand it.
romulus/Implementations/crypto_aead/romulusm1+/opt32/tk_schedule.c
View file @
cad26506
...
@@ -260,6 +260,70 @@ void precompute_lfsr_tk3(u32* tk, const u8* key, const int rounds) {
...
@@ -260,6 +260,70 @@ void precompute_lfsr_tk3(u32* tk, const u8* key, const int rounds) {
}
}
/******************************************************************************
/******************************************************************************
* Precompute LFSR2(TK2) ^ LFSR3(TK3) for all round tweakeys.
* It is equivalent to the following 2 function calls:
* - precompute_lfsr_tk2(tk, t2, SKINNY128_384_ROUNDS);
* - precompute_lfsr_tk3(tk, t3, SKINNY128_384_ROUNDS);
* However 'precompute_lfsr_tk2_3' can allow to save cycles on some platform.
* On ARMv7 one should observe a gain of ~1k cycles per function call. It can be
* explained by the fact that less memory accesses to 'tk' are computed.
*
* To save some code size, the loop can be replaced by the following one:
* for(int i = 0 ; i < rounds; i+=2) {
* lfsr2_bs(tk2);
* lfsr3_bs(tk3);
* tk[i*4+4] = tk2[0] ^ tk3[0];
* tk[i*4+5] = tk2[1] ^ tk3[1];
* tk[i*4+6] = tk2[2] ^ tk3[2];
* tk[i*4+7] = tk2[3] ^ tk3[3];
* }
* at the cost of some cycles (~260 on ARM Cortex-M).
******************************************************************************/
void
precompute_lfsr_tk2_3
(
u32
*
tk
,
const
u8
*
t2
,
const
u8
*
t3
,
const
int
rounds
)
{
u32
tmp
,
tk2
[
4
],
tk3
[
4
];
packing
(
tk2
,
t2
);
packing
(
tk3
,
t3
);
tk
[
0
]
=
tk2
[
0
]
^
tk3
[
0
];
tk
[
1
]
=
tk2
[
1
]
^
tk3
[
1
];
tk
[
2
]
=
tk2
[
2
]
^
tk3
[
2
];
tk
[
3
]
=
tk2
[
3
]
^
tk3
[
3
];
for
(
int
i
=
0
;
i
<
rounds
;
i
+=
8
)
{
tk2
[
0
]
^=
(
tk2
[
2
]
&
0xaaaaaaaa
);
tk2
[
0
]
=
((
tk2
[
0
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk2
[
0
]
<<
1
)
&
0xaaaaaaaa
);
tk3
[
3
]
^=
((
tk3
[
1
]
&
0xaaaaaaaa
)
>>
1
);
tk3
[
3
]
=
((
tk3
[
3
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk3
[
3
]
<<
1
)
&
0xaaaaaaaa
);
tk
[
i
*
4
+
4
]
=
tk2
[
1
]
^
tk3
[
3
];
tk
[
i
*
4
+
5
]
=
tk2
[
2
]
^
tk3
[
0
];
tk
[
i
*
4
+
6
]
=
tk2
[
3
]
^
tk3
[
1
];
tk
[
i
*
4
+
7
]
=
tk2
[
0
]
^
tk3
[
2
];
tk2
[
1
]
^=
(
tk2
[
3
]
&
0xaaaaaaaa
);
tk2
[
1
]
=
((
tk2
[
1
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk2
[
1
]
<<
1
)
&
0xaaaaaaaa
);
tk3
[
2
]
^=
((
tk3
[
0
]
&
0xaaaaaaaa
)
>>
1
);
tk3
[
2
]
=
((
tk3
[
2
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk3
[
2
]
<<
1
)
&
0xaaaaaaaa
);
tk
[
i
*
4
+
12
]
=
tk2
[
2
]
^
tk3
[
2
];
tk
[
i
*
4
+
13
]
=
tk2
[
3
]
^
tk3
[
3
];
tk
[
i
*
4
+
14
]
=
tk2
[
0
]
^
tk3
[
0
];
tk
[
i
*
4
+
15
]
=
tk2
[
1
]
^
tk3
[
1
];
tk2
[
2
]
^=
(
tk2
[
0
]
&
0xaaaaaaaa
);
tk2
[
2
]
=
((
tk2
[
2
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk2
[
2
]
<<
1
)
&
0xaaaaaaaa
);
tk3
[
1
]
^=
((
tk3
[
3
]
&
0xaaaaaaaa
)
>>
1
);
tk3
[
1
]
=
((
tk3
[
1
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk3
[
1
]
<<
1
)
&
0xaaaaaaaa
);
tk
[
i
*
4
+
20
]
=
tk2
[
3
]
^
tk3
[
1
];
tk
[
i
*
4
+
21
]
=
tk2
[
0
]
^
tk3
[
2
];
tk
[
i
*
4
+
22
]
=
tk2
[
1
]
^
tk3
[
3
];
tk
[
i
*
4
+
23
]
=
tk2
[
2
]
^
tk3
[
0
];
tk2
[
3
]
^=
(
tk2
[
1
]
&
0xaaaaaaaa
);
tk2
[
3
]
=
((
tk2
[
3
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk2
[
3
]
<<
1
)
&
0xaaaaaaaa
);
tk3
[
0
]
^=
((
tk3
[
2
]
&
0xaaaaaaaa
)
>>
1
);
tk3
[
0
]
=
((
tk3
[
0
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk3
[
0
]
<<
1
)
&
0xaaaaaaaa
);
tk
[
i
*
4
+
28
]
=
tk2
[
0
]
^
tk3
[
0
];
tk
[
i
*
4
+
29
]
=
tk2
[
1
]
^
tk3
[
1
];
tk
[
i
*
4
+
30
]
=
tk2
[
2
]
^
tk3
[
2
];
tk
[
i
*
4
+
31
]
=
tk2
[
3
]
^
tk3
[
3
];
}
}
/******************************************************************************
* XOR TK with TK1 before applying the permutations.
* XOR TK with TK1 before applying the permutations.
* The key is then rearranged to match the barrel shiftrows representation.
* The key is then rearranged to match the barrel shiftrows representation.
******************************************************************************/
******************************************************************************/
...
@@ -267,19 +331,20 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -267,19 +331,20 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
u32
test
;
u32
test
;
u32
tk1
[
4
],
tmp
[
4
];
u32
tk1
[
4
],
tmp
[
4
];
packing
(
tk1
,
key
);
packing
(
tk1
,
key
);
memcpy
(
tmp
,
tk
,
16
);
tmp
[
0
]
=
tk
[
0
]
^
tk1
[
0
];
tmp
[
0
]
^=
tk1
[
0
];
tmp
[
1
]
=
tk
[
1
]
^
tk1
[
1
];
tmp
[
1
]
^=
tk1
[
1
];
tmp
[
2
]
=
tk
[
2
]
^
tk1
[
2
];
tmp
[
2
]
^=
tk1
[
2
];
tmp
[
3
]
=
tk
[
3
]
^
tk1
[
3
];
tmp
[
3
]
^=
tk1
[
3
];
for
(
int
i
=
0
;
i
<
rounds
;
i
+=
8
)
{
for
(
int
i
=
0
;
i
<
rounds
;
i
+=
8
)
{
test
=
(
i
%
16
<
8
)
?
1
:
0
;
//to apply the right power of P
test
=
(
i
%
16
<
8
)
?
1
:
0
;
//to apply the right power of P
tk
[
i
*
4
]
=
tmp
[
2
]
&
0xf0f0f0f0
;
tk
[
i
*
4
]
=
tmp
[
2
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
1
]
=
tmp
[
3
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
1
]
=
tmp
[
3
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
2
]
=
tmp
[
0
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
2
]
=
tmp
[
0
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
3
]
=
tmp
[
1
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
3
]
=
tmp
[
1
]
&
0xf0f0f0f0
;
memcpy
(
tmp
,
tk
+
i
*
4
+
4
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
4
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
5
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
6
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
7
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_2
(
tmp
);
// applies P^2
permute_tk_2
(
tmp
);
// applies P^2
else
else
...
@@ -296,8 +361,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -296,8 +361,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
10
]
|=
ROR
(
tmp
[
0
],
12
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
10
]
|=
ROR
(
tmp
[
0
],
12
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
11
]
=
ROR
(
tmp
[
1
],
28
)
&
0x03030303
;
tk
[
i
*
4
+
11
]
=
ROR
(
tmp
[
1
],
28
)
&
0x03030303
;
tk
[
i
*
4
+
11
]
|=
ROR
(
tmp
[
1
],
12
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
11
]
|=
ROR
(
tmp
[
1
],
12
)
&
0x0c0c0c0c
;
memcpy
(
tmp
,
tk
+
i
*
4
+
12
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
12
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
13
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
14
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
15
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_4
(
tmp
);
// applies P^4
permute_tk_4
(
tmp
);
// applies P^4
else
else
...
@@ -310,8 +377,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -310,8 +377,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
17
]
=
ROR
(
tmp
[
3
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
17
]
=
ROR
(
tmp
[
3
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
18
]
=
ROR
(
tmp
[
0
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
18
]
=
ROR
(
tmp
[
0
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
19
]
=
ROR
(
tmp
[
1
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
19
]
=
ROR
(
tmp
[
1
],
16
)
&
0xf0f0f0f0
;
memcpy
(
tmp
,
tk
+
i
*
4
+
20
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
20
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
21
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
22
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
23
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_6
(
tmp
);
// applies P^6
permute_tk_6
(
tmp
);
// applies P^6
else
else
...
@@ -328,8 +397,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -328,8 +397,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
26
]
|=
ROR
(
tmp
[
0
],
28
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
26
]
|=
ROR
(
tmp
[
0
],
28
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
27
]
=
ROR
(
tmp
[
1
],
12
)
&
0x03030303
;
tk
[
i
*
4
+
27
]
=
ROR
(
tmp
[
1
],
12
)
&
0x03030303
;
tk
[
i
*
4
+
27
]
|=
ROR
(
tmp
[
1
],
28
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
27
]
|=
ROR
(
tmp
[
1
],
28
)
&
0x0c0c0c0c
;
memcpy
(
tmp
,
tk
+
i
*
4
+
28
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
28
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
29
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
30
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
31
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_8
(
tmp
);
// applies P^8
permute_tk_8
(
tmp
);
// applies P^8
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
...
@@ -350,8 +421,7 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -350,8 +421,7 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
******************************************************************************/
******************************************************************************/
void
precompute_rtk2_3
(
u32
*
rtk
,
const
u8
*
tk2
,
const
u8
*
tk3
)
{
void
precompute_rtk2_3
(
u32
*
rtk
,
const
u8
*
tk2
,
const
u8
*
tk3
)
{
memset
(
rtk
,
0x00
,
16
*
SKINNY128_384_ROUNDS
);
memset
(
rtk
,
0x00
,
16
*
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk2
(
rtk
,
tk2
,
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk2_3
(
rtk
,
tk2
,
tk3
,
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk3
(
rtk
,
tk3
,
SKINNY128_384_ROUNDS
);
permute_tk
(
rtk
,
(
u8
*
)(
rtk
+
8
),
SKINNY128_384_ROUNDS
);
// rtk+8 is NULL
permute_tk
(
rtk
,
(
u8
*
)(
rtk
+
8
),
SKINNY128_384_ROUNDS
);
// rtk+8 is NULL
for
(
int
i
=
0
;
i
<
SKINNY128_384_ROUNDS
;
i
++
)
{
// add rconsts
for
(
int
i
=
0
;
i
<
SKINNY128_384_ROUNDS
;
i
++
)
{
// add rconsts
for
(
int
j
=
0
;
j
<
4
;
j
++
)
for
(
int
j
=
0
;
j
<
4
;
j
++
)
...
...
This diff is collapsed.
Click to expand it.
romulus/Implementations/crypto_aead/romulusm1/opt32/skinny128.c
View file @
cad26506
...
@@ -92,19 +92,7 @@ void skinny128_384(u8* ctext, const u8* ptext, const u32* rtk1, const u32* rtk2
...
@@ -92,19 +92,7 @@ void skinny128_384(u8* ctext, const u8* ptext, const u32* rtk1, const u32* rtk2
u32
tmp
;
// used in SWAPMOVE macro
u32
tmp
;
// used in SWAPMOVE macro
u32
state
[
4
];
// 128-bit state
u32
state
[
4
];
// 128-bit state
packing
(
state
,
ptext
);
// from byte to bitsliced representation
packing
(
state
,
ptext
);
// from byte to bitsliced representation
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
);
for
(
int
i
=
0
;
i
<
14
;
i
++
)
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
16
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
(
i
%
4
)
*
16
,
rtk2_3
+
i
*
16
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
32
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
48
);
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
64
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
80
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
96
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
112
);
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
128
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
144
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
160
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
176
);
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
192
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
208
);
unpacking
(
ctext
,
state
);
// from bitsliced to byte representation
unpacking
(
ctext
,
state
);
// from bitsliced to byte representation
}
}
\ No newline at end of file
This diff is collapsed.
Click to expand it.
romulus/Implementations/crypto_aead/romulusm1/opt32/tk_schedule.c
View file @
cad26506
...
@@ -271,6 +271,70 @@ void precompute_lfsr_tk3(u32* tk, const u8* key, const int rounds) {
...
@@ -271,6 +271,70 @@ void precompute_lfsr_tk3(u32* tk, const u8* key, const int rounds) {
}
}
/******************************************************************************
/******************************************************************************
* Precompute LFSR2(TK2) ^ LFSR3(TK3) for all round tweakeys.
* It is equivalent to the following 2 function calls:
* - precompute_lfsr_tk2(tk, t2, SKINNY128_384_ROUNDS);
* - precompute_lfsr_tk3(tk, t3, SKINNY128_384_ROUNDS);
* However 'precompute_lfsr_tk2_3' can allow to save cycles on some platform.
* On ARMv7 one should observe a gain of ~1k cycles per function call. It can be
* explained by the fact that less memory accesses to 'tk' are computed.
*
* To save some code size, the loop can be replaced by the following one:
* for(int i = 0 ; i < rounds; i+=2) {
* lfsr2_bs(tk2);
* lfsr3_bs(tk3);
* tk[i*4+4] = tk2[0] ^ tk3[0];
* tk[i*4+5] = tk2[1] ^ tk3[1];
* tk[i*4+6] = tk2[2] ^ tk3[2];
* tk[i*4+7] = tk2[3] ^ tk3[3];
* }
* at the cost of some cycles (~260 on ARM Cortex-M).
******************************************************************************/
void
precompute_lfsr_tk2_3
(
u32
*
tk
,
const
u8
*
t2
,
const
u8
*
t3
,
const
int
rounds
)
{
u32
tmp
,
tk2
[
4
],
tk3
[
4
];
packing
(
tk2
,
t2
);
packing
(
tk3
,
t3
);
tk
[
0
]
=
tk2
[
0
]
^
tk3
[
0
];
tk
[
1
]
=
tk2
[
1
]
^
tk3
[
1
];
tk
[
2
]
=
tk2
[
2
]
^
tk3
[
2
];
tk
[
3
]
=
tk2
[
3
]
^
tk3
[
3
];
for
(
int
i
=
0
;
i
<
rounds
;
i
+=
8
)
{
tk2
[
0
]
^=
(
tk2
[
2
]
&
0xaaaaaaaa
);
tk2
[
0
]
=
((
tk2
[
0
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk2
[
0
]
<<
1
)
&
0xaaaaaaaa
);
tk3
[
3
]
^=
((
tk3
[
1
]
&
0xaaaaaaaa
)
>>
1
);
tk3
[
3
]
=
((
tk3
[
3
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk3
[
3
]
<<
1
)
&
0xaaaaaaaa
);
tk
[
i
*
4
+
4
]
=
tk2
[
1
]
^
tk3
[
3
];
tk
[
i
*
4
+
5
]
=
tk2
[
2
]
^
tk3
[
0
];
tk
[
i
*
4
+
6
]
=
tk2
[
3
]
^
tk3
[
1
];
tk
[
i
*
4
+
7
]
=
tk2
[
0
]
^
tk3
[
2
];
tk2
[
1
]
^=
(
tk2
[
3
]
&
0xaaaaaaaa
);
tk2
[
1
]
=
((
tk2
[
1
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk2
[
1
]
<<
1
)
&
0xaaaaaaaa
);
tk3
[
2
]
^=
((
tk3
[
0
]
&
0xaaaaaaaa
)
>>
1
);
tk3
[
2
]
=
((
tk3
[
2
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk3
[
2
]
<<
1
)
&
0xaaaaaaaa
);
tk
[
i
*
4
+
12
]
=
tk2
[
2
]
^
tk3
[
2
];
tk
[
i
*
4
+
13
]
=
tk2
[
3
]
^
tk3
[
3
];
tk
[
i
*
4
+
14
]
=
tk2
[
0
]
^
tk3
[
0
];
tk
[
i
*
4
+
15
]
=
tk2
[
1
]
^
tk3
[
1
];
tk2
[
2
]
^=
(
tk2
[
0
]
&
0xaaaaaaaa
);
tk2
[
2
]
=
((
tk2
[
2
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk2
[
2
]
<<
1
)
&
0xaaaaaaaa
);
tk3
[
1
]
^=
((
tk3
[
3
]
&
0xaaaaaaaa
)
>>
1
);
tk3
[
1
]
=
((
tk3
[
1
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk3
[
1
]
<<
1
)
&
0xaaaaaaaa
);
tk
[
i
*
4
+
20
]
=
tk2
[
3
]
^
tk3
[
1
];
tk
[
i
*
4
+
21
]
=
tk2
[
0
]
^
tk3
[
2
];
tk
[
i
*
4
+
22
]
=
tk2
[
1
]
^
tk3
[
3
];
tk
[
i
*
4
+
23
]
=
tk2
[
2
]
^
tk3
[
0
];
tk2
[
3
]
^=
(
tk2
[
1
]
&
0xaaaaaaaa
);
tk2
[
3
]
=
((
tk2
[
3
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk2
[
3
]
<<
1
)
&
0xaaaaaaaa
);
tk3
[
0
]
^=
((
tk3
[
2
]
&
0xaaaaaaaa
)
>>
1
);
tk3
[
0
]
=
((
tk3
[
0
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk3
[
0
]
<<
1
)
&
0xaaaaaaaa
);
tk
[
i
*
4
+
28
]
=
tk2
[
0
]
^
tk3
[
0
];
tk
[
i
*
4
+
29
]
=
tk2
[
1
]
^
tk3
[
1
];
tk
[
i
*
4
+
30
]
=
tk2
[
2
]
^
tk3
[
2
];
tk
[
i
*
4
+
31
]
=
tk2
[
3
]
^
tk3
[
3
];
}
}
/******************************************************************************
* XOR TK with TK1 before applying the permutations.
* XOR TK with TK1 before applying the permutations.
* The key is then rearranged to match the barrel shiftrows representation.
* The key is then rearranged to match the barrel shiftrows representation.
******************************************************************************/
******************************************************************************/
...
@@ -278,19 +342,20 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -278,19 +342,20 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
u32
test
;
u32
test
;
u32
tk1
[
4
],
tmp
[
4
];
u32
tk1
[
4
],
tmp
[
4
];
packing
(
tk1
,
key
);
packing
(
tk1
,
key
);
memcpy
(
tmp
,
tk
,
16
);
tmp
[
0
]
=
tk
[
0
]
^
tk1
[
0
];
tmp
[
0
]
^=
tk1
[
0
];
tmp
[
1
]
=
tk
[
1
]
^
tk1
[
1
];
tmp
[
1
]
^=
tk1
[
1
];
tmp
[
2
]
=
tk
[
2
]
^
tk1
[
2
];
tmp
[
2
]
^=
tk1
[
2
];
tmp
[
3
]
=
tk
[
3
]
^
tk1
[
3
];
tmp
[
3
]
^=
tk1
[
3
];
for
(
int
i
=
0
;
i
<
rounds
;
i
+=
8
)
{
for
(
int
i
=
0
;
i
<
rounds
;
i
+=
8
)
{
test
=
(
i
%
16
<
8
)
?
1
:
0
;
//to apply the right power of P
test
=
(
i
%
16
<
8
)
?
1
:
0
;
//to apply the right power of P
tk
[
i
*
4
]
=
tmp
[
2
]
&
0xf0f0f0f0
;
tk
[
i
*
4
]
=
tmp
[
2
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
1
]
=
tmp
[
3
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
1
]
=
tmp
[
3
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
2
]
=
tmp
[
0
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
2
]
=
tmp
[
0
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
3
]
=
tmp
[
1
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
3
]
=
tmp
[
1
]
&
0xf0f0f0f0
;
memcpy
(
tmp
,
tk
+
i
*
4
+
4
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
4
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
5
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
6
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
7
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_2
(
tmp
);
// applies P^2
permute_tk_2
(
tmp
);
// applies P^2
else
else
...
@@ -307,8 +372,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -307,8 +372,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
10
]
|=
ROR
(
tmp
[
0
],
12
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
10
]
|=
ROR
(
tmp
[
0
],
12
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
11
]
=
ROR
(
tmp
[
1
],
28
)
&
0x03030303
;
tk
[
i
*
4
+
11
]
=
ROR
(
tmp
[
1
],
28
)
&
0x03030303
;
tk
[
i
*
4
+
11
]
|=
ROR
(
tmp
[
1
],
12
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
11
]
|=
ROR
(
tmp
[
1
],
12
)
&
0x0c0c0c0c
;
memcpy
(
tmp
,
tk
+
i
*
4
+
12
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
12
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
13
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
14
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
15
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_4
(
tmp
);
// applies P^4
permute_tk_4
(
tmp
);
// applies P^4
else
else
...
@@ -321,8 +388,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -321,8 +388,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
17
]
=
ROR
(
tmp
[
3
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
17
]
=
ROR
(
tmp
[
3
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
18
]
=
ROR
(
tmp
[
0
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
18
]
=
ROR
(
tmp
[
0
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
19
]
=
ROR
(
tmp
[
1
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
19
]
=
ROR
(
tmp
[
1
],
16
)
&
0xf0f0f0f0
;
memcpy
(
tmp
,
tk
+
i
*
4
+
20
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
20
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
21
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
22
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
23
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_6
(
tmp
);
// applies P^6
permute_tk_6
(
tmp
);
// applies P^6
else
else
...
@@ -339,8 +408,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -339,8 +408,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
26
]
|=
ROR
(
tmp
[
0
],
28
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
26
]
|=
ROR
(
tmp
[
0
],
28
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
27
]
=
ROR
(
tmp
[
1
],
12
)
&
0x03030303
;
tk
[
i
*
4
+
27
]
=
ROR
(
tmp
[
1
],
12
)
&
0x03030303
;
tk
[
i
*
4
+
27
]
|=
ROR
(
tmp
[
1
],
28
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
27
]
|=
ROR
(
tmp
[
1
],
28
)
&
0x0c0c0c0c
;
memcpy
(
tmp
,
tk
+
i
*
4
+
28
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
28
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
29
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
30
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
31
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_8
(
tmp
);
// applies P^8
permute_tk_8
(
tmp
);
// applies P^8
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
...
@@ -361,8 +432,7 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -361,8 +432,7 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
******************************************************************************/
******************************************************************************/
void
precompute_rtk2_3
(
u32
*
rtk
,
const
u8
*
tk2
,
const
u8
*
tk3
)
{
void
precompute_rtk2_3
(
u32
*
rtk
,
const
u8
*
tk2
,
const
u8
*
tk3
)
{
memset
(
rtk
,
0x00
,
16
*
SKINNY128_384_ROUNDS
);
memset
(
rtk
,
0x00
,
16
*
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk2
(
rtk
,
tk2
,
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk2_3
(
rtk
,
tk2
,
tk3
,
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk3
(
rtk
,
tk3
,
SKINNY128_384_ROUNDS
);
permute_tk
(
rtk
,
(
u8
*
)(
rtk
+
8
),
SKINNY128_384_ROUNDS
);
// rtk+8 is NULL
permute_tk
(
rtk
,
(
u8
*
)(
rtk
+
8
),
SKINNY128_384_ROUNDS
);
// rtk+8 is NULL
for
(
int
i
=
0
;
i
<
SKINNY128_384_ROUNDS
;
i
++
)
{
// add rconsts
for
(
int
i
=
0
;
i
<
SKINNY128_384_ROUNDS
;
i
++
)
{
// add rconsts
for
(
int
j
=
0
;
j
<
4
;
j
++
)
for
(
int
j
=
0
;
j
<
4
;
j
++
)
...
@@ -376,4 +446,4 @@ void precompute_rtk2_3(u32* rtk, const u8* tk2, const u8 * tk3) {
...
@@ -376,4 +446,4 @@ void precompute_rtk2_3(u32* rtk, const u8* tk2, const u8 * tk3) {
void
precompute_rtk1
(
u32
*
rtk1
,
const
u8
*
tk1
)
{
void
precompute_rtk1
(
u32
*
rtk1
,
const
u8
*
tk1
)
{
memset
(
rtk1
,
0x00
,
16
*
16
);
memset
(
rtk1
,
0x00
,
16
*
16
);
permute_tk
(
rtk1
,
tk1
,
16
);
permute_tk
(
rtk1
,
tk1
,
16
);
}
}
\ No newline at end of file
This diff is collapsed.
Click to expand it.
romulus/Implementations/crypto_aead/romulusn1+/opt32/skinny128.c
View file @
cad26506
...
@@ -93,15 +93,7 @@ void skinny128_384_plus(u8* ctext, const u8* ptext, const u32* rtk1,
...
@@ -93,15 +93,7 @@ void skinny128_384_plus(u8* ctext, const u8* ptext, const u32* rtk1,
u32
tmp
;
// used in SWAPMOVE macro
u32
tmp
;
// used in SWAPMOVE macro
u32
state
[
4
];
// 128-bit state
u32
state
[
4
];
// 128-bit state
packing
(
state
,
ptext
);
// from byte to bitsliced representation
packing
(
state
,
ptext
);
// from byte to bitsliced representation
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
);
for
(
int
i
=
0
;
i
<
10
;
i
++
)
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
16
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
(
i
%
4
)
*
16
,
rtk2_3
+
i
*
16
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
32
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
48
);
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
64
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
80
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
96
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
112
);
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
128
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
144
);
unpacking
(
ctext
,
state
);
// from bitsliced to byte representation
unpacking
(
ctext
,
state
);
// from bitsliced to byte representation
}
}
\ No newline at end of file
This diff is collapsed.
Click to expand it.
romulus/Implementations/crypto_aead/romulusn1+/opt32/tk_schedule.c
View file @
cad26506
...
@@ -260,6 +260,70 @@ void precompute_lfsr_tk3(u32* tk, const u8* key, const int rounds) {
...
@@ -260,6 +260,70 @@ void precompute_lfsr_tk3(u32* tk, const u8* key, const int rounds) {
}
}
/******************************************************************************
/******************************************************************************
* Precompute LFSR2(TK2) ^ LFSR3(TK3) for all round tweakeys.
* It is equivalent to the following 2 function calls:
* - precompute_lfsr_tk2(tk, t2, SKINNY128_384_ROUNDS);
* - precompute_lfsr_tk3(tk, t3, SKINNY128_384_ROUNDS);
* However 'precompute_lfsr_tk2_3' can allow to save cycles on some platform.
* On ARMv7 one should observe a gain of ~1k cycles per function call. It can be
* explained by the fact that less memory accesses to 'tk' are computed.
*
* To save some code size, the loop can be replaced by the following one:
* for(int i = 0 ; i < rounds; i+=2) {
* lfsr2_bs(tk2);
* lfsr3_bs(tk3);
* tk[i*4+4] = tk2[0] ^ tk3[0];
* tk[i*4+5] = tk2[1] ^ tk3[1];
* tk[i*4+6] = tk2[2] ^ tk3[2];
* tk[i*4+7] = tk2[3] ^ tk3[3];
* }
* at the cost of some cycles (~260 on ARM Cortex-M).
******************************************************************************/
void
precompute_lfsr_tk2_3
(
u32
*
tk
,
const
u8
*
t2
,
const
u8
*
t3
,
const
int
rounds
)
{
u32
tmp
,
tk2
[
4
],
tk3
[
4
];
packing
(
tk2
,
t2
);
packing
(
tk3
,
t3
);
tk
[
0
]
=
tk2
[
0
]
^
tk3
[
0
];
tk
[
1
]
=
tk2
[
1
]
^
tk3
[
1
];
tk
[
2
]
=
tk2
[
2
]
^
tk3
[
2
];
tk
[
3
]
=
tk2
[
3
]
^
tk3
[
3
];
for
(
int
i
=
0
;
i
<
rounds
;
i
+=
8
)
{
tk2
[
0
]
^=
(
tk2
[
2
]
&
0xaaaaaaaa
);
tk2
[
0
]
=
((
tk2
[
0
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk2
[
0
]
<<
1
)
&
0xaaaaaaaa
);
tk3
[
3
]
^=
((
tk3
[
1
]
&
0xaaaaaaaa
)
>>
1
);
tk3
[
3
]
=
((
tk3
[
3
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk3
[
3
]
<<
1
)
&
0xaaaaaaaa
);
tk
[
i
*
4
+
4
]
=
tk2
[
1
]
^
tk3
[
3
];
tk
[
i
*
4
+
5
]
=
tk2
[
2
]
^
tk3
[
0
];
tk
[
i
*
4
+
6
]
=
tk2
[
3
]
^
tk3
[
1
];
tk
[
i
*
4
+
7
]
=
tk2
[
0
]
^
tk3
[
2
];
tk2
[
1
]
^=
(
tk2
[
3
]
&
0xaaaaaaaa
);
tk2
[
1
]
=
((
tk2
[
1
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk2
[
1
]
<<
1
)
&
0xaaaaaaaa
);
tk3
[
2
]
^=
((
tk3
[
0
]
&
0xaaaaaaaa
)
>>
1
);
tk3
[
2
]
=
((
tk3
[
2
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk3
[
2
]
<<
1
)
&
0xaaaaaaaa
);
tk
[
i
*
4
+
12
]
=
tk2
[
2
]
^
tk3
[
2
];
tk
[
i
*
4
+
13
]
=
tk2
[
3
]
^
tk3
[
3
];
tk
[
i
*
4
+
14
]
=
tk2
[
0
]
^
tk3
[
0
];
tk
[
i
*
4
+
15
]
=
tk2
[
1
]
^
tk3
[
1
];
tk2
[
2
]
^=
(
tk2
[
0
]
&
0xaaaaaaaa
);
tk2
[
2
]
=
((
tk2
[
2
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk2
[
2
]
<<
1
)
&
0xaaaaaaaa
);
tk3
[
1
]
^=
((
tk3
[
3
]
&
0xaaaaaaaa
)
>>
1
);
tk3
[
1
]
=
((
tk3
[
1
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk3
[
1
]
<<
1
)
&
0xaaaaaaaa
);
tk
[
i
*
4
+
20
]
=
tk2
[
3
]
^
tk3
[
1
];
tk
[
i
*
4
+
21
]
=
tk2
[
0
]
^
tk3
[
2
];
tk
[
i
*
4
+
22
]
=
tk2
[
1
]
^
tk3
[
3
];
tk
[
i
*
4
+
23
]
=
tk2
[
2
]
^
tk3
[
0
];
tk2
[
3
]
^=
(
tk2
[
1
]
&
0xaaaaaaaa
);
tk2
[
3
]
=
((
tk2
[
3
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk2
[
3
]
<<
1
)
&
0xaaaaaaaa
);
tk3
[
0
]
^=
((
tk3
[
2
]
&
0xaaaaaaaa
)
>>
1
);
tk3
[
0
]
=
((
tk3
[
0
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk3
[
0
]
<<
1
)
&
0xaaaaaaaa
);
tk
[
i
*
4
+
28
]
=
tk2
[
0
]
^
tk3
[
0
];
tk
[
i
*
4
+
29
]
=
tk2
[
1
]
^
tk3
[
1
];
tk
[
i
*
4
+
30
]
=
tk2
[
2
]
^
tk3
[
2
];
tk
[
i
*
4
+
31
]
=
tk2
[
3
]
^
tk3
[
3
];
}
}
/******************************************************************************
* XOR TK with TK1 before applying the permutations.
* XOR TK with TK1 before applying the permutations.
* The key is then rearranged to match the barrel shiftrows representation.
* The key is then rearranged to match the barrel shiftrows representation.
******************************************************************************/
******************************************************************************/
...
@@ -267,19 +331,20 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -267,19 +331,20 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
u32
test
;
u32
test
;
u32
tk1
[
4
],
tmp
[
4
];
u32
tk1
[
4
],
tmp
[
4
];
packing
(
tk1
,
key
);
packing
(
tk1
,
key
);
memcpy
(
tmp
,
tk
,
16
);
tmp
[
0
]
=
tk
[
0
]
^
tk1
[
0
];
tmp
[
0
]
^=
tk1
[
0
];
tmp
[
1
]
=
tk
[
1
]
^
tk1
[
1
];
tmp
[
1
]
^=
tk1
[
1
];
tmp
[
2
]
=
tk
[
2
]
^
tk1
[
2
];
tmp
[
2
]
^=
tk1
[
2
];
tmp
[
3
]
=
tk
[
3
]
^
tk1
[
3
];
tmp
[
3
]
^=
tk1
[
3
];
for
(
int
i
=
0
;
i
<
rounds
;
i
+=
8
)
{
for
(
int
i
=
0
;
i
<
rounds
;
i
+=
8
)
{
test
=
(
i
%
16
<
8
)
?
1
:
0
;
//to apply the right power of P
test
=
(
i
%
16
<
8
)
?
1
:
0
;
//to apply the right power of P
tk
[
i
*
4
]
=
tmp
[
2
]
&
0xf0f0f0f0
;
tk
[
i
*
4
]
=
tmp
[
2
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
1
]
=
tmp
[
3
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
1
]
=
tmp
[
3
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
2
]
=
tmp
[
0
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
2
]
=
tmp
[
0
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
3
]
=
tmp
[
1
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
3
]
=
tmp
[
1
]
&
0xf0f0f0f0
;
memcpy
(
tmp
,
tk
+
i
*
4
+
4
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
4
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
5
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
6
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
7
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_2
(
tmp
);
// applies P^2
permute_tk_2
(
tmp
);
// applies P^2
else
else
...
@@ -296,8 +361,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -296,8 +361,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
10
]
|=
ROR
(
tmp
[
0
],
12
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
10
]
|=
ROR
(
tmp
[
0
],
12
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
11
]
=
ROR
(
tmp
[
1
],
28
)
&
0x03030303
;
tk
[
i
*
4
+
11
]
=
ROR
(
tmp
[
1
],
28
)
&
0x03030303
;
tk
[
i
*
4
+
11
]
|=
ROR
(
tmp
[
1
],
12
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
11
]
|=
ROR
(
tmp
[
1
],
12
)
&
0x0c0c0c0c
;
memcpy
(
tmp
,
tk
+
i
*
4
+
12
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
12
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
13
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
14
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
15
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_4
(
tmp
);
// applies P^4
permute_tk_4
(
tmp
);
// applies P^4
else
else
...
@@ -310,8 +377,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -310,8 +377,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
17
]
=
ROR
(
tmp
[
3
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
17
]
=
ROR
(
tmp
[
3
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
18
]
=
ROR
(
tmp
[
0
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
18
]
=
ROR
(
tmp
[
0
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
19
]
=
ROR
(
tmp
[
1
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
19
]
=
ROR
(
tmp
[
1
],
16
)
&
0xf0f0f0f0
;
memcpy
(
tmp
,
tk
+
i
*
4
+
20
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
20
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
21
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
22
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
23
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_6
(
tmp
);
// applies P^6
permute_tk_6
(
tmp
);
// applies P^6
else
else
...
@@ -328,8 +397,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -328,8 +397,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
26
]
|=
ROR
(
tmp
[
0
],
28
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
26
]
|=
ROR
(
tmp
[
0
],
28
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
27
]
=
ROR
(
tmp
[
1
],
12
)
&
0x03030303
;
tk
[
i
*
4
+
27
]
=
ROR
(
tmp
[
1
],
12
)
&
0x03030303
;
tk
[
i
*
4
+
27
]
|=
ROR
(
tmp
[
1
],
28
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
27
]
|=
ROR
(
tmp
[
1
],
28
)
&
0x0c0c0c0c
;
memcpy
(
tmp
,
tk
+
i
*
4
+
28
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
28
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
29
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
30
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
31
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_8
(
tmp
);
// applies P^8
permute_tk_8
(
tmp
);
// applies P^8
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
...
@@ -350,8 +421,7 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -350,8 +421,7 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
******************************************************************************/
******************************************************************************/
void
precompute_rtk2_3
(
u32
*
rtk
,
const
u8
*
tk2
,
const
u8
*
tk3
)
{
void
precompute_rtk2_3
(
u32
*
rtk
,
const
u8
*
tk2
,
const
u8
*
tk3
)
{
memset
(
rtk
,
0x00
,
16
*
SKINNY128_384_ROUNDS
);
memset
(
rtk
,
0x00
,
16
*
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk2
(
rtk
,
tk2
,
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk2_3
(
rtk
,
tk2
,
tk3
,
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk3
(
rtk
,
tk3
,
SKINNY128_384_ROUNDS
);
permute_tk
(
rtk
,
(
u8
*
)(
rtk
+
8
),
SKINNY128_384_ROUNDS
);
// rtk+8 is NULL
permute_tk
(
rtk
,
(
u8
*
)(
rtk
+
8
),
SKINNY128_384_ROUNDS
);
// rtk+8 is NULL
for
(
int
i
=
0
;
i
<
SKINNY128_384_ROUNDS
;
i
++
)
{
// add rconsts
for
(
int
i
=
0
;
i
<
SKINNY128_384_ROUNDS
;
i
++
)
{
// add rconsts
for
(
int
j
=
0
;
j
<
4
;
j
++
)
for
(
int
j
=
0
;
j
<
4
;
j
++
)
...
...
This diff is collapsed.
Click to expand it.
romulus/Implementations/crypto_aead/romulusn1/opt32/skinny128.c
View file @
cad26506
...
@@ -92,19 +92,7 @@ void skinny128_384(u8* ctext, const u8* ptext, const u32* rtk1, const u32* rtk2
...
@@ -92,19 +92,7 @@ void skinny128_384(u8* ctext, const u8* ptext, const u32* rtk1, const u32* rtk2
u32
tmp
;
// used in SWAPMOVE macro
u32
tmp
;
// used in SWAPMOVE macro
u32
state
[
4
];
// 128-bit state
u32
state
[
4
];
// 128-bit state
packing
(
state
,
ptext
);
// from byte to bitsliced representation
packing
(
state
,
ptext
);
// from byte to bitsliced representation
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
);
for
(
int
i
=
0
;
i
<
14
;
i
++
)
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
16
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
(
i
%
4
)
*
16
,
rtk2_3
+
i
*
16
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
32
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
48
);
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
64
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
80
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
96
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
112
);
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
128
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
144
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
160
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
176
);
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
192
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
208
);
unpacking
(
ctext
,
state
);
// from bitsliced to byte representation
unpacking
(
ctext
,
state
);
// from bitsliced to byte representation
}
}
\ No newline at end of file
This diff is collapsed.
Click to expand it.
romulus/Implementations/crypto_aead/romulusn1/opt32/tk_schedule.c
View file @
cad26506
...
@@ -271,6 +271,70 @@ void precompute_lfsr_tk3(u32* tk, const u8* key, const int rounds) {
...
@@ -271,6 +271,70 @@ void precompute_lfsr_tk3(u32* tk, const u8* key, const int rounds) {
}
}
/******************************************************************************
/******************************************************************************
* Precompute LFSR2(TK2) ^ LFSR3(TK3) for all round tweakeys.
* It is equivalent to the following 2 function calls:
* - precompute_lfsr_tk2(tk, t2, SKINNY128_384_ROUNDS);
* - precompute_lfsr_tk3(tk, t3, SKINNY128_384_ROUNDS);
* However 'precompute_lfsr_tk2_3' can allow to save cycles on some platform.
* On ARMv7 one should observe a gain of ~1k cycles per function call. It can be
* explained by the fact that less memory accesses to 'tk' are computed.
*
* To save some code size, the loop can be replaced by the following one:
* for(int i = 0 ; i < rounds; i+=2) {
* lfsr2_bs(tk2);
* lfsr3_bs(tk3);
* tk[i*4+4] = tk2[0] ^ tk3[0];
* tk[i*4+5] = tk2[1] ^ tk3[1];
* tk[i*4+6] = tk2[2] ^ tk3[2];
* tk[i*4+7] = tk2[3] ^ tk3[3];
* }
* at the cost of some cycles (~260 on ARM Cortex-M).
******************************************************************************/
void
precompute_lfsr_tk2_3
(
u32
*
tk
,
const
u8
*
t2
,
const
u8
*
t3
,
const
int
rounds
)
{
u32
tmp
,
tk2
[
4
],
tk3
[
4
];
packing
(
tk2
,
t2
);
packing
(
tk3
,
t3
);
tk
[
0
]
=
tk2
[
0
]
^
tk3
[
0
];
tk
[
1
]
=
tk2
[
1
]
^
tk3
[
1
];
tk
[
2
]
=
tk2
[
2
]
^
tk3
[
2
];
tk
[
3
]
=
tk2
[
3
]
^
tk3
[
3
];
for
(
int
i
=
0
;
i
<
rounds
;
i
+=
8
)
{
tk2
[
0
]
^=
(
tk2
[
2
]
&
0xaaaaaaaa
);
tk2
[
0
]
=
((
tk2
[
0
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk2
[
0
]
<<
1
)
&
0xaaaaaaaa
);
tk3
[
3
]
^=
((
tk3
[
1
]
&
0xaaaaaaaa
)
>>
1
);
tk3
[
3
]
=
((
tk3
[
3
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk3
[
3
]
<<
1
)
&
0xaaaaaaaa
);
tk
[
i
*
4
+
4
]
=
tk2
[
1
]
^
tk3
[
3
];
tk
[
i
*
4
+
5
]
=
tk2
[
2
]
^
tk3
[
0
];
tk
[
i
*
4
+
6
]
=
tk2
[
3
]
^
tk3
[
1
];
tk
[
i
*
4
+
7
]
=
tk2
[
0
]
^
tk3
[
2
];
tk2
[
1
]
^=
(
tk2
[
3
]
&
0xaaaaaaaa
);
tk2
[
1
]
=
((
tk2
[
1
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk2
[
1
]
<<
1
)
&
0xaaaaaaaa
);
tk3
[
2
]
^=
((
tk3
[
0
]
&
0xaaaaaaaa
)
>>
1
);
tk3
[
2
]
=
((
tk3
[
2
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk3
[
2
]
<<
1
)
&
0xaaaaaaaa
);
tk
[
i
*
4
+
12
]
=
tk2
[
2
]
^
tk3
[
2
];
tk
[
i
*
4
+
13
]
=
tk2
[
3
]
^
tk3
[
3
];
tk
[
i
*
4
+
14
]
=
tk2
[
0
]
^
tk3
[
0
];
tk
[
i
*
4
+
15
]
=
tk2
[
1
]
^
tk3
[
1
];
tk2
[
2
]
^=
(
tk2
[
0
]
&
0xaaaaaaaa
);
tk2
[
2
]
=
((
tk2
[
2
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk2
[
2
]
<<
1
)
&
0xaaaaaaaa
);
tk3
[
1
]
^=
((
tk3
[
3
]
&
0xaaaaaaaa
)
>>
1
);
tk3
[
1
]
=
((
tk3
[
1
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk3
[
1
]
<<
1
)
&
0xaaaaaaaa
);
tk
[
i
*
4
+
20
]
=
tk2
[
3
]
^
tk3
[
1
];
tk
[
i
*
4
+
21
]
=
tk2
[
0
]
^
tk3
[
2
];
tk
[
i
*
4
+
22
]
=
tk2
[
1
]
^
tk3
[
3
];
tk
[
i
*
4
+
23
]
=
tk2
[
2
]
^
tk3
[
0
];
tk2
[
3
]
^=
(
tk2
[
1
]
&
0xaaaaaaaa
);
tk2
[
3
]
=
((
tk2
[
3
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk2
[
3
]
<<
1
)
&
0xaaaaaaaa
);
tk3
[
0
]
^=
((
tk3
[
2
]
&
0xaaaaaaaa
)
>>
1
);
tk3
[
0
]
=
((
tk3
[
0
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk3
[
0
]
<<
1
)
&
0xaaaaaaaa
);
tk
[
i
*
4
+
28
]
=
tk2
[
0
]
^
tk3
[
0
];
tk
[
i
*
4
+
29
]
=
tk2
[
1
]
^
tk3
[
1
];
tk
[
i
*
4
+
30
]
=
tk2
[
2
]
^
tk3
[
2
];
tk
[
i
*
4
+
31
]
=
tk2
[
3
]
^
tk3
[
3
];
}
}
/******************************************************************************
* XOR TK with TK1 before applying the permutations.
* XOR TK with TK1 before applying the permutations.
* The key is then rearranged to match the barrel shiftrows representation.
* The key is then rearranged to match the barrel shiftrows representation.
******************************************************************************/
******************************************************************************/
...
@@ -278,19 +342,20 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -278,19 +342,20 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
u32
test
;
u32
test
;
u32
tk1
[
4
],
tmp
[
4
];
u32
tk1
[
4
],
tmp
[
4
];
packing
(
tk1
,
key
);
packing
(
tk1
,
key
);
memcpy
(
tmp
,
tk
,
16
);
tmp
[
0
]
=
tk
[
0
]
^
tk1
[
0
];
tmp
[
0
]
^=
tk1
[
0
];
tmp
[
1
]
=
tk
[
1
]
^
tk1
[
1
];
tmp
[
1
]
^=
tk1
[
1
];
tmp
[
2
]
=
tk
[
2
]
^
tk1
[
2
];
tmp
[
2
]
^=
tk1
[
2
];
tmp
[
3
]
=
tk
[
3
]
^
tk1
[
3
];
tmp
[
3
]
^=
tk1
[
3
];
for
(
int
i
=
0
;
i
<
rounds
;
i
+=
8
)
{
for
(
int
i
=
0
;
i
<
rounds
;
i
+=
8
)
{
test
=
(
i
%
16
<
8
)
?
1
:
0
;
//to apply the right power of P
test
=
(
i
%
16
<
8
)
?
1
:
0
;
//to apply the right power of P
tk
[
i
*
4
]
=
tmp
[
2
]
&
0xf0f0f0f0
;
tk
[
i
*
4
]
=
tmp
[
2
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
1
]
=
tmp
[
3
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
1
]
=
tmp
[
3
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
2
]
=
tmp
[
0
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
2
]
=
tmp
[
0
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
3
]
=
tmp
[
1
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
3
]
=
tmp
[
1
]
&
0xf0f0f0f0
;
memcpy
(
tmp
,
tk
+
i
*
4
+
4
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
4
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
5
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
6
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
7
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_2
(
tmp
);
// applies P^2
permute_tk_2
(
tmp
);
// applies P^2
else
else
...
@@ -307,8 +372,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -307,8 +372,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
10
]
|=
ROR
(
tmp
[
0
],
12
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
10
]
|=
ROR
(
tmp
[
0
],
12
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
11
]
=
ROR
(
tmp
[
1
],
28
)
&
0x03030303
;
tk
[
i
*
4
+
11
]
=
ROR
(
tmp
[
1
],
28
)
&
0x03030303
;
tk
[
i
*
4
+
11
]
|=
ROR
(
tmp
[
1
],
12
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
11
]
|=
ROR
(
tmp
[
1
],
12
)
&
0x0c0c0c0c
;
memcpy
(
tmp
,
tk
+
i
*
4
+
12
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
12
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
13
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
14
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
15
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_4
(
tmp
);
// applies P^4
permute_tk_4
(
tmp
);
// applies P^4
else
else
...
@@ -321,8 +388,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -321,8 +388,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
17
]
=
ROR
(
tmp
[
3
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
17
]
=
ROR
(
tmp
[
3
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
18
]
=
ROR
(
tmp
[
0
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
18
]
=
ROR
(
tmp
[
0
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
19
]
=
ROR
(
tmp
[
1
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
19
]
=
ROR
(
tmp
[
1
],
16
)
&
0xf0f0f0f0
;
memcpy
(
tmp
,
tk
+
i
*
4
+
20
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
20
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
21
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
22
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
23
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_6
(
tmp
);
// applies P^6
permute_tk_6
(
tmp
);
// applies P^6
else
else
...
@@ -339,8 +408,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -339,8 +408,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
26
]
|=
ROR
(
tmp
[
0
],
28
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
26
]
|=
ROR
(
tmp
[
0
],
28
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
27
]
=
ROR
(
tmp
[
1
],
12
)
&
0x03030303
;
tk
[
i
*
4
+
27
]
=
ROR
(
tmp
[
1
],
12
)
&
0x03030303
;
tk
[
i
*
4
+
27
]
|=
ROR
(
tmp
[
1
],
28
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
27
]
|=
ROR
(
tmp
[
1
],
28
)
&
0x0c0c0c0c
;
memcpy
(
tmp
,
tk
+
i
*
4
+
28
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
28
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
29
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
30
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
31
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_8
(
tmp
);
// applies P^8
permute_tk_8
(
tmp
);
// applies P^8
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
...
@@ -361,8 +432,7 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -361,8 +432,7 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
******************************************************************************/
******************************************************************************/
void
precompute_rtk2_3
(
u32
*
rtk
,
const
u8
*
tk2
,
const
u8
*
tk3
)
{
void
precompute_rtk2_3
(
u32
*
rtk
,
const
u8
*
tk2
,
const
u8
*
tk3
)
{
memset
(
rtk
,
0x00
,
16
*
SKINNY128_384_ROUNDS
);
memset
(
rtk
,
0x00
,
16
*
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk2
(
rtk
,
tk2
,
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk2_3
(
rtk
,
tk2
,
tk3
,
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk3
(
rtk
,
tk3
,
SKINNY128_384_ROUNDS
);
permute_tk
(
rtk
,
(
u8
*
)(
rtk
+
8
),
SKINNY128_384_ROUNDS
);
// rtk+8 is NULL
permute_tk
(
rtk
,
(
u8
*
)(
rtk
+
8
),
SKINNY128_384_ROUNDS
);
// rtk+8 is NULL
for
(
int
i
=
0
;
i
<
SKINNY128_384_ROUNDS
;
i
++
)
{
// add rconsts
for
(
int
i
=
0
;
i
<
SKINNY128_384_ROUNDS
;
i
++
)
{
// add rconsts
for
(
int
j
=
0
;
j
<
4
;
j
++
)
for
(
int
j
=
0
;
j
<
4
;
j
++
)
...
@@ -376,4 +446,4 @@ void precompute_rtk2_3(u32* rtk, const u8* tk2, const u8 * tk3) {
...
@@ -376,4 +446,4 @@ void precompute_rtk2_3(u32* rtk, const u8* tk2, const u8 * tk3) {
void
precompute_rtk1
(
u32
*
rtk1
,
const
u8
*
tk1
)
{
void
precompute_rtk1
(
u32
*
rtk1
,
const
u8
*
tk1
)
{
memset
(
rtk1
,
0x00
,
16
*
16
);
memset
(
rtk1
,
0x00
,
16
*
16
);
permute_tk
(
rtk1
,
tk1
,
16
);
permute_tk
(
rtk1
,
tk1
,
16
);
}
}
\ No newline at end of file
This diff is collapsed.
Click to expand it.
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_1/encrypt.c
View file @
cad26506
...
@@ -8,12 +8,10 @@
...
@@ -8,12 +8,10 @@
* @author Alexandre Adomnicai, Nanyang Technological University,
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
* alexandre.adomnicai@ntu.edu.sg
*
*
* @date
May
2020
* @date
June
2020
******************************************************************************/
******************************************************************************/
#include "skinny128.h"
#include "skinnyaead.h"
#include "skinnyaead.h"
#include <string.h>
#include <string.h>
#include <stdio.h>
/******************************************************************************
/******************************************************************************
* x ^= y where x, y are 128-bit blocks (16 bytes array).
* x ^= y where x, y are 128-bit blocks (16 bytes array).
...
...
This diff is collapsed.
Click to expand it.
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_1/skinny128.c
View file @
cad26506
...
@@ -16,12 +16,9 @@
...
@@ -16,12 +16,9 @@
* @author Alexandre Adomnicai, Nanyang Technological University,
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
* alexandre.adomnicai@ntu.edu.sg
*
*
* @date
May
2020
* @date
June
2020
******************************************************************************/
******************************************************************************/
#include <stdio.h>
#include <string.h>
#include "skinny128.h"
#include "skinny128.h"
#include "tk_schedule.h"
/******************************************************************************
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 0
* The MixColumns computation for rounds i such that (i % 4) == 0
...
@@ -153,16 +150,8 @@ void skinny128_384_plus_encrypt(u8* ctext, const u8* ptext, const u32* rtk1,
...
@@ -153,16 +150,8 @@ void skinny128_384_plus_encrypt(u8* ctext, const u8* ptext, const u32* rtk1,
u32
tmp
;
// used in SWAPMOVE macro
u32
tmp
;
// used in SWAPMOVE macro
u32
state
[
4
];
// 128-bit state
u32
state
[
4
];
// 128-bit state
packing
(
state
,
ptext
);
// from byte to bitsliced representation
packing
(
state
,
ptext
);
// from byte to bitsliced representation
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
);
for
(
int
i
=
0
;
i
<
10
;
i
++
)
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
16
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
(
i
%
4
)
*
16
,
rtk2_3
+
i
*
16
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
32
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
48
);
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
64
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
80
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
96
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
112
);
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
128
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
144
);
unpacking
(
ctext
,
state
);
// from bitsliced to byte representation
unpacking
(
ctext
,
state
);
// from bitsliced to byte representation
}
}
...
@@ -176,15 +165,7 @@ void skinny128_384_plus_decrypt(u8* ctext, const u8* ptext, const u32* rtk1,
...
@@ -176,15 +165,7 @@ void skinny128_384_plus_decrypt(u8* ctext, const u8* ptext, const u32* rtk1,
u32
tmp
;
// used in SWAPMOVE macro
u32
tmp
;
// used in SWAPMOVE macro
u32
state
[
4
];
// 128-bit state
u32
state
[
4
];
// 128-bit state
packing
(
state
,
ptext
);
// from byte to bitsliced representation
packing
(
state
,
ptext
);
// from byte to bitsliced representation
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
144
);
for
(
int
i
=
9
;
i
>=
0
;
i
--
)
INV_QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
128
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
(
i
%
4
)
*
16
,
rtk2_3
+
i
*
16
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
112
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
96
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
80
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
64
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
48
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
32
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
16
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
);
unpacking
(
ctext
,
state
);
// from bitsliced to byte representation
unpacking
(
ctext
,
state
);
// from bitsliced to byte representation
}
}
\ No newline at end of file
This diff is collapsed.
Click to expand it.
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_1/skinnyaead.h
View file @
cad26506
...
@@ -3,9 +3,7 @@
...
@@ -3,9 +3,7 @@
#include "skinny128.h"
#include "skinny128.h"
typedef
unsigned
char
u8
;
typedef
uint64_t
u64
;
typedef
unsigned
int
u32
;
typedef
unsigned
long
long
u64
;
#define TAGBYTES 16
#define TAGBYTES 16
#define KEYBYTES 16
#define KEYBYTES 16
...
...
This diff is collapsed.
Click to expand it.
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_1/tk_schedule.c
View file @
cad26506
...
@@ -4,16 +4,11 @@
...
@@ -4,16 +4,11 @@
* @author Alexandre Adomnicai, Nanyang Technological University,
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
* alexandre.adomnicai@ntu.edu.sg
*
*
* @date
May
2020
* @date
June
2020
******************************************************************************/
******************************************************************************/
#include <stdio.h>
#include <string.h>
#include <string.h> //for memcmp
#include "tk_schedule.h"
#include "skinny128.h"
#include "skinny128.h"
typedef
unsigned
char
u8
;
typedef
unsigned
int
u32
;
/******************************************************************************
/******************************************************************************
* The round constants according to the new representation.
* The round constants according to the new representation.
******************************************************************************/
******************************************************************************/
...
@@ -260,6 +255,70 @@ void precompute_lfsr_tk3(u32* tk, const u8* key, const int rounds) {
...
@@ -260,6 +255,70 @@ void precompute_lfsr_tk3(u32* tk, const u8* key, const int rounds) {
}
}
/******************************************************************************
/******************************************************************************
* Precompute LFSR2(TK2) ^ LFSR3(TK3) for all round tweakeys.
* It is equivalent to the following 2 function calls:
* - precompute_lfsr_tk2(tk, t2, SKINNY128_384_ROUNDS);
* - precompute_lfsr_tk3(tk, t3, SKINNY128_384_ROUNDS);
* However 'precompute_lfsr_tk2_3' can allow to save cycles on some platform.
* On ARMv7 one should observe a gain of ~1k cycles per function call. It can be
* explained by the fact that less memory accesses to 'tk' are computed.
*
* To save some code size, the loop can be replaced by the following one:
* for(int i = 0 ; i < rounds; i+=2) {
* lfsr2_bs(tk2);
* lfsr3_bs(tk3);
* tk[i*4+4] = tk2[0] ^ tk3[0];
* tk[i*4+5] = tk2[1] ^ tk3[1];
* tk[i*4+6] = tk2[2] ^ tk3[2];
* tk[i*4+7] = tk2[3] ^ tk3[3];
* }
* at the cost of some cycles (~260 on ARM Cortex-M).
******************************************************************************/
void
precompute_lfsr_tk2_3
(
u32
*
tk
,
const
u8
*
t2
,
const
u8
*
t3
,
const
int
rounds
)
{
u32
tk2
[
4
],
tk3
[
4
];
packing
(
tk2
,
t2
);
packing
(
tk3
,
t3
);
tk
[
0
]
=
tk2
[
0
]
^
tk3
[
0
];
tk
[
1
]
=
tk2
[
1
]
^
tk3
[
1
];
tk
[
2
]
=
tk2
[
2
]
^
tk3
[
2
];
tk
[
3
]
=
tk2
[
3
]
^
tk3
[
3
];
for
(
int
i
=
0
;
i
<
rounds
;
i
+=
8
)
{
tk2
[
0
]
^=
(
tk2
[
2
]
&
0xaaaaaaaa
);
tk2
[
0
]
=
((
tk2
[
0
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk2
[
0
]
<<
1
)
&
0xaaaaaaaa
);
tk3
[
3
]
^=
((
tk3
[
1
]
&
0xaaaaaaaa
)
>>
1
);
tk3
[
3
]
=
((
tk3
[
3
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk3
[
3
]
<<
1
)
&
0xaaaaaaaa
);
tk
[
i
*
4
+
4
]
=
tk2
[
1
]
^
tk3
[
3
];
tk
[
i
*
4
+
5
]
=
tk2
[
2
]
^
tk3
[
0
];
tk
[
i
*
4
+
6
]
=
tk2
[
3
]
^
tk3
[
1
];
tk
[
i
*
4
+
7
]
=
tk2
[
0
]
^
tk3
[
2
];
tk2
[
1
]
^=
(
tk2
[
3
]
&
0xaaaaaaaa
);
tk2
[
1
]
=
((
tk2
[
1
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk2
[
1
]
<<
1
)
&
0xaaaaaaaa
);
tk3
[
2
]
^=
((
tk3
[
0
]
&
0xaaaaaaaa
)
>>
1
);
tk3
[
2
]
=
((
tk3
[
2
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk3
[
2
]
<<
1
)
&
0xaaaaaaaa
);
tk
[
i
*
4
+
12
]
=
tk2
[
2
]
^
tk3
[
2
];
tk
[
i
*
4
+
13
]
=
tk2
[
3
]
^
tk3
[
3
];
tk
[
i
*
4
+
14
]
=
tk2
[
0
]
^
tk3
[
0
];
tk
[
i
*
4
+
15
]
=
tk2
[
1
]
^
tk3
[
1
];
tk2
[
2
]
^=
(
tk2
[
0
]
&
0xaaaaaaaa
);
tk2
[
2
]
=
((
tk2
[
2
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk2
[
2
]
<<
1
)
&
0xaaaaaaaa
);
tk3
[
1
]
^=
((
tk3
[
3
]
&
0xaaaaaaaa
)
>>
1
);
tk3
[
1
]
=
((
tk3
[
1
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk3
[
1
]
<<
1
)
&
0xaaaaaaaa
);
tk
[
i
*
4
+
20
]
=
tk2
[
3
]
^
tk3
[
1
];
tk
[
i
*
4
+
21
]
=
tk2
[
0
]
^
tk3
[
2
];
tk
[
i
*
4
+
22
]
=
tk2
[
1
]
^
tk3
[
3
];
tk
[
i
*
4
+
23
]
=
tk2
[
2
]
^
tk3
[
0
];
tk2
[
3
]
^=
(
tk2
[
1
]
&
0xaaaaaaaa
);
tk2
[
3
]
=
((
tk2
[
3
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk2
[
3
]
<<
1
)
&
0xaaaaaaaa
);
tk3
[
0
]
^=
((
tk3
[
2
]
&
0xaaaaaaaa
)
>>
1
);
tk3
[
0
]
=
((
tk3
[
0
]
&
0xaaaaaaaa
)
>>
1
)
|
((
tk3
[
0
]
<<
1
)
&
0xaaaaaaaa
);
tk
[
i
*
4
+
28
]
=
tk2
[
0
]
^
tk3
[
0
];
tk
[
i
*
4
+
29
]
=
tk2
[
1
]
^
tk3
[
1
];
tk
[
i
*
4
+
30
]
=
tk2
[
2
]
^
tk3
[
2
];
tk
[
i
*
4
+
31
]
=
tk2
[
3
]
^
tk3
[
3
];
}
}
/******************************************************************************
* XOR TK with TK1 before applying the permutations.
* XOR TK with TK1 before applying the permutations.
* The key is then rearranged to match the barrel shiftrows representation.
* The key is then rearranged to match the barrel shiftrows representation.
******************************************************************************/
******************************************************************************/
...
@@ -267,19 +326,20 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -267,19 +326,20 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
u32
test
;
u32
test
;
u32
tk1
[
4
],
tmp
[
4
];
u32
tk1
[
4
],
tmp
[
4
];
packing
(
tk1
,
key
);
packing
(
tk1
,
key
);
memcpy
(
tmp
,
tk
,
16
);
tmp
[
0
]
=
tk
[
0
]
^
tk1
[
0
];
tmp
[
0
]
^=
tk1
[
0
];
tmp
[
1
]
=
tk
[
1
]
^
tk1
[
1
];
tmp
[
1
]
^=
tk1
[
1
];
tmp
[
2
]
=
tk
[
2
]
^
tk1
[
2
];
tmp
[
2
]
^=
tk1
[
2
];
tmp
[
3
]
=
tk
[
3
]
^
tk1
[
3
];
tmp
[
3
]
^=
tk1
[
3
];
for
(
int
i
=
0
;
i
<
rounds
;
i
+=
8
)
{
for
(
int
i
=
0
;
i
<
rounds
;
i
+=
8
)
{
test
=
(
i
%
16
<
8
)
?
1
:
0
;
//to apply the right power of P
test
=
(
i
%
16
<
8
)
?
1
:
0
;
//to apply the right power of P
tk
[
i
*
4
]
=
tmp
[
2
]
&
0xf0f0f0f0
;
tk
[
i
*
4
]
=
tmp
[
2
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
1
]
=
tmp
[
3
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
1
]
=
tmp
[
3
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
2
]
=
tmp
[
0
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
2
]
=
tmp
[
0
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
3
]
=
tmp
[
1
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
3
]
=
tmp
[
1
]
&
0xf0f0f0f0
;
memcpy
(
tmp
,
tk
+
i
*
4
+
4
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
4
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
5
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
6
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
7
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_2
(
tmp
);
// applies P^2
permute_tk_2
(
tmp
);
// applies P^2
else
else
...
@@ -296,8 +356,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -296,8 +356,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
10
]
|=
ROR
(
tmp
[
0
],
12
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
10
]
|=
ROR
(
tmp
[
0
],
12
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
11
]
=
ROR
(
tmp
[
1
],
28
)
&
0x03030303
;
tk
[
i
*
4
+
11
]
=
ROR
(
tmp
[
1
],
28
)
&
0x03030303
;
tk
[
i
*
4
+
11
]
|=
ROR
(
tmp
[
1
],
12
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
11
]
|=
ROR
(
tmp
[
1
],
12
)
&
0x0c0c0c0c
;
memcpy
(
tmp
,
tk
+
i
*
4
+
12
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
12
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
13
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
14
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
15
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_4
(
tmp
);
// applies P^4
permute_tk_4
(
tmp
);
// applies P^4
else
else
...
@@ -310,8 +372,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -310,8 +372,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
17
]
=
ROR
(
tmp
[
3
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
17
]
=
ROR
(
tmp
[
3
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
18
]
=
ROR
(
tmp
[
0
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
18
]
=
ROR
(
tmp
[
0
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
19
]
=
ROR
(
tmp
[
1
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
19
]
=
ROR
(
tmp
[
1
],
16
)
&
0xf0f0f0f0
;
memcpy
(
tmp
,
tk
+
i
*
4
+
20
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
20
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
21
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
22
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
23
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_6
(
tmp
);
// applies P^6
permute_tk_6
(
tmp
);
// applies P^6
else
else
...
@@ -328,8 +392,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -328,8 +392,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
26
]
|=
ROR
(
tmp
[
0
],
28
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
26
]
|=
ROR
(
tmp
[
0
],
28
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
27
]
=
ROR
(
tmp
[
1
],
12
)
&
0x03030303
;
tk
[
i
*
4
+
27
]
=
ROR
(
tmp
[
1
],
12
)
&
0x03030303
;
tk
[
i
*
4
+
27
]
|=
ROR
(
tmp
[
1
],
28
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
27
]
|=
ROR
(
tmp
[
1
],
28
)
&
0x0c0c0c0c
;
memcpy
(
tmp
,
tk
+
i
*
4
+
28
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
28
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
29
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
30
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
31
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_8
(
tmp
);
// applies P^8
permute_tk_8
(
tmp
);
// applies P^8
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
...
@@ -350,8 +416,7 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -350,8 +416,7 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
******************************************************************************/
******************************************************************************/
void
precompute_rtk2_3
(
u32
*
rtk
,
const
u8
*
tk2
,
const
u8
*
tk3
)
{
void
precompute_rtk2_3
(
u32
*
rtk
,
const
u8
*
tk2
,
const
u8
*
tk3
)
{
memset
(
rtk
,
0x00
,
16
*
SKINNY128_384_ROUNDS
);
memset
(
rtk
,
0x00
,
16
*
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk2
(
rtk
,
tk2
,
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk2_3
(
rtk
,
tk2
,
tk3
,
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk3
(
rtk
,
tk3
,
SKINNY128_384_ROUNDS
);
permute_tk
(
rtk
,
(
u8
*
)(
rtk
+
8
),
SKINNY128_384_ROUNDS
);
// rtk+8 is NULL
permute_tk
(
rtk
,
(
u8
*
)(
rtk
+
8
),
SKINNY128_384_ROUNDS
);
// rtk+8 is NULL
for
(
int
i
=
0
;
i
<
SKINNY128_384_ROUNDS
;
i
++
)
{
// add rconsts
for
(
int
i
=
0
;
i
<
SKINNY128_384_ROUNDS
;
i
++
)
{
// add rconsts
for
(
int
j
=
0
;
j
<
4
;
j
++
)
for
(
int
j
=
0
;
j
<
4
;
j
++
)
...
...
This diff is collapsed.
Click to expand it.
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_1/tk_schedule.h
View file @
cad26506
#ifndef TK_SCHEDULE_H_
#ifndef TK_SCHEDULE_H_
#define TK_SCHEDULE_H_
#define TK_SCHEDULE_H_
typedef
unsigned
char
u8
;
#include <stdint.h>
typedef
unsigned
int
u32
;
typedef
uint8_t
u8
;
typedef
uint32_t
u32
;
void
packing
(
u32
*
out
,
const
u8
*
in
);
void
packing
(
u32
*
out
,
const
u8
*
in
);
void
unpacking
(
u8
*
out
,
u32
*
in
);
void
unpacking
(
u8
*
out
,
u32
*
in
);
void
precompute_rtk2_3
(
u32
*
rtk
,
const
u8
*
tk2
,
const
u8
*
tk3
);
void
precompute_rtk2_3
(
u32
*
rtk
,
const
u8
*
tk2
,
const
u8
*
tk3
);
void
precompute_rtk1
(
u32
*
rtk1
,
const
u8
*
tk1
);
void
precompute_rtk1
(
u32
*
rtk1
,
const
u8
*
tk1
);
#define ROR(x,y) (((x) >> (y)) | ((x) << (32 - (y))))
#define ROR(x,y) (((x) >> (y)) | ((x) << (32 - (y))))
/*
 * XOR_BLOCKS(x, y): x[k] ^= y[k] for k = 0..3, i.e. XORs four consecutive
 * elements of y into x in place.
 * Statement-like macro: wrapped in do { } while (0) (ISO C) rather than a
 * GNU statement expression so it stays portable and still behaves as a
 * single statement inside unbraced if/else.
 * Note: both arguments are evaluated four times; pass side-effect-free
 * expressions only.
 */
#define XOR_BLOCKS(x,y) do { \
    (x)[0] ^= (y)[0]; \
    (x)[1] ^= (y)[1]; \
    (x)[2] ^= (y)[2]; \
    (x)[3] ^= (y)[3]; \
} while (0)
#define SWAPMOVE(a, b, mask, n) ({ \
#define SWAPMOVE(a, b, mask, n) ({ \
tmp = (b ^ (a >> n)) & mask; \
tmp = (b ^ (a >> n)) & mask; \
...
...
This diff is collapsed.
Click to expand it.
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/encrypt.c
View file @
cad26506
...
@@ -8,12 +8,10 @@
...
@@ -8,12 +8,10 @@
* @author Alexandre Adomnicai, Nanyang Technological University,
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
* alexandre.adomnicai@ntu.edu.sg
*
*
* @date
May
2020
* @date
June
2020
******************************************************************************/
******************************************************************************/
#include "skinny128.h"
#include "skinnyaead.h"
#include "skinnyaead.h"
#include <string.h>
#include <string.h>
#include <stdio.h>
/******************************************************************************
/******************************************************************************
* x ^= y where x, y are 128-bit blocks (16 bytes array).
* x ^= y where x, y are 128-bit blocks (16 bytes array).
...
@@ -33,13 +31,13 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
...
@@ -33,13 +31,13 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
u8
feedback
;
u8
feedback
;
u8
tmp
[
2
*
BLOCKBYTES
];
u8
tmp
[
2
*
BLOCKBYTES
];
memset
(
tmp
,
0x00
,
2
*
BLOCKBYTES
);
memset
(
tmp
,
0x00
,
2
*
BLOCKBYTES
);
memset
(
auth
,
0x00
,
BLOCKBYTES
);
SET_DOMAIN
(
tmp
,
0x02
);
SET_DOMAIN
(
tmp
,
0x02
);
SET_DOMAIN
(
tmp
+
BLOCKBYTES
,
0x02
);
memset
(
auth
,
0x00
,
BLOCKBYTES
);
while
(
adlen
>=
2
*
BLOCKBYTES
)
{
while
(
adlen
>=
2
*
BLOCKBYTES
)
{
LE_STR_64
(
tmp
,
lfsr
);
LE_STR_64
(
tmp
,
lfsr
);
UPDATE_LFSR
(
lfsr
);
UPDATE_LFSR
(
lfsr
);
LE_STR_64
(
tmp
+
BLOCKBYTES
,
lfsr
);
LE_STR_64
(
tmp
+
BLOCKBYTES
,
lfsr
);
SET_DOMAIN
(
tmp
+
BLOCKBYTES
,
0x02
);
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tmp
+
BLOCKBYTES
);
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tmp
+
BLOCKBYTES
);
skinny128_384_plus_encrypt
(
tmp
,
tmp
+
BLOCKBYTES
,
ad
,
ad
+
BLOCKBYTES
,
*
tk
);
skinny128_384_plus_encrypt
(
tmp
,
tmp
+
BLOCKBYTES
,
ad
,
ad
+
BLOCKBYTES
,
*
tk
);
xor_block
(
auth
,
tmp
);
xor_block
(
auth
,
tmp
);
...
@@ -47,6 +45,9 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
...
@@ -47,6 +45,9 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
adlen
-=
2
*
BLOCKBYTES
;
adlen
-=
2
*
BLOCKBYTES
;
ad
+=
2
*
BLOCKBYTES
;
ad
+=
2
*
BLOCKBYTES
;
UPDATE_LFSR
(
lfsr
);
UPDATE_LFSR
(
lfsr
);
memset
(
tmp
,
0x00
,
2
*
BLOCKBYTES
);
// to save 32 bytes of RAM
SET_DOMAIN
(
tmp
,
0x02
);
SET_DOMAIN
(
tmp
+
BLOCKBYTES
,
0x02
);
}
}
if
(
adlen
>
BLOCKBYTES
)
{
// pad and process 2 blocs in //
if
(
adlen
>
BLOCKBYTES
)
{
// pad and process 2 blocs in //
LE_STR_64
(
tmp
,
lfsr
);
LE_STR_64
(
tmp
,
lfsr
);
...
@@ -65,11 +66,12 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
...
@@ -65,11 +66,12 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
LE_STR_64
(
tmp
,
lfsr
);
LE_STR_64
(
tmp
,
lfsr
);
if
(
mlen
==
0
)
{
// if tag has *NOT* been calculated yet
if
(
mlen
==
0
)
{
// if tag has *NOT* been calculated yet
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tag
);
// compute the tag
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tag
);
// compute the tag
skinny128_384_plus_encrypt
(
auth
,
c
,
ad
,
c
,
*
tk
);
skinny128_384_plus_encrypt
(
tmp
,
c
,
ad
,
c
,
*
tk
);
}
else
{
// if tag has been calculated yet
}
else
{
// if tag has been calculated yet
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tmp
);
// process last ad block
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tmp
);
// process last ad block
skinny128_384_plus_encrypt
(
auth
,
auth
,
ad
,
ad
,
*
tk
);
skinny128_384_plus_encrypt
(
tmp
,
tmp
,
ad
,
ad
,
*
tk
);
}
}
xor_block
(
auth
,
tmp
);
}
else
if
(
adlen
>
0
)
{
}
else
if
(
adlen
>
0
)
{
LE_STR_64
(
tmp
,
lfsr
);
LE_STR_64
(
tmp
,
lfsr
);
SET_DOMAIN
(
tmp
,
0x03
);
// domain for padding ad
SET_DOMAIN
(
tmp
,
0x03
);
// domain for padding ad
...
@@ -78,11 +80,12 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
...
@@ -78,11 +80,12 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
tmp
[
BLOCKBYTES
+
adlen
]
^=
0x80
;
// padding
tmp
[
BLOCKBYTES
+
adlen
]
^=
0x80
;
// padding
if
(
mlen
==
0
)
{
// if tag has *NOT* been calculated yet
if
(
mlen
==
0
)
{
// if tag has *NOT* been calculated yet
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tag
);
// compute the tag
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tag
);
// compute the tag
skinny128_384_plus_encrypt
(
auth
,
c
,
tmp
+
BLOCKBYTES
,
c
,
*
tk
);
skinny128_384_plus_encrypt
(
tmp
,
c
,
tmp
+
BLOCKBYTES
,
c
,
*
tk
);
}
else
{
// if tag has been calculated yet
}
else
{
// if tag has been calculated yet
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tmp
);
// process last ad block
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tmp
);
// process last ad block
skinny128_384_plus_encrypt
(
auth
,
auth
,
tmp
+
BLOCKBYTES
,
tmp
+
BLOCKBYTES
,
*
tk
);
skinny128_384_plus_encrypt
(
tmp
,
tmp
,
tmp
+
BLOCKBYTES
,
tmp
+
BLOCKBYTES
,
*
tk
);
}
}
xor_block
(
auth
,
tmp
);
}
}
}
}
...
@@ -290,4 +293,4 @@ int crypto_aead_decrypt (unsigned char *m, unsigned long long *mlen,
...
@@ -290,4 +293,4 @@ int crypto_aead_decrypt (unsigned char *m, unsigned long long *mlen,
feedback
|=
sum
[
i
]
^
c
[
i
];
// constant-time tag verification
feedback
|=
sum
[
i
]
^
c
[
i
];
// constant-time tag verification
return
feedback
;
return
feedback
;
// ----------------- Process the associated data -----------------
// ----------------- Process the associated data -----------------
}
}
\ No newline at end of file
This diff is collapsed.
Click to expand it.
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/skinny128.c
View file @
cad26506
...
@@ -16,12 +16,9 @@
...
@@ -16,12 +16,9 @@
* @author Alexandre Adomnicai, Nanyang Technological University,
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
* alexandre.adomnicai@ntu.edu.sg
*
*
* @date
May
2020
* @date
June
2020
******************************************************************************/
******************************************************************************/
#include <stdio.h>
#include <string.h>
#include "skinny128.h"
#include "skinny128.h"
#include "tk_schedule.h"
/****************************************************************************
/****************************************************************************
* The MixColumns operation for rounds i such that (i % 4) == 0.
* The MixColumns operation for rounds i such that (i % 4) == 0.
...
@@ -84,7 +81,7 @@ void mixcolumns_3(u32* state) {
...
@@ -84,7 +81,7 @@ void mixcolumns_3(u32* state) {
}
}
/****************************************************************************
/****************************************************************************
* The inverse MixColumns o
ep
ration for rounds i such that (i % 4) == 0
* The inverse MixColumns o
pe
ration for rounds i such that (i % 4) == 0
****************************************************************************/
****************************************************************************/
void
inv_mixcolumns_0
(
u32
*
state
)
{
void
inv_mixcolumns_0
(
u32
*
state
)
{
u32
tmp
;
u32
tmp
;
...
@@ -99,7 +96,7 @@ void inv_mixcolumns_0(u32* state) {
...
@@ -99,7 +96,7 @@ void inv_mixcolumns_0(u32* state) {
}
}
/****************************************************************************
/****************************************************************************
* The inverse MixColumns o
ep
ration for rounds i such that (i % 4) == 1
* The inverse MixColumns o
pe
ration for rounds i such that (i % 4) == 1
****************************************************************************/
****************************************************************************/
void
inv_mixcolumns_1
(
u32
*
state
)
{
void
inv_mixcolumns_1
(
u32
*
state
)
{
u32
tmp
;
u32
tmp
;
...
@@ -114,7 +111,7 @@ void inv_mixcolumns_1(u32* state) {
...
@@ -114,7 +111,7 @@ void inv_mixcolumns_1(u32* state) {
}
}
/****************************************************************************
/****************************************************************************
* The inverse MixColumns o
ep
ration for rounds i such that (i % 4) == 2
* The inverse MixColumns o
pe
ration for rounds i such that (i % 4) == 2
****************************************************************************/
****************************************************************************/
void
inv_mixcolumns_2
(
u32
*
state
)
{
void
inv_mixcolumns_2
(
u32
*
state
)
{
u32
tmp
;
u32
tmp
;
...
@@ -129,7 +126,7 @@ void inv_mixcolumns_2(u32* state) {
...
@@ -129,7 +126,7 @@ void inv_mixcolumns_2(u32* state) {
}
}
/****************************************************************************
/****************************************************************************
* The inverse MixColumns o
ep
ration for rounds i such that (i % 4) == 3
* The inverse MixColumns o
pe
ration for rounds i such that (i % 4) == 3
****************************************************************************/
****************************************************************************/
void
inv_mixcolumns_3
(
u32
*
state
)
{
void
inv_mixcolumns_3
(
u32
*
state
)
{
u32
tmp
;
u32
tmp
;
...
@@ -166,16 +163,8 @@ void skinny128_384_plus_encrypt(u8* ctext, u8* ctext_bis, const u8* ptext,
...
@@ -166,16 +163,8 @@ void skinny128_384_plus_encrypt(u8* ctext, u8* ctext_bis, const u8* ptext,
const
u8
*
ptext_bis
,
const
tweakey
tk
)
{
const
u8
*
ptext_bis
,
const
tweakey
tk
)
{
u32
state
[
8
];
u32
state
[
8
];
packing
(
state
,
ptext
,
ptext_bis
);
packing
(
state
,
ptext
,
ptext_bis
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
);
for
(
int
i
=
0
;
i
<
10
;
i
++
)
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
32
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
(
i
%
4
)
*
32
,
tk
.
rtk2_3
+
i
*
32
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
64
,
tk
.
rtk2_3
+
64
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
96
,
tk
.
rtk2_3
+
96
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
+
128
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
160
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
64
,
tk
.
rtk2_3
+
192
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
96
,
tk
.
rtk2_3
+
224
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
+
256
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
288
);
unpacking
(
ctext
,
ctext_bis
,
state
);
unpacking
(
ctext
,
ctext_bis
,
state
);
}
}
...
@@ -188,15 +177,7 @@ void skinny128_384_plus_decrypt(u8* ptext, u8* ptext_bis, const u8* ctext,
...
@@ -188,15 +177,7 @@ void skinny128_384_plus_decrypt(u8* ptext, u8* ptext_bis, const u8* ctext,
const
u8
*
ctext_bis
,
const
tweakey
tk
)
{
const
u8
*
ctext_bis
,
const
tweakey
tk
)
{
u32
state
[
8
];
u32
state
[
8
];
packing
(
state
,
ctext
,
ctext_bis
);
packing
(
state
,
ctext
,
ctext_bis
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
288
);
for
(
int
i
=
9
;
i
>=
0
;
i
--
)
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
+
256
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
(
i
%
4
)
*
32
,
tk
.
rtk2_3
+
i
*
32
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
96
,
tk
.
rtk2_3
+
224
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
64
,
tk
.
rtk2_3
+
192
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
160
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
+
128
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
96
,
tk
.
rtk2_3
+
96
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
64
,
tk
.
rtk2_3
+
64
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
32
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
);
unpacking
(
ptext
,
ptext_bis
,
state
);
unpacking
(
ptext
,
ptext_bis
,
state
);
}
}
\ No newline at end of file
This diff is collapsed.
Click to expand it.
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/skinny128.h
View file @
cad26506
#ifndef SKINNY128_H_
#ifndef SKINNY128_H_
#define SKINNY128_H_
#define SKINNY128_H_
#include "tk_schedule.h"
#include "tk_schedule.h"
void
skinny128_384_plus_encrypt
(
u8
*
ctext
,
u8
*
ctext_bis
,
const
u8
*
ptext
,
void
skinny128_384_plus_encrypt
(
u8
*
ctext
,
u8
*
ctext_bis
,
const
u8
*
ptext
,
...
...
This diff is collapsed.
Click to expand it.
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/skinnyaead.h
View file @
cad26506
...
@@ -3,9 +3,7 @@
...
@@ -3,9 +3,7 @@
#include "skinny128.h"
#include "skinny128.h"
typedef
unsigned
char
u8
;
typedef
uint64_t
u64
;
typedef
unsigned
int
u32
;
typedef
unsigned
long
long
u64
;
#define TAGBYTES 16
#define TAGBYTES 16
#define KEYBYTES 16
#define KEYBYTES 16
...
...
This diff is collapsed.
Click to expand it.
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/tk_schedule.c
View file @
cad26506
...
@@ -7,15 +7,11 @@
...
@@ -7,15 +7,11 @@
* @author Alexandre Adomnicai, Nanyang Technological University,
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
* alexandre.adomnicai@ntu.edu.sg
*
*
* @date
May
2020
* @date
June
2020
*******************************************************************************/
*******************************************************************************/
#include <stdio.h>
#include <string.h>
#include <string.h>
#include "tk_schedule.h"
#include "tk_schedule.h"
typedef
unsigned
char
u8
;
typedef
unsigned
int
u32
;
/****************************************************************************
/****************************************************************************
* The round constants according to the fixsliced representation.
* The round constants according to the fixsliced representation.
****************************************************************************/
****************************************************************************/
...
...
This diff is collapsed.
Click to expand it.
skinny/Implementations/crypto_aead/skinnyaeadtk3128128+v1/opt32_2/tk_schedule.h
View file @
cad26506
#ifndef TK_SCHEDULE_BS_H_
#ifndef TK_SCHEDULE_BS_H_
#define TK_SCHEDULE_BS_H_
#define TK_SCHEDULE_BS_H_
typedef
unsigned
char
u8
;
#include <stdint.h>
typedef
unsigned
int
u32
;
typedef
uint8_t
u8
;
typedef
uint32_t
u32
;
typedef
struct
{
typedef
struct
{
u32
rtk1
[
8
*
16
];
u32
rtk1
[
8
*
16
];
...
...
This diff is collapsed.
Click to expand it.
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_1/encrypt.c
View file @
cad26506
/******************************************************************************
/******************************************************************************
* Constant-time implementation of SKINNY-AEAD-M1 (v1.1).
* Constant-time implementation of SKINNY-AEAD-M1(v1).
*
* Two blocks are treated in parallel with SKINNY-128-384 whenever possible.
*
*
* For more details, see the paper at: https://
* For more details, see the paper at: https://
*
*
* @author Alexandre Adomnicai, Nanyang Technological University,
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
* alexandre.adomnicai@ntu.edu.sg
*
*
* @date
May
2020
* @date
June
2020
******************************************************************************/
******************************************************************************/
#include "skinny128.h"
#include "skinnyaead.h"
#include "skinnyaead.h"
#include <string.h>
#include <string.h>
#include <stdio.h>
/******************************************************************************
/******************************************************************************
* x ^= y where x, y are 128-bit blocks (16 bytes array).
* x ^= y where x, y are 128-bit blocks (16 bytes array).
...
@@ -75,12 +71,6 @@ int crypto_aead_encrypt (unsigned char *c, unsigned long long *clen,
...
@@ -75,12 +71,6 @@ int crypto_aead_encrypt (unsigned char *c, unsigned long long *clen,
}
}
LE_STR_64
(
tmp
,
lfsr
);
// lfsr for tag computation
LE_STR_64
(
tmp
,
lfsr
);
// lfsr for tag computation
precompute_rtk1
(
rtk1
,
tmp
);
precompute_rtk1
(
rtk1
,
tmp
);
for
(
int
i
=
0
;
i
<
16
;
i
++
)
{
printf
(
"%08x %08x %08x %08x
\n
"
,
rtk1
[
i
*
4
],
rtk1
[
i
*
4
+
1
],
rtk1
[
i
*
4
+
2
],
rtk1
[
i
*
4
+
3
]);
}
for
(
int
i
=
0
;
i
<
56
;
i
++
)
{
printf
(
"%08x %08x %08x %08x
\n
"
,
rtk2_3
[
i
*
4
],
rtk2_3
[
i
*
4
+
1
],
rtk2_3
[
i
*
4
+
2
],
rtk2_3
[
i
*
4
+
3
]);
}
skinny128_384_encrypt
(
c
,
c
,
rtk1
,
rtk2_3
);
// compute the tag
skinny128_384_encrypt
(
c
,
c
,
rtk1
,
rtk2_3
);
// compute the tag
// ----------------- Process the plaintext -----------------
// ----------------- Process the plaintext -----------------
...
@@ -200,4 +190,4 @@ int crypto_aead_decrypt (unsigned char *m, unsigned long long *mlen,
...
@@ -200,4 +190,4 @@ int crypto_aead_decrypt (unsigned char *m, unsigned long long *mlen,
feedback
|=
sum
[
i
]
^
c
[
i
];
// constant-time tag verification
feedback
|=
sum
[
i
]
^
c
[
i
];
// constant-time tag verification
return
feedback
;
return
feedback
;
// ----------------- Process the associated data -----------------
// ----------------- Process the associated data -----------------
}
}
\ No newline at end of file
This diff is collapsed.
Click to expand it.
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_1/skinny128.c
View file @
cad26506
...
@@ -16,12 +16,9 @@
...
@@ -16,12 +16,9 @@
* @author Alexandre Adomnicai, Nanyang Technological University,
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
* alexandre.adomnicai@ntu.edu.sg
*
*
* @date
May
2020
* @date
June
2020
******************************************************************************/
******************************************************************************/
#include <stdio.h>
#include <string.h>
#include "skinny128.h"
#include "skinny128.h"
#include "tk_schedule.h"
/******************************************************************************
/******************************************************************************
* The MixColumns computation for rounds i such that (i % 4) == 0
* The MixColumns computation for rounds i such that (i % 4) == 0
...
@@ -153,20 +150,8 @@ void skinny128_384_encrypt(u8* ctext, const u8* ptext, const u32* rtk1,
...
@@ -153,20 +150,8 @@ void skinny128_384_encrypt(u8* ctext, const u8* ptext, const u32* rtk1,
u32
tmp
;
// used in SWAPMOVE macro
u32
tmp
;
// used in SWAPMOVE macro
u32
state
[
4
];
// 128-bit state
u32
state
[
4
];
// 128-bit state
packing
(
state
,
ptext
);
// from byte to bitsliced representation
packing
(
state
,
ptext
);
// from byte to bitsliced representation
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
);
for
(
int
i
=
0
;
i
<
14
;
i
++
)
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
16
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
(
i
%
4
)
*
16
,
rtk2_3
+
i
*
16
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
32
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
48
);
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
64
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
80
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
96
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
112
);
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
128
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
144
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
160
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
176
);
QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
192
);
QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
208
);
unpacking
(
ctext
,
state
);
// from bitsliced to byte representation
unpacking
(
ctext
,
state
);
// from bitsliced to byte representation
}
}
...
@@ -180,19 +165,7 @@ void skinny128_384_decrypt(u8* ctext, const u8* ptext, const u32* rtk1,
...
@@ -180,19 +165,7 @@ void skinny128_384_decrypt(u8* ctext, const u8* ptext, const u32* rtk1,
u32
tmp
;
// used in SWAPMOVE macro
u32
tmp
;
// used in SWAPMOVE macro
u32
state
[
4
];
// 128-bit state
u32
state
[
4
];
// 128-bit state
packing
(
state
,
ptext
);
// from byte to bitsliced representation
packing
(
state
,
ptext
);
// from byte to bitsliced representation
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
208
);
for
(
int
i
=
13
;
i
>=
0
;
i
--
)
INV_QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
192
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
(
i
%
4
)
*
16
,
rtk2_3
+
i
*
16
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
176
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
160
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
144
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
128
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
112
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
96
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
80
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
+
64
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
48
,
rtk2_3
+
48
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
32
,
rtk2_3
+
32
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
+
16
,
rtk2_3
+
16
);
INV_QUADRUPLE_ROUND
(
state
,
rtk1
,
rtk2_3
);
unpacking
(
ctext
,
state
);
// from bitsliced to byte representation
unpacking
(
ctext
,
state
);
// from bitsliced to byte representation
}
}
\ No newline at end of file
This diff is collapsed.
Click to expand it.
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_1/skinnyaead.h
View file @
cad26506
...
@@ -3,9 +3,7 @@
...
@@ -3,9 +3,7 @@
#include "skinny128.h"
#include "skinny128.h"
typedef
unsigned
char
u8
;
typedef
uint64_t
u64
;
typedef
unsigned
int
u32
;
typedef
unsigned
long
long
u64
;
#define TAGBYTES 16
#define TAGBYTES 16
#define KEYBYTES 16
#define KEYBYTES 16
...
...
This diff is collapsed.
Click to expand it.
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_1/tk_schedule.c
View file @
cad26506
...
@@ -4,16 +4,11 @@
...
@@ -4,16 +4,11 @@
* @author Alexandre Adomnicai, Nanyang Technological University,
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
* alexandre.adomnicai@ntu.edu.sg
*
*
* @date
May
2020
* @date
June
2020
******************************************************************************/
******************************************************************************/
#include <stdio.h>
#include <string.h>
#include <string.h> //for memcmp
#include "tk_schedule.h"
#include "skinny128.h"
#include "skinny128.h"
typedef
unsigned
char
u8
;
typedef
unsigned
int
u32
;
/******************************************************************************
/******************************************************************************
* The round constants according to the new representation.
* The round constants according to the new representation.
******************************************************************************/
******************************************************************************/
...
@@ -271,6 +266,70 @@ void precompute_lfsr_tk3(u32* tk, const u8* key, const int rounds) {
...
@@ -271,6 +266,70 @@ void precompute_lfsr_tk3(u32* tk, const u8* key, const int rounds) {
}
}
/******************************************************************************
/******************************************************************************
* Precompute LFSR2(TK2) ^ LFSR3(TK3) for all round tweakeys.
* It is equivalent to the following 2 function calls:
* - precompute_lfsr_tk2(tk, t2, SKINNY128_384_ROUNDS);
* - precompute_lfsr_tk3(tk, t3, SKINNY128_384_ROUNDS);
* However, 'precompute_lfsr_tk2_3' can save cycles on some platforms.
* On ARMv7 one should observe a gain of ~1k cycles per function call, which is
* explained by the fact that fewer memory accesses to 'tk' are performed.
*
* To save some code size, the loop can be replaced by the following one:
* for(int i = 0 ; i < rounds; i+=2) {
* lfsr2_bs(tk2);
* lfsr3_bs(tk3);
* tk[i*4+4] = tk2[0] ^ tk3[0];
* tk[i*4+5] = tk2[1] ^ tk3[1];
* tk[i*4+6] = tk2[2] ^ tk3[2];
* tk[i*4+7] = tk2[3] ^ tk3[3];
* }
* at the cost of some cycles (~260 on ARM Cortex-M).
******************************************************************************/
/* Exchanges every pair of adjacent bits (bit 2k <-> bit 2k+1) of a word. */
static inline u32 lfsr23_swap_bit_pairs(u32 w) {
    return ((w & 0xaaaaaaaa) >> 1) | ((w << 1) & 0xaaaaaaaa);
}

/*
 * tk     : output; words 0..3 receive packed(t2) ^ packed(t3), then for every
 *          group of 8 rounds starting at round i the words i*4+4..7,
 *          i*4+12..15, i*4+20..23 and i*4+28..31 are written. Other words
 *          are left untouched.
 * t2, t3 : byte inputs handed to packing() to build the two working states.
 * rounds : the loop advances 8 rounds per iteration and writes up to
 *          tk[i*4+31], so tk must be large enough for the last group.
 */
void precompute_lfsr_tk2_3(u32* tk, const u8* t2, const u8* t3, const int rounds) {
    const u32 odd_bits = 0xaaaaaaaa;
    u32 lfsr2[4], lfsr3[4];

    packing(lfsr2, t2);
    packing(lfsr3, t3);

    /* First group of words: plain XOR of the two packed states. */
    for (int w = 0; w < 4; w++)
        tk[w] = lfsr2[w] ^ lfsr3[w];

    for (int i = 0; i < rounds; i += 8) {
        u32 *out = tk + i * 4;      /* base word for this group of 8 rounds */

        /* Update #1: touches lfsr2[0] and lfsr3[3]. */
        lfsr2[0] = lfsr23_swap_bit_pairs(lfsr2[0] ^ (lfsr2[2] & odd_bits));
        lfsr3[3] = lfsr23_swap_bit_pairs(lfsr3[3] ^ ((lfsr3[1] & odd_bits) >> 1));
        out[4] = lfsr2[1] ^ lfsr3[3];
        out[5] = lfsr2[2] ^ lfsr3[0];
        out[6] = lfsr2[3] ^ lfsr3[1];
        out[7] = lfsr2[0] ^ lfsr3[2];

        /* Update #2: touches lfsr2[1] and lfsr3[2]. */
        lfsr2[1] = lfsr23_swap_bit_pairs(lfsr2[1] ^ (lfsr2[3] & odd_bits));
        lfsr3[2] = lfsr23_swap_bit_pairs(lfsr3[2] ^ ((lfsr3[0] & odd_bits) >> 1));
        out[12] = lfsr2[2] ^ lfsr3[2];
        out[13] = lfsr2[3] ^ lfsr3[3];
        out[14] = lfsr2[0] ^ lfsr3[0];
        out[15] = lfsr2[1] ^ lfsr3[1];

        /* Update #3: touches lfsr2[2] and lfsr3[1]. */
        lfsr2[2] = lfsr23_swap_bit_pairs(lfsr2[2] ^ (lfsr2[0] & odd_bits));
        lfsr3[1] = lfsr23_swap_bit_pairs(lfsr3[1] ^ ((lfsr3[3] & odd_bits) >> 1));
        out[20] = lfsr2[3] ^ lfsr3[1];
        out[21] = lfsr2[0] ^ lfsr3[2];
        out[22] = lfsr2[1] ^ lfsr3[3];
        out[23] = lfsr2[2] ^ lfsr3[0];

        /* Update #4: touches lfsr2[3] and lfsr3[0]; indices are aligned again. */
        lfsr2[3] = lfsr23_swap_bit_pairs(lfsr2[3] ^ (lfsr2[1] & odd_bits));
        lfsr3[0] = lfsr23_swap_bit_pairs(lfsr3[0] ^ ((lfsr3[2] & odd_bits) >> 1));
        out[28] = lfsr2[0] ^ lfsr3[0];
        out[29] = lfsr2[1] ^ lfsr3[1];
        out[30] = lfsr2[2] ^ lfsr3[2];
        out[31] = lfsr2[3] ^ lfsr3[3];
    }
}
/******************************************************************************
* XOR TK with TK1 before applying the permutations.
* XOR TK with TK1 before applying the permutations.
* The key is then rearranged to match the barrel shiftrows representation.
* The key is then rearranged to match the barrel shiftrows representation.
******************************************************************************/
******************************************************************************/
...
@@ -278,19 +337,20 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -278,19 +337,20 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
u32
test
;
u32
test
;
u32
tk1
[
4
],
tmp
[
4
];
u32
tk1
[
4
],
tmp
[
4
];
packing
(
tk1
,
key
);
packing
(
tk1
,
key
);
memcpy
(
tmp
,
tk
,
16
);
tmp
[
0
]
=
tk
[
0
]
^
tk1
[
0
];
tmp
[
0
]
^=
tk1
[
0
];
tmp
[
1
]
=
tk
[
1
]
^
tk1
[
1
];
tmp
[
1
]
^=
tk1
[
1
];
tmp
[
2
]
=
tk
[
2
]
^
tk1
[
2
];
tmp
[
2
]
^=
tk1
[
2
];
tmp
[
3
]
=
tk
[
3
]
^
tk1
[
3
];
tmp
[
3
]
^=
tk1
[
3
];
for
(
int
i
=
0
;
i
<
rounds
;
i
+=
8
)
{
for
(
int
i
=
0
;
i
<
rounds
;
i
+=
8
)
{
test
=
(
i
%
16
<
8
)
?
1
:
0
;
//to apply the right power of P
test
=
(
i
%
16
<
8
)
?
1
:
0
;
//to apply the right power of P
tk
[
i
*
4
]
=
tmp
[
2
]
&
0xf0f0f0f0
;
tk
[
i
*
4
]
=
tmp
[
2
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
1
]
=
tmp
[
3
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
1
]
=
tmp
[
3
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
2
]
=
tmp
[
0
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
2
]
=
tmp
[
0
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
3
]
=
tmp
[
1
]
&
0xf0f0f0f0
;
tk
[
i
*
4
+
3
]
=
tmp
[
1
]
&
0xf0f0f0f0
;
memcpy
(
tmp
,
tk
+
i
*
4
+
4
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
4
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
5
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
6
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
7
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_2
(
tmp
);
// applies P^2
permute_tk_2
(
tmp
);
// applies P^2
else
else
...
@@ -307,8 +367,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -307,8 +367,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
10
]
|=
ROR
(
tmp
[
0
],
12
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
10
]
|=
ROR
(
tmp
[
0
],
12
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
11
]
=
ROR
(
tmp
[
1
],
28
)
&
0x03030303
;
tk
[
i
*
4
+
11
]
=
ROR
(
tmp
[
1
],
28
)
&
0x03030303
;
tk
[
i
*
4
+
11
]
|=
ROR
(
tmp
[
1
],
12
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
11
]
|=
ROR
(
tmp
[
1
],
12
)
&
0x0c0c0c0c
;
memcpy
(
tmp
,
tk
+
i
*
4
+
12
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
12
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
13
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
14
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
15
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_4
(
tmp
);
// applies P^4
permute_tk_4
(
tmp
);
// applies P^4
else
else
...
@@ -321,8 +383,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -321,8 +383,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
17
]
=
ROR
(
tmp
[
3
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
17
]
=
ROR
(
tmp
[
3
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
18
]
=
ROR
(
tmp
[
0
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
18
]
=
ROR
(
tmp
[
0
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
19
]
=
ROR
(
tmp
[
1
],
16
)
&
0xf0f0f0f0
;
tk
[
i
*
4
+
19
]
=
ROR
(
tmp
[
1
],
16
)
&
0xf0f0f0f0
;
memcpy
(
tmp
,
tk
+
i
*
4
+
20
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
20
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
21
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
22
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
23
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_6
(
tmp
);
// applies P^6
permute_tk_6
(
tmp
);
// applies P^6
else
else
...
@@ -339,8 +403,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -339,8 +403,10 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
tk
[
i
*
4
+
26
]
|=
ROR
(
tmp
[
0
],
28
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
26
]
|=
ROR
(
tmp
[
0
],
28
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
27
]
=
ROR
(
tmp
[
1
],
12
)
&
0x03030303
;
tk
[
i
*
4
+
27
]
=
ROR
(
tmp
[
1
],
12
)
&
0x03030303
;
tk
[
i
*
4
+
27
]
|=
ROR
(
tmp
[
1
],
28
)
&
0x0c0c0c0c
;
tk
[
i
*
4
+
27
]
|=
ROR
(
tmp
[
1
],
28
)
&
0x0c0c0c0c
;
memcpy
(
tmp
,
tk
+
i
*
4
+
28
,
16
);
tmp
[
0
]
=
tk
[
i
*
4
+
28
]
^
tk1
[
0
];
XOR_BLOCKS
(
tmp
,
tk1
);
tmp
[
1
]
=
tk
[
i
*
4
+
29
]
^
tk1
[
1
];
tmp
[
2
]
=
tk
[
i
*
4
+
30
]
^
tk1
[
2
];
tmp
[
3
]
=
tk
[
i
*
4
+
31
]
^
tk1
[
3
];
if
(
test
)
if
(
test
)
permute_tk_8
(
tmp
);
// applies P^8
permute_tk_8
(
tmp
);
// applies P^8
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
...
@@ -361,8 +427,7 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
...
@@ -361,8 +427,7 @@ void permute_tk(u32* tk, const u8* key, const int rounds) {
******************************************************************************/
******************************************************************************/
void
precompute_rtk2_3
(
u32
*
rtk
,
const
u8
*
tk2
,
const
u8
*
tk3
)
{
void
precompute_rtk2_3
(
u32
*
rtk
,
const
u8
*
tk2
,
const
u8
*
tk3
)
{
memset
(
rtk
,
0x00
,
16
*
SKINNY128_384_ROUNDS
);
memset
(
rtk
,
0x00
,
16
*
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk2
(
rtk
,
tk2
,
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk2_3
(
rtk
,
tk2
,
tk3
,
SKINNY128_384_ROUNDS
);
precompute_lfsr_tk3
(
rtk
,
tk3
,
SKINNY128_384_ROUNDS
);
permute_tk
(
rtk
,
(
u8
*
)(
rtk
+
8
),
SKINNY128_384_ROUNDS
);
// rtk+8 is NULL
permute_tk
(
rtk
,
(
u8
*
)(
rtk
+
8
),
SKINNY128_384_ROUNDS
);
// rtk+8 is NULL
for
(
int
i
=
0
;
i
<
SKINNY128_384_ROUNDS
;
i
++
)
{
// add rconsts
for
(
int
i
=
0
;
i
<
SKINNY128_384_ROUNDS
;
i
++
)
{
// add rconsts
for
(
int
j
=
0
;
j
<
4
;
j
++
)
for
(
int
j
=
0
;
j
<
4
;
j
++
)
...
@@ -376,4 +441,4 @@ void precompute_rtk2_3(u32* rtk, const u8* tk2, const u8 * tk3) {
...
@@ -376,4 +441,4 @@ void precompute_rtk2_3(u32* rtk, const u8* tk2, const u8 * tk3) {
void
precompute_rtk1
(
u32
*
rtk1
,
const
u8
*
tk1
)
{
void
precompute_rtk1
(
u32
*
rtk1
,
const
u8
*
tk1
)
{
memset
(
rtk1
,
0x00
,
16
*
16
);
memset
(
rtk1
,
0x00
,
16
*
16
);
permute_tk
(
rtk1
,
tk1
,
16
);
permute_tk
(
rtk1
,
tk1
,
16
);
}
}
\ No newline at end of file
This diff is collapsed.
Click to expand it.
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_1/tk_schedule.h
View file @
cad26506
#ifndef TK_SCHEDULE_H_
#ifndef TK_SCHEDULE_H_
#define TK_SCHEDULE_H_
#define TK_SCHEDULE_H_
typedef
unsigned
char
u8
;
#include <stdint.h>
typedef
unsigned
int
u32
;
typedef
uint8_t
u8
;
typedef
uint32_t
u32
;
void
packing
(
u32
*
out
,
const
u8
*
in
);
void
packing
(
u32
*
out
,
const
u8
*
in
);
void
unpacking
(
u8
*
out
,
u32
*
in
);
void
unpacking
(
u8
*
out
,
u32
*
in
);
void
precompute_rtk2_3
(
u32
*
rtk
,
const
u8
*
tk2
,
const
u8
*
tk3
);
void
precompute_rtk2_3
(
u32
*
rtk
,
const
u8
*
tk2
,
const
u8
*
tk3
);
void
precompute_rtk1
(
u32
*
rtk1
,
const
u8
*
tk1
);
void
precompute_rtk1
(
u32
*
rtk1
,
const
u8
*
tk1
);
#define ROR(x,y) (((x) >> (y)) | ((x) << (32 - (y))))
#define ROR(x,y) (((x) >> (y)) | ((x) << (32 - (y))))
#define XOR_BLOCKS(x,y) ({ \
(x)[0] ^= (y)[0]; \
(x)[1] ^= (y)[1]; \
(x)[2] ^= (y)[2]; \
(x)[3] ^= (y)[3]; \
})
#define SWAPMOVE(a, b, mask, n) ({ \
#define SWAPMOVE(a, b, mask, n) ({ \
tmp = (b ^ (a >> n)) & mask; \
tmp = (b ^ (a >> n)) & mask; \
...
...
This diff is collapsed.
Click to expand it.
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_2/encrypt.c
View file @
cad26506
...
@@ -8,12 +8,10 @@
...
@@ -8,12 +8,10 @@
* @author Alexandre Adomnicai, Nanyang Technological University,
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
* alexandre.adomnicai@ntu.edu.sg
*
*
* @date
May
2020
* @date
June
2020
******************************************************************************/
******************************************************************************/
#include "skinny128.h"
#include "skinnyaead.h"
#include "skinnyaead.h"
#include <string.h>
#include <string.h>
#include <stdio.h>
/******************************************************************************
/******************************************************************************
* x ^= y where x, y are 128-bit blocks (16 bytes array).
* x ^= y where x, y are 128-bit blocks (16 bytes array).
...
@@ -33,13 +31,13 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
...
@@ -33,13 +31,13 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
u8
feedback
;
u8
feedback
;
u8
tmp
[
2
*
BLOCKBYTES
];
u8
tmp
[
2
*
BLOCKBYTES
];
memset
(
tmp
,
0x00
,
2
*
BLOCKBYTES
);
memset
(
tmp
,
0x00
,
2
*
BLOCKBYTES
);
memset
(
auth
,
0x00
,
BLOCKBYTES
);
SET_DOMAIN
(
tmp
,
0x02
);
SET_DOMAIN
(
tmp
,
0x02
);
SET_DOMAIN
(
tmp
+
BLOCKBYTES
,
0x02
);
memset
(
auth
,
0x00
,
BLOCKBYTES
);
while
(
adlen
>=
2
*
BLOCKBYTES
)
{
while
(
adlen
>=
2
*
BLOCKBYTES
)
{
LE_STR_64
(
tmp
,
lfsr
);
LE_STR_64
(
tmp
,
lfsr
);
UPDATE_LFSR
(
lfsr
);
UPDATE_LFSR
(
lfsr
);
LE_STR_64
(
tmp
+
BLOCKBYTES
,
lfsr
);
LE_STR_64
(
tmp
+
BLOCKBYTES
,
lfsr
);
SET_DOMAIN
(
tmp
+
BLOCKBYTES
,
0x02
);
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tmp
+
BLOCKBYTES
);
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tmp
+
BLOCKBYTES
);
skinny128_384_encrypt
(
tmp
,
tmp
+
BLOCKBYTES
,
ad
,
ad
+
BLOCKBYTES
,
*
tk
);
skinny128_384_encrypt
(
tmp
,
tmp
+
BLOCKBYTES
,
ad
,
ad
+
BLOCKBYTES
,
*
tk
);
xor_block
(
auth
,
tmp
);
xor_block
(
auth
,
tmp
);
...
@@ -47,6 +45,9 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
...
@@ -47,6 +45,9 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
adlen
-=
2
*
BLOCKBYTES
;
adlen
-=
2
*
BLOCKBYTES
;
ad
+=
2
*
BLOCKBYTES
;
ad
+=
2
*
BLOCKBYTES
;
UPDATE_LFSR
(
lfsr
);
UPDATE_LFSR
(
lfsr
);
memset
(
tmp
,
0x00
,
2
*
BLOCKBYTES
);
// to save 32 bytes of RAM
SET_DOMAIN
(
tmp
,
0x02
);
SET_DOMAIN
(
tmp
+
BLOCKBYTES
,
0x02
);
}
}
if
(
adlen
>
BLOCKBYTES
)
{
// pad and process 2 blocs in //
if
(
adlen
>
BLOCKBYTES
)
{
// pad and process 2 blocs in //
LE_STR_64
(
tmp
,
lfsr
);
LE_STR_64
(
tmp
,
lfsr
);
...
@@ -65,11 +66,12 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
...
@@ -65,11 +66,12 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
LE_STR_64
(
tmp
,
lfsr
);
LE_STR_64
(
tmp
,
lfsr
);
if
(
mlen
==
0
)
{
// if tag has *NOT* been calculated yet
if
(
mlen
==
0
)
{
// if tag has *NOT* been calculated yet
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tag
);
// compute the tag
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tag
);
// compute the tag
skinny128_384_encrypt
(
auth
,
c
,
ad
,
c
,
*
tk
);
skinny128_384_encrypt
(
tmp
,
c
,
ad
,
c
,
*
tk
);
}
else
{
// if tag has been calculated yet
}
else
{
// if tag has been calculated yet
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tmp
);
// process last ad block
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tmp
);
// process last ad block
skinny128_384_encrypt
(
auth
,
auth
,
ad
,
ad
,
*
tk
);
skinny128_384_encrypt
(
tmp
,
tmp
,
ad
,
ad
,
*
tk
);
}
}
xor_block
(
auth
,
tmp
);
}
else
if
(
adlen
>
0
)
{
}
else
if
(
adlen
>
0
)
{
LE_STR_64
(
tmp
,
lfsr
);
LE_STR_64
(
tmp
,
lfsr
);
SET_DOMAIN
(
tmp
,
0x03
);
// domain for padding ad
SET_DOMAIN
(
tmp
,
0x03
);
// domain for padding ad
...
@@ -78,11 +80,12 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
...
@@ -78,11 +80,12 @@ static void skinny_aead_m1_auth(u8* auth, u8* c, u8* tag, tweakey* tk,
tmp
[
BLOCKBYTES
+
adlen
]
^=
0x80
;
// padding
tmp
[
BLOCKBYTES
+
adlen
]
^=
0x80
;
// padding
if
(
mlen
==
0
)
{
// if tag has *NOT* been calculated yet
if
(
mlen
==
0
)
{
// if tag has *NOT* been calculated yet
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tag
);
// compute the tag
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tag
);
// compute the tag
skinny128_384_encrypt
(
auth
,
c
,
tmp
+
BLOCKBYTES
,
c
,
*
tk
);
skinny128_384_encrypt
(
tmp
,
c
,
tmp
+
BLOCKBYTES
,
c
,
*
tk
);
}
else
{
// if tag has been calculated yet
}
else
{
// if tag has been calculated yet
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tmp
);
// process last ad block
precompute_rtk1
(
tk
->
rtk1
,
tmp
,
tmp
);
// process last ad block
skinny128_384_encrypt
(
auth
,
auth
,
tmp
+
BLOCKBYTES
,
tmp
+
BLOCKBYTES
,
*
tk
);
skinny128_384_encrypt
(
tmp
,
tmp
,
tmp
+
BLOCKBYTES
,
tmp
+
BLOCKBYTES
,
*
tk
);
}
}
xor_block
(
auth
,
tmp
);
}
}
}
}
...
@@ -290,4 +293,4 @@ int crypto_aead_decrypt (unsigned char *m, unsigned long long *mlen,
...
@@ -290,4 +293,4 @@ int crypto_aead_decrypt (unsigned char *m, unsigned long long *mlen,
feedback
|=
sum
[
i
]
^
c
[
i
];
// constant-time tag verification
feedback
|=
sum
[
i
]
^
c
[
i
];
// constant-time tag verification
return
feedback
;
return
feedback
;
// ----------------- Process the associated data -----------------
// ----------------- Process the associated data -----------------
}
}
\ No newline at end of file
This diff is collapsed.
Click to expand it.
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_2/skinny128.c
View file @
cad26506
...
@@ -16,12 +16,9 @@
...
@@ -16,12 +16,9 @@
* @author Alexandre Adomnicai, Nanyang Technological University,
* @author Alexandre Adomnicai, Nanyang Technological University,
* alexandre.adomnicai@ntu.edu.sg
* alexandre.adomnicai@ntu.edu.sg
*
*
* @date
May
2020
* @date
June
2020
******************************************************************************/
******************************************************************************/
#include <stdio.h>
#include <string.h>
#include "skinny128.h"
#include "skinny128.h"
#include "tk_schedule.h"
/****************************************************************************
/****************************************************************************
* The MixColumns operation for rounds i such that (i % 4) == 0.
* The MixColumns operation for rounds i such that (i % 4) == 0.
...
@@ -84,7 +81,7 @@ void mixcolumns_3(u32* state) {
...
@@ -84,7 +81,7 @@ void mixcolumns_3(u32* state) {
}
}
/****************************************************************************
/****************************************************************************
* The inverse MixColumns o
ep
ration for rounds i such that (i % 4) == 0
* The inverse MixColumns o
pe
ration for rounds i such that (i % 4) == 0
****************************************************************************/
****************************************************************************/
void
inv_mixcolumns_0
(
u32
*
state
)
{
void
inv_mixcolumns_0
(
u32
*
state
)
{
u32
tmp
;
u32
tmp
;
...
@@ -99,7 +96,7 @@ void inv_mixcolumns_0(u32* state) {
...
@@ -99,7 +96,7 @@ void inv_mixcolumns_0(u32* state) {
}
}
/****************************************************************************
/****************************************************************************
* The inverse MixColumns o
ep
ration for rounds i such that (i % 4) == 1
* The inverse MixColumns o
pe
ration for rounds i such that (i % 4) == 1
****************************************************************************/
****************************************************************************/
void
inv_mixcolumns_1
(
u32
*
state
)
{
void
inv_mixcolumns_1
(
u32
*
state
)
{
u32
tmp
;
u32
tmp
;
...
@@ -114,7 +111,7 @@ void inv_mixcolumns_1(u32* state) {
...
@@ -114,7 +111,7 @@ void inv_mixcolumns_1(u32* state) {
}
}
/****************************************************************************
/****************************************************************************
* The inverse MixColumns o
ep
ration for rounds i such that (i % 4) == 2
* The inverse MixColumns o
pe
ration for rounds i such that (i % 4) == 2
****************************************************************************/
****************************************************************************/
void
inv_mixcolumns_2
(
u32
*
state
)
{
void
inv_mixcolumns_2
(
u32
*
state
)
{
u32
tmp
;
u32
tmp
;
...
@@ -129,7 +126,7 @@ void inv_mixcolumns_2(u32* state) {
...
@@ -129,7 +126,7 @@ void inv_mixcolumns_2(u32* state) {
}
}
/****************************************************************************
/****************************************************************************
* The inverse MixColumns o
ep
ration for rounds i such that (i % 4) == 3
* The inverse MixColumns o
pe
ration for rounds i such that (i % 4) == 3
****************************************************************************/
****************************************************************************/
void
inv_mixcolumns_3
(
u32
*
state
)
{
void
inv_mixcolumns_3
(
u32
*
state
)
{
u32
tmp
;
u32
tmp
;
...
@@ -166,20 +163,8 @@ void skinny128_384_encrypt(u8* ctext, u8* ctext_bis, const u8* ptext,
...
@@ -166,20 +163,8 @@ void skinny128_384_encrypt(u8* ctext, u8* ctext_bis, const u8* ptext,
const
u8
*
ptext_bis
,
const
tweakey
tk
)
{
const
u8
*
ptext_bis
,
const
tweakey
tk
)
{
u32
state
[
8
];
u32
state
[
8
];
packing
(
state
,
ptext
,
ptext_bis
);
packing
(
state
,
ptext
,
ptext_bis
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
);
for
(
int
i
=
0
;
i
<
14
;
i
++
)
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
32
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
(
i
%
4
)
*
32
,
tk
.
rtk2_3
+
i
*
32
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
64
,
tk
.
rtk2_3
+
64
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
96
,
tk
.
rtk2_3
+
96
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
+
128
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
160
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
64
,
tk
.
rtk2_3
+
192
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
96
,
tk
.
rtk2_3
+
224
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
+
256
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
288
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
64
,
tk
.
rtk2_3
+
320
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
96
,
tk
.
rtk2_3
+
352
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
+
384
);
QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
416
);
unpacking
(
ctext
,
ctext_bis
,
state
);
unpacking
(
ctext
,
ctext_bis
,
state
);
}
}
...
@@ -192,19 +177,7 @@ void skinny128_384_decrypt(u8* ptext, u8* ptext_bis, const u8* ctext,
...
@@ -192,19 +177,7 @@ void skinny128_384_decrypt(u8* ptext, u8* ptext_bis, const u8* ctext,
const
u8
*
ctext_bis
,
const
tweakey
tk
)
{
const
u8
*
ctext_bis
,
const
tweakey
tk
)
{
u32
state
[
8
];
u32
state
[
8
];
packing
(
state
,
ctext
,
ctext_bis
);
packing
(
state
,
ctext
,
ctext_bis
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
416
);
for
(
int
i
=
13
;
i
>=
0
;
i
--
)
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
+
384
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
(
i
%
4
)
*
32
,
tk
.
rtk2_3
+
i
*
32
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
96
,
tk
.
rtk2_3
+
352
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
64
,
tk
.
rtk2_3
+
320
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
288
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
+
256
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
96
,
tk
.
rtk2_3
+
224
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
64
,
tk
.
rtk2_3
+
192
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
160
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
+
128
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
96
,
tk
.
rtk2_3
+
96
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
64
,
tk
.
rtk2_3
+
64
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
+
32
,
tk
.
rtk2_3
+
32
);
INV_QUADRUPLE_ROUND
(
state
,
tk
.
rtk1
,
tk
.
rtk2_3
);
unpacking
(
ptext
,
ptext_bis
,
state
);
unpacking
(
ptext
,
ptext_bis
,
state
);
}
}
\ No newline at end of file
This diff is collapsed.
Click to expand it.
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_2/skinnyaead.h
View file @
cad26506
...
@@ -3,9 +3,7 @@
...
@@ -3,9 +3,7 @@
#include "skinny128.h"
#include "skinny128.h"
typedef
unsigned
char
u8
;
typedef
uint64_t
u64
;
typedef
unsigned
int
u32
;
typedef
unsigned
long
long
u64
;
#define TAGBYTES 16
#define TAGBYTES 16
#define KEYBYTES 16
#define KEYBYTES 16
...
...
This diff is collapsed.
Click to expand it.
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_2/tk_schedule.c
View file @
cad26506
...
@@ -9,13 +9,9 @@
...
@@ -9,13 +9,9 @@
*
*
* @date May 2020
* @date May 2020
*******************************************************************************/
*******************************************************************************/
#include <stdio.h>
#include <string.h>
#include <string.h>
#include "tk_schedule.h"
#include "tk_schedule.h"
typedef
unsigned
char
u8
;
typedef
unsigned
int
u32
;
/****************************************************************************
/****************************************************************************
* The round constants according to the fixsliced representation.
* The round constants according to the fixsliced representation.
****************************************************************************/
****************************************************************************/
...
...
This diff is collapsed.
Click to expand it.
skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/opt32_2/tk_schedule.h
View file @
cad26506
#ifndef TK_SCHEDULE_BS_H_
#ifndef TK_SCHEDULE_BS_H_
#define TK_SCHEDULE_BS_H_
#define TK_SCHEDULE_BS_H_
typedef
unsigned
char
u8
;
#include <stdint.h>
typedef
unsigned
int
u32
;
typedef
uint8_t
u8
;
typedef
uint32_t
u32
;
typedef
struct
{
typedef
struct
{
u32
rtk1
[
8
*
16
];
u32
rtk1
[
8
*
16
];
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment