Commit d43acb73 by Martin Schläffer Committed by Sebastian Renner

ascon

parent 7563a04f
---
Language: Cpp
# BasedOnStyle: Google
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignConsecutiveMacros: false
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: Left
AlignOperands: true
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortLambdasOnASingleLine: All
AllowShortIfStatementsOnASingleLine: WithoutElse
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
AfterCaseLabel: false
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
SplitEmptyFunction: true
SplitEmptyRecord: true
SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakInheritanceList: BeforeColon
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DeriveLineEnding: true
DerivePointerAlignment: true
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IncludeBlocks: Regroup
IncludeCategories:
- Regex: '^<ext/.*\.h>'
Priority: 2
SortPriority: 0
- Regex: '^<.*\.h>'
Priority: 1
SortPriority: 0
- Regex: '^<.*'
Priority: 2
SortPriority: 0
- Regex: '.*'
Priority: 3
SortPriority: 0
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
IndentCaseLabels: true
IndentGotoLabels: true
IndentPPDirectives: None
IndentWidth: 2
IndentWrappedFunctionNames: false
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Never
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
RawStringFormats:
- Language: Cpp
Delimiters:
- cc
- CC
- cpp
- Cpp
- CPP
- 'c++'
- 'C++'
CanonicalDelimiter: ''
BasedOnStyle: google
- Language: TextProto
Delimiters:
- pb
- PB
- proto
- PROTO
EnclosingFunctions:
- EqualsProto
- EquivToProto
- PARSE_PARTIAL_TEXT_PROTO
- PARSE_TEST_PROTO
- PARSE_TEXT_PROTO
- ParseTextOrDie
- ParseTextProtoOrDie
CanonicalDelimiter: ''
BasedOnStyle: google
ReflowComments: true
SortIncludes: true
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInConditionalStatement: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
Standard: Auto
StatementMacros:
- Q_UNUSED
- QT_REQUIRE_VERSION
TabWidth: 8
UseCRLF: false
UseTab: Never
...
cmake_minimum_required(VERSION 3.6)
project(ascon LANGUAGES C ASM)
enable_testing()

# set the default version, algorithms, implementations, tests, flags, defs
set(DEFAULT_VERSIONS v12)
set(DEFAULT_ALGS ascon128 ascon128a ascon80pq asconhash asconxof)
set(DEFAULT_IMPLS ref opt64 opt64_lowsize bi32 bi32_lowsize bi32_lowreg bi16 bi8
                  opt64_2shares opt64_3shares bi32_2shares bi32_3shares)
set(DEFAULT_TESTS genkat getcycles)
set(DEFAULT_REL_FLAGS -std=c99 -O2 -fomit-frame-pointer -march=native -mtune=native)
set(DEFAULT_DBG_FLAGS -std=c99 -O2 -Wall -Wextra -Wshadow)
set(DEFAULT_COMPILE_DEFS -DASCON_DATA_ACCESS='H')

# add platform specific implementations
# This has to happen BEFORE IMPL_LIST is seeded from DEFAULT_IMPLS below,
# otherwise the extra implementations are silently never built.
# The host processor is quoted so an empty value cannot break the if().
message(STATUS "cmake host system name: ${CMAKE_HOST_SYSTEM_NAME}")
message(STATUS "cmake host system processor: ${CMAKE_HOST_SYSTEM_PROCESSOR}")
if("${CMAKE_HOST_SYSTEM_PROCESSOR}" MATCHES "ARM|arm")
  list(APPEND DEFAULT_IMPLS bi32_arm neon)
endif()

# set cmake variables for version, algorithms, implementations, tests, flags, defs
# (cache entries: values already stored in an existing build tree are kept)
set(VERSION_LIST ${DEFAULT_VERSIONS} CACHE STRING "Choose the ascon versions to include.")
set(ALG_LIST ${DEFAULT_ALGS} CACHE STRING "Choose the list of algorithms to include.")
set(IMPL_LIST ${DEFAULT_IMPLS} CACHE STRING "Choose the list of implementations to include.")
set(TEST_LIST ${DEFAULT_TESTS} CACHE STRING "Choose the list of tests to include.")
set(REL_FLAGS ${DEFAULT_REL_FLAGS} CACHE STRING "Define custom Release (performance) flags.")
set(DBG_FLAGS ${DEFAULT_DBG_FLAGS} CACHE STRING "Define custom Debug (NIST) flags.")
set(COMPILE_DEFS ${DEFAULT_COMPILE_DEFS} CACHE STRING "Define custom compile definitions.")

if(NOT WIN32 AND NOT CYGWIN AND NOT MSYS)
  # use sanitizer in Debug build (but not on windows)
  # (sets a normal variable that shadows the DBG_FLAGS cache entry)
  set(DBG_FLAGS ${DBG_FLAGS} -fsanitize=address,undefined -static-libasan)
endif()
if(MSVC)
  # MSVC does not understand the GCC-style debug flags; only disable optimization
  set(DBG_FLAGS /Od)
endif()

# set the default build type for single-config generators if none was specified
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  message(STATUS "Setting build type to 'Release' as none was specified.")
  set(CMAKE_BUILD_TYPE Release CACHE STRING
      "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE)
endif()
set(KAT_PATH KAT)
set(TEST_PATH tests)

# For every (crypto type, version, algorithm, implementation) combination that
# exists in the source tree, build a static library from its *.c/*.h files and
# one executable per entry of TEST_LIST, and register that executable with CTest.
foreach(CRYPTO aead hash)
  # CRYPTO_AEAD / CRYPTO_HASH selects the interface inside the test sources
  string(TOUPPER CRYPTO_${CRYPTO} CRYPTO_DEFINE)
  foreach(VER ${VERSION_LIST})
    foreach(ALG ${ALG_LIST})
      foreach(IMPL ${IMPL_LIST})
        set(IMPL_PATH crypto_${CRYPTO}/${ALG}${VER}/${IMPL})
        # not every algorithm provides every implementation
        if(NOT EXISTS "${PROJECT_SOURCE_DIR}/${IMPL_PATH}")
          continue()
        endif()
        message("Adding implementation ${IMPL_PATH}")
        string(REGEX REPLACE "/" "_" IMPL_NAME ${IMPL_PATH})
        file(GLOB IMPL_FILES RELATIVE "${PROJECT_SOURCE_DIR}" "${IMPL_PATH}/*.[ch]")
        add_library(${IMPL_NAME} ${IMPL_FILES})
        target_include_directories(${IMPL_NAME} PUBLIC ${IMPL_PATH} ${TEST_PATH})
        target_compile_definitions(${IMPL_NAME} PRIVATE ${COMPILE_DEFS})
        # note: target_compile_features(... c_std_99) would need cmake >= 3.8.2,
        # so the C standard is selected through REL_FLAGS / DBG_FLAGS instead
        target_compile_options(${IMPL_NAME} PUBLIC $<$<CONFIG:RELEASE>:${REL_FLAGS}>)
        target_compile_options(${IMPL_NAME} PUBLIC $<$<CONFIG:DEBUG>:${DBG_FLAGS}>)
        foreach(TEST_NAME ${TEST_LIST})
          # genkat has one source file per crypto type, other tests share one file
          if("${TEST_NAME}" STREQUAL "genkat")
            set(TEST_FILES ${TEST_PATH}/crypto_${CRYPTO}.h ${TEST_PATH}/${TEST_NAME}_${CRYPTO}.c)
          else()
            set(TEST_FILES ${TEST_PATH}/crypto_${CRYPTO}.h ${TEST_PATH}/${TEST_NAME}.c)
          endif()
          set(EXE_NAME ${TEST_NAME}_${IMPL_NAME})
          add_executable(${EXE_NAME} ${TEST_FILES})
          target_compile_definitions(${EXE_NAME} PRIVATE ${CRYPTO_DEFINE})
          target_link_libraries(${EXE_NAME} PRIVATE ${IMPL_NAME})
          if("${TEST_NAME}" STREQUAL "genkat")
            # genkat is driven by the genkat.cmake script
            add_test(NAME ${EXE_NAME} COMMAND ${CMAKE_COMMAND}
                     -DEXE_NAME=${EXE_NAME} -DALG=${ALG}${VER}
                     -DSRC_DIR=${PROJECT_SOURCE_DIR} -DBIN_DIR=${PROJECT_BINARY_DIR}
                     -DCONFIG=$<CONFIG> -P ${PROJECT_SOURCE_DIR}/genkat.cmake)
          else()
            # NAME/COMMAND signature resolves the target to its built executable
            add_test(NAME ${EXE_NAME} COMMAND ${EXE_NAME})
          endif()
        endforeach()
      endforeach()
    endforeach()
  endforeach()
endforeach()
CC0 1.0 Universal
Statement of Purpose
The laws of most jurisdictions throughout the world automatically confer
exclusive Copyright and Related Rights (defined below) upon the creator and
subsequent owner(s) (each and all, an "owner") of an original work of
authorship and/or a database (each, a "Work").
Certain owners wish to permanently relinquish those rights to a Work for the
purpose of contributing to a commons of creative, cultural and scientific
works ("Commons") that the public can reliably and without fear of later
claims of infringement build upon, modify, incorporate in other works, reuse
and redistribute as freely as possible in any form whatsoever and for any
purposes, including without limitation commercial purposes. These owners may
contribute to the Commons to promote the ideal of a free culture and the
further production of creative, cultural and scientific works, or to gain
reputation or greater distribution for their Work in part through the use and
efforts of others.
For these and/or other purposes and motivations, and without any expectation
of additional consideration or compensation, the person associating CC0 with a
Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
and publicly distribute the Work under its terms, with knowledge of his or her
Copyright and Related Rights in the Work and the meaning and intended legal
effect of CC0 on those rights.
1. Copyright and Related Rights. A Work made available under CC0 may be
protected by copyright and related or neighboring rights ("Copyright and
Related Rights"). Copyright and Related Rights include, but are not limited
to, the following:
i. the right to reproduce, adapt, distribute, perform, display, communicate,
and translate a Work;
ii. moral rights retained by the original author(s) and/or performer(s);
iii. publicity and privacy rights pertaining to a person's image or likeness
depicted in a Work;
iv. rights protecting against unfair competition in regards to a Work,
subject to the limitations in paragraph 4(a), below;
v. rights protecting the extraction, dissemination, use and reuse of data in
a Work;
vi. database rights (such as those arising under Directive 96/9/EC of the
European Parliament and of the Council of 11 March 1996 on the legal
protection of databases, and under any national implementation thereof,
including any amended or successor version of such directive); and
vii. other similar, equivalent or corresponding rights throughout the world
based on applicable law or treaty, and any national implementations thereof.
2. Waiver. To the greatest extent permitted by, but not in contravention of,
applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
and Related Rights and associated claims and causes of action, whether now
known or unknown (including existing as well as future claims and causes of
action), in the Work (i) in all territories worldwide, (ii) for the maximum
duration provided by applicable law or treaty (including future time
extensions), (iii) in any current or future medium and for any number of
copies, and (iv) for any purpose whatsoever, including without limitation
commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
the Waiver for the benefit of each member of the public at large and to the
detriment of Affirmer's heirs and successors, fully intending that such Waiver
shall not be subject to revocation, rescission, cancellation, termination, or
any other legal or equitable action to disrupt the quiet enjoyment of the Work
by the public as contemplated by Affirmer's express Statement of Purpose.
3. Public License Fallback. Should any part of the Waiver for any reason be
judged legally invalid or ineffective under applicable law, then the Waiver
shall be preserved to the maximum extent permitted taking into account
Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
is so judged Affirmer hereby grants to each affected person a royalty-free,
non transferable, non sublicensable, non exclusive, irrevocable and
unconditional license to exercise Affirmer's Copyright and Related Rights in
the Work (i) in all territories worldwide, (ii) for the maximum duration
provided by applicable law or treaty (including future time extensions), (iii)
in any current or future medium and for any number of copies, and (iv) for any
purpose whatsoever, including without limitation commercial, advertising or
promotional purposes (the "License"). The License shall be deemed effective as
of the date CC0 was applied by Affirmer to the Work. Should any part of the
License for any reason be judged legally invalid or ineffective under
applicable law, such partial invalidity or ineffectiveness shall not
invalidate the remainder of the License, and in such case Affirmer hereby
affirms that he or she will not (i) exercise any of his or her remaining
Copyright and Related Rights in the Work or (ii) assert any associated claims
and causes of action with respect to the Work, in either case contrary to
Affirmer's express Statement of Purpose.
4. Limitations and Disclaimers.
a. No trademark or patent rights held by Affirmer are waived, abandoned,
surrendered, licensed or otherwise affected by this document.
b. Affirmer offers the Work as-is and makes no representations or warranties
of any kind concerning the Work, express, implied, statutory or otherwise,
including without limitation warranties of title, merchantability, fitness
for a particular purpose, non infringement, or the absence of latent or
other defects, accuracy, or the present or absence of errors, whether or not
discoverable, all to the greatest extent permissible under applicable law.
c. Affirmer disclaims responsibility for clearing rights of other persons
that may apply to the Work or any use thereof, including without limitation
any person's Copyright and Related Rights in the Work. Further, Affirmer
disclaims responsibility for obtaining any necessary consents, permissions
or other rights required for any use of the Work.
d. Affirmer understands and acknowledges that Creative Commons is not a
party to this document and has no duty or obligation with respect to this
CC0 or use of the Work.
For more information, please see
<http://creativecommons.org/publicdomain/zero/1.0/>
\ No newline at end of file
# Reference and optimized C and ASM implementations of Ascon
Ascon is a family of lightweight authenticated encryption schemes with associated data (AEAD), including a hash and extendable output function (XOF).
For more information on Ascon visit: https://ascon.iaik.tugraz.at/
This repository contains the following 5 Ascon algorithms:
- `crypto_aead/ascon128v12`: Ascon-128 v1.2
- `crypto_aead/ascon128av12`: Ascon-128a v1.2
- `crypto_aead/ascon80pqv12`: Ascon-80pq v1.2
- `crypto_hash/asconhashv12`: Ascon-Hash v1.2
- `crypto_hash/asconxofv12`: Ascon-Xof v1.2
and the following implementations:
- `ref`: reference implementation
- `opt64`: 64-bit speed-optimized C implementation
- `opt64_lowsize`: 64-bit size-optimized C implementation
- `neon`: NEON speed-optimized ARM inline assembly implementation
- `bi32`: 32-bit speed-optimized bit-interleaved C implementation
- `bi32_lowsize`: 32-bit size-optimized bit-interleaved C implementation
- `bi32_lowreg`: 32-bit speed-optimized bit-interleaved C implementation (low register usage)
- `bi32_arm`: 32-bit speed-optimized bit-interleaved ARM inline assembly implementation
- `bi16`: 16-bit optimized bit-interleaved C implementation
- `bi8`: 8-bit optimized bit-interleaved C implementation
## Performance results of Ascon-128 on different CPUs in cycles per byte:
| Message Length in Bytes: | 1 | 8 | 16 | 32 | 64 | 1536 | long |
|:-------------------------|-----:|-----:|-----:|-----:|-----:|-----:|-----:|
| AMD Ryzen 7 1700\* | | | | | 14.5 | 8.8 | 8.6 |
| Intel Xeon E5-2609 v4\* | | | | | 17.3 | 10.8 | 10.5 |
| Cortex-A53 (ARMv8)\* | | | | | 18.3 | 11.3 | 11.0 |
| Intel Core i5-6300U | 367 | 58 | 35 | 23 | 17.6 | 11.9 | 11.4 |
| Intel Core i5-4200U | 521 | 81 | 49 | 32 | 23.9 | 16.2 | 15.8 |
| Cortex-A15 (ARMv7)\* | | | | | 69.8 | 36.2 | 34.6 |
| Cortex-A7 (NEON) | 2182 | 249 | 148 | 97 | 71.7 | 47.5 | 46.5 |
| Cortex-A7 (ARMv7) | 1871 | 292 | 175 | 115 | 86.6 | 58.3 | 57.2 |
| ARM1176JZF-S (ARMv6) | 2189 | 340 | 202 | 133 | 97.9 | 64.4 | 65.3 |
\* Results taken from eBACS: http://bench.cr.yp.to/
## Performance results of Ascon-128a on different CPUs in cycles per byte:
| Message Length in Bytes: | 1 | 8 | 16 | 32 | 64 | 1536 | long |
|:-------------------------|-----:|-----:|-----:|-----:|-----:|-----:|-----:|
| AMD Ryzen 7 1700\* | | | | | 12.0 | 6.0 | 5.7 |
| Intel Xeon E5-2609 v4\* | | | | | 14.1 | 7.3 | 6.9 |
| Cortex-A53 (ARMv8)\* | | | | | 15.1 | 7.6 | 7.3 |
| Intel Core i5-6300U | 365 | 47 | 31 | 19 | 13.5 | 8.0 | 7.8 |
| Intel Core i5-4200U | 519 | 67 | 44 | 27 | 18.8 | 11.0 | 10.6 |
| Cortex-A15 (ARMv7)\* | | | | | 60.3 | 25.3 | 23.8 |
| Cortex-A7 (NEON) | 2204 | 226 | 132 | 82 | 55.9 | 31.7 | 30.7 |
| Cortex-A7 (ARMv7) | 1911 | 255 | 161 | 102 | 71.3 | 42.3 | 41.2 |
| ARM1176JZF-S (ARMv6) | 2267 | 303 | 191 | 120 | 84.4 | 50.0 | 50.2 |
\* Results taken from eBACS: http://bench.cr.yp.to/
## Implementation interface
All implementations use the interface defined by the ECRYPT Benchmarking of Cryptographic Systems (eBACS):
- https://bench.cr.yp.to/call-aead.html for CRYPTO\_AEAD (Ascon-128, Ascon-128a, Ascon-80pq)
- https://bench.cr.yp.to/call-hash.html for CRYPTO\_HASH (Ascon-Hash) and XOF (Ascon-Xof)
## Manually build and run a single Ascon target:
Build example for CRYPTO\_AEAD algorithms:
```
gcc -march=native -O3 -DNDEBUG -Icrypto_aead/ascon128v12/opt64 crypto_aead/ascon128v12/opt64/*.c -Itests tests/genkat_aead.c -o genkat
gcc -march=native -O3 -DNDEBUG -Icrypto_aead/ascon128v12/opt64 crypto_aead/ascon128v12/opt64/*.c -DCRYPTO_AEAD -Itests tests/getcycles.c -o getcycles
```
Build example for CRYPTO\_HASH algorithms:
```
gcc -march=native -O3 -DNDEBUG -Icrypto_hash/asconhashv12/opt64 crypto_hash/asconhashv12/opt64/*.c -Itests tests/genkat_hash.c -o genkat
gcc -march=native -O3 -DNDEBUG -Icrypto_hash/asconhashv12/opt64 crypto_hash/asconhashv12/opt64/*.c -DCRYPTO_HASH -Itests tests/getcycles.c -o getcycles
```
Generate KATs and get CPU cycles:
```
./genkat
./getcycles
```
## Build and test all Ascon v1.2 targets using performance flags:
```
mkdir build && cd build
cmake .. -DCMAKE_BUILD_TYPE=Release
cmake --build .
ctest
```
## Build and test all Ascon v1.2 targets using NIST flags and sanitizers:
```
mkdir build && cd build
cmake .. -DCMAKE_BUILD_TYPE=Debug
cmake --build .
ctest
```
## Build and run only specific algorithms, implementations and tests:
Build and test:
```
mkdir build && cd build
cmake .. -DVERSION_LIST="v12" -DALG_LIST="ascon128;asconhash" -DIMPL_LIST="opt64;bi32" -DTEST_LIST="genkat;getcycles"
cmake --build .
ctest -R genkat
```
Get CPU cycles:
```
./getcycles_crypto_aead_ascon128v12_opt64
./getcycles_crypto_aead_ascon128v12_bi32
./getcycles_crypto_hash_asconhashv12_opt64
./getcycles_crypto_hash_asconhashv12_bi32
```
## Hints to get more reliable getcycles results on Intel/AMD CPUs:
* Determine the processor base frequency (also called design frequency):
- e.g. using the Intel/AMD website
- or using `lscpu` listed under model name
* Disable turbo boost (this should lock the frequency to the next value
below the processor base frequency):
```
echo 1 | sudo tee /sys/devices/system/cpu/intel_pstate/no_turbo
```
* If the above does not work, manually set the frequency using e.g. `cpufreq-set`.
* Determine the actual frequency (under load):
- e.g. by watching the frequency using `lscpu` or `cpufreq-info`
* Determine the scaling factor between the actual and base frequency:
- factor = actual frequency / base frequency
* Run the getcycles program using the frequency factor and watch the results:
```
while true; do ./getcycles_crypto_aead_ascon128v12_opt64 $factor; done
```
## Hints to activate the performance monitor unit (PMU) on ARM CPUs:
* First try to install `linux-tools` and see if it works.
* On many ARM platforms, the PMU has to be enabled using a kernel module:
- Source code for Armv6 (32-bit):
<http://sandsoftwaresound.net/raspberry-pi/raspberry-pi-gen-1/performance-counter-kernel-module/>
- Source code for Armv7 (32-bit):
<https://github.com/thoughtpolice/enable_arm_pmu>
- Source code for Armv8/Aarch64 (64-bit):
<https://github.com/rdolbeau/enable_arm_pmu>
* Steps to compile the kernel module on the raspberry pi:
- Find out the kernel version using `uname -a`
- Download the kernel header files, e.g. `raspberrypi-kernel-headers`
- Download the source code for the Armv6 kernel module
- Build, install and load the kernel module
## Benchmark Ascon v1.2 using supercop
Download supercop according to the website: http://bench.cr.yp.to/supercop.html
To test only Ascon, just run the following commands:
```
./do-part init
./do-part crypto_aead ascon128v12
./do-part crypto_aead ascon128av12
./do-part crypto_aead ascon80pqv12
./do-part crypto_hash asconhashv12
./do-part crypto_hash asconxofv12
```
#!/bin/sh
# Run every getcycles* benchmark in the current directory five times each and
# print, per benchmark, a markdown table whose rows are sorted numerically by
# the last ("long") column.
# Usage: <script> [factor]
#   factor  optional argument forwarded to each getcycles program
FACTOR=$1
for i in getcycles*; do
  # skip the unexpanded pattern, directories and anything not executable
  [ -f "$i" ] && [ -x "$i" ] || continue
  echo
  echo "$i:"
  echo
  echo "| 1 | 8 | 16 | 32 | 64 | 1536 | long |"
  echo "|------:|------:|------:|------:|------:|------:|------:|"
  for n in $(seq 5); do
    # only the last output line of each run is kept;
    # the factor is passed only when one was given
    "./$i" ${FACTOR:+"$FACTOR"} | tail -n 1
  done | sort -n -k8 -t'|'
done 2>/dev/null
echo
...@@ -3,3 +3,4 @@ ...@@ -3,3 +3,4 @@
#define CRYPTO_NPUBBYTES 16 #define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16 #define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1 #define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16
...@@ -3,7 +3,10 @@ ...@@ -3,7 +3,10 @@
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines /* macros for big endian machines */
#ifndef NDEBUG
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x) #define U64BIG(x) (x)
#define U32BIG(x) (x) #define U32BIG(x) (x)
#define U16BIG(x) (x) #define U16BIG(x) (x)
...@@ -11,21 +14,26 @@ ...@@ -11,21 +14,26 @@
#elif defined(_MSC_VER) || \ #elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines /* macros for little endian machines */
#ifndef NDEBUG
#pragma message("Using macros for little endian machines")
#endif
#define U64BIG(x) \ #define U64BIG(x) \
((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \ (((0x00000000000000FFULL & (x)) << 56) | \
(((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \ ((0x000000000000FF00ULL & (x)) << 40) | \
(((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \ ((0x0000000000FF0000ULL & (x)) << 24) | \
(((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56)) ((0x00000000FF000000ULL & (x)) << 8) | \
((0x000000FF00000000ULL & (x)) >> 8) | \
((0x0000FF0000000000ULL & (x)) >> 24) | \
((0x00FF000000000000ULL & (x)) >> 40) | \
((0xFF00000000000000ULL & (x)) >> 56))
#define U32BIG(x) \ #define U32BIG(x) \
((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \ (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \
(((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24)) ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24))
#define U16BIG(x) \ #define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8))
((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8))
#else #else
#error "ascon byte order macros not defined in endian.h" #error "Ascon byte order macros not defined in endian.h"
#endif #endif
#endif // ENDIAN_H_ #endif /* ENDIAN_H_ */
...@@ -3,3 +3,4 @@ ...@@ -3,3 +3,4 @@
#define CRYPTO_NPUBBYTES 16 #define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16 #define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1 #define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16
#include "ascon.h"
#include "api.h"
#include "loadstore.h"
#include "permutations.h"
#include "printstate.h"
/* Load the key bytes at k into the key words K0, K1 and K2.
 *
 * K0, K1, K2: output key words (K0 only receives key material when
 *             CRYPTO_KEYBYTES == 20).
 * k:          key bytes; CRYPTO_KEYBYTES of them are read (16 or 20).
 *
 * KINIT, KEYROT and XOR are defined outside this file; KINIT presumably
 * resets the three key words before they are XORed into -- confirm. */
__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2,
                           const uint8_t* k) {
  KINIT(K0, K1, K2);
  if (CRYPTO_KEYBYTES == 20) {
    /* 20-byte key: the leading 4 bytes go into K0 */
    XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
    k += 4;
  }
  /* the (remaining) 16 key bytes go into K1 and K2 */
  XOR(*K1, LOAD64(k));
  XOR(*K2, LOAD64(k + 8));
}
/* Initialize the Ascon state from the nonce and the key.
 *
 * s:    state to initialize.
 * npub: nonce; 16 bytes are read.
 * k:    key; read through loadkey().
 *
 * The state is set up from IV, key and nonce, permuted with P12, and the
 * key is XORed into the last state words again afterwards. */
__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) {
  word_t N0, N1;
  word_t K0, K1, K2;
  /* load nonce */
  N0 = LOAD64(npub);
  N1 = LOAD64(npub + 8);
  /* load key */
  loadkey(&K0, &K1, &K2, k);
  /* initialization */
  PINIT(s);
  XOR(s->x0, IV);
  /* K0 is only part of the state for 20-byte keys */
  if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0);
  XOR(s->x1, K1);
  XOR(s->x2, K2);
  XOR(s->x3, N0);
  XOR(s->x4, N1);
  P12(s);
  /* XOR the key into the state again after the permutation */
  if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0);
  XOR(s->x3, K1);
  XOR(s->x4, K2);
  printstate("initialization", s);
}
/* Absorb the associated data into the state.
 *
 * s:     state, updated in place.
 * ad:    associated data bytes.
 * adlen: number of associated data bytes (may be 0).
 *
 * Full blocks of ASCON_RATE bytes are XORed into x0 (and x1 when the rate
 * is 16), each followed by PB. The last, partial block is padded with PAD.
 * When adlen is 0 no block is processed at all. */
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
  word_t* restrict px;
  /* process associated data */
  if (adlen) {
    while (adlen >= ASCON_RATE) {
      XOR(s->x0, LOAD64(ad));
      if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8));
      PB(s);
      ad += ASCON_RATE;
      adlen -= ASCON_RATE;
    }
    /* final associated data block */
    px = &s->x0;
    /* rate 16 with at least 8 bytes left: x0 takes a full word and the
       remainder (possibly empty) goes into x1 */
    if (ASCON_RATE == 16 && adlen >= 8) {
      XOR(s->x0, LOAD64(ad));
      px = &s->x1;
      ad += 8;
      adlen -= 8;
    }
    /* LOAD is skipped for 0 bytes; only the padding is applied then */
    if (adlen) XOR(*px, LOAD(ad, adlen));
    XOR(*px, PAD(adlen));
    PB(s);
  }
  /* always executed, even without associated data: separates the
     associated data phase from the message phase */
  XOR(s->x4, WORD_T(1));
  printstate("process associated data", s);
}
/* Encrypt the plaintext m into c and absorb it into the state.
 *
 * s:    state, updated in place.
 * c:    ciphertext output; mlen bytes are written.
 * m:    plaintext input.
 * mlen: number of plaintext bytes (may be 0).
 *
 * Each ciphertext word is the state word after XORing in the plaintext.
 * PB is applied after every full block but not after the final, padded
 * block. */
__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m,
                           uint64_t mlen) {
  word_t* restrict px;
  /* process plaintext */
  while (mlen >= ASCON_RATE) {
    XOR(s->x0, LOAD64(m));
    STORE64(c, s->x0);
    if (ASCON_RATE == 16) {
      XOR(s->x1, LOAD64(m + 8));
      STORE64(c + 8, s->x1);
    }
    PB(s);
    m += ASCON_RATE;
    c += ASCON_RATE;
    mlen -= ASCON_RATE;
  }
  /* final plaintext block */
  px = &s->x0;
  /* rate 16 with at least 8 bytes left: x0 handles a full word and the
     remainder (possibly empty) is handled through x1 */
  if (ASCON_RATE == 16 && mlen >= 8) {
    XOR(s->x0, LOAD64(m));
    STORE64(c, s->x0);
    px = &s->x1;
    m += 8;
    c += 8;
    mlen -= 8;
  }
  /* LOAD/STORE are skipped for 0 bytes */
  if (mlen) {
    XOR(*px, LOAD(m, mlen));
    STORE(c, *px, mlen);
  }
  XOR(*px, PAD(mlen));
  printstate("process plaintext", s);
}
/* Decrypt the ciphertext c into m and absorb it into the state.
 *
 * s:    state, updated in place.
 * m:    plaintext output; clen bytes are written.
 * c:    ciphertext input (without the tag).
 * clen: number of ciphertext bytes (may be 0).
 *
 * Each plaintext word is state XOR ciphertext; the state word is then
 * replaced by the ciphertext word. PB is applied after every full block but
 * not after the final, padded block. The tag is not checked here. */
__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c,
                           uint64_t clen) {
  word_t* restrict px;
  word_t cx;
  /* process ciphertext */
  while (clen >= ASCON_RATE) {
    cx = LOAD64(c);
    XOR(s->x0, cx);
    STORE64(m, s->x0);
    s->x0 = cx;
    if (ASCON_RATE == 16) {
      cx = LOAD64(c + 8);
      XOR(s->x1, cx);
      STORE64(m + 8, s->x1);
      s->x1 = cx;
    }
    PB(s);
    m += ASCON_RATE;
    c += ASCON_RATE;
    clen -= ASCON_RATE;
  }
  /* final ciphertext block */
  px = &s->x0;
  /* rate 16 with at least 8 bytes left: x0 handles a full word and the
     remainder (possibly empty) is handled through x1 */
  if (ASCON_RATE == 16 && clen >= 8) {
    cx = LOAD64(c);
    XOR(s->x0, cx);
    STORE64(m, s->x0);
    s->x0 = cx;
    px = &s->x1;
    m += 8;
    c += 8;
    clen -= 8;
  }
  if (clen) {
    cx = LOAD(c, clen);
    XOR(*px, cx);
    STORE(m, *px, clen);
    /* replace only the bytes covered by the ciphertext: mask the word with
       XMASK(clen), then XOR the ciphertext bytes back in (XMASK is defined
       elsewhere; presumably it clears the first clen bytes -- confirm) */
    AND(*px, XMASK(clen));
    XOR(*px, cx);
  }
  XOR(*px, PAD(clen));
  printstate("process ciphertext", s);
}
/* Finalize the state so that x3 and x4 hold the authentication tag.
 *
 * s: state, updated in place.
 * k: key; read through loadkey().
 *
 * The key is XORed into the state at a position that depends on the key
 * size and rate, P12 is applied, and K1/K2 are XORed into x3/x4. */
__forceinline void final(state_t* s, const uint8_t* k) {
  word_t K0, K1, K2;
  /* load key */
  loadkey(&K0, &K1, &K2, k);
  /* finalization */
  /* 16-byte key, rate 8: key goes into x1, x2 */
  if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
    XOR(s->x1, K1);
    XOR(s->x2, K2);
  }
  /* 16-byte key, rate 16: key goes into x2, x3 */
  if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
    XOR(s->x2, K1);
    XOR(s->x3, K2);
  }
  /* 20-byte key: the key words are combined pairwise with KEYROT and
     spread over x1, x2, x3 */
  if (CRYPTO_KEYBYTES == 20) {
    XOR(s->x1, KEYROT(K0, K1));
    XOR(s->x2, KEYROT(K1, K2));
    XOR(s->x3, KEYROT(K2, WORD_T(0)));
  }
  P12(s);
  /* callers read the tag from x3 and x4 */
  XOR(s->x3, K1);
  XOR(s->x4, K2);
  printstate("finalization", s);
}
/* Select how the AEAD entry points reach the mode functions:
 * - ASCON_INLINE_MODE != 0: call the force-inlined functions directly;
 * - ASCON_INLINE_MODE == 0: go through the out-of-line ascon_* wrappers
 *   declared in ascon.h. */
#if ASCON_INLINE_MODE

#define INIT init
#define ABSORB absorb
#define ENCRYPT encrypt
#define DECRYPT decrypt
#define FINAL final

#else

#define INIT ascon_init
#define ABSORB ascon_absorb
#define ENCRYPT ascon_encrypt
#define DECRYPT ascon_decrypt
#define FINAL ascon_final

/* out-of-line wrapper around init() */
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
  init(s, npub, k);
}

/* out-of-line wrapper around absorb() */
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
  absorb(s, ad, adlen);
}

/* out-of-line wrapper around encrypt() */
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) {
  encrypt(s, c, m, mlen);
}

/* out-of-line wrapper around decrypt() */
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) {
  decrypt(s, m, c, clen);
}

/* out-of-line wrapper around final() */
void ascon_final(state_t* s, const uint8_t* k) {
  final(s, k);
}

#endif
/* AEAD encryption.
 *
 * c:     output buffer; receives mlen ciphertext bytes followed by the
 *        16-byte tag.
 * clen:  receives the output length, mlen + CRYPTO_ABYTES.
 * m:     plaintext, mlen bytes.
 * ad:    associated data, adlen bytes.
 * nsec:  unused.
 * npub:  nonce.
 * k:     key.
 *
 * Always returns 0. */
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
                        uint64_t mlen, const uint8_t* ad, uint64_t adlen,
                        const uint8_t* nsec, const uint8_t* npub,
                        const uint8_t* k) {
  state_t state;
  /* the tag is appended directly after the ciphertext */
  uint8_t* tag = c + mlen;

  (void)nsec;
  *clen = mlen + CRYPTO_ABYTES;

  /* run the Ascon mode: initialize, absorb AD, encrypt, finalize */
  INIT(&state, npub, k);
  ABSORB(&state, ad, adlen);
  ENCRYPT(&state, c, m, mlen);
  FINAL(&state, k);

  /* the tag is taken from the last two state words */
  STORE64(tag, state.x3);
  STORE64(tag + 8, state.x4);
  return 0;
}
/* AEAD decryption and tag verification.
 *
 * m:     output buffer; receives clen - CRYPTO_ABYTES plaintext bytes.
 * mlen:  receives the plaintext length, or 0 on failure.
 * nsec:  unused.
 * c:     ciphertext followed by the 16-byte tag, clen bytes in total.
 * ad:    associated data, adlen bytes.
 * npub:  nonce.
 * k:     key.
 *
 * Returns 0 when the tag matches, -1 when the input is shorter than a tag
 * or the tag does not match. Note that m has already been written by the
 * time a tag mismatch is detected. */
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
                        const uint8_t* c, uint64_t clen, const uint8_t* ad,
                        uint64_t adlen, const uint8_t* npub, const uint8_t* k) {
  state_t state;
  const uint8_t* tag;

  (void)nsec;

  /* the input must at least contain a tag */
  if (clen < CRYPTO_ABYTES) {
    *mlen = 0;
    return -1;
  }
  clen -= CRYPTO_ABYTES;
  *mlen = clen;

  /* run the Ascon mode: initialize, absorb AD, decrypt, finalize */
  INIT(&state, npub, k);
  ABSORB(&state, ad, adlen);
  DECRYPT(&state, m, c, clen);
  FINAL(&state, k);

  /* verify tag (should be constant time, check compiler output):
     XOR the received tag into the computed one and test for all-zero */
  tag = c + clen;
  XOR(state.x3, LOAD64(tag));
  XOR(state.x4, LOAD64(tag + 8));
  if (NOTZERO(state.x3, state.x4)) {
    *mlen = 0;
    return -1;
  }
  return 0;
}
/* Ascon state type and the out-of-line mode functions. */
#ifndef ASCON_H_
#define ASCON_H_
#include <stdint.h>
#include "config.h"
#include "word.h"

/* Ascon state: five words x0..x4 of type word_t (declared in word.h) */
typedef struct {
  word_t x0, x1, x2, x3, x4;
} state_t;

/* Out-of-line mode functions; aead.c defines them only when
   ASCON_INLINE_MODE is 0. */
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k);

#endif /* ASCON_H_ */
/* Compile-time configuration of the Ascon implementation.
 * Every option can be overridden on the compiler command line
 * (e.g. -DASCON_DATA_ACCESS='H'); the values below are the defaults. */
#ifndef CONFIG_H_
#define CONFIG_H_

/* inline the Ascon mode */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#endif

/* inline the Ascon permutations */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0
#endif

/* single function for all permutations */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif

/* unroll the permutation loops */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 0
#endif

/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */
#ifndef ASCON_DATA_ACCESS
#define ASCON_DATA_ACCESS 'M'
#endif

/* make sure __forceinline is supported
 * With Microsoft compilers __forceinline is a built-in keyword, not a macro,
 * so a plain #ifndef test would replace it with the GCC-style attribute
 * syntax that those compilers do not accept. Only supply the fallback when
 * no macro exists and the compiler is not MSVC-compatible. */
#if !defined(__forceinline) && !defined(_MSC_VER)
#define __forceinline inline __attribute__((always_inline))
#endif

#endif /* CONFIG_H_ */
...@@ -3,7 +3,10 @@ ...@@ -3,7 +3,10 @@
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines /* macros for big endian machines */
#ifndef NDEBUG
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x) #define U64BIG(x) (x)
#define U32BIG(x) (x) #define U32BIG(x) (x)
#define U16BIG(x) (x) #define U16BIG(x) (x)
...@@ -11,21 +14,26 @@ ...@@ -11,21 +14,26 @@
#elif defined(_MSC_VER) || \ #elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines /* macros for little endian machines */
#ifndef NDEBUG
#pragma message("Using macros for little endian machines")
#endif
#define U64BIG(x) \ #define U64BIG(x) \
((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \ (((0x00000000000000FFULL & (x)) << 56) | \
(((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \ ((0x000000000000FF00ULL & (x)) << 40) | \
(((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \ ((0x0000000000FF0000ULL & (x)) << 24) | \
(((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56)) ((0x00000000FF000000ULL & (x)) << 8) | \
((0x000000FF00000000ULL & (x)) >> 8) | \
((0x0000FF0000000000ULL & (x)) >> 24) | \
((0x00FF000000000000ULL & (x)) >> 40) | \
((0xFF00000000000000ULL & (x)) >> 56))
#define U32BIG(x) \ #define U32BIG(x) \
((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \ (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \
(((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24)) ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24))
#define U16BIG(x) \ #define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8))
((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8))
#else #else
#error "ascon byte order macros not defined in endian.h" #error "Ascon byte order macros not defined in endian.h"
#endif #endif
#endif // ENDIAN_H_ #endif /* ENDIAN_H_ */
#ifndef LOADSTORE_H_
#define LOADSTORE_H_
#include <stdint.h>
#include "config.h"
#include "endian.h"
#include "word.h"
/* Mask selecting the n least significant bits of a 64-bit value.
   Valid for 1 <= n <= 64; n == 0 would shift by 64, which is undefined. */
#define MASK(n) (0xffffffffffffffffull >> (64 - (n)))
/* Extract byte i of a 64-bit Ascon word; byte 0 is the most significant. */
#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (8 * (7 - (i)))))
/* Place byte value b at byte position i of a 64-bit Ascon word; byte 0 is the
   most significant. */
#define SETBYTE(b, i) ((uint64_t)(b) << (8 * (7 - (i))))
#if ASCON_DATA_ACCESS == 'W'
#ifndef NDEBUG
#pragma message("Using wordwise data access")
#endif
/* Wordwise 8-byte load: reads one uint64_t directly from `bytes`, converts it
   with U64BIG and returns it as an Ascon word. */
__forceinline word_t LOAD64(const uint8_t* bytes) {
  uint64_t raw = *(uint64_t*)bytes;
  return U64TOWORD(U64BIG(raw));
}
/* Wordwise 8-byte store: converts the Ascon word to a uint64_t, applies
   U64BIG and writes it directly to `bytes` as one uint64_t. */
__forceinline void STORE64(uint8_t* bytes, word_t w) {
  uint64_t* dst = (uint64_t*)bytes;
  uint64_t value = WORDTOU64(w);
  *dst = U64BIG(value);
}
/* Wordwise partial load of the first n bytes at `bytes` (1 <= n <= 8).
   Returns an Ascon word whose first n bytes come from memory and whose
   remaining bytes are zero. n == 0 is not supported (shift by 64). */
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
  /* The wordwise variant always performs a full 8-byte access, so 8 bytes
     must be readable at `bytes` even when n < 8. */
  uint64_t raw = *(uint64_t*)bytes;
  /* Mask after the U64BIG conversion, i.e. in the big-endian domain, so the
     first n bytes in memory are kept on little- and big-endian hosts alike.
     Masking the raw word with an LSB mask is only right on little-endian. */
  uint64_t first_n = ~0ull << (64 - 8 * n);
  uint64_t x = U64BIG(raw) & first_n;
  return U64TOWORD(x);
}
/* Wordwise partial store of the first n bytes of Ascon word `w` to `bytes`
   (1 <= n <= 8). Bytes n..7 of the destination keep their previous values.
   n == 0 is not supported (shift by 64). */
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
  /* The wordwise variant performs a full 8-byte read-modify-write, so 8 bytes
     must be accessible at `bytes` even when n < 8. */
  uint64_t* dst = (uint64_t*)bytes;
  /* Work in the big-endian domain so the mask covers the first n bytes in
     memory on both little- and big-endian hosts. */
  uint64_t first_n = ~0ull << (64 - 8 * n);
  uint64_t mem = U64BIG(*dst);
  /* Only the first n bytes of w may reach memory; OR-ing the whole word in
     would corrupt the bytes that follow the n requested ones. */
  mem = (mem & ~first_n) | (WORDTOU64(w) & first_n);
  *dst = U64BIG(mem);
}
#elif ASCON_DATA_ACCESS == 'M'
#ifndef NDEBUG
#pragma message("Using memcpy to access data")
#endif
#include <string.h>
/* Full 8-byte accesses are the n == 8 case of the generic routines. */
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
/* memcpy-based load: copies n bytes from `bytes` into a zeroed uint64_t,
   applies U64BIG and returns the result as an Ascon word. */
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
  uint64_t raw = 0;
  memcpy(&raw, bytes, n);
  raw = U64BIG(raw);
  return U64TOWORD(raw);
}
/* memcpy-based store: converts the Ascon word to a uint64_t, applies U64BIG
   and copies the first n bytes of that value to `bytes`. */
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
  uint64_t be = WORDTOU64(w);
  be = U64BIG(be);
  memcpy(bytes, &be, n);
}
#elif ASCON_DATA_ACCESS == 'B'
#ifndef NDEBUG
#pragma message("Using bytewise data access")
#endif
/* Full 8-byte accesses are the n == 8 case of the generic routines. */
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
/* Bytewise load: assembles the first n bytes at `bytes` into a 64-bit value,
   first byte in the most significant position, and returns it as an Ascon
   word. Unread byte positions stay zero. */
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
  uint64_t x = 0;
  for (int k = 0; k < n; ++k) {
    x |= (uint64_t)bytes[k] << (56 - 8 * k);
  }
  return U64TOWORD(x);
}
/* Bytewise store: writes the n most significant bytes of the Ascon word to
   `bytes`, most significant byte first. */
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
  uint64_t x = WORDTOU64(w);
  for (int k = 0; k < n; ++k) {
    bytes[k] = x >> (56 - 8 * k);
  }
}
#elif ASCON_DATA_ACCESS == 'H'
#ifndef NDEBUG
#pragma message("Using hybrid data access")
#endif
/* Full 8-byte accesses are the n == 8 case of the generic routines. */
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
/* Hybrid load: a full block (n == 8) is read as one uint64_t and converted
   with U64BIG; any other length is assembled byte by byte, first byte in the
   most significant position. */
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
  uint64_t x = 0;
  if (n != 8) {
    for (int k = 0; k < n; ++k) {
      x |= (uint64_t)bytes[k] << (56 - 8 * k);
    }
  } else {
    x = U64BIG(*(uint64_t*)bytes);
  }
  return U64TOWORD(x);
}
/* Hybrid store: a full block (n == 8) is written as one uint64_t after
   U64BIG conversion; any other length is written byte by byte, most
   significant byte first. */
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
  uint64_t x = WORDTOU64(w);
  if (n != 8) {
    for (int k = 0; k < n; ++k) {
      bytes[k] = x >> (56 - 8 * k);
    }
  } else {
    *(uint64_t*)bytes = U64BIG(x);
  }
}
#else
#error "Ascon data access macro not defined correctly"
#endif
#endif /* LOADSTORE_H_ */
#include "permutations.h"
#include "round.h"
#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM
/* Round constant pairs for the 12 permutation rounds. Row i holds the two
   values passed to ROUND as (C_e, C_o) in round i; the looped 8- and 6-round
   permutations start at rows 4 and 6 respectively. */
const uint8_t constants[][2] = {
    {0xc, 0xc}, /* round 0 */
    {0x9, 0xc}, /* round 1 */
    {0xc, 0x9}, /* round 2 */
    {0x9, 0x9}, /* round 3 */
    {0x6, 0xc}, /* round 4 */
    {0x3, 0xc}, /* round 5 */
    {0x6, 0x9}, /* round 6 */
    {0x3, 0x9}, /* round 7 */
    {0xc, 0x6}, /* round 8 */
    {0x9, 0x6}, /* round 9 */
    {0xc, 0x3}, /* round 10 */
    {0x9, 0x3}, /* round 11 */
};
#endif
#if ASCON_INLINE_PERM
#elif ASCON_SINGLE_PERM
/* Single out-of-line permutation entry point: prints the input state (debug
   builds) and applies the last `rounds` rounds of the 12-round schedule,
   i.e. rounds START(rounds) .. 11 of the constant table. */
void P(state_t* s, uint8_t rounds) {
  int r = START(rounds);
  printstate(" permutation input", s);
  while (r < 12) {
    ROUND(s, constants[r][0], constants[r][1]);
    ++r;
  }
}
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
/* Out-of-line 12-round permutation; forwards to P12ROUNDS.
   NOTE(review): unlike the inline P12 and the single-function P, these
   wrappers do not call printstate(" permutation input", ...) - confirm that
   the missing debug output is intended. */
void P12(state_t* s) { P12ROUNDS(s); }
/* P8 is only compiled for AEAD builds (CRYPTO_ABYTES defined) with a 16-byte
   rate, which is the configuration that selects PB = P8. */
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) { P8ROUNDS(s); }
#endif
/* P6 is only compiled for AEAD builds with an 8-byte rate (PB = P6). */
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8
void P6(state_t* s) { P6ROUNDS(s); }
#endif
#endif
#ifndef PERMUTATIONS_H_ #ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_ #define PERMUTATIONS_H_
#include "endian.h" #include <stdint.h>
/* Fixed-width shorthand types.
   NOTE(review): these assume unsigned int is 32 bits and unsigned long long
   is 64 bits; the macros below rely on that - confirm for the target. */
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
/* One 64-bit word in bit-interleaved form: `e` and `o` are the two 32-bit
   halves written by to_bit_interleaving(). */
typedef struct {
u32 e;
u32 o;
} u32_2;
/* Extract byte n of a 64-bit value; byte 0 is the most significant. */
#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n)))))
/* Place byte value x at byte position n of a 64-bit value; byte 0 is the
   most significant. */
#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n))))
/* Rotate the 32-bit value x right by n bits (0 <= n <= 31). The left shift
   count is reduced modulo 32 so that n == 0 does not shift by the full word
   width, which is undefined behaviour in C. */
#define ROTR32(x, n) (((x) >> (n)) | ((x) << ((32 - (n)) & 31)))
// Convert the 64-bit value `in` to bit-interleaved form and store it in `out`
// (a u32_2-like object with .e and .o members). Each 32-bit half of `in` is
// unshuffled so that its even-indexed bits land in its low 16 bits and its
// odd-indexed bits in its high 16 bits; the low parts are then combined into
// out.e and the high parts into out.o.
// The macro declares locals named hi, lo, r0 and r1, so the `in`/`out`
// argument expressions must not use those names.
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
#define to_bit_interleaving(out, in) \
do { \
u32 hi = (in) >> 32; \
u32 lo = (u32)(in); \
u32 r0, r1; \
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \
(out).e = (lo & 0x0000FFFF) | (hi << 16); \
(out).o = (lo >> 16) | (hi & 0xFFFF0000); \
} while (0)
// Inverse of to_bit_interleaving: rebuild the plain 64-bit value `out` from
// the interleaved pair `in` (.e/.o). The halves are first regrouped into lo
// and hi, then the same swap steps are applied in the reverse order (8, 4, 2,
// 1) to shuffle the bits back.
// The macro declares locals named lo, hi, r0 and r1, so the `in`/`out`
// argument expressions must not use those names.
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
#define from_bit_interleaving(out, in) \
do { \
u32 lo = ((in).e & 0x0000FFFF) | ((in).o << 16); \
u32 hi = ((in).e >> 16) | ((in).o & 0xFFFF0000); \
u32 r0, r1; \
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \
out = (u64)hi << 32 | lo; \
} while (0)
// One permutation round on the bit-interleaved state.
// The macro takes only the two round-constant halves; it reads and writes
// the variables x0..x4 and uses t0..t4 as scratch, all of which must already
// exist in the expanding scope with .e/.o members.
// Steps: XOR the constant into x2, apply the s-box layer to the .e and .o
// halves independently, apply the linear layer using ROTR32 on the halves,
// and finally complement x2.
#define ROUND(C_e, C_o) \
do { \
/* round constant */ \
x2.e ^= C_e; x2.o ^= C_o; \
/* s-box layer */ \
x0.e ^= x4.e; x0.o ^= x4.o; \
x4.e ^= x3.e; x4.o ^= x3.o; \
x2.e ^= x1.e; x2.o ^= x1.o; \
t0.e = x0.e; t0.o = x0.o; \
t4.e = x4.e; t4.o = x4.o; \
t3.e = x3.e; t3.o = x3.o; \
t1.e = x1.e; t1.o = x1.o; \
t2.e = x2.e; t2.o = x2.o; \
x0.e = t0.e ^ (~t1.e & t2.e); x0.o = t0.o ^ (~t1.o & t2.o); \
x2.e = t2.e ^ (~t3.e & t4.e); x2.o = t2.o ^ (~t3.o & t4.o); \
x4.e = t4.e ^ (~t0.e & t1.e); x4.o = t4.o ^ (~t0.o & t1.o); \
x1.e = t1.e ^ (~t2.e & t3.e); x1.o = t1.o ^ (~t2.o & t3.o); \
x3.e = t3.e ^ (~t4.e & t0.e); x3.o = t3.o ^ (~t4.o & t0.o); \
x1.e ^= x0.e; x1.o ^= x0.o; \
x3.e ^= x2.e; x3.o ^= x2.o; \
x0.e ^= x4.e; x0.o ^= x4.o; \
/* linear layer */ \
t0.e = x0.e ^ ROTR32(x0.o, 4); t0.o = x0.o ^ ROTR32(x0.e, 5); \
t1.e = x1.e ^ ROTR32(x1.e, 11); t1.o = x1.o ^ ROTR32(x1.o, 11); \
t2.e = x2.e ^ ROTR32(x2.o, 2); t2.o = x2.o ^ ROTR32(x2.e, 3); \
t3.e = x3.e ^ ROTR32(x3.o, 3); t3.o = x3.o ^ ROTR32(x3.e, 4); \
t4.e = x4.e ^ ROTR32(x4.e, 17); t4.o = x4.o ^ ROTR32(x4.o, 17); \
x0.e ^= ROTR32(t0.o, 9); x0.o ^= ROTR32(t0.e, 10); \
x1.e ^= ROTR32(t1.o, 19); x1.o ^= ROTR32(t1.e, 20); \
x2.e ^= t2.o; x2.o ^= ROTR32(t2.e, 1); \
x3.e ^= ROTR32(t3.e, 5); x3.o ^= ROTR32(t3.o, 5); \
x4.e ^= ROTR32(t4.o, 3); x4.o ^= ROTR32(t4.e, 4); \
x2.e = ~x2.e; x2.o = ~x2.o; \
} while(0)
// 12-round permutation: applies ROUND with all twelve constant pairs in
// order. Like ROUND, it operates on x0..x4 / t0..t4 of the expanding scope.
#define P12() \
do { \
ROUND(0xc, 0xc); \
ROUND(0x9, 0xc); \
ROUND(0xc, 0x9); \
ROUND(0x9, 0x9); \
ROUND(0x6, 0xc); \
ROUND(0x3, 0xc); \
ROUND(0x6, 0x9); \
ROUND(0x3, 0x9); \
ROUND(0xc, 0x6); \
ROUND(0x9, 0x6); \
ROUND(0xc, 0x3); \
ROUND(0x9, 0x3); \
} while (0)
// 8-round permutation: the last eight rounds of P12.
#define P8() \
do { \
ROUND(0x6, 0xc); \
ROUND(0x3, 0xc); \
ROUND(0x6, 0x9); \
ROUND(0x3, 0x9); \
ROUND(0xc, 0x6); \
ROUND(0x9, 0x6); \
ROUND(0xc, 0x3); \
ROUND(0x9, 0x3); \
} while (0)
// 6-round permutation: the last six rounds of P12.
#define P6() \
do { \
ROUND(0x6, 0x9); \
ROUND(0x3, 0x9); \
ROUND(0xc, 0x6); \
ROUND(0x9, 0x6); \
ROUND(0xc, 0x3); \
ROUND(0x9, 0x3); \
} while (0)
#endif // PERMUTATIONS_H_
#include "api.h"
#include "ascon.h"
#include "printstate.h"
#include "round.h"
/* Per-variant parameters: key sizes and rates in bytes, round counts for the
   initialization/finalization permutation (PA) and the per-block permutation
   (PB), and the hash output size in bytes. */
#define ASCON_128_KEYBYTES 16
#define ASCON_128A_KEYBYTES 16
#define ASCON_80PQ_KEYBYTES 20
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32
/* AEAD initialization words. Each packs, from the most significant byte
   down: key size in bits, rate in bits, PA rounds, PB rounds; the low 32 bits
   are zero. The value is converted to an Ascon word with U64TOWORD. */
#define ASCON_128_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_128A_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128A_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128A_PB_ROUNDS) << 32))
#define ASCON_80PQ_IV \
U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
/* Hash/XOF initialization words: no key-size byte; rate in bits and PA rounds
   in the same positions as above. The hash IV also carries the output length
   in bits in the low bits; the XOF IV leaves them zero. */
#define ASCON_HASH_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_HASH_BYTES * 8) << 0))
#define ASCON_XOF_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40))
/* Five constant state words each for hash and XOF.
   NOTE(review): presumably the precomputed state after initializing with
   ASCON_HASH_IV / ASCON_XOF_IV and running the permutation - confirm against
   the hash implementation, which is not part of this header. */
#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull)
#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull)
#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull)
#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull)
/* Select IV, round counts and the per-block permutation PB for the variant
   being built, based on ASCON_RATE and CRYPTO_KEYBYTES from api.h.
   Rate 8 with a 16-byte key: IV = ASCON_128_IV, PB = P6. */
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
/* Rate 16 (any key size): IV = ASCON_128A_IV, PB = P8. */
#if ASCON_RATE == 16
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
/* Rate 8 with a 20-byte key: IV = ASCON_80PQ_IV, PB = P6. */
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20
#define IV ASCON_80PQ_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
/* Index of the first round-constant row for an n-round permutation (the
   rounds used are START(n) .. 11). The argument is parenthesized so that
   expressions such as START(a + b) expand correctly. */
#define START(n) (12 - (n))
#if ASCON_UNROLL_LOOPS
/* Unrolled round sequences (ASCON_UNROLL_LOOPS != 0). Each call passes one
   (C_e, C_o) round-constant pair to ROUND.
   P12ROUNDS applies all twelve rounds. */
__forceinline void P12ROUNDS(state_t* s) {
ROUND(s, 0xc, 0xc);
ROUND(s, 0x9, 0xc);
ROUND(s, 0xc, 0x9);
ROUND(s, 0x9, 0x9);
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
/* P8ROUNDS applies the last eight rounds of the 12-round sequence. */
__forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
/* P6ROUNDS applies the last six rounds of the 12-round sequence. */
__forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
#else /* !ASCON_UNROLL_LOOPS */
/* Round constant table, defined in the permutation source file. */
extern const uint8_t constants[][2];
/* Looped round sequences (ASCON_UNROLL_LOOPS == 0): each helper walks the
   constant table from row START(n) to row 11 and runs one ROUND per row. */
__forceinline void P12ROUNDS(state_t* s) {
  int r = START(12);
  while (r < 12) {
    ROUND(s, constants[r][0], constants[r][1]);
    ++r;
  }
}
__forceinline void P8ROUNDS(state_t* s) {
  int r = START(8);
  while (r < 12) {
    ROUND(s, constants[r][0], constants[r][1]);
    ++r;
  }
}
__forceinline void P6ROUNDS(state_t* s) {
  int r = START(6);
  while (r < 12) {
    ROUND(s, constants[r][0], constants[r][1]);
    ++r;
  }
}
#endif
#if ASCON_INLINE_PERM
/* Inline permutations (ASCON_INLINE_PERM != 0): each prints the input state
   via printstate (a no-op when NDEBUG is defined) and then runs the matching
   round sequence. */
__forceinline void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
__forceinline void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
__forceinline void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
/* Dispatch to the permutation with i rounds. Only 12, 8 and 6 are
   recognized; any other value leaves the state untouched. */
__forceinline void P(state_t* s, int i) {
  switch (i) {
    case 12:
      P12(s);
      break;
    case 8:
      P8(s);
      break;
    case 6:
      P6(s);
      break;
    default:
      break;
  }
}
#elif ASCON_SINGLE_PERM
/* Single-function mode (ASCON_SINGLE_PERM != 0): P12/P8/P6 are macros that
   forward to one out-of-line P taking the round count. */
#define P12(s) P(s, 12)
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
/* Runs the last `rounds` rounds of the 12-round permutation on s. */
void P(state_t* s, uint8_t rounds);
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
/* Out-of-line permutations, defined in the permutation source file. */
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
/* Dispatch to the permutation with i rounds. Only 12, 8 and 6 are
   recognized; any other value leaves the state untouched. */
__forceinline void P(state_t* s, int i) {
  switch (i) {
    case 12:
      P12(s);
      break;
    case 8:
      P8(s);
      break;
    case 6:
      P6(s);
      break;
    default:
      break;
  }
}
#endif
#endif /* PERMUTATIONS_H_ */
#ifndef PRINTSTATE_H_
#define PRINTSTATE_H_
#ifdef NDEBUG
#define printword(text, w)
#define printstate(text, s)
#else
#include <inttypes.h>
#include <stdio.h>
#include "ascon.h"
/* Print one Ascon word as "<text>=<16 hex digits>" followed by a newline.
   The word is converted to a plain 64-bit value with WORDTOU64 first. */
__forceinline void printword(const char* text, const word_t x) {
  uint64_t value = WORDTOU64(x);
  printf("%s=%016" PRIx64 "\n", text, value);
}
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#endif
#endif /* PRINTSTATE_H_ */
#ifndef ROUND_H_
#define ROUND_H_
#include "ascon.h"
#include "printstate.h"
/* Set the three key words to zero. */
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
  const word_t zero = WORD_T(0);
  *K0 = zero;
  *K1 = zero;
  *K2 = zero;
}
/* Set the five permutation state words x0..x4 to zero. */
__forceinline void PINIT(state_t* s) {
  const word_t zero = WORD_T(0);
  s->x0 = zero;
  s->x1 = zero;
  s->x2 = zero;
  s->x3 = zero;
  s->x4 = zero;
}
/* One permutation round on the bit-interleaved state *s.
   C_e and C_o are the round-constant values XORed into the .e and .o halves
   of x2. Every state word is held as two 32-bit halves (e, o); the s-box
   layer treats the halves independently, while the linear layer combines
   rotated e and o halves with ROR32. `t` is scratch storage only. */
__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) {
state_t t;
/* round constant */
s->x2.e ^= C_e;
s->x2.o ^= C_o;
/* s-box layer */
s->x0.e ^= s->x4.e;
s->x0.o ^= s->x4.o;
s->x4.e ^= s->x3.e;
s->x4.o ^= s->x3.o;
s->x2.e ^= s->x1.e;
s->x2.o ^= s->x1.o;
/* snapshot all five words so the updates below read pre-update values */
t.x0.e = s->x0.e;
t.x0.o = s->x0.o;
t.x4.e = s->x4.e;
t.x4.o = s->x4.o;
t.x3.e = s->x3.e;
t.x3.o = s->x3.o;
t.x1.e = s->x1.e;
t.x1.o = s->x1.o;
t.x2.e = s->x2.e;
t.x2.o = s->x2.o;
/* x[i] = t[i] ^ (~t[i+1] & t[i+2]), indices taken modulo 5 */
s->x0.e = t.x0.e ^ (~t.x1.e & t.x2.e);
s->x0.o = t.x0.o ^ (~t.x1.o & t.x2.o);
s->x2.e = t.x2.e ^ (~t.x3.e & t.x4.e);
s->x2.o = t.x2.o ^ (~t.x3.o & t.x4.o);
s->x4.e = t.x4.e ^ (~t.x0.e & t.x1.e);
s->x4.o = t.x4.o ^ (~t.x0.o & t.x1.o);
s->x1.e = t.x1.e ^ (~t.x2.e & t.x3.e);
s->x1.o = t.x1.o ^ (~t.x2.o & t.x3.o);
s->x3.e = t.x3.e ^ (~t.x4.e & t.x0.e);
s->x3.o = t.x3.o ^ (~t.x4.o & t.x0.o);
s->x1.e ^= s->x0.e;
s->x1.o ^= s->x0.o;
s->x3.e ^= s->x2.e;
s->x3.o ^= s->x2.o;
s->x0.e ^= s->x4.e;
s->x0.o ^= s->x4.o;
/* linear layer */
t.x0.e = s->x0.e ^ ROR32(s->x0.o, 4);
t.x0.o = s->x0.o ^ ROR32(s->x0.e, 5);
t.x1.e = s->x1.e ^ ROR32(s->x1.e, 11);
t.x1.o = s->x1.o ^ ROR32(s->x1.o, 11);
t.x2.e = s->x2.e ^ ROR32(s->x2.o, 2);
t.x2.o = s->x2.o ^ ROR32(s->x2.e, 3);
t.x3.e = s->x3.e ^ ROR32(s->x3.o, 3);
t.x3.o = s->x3.o ^ ROR32(s->x3.e, 4);
t.x4.e = s->x4.e ^ ROR32(s->x4.e, 17);
t.x4.o = s->x4.o ^ ROR32(s->x4.o, 17);
s->x0.e ^= ROR32(t.x0.o, 9);
s->x0.o ^= ROR32(t.x0.e, 10);
s->x1.e ^= ROR32(t.x1.o, 19);
s->x1.o ^= ROR32(t.x1.e, 20);
/* no ROR32 on the next line: presumably the rotation amount is 0 here */
s->x2.e ^= t.x2.o;
s->x2.o ^= ROR32(t.x2.e, 1);
s->x3.e ^= ROR32(t.x3.e, 5);
s->x3.o ^= ROR32(t.x3.o, 5);
s->x4.e ^= ROR32(t.x4.o, 3);
s->x4.o ^= ROR32(t.x4.e, 4);
/* complement x2 */
s->x2.e = ~s->x2.e;
s->x2.o = ~s->x2.o;
printstate(" round output", s);
}
#endif /* ROUND_H_ */
#ifndef WORD_H_
#define WORD_H_
#include <stdint.h>
#include "config.h"
/* One 64-bit Ascon word in bit-interleaved form, split into two 32-bit
   halves. U64TOWORD fills them from the result of TOBI32: `e` receives the
   low 32 bits and `o` the high 32 bits of the interleaved value. */
typedef struct {
uint32_t e;
uint32_t o;
} word_t;
/* Build a word from a value that is already in interleaved layout: the low
   32 bits become `e`, the high 32 bits become `o`. No bit permutation. */
__forceinline word_t WORD_T(uint64_t x) {
  word_t w;
  w.e = (uint32_t)x;
  w.o = (uint32_t)(x >> 32);
  return w;
}
/* Inverse of WORD_T: pack `o` into the high and `e` into the low 32 bits. */
__forceinline uint64_t UINT64_T(word_t x) {
  uint64_t high = (uint64_t)x.o << 32;
  return high | x.e;
}
/* Bit (de)interleaving helpers, defined further down in this header. */
__forceinline uint64_t TOBI32(uint64_t in);
__forceinline uint64_t FROMBI32(uint64_t in);
/* Convert a plain 64-bit value to an Ascon word: interleave with TOBI32, then
   split the result into its low (`e`) and high (`o`) 32-bit halves. */
__forceinline word_t U64TOWORD(uint64_t x) {
  uint64_t interleaved = TOBI32(x);
  word_t w;
  w.e = (uint32_t)interleaved;
  w.o = (uint32_t)(interleaved >> 32);
  return w;
}
/* Convert an Ascon word back to a plain 64-bit value: join `o` (high) and
   `e` (low), then undo the interleaving with FROMBI32. */
__forceinline uint64_t WORDTOU64(word_t w) {
  uint64_t interleaved = ((uint64_t)w.o << 32) | w.e;
  return FROMBI32(interleaved);
}
/* a ^= b for Ascon words. `b` is evaluated exactly once into a temporary, so
   it may be a function call; `a` must be an lvalue of type word_t. */
#define XOR(a, b)            \
  do {                       \
    word_t xor_rhs_ = b;     \
    (a).e ^= xor_rhs_.e;     \
    (a).o ^= xor_rhs_.o;     \
  } while (0)
/* a &= b for Ascon words, with the same evaluation rules as XOR. */
#define AND(a, b)            \
  do {                       \
    word_t and_rhs_ = b;     \
    (a).e &= and_rhs_.e;     \
    (a).o &= and_rhs_.o;     \
  } while (0)
/* Rotate the 32-bit value x right by n bits (0 <= n <= 31).
   The left-shift count is reduced modulo 32 so that n == 0 does not shift by
   the full word width, which is undefined behaviour in C. ROR64 reaches that
   case for n == 0 and n == 1. */
__forceinline uint32_t ROR32(uint32_t x, int n) {
  return (x >> n) | (x << ((32 - n) & 31));
}
/* Rotate an interleaved word right by n bits, expressed on the halves.
   Even n rotates each half by n / 2. Odd n swaps the roles of the halves:
   the new `e` is `o` rotated by (n - 1) / 2 and the new `o` is `e` rotated
   by (n + 1) / 2. */
__forceinline word_t ROR64(word_t x, int n) {
  word_t r;
  if (n % 2) {
    r.e = ROR32(x.o, (n - 1) / 2);
    r.o = ROR32(x.e, (n + 1) / 2);
  } else {
    r.e = ROR32(x.e, n / 2);
    r.o = ROR32(x.o, n / 2);
  }
  return r;
}
/* Combine two words half by half: in each 32-bit half, the low 16 bits of
   `lo2hi` move to the upper 16 bits and the upper 16 bits of `hi2lo` move to
   the lower 16 bits. */
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
  word_t r;
  r.e = (lo2hi.e << 16) | (hi2lo.e >> 16);
  r.o = (lo2hi.o << 16) | (hi2lo.o >> 16);
  return r;
}
/* Return non-zero iff any byte of a or b is non-zero. All 16 bytes are OR-ed
   together without early exit; the result is the OR of those byte values. */
__forceinline int NOTZERO(word_t a, word_t b) {
  const uint8_t* pa = (const uint8_t*)&a;
  const uint8_t* pb = (const uint8_t*)&b;
  int acc = 0;
  for (int i = 0; i < 8; ++i) {
    acc |= pa[i] | pb[i];
  }
  return acc;
}
/* Set padding byte in 64-bit Ascon word: returns a word whose only set bit is
   bit (31 - 4 * i) of the `o` half (0 <= i <= 7).
   The constant is unsigned so that i == 0, which shifts into bit 31, does not
   overflow a signed int (undefined behaviour) or sign-extend on widening. */
__forceinline word_t PAD(int i) {
  return WORD_T((uint64_t)(0x08u << (28 - 4 * i)) << 32);
}
/* Byte mask for 64-bit Ascon word (1 <= n <= 8): both halves get the same
   32-bit mask in which the top 4 * n bits are clear and the rest are set. */
__forceinline word_t XMASK(int n) {
  uint32_t half = 0x0fffffff >> (4 * (n - 1));
  uint64_t both = ((uint64_t)half << 32) | half;
  return WORD_T(both);
}
/* Convert a plain 64-bit value to bit-interleaved layout.
   Each 32-bit half of `in` is unshuffled with four swap steps (distances 1,
   2, 4, 8) so that its even-indexed bits end up in its low 16 bits and its
   odd-indexed bits in its high 16 bits. The low parts of both halves form the
   low 32 bits of the result, the high parts form the high 32 bits.
   Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002. */
__forceinline uint64_t TOBI32(uint64_t in) {
uint32_t hi = in >> 32;
uint32_t lo = in;
uint32_t r0, r1;
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
/* regroup: low 16 bits of both halves -> r0, high 16 bits -> r1 */
r0 = (lo & 0x0000FFFF) | (hi << 16);
r1 = (lo >> 16) | (hi & 0xFFFF0000);
return (uint64_t)r1 << 32 | r0;
}
/* Inverse of TOBI32: convert a bit-interleaved 64-bit value back to plain
   layout. The two 32-bit halves are first regrouped into lo and hi, then the
   swap steps are applied in the reverse order (distances 8, 4, 2, 1).
   Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002. */
__forceinline uint64_t FROMBI32(uint64_t in) {
uint32_t r0 = in;
uint32_t r1 = in >> 32;
/* undo the final regrouping step of TOBI32 */
uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16);
uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000);
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
return (uint64_t)hi << 32 | lo;
}
#endif /* WORD_H_ */
/* Key length in bytes (16 here; the mode code also handles 20). */
#define CRYPTO_KEYBYTES 16
/* Secret message number length; the nsec parameters are ignored. */
#define CRYPTO_NSECBYTES 0
/* Nonce length in bytes; init() loads two 8-byte words from npub. */
#define CRYPTO_NPUBBYTES 16
/* Tag length in bytes appended to the ciphertext. */
#define CRYPTO_ABYTES 16
/* NOTE(review): presumably tells the surrounding test/benchmark framework
   that input and output buffers do not overlap - confirm. */
#define CRYPTO_NOOVERLAP 1
/* Rate in bytes; 16 selects ASCON_128A_IV and PB = P8 in permutations.h. */
#define ASCON_RATE 16
#include "ascon.h"
#include "api.h"
#include "loadstore.h"
#include "permutations.h"
#include "printstate.h"
/* Load the key at `k` into the key words.
   K1 and K2 always receive two consecutive 8-byte words. For 20-byte keys
   the first 4 key bytes are loaded, moved into position with KEYROT and
   stored in K0, and K1/K2 are read from the following 16 bytes; otherwise K0
   stays zero. */
__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2,
                           const uint8_t* k) {
  const uint8_t* p = k;
  KINIT(K0, K1, K2);
  if (CRYPTO_KEYBYTES == 20) {
    word_t head = LOAD(p, 4);
    XOR(*K0, KEYROT(WORD_T(0), head));
    p += 4;
  }
  XOR(*K1, LOAD64(p));
  XOR(*K2, LOAD64(p + 8));
}
/* Initialize the state from nonce and key.
   The state is set to IV || key || nonce, permuted with P12, and the key is
   XORed into the last state words again. npub must provide 16 bytes; k must
   provide CRYPTO_KEYBYTES bytes. */
__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) {
word_t N0, N1;
word_t K0, K1, K2;
/* load nonce */
N0 = LOAD64(npub);
N1 = LOAD64(npub + 8);
/* load key */
loadkey(&K0, &K1, &K2, k);
/* initialization */
PINIT(s);
XOR(s->x0, IV);
/* K0 is only non-zero for 20-byte keys; it shares x0 with the IV */
if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0);
XOR(s->x1, K1);
XOR(s->x2, K2);
XOR(s->x3, N0);
XOR(s->x4, N1);
P12(s);
/* second key addition after the permutation */
if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0);
XOR(s->x3, K1);
XOR(s->x4, K2);
printstate("initialization", s);
}
/* Absorb adlen bytes of associated data into the state.
   Full ASCON_RATE-byte blocks are XORed into x0 (and x1 for a 16-byte rate),
   each followed by PB. The final partial block is padded and also followed by
   PB. When adlen is 0 nothing is absorbed and no permutation runs. In every
   case the low bit of x4 is flipped at the end. */
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
word_t* restrict px;
/* process associated data */
if (adlen) {
while (adlen >= ASCON_RATE) {
XOR(s->x0, LOAD64(ad));
if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8));
PB(s);
ad += ASCON_RATE;
adlen -= ASCON_RATE;
}
/* final associated data block */
/* px is the word that receives the remaining (< 8) bytes and the padding */
px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) {
XOR(s->x0, LOAD64(ad));
px = &s->x1;
ad += 8;
adlen -= 8;
}
/* LOAD is skipped for 0 bytes; only the padding is added then */
if (adlen) XOR(*px, LOAD(ad, adlen));
XOR(*px, PAD(adlen));
PB(s);
}
/* applied even when there is no associated data */
XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
}
/* Encrypt mlen bytes from m into c and absorb them into the state.
   For each block the plaintext is XORed into the rate words and the updated
   rate words are written out as ciphertext. PB runs after every full block
   but not after the final (partial, padded) block. */
__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
word_t* restrict px;
/* process plaintext */
while (mlen >= ASCON_RATE) {
XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
if (ASCON_RATE == 16) {
XOR(s->x1, LOAD64(m + 8));
STORE64(c + 8, s->x1);
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
mlen -= ASCON_RATE;
}
/* final plaintext block */
/* px is the word that receives the remaining (< 8) bytes and the padding */
px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) {
XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
px = &s->x1;
m += 8;
c += 8;
mlen -= 8;
}
if (mlen) {
XOR(*px, LOAD(m, mlen));
/* only the first mlen bytes of the word are ciphertext */
STORE(c, *px, mlen);
}
XOR(*px, PAD(mlen));
printstate("process plaintext", s);
}
/* Decrypt clen bytes from c into m and absorb the ciphertext into the state.
   For each block the plaintext is state XOR ciphertext, and the rate word is
   then replaced by the ciphertext. PB runs after every full block but not
   after the final (partial, padded) block. The tag is not checked here. */
__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
word_t* restrict px;
word_t cx;
/* process ciphertext */
while (clen >= ASCON_RATE) {
cx = LOAD64(c);
XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
if (ASCON_RATE == 16) {
cx = LOAD64(c + 8);
XOR(s->x1, cx);
STORE64(m + 8, s->x1);
s->x1 = cx;
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
clen -= ASCON_RATE;
}
/* final ciphertext block */
/* px is the word that receives the remaining (< 8) bytes and the padding */
px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) {
cx = LOAD64(c);
XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
px = &s->x1;
m += 8;
c += 8;
clen -= 8;
}
if (clen) {
cx = LOAD(c, clen);
XOR(*px, cx);
STORE(m, *px, clen);
/* clear the first clen bytes of the word, then put the ciphertext bytes
   there; the remaining bytes keep their (state XOR 0) value */
AND(*px, XMASK(clen));
XOR(*px, cx);
}
XOR(*px, PAD(clen));
printstate("process ciphertext", s);
}
/* Finalization: XOR the key into the state words that follow the rate, run
   P12, and XOR K1/K2 into x3/x4. After this call x3 and x4 hold the tag
   words that the callers store or compare. */
__forceinline void final(state_t* s, const uint8_t* k) {
word_t K0, K1, K2;
/* load key */
loadkey(&K0, &K1, &K2, k);
/* finalization */
/* 8-byte rate: the key starts right after x0 */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
XOR(s->x1, K1);
XOR(s->x2, K2);
}
/* 16-byte rate: the key starts right after x1 */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
XOR(s->x2, K1);
XOR(s->x3, K2);
}
/* 20-byte key: the three key words are spread over x1..x3 via KEYROT */
if (CRYPTO_KEYBYTES == 20) {
XOR(s->x1, KEYROT(K0, K1));
XOR(s->x2, KEYROT(K1, K2));
XOR(s->x3, KEYROT(K2, WORD_T(0)));
}
P12(s);
XOR(s->x3, K1);
XOR(s->x4, K2);
printstate("finalization", s);
}
/* Mode entry points used by crypto_aead_encrypt/decrypt.
   With ASCON_INLINE_MODE == 0 the phases are reached through the out-of-line
   ascon_* wrappers defined here; otherwise INIT/ABSORB/... name the
   force-inlined functions directly. */
#if !ASCON_INLINE_MODE
#define INIT ascon_init
#define ABSORB ascon_absorb
#define ENCRYPT ascon_encrypt
#define DECRYPT ascon_decrypt
#define FINAL ascon_final
/* Out-of-line wrapper around init(). */
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
init(s, npub, k);
}
/* Out-of-line wrapper around absorb(). */
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
absorb(s, ad, adlen);
}
/* Out-of-line wrapper around encrypt(). */
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) {
encrypt(s, c, m, mlen);
}
/* Out-of-line wrapper around decrypt(). */
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) {
decrypt(s, m, c, clen);
}
/* Out-of-line wrapper around final(). */
void ascon_final(state_t* s, const uint8_t* k) { final(s, k); }
#else
#define INIT init
#define ABSORB absorb
#define ENCRYPT encrypt
#define DECRYPT decrypt
#define FINAL final
#endif
/* Authenticated encryption.
   Writes mlen ciphertext bytes followed by a 16-byte tag to c and sets *clen
   to mlen + CRYPTO_ABYTES. nsec is unused. Always returns 0.
   c must have room for mlen + CRYPTO_ABYTES bytes. */
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
uint64_t mlen, const uint8_t* ad, uint64_t adlen,
const uint8_t* nsec, const uint8_t* npub,
const uint8_t* k) {
state_t s;
(void)nsec;
*clen = mlen + CRYPTO_ABYTES;
/* perform ascon computation */
INIT(&s, npub, k);
ABSORB(&s, ad, adlen);
ENCRYPT(&s, c, m, mlen);
FINAL(&s, k);
/* set tag */
/* the tag is x3 || x4, written directly after the ciphertext */
c += mlen;
STORE64(c, s.x3);
STORE64(c + 8, s.x4);
return 0;
}
/* Authenticated decryption.
   Returns 0 and sets *mlen = clen - CRYPTO_ABYTES when the tag matches.
   Returns -1 with *mlen = 0 when clen is shorter than the tag or the tag does
   not match. nsec is unused.
   Note that the plaintext is written to m before the tag is verified, so on a
   tag mismatch m still contains the unauthenticated output. */
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
const uint8_t* c, uint64_t clen, const uint8_t* ad,
uint64_t adlen, const uint8_t* npub, const uint8_t* k) {
state_t s;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
/* from here on clen is the ciphertext length without the tag */
*mlen = clen = clen - CRYPTO_ABYTES;
/* perform ascon computation */
INIT(&s, npub, k);
ABSORB(&s, ad, adlen);
DECRYPT(&s, m, c, clen);
FINAL(&s, k);
/* verify tag (should be constant time, check compiler output) */
/* XOR the received tag into the computed one; any non-zero bit is a
   mismatch */
c += clen;
XOR(s.x3, LOAD64(c));
XOR(s.x4, LOAD64(c + 8));
if (NOTZERO(s.x3, s.x4)) {
*mlen = 0;
return -1;
}
return 0;
}
#ifndef ASCON_H_
#define ASCON_H_
#include <stdint.h>
#include "config.h"
#include "word.h"
/* Ascon permutation state: five 64-bit words x0..x4 stored as word_t.
   NOTE(review): `rx` is not referenced by any code shown alongside this
   header; presumably reserved for masked variants - confirm before removing. */
typedef struct {
word_t x0, x1, x2, x3, x4;
word_t rx;
} state_t;
/* Out-of-line mode functions, defined in the AEAD source when
   ASCON_INLINE_MODE is 0. */
/* Initialize the state from a 16-byte nonce and the key. */
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k);
/* Absorb adlen bytes of associated data. */
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen);
/* Encrypt mlen bytes from m into c. */
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
/* Decrypt clen bytes from c into m (the tag is not checked here). */
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
/* Finalize; afterwards x3 and x4 hold the tag words. */
void ascon_final(state_t* s, const uint8_t* k);
#endif // ASCON_H_
#ifndef CONFIG_H_
#define CONFIG_H_
/* Inline the Ascon mode: when non-zero, the AEAD entry points call the
   force-inlined init/absorb/encrypt/decrypt/final directly instead of the
   out-of-line ascon_* wrappers. */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#endif
/* Inline the Ascon permutations: when non-zero, P12/P8/P6 are force-inlined
   functions defined in permutations.h. */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0
#endif
/* Single function for all permutations: when non-zero (and permutations are
   not inlined), P12/P8/P6 become macros around one P(s, rounds) function. */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif
/* Unroll the permutation loops: when non-zero, the P*ROUNDS helpers are
   straight-line ROUND calls instead of a loop over the constant table. */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 0
#endif
/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy }.
   Selects which LOAD/STORE implementation loadstore.h compiles. */
#ifndef ASCON_DATA_ACCESS
#define ASCON_DATA_ACCESS 'B'
#endif
/* Ascon mask generator { 'S'tdlib, 'X'orshift, 'R'andombytes }.
   Selects the random number source compiled in random.c. */
#ifndef ASCON_MASK_RNG
#define ASCON_MASK_RNG 'X'
#endif
/* Mask key/data loads.
   NOTE(review): no code in the headers shown here reads this option;
   presumably used by a masked variant - confirm. */
#ifndef ASCON_MASK_LOADS
#define ASCON_MASK_LOADS 0
#endif
/* Make sure __forceinline is supported.
   MSVC-compatible compilers provide __forceinline as a built-in keyword, not
   as a macro, so #ifndef alone cannot detect it. Only install the GNU-style
   fallback when the name is not a macro and the compiler is not MSVC-like. */
#if !defined(__forceinline) && !defined(_MSC_VER)
#define __forceinline inline __attribute__((always_inline))
#endif
#endif /* CONFIG_H_ */
#ifndef ENDIAN_H_
#define ENDIAN_H_
/* U64BIG/U32BIG/U16BIG convert between host byte order and big-endian byte
   order for 64/32/16-bit values. The conversion is its own inverse, so the
   same macro is used for loads and stores. Each macro evaluates its argument
   several times; do not pass expressions with side effects. */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines: nothing to do */
#ifndef NDEBUG
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines: byte swap */
#ifndef NDEBUG
#pragma message("Using macros for little endian machines")
#endif
#define U64BIG(x)                          \
  (((0x00000000000000FFULL & (x)) << 56) | \
   ((0x000000000000FF00ULL & (x)) << 40) | \
   ((0x0000000000FF0000ULL & (x)) << 24) | \
   ((0x00000000FF000000ULL & (x)) << 8) |  \
   ((0x000000FF00000000ULL & (x)) >> 8) |  \
   ((0x0000FF0000000000ULL & (x)) >> 24) | \
   ((0x00FF000000000000ULL & (x)) >> 40) | \
   ((0xFF00000000000000ULL & (x)) >> 56))
/* The masks are unsigned so that a signed int argument is converted to
   unsigned before shifting; otherwise moving the low byte into bits 24..31
   could overflow a signed int. */
#define U32BIG(x)                                              \
  (((0x000000FFu & (x)) << 24) | ((0x0000FF00u & (x)) << 8) |  \
   ((0x00FF0000u & (x)) >> 8) | ((0xFF000000u & (x)) >> 24))
#define U16BIG(x) (((0x00FFu & (x)) << 8) | ((0xFF00u & (x)) >> 8))
#else
#error "Ascon byte order macros not defined in endian.h"
#endif
#endif /* ENDIAN_H_ */
#ifndef LOADSTORE_H_
#define LOADSTORE_H_
#include <stdint.h>
#include "config.h"
#include "endian.h"
#include "word.h"
/* Mask selecting the n least significant bits of a 64-bit value.
   Valid for 1 <= n <= 64; n == 0 would shift by 64, which is undefined. */
#define MASK(n) (0xffffffffffffffffull >> (64 - (n)))
/* Extract byte i of a 64-bit Ascon word; byte 0 is the most significant. */
#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (8 * (7 - (i)))))
/* Place byte value b at byte position i of a 64-bit Ascon word; byte 0 is the
   most significant. */
#define SETBYTE(b, i) ((uint64_t)(b) << (8 * (7 - (i))))
#if ASCON_DATA_ACCESS == 'W'
#ifndef NDEBUG
#pragma message("Using wordwise data access")
#endif
/* Wordwise 8-byte load: reads one uint64_t directly from `bytes`, converts it
   with U64BIG and returns it as an Ascon word. */
__forceinline word_t LOAD64(const uint8_t* bytes) {
  uint64_t raw = *(uint64_t*)bytes;
  return U64TOWORD(U64BIG(raw));
}
/* Wordwise 8-byte store: converts the Ascon word to a uint64_t, applies
   U64BIG and writes it directly to `bytes` as one uint64_t. */
__forceinline void STORE64(uint8_t* bytes, word_t w) {
  uint64_t* dst = (uint64_t*)bytes;
  uint64_t value = WORDTOU64(w);
  *dst = U64BIG(value);
}
/* Wordwise partial load of the first n bytes at `bytes` (1 <= n <= 8).
   Returns an Ascon word whose first n bytes come from memory and whose
   remaining bytes are zero. n == 0 is not supported (shift by 64). */
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
  /* The wordwise variant always performs a full 8-byte access, so 8 bytes
     must be readable at `bytes` even when n < 8. */
  uint64_t raw = *(uint64_t*)bytes;
  /* Mask after the U64BIG conversion, i.e. in the big-endian domain, so the
     first n bytes in memory are kept on little- and big-endian hosts alike.
     Masking the raw word with an LSB mask is only right on little-endian. */
  uint64_t first_n = ~0ull << (64 - 8 * n);
  uint64_t x = U64BIG(raw) & first_n;
  return U64TOWORD(x);
}
/* Wordwise partial store of the first n bytes of Ascon word `w` to `bytes`
   (1 <= n <= 8). Bytes n..7 of the destination keep their previous values.
   n == 0 is not supported (shift by 64). */
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
  /* The wordwise variant performs a full 8-byte read-modify-write, so 8 bytes
     must be accessible at `bytes` even when n < 8. */
  uint64_t* dst = (uint64_t*)bytes;
  /* Work in the big-endian domain so the mask covers the first n bytes in
     memory on both little- and big-endian hosts. */
  uint64_t first_n = ~0ull << (64 - 8 * n);
  uint64_t mem = U64BIG(*dst);
  /* Only the first n bytes of w may reach memory; OR-ing the whole word in
     would corrupt the bytes that follow the n requested ones. */
  mem = (mem & ~first_n) | (WORDTOU64(w) & first_n);
  *dst = U64BIG(mem);
}
#elif ASCON_DATA_ACCESS == 'M'
#ifndef NDEBUG
#pragma message("Using memcpy to access data")
#endif
#include <string.h>
/* Full 8-byte accesses are the n == 8 case of the generic routines. */
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
/* memcpy-based load: copies n bytes from `bytes` into a zeroed uint64_t,
   applies U64BIG and returns the result as an Ascon word. */
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
  uint64_t raw = 0;
  memcpy(&raw, bytes, n);
  raw = U64BIG(raw);
  return U64TOWORD(raw);
}
/* memcpy-based store: converts the Ascon word to a uint64_t, applies U64BIG
   and copies the first n bytes of that value to `bytes`. */
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
  uint64_t be = WORDTOU64(w);
  be = U64BIG(be);
  memcpy(bytes, &be, n);
}
#elif ASCON_DATA_ACCESS == 'B'
#ifndef NDEBUG
#pragma message("Using bytewise data access")
#endif
/* Full 8-byte accesses are the n == 8 case of the generic routines. */
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
/* Bytewise load: assembles the first n bytes at `bytes` into a 64-bit value,
   first byte in the most significant position, and returns it as an Ascon
   word. Unread byte positions stay zero. */
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
  uint64_t x = 0;
  for (int k = 0; k < n; ++k) {
    x |= (uint64_t)bytes[k] << (56 - 8 * k);
  }
  return U64TOWORD(x);
}
/* Bytewise store: writes the n most significant bytes of the Ascon word to
   `bytes`, most significant byte first. */
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
  uint64_t x = WORDTOU64(w);
  for (int k = 0; k < n; ++k) {
    bytes[k] = x >> (56 - 8 * k);
  }
}
#elif ASCON_DATA_ACCESS == 'H'
#ifndef NDEBUG
#pragma message("Using hybrid data access")
#endif
/* Full 8-byte accesses are the n == 8 case of the generic routines. */
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
/* Hybrid load: a full block (n == 8) is read as one uint64_t and converted
   with U64BIG; any other length is assembled byte by byte, first byte in the
   most significant position. */
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
  uint64_t x = 0;
  if (n != 8) {
    for (int k = 0; k < n; ++k) {
      x |= (uint64_t)bytes[k] << (56 - 8 * k);
    }
  } else {
    x = U64BIG(*(uint64_t*)bytes);
  }
  return U64TOWORD(x);
}
/* Hybrid store: a full block (n == 8) is written as one uint64_t after
   U64BIG conversion; any other length is written byte by byte, most
   significant byte first. */
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
  uint64_t x = WORDTOU64(w);
  if (n != 8) {
    for (int k = 0; k < n; ++k) {
      bytes[k] = x >> (56 - 8 * k);
    }
  } else {
    *(uint64_t*)bytes = U64BIG(x);
  }
}
#else
#error "Ascon data access macro not defined correctly"
#endif
#endif /* LOADSTORE_H_ */
#include "permutations.h"
#include "round.h"
#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM
/* Round constant pairs for the 12 permutation rounds. Row i holds the two
   values passed to ROUND as (C_e, C_o) in round i; the looped 8- and 6-round
   permutations start at rows 4 and 6 respectively. */
const uint8_t constants[][2] = {
    {0xc, 0xc}, /* round 0 */
    {0x9, 0xc}, /* round 1 */
    {0xc, 0x9}, /* round 2 */
    {0x9, 0x9}, /* round 3 */
    {0x6, 0xc}, /* round 4 */
    {0x3, 0xc}, /* round 5 */
    {0x6, 0x9}, /* round 6 */
    {0x3, 0x9}, /* round 7 */
    {0xc, 0x6}, /* round 8 */
    {0x9, 0x6}, /* round 9 */
    {0xc, 0x3}, /* round 10 */
    {0x9, 0x3}, /* round 11 */
};
#endif
#if ASCON_INLINE_PERM
#elif ASCON_SINGLE_PERM
/* Single out-of-line permutation entry point: prints the input state (debug
   builds) and applies the last `rounds` rounds of the 12-round schedule,
   i.e. rounds START(rounds) .. 11 of the constant table. */
void P(state_t* s, uint8_t rounds) {
  int r = START(rounds);
  printstate(" permutation input", s);
  while (r < 12) {
    ROUND(s, constants[r][0], constants[r][1]);
    ++r;
  }
}
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
/* Out-of-line 12-round permutation; forwards to P12ROUNDS.
   NOTE(review): unlike the inline P12 and the single-function P, these
   wrappers do not call printstate(" permutation input", ...) - confirm that
   the missing debug output is intended. */
void P12(state_t* s) { P12ROUNDS(s); }
/* P8 is only compiled for AEAD builds (CRYPTO_ABYTES defined) with a 16-byte
   rate, which is the configuration that selects PB = P8. */
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) { P8ROUNDS(s); }
#endif
/* P6 is only compiled for AEAD builds with an 8-byte rate (PB = P6). */
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8
void P6(state_t* s) { P6ROUNDS(s); }
#endif
#endif
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
#include <stdint.h>
#include "api.h"
#include "ascon.h"
#include "printstate.h"
#include "round.h"
/* Per-variant parameters: key sizes and rates in bytes, round counts for the
   initialization/finalization permutation (PA) and the per-block permutation
   (PB), and the hash output size in bytes. */
#define ASCON_128_KEYBYTES 16
#define ASCON_128A_KEYBYTES 16
#define ASCON_80PQ_KEYBYTES 20
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32
/* AEAD initialization words. Each packs, from the most significant byte
   down: key size in bits, rate in bits, PA rounds, PB rounds; the low 32 bits
   are zero. The value is converted to an Ascon word with U64TOWORD. */
#define ASCON_128_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_128A_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128A_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128A_PB_ROUNDS) << 32))
#define ASCON_80PQ_IV \
U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
/* Hash/XOF initialization words: no key-size byte; rate in bits and PA rounds
   in the same positions as above. The hash IV also carries the output length
   in bits in the low bits; the XOF IV leaves them zero. */
#define ASCON_HASH_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_HASH_BYTES * 8) << 0))
#define ASCON_XOF_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40))
/* Five constant state words each for hash and XOF.
   NOTE(review): presumably the precomputed state after initializing with
   ASCON_HASH_IV / ASCON_XOF_IV and running the permutation - confirm against
   the hash implementation, which is not part of this header. */
#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull)
#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull)
#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull)
#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull)
/* Select IV, round counts and the per-block permutation PB for the variant
   being built, based on ASCON_RATE and CRYPTO_KEYBYTES from api.h.
   Rate 8 with a 16-byte key: IV = ASCON_128_IV, PB = P6. */
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
/* Rate 16 (any key size): IV = ASCON_128A_IV, PB = P8. */
#if ASCON_RATE == 16
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
/* Rate 8 with a 20-byte key: IV = ASCON_80PQ_IV, PB = P6. */
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20
#define IV ASCON_80PQ_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
/* Index of the first round-constant row for an n-round permutation (the
   rounds used are START(n) .. 11). The argument is parenthesized so that
   expressions such as START(a + b) expand correctly. */
#define START(n) (12 - (n))
#if ASCON_UNROLL_LOOPS
/* Unrolled round sequences (ASCON_UNROLL_LOOPS != 0). Each call passes one
   (C_e, C_o) round-constant pair to ROUND.
   P12ROUNDS applies all twelve rounds. */
__forceinline void P12ROUNDS(state_t* s) {
ROUND(s, 0xc, 0xc);
ROUND(s, 0x9, 0xc);
ROUND(s, 0xc, 0x9);
ROUND(s, 0x9, 0x9);
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
/* P8ROUNDS applies the last eight rounds of the 12-round sequence. */
__forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
/* P6ROUNDS applies the last six rounds of the 12-round sequence. */
__forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
#else /* !ASCON_UNROLL_LOOPS */
extern const uint8_t constants[][2];
__forceinline void P12ROUNDS(state_t* s) {
for (int i = START(12); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
__forceinline void P8ROUNDS(state_t* s) {
for (int i = START(8); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
__forceinline void P6ROUNDS(state_t* s) {
for (int i = START(6); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#endif
#if ASCON_INLINE_PERM
/* Inlined permutations: trace the input state, then run the rounds. */
__forceinline void P12(state_t* s) {
  printstate(" permutation input", s);
  P12ROUNDS(s);
}
__forceinline void P8(state_t* s) {
  printstate(" permutation input", s);
  P8ROUNDS(s);
}
__forceinline void P6(state_t* s) {
  printstate(" permutation input", s);
  P6ROUNDS(s);
}
/* Dispatch on the round count; any value other than 12, 8 or 6 is a no-op. */
__forceinline void P(state_t* s, int i) {
  switch (i) {
    case 12:
      P12(s);
      break;
    case 8:
      P8(s);
      break;
    case 6:
      P6(s);
      break;
    default:
      break;
  }
}
#elif ASCON_SINGLE_PERM
/* One out-of-line permutation taking the round count as an argument. */
#define P12(s) P(s, 12)
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
void P(state_t* s, uint8_t rounds);
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
/* Out-of-line permutations defined in another translation unit. */
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
/* Dispatch on the round count; any value other than 12, 8 or 6 is a no-op. */
__forceinline void P(state_t* s, int i) {
  switch (i) {
    case 12:
      P12(s);
      break;
    case 8:
      P8(s);
      break;
    case 6:
      P6(s);
      break;
    default:
      break;
  }
}
#endif
#endif /* PERMUTATIONS_H_ */
#ifndef PRINTSTATE_H_
#define PRINTSTATE_H_
#ifdef NDEBUG
#define printword(text, w)
#define printstate(text, s)
#else
#include <inttypes.h>
#include <stdio.h>
#include "ascon.h"
/* Print `text`, an equals sign and the word as 16 hex digits. The word is
   first converted back to a plain 64-bit value with WORDTOU64. */
__forceinline void printword(const char* text, const word_t x) {
  const uint64_t value = WORDTOU64(x);
  printf("%s=%016" PRIx64 "\n", text, value);
}
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#endif
#endif /* PRINTSTATE_H_ */
#include "random.h"
#include <stdlib.h>
#include <time.h>
#include "config.h"
#if ASCON_MASK_RNG == 'R'
/* Seed the C library generator with the current time. rand32/rand64 in this
   branch draw from randombytes() instead, which is declared elsewhere. */
void randinit() {
  srand(time(0));
}
/* Return 4 bytes obtained from randombytes() as a 32-bit value. */
uint32_t rand32() {
  uint32_t value;
  randombytes(&value, 4);
  return value;
}
/* Return 8 bytes obtained from randombytes() as a 64-bit value. */
uint64_t rand64() {
  uint64_t value;
  randombytes(&value, 8);
  return value;
}
#elif ASCON_MASK_RNG == 'S'
/* Seed the C library generator with the current time. */
void randinit() { srand(time(0)); }
/* Combine two rand() outputs, one shifted left by 21 bits, into 32 bits.
   NOTE(review): this only fills every bit position when rand() delivers
   enough bits; the C standard only guarantees RAND_MAX >= 32767. The order
   in which the two rand() calls are evaluated is unspecified. */
uint32_t rand32() { return ((uint32_t)rand() << 21) ^ rand(); }
/* Combine three rand() outputs (shifted by 43, 21 and 0 bits) into 64 bits;
   the same RAND_MAX caveat applies. */
uint64_t rand64() {
return ((uint64_t)rand() << 43) ^ ((uint64_t)rand() << 21) ^ rand();
}
#elif ASCON_MASK_RNG == 'X'
/* State of the 32-bit and 64-bit xorshift generators. Zero is a fixed point
   of both update functions, so the state must never be zero. */
uint32_t xorshift32;
uint64_t xorshift64;
/* Seed both generators from rand(), which is itself seeded with the current
   time. If rand() happens to deliver a zero seed, a fixed non-zero constant
   is substituted; otherwise every later output (and hence every mask) would
   be zero.
   NOTE(review): a time-based seed is predictable; confirm this is acceptable
   for the intended masking use. */
void randinit() {
  srand(time(0));
  xorshift32 = rand();
  xorshift64 = (uint64_t)rand() << 32 | rand();
  if (xorshift32 == 0) xorshift32 = 0x9e3779b9u;
  if (xorshift64 == 0) xorshift64 = 0x9e3779b97f4a7c15ull;
}
/* Advance the 32-bit xorshift state (shifts 13, 17, 5) and return it. */
uint32_t rand32() {
  uint32_t x = xorshift32;
  x ^= x << 13;
  x ^= x >> 17;
  x ^= x << 5;
  return xorshift32 = x;
}
/* Advance the 64-bit xorshift state (shifts 13, 7, 17) and return it. */
uint64_t rand64() {
  uint64_t x = xorshift64;
  x ^= x << 13;
  x ^= x >> 7;
  x ^= x << 17;
  return xorshift64 = x;
}
#endif
#ifndef RANDOM_H_
#define RANDOM_H_
#include <stdint.h>
/* Mask generator interface. The functions take no arguments; `(void)` makes
   these real prototypes so that calls with arguments are diagnosed. */
/* initialise / seed the generator */
void randinit(void);
/* next 32-bit random value */
uint32_t rand32(void);
/* next 64-bit random value */
uint64_t rand64(void);
#endif /* RANDOM_H_ */
#ifndef ROUND_H_
#define ROUND_H_
#include "ascon.h"
#include "printstate.h"
#include "random.h"
/* Initialise the three key words, in the order K0, K1, K2, with
   TOSHARES(0). */
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
  word_t* const slots[3] = {K0, K1, K2};
  for (int k = 0; k < 3; ++k) *slots[k] = TOSHARES(0);
}
/* Initialise the mask generator, then set x0..x4 and rx (in that order) to
   TOSHARES(0). */
__forceinline void PINIT(state_t* s) {
  word_t* const lanes[6] = {&s->x0, &s->x1, &s->x2,
                            &s->x3, &s->x4, &s->rx};
  randinit();
  for (int k = 0; k < 6; ++k) *lanes[k] = TOSHARES(0);
}
/* Two-share update a ^= ~b & c, where a = a0 ^ a1, b = b0 ^ b1, c = c0 ^ c1.
   Share a0 receives ~b0 & (c0 ^ c1) and share a1 receives b1 & (c0 ^ c1),
   each as two separate XORs; the XOR of both updates equals ~(b0 ^ b1) & c.
   The statement order is kept exactly as written. */
#define TOFFOLI(a0, a1, b0, b1, c0, c1) \
do { \
(a0) ^= (~(b0)) & (c1); \
(a0) ^= (~(b0)) & (c0); \
(a1) ^= (b1) & (c1); \
(a1) ^= (b1) & (c0); \
} while (0)
/* One permutation round on a two-share state (shares s0 and s1, each split
   into an even half .e and an odd half .o).
   C_e and C_o are the round-constant parts XORed into the even and odd half
   of share 0 of x2. The substitution layer is evaluated first on the .e
   halves, then on the .o halves; the linear layer is then applied to share
   s1 and to share s0 separately, using t as scratch space. */
__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) {
state_t t;
/* refresh randomness */
/* s->rx = TOSHARES(0); */
/* addition of round constant */
s->x2.s0.e ^= C_e;
/* substitution layer */
s->x0.s0.e ^= s->x4.s0.e;
s->x4.s0.e ^= s->x3.s0.e;
s->x2.s0.e ^= s->x1.s0.e;
s->x0.s1.e ^= s->x4.s1.e;
s->x4.s1.e ^= s->x3.s1.e;
s->x2.s1.e ^= s->x1.s1.e;
/* start of shared keccak s-box from https://eprint.iacr.org/2019/536 */
/* make both rx shares equal (they XOR to zero) before rx accumulates the
   ~x4 & x0 term that is added to x3 after the other updates */
s->rx.s0.e = s->rx.s1.e;
TOFFOLI(s->rx.s0.e, s->rx.s1.e, s->x4.s0.e, s->x4.s1.e, s->x0.s0.e,
s->x0.s1.e);
TOFFOLI(s->x0.s0.e, s->x0.s1.e, s->x1.s0.e, s->x1.s1.e, s->x2.s0.e,
s->x2.s1.e);
TOFFOLI(s->x2.s0.e, s->x2.s1.e, s->x3.s0.e, s->x3.s1.e, s->x4.s0.e,
s->x4.s1.e);
TOFFOLI(s->x4.s0.e, s->x4.s1.e, s->x0.s0.e, s->x0.s1.e, s->x1.s0.e,
s->x1.s1.e);
TOFFOLI(s->x1.s0.e, s->x1.s1.e, s->x2.s0.e, s->x2.s1.e, s->x3.s0.e,
s->x3.s1.e);
s->x3.s1.e ^= s->rx.s1.e;
s->x3.s0.e ^= s->rx.s0.e;
/* end of shared keccak s-box */
s->x1.s0.e ^= s->x0.s0.e;
s->x0.s0.e ^= s->x4.s0.e;
s->x3.s0.e ^= s->x2.s0.e;
/* the complement of x2 is applied to share 0 only */
s->x2.s0.e = ~s->x2.s0.e;
s->x1.s1.e ^= s->x0.s1.e;
s->x0.s1.e ^= s->x4.s1.e;
s->x3.s1.e ^= s->x2.s1.e;
/* addition of round constant */
s->x2.s0.o ^= C_o;
/* substitution layer */
s->x0.s0.o ^= s->x4.s0.o;
s->x4.s0.o ^= s->x3.s0.o;
s->x2.s0.o ^= s->x1.s0.o;
s->x0.s1.o ^= s->x4.s1.o;
s->x4.s1.o ^= s->x3.s1.o;
s->x2.s1.o ^= s->x1.s1.o;
/* start of shared keccak s-box from https://eprint.iacr.org/2019/536 */
s->rx.s0.o = s->rx.s1.o;
TOFFOLI(s->rx.s0.o, s->rx.s1.o, s->x4.s0.o, s->x4.s1.o, s->x0.s0.o,
s->x0.s1.o);
TOFFOLI(s->x0.s0.o, s->x0.s1.o, s->x1.s0.o, s->x1.s1.o, s->x2.s0.o,
s->x2.s1.o);
TOFFOLI(s->x2.s0.o, s->x2.s1.o, s->x3.s0.o, s->x3.s1.o, s->x4.s0.o,
s->x4.s1.o);
TOFFOLI(s->x4.s0.o, s->x4.s1.o, s->x0.s0.o, s->x0.s1.o, s->x1.s0.o,
s->x1.s1.o);
TOFFOLI(s->x1.s0.o, s->x1.s1.o, s->x2.s0.o, s->x2.s1.o, s->x3.s0.o,
s->x3.s1.o);
s->x3.s1.o ^= s->rx.s1.o;
s->x3.s0.o ^= s->rx.s0.o;
/* end of shared keccak s-box */
s->x1.s0.o ^= s->x0.s0.o;
s->x0.s0.o ^= s->x4.s0.o;
s->x3.s0.o ^= s->x2.s0.o;
s->x2.s0.o = ~s->x2.s0.o;
s->x1.s1.o ^= s->x0.s1.o;
s->x0.s1.o ^= s->x4.s1.o;
s->x3.s1.o ^= s->x2.s1.o;
/* linear diffusion layer */
/* share s1 */
t.x0.s1.e = s->x0.s1.e ^ ROR32(s->x0.s1.o, 4);
t.x0.s1.o = s->x0.s1.o ^ ROR32(s->x0.s1.e, 5);
t.x1.s1.e = s->x1.s1.e ^ ROR32(s->x1.s1.e, 11);
t.x1.s1.o = s->x1.s1.o ^ ROR32(s->x1.s1.o, 11);
t.x2.s1.e = s->x2.s1.e ^ ROR32(s->x2.s1.o, 2);
t.x2.s1.o = s->x2.s1.o ^ ROR32(s->x2.s1.e, 3);
t.x3.s1.e = s->x3.s1.e ^ ROR32(s->x3.s1.o, 3);
t.x3.s1.o = s->x3.s1.o ^ ROR32(s->x3.s1.e, 4);
t.x4.s1.e = s->x4.s1.e ^ ROR32(s->x4.s1.e, 17);
t.x4.s1.o = s->x4.s1.o ^ ROR32(s->x4.s1.o, 17);
s->x0.s1.e ^= ROR32(t.x0.s1.o, 9);
s->x0.s1.o ^= ROR32(t.x0.s1.e, 10);
s->x1.s1.e ^= ROR32(t.x1.s1.o, 19);
s->x1.s1.o ^= ROR32(t.x1.s1.e, 20);
s->x2.s1.e ^= t.x2.s1.o;
s->x2.s1.o ^= ROR32(t.x2.s1.e, 1);
s->x3.s1.e ^= ROR32(t.x3.s1.e, 5);
s->x3.s1.o ^= ROR32(t.x3.s1.o, 5);
s->x4.s1.e ^= ROR32(t.x4.s1.o, 3);
s->x4.s1.o ^= ROR32(t.x4.s1.e, 4);
/* share s0 */
t.x0.s0.e = s->x0.s0.e ^ ROR32(s->x0.s0.o, 4);
t.x0.s0.o = s->x0.s0.o ^ ROR32(s->x0.s0.e, 5);
t.x1.s0.e = s->x1.s0.e ^ ROR32(s->x1.s0.e, 11);
t.x1.s0.o = s->x1.s0.o ^ ROR32(s->x1.s0.o, 11);
t.x2.s0.e = s->x2.s0.e ^ ROR32(s->x2.s0.o, 2);
t.x2.s0.o = s->x2.s0.o ^ ROR32(s->x2.s0.e, 3);
t.x3.s0.e = s->x3.s0.e ^ ROR32(s->x3.s0.o, 3);
t.x3.s0.o = s->x3.s0.o ^ ROR32(s->x3.s0.e, 4);
t.x4.s0.e = s->x4.s0.e ^ ROR32(s->x4.s0.e, 17);
t.x4.s0.o = s->x4.s0.o ^ ROR32(s->x4.s0.o, 17);
s->x0.s0.e ^= ROR32(t.x0.s0.o, 9);
s->x0.s0.o ^= ROR32(t.x0.s0.e, 10);
s->x1.s0.e ^= ROR32(t.x1.s0.o, 19);
s->x1.s0.o ^= ROR32(t.x1.s0.e, 20);
s->x2.s0.e ^= t.x2.s0.o;
s->x2.s0.o ^= ROR32(t.x2.s0.e, 1);
s->x3.s0.e ^= ROR32(t.x3.s0.e, 5);
s->x3.s0.o ^= ROR32(t.x3.s0.o, 5);
s->x4.s0.e ^= ROR32(t.x4.s0.o, 3);
s->x4.s0.o ^= ROR32(t.x4.s0.e, 4);
printstate(" round output", s);
}
#endif /* ROUND_H_ */
#include "word.h"
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
/* Convert one share to the split representation: each input half is
   unshuffled with four swap steps (distances 1, 2, 4, 8), then the low
   16-bit parts of both halves are collected in .e and the high 16-bit parts
   in .o. */
share_t TOBI32(share_t in) {
  uint32_t low = in.e;
  uint32_t high = in.o;
  uint32_t t;

  /* unshuffle the low input half */
  t = (low ^ (low >> 1)) & 0x22222222;
  low ^= t ^ (t << 1);
  t = (low ^ (low >> 2)) & 0x0C0C0C0C;
  low ^= t ^ (t << 2);
  t = (low ^ (low >> 4)) & 0x00F000F0;
  low ^= t ^ (t << 4);
  t = (low ^ (low >> 8)) & 0x0000FF00;
  low ^= t ^ (t << 8);

  /* unshuffle the high input half */
  t = (high ^ (high >> 1)) & 0x22222222;
  high ^= t ^ (t << 1);
  t = (high ^ (high >> 2)) & 0x0C0C0C0C;
  high ^= t ^ (t << 2);
  t = (high ^ (high >> 4)) & 0x00F000F0;
  high ^= t ^ (t << 4);
  t = (high ^ (high >> 8)) & 0x0000FF00;
  high ^= t ^ (t << 8);

  /* regroup the 16-bit parts */
  in.e = (low & 0x0000FFFF) | (high << 16);
  in.o = (low >> 16) | (high & 0xFFFF0000);
  return in;
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
/* Convert one share back from the split representation: the 16-bit parts of
   .e and .o are regrouped into a low and a high half, and each half is
   shuffled with four swap steps (distances 8, 4, 2, 1). */
share_t FROMBI32(share_t in) {
  const uint32_t even = in.e;
  const uint32_t odd = in.o;
  uint32_t low = (even & 0x0000FFFF) | (odd << 16);
  uint32_t high = (even >> 16) | (odd & 0xFFFF0000);
  uint32_t t;

  /* shuffle the low half */
  t = (low ^ (low >> 8)) & 0x0000FF00;
  low ^= t ^ (t << 8);
  t = (low ^ (low >> 4)) & 0x00F000F0;
  low ^= t ^ (t << 4);
  t = (low ^ (low >> 2)) & 0x0C0C0C0C;
  low ^= t ^ (t << 2);
  t = (low ^ (low >> 1)) & 0x22222222;
  low ^= t ^ (t << 1);

  /* shuffle the high half */
  t = (high ^ (high >> 8)) & 0x0000FF00;
  high ^= t ^ (t << 8);
  t = (high ^ (high >> 4)) & 0x00F000F0;
  high ^= t ^ (t << 4);
  t = (high ^ (high >> 2)) & 0x0C0C0C0C;
  high ^= t ^ (t << 2);
  t = (high ^ (high >> 1)) & 0x22222222;
  high ^= t ^ (t << 1);

  in.e = low;
  in.o = high;
  return in;
}
#ifndef WORD_H_
#define WORD_H_
#include <stdint.h>
#include "config.h"
#include "random.h"
/* One share of a 64-bit word, held as two 32-bit halves. */
typedef struct {
uint32_t e; /* "even" half (low 32 bits before TOBI32 is applied) */
uint32_t o; /* "odd" half (high 32 bits before TOBI32 is applied) */
} share_t;
/* A 64-bit word split into two shares; FROMSHARES recovers the value as the
   XOR of s0 and s1. */
typedef struct {
share_t s0;
share_t s1;
} word_t;
/* Build an unmasked word: share 0 holds the low and high 32 bits of x in
   .e and .o, share 1 is all zero. No bit reordering is applied. */
__forceinline word_t WORD_T(uint64_t x) {
  const word_t out = {{(uint32_t)x, (uint32_t)(x >> 32)}, {0, 0}};
  return out;
}
/* Reassemble a 64-bit value from share 0 only (.o is the high half, .e the
   low half); the other share is ignored. */
__forceinline uint64_t UINT64_T(word_t w) {
  const uint64_t high = (uint64_t)w.s0.o << 32;
  const uint64_t low = w.s0.e;
  return high | low;
}
share_t TOBI32(share_t in);
share_t FROMBI32(share_t in);
/* Split a 64-bit value into two shares: share 1 holds two fresh rand32()
   outputs (drawn for .e first, then .o) and share 0 holds the value XORed
   with them. */
__forceinline word_t TOSHARES(uint64_t in) {
  word_t w;
  w.s1.e = rand32();
  w.s1.o = rand32();
  w.s0.e = (uint32_t)in ^ w.s1.e;
  w.s0.o = (uint32_t)(in >> 32) ^ w.s1.o;
  return w;
}
/* Recombine the two shares: XOR them half by half and place .o in the upper
   and .e in the lower 32 bits of the result. */
__forceinline uint64_t FROMSHARES(word_t in) {
  const uint32_t high = in.s0.o ^ in.s1.o;
  const uint32_t low = in.s0.e ^ in.s1.e;
  return (uint64_t)high << 32 | low;
}
/* Convert a plain 64-bit value to a word in the split representation. With
   ASCON_MASK_LOADS the value is first split into random shares and both
   shares are converted; otherwise only share 0 carries data and share 1
   stays zero. */
__forceinline word_t U64TOWORD(uint64_t x) {
  word_t result;
#if ASCON_MASK_LOADS
  result = TOSHARES(x);
  result.s1 = TOBI32(result.s1);
#else
  result = WORD_T(x);
#endif
  result.s0 = TOBI32(result.s0);
  return result;
}
/* Convert a word back to a plain 64-bit value: undo the split representation
   on both shares, then recombine them. */
__forceinline uint64_t WORDTOU64(word_t w) {
  word_t plain;
  plain.s0 = FROMBI32(w.s0);
  plain.s1 = FROMBI32(w.s1);
  return (uint64_t)FROMSHARES(plain);
}
/* a ^= b on two-share words, share by share and half by half. `b` is
   evaluated once into a temporary; `a` must be an lvalue without side
   effects because it is named four times. */
#define XOR(a, b)                  \
  do {                             \
    const word_t xor_rhs_ = b;     \
    (a).s0.e ^= xor_rhs_.s0.e;     \
    (a).s0.o ^= xor_rhs_.s0.o;     \
    (a).s1.e ^= xor_rhs_.s1.e;     \
    (a).s1.o ^= xor_rhs_.s1.o;     \
  } while (0)
/* a &= b on two-share words. Each half of each share of `a` is ANDed with
   both shares of the matching half of `b`, so the XOR of the result shares
   equals (a.s0 ^ a.s1) & (b.s0 ^ b.s1).
   Fix: the odd half of share 0 previously combined with b.s0.e (the even
   half) instead of b.s0.o, which gives a wrong result whenever the two
   halves of b.s0 differ. */
#define AND(a, b)                                         \
  do {                                                    \
    word_t ta = a;                                        \
    word_t tb = b;                                        \
    (a).s0.e = (ta.s0.e & tb.s0.e) ^ (ta.s0.e & tb.s1.e); \
    (a).s0.o = (ta.s0.o & tb.s0.o) ^ (ta.s0.o & tb.s1.o); \
    (a).s1.e = (ta.s1.e & tb.s0.e) ^ (ta.s1.e & tb.s1.e); \
    (a).s1.o = (ta.s1.o & tb.s0.o) ^ (ta.s1.o & tb.s1.o); \
  } while (0)
/* Rotate a 32-bit value right by n bits. n must be in 1..31: n == 0 would
   shift left by the full width of the type. */
__forceinline uint32_t ROR32(uint32_t x, int n) {
  const uint32_t low_part = x >> n;
  const uint32_t high_part = x << (32 - n);
  return low_part | high_part;
}
/* For every half of every share, combine the low 16 bits of lo2hi (moved to
   the upper 16 bits) with the high 16 bits of hi2lo (moved to the lower 16
   bits). */
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
  word_t out;
  /* share 0 */
  out.s0.e = (lo2hi.s0.e << 16) | (hi2lo.s0.e >> 16);
  out.s0.o = (lo2hi.s0.o << 16) | (hi2lo.s0.o >> 16);
  /* share 1 */
  out.s1.e = (lo2hi.s1.e << 16) | (hi2lo.s1.e >> 16);
  out.s1.o = (lo2hi.s1.o << 16) | (hi2lo.s1.o >> 16);
  return out;
}
/* Return a non-zero value if the recombined value (s0 ^ s1) of `a` or of `b`
   has any non-zero byte, and 0 otherwise. All 8 bytes of both words are
   always visited. */
__forceinline int NOTZERO(word_t a, word_t b) {
  const uint8_t* const a0 = (uint8_t*)&(a.s0);
  const uint8_t* const a1 = (uint8_t*)&(a.s1);
  const uint8_t* const b0 = (uint8_t*)&(b.s0);
  const uint8_t* const b1 = (uint8_t*)&(b.s1);
  int diff = 0;
  for (int k = 0; k < 8; ++k) {
    diff |= a0[k] ^ a1[k];
    diff |= b0[k] ^ b1[k];
  }
  return diff;
}
/* Set the padding bit for byte position i (0 <= i <= 7) of a 64-bit Ascon
   word: a single bit at position 31 - 4 * i of the odd half of share 0.
   The shifted constant is unsigned so that i == 0 (bit 31) does not
   overflow a signed int. */
__forceinline word_t PAD(int i) {
  return WORD_T((uint64_t)((uint32_t)0x08 << (28 - 4 * i)) << 32);
}
/* Byte mask for a 64-bit Ascon word (1 <= n <= 8): the same 32-bit mask,
   0x0fffffff shifted right by 4 * (n - 1), is placed in both halves of
   share 0. */
__forceinline word_t XMASK(int n) {
  const uint32_t half_mask = 0x0fffffff >> (4 * (n - 1));
  const uint64_t both_halves = ((uint64_t)half_mask << 32) | half_mask;
  return WORD_T(both_halves);
}
#endif /* WORD_H_ */
/* AEAD parameters, all sizes in bytes. */
/* key length */
#define CRYPTO_KEYBYTES 16
/* secret message number length (unused: nsec is ignored) */
#define CRYPTO_NSECBYTES 0
/* public nonce length */
#define CRYPTO_NPUBBYTES 16
/* tag length appended to the ciphertext */
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
/* number of bytes processed per permutation call */
#define ASCON_RATE 16
#include "ascon.h"
#include "api.h"
#include "loadstore.h"
#include "permutations.h"
#include "printstate.h"
/* Load the key into K0..K2. For 20-byte keys the first 4 bytes go to K0
   (moved into place with KEYROT) and the remaining 16 bytes to K1 and K2;
   for other key sizes K0 keeps its initial value from KINIT. */
__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2,
                           const uint8_t* k) {
  const uint8_t* key = k;
  KINIT(K0, K1, K2);
  if (CRYPTO_KEYBYTES == 20) {
    XOR(*K0, KEYROT(WORD_T(0), LOAD(key, 4)));
    key += 4;
  }
  XOR(*K1, LOAD64(key));
  XOR(*K2, LOAD64(key + 8));
}
/* Initialisation phase: load nonce and key, set up the state with PINIT,
   XOR in IV, key and nonce, run the 12-round permutation and XOR the key
   into the state again.
   s: state to initialise; npub: 16-byte nonce; k: key (CRYPTO_KEYBYTES).
   The statement order is significant: loads may draw random masks and PINIT
   re-initialises the mask generator. */
__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) {
word_t N0, N1;
word_t K0, K1, K2;
/* load nonce */
N0 = LOAD64(npub);
N1 = LOAD64(npub + 8);
/* load key */
loadkey(&K0, &K1, &K2, k);
/* initialization */
PINIT(s);
XOR(s->x0, IV);
/* K0 only carries key material for 20-byte keys */
if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0);
XOR(s->x1, K1);
XOR(s->x2, K2);
XOR(s->x3, N0);
XOR(s->x4, N1);
P12(s);
/* second key addition after the permutation */
if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0);
XOR(s->x3, K1);
XOR(s->x4, K2);
printstate("initialization", s);
}
/* Absorb the associated data: full ASCON_RATE-byte blocks are XORed into x0
   (and x1 for rate 16) followed by PB; the final partial block is XORed in
   together with a padding bit. Nothing is absorbed when adlen == 0. In all
   cases x4 is XORed with WORD_T(1) at the end. */
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
word_t* restrict px;
/* process associated data */
if (adlen) {
while (adlen >= ASCON_RATE) {
XOR(s->x0, LOAD64(ad));
if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8));
PB(s);
ad += ASCON_RATE;
adlen -= ASCON_RATE;
}
/* final associated data block */
/* px points at the word that receives the remaining (< 8) bytes */
px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) {
XOR(s->x0, LOAD64(ad));
px = &s->x1;
ad += 8;
adlen -= 8;
}
if (adlen) XOR(*px, LOAD(ad, adlen));
XOR(*px, PAD(adlen));
PB(s);
}
XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
}
/* Encrypt mlen bytes from m into c: each plaintext block is XORed into the
   rate words, the updated words are written out as ciphertext, and PB is
   applied between full blocks. The final partial block is handled without a
   trailing permutation and is followed by a padding bit. */
__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
word_t* restrict px;
/* process plaintext */
while (mlen >= ASCON_RATE) {
XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
if (ASCON_RATE == 16) {
XOR(s->x1, LOAD64(m + 8));
STORE64(c + 8, s->x1);
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
mlen -= ASCON_RATE;
}
/* final plaintext block */
/* px points at the word that receives the remaining (< 8) bytes */
px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) {
XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
px = &s->x1;
m += 8;
c += 8;
mlen -= 8;
}
if (mlen) {
XOR(*px, LOAD(m, mlen));
STORE(c, *px, mlen);
}
XOR(*px, PAD(mlen));
printstate("process plaintext", s);
}
/* Decrypt clen bytes from c into m: each ciphertext block is XORed with the
   rate words to give the plaintext, and the rate words are then replaced by
   the ciphertext. PB is applied between full blocks. For the final partial
   block only the first clen bytes of the word are replaced (the rest is kept
   via XMASK), followed by a padding bit. */
__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
word_t* restrict px;
word_t cx;
/* process ciphertext */
while (clen >= ASCON_RATE) {
cx = LOAD64(c);
XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
if (ASCON_RATE == 16) {
cx = LOAD64(c + 8);
XOR(s->x1, cx);
STORE64(m + 8, s->x1);
s->x1 = cx;
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
clen -= ASCON_RATE;
}
/* final ciphertext block */
px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) {
cx = LOAD64(c);
XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
px = &s->x1;
m += 8;
c += 8;
clen -= 8;
}
if (clen) {
cx = LOAD(c, clen);
XOR(*px, cx);
STORE(m, *px, clen);
/* keep the state bytes beyond clen, then insert the ciphertext bytes */
AND(*px, XMASK(clen));
XOR(*px, cx);
}
XOR(*px, PAD(clen));
printstate("process ciphertext", s);
}
/* Finalisation phase: reload the key, XOR it into the state at the position
   that depends on key size and rate, run the 12-round permutation and XOR
   K1/K2 into x3/x4, which then hold the tag. */
__forceinline void final(state_t* s, const uint8_t* k) {
  word_t K0, K1, K2;
  /* load key */
  loadkey(&K0, &K1, &K2, k);
  /* finalization: the three cases are mutually exclusive */
  if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
    XOR(s->x1, K1);
    XOR(s->x2, K2);
  } else if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
    XOR(s->x2, K1);
    XOR(s->x3, K2);
  } else if (CRYPTO_KEYBYTES == 20) {
    XOR(s->x1, KEYROT(K0, K1));
    XOR(s->x2, KEYROT(K1, K2));
    XOR(s->x3, KEYROT(K2, WORD_T(0)));
  }
  P12(s);
  XOR(s->x3, K1);
  XOR(s->x4, K2);
  printstate("finalization", s);
}
/* Bind the mode entry points used by the AEAD functions. Without
   ASCON_INLINE_MODE the phases are exported as ascon_* functions that simply
   forward to the inline implementations; with it, the inline functions are
   used directly. */
#if !ASCON_INLINE_MODE
#define INIT ascon_init
#define ABSORB ascon_absorb
#define ENCRYPT ascon_encrypt
#define DECRYPT ascon_decrypt
#define FINAL ascon_final
/* forwards to init() */
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
init(s, npub, k);
}
/* forwards to absorb() */
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
absorb(s, ad, adlen);
}
/* forwards to encrypt() */
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) {
encrypt(s, c, m, mlen);
}
/* forwards to decrypt() */
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) {
decrypt(s, m, c, clen);
}
/* forwards to final() */
void ascon_final(state_t* s, const uint8_t* k) { final(s, k); }
#else
#define INIT init
#define ABSORB absorb
#define ENCRYPT encrypt
#define DECRYPT decrypt
#define FINAL final
#endif
/* Encrypt mlen bytes of m under key k and nonce npub, authenticating ad.
   Writes mlen ciphertext bytes followed by the 16-byte tag to c and stores
   the total length in *clen. nsec is ignored. Always returns 0. */
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
                        uint64_t mlen, const uint8_t* ad, uint64_t adlen,
                        const uint8_t* nsec, const uint8_t* npub,
                        const uint8_t* k) {
  state_t s;
  uint8_t* tag;
  (void)nsec;
  *clen = mlen + CRYPTO_ABYTES;
  /* perform ascon computation */
  INIT(&s, npub, k);
  ABSORB(&s, ad, adlen);
  ENCRYPT(&s, c, m, mlen);
  FINAL(&s, k);
  /* set tag: x3 and x4 directly after the ciphertext */
  tag = c + mlen;
  STORE64(tag, s.x3);
  STORE64(tag + 8, s.x4);
  return 0;
}
/* Decrypt and verify c (ciphertext followed by a 16-byte tag). On success
   the plaintext is in m, its length in *mlen, and 0 is returned. If c is
   shorter than a tag or the tag does not match, *mlen is set to 0 and -1 is
   returned; in the mismatch case the bytes already written to m are left in
   place. nsec is ignored. */
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
                        const uint8_t* c, uint64_t clen, const uint8_t* ad,
                        uint64_t adlen, const uint8_t* npub, const uint8_t* k) {
  state_t s;
  const uint8_t* tag;
  (void)nsec;
  if (clen < CRYPTO_ABYTES) {
    *mlen = 0;
    return -1;
  }
  clen -= CRYPTO_ABYTES;
  *mlen = clen;
  /* perform ascon computation */
  INIT(&s, npub, k);
  ABSORB(&s, ad, adlen);
  DECRYPT(&s, m, c, clen);
  FINAL(&s, k);
  /* verify tag (should be constant time, check compiler output) */
  tag = c + clen;
  XOR(s.x3, LOAD64(tag));
  XOR(s.x4, LOAD64(tag + 8));
  if (NOTZERO(s.x3, s.x4) != 0) {
    *mlen = 0;
    return -1;
  }
  return 0;
}
#ifndef ASCON_H_
#define ASCON_H_
#include <stdint.h>
#include "config.h"
#include "word.h"
/* Permutation state: five words x0..x4 plus one extra word rx that the
   round function uses as a scratch / randomness word. */
typedef struct {
word_t x0, x1, x2, x3, x4;
word_t rx;
} state_t;
/* Out-of-line entry points for the individual AEAD phases (defined only
   when ASCON_INLINE_MODE is 0). */
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k);
#endif // ASCON_H_
#ifndef CONFIG_H_
#define CONFIG_H_
/* Every switch below may be predefined (e.g. on the compiler command line);
   the value given here is only the default. */
/* inline the Ascon mode: 1 uses the inline init/absorb/encrypt/decrypt/final
   directly, 0 goes through the exported ascon_* functions */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#endif
/* inline the Ascon permutations */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0
#endif
/* single function for all permutations (only consulted when
   ASCON_INLINE_PERM is 0) */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif
/* unroll the permutation loops */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 0
#endif
/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */
#ifndef ASCON_DATA_ACCESS
#define ASCON_DATA_ACCESS 'B'
#endif
/* Ascon mask generator { 'S'tdlib, 'X'orshift, 'R'andombytes } */
#ifndef ASCON_MASK_RNG
#define ASCON_MASK_RNG 'X'
#endif
/* mask key/data loads: 1 splits every loaded value into random shares */
#ifndef ASCON_MASK_LOADS
#define ASCON_MASK_LOADS 0
#endif
/* make sure __forceinline is supported: MSVC provides it as a keyword, which
   #ifndef cannot detect, so it is left untouched there; other compilers get
   a definition based on the GCC/Clang always_inline attribute */
#if !defined(__forceinline) && !defined(_MSC_VER)
#define __forceinline inline __attribute__((always_inline))
#endif
#endif /* CONFIG_H_ */
#ifndef ENDIAN_H_
#define ENDIAN_H_
/* U64BIG / U32BIG / U16BIG convert between host byte order and big-endian
   byte order: the identity on big-endian hosts, a byte swap on little-endian
   hosts. The swap macros evaluate their argument several times. */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines */
#ifndef NDEBUG
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines */
#ifndef NDEBUG
#pragma message("Using macros for little endian machines")
#endif
/* terms listed from the most significant source byte downwards */
#define U64BIG(x)                            \
  ((((x) & 0xFF00000000000000ULL) >> 56) |   \
   (((x) & 0x00FF000000000000ULL) >> 40) |   \
   (((x) & 0x0000FF0000000000ULL) >> 24) |   \
   (((x) & 0x000000FF00000000ULL) >> 8) |    \
   (((x) & 0x00000000FF000000ULL) << 8) |    \
   (((x) & 0x0000000000FF0000ULL) << 24) |   \
   (((x) & 0x000000000000FF00ULL) << 40) |   \
   (((x) & 0x00000000000000FFULL) << 56))
#define U32BIG(x)                                         \
  ((((x) & 0xFF000000) >> 24) | (((x) & 0x00FF0000) >> 8) | \
   (((x) & 0x0000FF00) << 8) | (((x) & 0x000000FF) << 24))
#define U16BIG(x) ((((x) & 0xFF00) >> 8) | (((x) & 0x00FF) << 8))
#else
#error "Ascon byte order macros not defined in endian.h"
#endif
#endif /* ENDIAN_H_ */
#ifndef LOADSTORE_H_
#define LOADSTORE_H_
#include <stdint.h>
#include "config.h"
#include "endian.h"
#include "word.h"
/* 64-bit mask with the n least significant bits set (1 <= n <= 64;
   undefined for n == 0) */
#define MASK(n) (0xffffffffffffffffull >> (64 - (n)))
/* get byte i (0 = most significant) from an Ascon 64-bit word */
#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (8 * (7 - (i)))))
/* place byte b at position i (0 = most significant) of an Ascon 64-bit word */
#define SETBYTE(b, i) ((uint64_t)(b) << (8 * (7 - (i))))
#if ASCON_DATA_ACCESS == 'W'
#ifndef NDEBUG
#pragma message("Using wordwise data access")
#endif
/* Load 8 bytes with a single 64-bit access and convert to a word. */
__forceinline word_t LOAD64(const uint8_t* bytes) {
  const uint64_t raw = *(uint64_t*)bytes;
  return U64TOWORD(U64BIG(raw));
}
/* Convert a word to a plain value and store it with a single 64-bit access. */
__forceinline void STORE64(uint8_t* bytes, word_t w) {
  const uint64_t plain = WORDTOU64(w);
  *(uint64_t*)bytes = U64BIG(plain);
}
/* Load n bytes (1 <= n <= 8). A full 64-bit access is performed and only the
   low 8 * n bits of the value read are kept, so 8 bytes are always read. */
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
  const uint64_t raw = *(uint64_t*)bytes & MASK(8 * n);
  return U64TOWORD(U64BIG(raw));
}
/* Store the first n bytes (1 <= n <= 8) of a word with 64-bit accesses.
   The low 8 * n bits of the destination are cleared and replaced by the
   matching bits of the converted word. The word is masked before it is ORed
   in: callers pass a full state word, and without the mask its remaining
   bytes would be ORed into the memory following the n output bytes.
   NOTE(review): like the wordwise LOAD, this still reads and writes a full
   8 bytes at `bytes` and selects bytes by their position in the low bits,
   which matches memory order only on little-endian hosts — confirm. */
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
  uint64_t x = WORDTOU64(w);
  *(uint64_t*)bytes &= ~MASK(8 * n);
  *(uint64_t*)bytes |= U64BIG(x) & MASK(8 * n);
}
#elif ASCON_DATA_ACCESS == 'M'
#ifndef NDEBUG
#pragma message("Using memcpy to access data")
#endif
#include <string.h>
/* full-word accesses are the n == 8 case of the generic functions */
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
/* Copy n bytes into a zeroed 64-bit buffer, convert the byte order and
   return the value as a word. */
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
  uint64_t raw = 0;
  memcpy((uint8_t*)&raw, bytes, n);
  return U64TOWORD(U64BIG(raw));
}
/* Convert the word to a plain value, convert the byte order and copy the
   first n bytes of the result to `bytes`. */
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
  const uint64_t plain = WORDTOU64(w);
  const uint64_t raw = U64BIG(plain);
  memcpy(bytes, (uint8_t*)&raw, n);
}
#elif ASCON_DATA_ACCESS == 'B'
#ifndef NDEBUG
#pragma message("Using bytewise data access")
#endif
/* full-word accesses are the n == 8 case of the generic functions */
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
/* Assemble n bytes (0 <= n <= 8) into a 64-bit value, first byte in the most
   significant position, and convert it to a word. */
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
  uint64_t acc = 0;
  for (int k = 0; k < n; ++k) acc |= (uint64_t)bytes[k] << (56 - 8 * k);
  return U64TOWORD(acc);
}
/* Write the n most significant bytes (0 <= n <= 8) of the word's plain
   value to `bytes`, most significant byte first. */
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
  const uint64_t plain = WORDTOU64(w);
  for (int k = 0; k < n; ++k) bytes[k] = plain >> (56 - 8 * k);
}
#elif ASCON_DATA_ACCESS == 'H'
#ifndef NDEBUG
#pragma message("Using hybrid data access")
#endif
/* full-word accesses are the n == 8 case of the generic functions */
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
/* Load n bytes: a single 64-bit access for n == 8, byte by byte (first byte
   most significant) otherwise. */
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
  uint64_t acc = 0;
  if (n == 8) {
    acc = U64BIG(*(uint64_t*)bytes);
  } else {
    for (int k = 0; k < n; ++k) acc |= (uint64_t)bytes[k] << (56 - 8 * k);
  }
  return U64TOWORD(acc);
}
/* Store n bytes: a single 64-bit access for n == 8, byte by byte (most
   significant byte first) otherwise. */
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
  const uint64_t plain = WORDTOU64(w);
  if (n == 8) {
    *(uint64_t*)bytes = U64BIG(plain);
  } else {
    for (int k = 0; k < n; ++k) bytes[k] = plain >> (56 - 8 * k);
  }
}
#else
#error "Ascon data access macro not defined correctly"
#endif
#endif /* LOADSTORE_H_ */
#include "permutations.h"
#include "round.h"
/* Round-constant table for the looping permutations: one {C_e, C_o} pair
   per round, in the order the twelve rounds are executed. Shorter
   permutations start at index START(rounds). */
#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM
const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
{0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9},
{0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}};
#endif
#if ASCON_INLINE_PERM
/* permutations are defined inline in the header; nothing to emit here */
#elif ASCON_SINGLE_PERM
/* Single out-of-line permutation: trace the input state, then run the last
   `rounds` entries of the constant table. */
void P(state_t* s, uint8_t rounds) {
  int idx = START(rounds);
  printstate(" permutation input", s);
  while (idx < 12) {
    ROUND(s, constants[idx][0], constants[idx][1]);
    ++idx;
  }
}
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
/* One out-of-line function per round count; P8 and P6 are only emitted for
   the rate that uses them. */
void P12(state_t* s) {
  P12ROUNDS(s);
}
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) {
  P8ROUNDS(s);
}
#endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8
void P6(state_t* s) {
  P6ROUNDS(s);
}
#endif
#endif
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
#include <stdint.h>
#include "api.h"
#include "ascon.h"
#include "printstate.h"
#include "round.h"
/* Parameters of the supported variants, used to build the initial values
   below. */
/* key sizes in bytes */
#define ASCON_128_KEYBYTES 16
#define ASCON_128A_KEYBYTES 16
#define ASCON_80PQ_KEYBYTES 20
/* rates in bytes */
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
/* round counts */
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PB_ROUNDS 8
/* hash output length in bytes */
#define ASCON_HASH_BYTES 32
/* AEAD initial values: key size in bits at bit 56, rate in bits at bit 48,
   PA round count at bit 40 and PB round count at bit 32, converted to a
   word_t with U64TOWORD. */
#define ASCON_128_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_128A_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128A_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128A_PB_ROUNDS) << 32))
#define ASCON_80PQ_IV \
U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
/* Hash initial value: rate and PA round count as above, digest length in
   bits in the low bits. */
#define ASCON_HASH_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_HASH_BYTES * 8) << 0))
/* XOF initial value: same layout without an output length. */
#define ASCON_XOF_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40))
/* NOTE(review): the constants below look like precomputed five-word initial
   states for the hash and XOF modes; confirm against the Ascon specification
   before relying on that reading. */
#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull)
#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull)
#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull)
#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull)
/* Select the AEAD variant from ASCON_RATE and CRYPTO_KEYBYTES: each branch
   defines the initial value (IV), the round counts and PB, the permutation
   applied between blocks. If no branch matches, none of these are defined. */
/* rate 8, 16-byte key */
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
/* rate 16 (the key size is not checked here) */
#if ASCON_RATE == 16
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
/* rate 8, 20-byte key */
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20
#define IV ASCON_80PQ_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
/* Index of the first entry of the 12-entry round-constant table used by an
   n-round permutation. The argument is parenthesised so that expressions
   such as START(a + b) expand to 12 - (a + b) rather than 12 - a + b. */
#define START(n) (12 - (n))
#if ASCON_UNROLL_LOOPS
/* Unrolled 12-round permutation: one ROUND call per (even, odd) constant. */
__forceinline void P12ROUNDS(state_t* s) {
  ROUND(s, 0xc, 0xc);
  ROUND(s, 0x9, 0xc);
  ROUND(s, 0xc, 0x9);
  ROUND(s, 0x9, 0x9);
  ROUND(s, 0x6, 0xc);
  ROUND(s, 0x3, 0xc);
  ROUND(s, 0x6, 0x9);
  ROUND(s, 0x3, 0x9);
  ROUND(s, 0xc, 0x6);
  ROUND(s, 0x9, 0x6);
  ROUND(s, 0xc, 0x3);
  ROUND(s, 0x9, 0x3);
}
/* Unrolled 8-round permutation: the last eight rounds of P12ROUNDS. */
__forceinline void P8ROUNDS(state_t* s) {
  ROUND(s, 0x6, 0xc);
  ROUND(s, 0x3, 0xc);
  ROUND(s, 0x6, 0x9);
  ROUND(s, 0x3, 0x9);
  ROUND(s, 0xc, 0x6);
  ROUND(s, 0x9, 0x6);
  ROUND(s, 0xc, 0x3);
  ROUND(s, 0x9, 0x3);
}
/* Unrolled 6-round permutation: the last six rounds of P12ROUNDS. */
__forceinline void P6ROUNDS(state_t* s) {
  ROUND(s, 0x6, 0x9);
  ROUND(s, 0x3, 0x9);
  ROUND(s, 0xc, 0x6);
  ROUND(s, 0x9, 0x6);
  ROUND(s, 0xc, 0x3);
  ROUND(s, 0x9, 0x3);
}
#else /* !ASCON_UNROLL_LOOPS */
/* table of (even, odd) round constants, defined in another translation unit */
extern const uint8_t constants[][2];
/* Looping 12-round permutation over the constant table. */
__forceinline void P12ROUNDS(state_t* s) {
  int idx = START(12);
  while (idx < 12) {
    ROUND(s, constants[idx][0], constants[idx][1]);
    ++idx;
  }
}
/* Looping 8-round permutation: starts at table entry START(8). */
__forceinline void P8ROUNDS(state_t* s) {
  int idx = START(8);
  while (idx < 12) {
    ROUND(s, constants[idx][0], constants[idx][1]);
    ++idx;
  }
}
/* Looping 6-round permutation: starts at table entry START(6). */
__forceinline void P6ROUNDS(state_t* s) {
  int idx = START(6);
  while (idx < 12) {
    ROUND(s, constants[idx][0], constants[idx][1]);
    ++idx;
  }
}
#endif
#if ASCON_INLINE_PERM
/* Inlined permutations: trace the input state, then run the rounds. */
__forceinline void P12(state_t* s) {
  printstate(" permutation input", s);
  P12ROUNDS(s);
}
__forceinline void P8(state_t* s) {
  printstate(" permutation input", s);
  P8ROUNDS(s);
}
__forceinline void P6(state_t* s) {
  printstate(" permutation input", s);
  P6ROUNDS(s);
}
/* Dispatch on the round count; any value other than 12, 8 or 6 is a no-op. */
__forceinline void P(state_t* s, int i) {
  switch (i) {
    case 12:
      P12(s);
      break;
    case 8:
      P8(s);
      break;
    case 6:
      P6(s);
      break;
    default:
      break;
  }
}
#elif ASCON_SINGLE_PERM
/* One out-of-line permutation taking the round count as an argument. */
#define P12(s) P(s, 12)
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
void P(state_t* s, uint8_t rounds);
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
/* Out-of-line permutations defined in another translation unit. */
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
/* Dispatch on the round count; any value other than 12, 8 or 6 is a no-op. */
__forceinline void P(state_t* s, int i) {
  switch (i) {
    case 12:
      P12(s);
      break;
    case 8:
      P8(s);
      break;
    case 6:
      P6(s);
      break;
    default:
      break;
  }
}
#endif
#endif /* PERMUTATIONS_H_ */
#ifndef PRINTSTATE_H_
#define PRINTSTATE_H_
#ifdef NDEBUG
#define printword(text, w)
#define printstate(text, s)
#else
#include <inttypes.h>
#include <stdio.h>
#include "ascon.h"
/* Print `text`, an equals sign and the word as 16 hex digits. The word is
   first converted back to a plain 64-bit value with WORDTOU64. */
__forceinline void printword(const char* text, const word_t x) {
  const uint64_t value = WORDTOU64(x);
  printf("%s=%016" PRIx64 "\n", text, value);
}
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#endif
#endif /* PRINTSTATE_H_ */
#include "random.h"
#include <stdlib.h>
#include <time.h>
#include "config.h"
#if ASCON_MASK_RNG == 'R'
/* Seed the C library generator with the current time. rand32/rand64 in this
   branch draw from randombytes() instead, which is declared elsewhere. */
void randinit() {
  srand(time(0));
}
/* Return 4 bytes obtained from randombytes() as a 32-bit value. */
uint32_t rand32() {
  uint32_t value;
  randombytes(&value, 4);
  return value;
}
/* Return 8 bytes obtained from randombytes() as a 64-bit value. */
uint64_t rand64() {
  uint64_t value;
  randombytes(&value, 8);
  return value;
}
#elif ASCON_MASK_RNG == 'S'
/* Seed the C library generator with the current time. */
void randinit() { srand(time(0)); }
/* Combine two rand() outputs, one shifted left by 21 bits, into 32 bits.
   NOTE(review): this only fills every bit position when rand() delivers
   enough bits; the C standard only guarantees RAND_MAX >= 32767. The order
   in which the two rand() calls are evaluated is unspecified. */
uint32_t rand32() { return ((uint32_t)rand() << 21) ^ rand(); }
/* Combine three rand() outputs (shifted by 43, 21 and 0 bits) into 64 bits;
   the same RAND_MAX caveat applies. */
uint64_t rand64() {
return ((uint64_t)rand() << 43) ^ ((uint64_t)rand() << 21) ^ rand();
}
#elif ASCON_MASK_RNG == 'X'
/* State of the 32-bit and 64-bit xorshift generators. Zero is a fixed point
   of both update functions, so the state must never be zero. */
uint32_t xorshift32;
uint64_t xorshift64;
/* Seed both generators from rand(), which is itself seeded with the current
   time. If rand() happens to deliver a zero seed, a fixed non-zero constant
   is substituted; otherwise every later output (and hence every mask) would
   be zero.
   NOTE(review): a time-based seed is predictable; confirm this is acceptable
   for the intended masking use. */
void randinit() {
  srand(time(0));
  xorshift32 = rand();
  xorshift64 = (uint64_t)rand() << 32 | rand();
  if (xorshift32 == 0) xorshift32 = 0x9e3779b9u;
  if (xorshift64 == 0) xorshift64 = 0x9e3779b97f4a7c15ull;
}
/* Advance the 32-bit xorshift state (shifts 13, 17, 5) and return it. */
uint32_t rand32() {
  uint32_t x = xorshift32;
  x ^= x << 13;
  x ^= x >> 17;
  x ^= x << 5;
  return xorshift32 = x;
}
/* Advance the 64-bit xorshift state (shifts 13, 7, 17) and return it. */
uint64_t rand64() {
  uint64_t x = xorshift64;
  x ^= x << 13;
  x ^= x >> 7;
  x ^= x << 17;
  return xorshift64 = x;
}
#endif
#ifndef RANDOM_H_
#define RANDOM_H_
#include <stdint.h>
/* Mask generator interface. The functions take no arguments; `(void)` makes
   these real prototypes so that calls with arguments are diagnosed. */
/* initialise / seed the generator */
void randinit(void);
/* next 32-bit random value */
uint32_t rand32(void);
/* next 64-bit random value */
uint64_t rand64(void);
#endif /* RANDOM_H_ */
#ifndef ROUND_H_
#define ROUND_H_
#include "ascon.h"
#include "printstate.h"
#include "random.h"
/* Initialise the three key words, in the order K0, K1, K2, with
   TOSHARES(0). */
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
  word_t* const slots[3] = {K0, K1, K2};
  for (int k = 0; k < 3; ++k) *slots[k] = TOSHARES(0);
}
/* Initialise the mask generator, then set x0..x4 and rx (in that order) to
   TOSHARES(0). */
__forceinline void PINIT(state_t* s) {
  word_t* const lanes[6] = {&s->x0, &s->x1, &s->x2,
                            &s->x3, &s->x4, &s->rx};
  randinit();
  for (int k = 0; k < 6; ++k) *lanes[k] = TOSHARES(0);
}
/* Three-share update a ^= ~b & c, where a, b and c are each the XOR of
   their three shares. Every output share receives one complemented
   same-index product and two cross products; the XOR of all nine terms
   equals ~(b0 ^ b1 ^ b2) & (c0 ^ c1 ^ c2). The statement order is kept
   exactly as written. */
#define TOFFOLI(a0, a1, a2, b0, b1, b2, c0, c1, c2) \
do { \
(a0) ^= (~(b0)) & (c0); \
(a0) ^= (b0) & (c2); \
(a0) ^= (b2) & (c0); \
(a1) ^= (~(b1)) & (c1); \
(a1) ^= (b1) & (c0); \
(a1) ^= (b0) & (c1); \
(a2) ^= (~(b2)) & (c2); \
(a2) ^= (b2) & (c1); \
(a2) ^= (b1) & (c2); \
} while (0)
/* One permutation round on a three-share state (shares s0, s1 and s2, each
   split into an even half .e and an odd half .o).
   C_e and C_o are the round-constant parts XORed into the even and odd half
   of share 0 of x2. The substitution layer is evaluated first on the .e
   halves, then on the .o halves; the linear layer is then applied to shares
   s2, s1 and s0 separately, using t as scratch space. */
__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) {
state_t t;
/* refresh randomness */
/* s->rx = TOSHARES(0); */
/* addition of round constant */
s->x2.s0.e ^= C_e;
/* substitution layer */
s->x0.s0.e ^= s->x4.s0.e;
s->x4.s0.e ^= s->x3.s0.e;
s->x2.s0.e ^= s->x1.s0.e;
s->x0.s1.e ^= s->x4.s1.e;
s->x4.s1.e ^= s->x3.s1.e;
s->x2.s1.e ^= s->x1.s1.e;
s->x0.s2.e ^= s->x4.s2.e;
s->x4.s2.e ^= s->x3.s2.e;
s->x2.s2.e ^= s->x1.s2.e;
/* start of shared keccak s-box from https://eprint.iacr.org/2019/536 */
/* rearrange the rx shares so that they XOR to zero before rx accumulates
   the ~x4 & x0 term that is added to x3 after the other updates */
s->rx.s2.e = s->rx.s0.e;
s->rx.s0.e ^= s->rx.s1.e;
TOFFOLI(s->rx.s0.e, s->rx.s1.e, s->rx.s2.e, s->x4.s0.e, s->x4.s1.e,
s->x4.s2.e, s->x0.s0.e, s->x0.s1.e, s->x0.s2.e);
TOFFOLI(s->x0.s0.e, s->x0.s1.e, s->x0.s2.e, s->x1.s0.e, s->x1.s1.e,
s->x1.s2.e, s->x2.s0.e, s->x2.s1.e, s->x2.s2.e);
TOFFOLI(s->x2.s0.e, s->x2.s1.e, s->x2.s2.e, s->x3.s0.e, s->x3.s1.e,
s->x3.s2.e, s->x4.s0.e, s->x4.s1.e, s->x4.s2.e);
TOFFOLI(s->x4.s0.e, s->x4.s1.e, s->x4.s2.e, s->x0.s0.e, s->x0.s1.e,
s->x0.s2.e, s->x1.s0.e, s->x1.s1.e, s->x1.s2.e);
TOFFOLI(s->x1.s0.e, s->x1.s1.e, s->x1.s2.e, s->x2.s0.e, s->x2.s1.e,
s->x2.s2.e, s->x3.s0.e, s->x3.s1.e, s->x3.s2.e);
s->x3.s2.e ^= s->rx.s2.e;
s->x3.s1.e ^= s->rx.s1.e;
s->x3.s0.e ^= s->rx.s0.e;
/* end of shared keccak s-box */
s->x1.s0.e ^= s->x0.s0.e;
s->x0.s0.e ^= s->x4.s0.e;
s->x3.s0.e ^= s->x2.s0.e;
/* the complement of x2 is applied to share 0 only */
s->x2.s0.e = ~s->x2.s0.e;
s->x1.s1.e ^= s->x0.s1.e;
s->x0.s1.e ^= s->x4.s1.e;
s->x3.s1.e ^= s->x2.s1.e;
s->x1.s2.e ^= s->x0.s2.e;
s->x0.s2.e ^= s->x4.s2.e;
s->x3.s2.e ^= s->x2.s2.e;
/* addition of round constant */
s->x2.s0.o ^= C_o;
/* substitution layer */
s->x0.s0.o ^= s->x4.s0.o;
s->x4.s0.o ^= s->x3.s0.o;
s->x2.s0.o ^= s->x1.s0.o;
s->x0.s1.o ^= s->x4.s1.o;
s->x4.s1.o ^= s->x3.s1.o;
s->x2.s1.o ^= s->x1.s1.o;
s->x0.s2.o ^= s->x4.s2.o;
s->x4.s2.o ^= s->x3.s2.o;
s->x2.s2.o ^= s->x1.s2.o;
/* start of shared keccak s-box from https://eprint.iacr.org/2019/536 */
s->rx.s2.o = s->rx.s0.o;
s->rx.s0.o ^= s->rx.s1.o;
TOFFOLI(s->rx.s0.o, s->rx.s1.o, s->rx.s2.o, s->x4.s0.o, s->x4.s1.o,
s->x4.s2.o, s->x0.s0.o, s->x0.s1.o, s->x0.s2.o);
TOFFOLI(s->x0.s0.o, s->x0.s1.o, s->x0.s2.o, s->x1.s0.o, s->x1.s1.o,
s->x1.s2.o, s->x2.s0.o, s->x2.s1.o, s->x2.s2.o);
TOFFOLI(s->x2.s0.o, s->x2.s1.o, s->x2.s2.o, s->x3.s0.o, s->x3.s1.o,
s->x3.s2.o, s->x4.s0.o, s->x4.s1.o, s->x4.s2.o);
TOFFOLI(s->x4.s0.o, s->x4.s1.o, s->x4.s2.o, s->x0.s0.o, s->x0.s1.o,
s->x0.s2.o, s->x1.s0.o, s->x1.s1.o, s->x1.s2.o);
TOFFOLI(s->x1.s0.o, s->x1.s1.o, s->x1.s2.o, s->x2.s0.o, s->x2.s1.o,
s->x2.s2.o, s->x3.s0.o, s->x3.s1.o, s->x3.s2.o);
s->x3.s2.o ^= s->rx.s2.o;
s->x3.s1.o ^= s->rx.s1.o;
s->x3.s0.o ^= s->rx.s0.o;
/* end of shared keccak s-box */
s->x1.s0.o ^= s->x0.s0.o;
s->x0.s0.o ^= s->x4.s0.o;
s->x3.s0.o ^= s->x2.s0.o;
s->x2.s0.o = ~s->x2.s0.o;
s->x1.s1.o ^= s->x0.s1.o;
s->x0.s1.o ^= s->x4.s1.o;
s->x3.s1.o ^= s->x2.s1.o;
s->x1.s2.o ^= s->x0.s2.o;
s->x0.s2.o ^= s->x4.s2.o;
s->x3.s2.o ^= s->x2.s2.o;
/* linear diffusion layer */
/* share s2 */
t.x0.s2.e = s->x0.s2.e ^ ROR32(s->x0.s2.o, 4);
t.x0.s2.o = s->x0.s2.o ^ ROR32(s->x0.s2.e, 5);
t.x1.s2.e = s->x1.s2.e ^ ROR32(s->x1.s2.e, 11);
t.x1.s2.o = s->x1.s2.o ^ ROR32(s->x1.s2.o, 11);
t.x2.s2.e = s->x2.s2.e ^ ROR32(s->x2.s2.o, 2);
t.x2.s2.o = s->x2.s2.o ^ ROR32(s->x2.s2.e, 3);
t.x3.s2.e = s->x3.s2.e ^ ROR32(s->x3.s2.o, 3);
t.x3.s2.o = s->x3.s2.o ^ ROR32(s->x3.s2.e, 4);
t.x4.s2.e = s->x4.s2.e ^ ROR32(s->x4.s2.e, 17);
t.x4.s2.o = s->x4.s2.o ^ ROR32(s->x4.s2.o, 17);
s->x0.s2.e ^= ROR32(t.x0.s2.o, 9);
s->x0.s2.o ^= ROR32(t.x0.s2.e, 10);
s->x1.s2.e ^= ROR32(t.x1.s2.o, 19);
s->x1.s2.o ^= ROR32(t.x1.s2.e, 20);
s->x2.s2.e ^= t.x2.s2.o;
s->x2.s2.o ^= ROR32(t.x2.s2.e, 1);
s->x3.s2.e ^= ROR32(t.x3.s2.e, 5);
s->x3.s2.o ^= ROR32(t.x3.s2.o, 5);
s->x4.s2.e ^= ROR32(t.x4.s2.o, 3);
s->x4.s2.o ^= ROR32(t.x4.s2.e, 4);
/* share s1 */
t.x0.s1.e = s->x0.s1.e ^ ROR32(s->x0.s1.o, 4);
t.x0.s1.o = s->x0.s1.o ^ ROR32(s->x0.s1.e, 5);
t.x1.s1.e = s->x1.s1.e ^ ROR32(s->x1.s1.e, 11);
t.x1.s1.o = s->x1.s1.o ^ ROR32(s->x1.s1.o, 11);
t.x2.s1.e = s->x2.s1.e ^ ROR32(s->x2.s1.o, 2);
t.x2.s1.o = s->x2.s1.o ^ ROR32(s->x2.s1.e, 3);
t.x3.s1.e = s->x3.s1.e ^ ROR32(s->x3.s1.o, 3);
t.x3.s1.o = s->x3.s1.o ^ ROR32(s->x3.s1.e, 4);
t.x4.s1.e = s->x4.s1.e ^ ROR32(s->x4.s1.e, 17);
t.x4.s1.o = s->x4.s1.o ^ ROR32(s->x4.s1.o, 17);
s->x0.s1.e ^= ROR32(t.x0.s1.o, 9);
s->x0.s1.o ^= ROR32(t.x0.s1.e, 10);
s->x1.s1.e ^= ROR32(t.x1.s1.o, 19);
s->x1.s1.o ^= ROR32(t.x1.s1.e, 20);
s->x2.s1.e ^= t.x2.s1.o;
s->x2.s1.o ^= ROR32(t.x2.s1.e, 1);
s->x3.s1.e ^= ROR32(t.x3.s1.e, 5);
s->x3.s1.o ^= ROR32(t.x3.s1.o, 5);
s->x4.s1.e ^= ROR32(t.x4.s1.o, 3);
s->x4.s1.o ^= ROR32(t.x4.s1.e, 4);
/* share s0 */
t.x0.s0.e = s->x0.s0.e ^ ROR32(s->x0.s0.o, 4);
t.x0.s0.o = s->x0.s0.o ^ ROR32(s->x0.s0.e, 5);
t.x1.s0.e = s->x1.s0.e ^ ROR32(s->x1.s0.e, 11);
t.x1.s0.o = s->x1.s0.o ^ ROR32(s->x1.s0.o, 11);
t.x2.s0.e = s->x2.s0.e ^ ROR32(s->x2.s0.o, 2);
t.x2.s0.o = s->x2.s0.o ^ ROR32(s->x2.s0.e, 3);
t.x3.s0.e = s->x3.s0.e ^ ROR32(s->x3.s0.o, 3);
t.x3.s0.o = s->x3.s0.o ^ ROR32(s->x3.s0.e, 4);
t.x4.s0.e = s->x4.s0.e ^ ROR32(s->x4.s0.e, 17);
t.x4.s0.o = s->x4.s0.o ^ ROR32(s->x4.s0.o, 17);
s->x0.s0.e ^= ROR32(t.x0.s0.o, 9);
s->x0.s0.o ^= ROR32(t.x0.s0.e, 10);
s->x1.s0.e ^= ROR32(t.x1.s0.o, 19);
s->x1.s0.o ^= ROR32(t.x1.s0.e, 20);
s->x2.s0.e ^= t.x2.s0.o;
s->x2.s0.o ^= ROR32(t.x2.s0.e, 1);
s->x3.s0.e ^= ROR32(t.x3.s0.e, 5);
s->x3.s0.o ^= ROR32(t.x3.s0.o, 5);
s->x4.s0.e ^= ROR32(t.x4.s0.o, 3);
s->x4.s0.o ^= ROR32(t.x4.s0.e, 4);
printstate(" round output", s);
}
#endif /* ROUND_H_ */
#include "word.h"
/* Convert one share from plain to bit-interleaved form.
 * Input: in.e is treated as the low and in.o as the high 32-bit half of a
 * 64-bit value. Each half is unshuffled so that its even-indexed bits end up
 * in its low 16 bits and its odd-indexed bits in its high 16 bits; the halves
 * are then recombined so that the returned e holds the even-indexed bits and
 * the returned o the odd-indexed bits of the 64-bit value.
 * Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
share_t TOBI32(share_t in) {
uint32_t r0, r1;
uint32_t lo = in.e;
uint32_t hi = in.o;
/* unshuffle lo: swap progressively larger bit groups (1, 2, 4, 8 bits) */
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
/* same unshuffle for hi */
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
/* low 16 bits of both halves -> e, high 16 bits of both halves -> o */
r0 = (lo & 0x0000FFFF) | (hi << 16);
r1 = (lo >> 16) | (hi & 0xFFFF0000);
in.e = r0;
in.o = r1;
return in;
}
/* Convert one share from bit-interleaved back to plain form; the steps are
 * those of TOBI32 applied in reverse order.
 * Input: in.e / in.o as produced by TOBI32. Output: in.e is the low and in.o
 * the high 32-bit half of the plain 64-bit value.
 * Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
share_t FROMBI32(share_t in) {
uint32_t r0 = in.e;
uint32_t r1 = in.o;
/* regroup: lo takes the low 16 bits of e and o, hi takes the high 16 bits */
uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16);
uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000);
/* shuffle lo: swap bit groups from large (8 bits) down to small (1 bit) */
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
/* same shuffle for hi */
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
in.e = lo;
in.o = hi;
return in;
}
#ifndef WORD_H_
#define WORD_H_
#include <stdint.h>
#include "config.h"
#include "random.h"
/* One share of a 64-bit word, kept as two 32-bit halves. After TOBI32 the
 * field e holds the even-indexed and o the odd-indexed bits; before it they
 * hold the low and high half respectively. */
typedef struct {
uint32_t e;
uint32_t o;
} share_t;
/* A 64-bit word split into three shares; the represented value is the XOR
 * s0 ^ s1 ^ s2 (see FROMSHARES). */
typedef struct {
share_t s0;
share_t s1;
share_t s2;
} word_t;
/* Wrap a 64-bit value in a word without masking: share 0 carries the value
 * (low half in e, high half in o), shares 1 and 2 are all-zero. */
__forceinline word_t WORD_T(uint64_t x) {
  const word_t plain = {
      {(uint32_t)x, (uint32_t)(x >> 32)}, /* s0 */
      {0, 0},                             /* s1 */
      {0, 0}                              /* s2 */
  };
  return plain;
}
/* Read share 0 of a word as a 64-bit value (o = high half, e = low half).
 * Shares 1 and 2 are ignored. */
__forceinline uint64_t UINT64_T(word_t w) {
  const uint64_t high = w.s0.o;
  const uint64_t low = w.s0.e;
  return (high << 32) | low;
}
share_t TOBI32(share_t in);
share_t FROMBI32(share_t in);
/* Split a 64-bit value into three shares using four rand32() draws.
 * Shares 1 and 2 are the random values; share 0 is the input XORed with
 * both, so that s0 ^ s1 ^ s2 reproduces the input. */
__forceinline word_t TOSHARES(uint64_t in) {
  word_t shares;
  /* draw order is kept: s1.e, s1.o, s2.e, s2.o */
  shares.s1.e = rand32();
  shares.s1.o = rand32();
  shares.s2.e = rand32();
  shares.s2.o = rand32();
  shares.s0.e = (uint32_t)in ^ shares.s1.e ^ shares.s2.e;
  shares.s0.o = (uint32_t)(in >> 32) ^ shares.s1.o ^ shares.s2.o;
  return shares;
}
/* Recombine the three shares of a word: XOR the shares per half, then join
 * the halves (o = high 32 bits, e = low 32 bits). */
__forceinline uint64_t FROMSHARES(word_t in) {
  const uint32_t high = in.s0.o ^ in.s1.o ^ in.s2.o;
  const uint32_t low = in.s0.e ^ in.s1.e ^ in.s2.e;
  return ((uint64_t)high << 32) | low;
}
/* Convert a plain 64-bit value to a bit-interleaved word. With
 * ASCON_MASK_LOADS the value is first split into random shares and every
 * share is interleaved; otherwise only share 0 is populated and interleaved
 * (shares 1 and 2 stay zero). */
__forceinline word_t U64TOWORD(uint64_t x) {
#if ASCON_MASK_LOADS
  word_t w = TOSHARES(x);
#else
  word_t w = WORD_T(x);
#endif
  w.s0 = TOBI32(w.s0);
#if ASCON_MASK_LOADS
  w.s1 = TOBI32(w.s1);
  w.s2 = TOBI32(w.s2);
#endif
  return w;
}
/* Convert a bit-interleaved shared word back to its plain 64-bit value:
 * de-interleave each share, then XOR the shares together. */
__forceinline uint64_t WORDTOU64(word_t w) {
  word_t plain;
  plain.s0 = FROMBI32(w.s0);
  plain.s1 = FROMBI32(w.s1);
  plain.s2 = FROMBI32(w.s2);
  return FROMSHARES(plain);
}
/* XOR word b into word a, share by share and half by half.
 * a must be a modifiable lvalue (it is expanded six times); b is evaluated
 * exactly once into the temporary tb, so it may be a function call.
 * The temporary is named tb: do not pass an expression that itself refers to
 * a variable called tb. */
#define XOR(a, b) \
do { \
word_t tb = b; \
(a).s0.e ^= tb.s0.e; \
(a).s0.o ^= tb.s0.o; \
(a).s1.e ^= tb.s1.e; \
(a).s1.o ^= tb.s1.o; \
(a).s2.e ^= tb.s2.e; \
(a).s2.o ^= tb.s2.o; \
} while (0)
/* Replace word a with (a AND b) on shared words.
 * Every share i of a becomes (a.si & b.s0) ^ (a.si & b.s1) ^ (a.si & b.s2),
 * so the XOR of the three result shares equals
 * (a.s0 ^ a.s1 ^ a.s2) & (b.s0 ^ b.s1 ^ b.s2).
 * Both operands are copied into temporaries (ta, tb) first, so a and b are
 * each evaluated once for reading; a must be a modifiable lvalue. */
#define AND(a, b) \
do { \
word_t ta = a; \
word_t tb = b; \
(a).s0.e = \
(ta.s0.e & tb.s0.e) ^ (ta.s0.e & tb.s1.e) ^ (ta.s0.e & tb.s2.e); \
(a).s0.o = \
(ta.s0.o & tb.s0.o) ^ (ta.s0.o & tb.s1.o) ^ (ta.s0.o & tb.s2.o); \
(a).s1.e = \
(ta.s1.e & tb.s0.e) ^ (ta.s1.e & tb.s1.e) ^ (ta.s1.e & tb.s2.e); \
(a).s1.o = \
(ta.s1.o & tb.s0.o) ^ (ta.s1.o & tb.s1.o) ^ (ta.s1.o & tb.s2.o); \
(a).s2.e = \
(ta.s2.e & tb.s0.e) ^ (ta.s2.e & tb.s1.e) ^ (ta.s2.e & tb.s2.e); \
(a).s2.o = \
(ta.s2.o & tb.s0.o) ^ (ta.s2.o & tb.s1.o) ^ (ta.s2.o & tb.s2.o); \
} while (0)
/* Rotate the 32-bit value x right by n bit positions (0 <= n <= 31).
 * Both shift counts are reduced modulo 32 so that n == 0 is well defined:
 * the unreduced form would evaluate x << 32, which is undefined behaviour
 * for a 32-bit operand. Results for 1 <= n <= 31 are unchanged. */
__forceinline uint32_t ROR32(uint32_t x, int n) {
  return (x >> (n & 31)) | (x << ((32 - n) & 31));
}
/* Build a word from two neighbouring key words: in every 32-bit half of every
 * share, the low 16 bits of lo2hi move to the high 16 bits and the high 16
 * bits of hi2lo move to the low 16 bits. */
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
  word_t out;
#define KEYROT_HALF(f) out.f = (lo2hi.f << 16) | (hi2lo.f >> 16)
  KEYROT_HALF(s0.e);
  KEYROT_HALF(s0.o);
  KEYROT_HALF(s1.e);
  KEYROT_HALF(s1.o);
  KEYROT_HALF(s2.e);
  KEYROT_HALF(s2.o);
#undef KEYROT_HALF
  return out;
}
/* Return non-zero iff the value represented by a or by b is non-zero.
 * The shares are recombined byte by byte (s0 ^ s1 ^ s2) and all bytes are
 * ORed together; there is no data-dependent branch or early exit. */
__forceinline int NOTZERO(word_t a, word_t b) {
  const uint8_t* a0 = (const uint8_t*)&a.s0;
  const uint8_t* a1 = (const uint8_t*)&a.s1;
  const uint8_t* a2 = (const uint8_t*)&a.s2;
  const uint8_t* b0 = (const uint8_t*)&b.s0;
  const uint8_t* b1 = (const uint8_t*)&b.s1;
  const uint8_t* b2 = (const uint8_t*)&b.s2;
  int acc = 0;
  for (int pos = 0; pos < 8; ++pos) {
    acc |= a0[pos] ^ a1[pos] ^ a2[pos];
    acc |= b0[pos] ^ b1[pos] ^ b2[pos];
  }
  return acc;
}
/* set padding byte in 64-bit Ascon word (0 <= i <= 7): returns the unmasked
 * word whose only set bit is bit (31 - 4 * i) of the odd half of share 0 */
__forceinline word_t PAD(int i) {
  /* Shift an unsigned constant: for i == 0 the result is 0x80000000, which
     does not fit in a signed int, so the signed shift was undefined. */
  const uint32_t odd = (uint32_t)0x08 << (28 - 4 * i);
  return WORD_T((uint64_t)odd << 32);
}
/* byte mask for 64-bit Ascon word (1 <= n <= 8): in both 32-bit halves the
 * top 4 * n bits are cleared and the remaining low bits are set */
__forceinline word_t XMASK(int n) {
  const uint32_t half = 0x0fffffff >> (4 * n - 4);
  const uint64_t both = ((uint64_t)half << 32) | half;
  return WORD_T(both);
}
#endif /* WORD_H_ */
...@@ -3,3 +3,4 @@ ...@@ -3,3 +3,4 @@
#define CRYPTO_NPUBBYTES 16 #define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16 #define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1 #define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16
#include "ascon.h"
#include "api.h"
#include "loadstore.h"
#include "permutations.h"
#include "printstate.h"
/* Load the key k into K0..K2. K1 and K2 receive the last 16 key bytes; K0 is
 * only filled for 20-byte keys (first 4 bytes, moved into the low part of
 * the word by KEYROT) and stays zero otherwise. */
__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2,
                           const uint8_t* k) {
  const uint8_t* tail = k;
  KINIT(K0, K1, K2);
  if (CRYPTO_KEYBYTES == 20) {
    XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
    tail = k + 4;
  }
  XOR(*K1, LOAD64(tail));
  XOR(*K2, LOAD64(tail + 8));
}
/* Initialization phase: fill the state with IV, key and nonce, run P12 and
 * XOR the key into the state again. */
__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) {
  word_t key0, key1, key2;
  /* the nonce is loaded before the key, as in the reference ordering */
  const word_t nonce0 = LOAD64(npub);
  const word_t nonce1 = LOAD64(npub + 8);
  loadkey(&key0, &key1, &key2, k);
  /* state = IV (^ K0) | K1 | K2 | N0 | N1 */
  PINIT(s);
  XOR(s->x0, IV);
  if (CRYPTO_KEYBYTES == 20) {
    XOR(s->x0, key0);
  }
  XOR(s->x1, key1);
  XOR(s->x2, key2);
  XOR(s->x3, nonce0);
  XOR(s->x4, nonce1);
  P12(s);
  /* key addition after the permutation */
  if (CRYPTO_KEYBYTES == 20) {
    XOR(s->x2, key0);
  }
  XOR(s->x3, key1);
  XOR(s->x4, key2);
  printstate("initialization", s);
}
/* Absorb the associated data into the state. Nothing is absorbed for
 * adlen == 0; the final XOR of 1 into x4 is applied in every case. */
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
  if (adlen) {
    word_t* restrict lane;
    /* full rate blocks */
    for (; adlen >= ASCON_RATE; ad += ASCON_RATE, adlen -= ASCON_RATE) {
      XOR(s->x0, LOAD64(ad));
      if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8));
      PB(s);
    }
    /* final block: lane is the word that receives the partial data and the
       padding (x1 when a 16-byte rate leaves at least 8 bytes, else x0) */
    lane = &s->x0;
    if (ASCON_RATE == 16 && adlen >= 8) {
      XOR(s->x0, LOAD64(ad));
      lane = &s->x1;
      ad += 8;
      adlen -= 8;
    }
    if (adlen) {
      XOR(*lane, LOAD(ad, adlen));
    }
    XOR(*lane, PAD(adlen));
    PB(s);
  }
  XOR(s->x4, WORD_T(1));
  printstate("process associated data", s);
}
/* Encrypt mlen bytes from m into c: each plaintext block is XORed into the
 * rate words of the state and the updated rate words are written out as
 * ciphertext. The last (possibly empty) block is padded and not followed by
 * a permutation here. */
__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m,
                           uint64_t mlen) {
  word_t* restrict lane;
  /* full rate blocks */
  for (; mlen >= ASCON_RATE;
       m += ASCON_RATE, c += ASCON_RATE, mlen -= ASCON_RATE) {
    XOR(s->x0, LOAD64(m));
    STORE64(c, s->x0);
    if (ASCON_RATE == 16) {
      XOR(s->x1, LOAD64(m + 8));
      STORE64(c + 8, s->x1);
    }
    PB(s);
  }
  /* final block: lane is the word that takes the partial data and padding */
  lane = &s->x0;
  if (ASCON_RATE == 16 && mlen >= 8) {
    XOR(s->x0, LOAD64(m));
    STORE64(c, s->x0);
    lane = &s->x1;
    m += 8;
    c += 8;
    mlen -= 8;
  }
  if (mlen) {
    XOR(*lane, LOAD(m, mlen));
    STORE(c, *lane, mlen);
  }
  XOR(*lane, PAD(mlen));
  printstate("process plaintext", s);
}
/* Decrypt clen bytes from c into m: the plaintext is the XOR of the rate
 * words with the ciphertext, after which the rate words are replaced by the
 * ciphertext. For a partial final block only the first clen bytes of the
 * word are replaced; the remaining state bytes are kept. */
__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c,
                           uint64_t clen) {
  word_t* restrict lane;
  word_t ct;
  /* full rate blocks */
  for (; clen >= ASCON_RATE;
       m += ASCON_RATE, c += ASCON_RATE, clen -= ASCON_RATE) {
    ct = LOAD64(c);
    XOR(s->x0, ct);
    STORE64(m, s->x0);
    s->x0 = ct;
    if (ASCON_RATE == 16) {
      ct = LOAD64(c + 8);
      XOR(s->x1, ct);
      STORE64(m + 8, s->x1);
      s->x1 = ct;
    }
    PB(s);
  }
  /* final block: lane is the word that takes the partial data and padding */
  lane = &s->x0;
  if (ASCON_RATE == 16 && clen >= 8) {
    ct = LOAD64(c);
    XOR(s->x0, ct);
    STORE64(m, s->x0);
    s->x0 = ct;
    lane = &s->x1;
    m += 8;
    c += 8;
    clen -= 8;
  }
  if (clen) {
    ct = LOAD(c, clen);
    XOR(*lane, ct);
    STORE(m, *lane, clen);
    /* drop the plaintext bytes from the lane, then insert the ciphertext */
    AND(*lane, XMASK(clen));
    XOR(*lane, ct);
  }
  XOR(*lane, PAD(clen));
  printstate("process ciphertext", s);
}
/* Finalization phase: XOR the key into the words following the rate, run
 * P12 and XOR K1/K2 into x3/x4, which then hold the tag. */
__forceinline void final(state_t* s, const uint8_t* k) {
  word_t key0, key1, key2;
  loadkey(&key0, &key1, &key2, k);
  /* key position depends on key size and rate */
  if (CRYPTO_KEYBYTES == 16) {
    if (ASCON_RATE == 8) {
      XOR(s->x1, key1);
      XOR(s->x2, key2);
    }
    if (ASCON_RATE == 16) {
      XOR(s->x2, key1);
      XOR(s->x3, key2);
    }
  } else if (CRYPTO_KEYBYTES == 20) {
    XOR(s->x1, KEYROT(key0, key1));
    XOR(s->x2, KEYROT(key1, key2));
    XOR(s->x3, KEYROT(key2, WORD_T(0)));
  }
  P12(s);
  XOR(s->x3, key1);
  XOR(s->x4, key2);
  printstate("finalization", s);
}
/* Select how the AEAD entry points reach the mode functions: directly (the
 * force-inlined functions) or through the external ascon_* wrappers. */
#if ASCON_INLINE_MODE
#define INIT init
#define ABSORB absorb
#define ENCRYPT encrypt
#define DECRYPT decrypt
#define FINAL final
#else
#define INIT ascon_init
#define ABSORB ascon_absorb
#define ENCRYPT ascon_encrypt
#define DECRYPT ascon_decrypt
#define FINAL ascon_final
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
  init(s, npub, k);
}
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
  absorb(s, ad, adlen);
}
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) {
  encrypt(s, c, m, mlen);
}
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) {
  decrypt(s, m, c, clen);
}
void ascon_final(state_t* s, const uint8_t* k) {
  final(s, k);
}
#endif
/* AEAD encryption: writes mlen ciphertext bytes followed by the 16-byte tag
 * to c and stores the total length in *clen. nsec is unused. Always
 * returns 0. */
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
                        uint64_t mlen, const uint8_t* ad, uint64_t adlen,
                        const uint8_t* nsec, const uint8_t* npub,
                        const uint8_t* k) {
  state_t s;
  uint8_t* const tag = c + mlen;
  (void)nsec;
  *clen = mlen + CRYPTO_ABYTES;
  /* perform ascon computation */
  INIT(&s, npub, k);
  ABSORB(&s, ad, adlen);
  ENCRYPT(&s, c, m, mlen);
  FINAL(&s, k);
  /* the tag is x3 || x4, appended after the ciphertext */
  STORE64(tag, s.x3);
  STORE64(tag + 8, s.x4);
  return 0;
}
/* AEAD decryption: c holds the ciphertext followed by the 16-byte tag.
 * Returns 0 and the plaintext length in *mlen on success; returns -1 with
 * *mlen = 0 if the input is shorter than a tag or the tag does not match.
 * The plaintext is written to m before the tag is checked. nsec is unused. */
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
                        const uint8_t* c, uint64_t clen, const uint8_t* ad,
                        uint64_t adlen, const uint8_t* npub, const uint8_t* k) {
  state_t s;
  const uint8_t* tag;
  (void)nsec;
  if (clen < CRYPTO_ABYTES) {
    *mlen = 0;
    return -1;
  }
  clen -= CRYPTO_ABYTES;
  *mlen = clen;
  tag = c + clen;
  /* perform ascon computation */
  INIT(&s, npub, k);
  ABSORB(&s, ad, adlen);
  DECRYPT(&s, m, c, clen);
  FINAL(&s, k);
  /* verify tag (should be constant time, check compiler output) */
  XOR(s.x3, LOAD64(tag));
  XOR(s.x4, LOAD64(tag + 8));
  if (!NOTZERO(s.x3, s.x4)) {
    return 0;
  }
  *mlen = 0;
  return -1;
}
#ifndef ASCON_H_
#define ASCON_H_
#include <stdint.h>
#include "config.h"
#include "word.h"
/* Ascon permutation state: five words x0..x4. */
typedef struct {
word_t x0, x1, x2, x3, x4;
} state_t;
/* External (non-inlined) entry points of the AEAD mode; they are defined in
 * the mode source file only when ASCON_INLINE_MODE is 0. */
/* set up the state from nonce npub and key k */
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k);
/* absorb adlen bytes of associated data */
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen);
/* encrypt mlen bytes from m to c (the tag is not written here) */
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
/* decrypt clen bytes from c to m (the tag is not checked here) */
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
/* finalize with key k; afterwards x3 and x4 hold the tag */
void ascon_final(state_t* s, const uint8_t* k);
#endif // ASCON_H_
#ifndef CONFIG_H_
#define CONFIG_H_

/* Every option below can be overridden on the compiler command line
   (-DNAME=value); the values here are only defaults. */

/* inline the Ascon mode */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#endif

/* inline the Ascon permutations */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0
#endif

/* single function for all permutations */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif

/* unroll the permutation loops */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 1
#endif

/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */
#ifndef ASCON_DATA_ACCESS
#define ASCON_DATA_ACCESS 'H'
#endif

/* make sure __forceinline is supported: MSVC provides it as a keyword, which
   #ifndef cannot detect, so it must not be redefined there; other compilers
   get the GCC/Clang attribute form */
#if !defined(__forceinline) && !defined(_MSC_VER)
#define __forceinline inline __attribute__((always_inline))
#endif

#endif /* CONFIG_H_ */
...@@ -3,7 +3,10 @@ ...@@ -3,7 +3,10 @@
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines /* macros for big endian machines */
#ifndef NDEBUG
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x) #define U64BIG(x) (x)
#define U32BIG(x) (x) #define U32BIG(x) (x)
#define U16BIG(x) (x) #define U16BIG(x) (x)
...@@ -11,21 +14,26 @@ ...@@ -11,21 +14,26 @@
#elif defined(_MSC_VER) || \ #elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines /* macros for little endian machines */
#ifndef NDEBUG
#pragma message("Using macros for little endian machines")
#endif
#define U64BIG(x) \ #define U64BIG(x) \
((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \ (((0x00000000000000FFULL & (x)) << 56) | \
(((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \ ((0x000000000000FF00ULL & (x)) << 40) | \
(((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \ ((0x0000000000FF0000ULL & (x)) << 24) | \
(((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56)) ((0x00000000FF000000ULL & (x)) << 8) | \
((0x000000FF00000000ULL & (x)) >> 8) | \
((0x0000FF0000000000ULL & (x)) >> 24) | \
((0x00FF000000000000ULL & (x)) >> 40) | \
((0xFF00000000000000ULL & (x)) >> 56))
#define U32BIG(x) \ #define U32BIG(x) \
((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \ (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \
(((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24)) ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24))
#define U16BIG(x) \ #define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8))
((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8))
#else #else
#error "ascon byte order macros not defined in endian.h" #error "Ascon byte order macros not defined in endian.h"
#endif #endif
#endif // ENDIAN_H_ #endif /* ENDIAN_H_ */
#ifndef LOADSTORE_H_
#define LOADSTORE_H_
#include <stdint.h>
#include "config.h"
#include "endian.h"
#include "word.h"
/* 64-bit LSB mask: the n least significant bits set, for 1 <= n <= 64
   (undefined for n == 0, which would shift by 64) */
#define MASK(n) (~0ull >> (64 - (n)))
/* get byte from Ascon 64-bit word: byte i = 0 is the most significant byte */
#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i))))
/* set byte in Ascon 64-bit word: value b placed at byte position i, where
   i = 0 is the most significant byte */
#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i)))
#if ASCON_DATA_ACCESS == 'W'
#ifndef NDEBUG
#pragma message("Using wordwise data access")
#endif
/* Wordwise access: read 8 bytes with a single 64-bit load, convert them from
 * big-endian byte order and return them as a word. */
__forceinline word_t LOAD64(const uint8_t* bytes) {
  const uint64_t raw = *(uint64_t*)bytes;
  return U64TOWORD(U64BIG(raw));
}
/* Wordwise access: write word w as 8 big-endian bytes with a single 64-bit
 * store. */
__forceinline void STORE64(uint8_t* bytes, word_t w) {
  const uint64_t value = WORDTOU64(w);
  uint64_t* const dst = (uint64_t*)bytes;
  *dst = U64BIG(value);
}
/* Wordwise access: load the first n bytes (1 <= n <= 8) as the most
 * significant bytes of a word; the remaining bytes are zero.
 * NOTE(review): a full 64-bit load is performed regardless of n, so up to
 * 8 - n bytes beyond the input are read - confirm callers tolerate this.
 * NOTE(review): MASK keeps the low 8 * n bits of the loaded integer, which
 * are the first n bytes in memory only on little-endian targets - confirm
 * this variant is not used on big-endian machines.
 * n == 0 is not supported (MASK(0) is undefined). */
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n));
return U64TOWORD(x);
}
/* Wordwise access: store the n most significant bytes of word w
 * (1 <= n <= 8) to bytes[0..n-1].
 * The destination is updated with a 64-bit read-modify-write, so 8 bytes are
 * accessed, but only the low 8 * n bits are replaced. The original ORed all
 * 64 bits of the word into memory, which corrupted the 8 - n bytes after the
 * requested range with the remaining state bytes.
 * As in LOAD, the mask selects the first n bytes in memory only on
 * little-endian targets. n == 0 is not supported (MASK(0) is undefined). */
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
  const uint64_t replace = MASK(8 * n);
  const uint64_t x = WORDTOU64(w);
  uint64_t* const dst = (uint64_t*)bytes;
  *dst = (*dst & ~replace) | (U64BIG(x) & replace);
}
#elif ASCON_DATA_ACCESS == 'M'
#ifndef NDEBUG
#pragma message("Using memcpy to access data")
#endif
#include <string.h>
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
/* memcpy access: copy n bytes (0 <= n <= 8) into a zeroed 64-bit buffer,
 * convert from big-endian byte order and return the result as a word. */
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
  uint64_t raw = 0;
  memcpy(&raw, bytes, n);
  return U64TOWORD(U64BIG(raw));
}
/* memcpy access: write the n most significant bytes of word w
 * (0 <= n <= 8) to bytes[0..n-1]. */
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
  /* convert once: on little-endian targets U64BIG is a macro that expands
     its argument eight times, so the conversion must not be its argument */
  const uint64_t value = WORDTOU64(w);
  const uint64_t x = U64BIG(value);
  memcpy(bytes, &x, n);
}
#elif ASCON_DATA_ACCESS == 'B'
#ifndef NDEBUG
#pragma message("Using bytewise data access")
#endif
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
/* Bytewise access: assemble the first n bytes (0 <= n <= 8) into a 64-bit
 * value, byte 0 being the most significant, and return it as a word. */
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
  uint64_t x = 0;
  for (int pos = 0; pos < n; ++pos) {
    x |= (uint64_t)bytes[pos] << (56 - 8 * pos);
  }
  return U64TOWORD(x);
}
/* Bytewise access: write the n most significant bytes of word w
 * (0 <= n <= 8) to bytes[0..n-1], most significant byte first. */
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
  const uint64_t x = WORDTOU64(w);
  for (int pos = 0; pos < n; ++pos) {
    bytes[pos] = (uint8_t)(x >> (56 - 8 * pos));
  }
}
#elif ASCON_DATA_ACCESS == 'H'
#ifndef NDEBUG
#pragma message("Using hybrid data access")
#endif
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
/* Hybrid access: a full word (n == 8) is read with one 64-bit load, any
 * shorter length (0 <= n < 8) is assembled byte by byte, byte 0 being the
 * most significant. */
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
  uint64_t x = 0;
  if (n != 8) {
    for (int pos = 0; pos < n; ++pos) {
      x |= (uint64_t)bytes[pos] << (56 - 8 * pos);
    }
  } else {
    x = U64BIG(*(uint64_t*)bytes);
  }
  return U64TOWORD(x);
}
/* Hybrid access: a full word (n == 8) is written with one 64-bit store, any
 * shorter length (0 <= n < 8) byte by byte, most significant byte first. */
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
  const uint64_t x = WORDTOU64(w);
  if (n != 8) {
    for (int pos = 0; pos < n; ++pos) {
      bytes[pos] = (uint8_t)(x >> (56 - 8 * pos));
    }
  } else {
    *(uint64_t*)bytes = U64BIG(x);
  }
}
#else
#error "Ascon data access macro not defined correctly"
#endif
#endif /* LOADSTORE_H_ */
#include "permutations.h"
#include "round.h"
/* Round constant table, needed only by the looped / single-function
 * variants. Each entry is a pair that is passed to ROUND as (C_e, C_o).
 * The pairs look like the bit-interleaved (even, odd) halves of the Ascon
 * round constants 0xf0, 0xe1, ... 0x4b - TODO confirm against the spec. */
#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM
const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
{0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9},
{0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}};
#endif
/* Out-of-line permutation entry points. Nothing is emitted here when the
 * permutations are inlined (ASCON_INLINE_PERM). */
#if ASCON_INLINE_PERM
#elif ASCON_SINGLE_PERM
/* One function for all round counts: runs the table entries from index
 * START(rounds) up to 11. */
void P(state_t* s, uint8_t rounds) {
printstate(" permutation input", s);
for (int i = START(rounds); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
/* One function per round count; P8 / P6 are only built for the rate that
 * uses them. */
void P12(state_t* s) { P12ROUNDS(s); }
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) { P8ROUNDS(s); }
#endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8
void P6(state_t* s) { P6ROUNDS(s); }
#endif
#endif
#ifndef PRINTSTATE_H_
#define PRINTSTATE_H_
#ifdef NDEBUG
#define printword(text, w)
#define printstate(text, s)
#else
#include <inttypes.h>
#include <stdio.h>
#include "ascon.h"
/* Debug helper: print "<text>=<value>" with the word converted back to its
 * plain 64-bit value, as 16 zero-padded hex digits. */
__forceinline void printword(const char* text, const word_t x) {
  const uint64_t value = WORDTOU64(x);
  printf("%s=%016" PRIx64 "\n", text, value);
}
/* Debug helper: print a "<text>:" heading followed by the five state words
 * x0..x4, one per line (see printword). Compiled out when NDEBUG is set. */
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#endif
#endif /* PRINTSTATE_H_ */
#ifndef ROUND_H_
#define ROUND_H_
#include "ascon.h"
#include "printstate.h"
/* Reset the three key words to zero before the key bytes are XORed in. */
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
  const word_t zero = WORD_T(0);
  *K0 = zero;
  *K1 = zero;
  *K2 = zero;
}
/* Reset all five state words to zero. */
__forceinline void PINIT(state_t* s) {
  const word_t zero = WORD_T(0);
  s->x0 = zero;
  s->x1 = zero;
  s->x2 = zero;
  s->x3 = zero;
  s->x4 = zero;
}
/* One round of the permutation on the bit-interleaved state, written as
 * inline assembly using eor / bic / mvn instructions with rotated operands.
 * s   : state, updated in place (all ten 32-bit halves are "+r" operands)
 * C_e : round constant XORed into the even half of x2
 * C_o : round constant XORed into the odd half of x2
 * Instruction sequence, in order:
 *   1. constant addition into x2 and the XORs x0 ^= x4, x4 ^= x3, x2 ^= x1
 *   2. the non-linear step built from bic (and-not) and eor, even and odd
 *      halves interleaved, followed by mvn (bitwise NOT) of x2
 *   3. the rotate-and-XOR step: every word is XORed with two rotated copies
 *      of itself, expressed through "ror #n" on the even/odd halves
 * tmp0..tmp3 are scratch registers.
 * C_e and C_o use the "i" (immediate) constraint, so they have to be
 * compile-time constants at the point where this function is inlined.
 * NOTE(review): a call with run-time values, such as the table-driven loop
 * ROUND(s, constants[i][0], constants[i][1]), would not satisfy this unless
 * the compiler fully unrolls it - confirm for the configurations in use. */
__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) {
uint32_t tmp0, tmp1, tmp2, tmp3;
/* clang-format off */
__asm__ __volatile__( \
"eor %[x2_e], %[x2_e], %[C_e]\n\t" \
"eor %[x2_o], %[x2_o], %[C_o]\n\t" \
"eor %[x0_e], %[x0_e], %[x4_e]\n\t" \
"eor %[x0_o], %[x0_o], %[x4_o]\n\t" \
"eor %[x4_e], %[x4_e], %[x3_e]\n\t" \
"eor %[x4_o], %[x4_o], %[x3_o]\n\t" \
"eor %[x2_e], %[x2_e], %[x1_e]\n\t" \
"eor %[x2_o], %[x2_o], %[x1_o]\n\t" \
"bic %[tmp0], %[x0_e], %[x4_e]\n\t" \
"bic %[tmp1], %[x4_e], %[x3_e]\n\t" \
"bic %[tmp2], %[x2_e], %[x1_e]\n\t" \
"bic %[tmp3], %[x1_e], %[x0_e]\n\t" \
"eor %[x2_e], %[x2_e], %[tmp1]\n\t" \
"eor %[x0_e], %[x0_e], %[tmp2]\n\t" \
"eor %[x4_e], %[x4_e], %[tmp3]\n\t" \
"bic %[tmp3], %[x3_e], %[x2_e]\n\t" \
"eor %[x3_e], %[x3_e], %[tmp0]\n\t" \
"bic %[tmp2], %[x0_o], %[x4_o]\n\t" \
"bic %[tmp0], %[x2_o], %[x1_o]\n\t" \
"bic %[tmp1], %[x4_o], %[x3_o]\n\t" \
"eor %[x1_e], %[x1_e], %[tmp3]\n\t" \
"eor %[x0_o], %[x0_o], %[tmp0]\n\t" \
"eor %[x2_o], %[x2_o], %[tmp1]\n\t" \
"bic %[tmp3], %[x1_o], %[x0_o]\n\t" \
"bic %[tmp0], %[x3_o], %[x2_o]\n\t" \
"eor %[x3_o], %[x3_o], %[tmp2]\n\t" \
"eor %[x3_o], %[x3_o], %[x2_o]\n\t" \
"eor %[x4_o], %[x4_o], %[tmp3]\n\t" \
"eor %[x1_o], %[x1_o], %[tmp0]\n\t" \
"eor %[x3_e], %[x3_e], %[x2_e]\n\t" \
"eor %[x1_e], %[x1_e], %[x0_e]\n\t" \
"eor %[x1_o], %[x1_o], %[x0_o]\n\t" \
"eor %[x0_e], %[x0_e], %[x4_e]\n\t" \
"eor %[x0_o], %[x0_o], %[x4_o]\n\t" \
"mvn %[x2_e], %[x2_e]\n\t" \
"mvn %[x2_o], %[x2_o]\n\t" \
"eor %[tmp0], %[x0_e], %[x0_o], ror #4\n\t" \
"eor %[tmp1], %[x0_o], %[x0_e], ror #5\n\t" \
"eor %[tmp2], %[x1_e], %[x1_e], ror #11\n\t" \
"eor %[tmp3], %[x1_o], %[x1_o], ror #11\n\t" \
"eor %[x0_e], %[x0_e], %[tmp1], ror #9\n\t" \
"eor %[x0_o], %[x0_o], %[tmp0], ror #10\n\t" \
"eor %[x1_e], %[x1_e], %[tmp3], ror #19\n\t" \
"eor %[x1_o], %[x1_o], %[tmp2], ror #20\n\t" \
"eor %[tmp0], %[x2_e], %[x2_o], ror #2\n\t" \
"eor %[tmp1], %[x2_o], %[x2_e], ror #3\n\t" \
"eor %[tmp2], %[x3_e], %[x3_o], ror #3\n\t" \
"eor %[tmp3], %[x3_o], %[x3_e], ror #4\n\t" \
"eor %[x2_e], %[x2_e], %[tmp1]\n\t" \
"eor %[x2_o], %[x2_o], %[tmp0], ror #1\n\t" \
"eor %[x3_e], %[x3_e], %[tmp2], ror #5\n\t" \
"eor %[x3_o], %[x3_o], %[tmp3], ror #5\n\t" \
"eor %[tmp0], %[x4_e], %[x4_e], ror #17\n\t" \
"eor %[tmp1], %[x4_o], %[x4_o], ror #17\n\t" \
"eor %[x4_e], %[x4_e], %[tmp1], ror #3\n\t" \
"eor %[x4_o], %[x4_o], %[tmp0], ror #4\n\t" \
: [ x0_e ] "+r"(s->x0.e), \
[ x1_e ] "+r"(s->x1.e), \
[ x2_e ] "+r"(s->x2.e), \
[ x3_e ] "+r"(s->x3.e), \
[ x4_e ] "+r"(s->x4.e), \
[ x0_o ] "+r"(s->x0.o), \
[ x1_o ] "+r"(s->x1.o), \
[ x2_o ] "+r"(s->x2.o), \
[ x3_o ] "+r"(s->x3.o), \
[ x4_o ] "+r"(s->x4.o), \
[ tmp0 ] "=r"(tmp0), \
[ tmp1 ] "=r"(tmp1), \
[ tmp2 ] "=r"(tmp2), \
[ tmp3 ] "=r"(tmp3) \
: [ C_e ] "i"(C_e), \
[ C_o ] "i"(C_o) \
: );
/* clang-format on */
printstate(" round output", s);
}
#endif /* ROUND_H_ */
#ifndef WORD_H_
#define WORD_H_
#include <stdint.h>
#include "config.h"
/* 64-bit Ascon word kept as two 32-bit halves. In bit-interleaved form (see
 * TOBI32) e holds the even-indexed and o the odd-indexed bits; WORD_T stores
 * the low half in e and the high half in o without interleaving. */
typedef struct {
uint32_t e;
uint32_t o;
} word_t;
/* Split a 64-bit value into a word without interleaving: e receives the low
 * 32 bits, o the high 32 bits. */
__forceinline word_t WORD_T(uint64_t x) {
  word_t w;
  w.e = (uint32_t)x;
  w.o = (uint32_t)(x >> 32);
  return w;
}
/* Join the two halves of a word into a 64-bit value (o = high, e = low). */
__forceinline uint64_t UINT64_T(word_t x) {
  const uint64_t high = x.o;
  const uint64_t low = x.e;
  return (high << 32) | low;
}
__forceinline uint64_t TOBI32(uint64_t in);
__forceinline uint64_t FROMBI32(uint64_t in);
/* Convert a plain 64-bit value to a bit-interleaved word: interleave with
 * TOBI32, then split the result into its low (e) and high (o) halves. */
__forceinline word_t U64TOWORD(uint64_t x) {
  const uint64_t interleaved = TOBI32(x);
  word_t w;
  w.e = (uint32_t)interleaved;
  w.o = (uint32_t)(interleaved >> 32);
  return w;
}
/* Convert a bit-interleaved word back to its plain 64-bit value: join the
 * halves (o = high, e = low), then undo the interleaving with FROMBI32. */
__forceinline uint64_t WORDTOU64(word_t w) {
  const uint64_t high = w.o;
  const uint64_t low = w.e;
  return FROMBI32((high << 32) | low);
}
/* XOR word b into word a, half by half.
 * a must be a modifiable lvalue (it is expanded twice); b is evaluated
 * exactly once into the temporary tb, so it may be a function call.
 * The temporary is named tb: do not pass an expression that itself refers to
 * a variable called tb. */
#define XOR(a, b) \
do { \
word_t tb = b; \
(a).e ^= tb.e; \
(a).o ^= tb.o; \
} while (0)
/* AND word b into word a, half by half.
 * a must be a modifiable lvalue (it is expanded twice); b is evaluated
 * exactly once into the temporary tb, so it may be a function call. */
#define AND(a, b) \
do { \
word_t tb = b; \
(a).e &= tb.e; \
(a).o &= tb.o; \
} while (0)
/* Rotate the 32-bit value x right by n bit positions (0 <= n <= 31).
 * Both shift counts are reduced modulo 32 so that n == 0 is well defined:
 * the unreduced form would evaluate x << 32, which is undefined behaviour
 * for a 32-bit operand. ROR64(x, 1) reaches this case through
 * ROR32(x.o, 0). Results for 1 <= n <= 31 are unchanged. */
__forceinline uint32_t ROR32(uint32_t x, int n) {
  return (x >> (n & 31)) | (x << ((32 - n) & 31));
}
/* Rotate a bit-interleaved 64-bit word right by n bits. An even n rotates
 * both halves by n / 2; an odd n additionally swaps the halves, rotating the
 * old odd half by (n - 1) / 2 and the old even half by (n + 1) / 2. */
__forceinline word_t ROR64(word_t x, int n) {
  word_t r;
  if (n % 2) {
    r.e = ROR32(x.o, (n - 1) / 2);
    r.o = ROR32(x.e, (n + 1) / 2);
  } else {
    r.e = ROR32(x.e, n / 2);
    r.o = ROR32(x.o, n / 2);
  }
  return r;
}
/* Build a word from two neighbouring key words: in both 32-bit halves the
 * low 16 bits of lo2hi move to the high 16 bits and the high 16 bits of
 * hi2lo move to the low 16 bits. */
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
  word_t out;
  out.e = (lo2hi.e << 16) | (hi2lo.e >> 16);
  out.o = (lo2hi.o << 16) | (hi2lo.o >> 16);
  return out;
}
/* Return non-zero iff a or b has any bit set. All 16 bytes are ORed together
 * without a data-dependent branch or early exit. */
__forceinline int NOTZERO(word_t a, word_t b) {
  const uint8_t* pa = (const uint8_t*)&a;
  const uint8_t* pb = (const uint8_t*)&b;
  int acc = 0;
  for (int pos = 0; pos < 8; ++pos) {
    acc |= pa[pos];
    acc |= pb[pos];
  }
  return acc;
}
/* set padding byte in 64-bit Ascon word (0 <= i <= 7): returns the word
 * whose only set bit is bit (31 - 4 * i) of the odd half */
__forceinline word_t PAD(int i) {
  /* Shift an unsigned constant: for i == 0 the result is 0x80000000, which
     does not fit in a signed int, so the signed shift was undefined. */
  const uint32_t odd = (uint32_t)0x08 << (28 - 4 * i);
  return WORD_T((uint64_t)odd << 32);
}
/* byte mask for 64-bit Ascon word (1 <= n <= 8): in both 32-bit halves the
 * top 4 * n bits are cleared and the remaining low bits are set */
__forceinline word_t XMASK(int n) {
  const uint32_t half = 0x0fffffff >> (4 * n - 4);
  const uint64_t both = ((uint64_t)half << 32) | half;
  return WORD_T(both);
}
/* Convert a plain 64-bit value to bit-interleaved form.
 * Each 32-bit half is unshuffled so that its even-indexed bits end up in its
 * low 16 bits and its odd-indexed bits in its high 16 bits; the halves are
 * then recombined so that the low 32 bits of the result hold the
 * even-indexed bits and the high 32 bits the odd-indexed bits of the input.
 * Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t TOBI32(uint64_t in) {
uint32_t hi = in >> 32;
uint32_t lo = in;
uint32_t r0, r1;
/* unshuffle lo: swap progressively larger bit groups (1, 2, 4, 8 bits) */
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
/* same unshuffle for hi */
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
/* low 16 bits of both halves -> r0 (even), high 16 bits -> r1 (odd) */
r0 = (lo & 0x0000FFFF) | (hi << 16);
r1 = (lo >> 16) | (hi & 0xFFFF0000);
return (uint64_t)r1 << 32 | r0;
}
/* Convert a bit-interleaved 64-bit value (low 32 bits = even-indexed bits,
 * high 32 bits = odd-indexed bits, as produced by TOBI32) back to plain
 * form; the steps are those of TOBI32 applied in reverse order.
 * Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t FROMBI32(uint64_t in) {
uint32_t r0 = in;
uint32_t r1 = in >> 32;
/* regroup: lo takes the low 16 bits of both halves, hi the high 16 bits */
uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16);
uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000);
/* shuffle lo: swap bit groups from large (8 bits) down to small (1 bit) */
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
/* same shuffle for hi */
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
return (uint64_t)hi << 32 | lo;
}
#endif /* WORD_H_ */
...@@ -3,3 +3,4 @@ ...@@ -3,3 +3,4 @@
#define CRYPTO_NPUBBYTES 16 #define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16 #define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1 #define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16
#include "ascon.h"
#include "api.h"
#include "loadstore.h"
#include "permutations.h"
#include "printstate.h"
/* Load the key k into K0..K2. K1 and K2 receive the last 16 key bytes; K0 is
 * only filled for 20-byte keys (first 4 bytes, moved into the low part of
 * the word by KEYROT) and stays zero otherwise. */
__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2,
                           const uint8_t* k) {
  const uint8_t* tail = k;
  KINIT(K0, K1, K2);
  if (CRYPTO_KEYBYTES == 20) {
    XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
    tail = k + 4;
  }
  XOR(*K1, LOAD64(tail));
  XOR(*K2, LOAD64(tail + 8));
}
/* Initialization phase: fill the state with IV, key and nonce, run P12 and
 * XOR the key into the state again. */
__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) {
  word_t key0, key1, key2;
  /* the nonce is loaded before the key, as in the reference ordering */
  const word_t nonce0 = LOAD64(npub);
  const word_t nonce1 = LOAD64(npub + 8);
  loadkey(&key0, &key1, &key2, k);
  /* state = IV (^ K0) | K1 | K2 | N0 | N1 */
  PINIT(s);
  XOR(s->x0, IV);
  if (CRYPTO_KEYBYTES == 20) {
    XOR(s->x0, key0);
  }
  XOR(s->x1, key1);
  XOR(s->x2, key2);
  XOR(s->x3, nonce0);
  XOR(s->x4, nonce1);
  P12(s);
  /* key addition after the permutation */
  if (CRYPTO_KEYBYTES == 20) {
    XOR(s->x2, key0);
  }
  XOR(s->x3, key1);
  XOR(s->x4, key2);
  printstate("initialization", s);
}
/* Absorb the associated data into the state. Nothing is absorbed for
 * adlen == 0; the final XOR of 1 into x4 is applied in every case. */
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
  if (adlen) {
    word_t* restrict lane;
    /* full rate blocks */
    for (; adlen >= ASCON_RATE; ad += ASCON_RATE, adlen -= ASCON_RATE) {
      XOR(s->x0, LOAD64(ad));
      if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8));
      PB(s);
    }
    /* final block: lane is the word that receives the partial data and the
       padding (x1 when a 16-byte rate leaves at least 8 bytes, else x0) */
    lane = &s->x0;
    if (ASCON_RATE == 16 && adlen >= 8) {
      XOR(s->x0, LOAD64(ad));
      lane = &s->x1;
      ad += 8;
      adlen -= 8;
    }
    if (adlen) {
      XOR(*lane, LOAD(ad, adlen));
    }
    XOR(*lane, PAD(adlen));
    PB(s);
  }
  XOR(s->x4, WORD_T(1));
  printstate("process associated data", s);
}
/* Encrypt mlen bytes from m into c: each plaintext block is XORed into the
 * rate words of the state and the updated rate words are written out as
 * ciphertext. The last (possibly empty) block is padded and not followed by
 * a permutation here. */
__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m,
                           uint64_t mlen) {
  word_t* restrict lane;
  /* full rate blocks */
  for (; mlen >= ASCON_RATE;
       m += ASCON_RATE, c += ASCON_RATE, mlen -= ASCON_RATE) {
    XOR(s->x0, LOAD64(m));
    STORE64(c, s->x0);
    if (ASCON_RATE == 16) {
      XOR(s->x1, LOAD64(m + 8));
      STORE64(c + 8, s->x1);
    }
    PB(s);
  }
  /* final block: lane is the word that takes the partial data and padding */
  lane = &s->x0;
  if (ASCON_RATE == 16 && mlen >= 8) {
    XOR(s->x0, LOAD64(m));
    STORE64(c, s->x0);
    lane = &s->x1;
    m += 8;
    c += 8;
    mlen -= 8;
  }
  if (mlen) {
    XOR(*lane, LOAD(m, mlen));
    STORE(c, *lane, mlen);
  }
  XOR(*lane, PAD(mlen));
  printstate("process plaintext", s);
}
/* Decrypt clen bytes from c into m: the plaintext is the XOR of the rate
 * words with the ciphertext, after which the rate words are replaced by the
 * ciphertext. For a partial final block only the first clen bytes of the
 * word are replaced; the remaining state bytes are kept. */
__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c,
                           uint64_t clen) {
  word_t* restrict lane;
  word_t ct;
  /* full rate blocks */
  for (; clen >= ASCON_RATE;
       m += ASCON_RATE, c += ASCON_RATE, clen -= ASCON_RATE) {
    ct = LOAD64(c);
    XOR(s->x0, ct);
    STORE64(m, s->x0);
    s->x0 = ct;
    if (ASCON_RATE == 16) {
      ct = LOAD64(c + 8);
      XOR(s->x1, ct);
      STORE64(m + 8, s->x1);
      s->x1 = ct;
    }
    PB(s);
  }
  /* final block: lane is the word that takes the partial data and padding */
  lane = &s->x0;
  if (ASCON_RATE == 16 && clen >= 8) {
    ct = LOAD64(c);
    XOR(s->x0, ct);
    STORE64(m, s->x0);
    s->x0 = ct;
    lane = &s->x1;
    m += 8;
    c += 8;
    clen -= 8;
  }
  if (clen) {
    ct = LOAD(c, clen);
    XOR(*lane, ct);
    STORE(m, *lane, clen);
    /* drop the plaintext bytes from the lane, then insert the ciphertext */
    AND(*lane, XMASK(clen));
    XOR(*lane, ct);
  }
  XOR(*lane, PAD(clen));
  printstate("process ciphertext", s);
}
/* Finalization phase: XOR the key into the words following the rate, run
 * P12 and XOR K1/K2 into x3/x4, which then hold the tag. */
__forceinline void final(state_t* s, const uint8_t* k) {
  word_t key0, key1, key2;
  loadkey(&key0, &key1, &key2, k);
  /* key position depends on key size and rate */
  if (CRYPTO_KEYBYTES == 16) {
    if (ASCON_RATE == 8) {
      XOR(s->x1, key1);
      XOR(s->x2, key2);
    }
    if (ASCON_RATE == 16) {
      XOR(s->x2, key1);
      XOR(s->x3, key2);
    }
  } else if (CRYPTO_KEYBYTES == 20) {
    XOR(s->x1, KEYROT(key0, key1));
    XOR(s->x2, KEYROT(key1, key2));
    XOR(s->x3, KEYROT(key2, WORD_T(0)));
  }
  P12(s);
  XOR(s->x3, key1);
  XOR(s->x4, key2);
  printstate("finalization", s);
}
/* Select how the AEAD entry points reach the mode functions: directly (the
 * force-inlined functions) or through the external ascon_* wrappers. */
#if ASCON_INLINE_MODE
#define INIT init
#define ABSORB absorb
#define ENCRYPT encrypt
#define DECRYPT decrypt
#define FINAL final
#else
#define INIT ascon_init
#define ABSORB ascon_absorb
#define ENCRYPT ascon_encrypt
#define DECRYPT ascon_decrypt
#define FINAL ascon_final
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
  init(s, npub, k);
}
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
  absorb(s, ad, adlen);
}
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) {
  encrypt(s, c, m, mlen);
}
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) {
  decrypt(s, m, c, clen);
}
void ascon_final(state_t* s, const uint8_t* k) {
  final(s, k);
}
#endif
/* AEAD encryption: writes mlen ciphertext bytes followed by the 16-byte tag
 * to c and stores the total length in *clen. nsec is unused. Always
 * returns 0. */
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
                        uint64_t mlen, const uint8_t* ad, uint64_t adlen,
                        const uint8_t* nsec, const uint8_t* npub,
                        const uint8_t* k) {
  state_t s;
  uint8_t* const tag = c + mlen;
  (void)nsec;
  *clen = mlen + CRYPTO_ABYTES;
  /* perform ascon computation */
  INIT(&s, npub, k);
  ABSORB(&s, ad, adlen);
  ENCRYPT(&s, c, m, mlen);
  FINAL(&s, k);
  /* the tag is x3 || x4, appended after the ciphertext */
  STORE64(tag, s.x3);
  STORE64(tag + 8, s.x4);
  return 0;
}
/* AEAD decryption: c holds the ciphertext followed by the 16-byte tag.
 * Returns 0 and the plaintext length in *mlen on success; returns -1 with
 * *mlen = 0 if the input is shorter than a tag or the tag does not match.
 * The plaintext is written to m before the tag is checked. nsec is unused. */
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
                        const uint8_t* c, uint64_t clen, const uint8_t* ad,
                        uint64_t adlen, const uint8_t* npub, const uint8_t* k) {
  state_t s;
  const uint8_t* tag;
  (void)nsec;
  if (clen < CRYPTO_ABYTES) {
    *mlen = 0;
    return -1;
  }
  clen -= CRYPTO_ABYTES;
  *mlen = clen;
  tag = c + clen;
  /* perform ascon computation */
  INIT(&s, npub, k);
  ABSORB(&s, ad, adlen);
  DECRYPT(&s, m, c, clen);
  FINAL(&s, k);
  /* verify tag (should be constant time, check compiler output) */
  XOR(s.x3, LOAD64(tag));
  XOR(s.x4, LOAD64(tag + 8));
  if (!NOTZERO(s.x3, s.x4)) {
    return 0;
  }
  *mlen = 0;
  return -1;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment