Commit f8a89b2c by Sebastian Renner

Merge branch 'email-submissions'

Merging branch "email-submissions" into master
parents eaa31add 0f49c046

Too many changes to show.

To preserve performance only 1000 of 1000+ files are displayed.

---
Language: Cpp
# BasedOnStyle: Google
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignConsecutiveMacros: false
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: Left
AlignOperands: true
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortLambdasOnASingleLine: All
AllowShortIfStatementsOnASingleLine: WithoutElse
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
AfterCaseLabel: false
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
SplitEmptyFunction: true
SplitEmptyRecord: true
SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakInheritanceList: BeforeColon
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DeriveLineEnding: true
DerivePointerAlignment: true
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IncludeBlocks: Regroup
IncludeCategories:
- Regex: '^<ext/.*\.h>'
Priority: 2
SortPriority: 0
- Regex: '^<.*\.h>'
Priority: 1
SortPriority: 0
- Regex: '^<.*'
Priority: 2
SortPriority: 0
- Regex: '.*'
Priority: 3
SortPriority: 0
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
IndentCaseLabels: true
IndentGotoLabels: true
IndentPPDirectives: None
IndentWidth: 2
IndentWrappedFunctionNames: false
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Never
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
RawStringFormats:
- Language: Cpp
Delimiters:
- cc
- CC
- cpp
- Cpp
- CPP
- 'c++'
- 'C++'
CanonicalDelimiter: ''
BasedOnStyle: google
- Language: TextProto
Delimiters:
- pb
- PB
- proto
- PROTO
EnclosingFunctions:
- EqualsProto
- EquivToProto
- PARSE_PARTIAL_TEXT_PROTO
- PARSE_TEST_PROTO
- PARSE_TEXT_PROTO
- ParseTextOrDie
- ParseTextProtoOrDie
CanonicalDelimiter: ''
BasedOnStyle: google
ReflowComments: true
SortIncludes: true
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInConditionalStatement: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
Standard: Auto
StatementMacros:
- Q_UNUSED
- QT_REQUIRE_VERSION
TabWidth: 8
UseCRLF: false
UseTab: Never
...
cmake_minimum_required(VERSION 3.6)
project(ascon LANGUAGES C ASM)
enable_testing()
# set the default version, algorithms, implementations, tests, flags, defs
set(DEFAULT_VERSIONS v12)
set(DEFAULT_ALGS ascon128 ascon128a ascon80pq asconhash asconxof)
set(DEFAULT_IMPLS ref opt64 opt64_lowsize bi32 bi32_lowsize bi32_lowreg opt8 bi8)
set(DEFAULT_TESTS genkat getcycles)
set(DEFAULT_REL_FLAGS -std=c99 -O2 -fomit-frame-pointer -march=native -mtune=native)
set(DEFAULT_DBG_FLAGS -std=c99 -O2 -Wall -Wextra -Wshadow)
set(DEFAULT_COMPILE_DEFS)
# set cmake variables for version, algorithms, implementations, tests, flags, defs
set(VERSION_LIST ${DEFAULT_VERSIONS} CACHE STRING "Choose the ascon versions to include.")
set(ALG_LIST ${DEFAULT_ALGS} CACHE STRING "Choose the list of algorithms to include.")
set(IMPL_LIST ${DEFAULT_IMPLS} CACHE STRING "Choose the list of implementations to include.")
set(TEST_LIST ${DEFAULT_TESTS} CACHE STRING "Choose the list of tests to include.")
set(REL_FLAGS ${DEFAULT_REL_FLAGS} CACHE STRING "Define custom Release (performance) flags.")
set(DBG_FLAGS ${DEFAULT_DBG_FLAGS} CACHE STRING "Define custom Debug (NIST) flags.")
set(COMPILE_DEFS ${DEFAULT_COMPILE_DEFS} CACHE STRING "Define custom compile definitions.")
if(NOT WIN32 AND NOT CYGWIN AND NOT MSYS)
# use sanitizer in Debug build (but not on windows)
set(DBG_FLAGS ${DBG_FLAGS} -fsanitize=address,undefined -static-libasan)
endif()
if(MSVC)
set(DBG_FLAGS /Od)
endif()
# set the default build type for single-config generators if none was specified
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
message(STATUS "Setting build type to 'Release' as none was specified.")
set(CMAKE_BUILD_TYPE Release CACHE STRING
"Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE)
endif()
# add platform specific implementations
message(STATUS "cmake host system name: ${CMAKE_HOST_SYSTEM_NAME}")
message(STATUS "cmake host system processor: ${CMAKE_HOST_SYSTEM_PROCESSOR}")
if(${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL x86_64 OR ${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL AMD64)
set(DEFAULT_IMPLS ${DEFAULT_IMPLS})
elseif(${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES ARM OR ${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES arm)
set(DEFAULT_IMPLS ${DEFAULT_IMPLS} bi32_arm neon)
endif()
set(KAT_PATH KAT)
set(TEST_PATH tests)
foreach(CRYPTO aead hash)
foreach(VER ${VERSION_LIST})
foreach(ALG ${ALG_LIST})
foreach(IMPL ${IMPL_LIST})
set(IMPL_PATH crypto_${CRYPTO}/${ALG}${VER}/${IMPL})
if(NOT EXISTS ${CMAKE_SOURCE_DIR}/${IMPL_PATH})
continue()
endif()
message("Adding implementation ${IMPL_PATH}")
string(REGEX REPLACE "/" "_" IMPL_NAME ${IMPL_PATH})
file(GLOB IMPL_FILES RELATIVE ${CMAKE_SOURCE_DIR} "${IMPL_PATH}/*.[ch]")
add_library(${IMPL_NAME} ${IMPL_FILES})
target_include_directories(${IMPL_NAME} PUBLIC ${IMPL_PATH} ${TEST_PATH})
target_compile_definitions(${IMPL_NAME} PRIVATE ${COMPILE_DEFS})
#target_compile_features(${IMPL_NAME} PUBLIC c_std_99) # cmake >= 3.8.2
target_compile_options(${IMPL_NAME} PUBLIC $<$<CONFIG:RELEASE>:${REL_FLAGS}>)
target_compile_options(${IMPL_NAME} PUBLIC $<$<CONFIG:DEBUG>:${DBG_FLAGS}>)
foreach(TEST_NAME ${TEST_LIST})
if(${TEST_NAME} STREQUAL genkat)
set(TEST_FILES ${TEST_PATH}/crypto_${CRYPTO}.h ${TEST_PATH}/${TEST_NAME}_${CRYPTO}.c)
else()
set(TEST_FILES ${TEST_PATH}/crypto_${CRYPTO}.h ${TEST_PATH}/${TEST_NAME}.c)
endif()
string(TOUPPER CRYPTO_${CRYPTO} CRYPTO_DEFINE)
set(EXE_NAME ${TEST_NAME}_${IMPL_NAME})
add_executable(${EXE_NAME} ${TEST_FILES})
target_compile_definitions(${EXE_NAME} PRIVATE ${CRYPTO_DEFINE})
target_link_libraries(${EXE_NAME} PRIVATE ${IMPL_NAME})
if(${TEST_NAME} STREQUAL genkat)
add_test(NAME ${EXE_NAME} COMMAND ${CMAKE_COMMAND}
-DEXE_NAME=${EXE_NAME} -DALG=${ALG}${VER}
-DSRC_DIR=${CMAKE_SOURCE_DIR} -DBIN_DIR=${CMAKE_BINARY_DIR}
-DCONFIG=$<CONFIGURATION> -P ${CMAKE_SOURCE_DIR}/genkat.cmake)
else()
add_test(${EXE_NAME} ${EXE_NAME})
endif()
endforeach()
endforeach()
endforeach()
endforeach()
endforeach()
CC0 1.0 Universal
Statement of Purpose
The laws of most jurisdictions throughout the world automatically confer
exclusive Copyright and Related Rights (defined below) upon the creator and
subsequent owner(s) (each and all, an "owner") of an original work of
authorship and/or a database (each, a "Work").
Certain owners wish to permanently relinquish those rights to a Work for the
purpose of contributing to a commons of creative, cultural and scientific
works ("Commons") that the public can reliably and without fear of later
claims of infringement build upon, modify, incorporate in other works, reuse
and redistribute as freely as possible in any form whatsoever and for any
purposes, including without limitation commercial purposes. These owners may
contribute to the Commons to promote the ideal of a free culture and the
further production of creative, cultural and scientific works, or to gain
reputation or greater distribution for their Work in part through the use and
efforts of others.
For these and/or other purposes and motivations, and without any expectation
of additional consideration or compensation, the person associating CC0 with a
Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
and publicly distribute the Work under its terms, with knowledge of his or her
Copyright and Related Rights in the Work and the meaning and intended legal
effect of CC0 on those rights.
1. Copyright and Related Rights. A Work made available under CC0 may be
protected by copyright and related or neighboring rights ("Copyright and
Related Rights"). Copyright and Related Rights include, but are not limited
to, the following:
i. the right to reproduce, adapt, distribute, perform, display, communicate,
and translate a Work;
ii. moral rights retained by the original author(s) and/or performer(s);
iii. publicity and privacy rights pertaining to a person's image or likeness
depicted in a Work;
iv. rights protecting against unfair competition in regards to a Work,
subject to the limitations in paragraph 4(a), below;
v. rights protecting the extraction, dissemination, use and reuse of data in
a Work;
vi. database rights (such as those arising under Directive 96/9/EC of the
European Parliament and of the Council of 11 March 1996 on the legal
protection of databases, and under any national implementation thereof,
including any amended or successor version of such directive); and
vii. other similar, equivalent or corresponding rights throughout the world
based on applicable law or treaty, and any national implementations thereof.
2. Waiver. To the greatest extent permitted by, but not in contravention of,
applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
and Related Rights and associated claims and causes of action, whether now
known or unknown (including existing as well as future claims and causes of
action), in the Work (i) in all territories worldwide, (ii) for the maximum
duration provided by applicable law or treaty (including future time
extensions), (iii) in any current or future medium and for any number of
copies, and (iv) for any purpose whatsoever, including without limitation
commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
the Waiver for the benefit of each member of the public at large and to the
detriment of Affirmer's heirs and successors, fully intending that such Waiver
shall not be subject to revocation, rescission, cancellation, termination, or
any other legal or equitable action to disrupt the quiet enjoyment of the Work
by the public as contemplated by Affirmer's express Statement of Purpose.
3. Public License Fallback. Should any part of the Waiver for any reason be
judged legally invalid or ineffective under applicable law, then the Waiver
shall be preserved to the maximum extent permitted taking into account
Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
is so judged Affirmer hereby grants to each affected person a royalty-free,
non transferable, non sublicensable, non exclusive, irrevocable and
unconditional license to exercise Affirmer's Copyright and Related Rights in
the Work (i) in all territories worldwide, (ii) for the maximum duration
provided by applicable law or treaty (including future time extensions), (iii)
in any current or future medium and for any number of copies, and (iv) for any
purpose whatsoever, including without limitation commercial, advertising or
promotional purposes (the "License"). The License shall be deemed effective as
of the date CC0 was applied by Affirmer to the Work. Should any part of the
License for any reason be judged legally invalid or ineffective under
applicable law, such partial invalidity or ineffectiveness shall not
invalidate the remainder of the License, and in such case Affirmer hereby
affirms that he or she will not (i) exercise any of his or her remaining
Copyright and Related Rights in the Work or (ii) assert any associated claims
and causes of action with respect to the Work, in either case contrary to
Affirmer's express Statement of Purpose.
4. Limitations and Disclaimers.
a. No trademark or patent rights held by Affirmer are waived, abandoned,
surrendered, licensed or otherwise affected by this document.
b. Affirmer offers the Work as-is and makes no representations or warranties
of any kind concerning the Work, express, implied, statutory or otherwise,
including without limitation warranties of title, merchantability, fitness
for a particular purpose, non infringement, or the absence of latent or
other defects, accuracy, or the present or absence of errors, whether or not
discoverable, all to the greatest extent permissible under applicable law.
c. Affirmer disclaims responsibility for clearing rights of other persons
that may apply to the Work or any use thereof, including without limitation
any person's Copyright and Related Rights in the Work. Further, Affirmer
disclaims responsibility for obtaining any necessary consents, permissions
or other rights required for any use of the Work.
d. Affirmer understands and acknowledges that Creative Commons is not a
party to this document and has no duty or obligation with respect to this
CC0 or use of the Work.
For more information, please see
<http://creativecommons.org/publicdomain/zero/1.0/>
\ No newline at end of file
# Reference and optimized C and ASM implementations of Ascon
Ascon is a family of lightweight authenticated encryption schemes with associated data (AEAD), including a hash and extendible output function (XOF).
For more information on Ascon visit: https://ascon.iaik.tugraz.at/
This repository contains the following 5 Ascon algorithms:
- `crypto_aead/ascon128v12`: Ascon-128 v1.2
- `crypto_aead/ascon128av12`: Ascon-128a v1.2
- `crypto_aead/ascon80pqv12`: Ascon-80pq v1.2
- `crypto_hash/asconhashv12`: Ascon-Hash v1.2
- `crypto_hash/asconxofv12`: Ascon-Xof v1.2
and the following implementations:
- `ref`: reference implementation
- `opt64`: 64-bit speed-optimized C implementation
- `opt64_lowsize`: 64-bit size-optimized C implementation
- `neon`: NEON speed-optimized ARM inline assembly implementation
- `bi32`: 32-bit speed-optimized bit-interleaved C implementation
- `bi32_arm`: 32-bit speed-optimized bit-interleaved ARM inline assembly implementation
- `bi32_lowreg`: 32-bit speed-optimized bit-interleaved C implementation (low register usage)
- `bi32_lowsize`: 32-bit size-optimized bit-interleaved C implementation
- `opt8`: 8-bit optimized C implementation
- `bi8`: 8-bit optimized bit-interleaved C implementation
## Performance results of Ascon-128 on different CPUs in cycles per byte:
| Message Length in Bytes: | 1 | 8 | 16 | 32 | 64 | 1536 | long |
|:-------------------------|-----:|-----:|-----:|-----:|-----:|-----:|-----:|
| AMD Ryzen 7 1700\* | | | | | 14.5 | 8.8 | 8.6 |
| Intel Xeon E5-2609 v4\* | | | | | 17.3 | 10.8 | 10.5 |
| Cortex-A53 (ARMv8)\* | | | | | 18.3 | 11.3 | 11.0 |
| Intel Core i5-6300U | 367 | 58 | 35 | 23 | 17.6 | 11.9 | 11.4 |
| Intel Core i5-4200U | 521 | 81 | 49 | 32 | 23.9 | 16.2 | 15.8 |
| Cortex-A15 (ARMv7)\* | | | | | 69.8 | 36.2 | 34.6 |
| Cortex-A7 (NEON) | 2182 | 249 | 148 | 97 | 71.7 | 47.5 | 46.5 |
| Cortex-A7 (ARMv7) | 1871 | 292 | 175 | 115 | 86.6 | 58.3 | 57.2 |
| ARM1176JZF-S (ARMv6) | 2136 | 312 | 186 | 123 | 91.6 | 61.8 | 62.2 |
\* Results taken from eBACS: http://bench.cr.yp.to/
## Performance results of Ascon-128a on different CPUs in cycles per byte:
| Message Length in Bytes: | 1 | 8 | 16 | 32 | 64 | 1536 | long |
|:-------------------------|-----:|-----:|-----:|-----:|-----:|-----:|-----:|
| AMD Ryzen 7 1700\* | | | | | 12.0 | 6.0 | 5.7 |
| Intel Xeon E5-2609 v4\* | | | | | 14.1 | 7.3 | 6.9 |
| Cortex-A53 (ARMv8)\* | | | | | 15.1 | 7.6 | 7.3 |
| Intel Core i5-6300U | 365 | 47 | 31 | 19 | 13.5 | 8.0 | 7.8 |
| Intel Core i5-4200U | 519 | 67 | 44 | 27 | 18.8 | 11.0 | 10.6 |
| Cortex-A15 (ARMv7)\* | | | | | 60.3 | 25.3 | 23.8 |
| Cortex-A7 (NEON) | 2204 | 226 | 132 | 82 | 55.9 | 31.7 | 30.7 |
| Cortex-A7 (ARMv7) | 1911 | 255 | 161 | 102 | 71.3 | 42.3 | 41.2 |
| ARM1176JZF-S (ARMv6) | 2118 | 261 | 170 | 107 | 75.6 | 46.0 | 46.6 |
\* Results taken from eBACS: http://bench.cr.yp.to/
## Implementation interface
All implementations use the interface defined by the ECRYPT Benchmarking of Cryptographic Systems (eBACS):
- https://bench.cr.yp.to/call-aead.html for CRYPTO\_AEAD (Ascon-128, Ascon-128a, Ascon-80pq)
- https://bench.cr.yp.to/call-hash.html for CRYPTO\_HASH (Ascon-Hash) and XOF (Ascon-Xof)
## Manually build and run a single Ascon target:
Build example for CRYPTO\_AEAD algorithms:
```
gcc -march=native -O3 -DNDEBUG -Icrypto_aead/ascon128v12/opt64 crypto_aead/ascon128v12/opt64/*.c -Itests tests/genkat_aead.c -o genkat
gcc -march=native -O3 -DNDEBUG -Icrypto_aead/ascon128v12/opt64 crypto_aead/ascon128v12/opt64/*.c -DCRYPTO_AEAD -Itests tests/getcycles.c -o getcycles
```
Build example for CRYPTO\_HASH algorithms:
```
gcc -march=native -O3 -DNDEBUG -Icrypto_hash/asconhashv12/opt64 crypto_hash/asconhashv12/opt64/*.c -Itests tests/genkat_hash.c -o genkat
gcc -march=native -O3 -DNDEBUG -Icrypto_hash/asconhashv12/opt64 crypto_hash/asconhashv12/opt64/*.c -DCRYPTO_HASH -Itests tests/getcycles.c -o getcycles
```
Generate KATs and get CPU cycles:
```
./genkat
./getcycles
```
## Build and test all Ascon v1.2 targets using performance flags:
```
mkdir build && cd build
cmake .. -DCMAKE_BUILD_TYPE=Release
cmake --build .
ctest
```
## Build and test all Ascon v1.2 targets using NIST flags and sanitizers:
```
mkdir build && cd build
cmake .. -DCMAKE_BUILD_TYPE=Debug
cmake --build .
ctest
```
## Build and run only specific algorithms, implementations and tests:
Build and test:
```
mkdir build && cd build
cmake .. -DVERSION_LIST="v12" -DALG_LIST="ascon128;asconhash" -DIMPL_LIST="opt64;bi32" -DTEST_LIST="genkat;getcycles"
cmake --build .
ctest -R genkat
```
Get CPU cycles:
```
./getcycles_crypto_aead_ascon128v12_opt64
./getcycles_crypto_aead_ascon128v12_bi32
./getcycles_crypto_hash_asconhashv12_opt64
./getcycles_crypto_hash_asconhashv12_bi32
```
## Hints to get more reliable getcycles results on Intel/AMD CPUs:
* Determine the processor base frequency (also called design frequency):
- e.g. using the Intel/AMD website
- or using `lscpu` listed under model name
* Disable turbo boost (this should lock the frequency to the next value
below the processor base frequency):
```
echo 1 | sudo tee /sys/devices/system/cpu/intel_pstate/no_turbo
```
* If the above does not work, manually set the frequency using e.g. `cpufreq-set`.
* Determine the actual frequency (under load):
- e.g. by watching the frequency using `lscpu` or `cpufreq-info`
* Determine the scaling factor between the actual and base frequency:
- factor = actual frequency / base frequency
* Run a getcycles program using the frequency factor and watch the results:
```
while true; do ./getcycles_crypto_aead_ascon128v12_opt64 $factor; done
```
* Run the `benchmark.sh` script with a specific algorithm and frequency factor
to benchmark all built implementations:
```
./benchmark.sh ascon128v12 $factor
```
## Hints to activate the performance monitor unit (PMU) on ARM CPUs:
* First try to install `linux-tools` and see if it works.
* On many ARM platforms, the PMU has to be enabled using a kernel module:
- Source code for Armv6 (32-bit):
<http://sandsoftwaresound.net/raspberry-pi/raspberry-pi-gen-1/performance-counter-kernel-module/>
- Source code for Armv7 (32-bit):
<https://github.com/thoughtpolice/enable_arm_pmu>
- Source code for Armv8/Aarch64 (64-bit):
<https://github.com/rdolbeau/enable_arm_pmu>
* Steps to compile the kernel module on the raspberry pi:
- Find out the kernel version using `uname -a`
- Download the kernel header files, e.g. `raspberrypi-kernel-header`
- Download the source code for the Armv6 kernel module
- Build, install and load the kernel module
## Benchmark Ascon v1.2 using supercop
Download supercop according to the website: http://bench.cr.yp.to/supercop.html
To test only Ascon, just run the following commands:
```
./do-part init
./do-part crypto_aead ascon128v12
./do-part crypto_aead ascon128av12
./do-part crypto_aead ascon80pqv12
./do-part crypto_hash asconhashv12
./do-part crypto_hash asconxofv12
```
#!/bin/sh
ALG=$1
FACTOR=$2
for i in getcycles*$ALG*; do
echo
echo $i:
echo
echo "| 1 | 8 | 16 | 32 | 64 | 1536 | long |"
echo "|------:|------:|------:|------:|------:|------:|------:|"
for n in $(seq 5); do
./$i $FACTOR | tail -n 1
done | sort -n -k8 -t'|'
done 2>/dev/null
echo
......@@ -3,3 +3,4 @@
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16
......@@ -3,7 +3,10 @@
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
/* macros for big endian machines */
#ifndef NDEBUG
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
......@@ -11,21 +14,26 @@
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
/* macros for little endian machines */
#ifndef NDEBUG
#pragma message("Using macros for little endian machines")
#endif
#define U64BIG(x) \
((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \
(((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \
(((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \
(((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56))
(((0x00000000000000FFULL & (x)) << 56) | \
((0x000000000000FF00ULL & (x)) << 40) | \
((0x0000000000FF0000ULL & (x)) << 24) | \
((0x00000000FF000000ULL & (x)) << 8) | \
((0x000000FF00000000ULL & (x)) >> 8) | \
((0x0000FF0000000000ULL & (x)) >> 24) | \
((0x00FF000000000000ULL & (x)) >> 40) | \
((0xFF00000000000000ULL & (x)) >> 56))
#define U32BIG(x) \
((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \
(((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24))
#define U16BIG(x) \
((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8))
(((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \
((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24))
#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8))
#else
#error "ascon byte order macros not defined in endian.h"
#error "Ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
#endif /* ENDIAN_H_ */
#include "api.h"
#include "ascon.h"
#include "permutations.h"
#include "printstate.h"
__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) {
KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 20) {
*K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
k += 4;
}
*K1 = XOR(*K1, LOAD64(k));
*K2 = XOR(*K2, LOAD64(k + 8));
}
__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1,
word_t K2) {
word_t N0, N1;
/* load nonce */
N0 = LOAD64(npub);
N1 = LOAD64(npub + 8);
/* initialization */
PINIT(s);
s->x0 = XOR(s->x0, IV);
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0);
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
s->x3 = XOR(s->x3, N0);
s->x4 = XOR(s->x4, N1);
P12(s);
if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("initialization", s);
}
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
word_t* restrict px;
/* process associated data */
if (adlen) {
while (adlen >= ASCON_RATE) {
s->x0 = XOR(s->x0, LOAD64(ad));
if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8));
PB(s);
ad += ASCON_RATE;
adlen -= ASCON_RATE;
}
/* final associated data block */
px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) {
s->x0 = XOR(s->x0, LOAD64(ad));
px = &s->x1;
ad += 8;
adlen -= 8;
}
if (adlen) *px = XOR(*px, LOAD(ad, adlen));
*px = XOR(*px, PAD(adlen));
PB(s);
}
s->x4 = XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
}
__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
word_t* restrict px;
/* process plaintext */
while (mlen >= ASCON_RATE) {
s->x0 = XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
if (ASCON_RATE == 16) {
s->x1 = XOR(s->x1, LOAD64(m + 8));
STORE64(c + 8, s->x1);
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
mlen -= ASCON_RATE;
}
/* final plaintext block */
px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) {
s->x0 = XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
px = &s->x1;
m += 8;
c += 8;
mlen -= 8;
}
if (mlen) {
*px = XOR(*px, LOAD(m, mlen));
STORE(c, *px, mlen);
}
*px = XOR(*px, PAD(mlen));
printstate("process plaintext", s);
}
__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
word_t* restrict px;
word_t cx;
/* process ciphertext */
while (clen >= ASCON_RATE) {
cx = LOAD64(c);
s->x0 = XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
if (ASCON_RATE == 16) {
cx = LOAD64(c + 8);
s->x1 = XOR(s->x1, cx);
STORE64(m + 8, s->x1);
s->x1 = cx;
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
clen -= ASCON_RATE;
}
/* final ciphertext block */
px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) {
cx = LOAD64(c);
s->x0 = XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
px = &s->x1;
m += 8;
c += 8;
clen -= 8;
}
if (clen) {
cx = LOAD(c, clen);
*px = XOR(*px, cx);
STORE(m, *px, clen);
*px = CLEAR(*px, clen);
*px = XOR(*px, cx);
}
*px = XOR(*px, PAD(clen));
printstate("process ciphertext", s);
}
__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) {
/* finalization */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
s->x2 = XOR(s->x2, K1);
s->x3 = XOR(s->x3, K2);
}
if (CRYPTO_KEYBYTES == 20) {
s->x1 = XOR(s->x1, KEYROT(K0, K1));
s->x2 = XOR(s->x2, KEYROT(K1, K2));
s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0)));
}
P12(s);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("finalization", s);
}
#if ASCON_INLINE_MODE
#define INIT init
#define ABSORB absorb
#define ENCRYPT encrypt
#define DECRYPT decrypt
#define FINAL final
#else
#define INIT ascon_init
#define ABSORB ascon_absorb
#define ENCRYPT ascon_encrypt
#define DECRYPT ascon_decrypt
#define FINAL ascon_final
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
word_t K0, K1, K2;
loadkey(&K0, &K1, &K2, k);
init(s, npub, K0, K1, K2);
}
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
absorb(s, ad, adlen);
}
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) {
encrypt(s, c, m, mlen);
}
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) {
decrypt(s, m, c, clen);
}
void ascon_final(state_t* s, const uint8_t* k) {
word_t K0, K1, K2;
loadkey(&K0, &K1, &K2, k);
final(s, K0, K1, K2);
}
#endif
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
uint64_t mlen, const uint8_t* ad, uint64_t adlen,
const uint8_t* nsec, const uint8_t* npub,
const uint8_t* k) {
word_t K0, K1, K2;
state_t s;
(void)nsec;
*clen = mlen + CRYPTO_ABYTES;
/* perform ascon computation */
loadkey(&K0, &K1, &K2, k);
INIT(&s, npub, K0, K1, K2);
ABSORB(&s, ad, adlen);
ENCRYPT(&s, c, m, mlen);
FINAL(&s, K0, K1, K2);
/* set tag */
c += mlen;
STOREBYTES(c, s.x3, 8);
STOREBYTES(c + 8, s.x4, 8);
return 0;
}
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
const uint8_t* c, uint64_t clen, const uint8_t* ad,
uint64_t adlen, const uint8_t* npub, const uint8_t* k) {
word_t K0, K1, K2;
state_t s;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
*mlen = clen = clen - CRYPTO_ABYTES;
/* perform ascon computation */
loadkey(&K0, &K1, &K2, k);
INIT(&s, npub, K0, K1, K2);
ABSORB(&s, ad, adlen);
DECRYPT(&s, m, c, clen);
FINAL(&s, K0, K1, K2);
/* verify tag (should be constant time, check compiler output) */
c += clen;
s.x3 = XOR(s.x3, LOADBYTES(c, 8));
s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8));
if (NOTZERO(s.x3, s.x4)) {
*mlen = 0;
return -1;
}
return 0;
}
......@@ -3,3 +3,4 @@
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16
#include "ascon.h"
#include "api.h"
#include "loadstore.h"
#include "permutations.h"
#include "printstate.h"
__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) {
KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 20) {
XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
k += 4;
}
XOR(*K1, LOAD64(k));
XOR(*K2, LOAD64(k + 8));
}
__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) {
word_t N0, N1;
word_t K0, K1, K2;
/* load nonce */
N0 = LOAD64(npub);
N1 = LOAD64(npub + 8);
/* load key */
loadkey(&K0, &K1, &K2, k);
/* initialization */
PINIT(s);
XOR(s->x0, IV);
if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0);
XOR(s->x1, K1);
XOR(s->x2, K2);
XOR(s->x3, N0);
XOR(s->x4, N1);
P12(s);
if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0);
XOR(s->x3, K1);
XOR(s->x4, K2);
printstate("initialization", s);
}
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
word_t* restrict px;
/* process associated data */
if (adlen) {
while (adlen >= ASCON_RATE) {
XOR(s->x0, LOAD64(ad));
if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8));
PB(s);
ad += ASCON_RATE;
adlen -= ASCON_RATE;
}
/* final associated data block */
px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) {
XOR(s->x0, LOAD64(ad));
px = &s->x1;
ad += 8;
adlen -= 8;
}
if (adlen) XOR(*px, LOAD(ad, adlen));
XOR(*px, PAD(adlen));
PB(s);
}
XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
}
__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
word_t* restrict px;
/* process plaintext */
while (mlen >= ASCON_RATE) {
XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
if (ASCON_RATE == 16) {
XOR(s->x1, LOAD64(m + 8));
STORE64(c + 8, s->x1);
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
mlen -= ASCON_RATE;
}
/* final plaintext block */
px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) {
XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
px = &s->x1;
m += 8;
c += 8;
mlen -= 8;
}
if (mlen) {
XOR(*px, LOAD(m, mlen));
STORE(c, *px, mlen);
}
XOR(*px, PAD(mlen));
printstate("process plaintext", s);
}
__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
word_t* restrict px;
word_t cx;
/* process ciphertext */
while (clen >= ASCON_RATE) {
cx = LOAD64(c);
XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
if (ASCON_RATE == 16) {
cx = LOAD64(c + 8);
XOR(s->x1, cx);
STORE64(m + 8, s->x1);
s->x1 = cx;
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
clen -= ASCON_RATE;
}
/* final ciphertext block */
px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) {
cx = LOAD64(c);
XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
px = &s->x1;
m += 8;
c += 8;
clen -= 8;
}
if (clen) {
cx = LOAD(c, clen);
XOR(*px, cx);
STORE(m, *px, clen);
AND(*px, XMASK(clen));
XOR(*px, cx);
}
XOR(*px, PAD(clen));
printstate("process ciphertext", s);
}
__forceinline void final(state_t* s, const uint8_t* k) {
word_t K0, K1, K2;
/* load key */
loadkey(&K0, &K1, &K2, k);
/* finalization */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
XOR(s->x1, K1);
XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
XOR(s->x2, K1);
XOR(s->x3, K2);
}
if (CRYPTO_KEYBYTES == 20) {
XOR(s->x1, KEYROT(K0, K1));
XOR(s->x2, KEYROT(K1, K2));
XOR(s->x3, KEYROT(K2, WORD_T(0)));
}
P12(s);
XOR(s->x3, K1);
XOR(s->x4, K2);
printstate("finalization", s);
}
#if !ASCON_INLINE_MODE
#define INIT ascon_init
#define ABSORB ascon_absorb
#define ENCRYPT ascon_encrypt
#define DECRYPT ascon_decrypt
#define FINAL ascon_final
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
init(s, npub, k);
}
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
absorb(s, ad, adlen);
}
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) {
encrypt(s, c, m, mlen);
}
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) {
decrypt(s, m, c, clen);
}
void ascon_final(state_t* s, const uint8_t* k) { final(s, k); }
#else
#define INIT init
#define ABSORB absorb
#define ENCRYPT encrypt
#define DECRYPT decrypt
#define FINAL final
#endif
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
uint64_t mlen, const uint8_t* ad, uint64_t adlen,
const uint8_t* nsec, const uint8_t* npub,
const uint8_t* k) {
state_t s;
(void)nsec;
*clen = mlen + CRYPTO_ABYTES;
/* perform ascon computation */
INIT(&s, npub, k);
ABSORB(&s, ad, adlen);
ENCRYPT(&s, c, m, mlen);
FINAL(&s, k);
/* set tag */
c += mlen;
STORE64(c, s.x3);
STORE64(c + 8, s.x4);
return 0;
}
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
const uint8_t* c, uint64_t clen, const uint8_t* ad,
uint64_t adlen, const uint8_t* npub, const uint8_t* k) {
state_t s;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
*mlen = clen = clen - CRYPTO_ABYTES;
/* perform ascon computation */
INIT(&s, npub, k);
ABSORB(&s, ad, adlen);
DECRYPT(&s, m, c, clen);
FINAL(&s, k);
/* verify tag (should be constant time, check compiler output) */
c += clen;
XOR(s.x3, LOAD64(c));
XOR(s.x4, LOAD64(c + 8));
if (NOTZERO(s.x3, s.x4)) {
*mlen = 0;
return -1;
}
return 0;
}
#ifndef ASCON_H_
#define ASCON_H_
#include <stdint.h>
#include "config.h"
#include "word.h"
typedef struct {
word_t x0, x1, x2, x3, x4;
} state_t;
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k);
#endif /* ASCON_H */
#ifndef CONFIG_H_
#define CONFIG_H_
/* inline the Ascon mode */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#endif
/* inline the Ascon permutations */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0
#endif
/* single function for all permutations */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif
/* unroll the permutation loops */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 0
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#endif
#endif /* CONFIG_H_ */
......@@ -3,7 +3,10 @@
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
/* macros for big endian machines */
#ifndef NDEBUG
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
......@@ -11,21 +14,26 @@
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
/* macros for little endian machines */
#ifndef NDEBUG
#pragma message("Using macros for little endian machines")
#endif
#define U64BIG(x) \
((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \
(((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \
(((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \
(((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56))
(((0x00000000000000FFULL & (x)) << 56) | \
((0x000000000000FF00ULL & (x)) << 40) | \
((0x0000000000FF0000ULL & (x)) << 24) | \
((0x00000000FF000000ULL & (x)) << 8) | \
((0x000000FF00000000ULL & (x)) >> 8) | \
((0x0000FF0000000000ULL & (x)) >> 24) | \
((0x00FF000000000000ULL & (x)) >> 40) | \
((0xFF00000000000000ULL & (x)) >> 56))
#define U32BIG(x) \
((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \
(((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24))
#define U16BIG(x) \
((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8))
(((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \
((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24))
#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8))
#else
#error "ascon byte order macros not defined in endian.h"
#error "Ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
#endif /* ENDIAN_H_ */
#ifndef INTERLEAVE_H_
#define INTERLEAVE_H_
#include <stdint.h>
__forceinline uint32_t deinterleave_uint32(uint32_t x) {
uint32_t t;
t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1);
t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2);
t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4);
t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8);
return x;
}
__forceinline uint32_t interleave_uint32(uint32_t x) {
uint32_t t;
t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8);
t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4);
t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2);
t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1);
return x;
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t deinterleave32(uint64_t in) {
uint32_t hi = in >> 32;
uint32_t lo = in;
uint32_t r0, r1;
lo = deinterleave_uint32(lo);
hi = deinterleave_uint32(hi);
r0 = (lo & 0x0000FFFF) | (hi << 16);
r1 = (lo >> 16) | (hi & 0xFFFF0000);
return (uint64_t)r1 << 32 | r0;
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t interleave32(uint64_t in) {
uint32_t r0 = in;
uint32_t r1 = in >> 32;
uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16);
uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000);
lo = interleave_uint32(lo);
hi = interleave_uint32(hi);
return (uint64_t)hi << 32 | lo;
}
#endif /* INTERLEAVE_H_ */
#ifndef LOADSTORE_H_
#define LOADSTORE_H_
#include <stdint.h>
#include "config.h"
#include "endian.h"
#include "word.h"
/* 64-bit LSB mask (undefined for n == 0) */
#define MASK(n) (~0ull >> (64 - (n)))
/* get byte from Ascon 64-bit word */
#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i))))
/* set byte in Ascon 64-bit word */
#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i)))
#if ASCON_DATA_ACCESS == 'W'
#ifndef NDEBUG
#pragma message("Using wordwise data access")
#endif
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = U64BIG(*(uint64_t*)bytes);
return U64TOWORD(x);
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n));
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(8 * n);
*(uint64_t*)bytes |= U64BIG(x);
}
#elif ASCON_DATA_ACCESS == 'M'
#ifndef NDEBUG
#pragma message("Using memcpy to access data")
#endif
#include <string.h>
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
memcpy((uint8_t*)&x, bytes, n);
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = U64BIG(WORDTOU64(w));
memcpy(bytes, (uint8_t*)&x, n);
}
#elif ASCON_DATA_ACCESS == 'B'
#ifndef NDEBUG
#pragma message("Using bytewise data access")
#endif
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i;
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i;
}
#elif ASCON_DATA_ACCESS == 'H'
#ifndef NDEBUG
#pragma message("Using hybrid data access")
#endif
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
if (n == 8)
x = U64BIG(*(uint64_t*)bytes);
else
for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i;
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
if (n == 8)
*(uint64_t*)bytes = U64BIG(x);
else
for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i;
}
#else
#error "Ascon data access macro not defined correctly"
#endif
#endif /* LOADSTORE_H_ */
#include "permutations.h"
#include "round.h"
#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM
const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
{0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9},
{0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}};
#endif
#if ASCON_INLINE_PERM
#elif ASCON_SINGLE_PERM
void P(state_t* s, uint8_t rounds) {
printstate(" permutation input", s);
for (int i = START(rounds); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
#endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8
void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
#endif
#endif
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
#include "endian.h"
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
typedef struct {
u32 e;
u32 o;
} u32_2;
#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n)))))
#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n))))
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
#define to_bit_interleaving(out, in) \
do { \
u32 hi = (in) >> 32; \
u32 lo = (u32)(in); \
u32 r0, r1; \
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \
(out).e = (lo & 0x0000FFFF) | (hi << 16); \
(out).o = (lo >> 16) | (hi & 0xFFFF0000); \
} while (0)
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
#define from_bit_interleaving(out, in) \
do { \
u32 lo = ((in).e & 0x0000FFFF) | ((in).o << 16); \
u32 hi = ((in).e >> 16) | ((in).o & 0xFFFF0000); \
u32 r0, r1; \
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \
out = (u64)hi << 32 | lo; \
} while (0)
#define ROUND(C_e, C_o) \
do { \
/* round constant */ \
x2.e ^= C_e; x2.o ^= C_o; \
/* s-box layer */ \
x0.e ^= x4.e; x0.o ^= x4.o; \
x4.e ^= x3.e; x4.o ^= x3.o; \
x2.e ^= x1.e; x2.o ^= x1.o; \
t0.e = x0.e; t0.o = x0.o; \
t4.e = x4.e; t4.o = x4.o; \
t3.e = x3.e; t3.o = x3.o; \
t1.e = x1.e; t1.o = x1.o; \
t2.e = x2.e; t2.o = x2.o; \
x0.e = t0.e ^ (~t1.e & t2.e); x0.o = t0.o ^ (~t1.o & t2.o); \
x2.e = t2.e ^ (~t3.e & t4.e); x2.o = t2.o ^ (~t3.o & t4.o); \
x4.e = t4.e ^ (~t0.e & t1.e); x4.o = t4.o ^ (~t0.o & t1.o); \
x1.e = t1.e ^ (~t2.e & t3.e); x1.o = t1.o ^ (~t2.o & t3.o); \
x3.e = t3.e ^ (~t4.e & t0.e); x3.o = t3.o ^ (~t4.o & t0.o); \
x1.e ^= x0.e; x1.o ^= x0.o; \
x3.e ^= x2.e; x3.o ^= x2.o; \
x0.e ^= x4.e; x0.o ^= x4.o; \
/* linear layer */ \
t0.e = x0.e ^ ROTR32(x0.o, 4); t0.o = x0.o ^ ROTR32(x0.e, 5); \
t1.e = x1.e ^ ROTR32(x1.e, 11); t1.o = x1.o ^ ROTR32(x1.o, 11); \
t2.e = x2.e ^ ROTR32(x2.o, 2); t2.o = x2.o ^ ROTR32(x2.e, 3); \
t3.e = x3.e ^ ROTR32(x3.o, 3); t3.o = x3.o ^ ROTR32(x3.e, 4); \
t4.e = x4.e ^ ROTR32(x4.e, 17); t4.o = x4.o ^ ROTR32(x4.o, 17); \
x0.e ^= ROTR32(t0.o, 9); x0.o ^= ROTR32(t0.e, 10); \
x1.e ^= ROTR32(t1.o, 19); x1.o ^= ROTR32(t1.e, 20); \
x2.e ^= t2.o; x2.o ^= ROTR32(t2.e, 1); \
x3.e ^= ROTR32(t3.e, 5); x3.o ^= ROTR32(t3.o, 5); \
x4.e ^= ROTR32(t4.o, 3); x4.o ^= ROTR32(t4.e, 4); \
x2.e = ~x2.e; x2.o = ~x2.o; \
} while(0)
#define P12() \
do { \
ROUND(0xc, 0xc); \
ROUND(0x9, 0xc); \
ROUND(0xc, 0x9); \
ROUND(0x9, 0x9); \
ROUND(0x6, 0xc); \
ROUND(0x3, 0xc); \
ROUND(0x6, 0x9); \
ROUND(0x3, 0x9); \
ROUND(0xc, 0x6); \
ROUND(0x9, 0x6); \
ROUND(0xc, 0x3); \
ROUND(0x9, 0x3); \
} while (0)
#define P8() \
do { \
ROUND(0x6, 0xc); \
ROUND(0x3, 0xc); \
ROUND(0x6, 0x9); \
ROUND(0x3, 0x9); \
ROUND(0xc, 0x6); \
ROUND(0x9, 0x6); \
ROUND(0xc, 0x3); \
ROUND(0x9, 0x3); \
} while (0)
#define P6() \
do { \
ROUND(0x6, 0x9); \
ROUND(0x3, 0x9); \
ROUND(0xc, 0x6); \
ROUND(0x9, 0x6); \
ROUND(0xc, 0x3); \
ROUND(0x9, 0x3); \
} while (0)
#endif // PERMUTATIONS_H_
#include <stdint.h>
#include "api.h"
#include "ascon.h"
#include "printstate.h"
#include "round.h"
#define ASCON_128_KEYBYTES 16
#define ASCON_128A_KEYBYTES 16
#define ASCON_80PQ_KEYBYTES 20
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32
#define ASCON_128_IV WORD_T(0x8021000008220000)
#define ASCON_128A_IV WORD_T(0x8822000000200000)
#define ASCON_80PQ_IV WORD_T(0xc021000008220000)
#define ASCON_HASH_IV WORD_T(0x0020000008020010)
#define ASCON_XOF_IV WORD_T(0x0020000008020000)
#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7)
#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340)
#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8)
#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98)
#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398)
#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6)
#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220)
#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04)
#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2)
#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#if ASCON_RATE == 16
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20
#define IV ASCON_80PQ_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#define START(n) (12 - n)
#if ASCON_UNROLL_LOOPS
__forceinline void P12ROUNDS(state_t* s) {
ROUND(s, 0xc, 0xc);
ROUND(s, 0x9, 0xc);
ROUND(s, 0xc, 0x9);
ROUND(s, 0x9, 0x9);
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
__forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
__forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
#else /* !ASCON_UNROLL_LOOPS */
extern const uint8_t constants[][2];
__forceinline void P12ROUNDS(state_t* s) {
for (int i = START(12); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
__forceinline void P8ROUNDS(state_t* s) {
for (int i = START(8); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
__forceinline void P6ROUNDS(state_t* s) {
for (int i = START(6); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#endif
#if ASCON_INLINE_PERM
__forceinline void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
__forceinline void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
__forceinline void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#elif ASCON_SINGLE_PERM
#define P12(s) P(s, 12)
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
void P(state_t* s, uint8_t rounds);
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#endif
#endif /* PERMUTATIONS_H_ */
#ifndef PRINTSTATE_H_
#define PRINTSTATE_H_
#ifdef NDEBUG
#define printword(text, w)
#define printstate(text, s)
#else
#include <inttypes.h>
#include <stdio.h>
#include "ascon.h"
#include "word.h"
__forceinline void printword(const char* text, const word_t x) {
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x));
}
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#endif
#endif /* PRINTSTATE_H_ */
#ifndef ROUND_H_
#define ROUND_H_
#include "ascon.h"
#include "printstate.h"
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
*K0 = WORD_T(0);
*K1 = WORD_T(0);
*K2 = WORD_T(0);
}
__forceinline void PINIT(state_t* s) {
s->x0 = WORD_T(0);
s->x1 = WORD_T(0);
s->x2 = WORD_T(0);
s->x3 = WORD_T(0);
s->x4 = WORD_T(0);
}
__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) {
state_t t;
/* round constant */
s->x2.e ^= C_e;
s->x2.o ^= C_o;
/* s-box layer */
s->x0.e ^= s->x4.e;
s->x0.o ^= s->x4.o;
s->x4.e ^= s->x3.e;
s->x4.o ^= s->x3.o;
s->x2.e ^= s->x1.e;
s->x2.o ^= s->x1.o;
t.x0.e = s->x0.e;
t.x0.o = s->x0.o;
t.x4.e = s->x4.e;
t.x4.o = s->x4.o;
t.x3.e = s->x3.e;
t.x3.o = s->x3.o;
t.x1.e = s->x1.e;
t.x1.o = s->x1.o;
t.x2.e = s->x2.e;
t.x2.o = s->x2.o;
s->x0.e = t.x0.e ^ (~t.x1.e & t.x2.e);
s->x0.o = t.x0.o ^ (~t.x1.o & t.x2.o);
s->x2.e = t.x2.e ^ (~t.x3.e & t.x4.e);
s->x2.o = t.x2.o ^ (~t.x3.o & t.x4.o);
s->x4.e = t.x4.e ^ (~t.x0.e & t.x1.e);
s->x4.o = t.x4.o ^ (~t.x0.o & t.x1.o);
s->x1.e = t.x1.e ^ (~t.x2.e & t.x3.e);
s->x1.o = t.x1.o ^ (~t.x2.o & t.x3.o);
s->x3.e = t.x3.e ^ (~t.x4.e & t.x0.e);
s->x3.o = t.x3.o ^ (~t.x4.o & t.x0.o);
s->x1.e ^= s->x0.e;
s->x1.o ^= s->x0.o;
s->x3.e ^= s->x2.e;
s->x3.o ^= s->x2.o;
s->x0.e ^= s->x4.e;
s->x0.o ^= s->x4.o;
/* linear layer */
t.x0.e = s->x0.e ^ ROR32(s->x0.o, 4);
t.x0.o = s->x0.o ^ ROR32(s->x0.e, 5);
t.x1.e = s->x1.e ^ ROR32(s->x1.e, 11);
t.x1.o = s->x1.o ^ ROR32(s->x1.o, 11);
t.x2.e = s->x2.e ^ ROR32(s->x2.o, 2);
t.x2.o = s->x2.o ^ ROR32(s->x2.e, 3);
t.x3.e = s->x3.e ^ ROR32(s->x3.o, 3);
t.x3.o = s->x3.o ^ ROR32(s->x3.e, 4);
t.x4.e = s->x4.e ^ ROR32(s->x4.e, 17);
t.x4.o = s->x4.o ^ ROR32(s->x4.o, 17);
s->x0.e ^= ROR32(t.x0.o, 9);
s->x0.o ^= ROR32(t.x0.e, 10);
s->x1.e ^= ROR32(t.x1.o, 19);
s->x1.o ^= ROR32(t.x1.e, 20);
s->x2.e ^= t.x2.o;
s->x2.o ^= ROR32(t.x2.e, 1);
s->x3.e ^= ROR32(t.x3.e, 5);
s->x3.o ^= ROR32(t.x3.o, 5);
s->x4.e ^= ROR32(t.x4.o, 3);
s->x4.o ^= ROR32(t.x4.e, 4);
s->x2.e = ~s->x2.e;
s->x2.o = ~s->x2.o;
printstate(" round output", s);
}
#endif /* ROUND_H_ */
#ifndef WORD_H_
#define WORD_H_
#include <stdint.h>
#include "endian.h"
#include "interleave.h"
typedef struct {
uint32_t e;
uint32_t o;
} word_t;
__forceinline uint32_t ROR32(uint32_t x, int n) {
return (n == 0) ? x : x >> n | x << (32 - n);
}
__forceinline word_t ROR64(word_t x, int n) {
word_t r;
r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2);
r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2);
return r;
}
__forceinline word_t WORD_T(uint64_t x) {
return (word_t){.o = x >> 32, .e = x};
}
__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; }
__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); }
__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); }
__forceinline word_t NOT(word_t a) {
a.e = ~a.e;
a.o = ~a.o;
return a;
}
__forceinline word_t XOR(word_t a, word_t b) {
a.e ^= b.e;
a.o ^= b.o;
return a;
}
__forceinline word_t AND(word_t a, word_t b) {
a.e &= b.e;
a.o &= b.o;
return a;
}
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
word_t r;
r.o = lo2hi.o << 16 | hi2lo.o >> 16;
r.e = lo2hi.e << 16 | hi2lo.e >> 16;
return r;
}
__forceinline uint8_t NOTZERO(word_t a, word_t b) {
uint32_t result = a.e | a.o | b.e | b.o;
result |= result >> 16;
result |= result >> 8;
return (uint8_t)result;
}
__forceinline word_t PAD(int i) {
return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32);
}
__forceinline word_t CLEAR(word_t w, int n) {
/* undefined for n == 0 */
uint32_t mask = 0x0fffffff >> (n * 4 - 4);
return AND(w, WORD_T((uint64_t)mask << 32 | mask));
}
__forceinline uint64_t MASK(int n) {
/* undefined for n == 0 */
return ~0ull >> (64 - 8 * n);
}
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = *(uint64_t*)bytes;
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes & MASK(n);
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(n);
*(uint64_t*)bytes |= U64BIG(x);
}
__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
uint64_t x = 0;
for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i];
return U64TOWORD(x);
}
__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i];
}
#endif /* WORD_H_ */
#include "api.h"
#include "ascon.h"
#include "permutations.h"
#include "printstate.h"
__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) {
KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 20) {
*K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
k += 4;
}
*K1 = XOR(*K1, LOAD64(k));
*K2 = XOR(*K2, LOAD64(k + 8));
}
__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1,
word_t K2) {
word_t N0, N1;
/* load nonce */
N0 = LOAD64(npub);
N1 = LOAD64(npub + 8);
/* initialization */
PINIT(s);
s->x0 = XOR(s->x0, IV);
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0);
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
s->x3 = XOR(s->x3, N0);
s->x4 = XOR(s->x4, N1);
P12(s);
if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("initialization", s);
}
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
word_t* restrict px;
/* process associated data */
if (adlen) {
while (adlen >= ASCON_RATE) {
s->x0 = XOR(s->x0, LOAD64(ad));
if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8));
PB(s);
ad += ASCON_RATE;
adlen -= ASCON_RATE;
}
/* final associated data block */
px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) {
s->x0 = XOR(s->x0, LOAD64(ad));
px = &s->x1;
ad += 8;
adlen -= 8;
}
if (adlen) *px = XOR(*px, LOAD(ad, adlen));
*px = XOR(*px, PAD(adlen));
PB(s);
}
s->x4 = XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
}
__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
word_t* restrict px;
/* process plaintext */
while (mlen >= ASCON_RATE) {
s->x0 = XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
if (ASCON_RATE == 16) {
s->x1 = XOR(s->x1, LOAD64(m + 8));
STORE64(c + 8, s->x1);
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
mlen -= ASCON_RATE;
}
/* final plaintext block */
px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) {
s->x0 = XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
px = &s->x1;
m += 8;
c += 8;
mlen -= 8;
}
if (mlen) {
*px = XOR(*px, LOAD(m, mlen));
STORE(c, *px, mlen);
}
*px = XOR(*px, PAD(mlen));
printstate("process plaintext", s);
}
__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
word_t* restrict px;
word_t cx;
/* process ciphertext */
while (clen >= ASCON_RATE) {
cx = LOAD64(c);
s->x0 = XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
if (ASCON_RATE == 16) {
cx = LOAD64(c + 8);
s->x1 = XOR(s->x1, cx);
STORE64(m + 8, s->x1);
s->x1 = cx;
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
clen -= ASCON_RATE;
}
/* final ciphertext block */
px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) {
cx = LOAD64(c);
s->x0 = XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
px = &s->x1;
m += 8;
c += 8;
clen -= 8;
}
if (clen) {
cx = LOAD(c, clen);
*px = XOR(*px, cx);
STORE(m, *px, clen);
*px = CLEAR(*px, clen);
*px = XOR(*px, cx);
}
*px = XOR(*px, PAD(clen));
printstate("process ciphertext", s);
}
__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) {
/* finalization */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
s->x2 = XOR(s->x2, K1);
s->x3 = XOR(s->x3, K2);
}
if (CRYPTO_KEYBYTES == 20) {
s->x1 = XOR(s->x1, KEYROT(K0, K1));
s->x2 = XOR(s->x2, KEYROT(K1, K2));
s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0)));
}
P12(s);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("finalization", s);
}
#if ASCON_INLINE_MODE
#define INIT init
#define ABSORB absorb
#define ENCRYPT encrypt
#define DECRYPT decrypt
#define FINAL final
#else
#define INIT ascon_init
#define ABSORB ascon_absorb
#define ENCRYPT ascon_encrypt
#define DECRYPT ascon_decrypt
#define FINAL ascon_final
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
word_t K0, K1, K2;
loadkey(&K0, &K1, &K2, k);
init(s, npub, K0, K1, K2);
}
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
absorb(s, ad, adlen);
}
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) {
encrypt(s, c, m, mlen);
}
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) {
decrypt(s, m, c, clen);
}
void ascon_final(state_t* s, const uint8_t* k) {
word_t K0, K1, K2;
loadkey(&K0, &K1, &K2, k);
final(s, K0, K1, K2);
}
#endif
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
uint64_t mlen, const uint8_t* ad, uint64_t adlen,
const uint8_t* nsec, const uint8_t* npub,
const uint8_t* k) {
word_t K0, K1, K2;
state_t s;
(void)nsec;
*clen = mlen + CRYPTO_ABYTES;
/* perform ascon computation */
loadkey(&K0, &K1, &K2, k);
INIT(&s, npub, K0, K1, K2);
ABSORB(&s, ad, adlen);
ENCRYPT(&s, c, m, mlen);
FINAL(&s, K0, K1, K2);
/* set tag */
c += mlen;
STOREBYTES(c, s.x3, 8);
STOREBYTES(c + 8, s.x4, 8);
return 0;
}
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
const uint8_t* c, uint64_t clen, const uint8_t* ad,
uint64_t adlen, const uint8_t* npub, const uint8_t* k) {
word_t K0, K1, K2;
state_t s;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
*mlen = clen = clen - CRYPTO_ABYTES;
/* perform ascon computation */
loadkey(&K0, &K1, &K2, k);
INIT(&s, npub, K0, K1, K2);
ABSORB(&s, ad, adlen);
DECRYPT(&s, m, c, clen);
FINAL(&s, K0, K1, K2);
/* verify tag (should be constant time, check compiler output) */
c += clen;
s.x3 = XOR(s.x3, LOADBYTES(c, 8));
s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8));
if (NOTZERO(s.x3, s.x4)) {
*mlen = 0;
return -1;
}
return 0;
}
......@@ -3,3 +3,4 @@
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16
#include "ascon.h"
#include "api.h"
#include "loadstore.h"
#include "permutations.h"
#include "printstate.h"
__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) {
KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 20) {
XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
k += 4;
}
XOR(*K1, LOAD64(k));
XOR(*K2, LOAD64(k + 8));
}
__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) {
word_t N0, N1;
word_t K0, K1, K2;
/* load nonce */
N0 = LOAD64(npub);
N1 = LOAD64(npub + 8);
/* load key */
loadkey(&K0, &K1, &K2, k);
/* initialization */
PINIT(s);
XOR(s->x0, IV);
if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0);
XOR(s->x1, K1);
XOR(s->x2, K2);
XOR(s->x3, N0);
XOR(s->x4, N1);
P12(s);
if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0);
XOR(s->x3, K1);
XOR(s->x4, K2);
printstate("initialization", s);
}
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
word_t* restrict px;
/* process associated data */
if (adlen) {
while (adlen >= ASCON_RATE) {
XOR(s->x0, LOAD64(ad));
if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8));
PB(s);
ad += ASCON_RATE;
adlen -= ASCON_RATE;
}
/* final associated data block */
px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) {
XOR(s->x0, LOAD64(ad));
px = &s->x1;
ad += 8;
adlen -= 8;
}
if (adlen) XOR(*px, LOAD(ad, adlen));
XOR(*px, PAD(adlen));
PB(s);
}
XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
}
__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
word_t* restrict px;
/* process plaintext */
while (mlen >= ASCON_RATE) {
XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
if (ASCON_RATE == 16) {
XOR(s->x1, LOAD64(m + 8));
STORE64(c + 8, s->x1);
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
mlen -= ASCON_RATE;
}
/* final plaintext block */
px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) {
XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
px = &s->x1;
m += 8;
c += 8;
mlen -= 8;
}
if (mlen) {
XOR(*px, LOAD(m, mlen));
STORE(c, *px, mlen);
}
XOR(*px, PAD(mlen));
printstate("process plaintext", s);
}
__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
word_t* restrict px;
word_t cx;
/* process ciphertext */
while (clen >= ASCON_RATE) {
cx = LOAD64(c);
XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
if (ASCON_RATE == 16) {
cx = LOAD64(c + 8);
XOR(s->x1, cx);
STORE64(m + 8, s->x1);
s->x1 = cx;
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
clen -= ASCON_RATE;
}
/* final ciphertext block */
px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) {
cx = LOAD64(c);
XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
px = &s->x1;
m += 8;
c += 8;
clen -= 8;
}
if (clen) {
cx = LOAD(c, clen);
XOR(*px, cx);
STORE(m, *px, clen);
AND(*px, XMASK(clen));
XOR(*px, cx);
}
XOR(*px, PAD(clen));
printstate("process ciphertext", s);
}
__forceinline void final(state_t* s, const uint8_t* k) {
word_t K0, K1, K2;
/* load key */
loadkey(&K0, &K1, &K2, k);
/* finalization */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
XOR(s->x1, K1);
XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
XOR(s->x2, K1);
XOR(s->x3, K2);
}
if (CRYPTO_KEYBYTES == 20) {
XOR(s->x1, KEYROT(K0, K1));
XOR(s->x2, KEYROT(K1, K2));
XOR(s->x3, KEYROT(K2, WORD_T(0)));
}
P12(s);
XOR(s->x3, K1);
XOR(s->x4, K2);
printstate("finalization", s);
}
#if !ASCON_INLINE_MODE
#define INIT ascon_init
#define ABSORB ascon_absorb
#define ENCRYPT ascon_encrypt
#define DECRYPT ascon_decrypt
#define FINAL ascon_final
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
init(s, npub, k);
}
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
absorb(s, ad, adlen);
}
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) {
encrypt(s, c, m, mlen);
}
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) {
decrypt(s, m, c, clen);
}
void ascon_final(state_t* s, const uint8_t* k) { final(s, k); }
#else
#define INIT init
#define ABSORB absorb
#define ENCRYPT encrypt
#define DECRYPT decrypt
#define FINAL final
#endif
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
uint64_t mlen, const uint8_t* ad, uint64_t adlen,
const uint8_t* nsec, const uint8_t* npub,
const uint8_t* k) {
state_t s;
(void)nsec;
*clen = mlen + CRYPTO_ABYTES;
/* perform ascon computation */
INIT(&s, npub, k);
ABSORB(&s, ad, adlen);
ENCRYPT(&s, c, m, mlen);
FINAL(&s, k);
/* set tag */
c += mlen;
STORE64(c, s.x3);
STORE64(c + 8, s.x4);
return 0;
}
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
const uint8_t* c, uint64_t clen, const uint8_t* ad,
uint64_t adlen, const uint8_t* npub, const uint8_t* k) {
state_t s;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
*mlen = clen = clen - CRYPTO_ABYTES;
/* perform ascon computation */
INIT(&s, npub, k);
ABSORB(&s, ad, adlen);
DECRYPT(&s, m, c, clen);
FINAL(&s, k);
/* verify tag (should be constant time, check compiler output) */
c += clen;
XOR(s.x3, LOAD64(c));
XOR(s.x4, LOAD64(c + 8));
if (NOTZERO(s.x3, s.x4)) {
*mlen = 0;
return -1;
}
return 0;
}
#ifndef ASCON_H_
#define ASCON_H_
#include <stdint.h>
#include "config.h"
#include "word.h"
typedef struct {
word_t x0, x1, x2, x3, x4;
} state_t;
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k);
#endif /* ASCON_H */
#ifndef CONFIG_H_
#define CONFIG_H_
/* inline the Ascon mode */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#endif
/* inline the Ascon permutations */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0
#endif
/* single function for all permutations */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif
/* unroll the permutation loops */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 1
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#endif
#endif /* CONFIG_H_ */
......@@ -3,7 +3,10 @@
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
/* macros for big endian machines */
#ifndef NDEBUG
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
......@@ -11,21 +14,26 @@
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
/* macros for little endian machines */
#ifndef NDEBUG
#pragma message("Using macros for little endian machines")
#endif
#define U64BIG(x) \
((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \
(((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \
(((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \
(((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56))
(((0x00000000000000FFULL & (x)) << 56) | \
((0x000000000000FF00ULL & (x)) << 40) | \
((0x0000000000FF0000ULL & (x)) << 24) | \
((0x00000000FF000000ULL & (x)) << 8) | \
((0x000000FF00000000ULL & (x)) >> 8) | \
((0x0000FF0000000000ULL & (x)) >> 24) | \
((0x00FF000000000000ULL & (x)) >> 40) | \
((0xFF00000000000000ULL & (x)) >> 56))
#define U32BIG(x) \
((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \
(((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24))
#define U16BIG(x) \
((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8))
(((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \
((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24))
#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8))
#else
#error "ascon byte order macros not defined in endian.h"
#error "Ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
#endif /* ENDIAN_H_ */
#ifndef INTERLEAVE_H_
#define INTERLEAVE_H_
#include <stdint.h>
__forceinline uint32_t deinterleave_uint32(uint32_t x) {
uint32_t t;
t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1);
t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2);
t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4);
t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8);
return x;
}
__forceinline uint32_t interleave_uint32(uint32_t x) {
uint32_t t;
t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8);
t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4);
t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2);
t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1);
return x;
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t deinterleave32(uint64_t in) {
uint32_t hi = in >> 32;
uint32_t lo = in;
uint32_t r0, r1;
lo = deinterleave_uint32(lo);
hi = deinterleave_uint32(hi);
r0 = (lo & 0x0000FFFF) | (hi << 16);
r1 = (lo >> 16) | (hi & 0xFFFF0000);
return (uint64_t)r1 << 32 | r0;
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t interleave32(uint64_t in) {
uint32_t r0 = in;
uint32_t r1 = in >> 32;
uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16);
uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000);
lo = interleave_uint32(lo);
hi = interleave_uint32(hi);
return (uint64_t)hi << 32 | lo;
}
#endif /* INTERLEAVE_H_ */
#ifndef LOADSTORE_H_
#define LOADSTORE_H_
#include <stdint.h>
#include "config.h"
#include "endian.h"
#include "word.h"
/* 64-bit LSB mask (undefined for n == 0) */
#define MASK(n) (~0ull >> (64 - (n)))
/* get byte from Ascon 64-bit word */
#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i))))
/* set byte in Ascon 64-bit word */
#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i)))
#if ASCON_DATA_ACCESS == 'W'
#ifndef NDEBUG
#pragma message("Using wordwise data access")
#endif
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = U64BIG(*(uint64_t*)bytes);
return U64TOWORD(x);
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n));
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(8 * n);
*(uint64_t*)bytes |= U64BIG(x);
}
#elif ASCON_DATA_ACCESS == 'M'
#ifndef NDEBUG
#pragma message("Using memcpy to access data")
#endif
#include <string.h>
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
memcpy((uint8_t*)&x, bytes, n);
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = U64BIG(WORDTOU64(w));
memcpy(bytes, (uint8_t*)&x, n);
}
#elif ASCON_DATA_ACCESS == 'B'
#ifndef NDEBUG
#pragma message("Using bytewise data access")
#endif
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i;
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i;
}
#elif ASCON_DATA_ACCESS == 'H'
#ifndef NDEBUG
#pragma message("Using hybrid data access")
#endif
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
if (n == 8)
x = U64BIG(*(uint64_t*)bytes);
else
for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i;
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
if (n == 8)
*(uint64_t*)bytes = U64BIG(x);
else
for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i;
}
#else
#error "Ascon data access macro not defined correctly"
#endif
#endif /* LOADSTORE_H_ */
#include "permutations.h"
#include "round.h"
#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM
const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
{0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9},
{0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}};
#endif
#if ASCON_INLINE_PERM
#elif ASCON_SINGLE_PERM
void P(state_t* s, uint8_t rounds) {
printstate(" permutation input", s);
for (int i = START(rounds); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
#endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8
void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
#endif
#endif
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
#include "endian.h"
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
typedef struct {
u32 e;
u32 o;
} u32_2;
#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n)))))
#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n))))
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
#define to_bit_interleaving(out, in) \
do { \
u32 hi = (in) >> 32; \
u32 lo = (u32)(in); \
u32 r0, r1; \
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \
(out).e = (lo & 0x0000FFFF) | (hi << 16); \
(out).o = (lo >> 16) | (hi & 0xFFFF0000); \
} while (0)
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
#define from_bit_interleaving(out, in) \
do { \
u32 lo = ((in).e & 0x0000FFFF) | ((in).o << 16); \
u32 hi = ((in).e >> 16) | ((in).o & 0xFFFF0000); \
u32 r0, r1; \
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \
out = (u64)hi << 32 | lo; \
} while (0)
#define ROUND(C_e, C_o) \
do { \
u32 reg0, reg1, reg2, reg3; \
__asm__ __volatile__ ( \
"eor %[x2_e], %[x2_e], #" #C_e "\n\t" \
"eor %[x2_o], %[x2_o], #" #C_o "\n\t" \
"eor %[x0_e], %[x0_e], %[x4_e]\n\t" \
"eor %[x0_o], %[x0_o], %[x4_o]\n\t" \
"eor %[x4_e], %[x4_e], %[x3_e]\n\t" \
"eor %[x4_o], %[x4_o], %[x3_o]\n\t" \
"eor %[x2_e], %[x2_e], %[x1_e]\n\t" \
"eor %[x2_o], %[x2_o], %[x1_o]\n\t" \
"bic %[reg0], %[x0_e], %[x4_e]\n\t" \
"bic %[reg1], %[x4_e], %[x3_e]\n\t" \
"bic %[reg2], %[x2_e], %[x1_e]\n\t" \
"bic %[reg3], %[x1_e], %[x0_e]\n\t" \
"eor %[x2_e], %[x2_e], %[reg1]\n\t" \
"eor %[x0_e], %[x0_e], %[reg2]\n\t" \
"eor %[x4_e], %[x4_e], %[reg3]\n\t" \
"bic %[reg3], %[x3_e], %[x2_e]\n\t" \
"eor %[x3_e], %[x3_e], %[reg0]\n\t" \
"bic %[reg2], %[x0_o], %[x4_o]\n\t" \
"bic %[reg0], %[x2_o], %[x1_o]\n\t" \
"bic %[reg1], %[x4_o], %[x3_o]\n\t" \
"eor %[x1_e], %[x1_e], %[reg3]\n\t" \
"eor %[x0_o], %[x0_o], %[reg0]\n\t" \
"eor %[x2_o], %[x2_o], %[reg1]\n\t" \
"bic %[reg3], %[x1_o], %[x0_o]\n\t" \
"bic %[reg0], %[x3_o], %[x2_o]\n\t" \
"eor %[x3_o], %[x3_o], %[reg2]\n\t" \
"eor %[x3_o], %[x3_o], %[x2_o]\n\t" \
"eor %[x4_o], %[x4_o], %[reg3]\n\t" \
"eor %[x1_o], %[x1_o], %[reg0]\n\t" \
"eor %[x3_e], %[x3_e], %[x2_e]\n\t" \
"eor %[x1_e], %[x1_e], %[x0_e]\n\t" \
"eor %[x1_o], %[x1_o], %[x0_o]\n\t" \
"eor %[x0_e], %[x0_e], %[x4_e]\n\t" \
"eor %[x0_o], %[x0_o], %[x4_o]\n\t" \
"mvn %[x2_e], %[x2_e]\n\t" \
"mvn %[x2_o], %[x2_o]\n\t" \
"eor %[reg0], %[x0_e], %[x0_o], ror #4\n\t" \
"eor %[reg1], %[x0_o], %[x0_e], ror #5\n\t" \
"eor %[reg2], %[x1_e], %[x1_e], ror #11\n\t" \
"eor %[reg3], %[x1_o], %[x1_o], ror #11\n\t" \
"eor %[x0_e], %[x0_e], %[reg1], ror #9\n\t" \
"eor %[x0_o], %[x0_o], %[reg0], ror #10\n\t" \
"eor %[x1_e], %[x1_e], %[reg3], ror #19\n\t" \
"eor %[x1_o], %[x1_o], %[reg2], ror #20\n\t" \
"eor %[reg0], %[x2_e], %[x2_o], ror #2\n\t" \
"eor %[reg1], %[x2_o], %[x2_e], ror #3\n\t" \
"eor %[reg2], %[x3_e], %[x3_o], ror #3\n\t" \
"eor %[reg3], %[x3_o], %[x3_e], ror #4\n\t" \
"eor %[x2_e], %[x2_e], %[reg1]\n\t" \
"eor %[x2_o], %[x2_o], %[reg0], ror #1\n\t" \
"eor %[x3_e], %[x3_e], %[reg2], ror #5\n\t" \
"eor %[x3_o], %[x3_o], %[reg3], ror #5\n\t" \
"eor %[reg0], %[x4_e], %[x4_e], ror #17\n\t" \
"eor %[reg1], %[x4_o], %[x4_o], ror #17\n\t" \
"eor %[x4_e], %[x4_e], %[reg1], ror #3\n\t" \
"eor %[x4_o], %[x4_o], %[reg0], ror #4\n\t" \
: [x0_e] "+r" (x0.e), [x1_e] "+r" (x1.e), [x2_e] "+r" (x2.e), [x3_e] "+r" (x3.e), [x4_e] "+r" (x4.e), \
[x0_o] "+r" (x0.o), [x1_o] "+r" (x1.o), [x2_o] "+r" (x2.o), [x3_o] "+r" (x3.o), [x4_o] "+r" (x4.o), \
[reg0] "=r" (reg0), [reg1] "=r" (reg1), [reg2] "=r" (reg2), [reg3] "=r" (reg3)::); \
} while (0)
#define P12() \
do { \
ROUND(0xc, 0xc); \
ROUND(0x9, 0xc); \
ROUND(0xc, 0x9); \
ROUND(0x9, 0x9); \
ROUND(0x6, 0xc); \
ROUND(0x3, 0xc); \
ROUND(0x6, 0x9); \
ROUND(0x3, 0x9); \
ROUND(0xc, 0x6); \
ROUND(0x9, 0x6); \
ROUND(0xc, 0x3); \
ROUND(0x9, 0x3); \
} while (0)
#define P8() \
do { \
ROUND(0x6, 0xc); \
ROUND(0x3, 0xc); \
ROUND(0x6, 0x9); \
ROUND(0x3, 0x9); \
ROUND(0xc, 0x6); \
ROUND(0x9, 0x6); \
ROUND(0xc, 0x3); \
ROUND(0x9, 0x3); \
} while (0)
#define P6() \
do { \
ROUND(0x6, 0x9); \
ROUND(0x3, 0x9); \
ROUND(0xc, 0x6); \
ROUND(0x9, 0x6); \
ROUND(0xc, 0x3); \
ROUND(0x9, 0x3); \
} while (0)
#endif // PERMUTATIONS_H_
#include <stdint.h>
#include "api.h"
#include "ascon.h"
#include "printstate.h"
#include "round.h"
#define ASCON_128_KEYBYTES 16
#define ASCON_128A_KEYBYTES 16
#define ASCON_80PQ_KEYBYTES 20
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32
#define ASCON_128_IV WORD_T(0x8021000008220000)
#define ASCON_128A_IV WORD_T(0x8822000000200000)
#define ASCON_80PQ_IV WORD_T(0xc021000008220000)
#define ASCON_HASH_IV WORD_T(0x0020000008020010)
#define ASCON_XOF_IV WORD_T(0x0020000008020000)
#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7)
#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340)
#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8)
#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98)
#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398)
#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6)
#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220)
#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04)
#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2)
#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#if ASCON_RATE == 16
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20
#define IV ASCON_80PQ_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#define START(n) (12 - n)
#if ASCON_UNROLL_LOOPS
__forceinline void P12ROUNDS(state_t* s) {
ROUND(s, 0xc, 0xc);
ROUND(s, 0x9, 0xc);
ROUND(s, 0xc, 0x9);
ROUND(s, 0x9, 0x9);
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
__forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
__forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
#else /* !ASCON_UNROLL_LOOPS */
extern const uint8_t constants[][2];
__forceinline void P12ROUNDS(state_t* s) {
for (int i = START(12); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
__forceinline void P8ROUNDS(state_t* s) {
for (int i = START(8); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
__forceinline void P6ROUNDS(state_t* s) {
for (int i = START(6); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#endif
#if ASCON_INLINE_PERM
__forceinline void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
__forceinline void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
__forceinline void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#elif ASCON_SINGLE_PERM
#define P12(s) P(s, 12)
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
void P(state_t* s, uint8_t rounds);
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#endif
#endif /* PERMUTATIONS_H_ */
#ifndef PRINTSTATE_H_
#define PRINTSTATE_H_
#ifdef NDEBUG
#define printword(text, w)
#define printstate(text, s)
#else
#include <inttypes.h>
#include <stdio.h>
#include "ascon.h"
#include "word.h"
__forceinline void printword(const char* text, const word_t x) {
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x));
}
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#endif
#endif /* PRINTSTATE_H_ */
#ifndef ROUND_H_
#define ROUND_H_
#include "ascon.h"
#include "printstate.h"
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
*K0 = WORD_T(0);
*K1 = WORD_T(0);
*K2 = WORD_T(0);
}
__forceinline void PINIT(state_t* s) {
s->x0 = WORD_T(0);
s->x1 = WORD_T(0);
s->x2 = WORD_T(0);
s->x3 = WORD_T(0);
s->x4 = WORD_T(0);
}
__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) {
uint32_t tmp0, tmp1, tmp2, tmp3;
/* clang-format off */
__asm__ __volatile__( \
"eor %[x2_e], %[x2_e], %[C_e]\n\t" \
"eor %[x2_o], %[x2_o], %[C_o]\n\t" \
"eor %[x0_e], %[x0_e], %[x4_e]\n\t" \
"eor %[x0_o], %[x0_o], %[x4_o]\n\t" \
"eor %[x4_e], %[x4_e], %[x3_e]\n\t" \
"eor %[x4_o], %[x4_o], %[x3_o]\n\t" \
"eor %[x2_e], %[x2_e], %[x1_e]\n\t" \
"eor %[x2_o], %[x2_o], %[x1_o]\n\t" \
"bic %[tmp0], %[x0_e], %[x4_e]\n\t" \
"bic %[tmp1], %[x4_e], %[x3_e]\n\t" \
"bic %[tmp2], %[x2_e], %[x1_e]\n\t" \
"bic %[tmp3], %[x1_e], %[x0_e]\n\t" \
"eor %[x2_e], %[x2_e], %[tmp1]\n\t" \
"eor %[x0_e], %[x0_e], %[tmp2]\n\t" \
"eor %[x4_e], %[x4_e], %[tmp3]\n\t" \
"bic %[tmp3], %[x3_e], %[x2_e]\n\t" \
"eor %[x3_e], %[x3_e], %[tmp0]\n\t" \
"bic %[tmp2], %[x0_o], %[x4_o]\n\t" \
"bic %[tmp0], %[x2_o], %[x1_o]\n\t" \
"bic %[tmp1], %[x4_o], %[x3_o]\n\t" \
"eor %[x1_e], %[x1_e], %[tmp3]\n\t" \
"eor %[x0_o], %[x0_o], %[tmp0]\n\t" \
"eor %[x2_o], %[x2_o], %[tmp1]\n\t" \
"bic %[tmp3], %[x1_o], %[x0_o]\n\t" \
"bic %[tmp0], %[x3_o], %[x2_o]\n\t" \
"eor %[x3_o], %[x3_o], %[tmp2]\n\t" \
"eor %[x3_o], %[x3_o], %[x2_o]\n\t" \
"eor %[x4_o], %[x4_o], %[tmp3]\n\t" \
"eor %[x1_o], %[x1_o], %[tmp0]\n\t" \
"eor %[x3_e], %[x3_e], %[x2_e]\n\t" \
"eor %[x1_e], %[x1_e], %[x0_e]\n\t" \
"eor %[x1_o], %[x1_o], %[x0_o]\n\t" \
"eor %[x0_e], %[x0_e], %[x4_e]\n\t" \
"eor %[x0_o], %[x0_o], %[x4_o]\n\t" \
"mvn %[x2_e], %[x2_e]\n\t" \
"mvn %[x2_o], %[x2_o]\n\t" \
"eor %[tmp0], %[x0_e], %[x0_o], ror #4\n\t" \
"eor %[tmp1], %[x0_o], %[x0_e], ror #5\n\t" \
"eor %[tmp2], %[x1_e], %[x1_e], ror #11\n\t" \
"eor %[tmp3], %[x1_o], %[x1_o], ror #11\n\t" \
"eor %[x0_e], %[x0_e], %[tmp1], ror #9\n\t" \
"eor %[x0_o], %[x0_o], %[tmp0], ror #10\n\t" \
"eor %[x1_e], %[x1_e], %[tmp3], ror #19\n\t" \
"eor %[x1_o], %[x1_o], %[tmp2], ror #20\n\t" \
"eor %[tmp0], %[x2_e], %[x2_o], ror #2\n\t" \
"eor %[tmp1], %[x2_o], %[x2_e], ror #3\n\t" \
"eor %[tmp2], %[x3_e], %[x3_o], ror #3\n\t" \
"eor %[tmp3], %[x3_o], %[x3_e], ror #4\n\t" \
"eor %[x2_e], %[x2_e], %[tmp1]\n\t" \
"eor %[x2_o], %[x2_o], %[tmp0], ror #1\n\t" \
"eor %[x3_e], %[x3_e], %[tmp2], ror #5\n\t" \
"eor %[x3_o], %[x3_o], %[tmp3], ror #5\n\t" \
"eor %[tmp0], %[x4_e], %[x4_e], ror #17\n\t" \
"eor %[tmp1], %[x4_o], %[x4_o], ror #17\n\t" \
"eor %[x4_e], %[x4_e], %[tmp1], ror #3\n\t" \
"eor %[x4_o], %[x4_o], %[tmp0], ror #4\n\t" \
: [ x0_e ] "+r"(s->x0.e), \
[ x1_e ] "+r"(s->x1.e), \
[ x2_e ] "+r"(s->x2.e), \
[ x3_e ] "+r"(s->x3.e), \
[ x4_e ] "+r"(s->x4.e), \
[ x0_o ] "+r"(s->x0.o), \
[ x1_o ] "+r"(s->x1.o), \
[ x2_o ] "+r"(s->x2.o), \
[ x3_o ] "+r"(s->x3.o), \
[ x4_o ] "+r"(s->x4.o), \
[ tmp0 ] "=r"(tmp0), \
[ tmp1 ] "=r"(tmp1), \
[ tmp2 ] "=r"(tmp2), \
[ tmp3 ] "=r"(tmp3) \
: [ C_e ] "i"(C_e), \
[ C_o ] "i"(C_o) \
: );
/* clang-format on */
printstate(" round output", s);
}
#endif /* ROUND_H_ */
#ifndef WORD_H_
#define WORD_H_
#include <stdint.h>
#include "endian.h"
#include "interleave.h"
typedef struct {
uint32_t e;
uint32_t o;
} word_t;
__forceinline uint32_t ROR32(uint32_t x, int n) {
return (n == 0) ? x : x >> n | x << (32 - n);
}
__forceinline word_t ROR64(word_t x, int n) {
word_t r;
r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2);
r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2);
return r;
}
__forceinline word_t WORD_T(uint64_t x) {
return (word_t){.o = x >> 32, .e = x};
}
__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; }
__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); }
__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); }
__forceinline word_t NOT(word_t a) {
a.e = ~a.e;
a.o = ~a.o;
return a;
}
__forceinline word_t XOR(word_t a, word_t b) {
a.e ^= b.e;
a.o ^= b.o;
return a;
}
__forceinline word_t AND(word_t a, word_t b) {
a.e &= b.e;
a.o &= b.o;
return a;
}
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
word_t r;
r.o = lo2hi.o << 16 | hi2lo.o >> 16;
r.e = lo2hi.e << 16 | hi2lo.e >> 16;
return r;
}
__forceinline uint8_t NOTZERO(word_t a, word_t b) {
uint32_t result = a.e | a.o | b.e | b.o;
result |= result >> 16;
result |= result >> 8;
return (uint8_t)result;
}
__forceinline word_t PAD(int i) {
return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32);
}
__forceinline word_t CLEAR(word_t w, int n) {
/* undefined for n == 0 */
uint32_t mask = 0x0fffffff >> (n * 4 - 4);
return AND(w, WORD_T((uint64_t)mask << 32 | mask));
}
__forceinline uint64_t MASK(int n) {
/* undefined for n == 0 */
return ~0ull >> (64 - 8 * n);
}
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = *(uint64_t*)bytes;
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes & MASK(n);
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(n);
*(uint64_t*)bytes |= U64BIG(x);
}
__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
uint64_t x = 0;
for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i];
return U64TOWORD(x);
}
__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i];
}
#endif /* WORD_H_ */
#include "api.h"
#include "ascon.h"
#include "permutations.h"
#include "printstate.h"
__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) {
KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 20) {
*K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
k += 4;
}
*K1 = XOR(*K1, LOAD64(k));
*K2 = XOR(*K2, LOAD64(k + 8));
}
__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1,
word_t K2) {
word_t N0, N1;
/* load nonce */
N0 = LOAD64(npub);
N1 = LOAD64(npub + 8);
/* initialization */
PINIT(s);
s->x0 = XOR(s->x0, IV);
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0);
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
s->x3 = XOR(s->x3, N0);
s->x4 = XOR(s->x4, N1);
P12(s);
if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("initialization", s);
}
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
word_t* restrict px;
/* process associated data */
if (adlen) {
while (adlen >= ASCON_RATE) {
s->x0 = XOR(s->x0, LOAD64(ad));
if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8));
PB(s);
ad += ASCON_RATE;
adlen -= ASCON_RATE;
}
/* final associated data block */
px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) {
s->x0 = XOR(s->x0, LOAD64(ad));
px = &s->x1;
ad += 8;
adlen -= 8;
}
if (adlen) *px = XOR(*px, LOAD(ad, adlen));
*px = XOR(*px, PAD(adlen));
PB(s);
}
s->x4 = XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
}
__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
word_t* restrict px;
/* process plaintext */
while (mlen >= ASCON_RATE) {
s->x0 = XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
if (ASCON_RATE == 16) {
s->x1 = XOR(s->x1, LOAD64(m + 8));
STORE64(c + 8, s->x1);
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
mlen -= ASCON_RATE;
}
/* final plaintext block */
px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) {
s->x0 = XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
px = &s->x1;
m += 8;
c += 8;
mlen -= 8;
}
if (mlen) {
*px = XOR(*px, LOAD(m, mlen));
STORE(c, *px, mlen);
}
*px = XOR(*px, PAD(mlen));
printstate("process plaintext", s);
}
__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
word_t* restrict px;
word_t cx;
/* process ciphertext */
while (clen >= ASCON_RATE) {
cx = LOAD64(c);
s->x0 = XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
if (ASCON_RATE == 16) {
cx = LOAD64(c + 8);
s->x1 = XOR(s->x1, cx);
STORE64(m + 8, s->x1);
s->x1 = cx;
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
clen -= ASCON_RATE;
}
/* final ciphertext block */
px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) {
cx = LOAD64(c);
s->x0 = XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
px = &s->x1;
m += 8;
c += 8;
clen -= 8;
}
if (clen) {
cx = LOAD(c, clen);
*px = XOR(*px, cx);
STORE(m, *px, clen);
*px = CLEAR(*px, clen);
*px = XOR(*px, cx);
}
*px = XOR(*px, PAD(clen));
printstate("process ciphertext", s);
}
__forceinline void final(state_t* s, word_t K0, word_t K1, word_t K2) {
/* finalization */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
s->x2 = XOR(s->x2, K1);
s->x3 = XOR(s->x3, K2);
}
if (CRYPTO_KEYBYTES == 20) {
s->x1 = XOR(s->x1, KEYROT(K0, K1));
s->x2 = XOR(s->x2, KEYROT(K1, K2));
s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0)));
}
P12(s);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("finalization", s);
}
#if ASCON_INLINE_MODE
#define INIT init
#define ABSORB absorb
#define ENCRYPT encrypt
#define DECRYPT decrypt
#define FINAL final
#else
#define INIT ascon_init
#define ABSORB ascon_absorb
#define ENCRYPT ascon_encrypt
#define DECRYPT ascon_decrypt
#define FINAL ascon_final
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
word_t K0, K1, K2;
loadkey(&K0, &K1, &K2, k);
init(s, npub, K0, K1, K2);
}
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
absorb(s, ad, adlen);
}
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) {
encrypt(s, c, m, mlen);
}
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) {
decrypt(s, m, c, clen);
}
void ascon_final(state_t* s, const uint8_t* k) {
word_t K0, K1, K2;
loadkey(&K0, &K1, &K2, k);
final(s, K0, K1, K2);
}
#endif
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
uint64_t mlen, const uint8_t* ad, uint64_t adlen,
const uint8_t* nsec, const uint8_t* npub,
const uint8_t* k) {
word_t K0, K1, K2;
state_t s;
(void)nsec;
*clen = mlen + CRYPTO_ABYTES;
/* perform ascon computation */
loadkey(&K0, &K1, &K2, k);
INIT(&s, npub, K0, K1, K2);
ABSORB(&s, ad, adlen);
ENCRYPT(&s, c, m, mlen);
FINAL(&s, K0, K1, K2);
/* set tag */
c += mlen;
STOREBYTES(c, s.x3, 8);
STOREBYTES(c + 8, s.x4, 8);
return 0;
}
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
const uint8_t* c, uint64_t clen, const uint8_t* ad,
uint64_t adlen, const uint8_t* npub, const uint8_t* k) {
word_t K0, K1, K2;
state_t s;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
*mlen = clen = clen - CRYPTO_ABYTES;
/* perform ascon computation */
loadkey(&K0, &K1, &K2, k);
INIT(&s, npub, K0, K1, K2);
ABSORB(&s, ad, adlen);
DECRYPT(&s, m, c, clen);
FINAL(&s, K0, K1, K2);
/* verify tag (should be constant time, check compiler output) */
c += clen;
s.x3 = XOR(s.x3, LOADBYTES(c, 8));
s.x4 = XOR(s.x4, LOADBYTES(c + 8, 8));
if (NOTZERO(s.x3, s.x4)) {
*mlen = 0;
return -1;
}
return 0;
}
......@@ -3,3 +3,4 @@
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16
#include "ascon.h"
#include "api.h"
#include "loadstore.h"
#include "permutations.h"
#include "printstate.h"
__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) {
KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 20) {
XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
k += 4;
}
XOR(*K1, LOAD64(k));
XOR(*K2, LOAD64(k + 8));
}
__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) {
word_t N0, N1;
word_t K0, K1, K2;
/* load nonce */
N0 = LOAD64(npub);
N1 = LOAD64(npub + 8);
/* load key */
loadkey(&K0, &K1, &K2, k);
/* initialization */
PINIT(s);
XOR(s->x0, IV);
if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0);
XOR(s->x1, K1);
XOR(s->x2, K2);
XOR(s->x3, N0);
XOR(s->x4, N1);
P12(s);
if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0);
XOR(s->x3, K1);
XOR(s->x4, K2);
printstate("initialization", s);
}
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
word_t* restrict px;
/* process associated data */
if (adlen) {
while (adlen >= ASCON_RATE) {
XOR(s->x0, LOAD64(ad));
if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8));
PB(s);
ad += ASCON_RATE;
adlen -= ASCON_RATE;
}
/* final associated data block */
px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) {
XOR(s->x0, LOAD64(ad));
px = &s->x1;
ad += 8;
adlen -= 8;
}
if (adlen) XOR(*px, LOAD(ad, adlen));
XOR(*px, PAD(adlen));
PB(s);
}
XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
}
__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
word_t* restrict px;
/* process plaintext */
while (mlen >= ASCON_RATE) {
XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
if (ASCON_RATE == 16) {
XOR(s->x1, LOAD64(m + 8));
STORE64(c + 8, s->x1);
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
mlen -= ASCON_RATE;
}
/* final plaintext block */
px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) {
XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
px = &s->x1;
m += 8;
c += 8;
mlen -= 8;
}
if (mlen) {
XOR(*px, LOAD(m, mlen));
STORE(c, *px, mlen);
}
XOR(*px, PAD(mlen));
printstate("process plaintext", s);
}
__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
word_t* restrict px;
word_t cx;
/* process ciphertext */
while (clen >= ASCON_RATE) {
cx = LOAD64(c);
XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
if (ASCON_RATE == 16) {
cx = LOAD64(c + 8);
XOR(s->x1, cx);
STORE64(m + 8, s->x1);
s->x1 = cx;
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
clen -= ASCON_RATE;
}
/* final ciphertext block */
px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) {
cx = LOAD64(c);
XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
px = &s->x1;
m += 8;
c += 8;
clen -= 8;
}
if (clen) {
cx = LOAD(c, clen);
XOR(*px, cx);
STORE(m, *px, clen);
AND(*px, XMASK(clen));
XOR(*px, cx);
}
XOR(*px, PAD(clen));
printstate("process ciphertext", s);
}
__forceinline void final(state_t* s, const uint8_t* k) {
word_t K0, K1, K2;
/* load key */
loadkey(&K0, &K1, &K2, k);
/* finalization */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
XOR(s->x1, K1);
XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
XOR(s->x2, K1);
XOR(s->x3, K2);
}
if (CRYPTO_KEYBYTES == 20) {
XOR(s->x1, KEYROT(K0, K1));
XOR(s->x2, KEYROT(K1, K2));
XOR(s->x3, KEYROT(K2, WORD_T(0)));
}
P12(s);
XOR(s->x3, K1);
XOR(s->x4, K2);
printstate("finalization", s);
}
#if !ASCON_INLINE_MODE
#define INIT ascon_init
#define ABSORB ascon_absorb
#define ENCRYPT ascon_encrypt
#define DECRYPT ascon_decrypt
#define FINAL ascon_final
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
init(s, npub, k);
}
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
absorb(s, ad, adlen);
}
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) {
encrypt(s, c, m, mlen);
}
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) {
decrypt(s, m, c, clen);
}
void ascon_final(state_t* s, const uint8_t* k) { final(s, k); }
#else
#define INIT init
#define ABSORB absorb
#define ENCRYPT encrypt
#define DECRYPT decrypt
#define FINAL final
#endif
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
uint64_t mlen, const uint8_t* ad, uint64_t adlen,
const uint8_t* nsec, const uint8_t* npub,
const uint8_t* k) {
state_t s;
(void)nsec;
*clen = mlen + CRYPTO_ABYTES;
/* perform ascon computation */
INIT(&s, npub, k);
ABSORB(&s, ad, adlen);
ENCRYPT(&s, c, m, mlen);
FINAL(&s, k);
/* set tag */
c += mlen;
STORE64(c, s.x3);
STORE64(c + 8, s.x4);
return 0;
}
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
const uint8_t* c, uint64_t clen, const uint8_t* ad,
uint64_t adlen, const uint8_t* npub, const uint8_t* k) {
state_t s;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
*mlen = clen = clen - CRYPTO_ABYTES;
/* perform ascon computation */
INIT(&s, npub, k);
ABSORB(&s, ad, adlen);
DECRYPT(&s, m, c, clen);
FINAL(&s, k);
/* verify tag (should be constant time, check compiler output) */
c += clen;
XOR(s.x3, LOAD64(c));
XOR(s.x4, LOAD64(c + 8));
if (NOTZERO(s.x3, s.x4)) {
*mlen = 0;
return -1;
}
return 0;
}
#ifndef ASCON_H_
#define ASCON_H_
#include <stdint.h>
#include "config.h"
#include "word.h"
typedef struct {
word_t x0, x1, x2, x3, x4;
} state_t;
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k);
#endif /* ASCON_H */
#ifndef CONFIG_H_
#define CONFIG_H_
/* inline the Ascon mode */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#endif
/* inline the Ascon permutations */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0
#endif
/* single function for all permutations */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif
/* unroll the permutation loops */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 1
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#endif
#endif /* CONFIG_H_ */
......@@ -3,7 +3,10 @@
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
/* macros for big endian machines */
#ifndef NDEBUG
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
......@@ -11,21 +14,26 @@
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
/* macros for little endian machines */
#ifndef NDEBUG
#pragma message("Using macros for little endian machines")
#endif
#define U64BIG(x) \
((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \
(((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \
(((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \
(((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56))
(((0x00000000000000FFULL & (x)) << 56) | \
((0x000000000000FF00ULL & (x)) << 40) | \
((0x0000000000FF0000ULL & (x)) << 24) | \
((0x00000000FF000000ULL & (x)) << 8) | \
((0x000000FF00000000ULL & (x)) >> 8) | \
((0x0000FF0000000000ULL & (x)) >> 24) | \
((0x00FF000000000000ULL & (x)) >> 40) | \
((0xFF00000000000000ULL & (x)) >> 56))
#define U32BIG(x) \
((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \
(((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24))
#define U16BIG(x) \
((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8))
(((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \
((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24))
#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8))
#else
#error "ascon byte order macros not defined in endian.h"
#error "Ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
#endif /* ENDIAN_H_ */
#ifndef INTERLEAVE_H_
#define INTERLEAVE_H_
#include <stdint.h>
__forceinline uint32_t deinterleave_uint32(uint32_t x) {
uint32_t t;
t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1);
t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2);
t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4);
t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8);
return x;
}
__forceinline uint32_t interleave_uint32(uint32_t x) {
uint32_t t;
t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8);
t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4);
t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2);
t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1);
return x;
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t deinterleave32(uint64_t in) {
uint32_t hi = in >> 32;
uint32_t lo = in;
uint32_t r0, r1;
lo = deinterleave_uint32(lo);
hi = deinterleave_uint32(hi);
r0 = (lo & 0x0000FFFF) | (hi << 16);
r1 = (lo >> 16) | (hi & 0xFFFF0000);
return (uint64_t)r1 << 32 | r0;
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t interleave32(uint64_t in) {
uint32_t r0 = in;
uint32_t r1 = in >> 32;
uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16);
uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000);
lo = interleave_uint32(lo);
hi = interleave_uint32(hi);
return (uint64_t)hi << 32 | lo;
}
#endif /* INTERLEAVE_H_ */
#ifndef LOADSTORE_H_
#define LOADSTORE_H_
#include <stdint.h>
#include "config.h"
#include "endian.h"
#include "word.h"
/* 64-bit LSB mask (undefined for n == 0) */
#define MASK(n) (~0ull >> (64 - (n)))
/* get byte from Ascon 64-bit word */
#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i))))
/* set byte in Ascon 64-bit word */
#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i)))
#if ASCON_DATA_ACCESS == 'W'
#ifndef NDEBUG
#pragma message("Using wordwise data access")
#endif
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = U64BIG(*(uint64_t*)bytes);
return U64TOWORD(x);
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n));
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(8 * n);
*(uint64_t*)bytes |= U64BIG(x);
}
#elif ASCON_DATA_ACCESS == 'M'
#ifndef NDEBUG
#pragma message("Using memcpy to access data")
#endif
#include <string.h>
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
memcpy((uint8_t*)&x, bytes, n);
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = U64BIG(WORDTOU64(w));
memcpy(bytes, (uint8_t*)&x, n);
}
#elif ASCON_DATA_ACCESS == 'B'
#ifndef NDEBUG
#pragma message("Using bytewise data access")
#endif
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i;
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i;
}
#elif ASCON_DATA_ACCESS == 'H'
#ifndef NDEBUG
#pragma message("Using hybrid data access")
#endif
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
if (n == 8)
x = U64BIG(*(uint64_t*)bytes);
else
for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i;
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
if (n == 8)
*(uint64_t*)bytes = U64BIG(x);
else
for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i;
}
#else
#error "Ascon data access macro not defined correctly"
#endif
#endif /* LOADSTORE_H_ */
#include "permutations.h"
#include "round.h"
#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM
const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
{0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9},
{0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}};
#endif
#if ASCON_INLINE_PERM
#elif ASCON_SINGLE_PERM
void P(state_t* s, uint8_t rounds) {
printstate(" permutation input", s);
for (int i = START(rounds); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
#endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8
void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
#endif
#endif
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
#include "endian.h"
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
typedef struct {
u32 e;
u32 o;
} u32_2;
#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n)))))
#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n))))
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
#define to_bit_interleaving(out, in) \
do { \
u32 hi = (in) >> 32; \
u32 lo = (u32)(in); \
u32 r0, r1; \
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \
(out).e = (lo & 0x0000FFFF) | (hi << 16); \
(out).o = (lo >> 16) | (hi & 0xFFFF0000); \
} while (0)
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
#define from_bit_interleaving(out, in) \
do { \
u32 lo = ((in).e & 0x0000FFFF) | ((in).o << 16); \
u32 hi = ((in).e >> 16) | ((in).o & 0xFFFF0000); \
u32 r0, r1; \
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \
out = (u64)hi << 32 | lo; \
} while (0)
#define ROUND(C_e, C_o) \
do { \
/* round constant */ \
x2.e ^= C_e; x2.o ^= C_o; \
/* s-box layer */ \
x0.e ^= x4.e; x0.o ^= x4.o; \
x4.e ^= x3.e; x4.o ^= x3.o; \
x2.e ^= x1.e; x2.o ^= x1.o; \
t0.e = x0.e & (~x4.e); t0.o = x0.o & (~x4.o); \
x0.e ^= x2.e & (~x1.e); x0.o ^= x2.o & (~x1.o); \
x2.e ^= x4.e & (~x3.e); x2.o ^= x4.o & (~x3.o); \
x4.e ^= x1.e & (~x0.e); x4.o ^= x1.o & (~x0.o); \
x1.e ^= x3.e & (~x2.e); x1.o ^= x3.o & (~x2.o); \
x3.e ^= t0.e; x3.o ^= t0.o; \
x1.e ^= x0.e; x1.o ^= x0.o; \
x3.e ^= x2.e; x3.o ^= x2.o; \
x0.e ^= x4.e; x0.o ^= x4.o; \
/* linear layer */ \
t0.e = x0.e ^ ROTR32(x0.o, 4); \
t0.o = x0.o ^ ROTR32(x0.e, 5); \
x0.e ^= ROTR32(t0.o, 9); \
x0.o ^= ROTR32(t0.e, 10); \
t0.e = x1.e ^ ROTR32(x1.e, 11); \
t0.o = x1.o ^ ROTR32(x1.o, 11); \
x1.e ^= ROTR32(t0.o, 19); \
x1.o ^= ROTR32(t0.e, 20); \
t0.e = x2.e ^ ROTR32(x2.o, 2); \
t0.o = x2.o ^ ROTR32(x2.e, 3); \
x2.e ^= t0.o; \
x2.o ^= ROTR32(t0.e, 1); \
t0.e = x3.e ^ ROTR32(x3.o, 3); \
t0.o = x3.o ^ ROTR32(x3.e, 4); \
x3.e ^= ROTR32(t0.e, 5); \
x3.o ^= ROTR32(t0.o, 5); \
t0.e = x4.e ^ ROTR32(x4.e, 17); \
t0.o = x4.o ^ ROTR32(x4.o, 17); \
x4.e ^= ROTR32(t0.o, 3); \
x4.o ^= ROTR32(t0.e, 4); \
x2.e = ~x2.e; x2.o = ~x2.o; \
} while(0)
#define P12() \
do { \
ROUND(0xc, 0xc); \
ROUND(0x9, 0xc); \
ROUND(0xc, 0x9); \
ROUND(0x9, 0x9); \
ROUND(0x6, 0xc); \
ROUND(0x3, 0xc); \
ROUND(0x6, 0x9); \
ROUND(0x3, 0x9); \
ROUND(0xc, 0x6); \
ROUND(0x9, 0x6); \
ROUND(0xc, 0x3); \
ROUND(0x9, 0x3); \
} while (0)
#define P8() \
do { \
ROUND(0x6, 0xc); \
ROUND(0x3, 0xc); \
ROUND(0x6, 0x9); \
ROUND(0x3, 0x9); \
ROUND(0xc, 0x6); \
ROUND(0x9, 0x6); \
ROUND(0xc, 0x3); \
ROUND(0x9, 0x3); \
} while (0)
#define P6() \
do { \
ROUND(0x6, 0x9); \
ROUND(0x3, 0x9); \
ROUND(0xc, 0x6); \
ROUND(0x9, 0x6); \
ROUND(0xc, 0x3); \
ROUND(0x9, 0x3); \
} while (0)
#endif // PERMUTATIONS_H_
#include <stdint.h>
#include "api.h"
#include "ascon.h"
#include "printstate.h"
#include "round.h"
#define ASCON_128_KEYBYTES 16
#define ASCON_128A_KEYBYTES 16
#define ASCON_80PQ_KEYBYTES 20
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32
#define ASCON_128_IV WORD_T(0x8021000008220000)
#define ASCON_128A_IV WORD_T(0x8822000000200000)
#define ASCON_80PQ_IV WORD_T(0xc021000008220000)
#define ASCON_HASH_IV WORD_T(0x0020000008020010)
#define ASCON_XOF_IV WORD_T(0x0020000008020000)
#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7)
#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340)
#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8)
#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98)
#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398)
#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6)
#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220)
#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04)
#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2)
#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#if ASCON_RATE == 16
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20
#define IV ASCON_80PQ_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#define START(n) (12 - n)
#if ASCON_UNROLL_LOOPS
__forceinline void P12ROUNDS(state_t* s) {
ROUND(s, 0xc, 0xc);
ROUND(s, 0x9, 0xc);
ROUND(s, 0xc, 0x9);
ROUND(s, 0x9, 0x9);
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
__forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
__forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
#else /* !ASCON_UNROLL_LOOPS */
extern const uint8_t constants[][2];
__forceinline void P12ROUNDS(state_t* s) {
for (int i = START(12); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
__forceinline void P8ROUNDS(state_t* s) {
for (int i = START(8); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
__forceinline void P6ROUNDS(state_t* s) {
for (int i = START(6); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#endif
#if ASCON_INLINE_PERM
__forceinline void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
__forceinline void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
__forceinline void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#elif ASCON_SINGLE_PERM
#define P12(s) P(s, 12)
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
void P(state_t* s, uint8_t rounds);
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#endif
#endif /* PERMUTATIONS_H_ */
#ifndef PRINTSTATE_H_
#define PRINTSTATE_H_
#ifdef NDEBUG
#define printword(text, w)
#define printstate(text, s)
#else
#include <inttypes.h>
#include <stdio.h>
#include "ascon.h"
#include "word.h"
__forceinline void printword(const char* text, const word_t x) {
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x));
}
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#endif
#endif /* PRINTSTATE_H_ */
#ifndef ROUND_H_
#define ROUND_H_
#include "ascon.h"
#include "printstate.h"
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
*K0 = WORD_T(0);
*K1 = WORD_T(0);
*K2 = WORD_T(0);
}
__forceinline void PINIT(state_t* s) {
s->x0 = WORD_T(0);
s->x1 = WORD_T(0);
s->x2 = WORD_T(0);
s->x3 = WORD_T(0);
s->x4 = WORD_T(0);
}
__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) {
word_t tmp, C = {.o = C_o, .e = C_e};
/* round constant */
s->x2 = XOR(s->x2, C);
/* s-box layer */
s->x0 = XOR(s->x0, s->x4);
s->x4 = XOR(s->x4, s->x3);
s->x2 = XOR(s->x2, s->x1);
tmp = AND(s->x0, NOT(s->x4));
s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1)));
s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3)));
s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0)));
s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2)));
s->x3 = XOR(s->x3, tmp);
s->x1 = XOR(s->x1, s->x0);
s->x3 = XOR(s->x3, s->x2);
s->x0 = XOR(s->x0, s->x4);
/* linear layer */
tmp = XOR(s->x0, ROR64(s->x0, 28 - 19));
s->x0 = XOR(s->x0, ROR64(tmp, 19));
tmp = XOR(s->x1, ROR64(s->x1, 61 - 39));
s->x1 = XOR(s->x1, ROR64(tmp, 39));
tmp = XOR(s->x2, ROR64(s->x2, 6 - 1));
s->x2 = XOR(s->x2, ROR64(tmp, 1));
tmp = XOR(s->x3, ROR64(s->x3, 17 - 10));
s->x3 = XOR(s->x3, ROR64(tmp, 10));
tmp = XOR(s->x4, ROR64(s->x4, 41 - 7));
s->x4 = XOR(s->x4, ROR64(tmp, 7));
s->x2 = NOT(s->x2);
printstate(" round output", s);
}
#endif /* ROUND_H_ */
#ifndef WORD_H_
#define WORD_H_
#include <stdint.h>
#include "endian.h"
#include "interleave.h"
typedef struct {
uint32_t e;
uint32_t o;
} word_t;
__forceinline uint32_t ROR32(uint32_t x, int n) {
return (n == 0) ? x : x >> n | x << (32 - n);
}
__forceinline word_t ROR64(word_t x, int n) {
word_t r;
r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2);
r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2);
return r;
}
__forceinline word_t WORD_T(uint64_t x) {
return (word_t){.o = x >> 32, .e = x};
}
__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; }
__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); }
__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); }
__forceinline word_t NOT(word_t a) {
a.e = ~a.e;
a.o = ~a.o;
return a;
}
__forceinline word_t XOR(word_t a, word_t b) {
a.e ^= b.e;
a.o ^= b.o;
return a;
}
__forceinline word_t AND(word_t a, word_t b) {
a.e &= b.e;
a.o &= b.o;
return a;
}
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
word_t r;
r.o = lo2hi.o << 16 | hi2lo.o >> 16;
r.e = lo2hi.e << 16 | hi2lo.e >> 16;
return r;
}
__forceinline uint8_t NOTZERO(word_t a, word_t b) {
uint32_t result = a.e | a.o | b.e | b.o;
result |= result >> 16;
result |= result >> 8;
return (uint8_t)result;
}
__forceinline word_t PAD(int i) {
return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32);
}
__forceinline word_t CLEAR(word_t w, int n) {
/* undefined for n == 0 */
uint32_t mask = 0x0fffffff >> (n * 4 - 4);
return AND(w, WORD_T((uint64_t)mask << 32 | mask));
}
__forceinline uint64_t MASK(int n) {
/* undefined for n == 0 */
return ~0ull >> (64 - 8 * n);
}
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = *(uint64_t*)bytes;
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes & MASK(n);
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(n);
*(uint64_t*)bytes |= U64BIG(x);
}
__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
uint64_t x = 0;
for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i];
return U64TOWORD(x);
}
__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i];
}
#endif /* WORD_H_ */
#include "api.h"
#include "ascon.h"
#include "permutations.h"
#include "printstate.h"
void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len,
uint8_t mode);
void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen,
const uint8_t* ad, uint64_t adlen, const uint8_t* npub,
const uint8_t* k, uint8_t mode) {
word_t K0, K1, K2;
/* load key */
if (CRYPTO_KEYBYTES == 20) {
K0 = KEYROT(WORD_T(0), LOAD(k, 4));
k += 4;
}
K1 = LOAD64(k);
K2 = LOAD64(k + 8);
/* initialization */
s->x0 = IV;
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0);
s->x1 = K1;
s->x2 = K2;
s->x3 = LOAD64(npub);
s->x4 = LOAD64(npub + 8);
P12(s);
if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("initialization", s);
/* process associated data */
if (adlen) {
process_data(s, (void*)0, ad, adlen, ASCON_ABSORB);
PB(s);
}
s->x4 = XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
/* process plaintext/ciphertext */
process_data(s, out, in, tlen, mode);
if (mode == ASCON_ENCRYPT) printstate("process plaintext", s);
if (mode == ASCON_DECRYPT) printstate("process ciphertext", s);
/* finalization */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
s->x2 = XOR(s->x2, K1);
s->x3 = XOR(s->x3, K2);
}
if (CRYPTO_KEYBYTES == 20) {
s->x1 = XOR(s->x1, KEYROT(K0, K1));
s->x2 = XOR(s->x2, KEYROT(K1, K2));
s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0)));
}
P12(s);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("finalization", s);
}
......@@ -3,3 +3,4 @@
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16
#include "ascon.h"
#include "api.h"
#include "loadstore.h"
#include "permutations.h"
#include "printstate.h"
void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len,
uint8_t mode);
void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen,
const uint8_t* ad, uint64_t adlen, const uint8_t* npub,
const uint8_t* k, uint8_t mode) {
word_t N0, N1, K0, K1, K2;
/* load nonce */
N0 = LOAD64(npub);
N1 = LOAD64(npub + 8);
/* load key */
if (CRYPTO_KEYBYTES == 20) {
K0 = KEYROT(WORD_T(0), LOAD(k, 4));
k += 4;
}
K1 = LOAD64(k);
K2 = LOAD64(k + 8);
/* initialization */
s->x0 = IV;
if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0);
s->x1 = K1;
s->x2 = K2;
s->x3 = N0;
s->x4 = N1;
P12(s);
if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0);
XOR(s->x3, K1);
XOR(s->x4, K2);
printstate("initialization", s);
/* process associated data */
if (adlen) {
process_data(s, (void*)0, ad, adlen, ASCON_AD);
PB(s);
}
XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
/* process plaintext/ciphertext */
process_data(s, out, in, tlen, mode);
if (mode == ASCON_ENC) printstate("process plaintext", s);
if (mode == ASCON_DEC) printstate("process ciphertext", s);
/* finalization */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
XOR(s->x1, K1);
XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
XOR(s->x2, K1);
XOR(s->x3, K2);
}
if (CRYPTO_KEYBYTES == 20) {
XOR(s->x1, KEYROT(K0, K1));
XOR(s->x2, KEYROT(K1, K2));
XOR(s->x3, KEYROT(K2, WORD_T(0)));
}
P12(s);
XOR(s->x3, K1);
XOR(s->x4, K2);
printstate("finalization", s);
}
#ifndef ASCON_H_
#define ASCON_H_
#include <stdint.h>
#include "config.h"
#include "word.h"
typedef struct {
word_t x0, x1, x2, x3, x4;
} state_t;
#define ASCON_ABSORB 0x1
#define ASCON_SQUEEZE 0x2
#define ASCON_INSERT 0x4
#define ASCON_ENCRYPT (ASCON_ABSORB | ASCON_SQUEEZE)
#define ASCON_DECRYPT (ASCON_ABSORB | ASCON_SQUEEZE | ASCON_INSERT)
void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len,
uint8_t mode);
void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen,
const uint8_t* ad, uint64_t adlen, const uint8_t* npub,
const uint8_t* k, uint8_t mode);
#endif /* ASCON_H */
#ifndef CONFIG_H_
#define CONFIG_H_
/* inline the Ascon mode */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#endif
/* inline the Ascon permutations */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0
#endif
/* single function for all permutations */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 1
#endif
/* unroll the permutation loops */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 1
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#endif
#endif /* CONFIG_H_ */
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment